VITESS_THROTTLER
@@ -3407,6 +3407,14 @@ alter_statement:
Type: ForceCutOverAllMigrationType,
}
}
+| ALTER comment_opt VITESS_MIGRATION STRING CUTOVER_THRESHOLD STRING
+ {
+ $$ = &AlterMigration{
+ Type: SetCutOverThresholdMigrationType,
+ UUID: string($4),
+ Threshold: $6,
+ }
+ }
partitions_options_opt:
{
@@ -8301,6 +8309,7 @@ non_reserved_keyword:
| COUNT %prec FUNCTION_CALL_NON_KEYWORD
| CSV
| CURRENT
+| CUTOVER_THRESHOLD
| DATA
| DATE %prec STRING_TYPE_PREFIX_NON_KEYWORD
| DATE_ADD %prec FUNCTION_CALL_NON_KEYWORD
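The two hunks above add a `CUTOVER_THRESHOLD` clause to `ALTER VITESS_MIGRATION` and register the new token as a non-reserved keyword. A minimal sketch of exercising the new rule through the parser — the UUID and threshold values are illustrative placeholders; validation of the threshold happens later in the migration executor, outside this diff:

```go
package main

import (
	"fmt"

	"vitess.io/vitess/go/vt/sqlparser"
)

func main() {
	parser, err := sqlparser.New(sqlparser.Options{})
	if err != nil {
		panic(err)
	}
	// Matches the new rule: ALTER VITESS_MIGRATION STRING CUTOVER_THRESHOLD STRING.
	stmt, err := parser.Parse("alter vitess_migration 'aa11bb22' cutover_threshold '15s'")
	if err != nil {
		panic(err)
	}
	alter := stmt.(*sqlparser.AlterMigration)
	fmt.Println(alter.UUID, alter.Threshold) // aa11bb22 15s
}
```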
diff --git a/go/vt/vitessdriver/convert.go b/go/vt/vitessdriver/convert.go
index 7ba95db4147..aa8bcedc7ee 100644
--- a/go/vt/vitessdriver/convert.go
+++ b/go/vt/vitessdriver/convert.go
@@ -43,10 +43,8 @@ func (cv *converter) ToNative(v sqltypes.Value) (any, error) {
return v.ToUint64()
case v.IsFloat():
return v.ToFloat64()
- case v.Type() == sqltypes.Datetime, v.Type() == sqltypes.Timestamp:
- return datetimeToNative(v, cv.location)
- case v.Type() == sqltypes.Date:
- return dateToNative(v, cv.location)
+ case v.Type() == sqltypes.Datetime, v.Type() == sqltypes.Timestamp, v.Type() == sqltypes.Date:
+ return v.ToTimeInLocation(cv.location)
case v.IsQuoted() || v.Type() == sqltypes.Bit || v.Type() == sqltypes.Decimal:
out, err = v.ToBytes()
case v.Type() == sqltypes.Expression:
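With this change, `DATE` values flow through the same `ToTimeInLocation` path as `DATETIME` and `TIMESTAMP`, instead of the bespoke parsers deleted below. A small sketch of the shared conversion — constructing values with `sqltypes.MakeTrusted` here is purely for illustration:

```go
package main

import (
	"fmt"
	"time"

	"vitess.io/vitess/go/sqltypes"
)

func main() {
	loc := time.FixedZone("UTC+3", 3*60*60)

	dt := sqltypes.MakeTrusted(sqltypes.Datetime, []byte("2024-03-11 01:02:03"))
	d := sqltypes.MakeTrusted(sqltypes.Date, []byte("2024-03-11"))

	// Both types now share one conversion path; the driver's configured
	// location is applied because MySQL datetimes carry no timezone.
	t1, err := dt.ToTimeInLocation(loc)
	if err != nil {
		panic(err)
	}
	t2, err := d.ToTimeInLocation(loc)
	if err != nil {
		panic(err)
	}
	fmt.Println(t1, t2)
}
```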
diff --git a/go/vt/vitessdriver/time.go b/go/vt/vitessdriver/time.go
index 70ec2d679ae..c6526197d9d 100644
--- a/go/vt/vitessdriver/time.go
+++ b/go/vt/vitessdriver/time.go
@@ -17,83 +17,12 @@ limitations under the License.
package vitessdriver
import (
- "errors"
"time"
"vitess.io/vitess/go/sqltypes"
)
-// ErrInvalidTime is returned when we fail to parse a datetime
-// string from MySQL. This should never happen unless things are
-// seriously messed up.
-var ErrInvalidTime = errors.New("invalid MySQL time string")
-
var isoTimeFormat = "2006-01-02 15:04:05.999999"
-var isoNullTime = "0000-00-00 00:00:00.000000"
-var isoTimeLength = len(isoTimeFormat)
-
-// parseISOTime pases a time string in MySQL's textual datetime format.
-// This is very similar to ISO8601, with some differences:
-//
-// - There is no T separator between the date and time sections;
-// a space is used instead.
-// - There is never a timezone section in the string, as these datetimes
-// are not timezone-aware. There isn't a Z value for UTC times for
-// the same reason.
-//
-// Note that this function can handle both DATE (which should _always_ have
-// a length of 10) and DATETIME strings (which have a variable length, 18+
-// depending on the number of decimal sub-second places).
-//
-// Also note that this function handles the case where MySQL returns a NULL
-// time (with a string where all sections are zeroes) by returning a zeroed
-// out time.Time object. NULL time strings are not considered a parsing error.
-//
-// See: isoTimeFormat
-func parseISOTime(tstr string, loc *time.Location, minLen, maxLen int) (t time.Time, err error) {
- tlen := len(tstr)
- if tlen < minLen || tlen > maxLen {
- err = ErrInvalidTime
- return
- }
-
- if tstr == isoNullTime[:tlen] {
- // This is what MySQL would send when the date is NULL,
- // so return an empty time.Time instead.
- // This is not a parsing error
- return
- }
-
- if loc == nil {
- loc = time.UTC
- }
-
- // Since the time format returned from MySQL never has a Timezone
- // section, ParseInLocation will initialize the time.Time struct
- // with the default `loc` we're passing here.
- return time.ParseInLocation(isoTimeFormat[:tlen], tstr, loc)
-}
-
-// datetimeToNative converts a Datetime Value into a time.Time
-func datetimeToNative(v sqltypes.Value, loc *time.Location) (time.Time, error) {
- // Valid format string offsets for a DATETIME
- // |DATETIME |19+
- // |------------------|------|
- // "2006-01-02 15:04:05.999999"
- return parseISOTime(v.ToString(), loc, 19, isoTimeLength)
-}
-
-// dateToNative converts a Date Value into a time.Time.
-// Note that there's no specific type in the Go stdlib to represent
-// dates without time components, so the returned Time will have
-// their hours/mins/seconds zeroed out.
-func dateToNative(v sqltypes.Value, loc *time.Location) (time.Time, error) {
- // Valid format string offsets for a DATE
- // |DATE |10
- // |---------|
- // "2006-01-02 00:00:00.000000"
- return parseISOTime(v.ToString(), loc, 10, 10)
-}
// NewDatetime builds a Datetime Value
func NewDatetime(t time.Time, defaultLoc *time.Location) sqltypes.Value {
diff --git a/go/vt/vitessdriver/time_test.go b/go/vt/vitessdriver/time_test.go
deleted file mode 100644
index 949d8f43354..00000000000
--- a/go/vt/vitessdriver/time_test.go
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
-Copyright 2019 The Vitess Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package vitessdriver
-
-import (
- "reflect"
- "testing"
- "time"
-
- "vitess.io/vitess/go/sqltypes"
-)
-
-var randomLocation = time.FixedZone("Nowhere", 3*60*60)
-
-func DatetimeValue(str string) sqltypes.Value {
- return sqltypes.TestValue(sqltypes.Datetime, str)
-}
-
-func DateValue(str string) sqltypes.Value {
- return sqltypes.TestValue(sqltypes.Date, str)
-}
-
-func TestDatetimeToNative(t *testing.T) {
-
- tcases := []struct {
- val sqltypes.Value
- loc *time.Location
- out time.Time
- err bool
- }{{
- val: DatetimeValue("1899-08-24 17:20:00"),
- out: time.Date(1899, 8, 24, 17, 20, 0, 0, time.UTC),
- }, {
- val: DatetimeValue("1952-03-11 01:02:03"),
- loc: time.Local,
- out: time.Date(1952, 3, 11, 1, 2, 3, 0, time.Local),
- }, {
- val: DatetimeValue("1952-03-11 01:02:03"),
- loc: randomLocation,
- out: time.Date(1952, 3, 11, 1, 2, 3, 0, randomLocation),
- }, {
- val: DatetimeValue("1952-03-11 01:02:03"),
- loc: time.UTC,
- out: time.Date(1952, 3, 11, 1, 2, 3, 0, time.UTC),
- }, {
- val: DatetimeValue("1899-08-24 17:20:00.000000"),
- out: time.Date(1899, 8, 24, 17, 20, 0, 0, time.UTC),
- }, {
- val: DatetimeValue("1899-08-24 17:20:00.000001"),
- out: time.Date(1899, 8, 24, 17, 20, 0, int(1*time.Microsecond), time.UTC),
- }, {
- val: DatetimeValue("1899-08-24 17:20:00.123456"),
- out: time.Date(1899, 8, 24, 17, 20, 0, int(123456*time.Microsecond), time.UTC),
- }, {
- val: DatetimeValue("1899-08-24 17:20:00.222"),
- out: time.Date(1899, 8, 24, 17, 20, 0, int(222*time.Millisecond), time.UTC),
- }, {
- val: DatetimeValue("1899-08-24 17:20:00.1234567"),
- err: true,
- }, {
- val: DatetimeValue("1899-08-24 17:20:00.1"),
- out: time.Date(1899, 8, 24, 17, 20, 0, int(100*time.Millisecond), time.UTC),
- }, {
- val: DatetimeValue("0000-00-00 00:00:00"),
- out: time.Time{},
- }, {
- val: DatetimeValue("0000-00-00 00:00:00.0"),
- out: time.Time{},
- }, {
- val: DatetimeValue("0000-00-00 00:00:00.000"),
- out: time.Time{},
- }, {
- val: DatetimeValue("0000-00-00 00:00:00.000000"),
- out: time.Time{},
- }, {
- val: DatetimeValue("0000-00-00 00:00:00.0000000"),
- err: true,
- }, {
- val: DatetimeValue("1899-08-24T17:20:00.000000"),
- err: true,
- }, {
- val: DatetimeValue("1899-02-31 17:20:00.000000"),
- err: true,
- }, {
- val: DatetimeValue("1899-08-24 17:20:00."),
- out: time.Date(1899, 8, 24, 17, 20, 0, 0, time.UTC),
- }, {
- val: DatetimeValue("0000-00-00 00:00:00.000001"),
- err: true,
- }, {
- val: DatetimeValue("1899-08-24 17:20:00 +02:00"),
- err: true,
- }, {
- val: DatetimeValue("1899-08-24"),
- err: true,
- }, {
- val: DatetimeValue("This is not a valid timestamp"),
- err: true,
- }}
-
- for _, tcase := range tcases {
- got, err := datetimeToNative(tcase.val, tcase.loc)
- if tcase.err && err == nil {
- t.Errorf("datetimeToNative(%v, %#v) succeeded; expected error", tcase.val, tcase.loc)
- }
- if !tcase.err && err != nil {
- t.Errorf("datetimeToNative(%v, %#v) failed: %v", tcase.val, tcase.loc, err)
- }
- if !reflect.DeepEqual(got, tcase.out) {
- t.Errorf("datetimeToNative(%v, %#v): %v, want %v", tcase.val, tcase.loc, got, tcase.out)
- }
- }
-}
-
-func TestDateToNative(t *testing.T) {
- tcases := []struct {
- val sqltypes.Value
- loc *time.Location
- out time.Time
- err bool
- }{{
- val: DateValue("1899-08-24"),
- out: time.Date(1899, 8, 24, 0, 0, 0, 0, time.UTC),
- }, {
- val: DateValue("1952-03-11"),
- loc: time.Local,
- out: time.Date(1952, 3, 11, 0, 0, 0, 0, time.Local),
- }, {
- val: DateValue("1952-03-11"),
- loc: randomLocation,
- out: time.Date(1952, 3, 11, 0, 0, 0, 0, randomLocation),
- }, {
- val: DateValue("0000-00-00"),
- out: time.Time{},
- }, {
- val: DateValue("1899-02-31"),
- err: true,
- }, {
- val: DateValue("1899-08-24 17:20:00"),
- err: true,
- }, {
- val: DateValue("0000-00-00 00:00:00"),
- err: true,
- }, {
- val: DateValue("This is not a valid timestamp"),
- err: true,
- }}
-
- for _, tcase := range tcases {
- got, err := dateToNative(tcase.val, tcase.loc)
- if tcase.err && err == nil {
- t.Errorf("dateToNative(%v, %#v) succeeded; expected error", tcase.val, tcase.loc)
- }
- if !tcase.err && err != nil {
- t.Errorf("dateToNative(%v, %#v) failed: %v", tcase.val, tcase.loc, err)
- }
- if !reflect.DeepEqual(got, tcase.out) {
- t.Errorf("dateToNative(%v, %#v): %v, want %v", tcase.val, tcase.loc, got, tcase.out)
- }
- }
-}
diff --git a/go/vt/vtadmin/api.go b/go/vt/vtadmin/api.go
index cef8816504a..4f91459d9ed 100644
--- a/go/vt/vtadmin/api.go
+++ b/go/vt/vtadmin/api.go
@@ -59,6 +59,7 @@ import (
"vitess.io/vitess/go/vt/vtadmin/rbac"
"vitess.io/vitess/go/vt/vtadmin/sort"
"vitess.io/vitess/go/vt/vtadmin/vtadminproto"
+ "vitess.io/vitess/go/vt/vtctl/grpcvtctldserver"
"vitess.io/vitess/go/vt/vtctl/workflow"
"vitess.io/vitess/go/vt/vtenv"
"vitess.io/vitess/go/vt/vterrors"
@@ -488,6 +489,31 @@ func (api *API) ApplySchema(ctx context.Context, req *vtadminpb.ApplySchemaReque
return nil, err
}
+	// Parser with default options. New() itself initializes with the default MySQL version.
+ parser, err := sqlparser.New(sqlparser.Options{
+ TruncateUILen: 512,
+ TruncateErrLen: 0,
+ })
+ if err != nil {
+ return nil, err
+ }
+
+	// Split the SQL statements received in the request.
+ sqlParts, err := parser.SplitStatementToPieces(req.Sql)
+ if err != nil {
+ return nil, err
+ }
+
+ req.Request.Sql = sqlParts
+
+ // Set the callerID if not empty.
+ if req.CallerId != "" {
+ req.Request.CallerId = &vtrpcpb.CallerID{Principal: req.CallerId}
+ }
+
+ // Set the default wait replicas timeout.
+ req.Request.WaitReplicasTimeout = protoutil.DurationToProto(grpcvtctldserver.DefaultWaitReplicasTimeout)
+
return c.ApplySchema(ctx, req.Request)
}
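The new vtadmin path splits a raw SQL string into individual statements before forwarding, since the vtctld `ApplySchemaRequest.Sql` field is a slice. The splitting step in isolation:

```go
package main

import (
	"fmt"

	"vitess.io/vitess/go/vt/sqlparser"
)

func main() {
	parser, err := sqlparser.New(sqlparser.Options{})
	if err != nil {
		panic(err)
	}
	// One request body may carry several semicolon-separated DDL statements.
	pieces, err := parser.SplitStatementToPieces("alter table t1 add column a int; alter table t2 add column b int")
	if err != nil {
		panic(err)
	}
	for _, piece := range pieces {
		fmt.Println(piece)
	}
}
```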
diff --git a/go/vt/vtadmin/http/schema_migrations.go b/go/vt/vtadmin/http/schema_migrations.go
index e0207989648..3da6026fe9f 100644
--- a/go/vt/vtadmin/http/schema_migrations.go
+++ b/go/vt/vtadmin/http/schema_migrations.go
@@ -34,19 +34,26 @@ func ApplySchema(ctx context.Context, r Request, api *API) *JSONResponse {
decoder := json.NewDecoder(r.Body)
defer r.Body.Close()
- var req vtctldatapb.ApplySchemaRequest
- if err := decoder.Decode(&req); err != nil {
+ var body struct {
+ Sql string `json:"sql"`
+ CallerId string `json:"caller_id"`
+ Request vtctldatapb.ApplySchemaRequest `json:"request"`
+ }
+
+ if err := decoder.Decode(&body); err != nil {
return NewJSONResponse(nil, &errors.BadRequest{
Err: err,
})
}
vars := mux.Vars(r.Request)
- req.Keyspace = vars["keyspace"]
+ body.Request.Keyspace = vars["keyspace"]
resp, err := api.server.ApplySchema(ctx, &vtadminpb.ApplySchemaRequest{
ClusterId: vars["cluster_id"],
- Request: &req,
+ Sql: body.Sql,
+ CallerId: body.CallerId,
+ Request: &body.Request,
})
return NewJSONResponse(resp, err)
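The handler now expects a wrapper object with `sql`, `caller_id`, and an embedded vtctld `request`, rather than a bare `ApplySchemaRequest`. A hedged sketch of what a client POST could look like — the URL path and field values are assumptions, not taken from this diff:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"

	vtctldatapb "vitess.io/vitess/go/vt/proto/vtctldata"
)

func main() {
	body, err := json.Marshal(map[string]any{
		"sql":       "alter table t add column c int",
		"caller_id": "admin",
		"request":   &vtctldatapb.ApplySchemaRequest{DdlStrategy: "online"},
	})
	if err != nil {
		panic(err)
	}
	// Cluster ID and keyspace come from the URL, mirroring mux.Vars in the
	// handler; the exact route here is illustrative.
	resp, err := http.Post("http://localhost:14200/api/migration/cluster1/ks1",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}
```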
diff --git a/go/vt/vtctl/grpcvtctldserver/server.go b/go/vt/vtctl/grpcvtctldserver/server.go
index e280a410e02..3bfce2204a2 100644
--- a/go/vt/vtctl/grpcvtctldserver/server.go
+++ b/go/vt/vtctl/grpcvtctldserver/server.go
@@ -303,7 +303,9 @@ func (s *VtctldServer) ApplySchema(ctx context.Context, req *vtctldatapb.ApplySc
}
for _, shard := range execResult.SuccessShards {
- resp.RowsAffectedByShard[shard.Shard] = shard.Result.RowsAffected
+ for _, result := range shard.Results {
+ resp.RowsAffectedByShard[shard.Shard] += result.RowsAffected
+ }
}
return resp, err
diff --git a/go/vt/vtctl/grpcvtctldserver/testutil/test_tmclient.go b/go/vt/vtctl/grpcvtctldserver/testutil/test_tmclient.go
index b9c83c3658d..768fae5bff4 100644
--- a/go/vt/vtctl/grpcvtctldserver/testutil/test_tmclient.go
+++ b/go/vt/vtctl/grpcvtctldserver/testutil/test_tmclient.go
@@ -188,6 +188,8 @@ type TabletManagerClient struct {
EventJitter time.Duration
ErrorAfter time.Duration
}
+	// Backup state, keyed by tablet alias.
+ TabletsBackupState map[string]bool
// keyed by tablet alias.
ChangeTagsResult map[string]struct {
Response *tabletmanagerdatapb.ChangeTagsResponse
@@ -1080,6 +1082,9 @@ func (fake *TabletManagerClient) ReplicationStatus(ctx context.Context, tablet *
}
if result, ok := fake.ReplicationStatusResults[key]; ok {
+ if _, ok = fake.TabletsBackupState[key]; ok {
+ result.Position.BackupRunning = fake.TabletsBackupState[key]
+ }
return result.Position, result.Error
}
diff --git a/go/vt/vtctl/reparentutil/emergency_reparenter.go b/go/vt/vtctl/reparentutil/emergency_reparenter.go
index ef30f48e8ac..70faf8958c7 100644
--- a/go/vt/vtctl/reparentutil/emergency_reparenter.go
+++ b/go/vt/vtctl/reparentutil/emergency_reparenter.go
@@ -258,7 +258,7 @@ func (erp *EmergencyReparenter) reparentShardLocked(ctx context.Context, ev *eve
// 2. Remove the tablets with the Must_not promote rule
// 3. Remove cross-cell tablets if PreventCrossCellPromotion is specified
// Our final primary candidate MUST belong to this list of valid candidates
- validCandidateTablets, err = erp.filterValidCandidates(validCandidateTablets, stoppedReplicationSnapshot.reachableTablets, prevPrimary, opts)
+ validCandidateTablets, err = erp.filterValidCandidates(validCandidateTablets, stoppedReplicationSnapshot.reachableTablets, stoppedReplicationSnapshot.tabletsBackupState, prevPrimary, opts)
if err != nil {
return err
}
@@ -737,9 +737,12 @@ func (erp *EmergencyReparenter) identifyPrimaryCandidate(
return nil, vterrors.Errorf(vtrpc.Code_INTERNAL, "unreachable - did not find a valid primary candidate even though the valid candidate list was non-empty")
}
-// filterValidCandidates filters valid tablets, keeping only the ones which can successfully be promoted without any constraint failures and can make forward progress on being promoted
-func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb.Tablet, tabletsReachable []*topodatapb.Tablet, prevPrimary *topodatapb.Tablet, opts EmergencyReparentOptions) ([]*topodatapb.Tablet, error) {
+// filterValidCandidates filters valid tablets, keeping only the ones which can successfully be promoted without any
+// constraint failures and can make forward progress on being promoted. It will filter out candidates taking backups
+// if possible.
+func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb.Tablet, tabletsReachable []*topodatapb.Tablet, tabletsBackupState map[string]bool, prevPrimary *topodatapb.Tablet, opts EmergencyReparentOptions) ([]*topodatapb.Tablet, error) {
var restrictedValidTablets []*topodatapb.Tablet
+ var notPreferredValidTablets []*topodatapb.Tablet
for _, tablet := range validTablets {
tabletAliasStr := topoproto.TabletAliasString(tablet.Alias)
// Remove tablets which have MustNot promote rule since they must never be promoted
@@ -766,9 +769,20 @@ func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb
}
continue
}
- restrictedValidTablets = append(restrictedValidTablets, tablet)
+ // Put candidates that are running a backup in a separate list
+ backingUp, ok := tabletsBackupState[tabletAliasStr]
+ if ok && backingUp {
+ erp.logger.Infof("Setting %s in list of valid candidates taking a backup", tabletAliasStr)
+ notPreferredValidTablets = append(notPreferredValidTablets, tablet)
+ } else {
+ restrictedValidTablets = append(restrictedValidTablets, tablet)
+ }
+ }
+ if len(restrictedValidTablets) > 0 {
+ return restrictedValidTablets, nil
}
- return restrictedValidTablets, nil
+
+ return notPreferredValidTablets, nil
}
// findErrantGTIDs tries to find errant GTIDs for the valid candidates and returns the updated list of valid candidates.
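The net effect is a two-bucket preference: candidates taking a backup are only eligible when no other valid candidate remains. The same pattern, reduced to a self-contained helper (a generic sketch, not code from this PR):

```go
package main

import "fmt"

// preferUnflagged keeps items whose flag is false; if none qualify,
// it falls back to the flagged ones rather than returning nothing.
func preferUnflagged[T comparable](items []T, flagged map[T]bool) []T {
	var preferred, fallback []T
	for _, it := range items {
		if flagged[it] {
			fallback = append(fallback, it)
		} else {
			preferred = append(preferred, it)
		}
	}
	if len(preferred) > 0 {
		return preferred
	}
	return fallback
}

func main() {
	tablets := []string{"zone1-101", "zone1-102"}
	backingUp := map[string]bool{"zone1-101": true}
	fmt.Println(preferUnflagged(tablets, backingUp)) // [zone1-102]
}
```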
diff --git a/go/vt/vtctl/reparentutil/emergency_reparenter_test.go b/go/vt/vtctl/reparentutil/emergency_reparenter_test.go
index ea6e768d036..3669c34dc11 100644
--- a/go/vt/vtctl/reparentutil/emergency_reparenter_test.go
+++ b/go/vt/vtctl/reparentutil/emergency_reparenter_test.go
@@ -4463,27 +4463,55 @@ func TestEmergencyReparenter_filterValidCandidates(t *testing.T) {
}
)
allTablets := []*topodatapb.Tablet{primaryTablet, replicaTablet, rdonlyTablet, replicaCrossCellTablet, rdonlyCrossCellTablet}
+ noTabletsTakingBackup := map[string]bool{
+ topoproto.TabletAliasString(primaryTablet.Alias): false, topoproto.TabletAliasString(replicaTablet.Alias): false,
+ topoproto.TabletAliasString(rdonlyTablet.Alias): false, topoproto.TabletAliasString(replicaCrossCellTablet.Alias): false,
+ topoproto.TabletAliasString(rdonlyCrossCellTablet.Alias): false,
+ }
+ replicaTakingBackup := map[string]bool{
+ topoproto.TabletAliasString(primaryTablet.Alias): false, topoproto.TabletAliasString(replicaTablet.Alias): true,
+ topoproto.TabletAliasString(rdonlyTablet.Alias): false, topoproto.TabletAliasString(replicaCrossCellTablet.Alias): false,
+ topoproto.TabletAliasString(rdonlyCrossCellTablet.Alias): false,
+ }
tests := []struct {
- name string
- durability string
- validTablets []*topodatapb.Tablet
- tabletsReachable []*topodatapb.Tablet
- prevPrimary *topodatapb.Tablet
- opts EmergencyReparentOptions
- filteredTablets []*topodatapb.Tablet
- errShouldContain string
+ name string
+ durability string
+ validTablets []*topodatapb.Tablet
+ tabletsReachable []*topodatapb.Tablet
+ tabletsTakingBackup map[string]bool
+ prevPrimary *topodatapb.Tablet
+ opts EmergencyReparentOptions
+ filteredTablets []*topodatapb.Tablet
+ errShouldContain string
}{
{
- name: "filter must not",
- durability: "none",
- validTablets: allTablets,
- tabletsReachable: allTablets,
- filteredTablets: []*topodatapb.Tablet{primaryTablet, replicaTablet, replicaCrossCellTablet},
+ name: "filter must not",
+ durability: "none",
+ validTablets: allTablets,
+ tabletsReachable: allTablets,
+ tabletsTakingBackup: noTabletsTakingBackup,
+ filteredTablets: []*topodatapb.Tablet{primaryTablet, replicaTablet, replicaCrossCellTablet},
}, {
- name: "filter cross cell",
- durability: "none",
- validTablets: allTablets,
- tabletsReachable: allTablets,
+ name: "host taking backup must not be on the list when there are other candidates",
+ durability: "none",
+ validTablets: allTablets,
+ tabletsReachable: []*topodatapb.Tablet{replicaTablet, replicaCrossCellTablet, rdonlyTablet, rdonlyCrossCellTablet},
+ tabletsTakingBackup: replicaTakingBackup,
+ filteredTablets: []*topodatapb.Tablet{replicaCrossCellTablet},
+ }, {
+ name: "host taking backup must be the only one on the list when there are no other candidates",
+ durability: "none",
+ validTablets: allTablets,
+ tabletsReachable: []*topodatapb.Tablet{replicaTablet, rdonlyTablet, rdonlyCrossCellTablet},
+ tabletsTakingBackup: replicaTakingBackup,
+ filteredTablets: []*topodatapb.Tablet{replicaTablet},
+ }, {
+ name: "filter cross cell",
+ durability: "none",
+ validTablets: allTablets,
+ tabletsReachable: allTablets,
+ tabletsTakingBackup: noTabletsTakingBackup,
+
prevPrimary: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{
Cell: "zone-1",
@@ -4494,11 +4522,12 @@ func TestEmergencyReparenter_filterValidCandidates(t *testing.T) {
},
filteredTablets: []*topodatapb.Tablet{primaryTablet, replicaTablet},
}, {
- name: "filter establish",
- durability: "cross_cell",
- validTablets: []*topodatapb.Tablet{primaryTablet, replicaTablet},
- tabletsReachable: []*topodatapb.Tablet{primaryTablet, replicaTablet, rdonlyTablet, rdonlyCrossCellTablet},
- filteredTablets: nil,
+ name: "filter establish",
+ durability: "cross_cell",
+ validTablets: []*topodatapb.Tablet{primaryTablet, replicaTablet},
+ tabletsReachable: []*topodatapb.Tablet{primaryTablet, replicaTablet, rdonlyTablet, rdonlyCrossCellTablet},
+ tabletsTakingBackup: noTabletsTakingBackup,
+ filteredTablets: nil,
}, {
name: "filter mixed",
durability: "cross_cell",
@@ -4510,34 +4539,38 @@ func TestEmergencyReparenter_filterValidCandidates(t *testing.T) {
opts: EmergencyReparentOptions{
PreventCrossCellPromotion: true,
},
- validTablets: allTablets,
- tabletsReachable: allTablets,
- filteredTablets: []*topodatapb.Tablet{replicaCrossCellTablet},
+ validTablets: allTablets,
+ tabletsReachable: allTablets,
+ tabletsTakingBackup: noTabletsTakingBackup,
+ filteredTablets: []*topodatapb.Tablet{replicaCrossCellTablet},
}, {
- name: "error - requested primary must not",
- durability: "none",
- validTablets: allTablets,
- tabletsReachable: allTablets,
+ name: "error - requested primary must not",
+ durability: "none",
+ validTablets: allTablets,
+ tabletsReachable: allTablets,
+ tabletsTakingBackup: noTabletsTakingBackup,
opts: EmergencyReparentOptions{
NewPrimaryAlias: rdonlyTablet.Alias,
},
errShouldContain: "proposed primary zone-1-0000000003 has a must not promotion rule",
}, {
- name: "error - requested primary not in same cell",
- durability: "none",
- validTablets: allTablets,
- tabletsReachable: allTablets,
- prevPrimary: primaryTablet,
+ name: "error - requested primary not in same cell",
+ durability: "none",
+ validTablets: allTablets,
+ tabletsReachable: allTablets,
+ tabletsTakingBackup: noTabletsTakingBackup,
+ prevPrimary: primaryTablet,
opts: EmergencyReparentOptions{
PreventCrossCellPromotion: true,
NewPrimaryAlias: replicaCrossCellTablet.Alias,
},
errShouldContain: "proposed primary zone-2-0000000002 is is a different cell as the previous primary",
}, {
- name: "error - requested primary cannot establish",
- durability: "cross_cell",
- validTablets: allTablets,
- tabletsReachable: []*topodatapb.Tablet{primaryTablet, replicaTablet, rdonlyTablet, rdonlyCrossCellTablet},
+ name: "error - requested primary cannot establish",
+ durability: "cross_cell",
+ validTablets: allTablets,
+ tabletsTakingBackup: noTabletsTakingBackup,
+ tabletsReachable: []*topodatapb.Tablet{primaryTablet, replicaTablet, rdonlyTablet, rdonlyCrossCellTablet},
opts: EmergencyReparentOptions{
NewPrimaryAlias: primaryTablet.Alias,
},
@@ -4551,7 +4584,7 @@ func TestEmergencyReparenter_filterValidCandidates(t *testing.T) {
tt.opts.durability = durability
logger := logutil.NewMemoryLogger()
erp := NewEmergencyReparenter(nil, nil, logger)
- tabletList, err := erp.filterValidCandidates(tt.validTablets, tt.tabletsReachable, tt.prevPrimary, tt.opts)
+ tabletList, err := erp.filterValidCandidates(tt.validTablets, tt.tabletsReachable, tt.tabletsTakingBackup, tt.prevPrimary, tt.opts)
if tt.errShouldContain != "" {
require.Error(t, err)
require.Contains(t, err.Error(), tt.errShouldContain)
diff --git a/go/vt/vtctl/reparentutil/reparent_sorter_test.go b/go/vt/vtctl/reparentutil/reparent_sorter_test.go
index ae5d56e884e..87e7b253d54 100644
--- a/go/vt/vtctl/reparentutil/reparent_sorter_test.go
+++ b/go/vt/vtctl/reparentutil/reparent_sorter_test.go
@@ -22,7 +22,6 @@ import (
"github.com/stretchr/testify/require"
"vitess.io/vitess/go/mysql/replication"
-
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
)
diff --git a/go/vt/vtctl/reparentutil/replication.go b/go/vt/vtctl/reparentutil/replication.go
index 8642de84fc7..e7919361a09 100644
--- a/go/vt/vtctl/reparentutil/replication.go
+++ b/go/vt/vtctl/reparentutil/replication.go
@@ -165,9 +165,10 @@ func SetReplicationSource(ctx context.Context, ts *topo.Server, tmc tmclient.Tab
// replicationSnapshot stores the status maps and the tablets that were reachable
// when trying to stopReplicationAndBuildStatusMaps.
type replicationSnapshot struct {
- statusMap map[string]*replicationdatapb.StopReplicationStatus
- primaryStatusMap map[string]*replicationdatapb.PrimaryStatus
- reachableTablets []*topodatapb.Tablet
+ statusMap map[string]*replicationdatapb.StopReplicationStatus
+ primaryStatusMap map[string]*replicationdatapb.PrimaryStatus
+ reachableTablets []*topodatapb.Tablet
+ tabletsBackupState map[string]bool
}
// stopReplicationAndBuildStatusMaps stops replication on all replicas, then
@@ -193,9 +194,10 @@ func stopReplicationAndBuildStatusMaps(
errChan = make(chan concurrency.Error)
allTablets []*topodatapb.Tablet
res = &replicationSnapshot{
- statusMap: map[string]*replicationdatapb.StopReplicationStatus{},
- primaryStatusMap: map[string]*replicationdatapb.PrimaryStatus{},
- reachableTablets: []*topodatapb.Tablet{},
+ statusMap: map[string]*replicationdatapb.StopReplicationStatus{},
+ primaryStatusMap: map[string]*replicationdatapb.PrimaryStatus{},
+ reachableTablets: []*topodatapb.Tablet{},
+ tabletsBackupState: map[string]bool{},
}
)
@@ -214,6 +216,9 @@ func stopReplicationAndBuildStatusMaps(
logger.Infof("getting replication position from %v", alias)
stopReplicationStatus, err := tmc.StopReplicationAndGetStatus(groupCtx, tabletInfo.Tablet, replicationdatapb.StopReplicationMode_IOTHREADONLY)
+ m.Lock()
+ res.tabletsBackupState[alias] = stopReplicationStatus.GetBackupRunning()
+ m.Unlock()
if err != nil {
sqlErr, isSQLErr := sqlerror.NewSQLErrorFromError(err).(*sqlerror.SQLError)
if isSQLErr && sqlErr != nil && sqlErr.Number() == sqlerror.ERNotReplica {
diff --git a/go/vt/vtctl/reparentutil/util.go b/go/vt/vtctl/reparentutil/util.go
index fd701f8c69b..c4c23e65c7e 100644
--- a/go/vt/vtctl/reparentutil/util.go
+++ b/go/vt/vtctl/reparentutil/util.go
@@ -58,7 +58,8 @@ const (
// cell as the current primary, and to be different from avoidPrimaryAlias. The
// tablet with the most advanced replication position is chosen to minimize the
// amount of time spent catching up with the current primary. Further ties are
-// broken by the durability rules.
+// broken by the durability rules. Tablets taking backups are excluded from
+// consideration.
// Note that the search for the most advanced replication position will race
// with transactions being executed on the current primary, so when all tablets
// are at roughly the same position, then the choice of new primary-elect will
@@ -126,13 +127,17 @@ func ElectNewPrimary(
tb := tablet
errorGroup.Go(func() error {
// find and store the positions for the tablet
- pos, replLag, err := findPositionAndLagForTablet(groupCtx, tb, logger, tmc, opts.WaitReplicasTimeout)
+ pos, replLag, takingBackup, err := findTabletPositionLagBackupStatus(groupCtx, tb, logger, tmc, opts.WaitReplicasTimeout)
mu.Lock()
defer mu.Unlock()
if err == nil && (opts.TolerableReplLag == 0 || opts.TolerableReplLag >= replLag) {
- validTablets = append(validTablets, tb)
- tabletPositions = append(tabletPositions, pos)
- innodbBufferPool = append(innodbBufferPool, innodbBufferPoolData[topoproto.TabletAliasString(tb.Alias)])
+ if takingBackup {
+ reasonsToInvalidate.WriteString(fmt.Sprintf("\n%v is taking a backup", topoproto.TabletAliasString(tablet.Alias)))
+ } else {
+ validTablets = append(validTablets, tb)
+ tabletPositions = append(tabletPositions, pos)
+ innodbBufferPool = append(innodbBufferPool, innodbBufferPoolData[topoproto.TabletAliasString(tb.Alias)])
+ }
} else {
reasonsToInvalidate.WriteString(fmt.Sprintf("\n%v has %v replication lag which is more than the tolerable amount", topoproto.TabletAliasString(tablet.Alias), replLag))
}
@@ -150,7 +155,7 @@ func ElectNewPrimary(
return nil, vterrors.Errorf(vtrpc.Code_INTERNAL, "cannot find a tablet to reparent to%v", reasonsToInvalidate.String())
}
- // sort the tablets for finding the best primary
+ // sort preferred tablets for finding the best primary
err = sortTabletsForReparent(validTablets, tabletPositions, innodbBufferPool, opts.durability)
if err != nil {
return nil, err
@@ -159,9 +164,9 @@ func ElectNewPrimary(
return validTablets[0].Alias, nil
}
-// findPositionAndLagForTablet processes the replication position and lag for a single tablet and
+// findTabletPositionLagBackupStatus processes the replication position and lag for a single tablet and
// returns it. It is safe to call from multiple goroutines.
-func findPositionAndLagForTablet(ctx context.Context, tablet *topodatapb.Tablet, logger logutil.Logger, tmc tmclient.TabletManagerClient, waitTimeout time.Duration) (replication.Position, time.Duration, error) {
+func findTabletPositionLagBackupStatus(ctx context.Context, tablet *topodatapb.Tablet, logger logutil.Logger, tmc tmclient.TabletManagerClient, waitTimeout time.Duration) (replication.Position, time.Duration, bool, error) {
logger.Infof("getting replication position from %v", topoproto.TabletAliasString(tablet.Alias))
ctx, cancel := context.WithTimeout(ctx, waitTimeout)
@@ -172,10 +177,10 @@ func findPositionAndLagForTablet(ctx context.Context, tablet *topodatapb.Tablet,
sqlErr, isSQLErr := sqlerror.NewSQLErrorFromError(err).(*sqlerror.SQLError)
if isSQLErr && sqlErr != nil && sqlErr.Number() == sqlerror.ERNotReplica {
logger.Warningf("no replication statue from %v, using empty gtid set", topoproto.TabletAliasString(tablet.Alias))
- return replication.Position{}, 0, nil
+ return replication.Position{}, 0, false, nil
}
logger.Warningf("failed to get replication status from %v, ignoring tablet: %v", topoproto.TabletAliasString(tablet.Alias), err)
- return replication.Position{}, 0, err
+ return replication.Position{}, 0, false, err
}
// Use the relay log position if available, otherwise use the executed GTID set (binary log position).
@@ -186,10 +191,10 @@ func findPositionAndLagForTablet(ctx context.Context, tablet *topodatapb.Tablet,
pos, err := replication.DecodePosition(positionString)
if err != nil {
logger.Warningf("cannot decode replica position %v for tablet %v, ignoring tablet: %v", positionString, topoproto.TabletAliasString(tablet.Alias), err)
- return replication.Position{}, 0, err
+ return replication.Position{}, 0, status.BackupRunning, err
}
- return pos, time.Second * time.Duration(status.ReplicationLagSeconds), nil
+ return pos, time.Second * time.Duration(status.ReplicationLagSeconds), status.BackupRunning, nil
}
// FindCurrentPrimary returns the current primary tablet of a shard, if any. The
diff --git a/go/vt/vtctl/reparentutil/util_test.go b/go/vt/vtctl/reparentutil/util_test.go
index f4e9092fc3f..ac44da8175a 100644
--- a/go/vt/vtctl/reparentutil/util_test.go
+++ b/go/vt/vtctl/reparentutil/util_test.go
@@ -139,6 +139,112 @@ func TestElectNewPrimary(t *testing.T) {
},
errContains: nil,
},
+ {
+ name: "Two good replicas, but one of them is taking a backup so we pick the other one",
+ tmc: &chooseNewPrimaryTestTMClient{
+				// both zone1-101 and zone1-102 are equivalent from a replication PoV, but zone1-101 is taking a backup
+ replicationStatuses: map[string]*replicationdatapb.Status{
+ "zone1-0000000101": {
+ Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5",
+ BackupRunning: true,
+ },
+ "zone1-0000000102": {
+ Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5",
+ BackupRunning: false,
+ },
+ },
+ },
+ tolerableReplLag: 50 * time.Second,
+ shardInfo: topo.NewShardInfo("testkeyspace", "-", &topodatapb.Shard{
+ PrimaryAlias: &topodatapb.TabletAlias{
+ Cell: "zone1",
+ Uid: 100,
+ },
+ }, nil),
+ tabletMap: map[string]*topo.TabletInfo{
+ "primary": {
+ Tablet: &topodatapb.Tablet{
+ Alias: &topodatapb.TabletAlias{
+ Cell: "zone1",
+ Uid: 100,
+ },
+ Type: topodatapb.TabletType_PRIMARY,
+ },
+ },
+ "replica1": {
+ Tablet: &topodatapb.Tablet{
+ Alias: &topodatapb.TabletAlias{
+ Cell: "zone1",
+ Uid: 101,
+ },
+ Type: topodatapb.TabletType_REPLICA,
+ },
+ },
+ "replica2": {
+ Tablet: &topodatapb.Tablet{
+ Alias: &topodatapb.TabletAlias{
+ Cell: "zone1",
+ Uid: 102,
+ },
+ Type: topodatapb.TabletType_REPLICA,
+ },
+ },
+ },
+ avoidPrimaryAlias: &topodatapb.TabletAlias{
+ Cell: "zone1",
+ Uid: 0,
+ },
+ expected: &topodatapb.TabletAlias{
+ Cell: "zone1",
+ Uid: 102,
+ },
+ errContains: nil,
+ },
+ {
+ name: "Only one replica, but it's taking a backup. We don't elect it.",
+ tmc: &chooseNewPrimaryTestTMClient{
+				// zone1-101 is the only replica, and it is taking a backup
+ replicationStatuses: map[string]*replicationdatapb.Status{
+ "zone1-0000000101": {
+ Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5",
+ BackupRunning: true,
+ },
+ },
+ },
+ tolerableReplLag: 50 * time.Second,
+ shardInfo: topo.NewShardInfo("testkeyspace", "-", &topodatapb.Shard{
+ PrimaryAlias: &topodatapb.TabletAlias{
+ Cell: "zone1",
+ Uid: 100,
+ },
+ }, nil),
+ tabletMap: map[string]*topo.TabletInfo{
+ "primary": {
+ Tablet: &topodatapb.Tablet{
+ Alias: &topodatapb.TabletAlias{
+ Cell: "zone1",
+ Uid: 100,
+ },
+ Type: topodatapb.TabletType_PRIMARY,
+ },
+ },
+ "replica1": {
+ Tablet: &topodatapb.Tablet{
+ Alias: &topodatapb.TabletAlias{
+ Cell: "zone1",
+ Uid: 101,
+ },
+ Type: topodatapb.TabletType_REPLICA,
+ },
+ },
+ },
+ avoidPrimaryAlias: &topodatapb.TabletAlias{
+ Cell: "zone1",
+ Uid: 0,
+ },
+ expected: nil,
+ errContains: []string{"zone1-0000000101 is taking a backup"},
+ },
{
name: "new primary alias provided - no tolerable replication lag",
tolerableReplLag: 0,
@@ -414,6 +520,67 @@ func TestElectNewPrimary(t *testing.T) {
},
errContains: nil,
},
+ {
+ name: "Two replicas, first one with too much lag, another one taking a backup - none is a good candidate",
+ tmc: &chooseNewPrimaryTestTMClient{
+ // zone1-101 is behind zone1-102
+ replicationStatuses: map[string]*replicationdatapb.Status{
+ "zone1-0000000101": {
+ Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1",
+ ReplicationLagSeconds: 55,
+ },
+ "zone1-0000000102": {
+ Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5",
+ BackupRunning: true,
+ },
+ },
+ },
+ tolerableReplLag: 50 * time.Second,
+ shardInfo: topo.NewShardInfo("testkeyspace", "-", &topodatapb.Shard{
+ PrimaryAlias: &topodatapb.TabletAlias{
+ Cell: "zone1",
+ Uid: 100,
+ },
+ }, nil),
+ tabletMap: map[string]*topo.TabletInfo{
+ "primary": {
+ Tablet: &topodatapb.Tablet{
+ Alias: &topodatapb.TabletAlias{
+ Cell: "zone1",
+ Uid: 100,
+ },
+ Type: topodatapb.TabletType_PRIMARY,
+ },
+ },
+ "replica1": {
+ Tablet: &topodatapb.Tablet{
+ Alias: &topodatapb.TabletAlias{
+ Cell: "zone1",
+ Uid: 101,
+ },
+ Type: topodatapb.TabletType_REPLICA,
+ },
+ },
+ "replica2": {
+ Tablet: &topodatapb.Tablet{
+ Alias: &topodatapb.TabletAlias{
+ Cell: "zone1",
+ Uid: 102,
+ },
+ Type: topodatapb.TabletType_REPLICA,
+ },
+ },
+ },
+ avoidPrimaryAlias: &topodatapb.TabletAlias{
+ Cell: "zone1",
+ Uid: 0,
+ },
+ expected: nil,
+ errContains: []string{
+ "zone1-0000000101 has 55s replication lag which is more than the tolerable amount",
+ "zone1-0000000102 is taking a backup",
+ },
+ },
{
name: "found a replica - more advanced relay log position",
tmc: &chooseNewPrimaryTestTMClient{
@@ -881,12 +1048,13 @@ func TestFindPositionForTablet(t *testing.T) {
ctx := context.Background()
logger := logutil.NewMemoryLogger()
tests := []struct {
- name string
- tmc *testutil.TabletManagerClient
- tablet *topodatapb.Tablet
- expectedPosition string
- expectedLag time.Duration
- expectedErr string
+ name string
+ tmc *testutil.TabletManagerClient
+ tablet *topodatapb.Tablet
+ expectedPosition string
+ expectedLag time.Duration
+ expectedErr string
+ expectedTakingBackup bool
}{
{
name: "executed gtid set",
@@ -911,6 +1079,31 @@ func TestFindPositionForTablet(t *testing.T) {
},
expectedLag: 201 * time.Second,
expectedPosition: "MySQL56/3e11fa47-71ca-11e1-9e33-c80aa9429562:1-5",
+ }, {
+ name: "Host is taking a backup",
+ tmc: &testutil.TabletManagerClient{
+ ReplicationStatusResults: map[string]struct {
+ Position *replicationdatapb.Status
+ Error error
+ }{
+ "zone1-0000000100": {
+ Position: &replicationdatapb.Status{
+ Position: "MySQL56/3e11fa47-71ca-11e1-9e33-c80aa9429562:1-5",
+ ReplicationLagSeconds: 201,
+ },
+ },
+ },
+ TabletsBackupState: map[string]bool{"zone1-0000000100": true},
+ },
+ tablet: &topodatapb.Tablet{
+ Alias: &topodatapb.TabletAlias{
+ Cell: "zone1",
+ Uid: 100,
+ },
+ },
+ expectedLag: 201 * time.Second,
+ expectedTakingBackup: true,
+ expectedPosition: "MySQL56/3e11fa47-71ca-11e1-9e33-c80aa9429562:1-5",
}, {
name: "no replication status",
tmc: &testutil.TabletManagerClient{
@@ -981,7 +1174,7 @@ func TestFindPositionForTablet(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
- pos, lag, err := findPositionAndLagForTablet(ctx, test.tablet, logger, test.tmc, 10*time.Second)
+ pos, lag, takingBackup, err := findTabletPositionLagBackupStatus(ctx, test.tablet, logger, test.tmc, 10*time.Second)
if test.expectedErr != "" {
require.EqualError(t, err, test.expectedErr)
return
@@ -990,6 +1183,7 @@ func TestFindPositionForTablet(t *testing.T) {
posString := replication.EncodePosition(pos)
require.Equal(t, test.expectedPosition, posString)
require.Equal(t, test.expectedLag, lag)
+ require.Equal(t, test.expectedTakingBackup, takingBackup)
})
}
}
diff --git a/go/vt/vtgate/debugenv.go b/go/vt/vtgate/debugenv.go
index 4fa989c69a3..7213353432d 100644
--- a/go/vt/vtgate/debugenv.go
+++ b/go/vt/vtgate/debugenv.go
@@ -22,9 +22,10 @@ import (
"html"
"net/http"
"strconv"
- "text/template"
"time"
+ "github.com/google/safehtml/template"
+
"vitess.io/vitess/go/acl"
"vitess.io/vitess/go/vt/discovery"
"vitess.io/vitess/go/vt/log"
diff --git a/go/vt/vtgate/evalengine/eval_result.go b/go/vt/vtgate/evalengine/eval_result.go
index d9916af03be..5c1973d8eb1 100644
--- a/go/vt/vtgate/evalengine/eval_result.go
+++ b/go/vt/vtgate/evalengine/eval_result.go
@@ -62,6 +62,7 @@ func (er EvalResult) String() string {
// TupleValues allows for retrieval of the value we expose for public consumption
func (er EvalResult) TupleValues() []sqltypes.Value {
+ // TODO: Make this collation-aware
switch v := er.v.(type) {
case *evalTuple:
result := make([]sqltypes.Value, 0, len(v.t))
diff --git a/go/vt/vtgate/planbuilder/operators/delete.go b/go/vt/vtgate/planbuilder/operators/delete.go
index 4d30d9b9cc1..81e36d54315 100644
--- a/go/vt/vtgate/planbuilder/operators/delete.go
+++ b/go/vt/vtgate/planbuilder/operators/delete.go
@@ -328,7 +328,7 @@ func updateQueryGraphWithSource(ctx *plancontext.PlanningContext, input Operator
if tbl.ID != tblID {
continue
}
- tbl.Alias = sqlparser.NewAliasedTableExpr(sqlparser.NewTableName(vTbl.Name.String()), tbl.Alias.As.String())
+ tbl.Alias = sqlparser.NewAliasedTableExpr(sqlparser.NewTableNameWithQualifier(vTbl.Name.String(), vTbl.Keyspace.Name), tbl.Alias.As.String())
tbl.Table, _ = tbl.Alias.TableName()
}
return op, Rewrote("change query table point to source table")
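The one-line fix qualifies the rewritten source table with its keyspace, so the planner can no longer resolve it against a same-named table in the user's default keyspace — which is what the new `dml_cases_with_user_as_default.json` cases below lock in. The AST helpers involved:

```go
package main

import (
	"fmt"

	"vitess.io/vitess/go/vt/sqlparser"
)

func main() {
	// Unqualified: resolution depends on the session's default keyspace.
	unqual := sqlparser.NewAliasedTableExpr(sqlparser.NewTableName("ref_tbl"), "r")
	// Qualified: pinned to the source keyspace, as the fix does.
	qual := sqlparser.NewAliasedTableExpr(sqlparser.NewTableNameWithQualifier("ref_tbl", "main"), "r")

	fmt.Println(sqlparser.String(unqual)) // ref_tbl as r
	fmt.Println(sqlparser.String(qual))   // main.ref_tbl as r
}
```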
diff --git a/go/vt/vtgate/planbuilder/plan_test.go b/go/vt/vtgate/planbuilder/plan_test.go
index 9cf92a91ddf..acba2caf937 100644
--- a/go/vt/vtgate/planbuilder/plan_test.go
+++ b/go/vt/vtgate/planbuilder/plan_test.go
@@ -557,6 +557,7 @@ func (s *planTestSuite) TestWithUserDefaultKeyspaceFromFileSharded() {
}
s.testFile("select_cases_with_user_as_default.json", vschema, false)
+ s.testFile("dml_cases_with_user_as_default.json", vschema, false)
}
func (s *planTestSuite) TestWithSystemSchemaAsDefaultKeyspace() {
diff --git a/go/vt/vtgate/planbuilder/testdata/dml_cases_with_user_as_default.json b/go/vt/vtgate/planbuilder/testdata/dml_cases_with_user_as_default.json
new file mode 100644
index 00000000000..ff66967c2ce
--- /dev/null
+++ b/go/vt/vtgate/planbuilder/testdata/dml_cases_with_user_as_default.json
@@ -0,0 +1,24 @@
+[
+ {
+ "comment": "Update reference table from sharded keyspace to unsharded keyspace",
+ "query": "update ambiguous_ref_with_source set done = true where id = 1;",
+ "plan": {
+ "QueryType": "UPDATE",
+ "Original": "update ambiguous_ref_with_source set done = true where id = 1;",
+ "Instructions": {
+ "OperatorType": "Update",
+ "Variant": "Unsharded",
+ "Keyspace": {
+ "Name": "main",
+ "Sharded": false
+ },
+ "TargetTabletType": "PRIMARY",
+ "Query": "update ambiguous_ref_with_source set done = true where id = 1",
+ "Table": "ambiguous_ref_with_source"
+ },
+ "TablesUsed": [
+ "main.ambiguous_ref_with_source"
+ ]
+ }
+ }
+]
\ No newline at end of file
diff --git a/go/vt/vtgate/querylogz.go b/go/vt/vtgate/querylogz.go
index 7c72e950d4a..05d301f28be 100644
--- a/go/vt/vtgate/querylogz.go
+++ b/go/vt/vtgate/querylogz.go
@@ -20,15 +20,15 @@ import (
"net/http"
"strconv"
"strings"
- "text/template"
"time"
- "vitess.io/vitess/go/vt/vtgate/logstats"
+ "github.com/google/safehtml/template"
"vitess.io/vitess/go/acl"
"vitess.io/vitess/go/vt/log"
"vitess.io/vitess/go/vt/logz"
"vitess.io/vitess/go/vt/sqlparser"
+ "vitess.io/vitess/go/vt/vtgate/logstats"
)
var (
diff --git a/go/vt/vtgate/querylogz_test.go b/go/vt/vtgate/querylogz_test.go
index 3cecb983b3f..9236b2ac840 100644
--- a/go/vt/vtgate/querylogz_test.go
+++ b/go/vt/vtgate/querylogz_test.go
@@ -35,7 +35,7 @@ import (
func TestQuerylogzHandlerFormatting(t *testing.T) {
req, _ := http.NewRequest("GET", "/querylogz?timeout=10&limit=1", nil)
- logStats := logstats.NewLogStats(context.Background(), "Execute", "select name from test_table limit 1000", "suuid", nil)
+	logStats := logstats.NewLogStats(context.Background(), "Execute", "select name, 'inject <script>alert();</script>' from test_table limit 1000", "suuid", nil)
logStats.StmtType = "select"
logStats.RowsAffected = 1000
logStats.ShardQueries = 1
@@ -64,7 +64,7 @@ func TestQuerylogzHandlerFormatting(t *testing.T) {
`0.002 | `,
`0.003 | `,
`select | `,
- `select name from test_table limit 1000 | `,
+ regexp.QuoteMeta(`select name,​ 'inject <script>alert()​;</script>' from test_table limit 1000 | `),
`1 | `,
`1000 | `,
` | `,
@@ -94,7 +94,7 @@ func TestQuerylogzHandlerFormatting(t *testing.T) {
`0.002 | `,
`0.003 | `,
`select | `,
- `select name from test_table limit 1000 | `,
+ regexp.QuoteMeta(`select name,​ 'inject <script>alert()​;</script>' from test_table limit 1000 | `),
`1 | `,
`1000 | `,
` | `,
@@ -124,7 +124,7 @@ func TestQuerylogzHandlerFormatting(t *testing.T) {
`0.002 | `,
`0.003 | `,
`select | `,
- `select name from test_table limit 1000 | `,
+ regexp.QuoteMeta(`select name,​ 'inject <script>alert()​;</script>' from test_table limit 1000 | `),
`1 | `,
`1000 | `,
` | `,
diff --git a/go/vt/vtorc/config/config.go b/go/vt/vtorc/config/config.go
index 2d21e377cb6..cafff5acce8 100644
--- a/go/vt/vtorc/config/config.go
+++ b/go/vt/vtorc/config/config.go
@@ -17,14 +17,12 @@
package config
import (
- "encoding/json"
- "fmt"
- "os"
"time"
"github.com/spf13/pflag"
- "vitess.io/vitess/go/vt/log"
+ "vitess.io/vitess/go/viperutil"
+ "vitess.io/vitess/go/vt/servenv"
)
var configurationLoaded = make(chan bool)
@@ -42,200 +40,296 @@ const (
)
var (
- sqliteDataFile = "file::memory:?mode=memory&cache=shared"
- instancePollTime = 5 * time.Second
- snapshotTopologyInterval = 0 * time.Hour
- reasonableReplicationLag = 10 * time.Second
- auditFileLocation = ""
- auditToBackend = false
- auditToSyslog = false
- auditPurgeDuration = 7 * 24 * time.Hour // Equivalent of 7 days
- recoveryPeriodBlockDuration = 30 * time.Second
- preventCrossCellFailover = false
- waitReplicasTimeout = 30 * time.Second
- tolerableReplicationLag = 0 * time.Second
- topoInformationRefreshDuration = 15 * time.Second
- recoveryPollDuration = 1 * time.Second
- ersEnabled = true
- convertTabletsWithErrantGTIDs = false
+ instancePollTime = viperutil.Configure(
+ "instance-poll-time",
+ viperutil.Options[time.Duration]{
+ FlagName: "instance-poll-time",
+ Default: 5 * time.Second,
+ Dynamic: true,
+ },
+ )
+
+ preventCrossCellFailover = viperutil.Configure(
+ "prevent-cross-cell-failover",
+ viperutil.Options[bool]{
+ FlagName: "prevent-cross-cell-failover",
+ Default: false,
+ Dynamic: true,
+ },
+ )
+
+ sqliteDataFile = viperutil.Configure(
+ "sqlite-data-file",
+ viperutil.Options[string]{
+ FlagName: "sqlite-data-file",
+ Default: "file::memory:?mode=memory&cache=shared",
+ Dynamic: false,
+ },
+ )
+
+ snapshotTopologyInterval = viperutil.Configure(
+ "snapshot-topology-interval",
+ viperutil.Options[time.Duration]{
+ FlagName: "snapshot-topology-interval",
+ Default: 0 * time.Hour,
+ Dynamic: true,
+ },
+ )
+
+ reasonableReplicationLag = viperutil.Configure(
+ "reasonable-replication-lag",
+ viperutil.Options[time.Duration]{
+ FlagName: "reasonable-replication-lag",
+ Default: 10 * time.Second,
+ Dynamic: true,
+ },
+ )
+
+ auditFileLocation = viperutil.Configure(
+ "audit-file-location",
+ viperutil.Options[string]{
+ FlagName: "audit-file-location",
+ Default: "",
+ Dynamic: false,
+ },
+ )
+
+ auditToBackend = viperutil.Configure(
+ "audit-to-backend",
+ viperutil.Options[bool]{
+ FlagName: "audit-to-backend",
+ Default: false,
+ Dynamic: true,
+ },
+ )
+
+ auditToSyslog = viperutil.Configure(
+ "audit-to-syslog",
+ viperutil.Options[bool]{
+ FlagName: "audit-to-syslog",
+ Default: false,
+ Dynamic: true,
+ },
+ )
+
+ auditPurgeDuration = viperutil.Configure(
+ "audit-purge-duration",
+ viperutil.Options[time.Duration]{
+ FlagName: "audit-purge-duration",
+ Default: 7 * 24 * time.Hour,
+ Dynamic: true,
+ },
+ )
+
+ waitReplicasTimeout = viperutil.Configure(
+ "wait-replicas-timeout",
+ viperutil.Options[time.Duration]{
+ FlagName: "wait-replicas-timeout",
+ Default: 30 * time.Second,
+ Dynamic: true,
+ },
+ )
+
+ tolerableReplicationLag = viperutil.Configure(
+ "tolerable-replication-lag",
+ viperutil.Options[time.Duration]{
+ FlagName: "tolerable-replication-lag",
+ Default: 0 * time.Second,
+ Dynamic: true,
+ },
+ )
+
+ topoInformationRefreshDuration = viperutil.Configure(
+ "topo-information-refresh-duration",
+ viperutil.Options[time.Duration]{
+ FlagName: "topo-information-refresh-duration",
+ Default: 15 * time.Second,
+ Dynamic: true,
+ },
+ )
+
+ recoveryPollDuration = viperutil.Configure(
+ "recovery-poll-duration",
+ viperutil.Options[time.Duration]{
+ FlagName: "recovery-poll-duration",
+ Default: 1 * time.Second,
+ Dynamic: true,
+ },
+ )
+
+ ersEnabled = viperutil.Configure(
+ "allow-emergency-reparent",
+ viperutil.Options[bool]{
+ FlagName: "allow-emergency-reparent",
+ Default: true,
+ Dynamic: true,
+ },
+ )
+
+ convertTabletsWithErrantGTIDs = viperutil.Configure(
+ "change-tablets-with-errant-gtid-to-drained",
+ viperutil.Options[bool]{
+ FlagName: "change-tablets-with-errant-gtid-to-drained",
+ Default: false,
+ Dynamic: true,
+ },
+ )
)
-// RegisterFlags registers the flags required by VTOrc
-func RegisterFlags(fs *pflag.FlagSet) {
- fs.StringVar(&sqliteDataFile, "sqlite-data-file", sqliteDataFile, "SQLite Datafile to use as VTOrc's database")
- fs.DurationVar(&instancePollTime, "instance-poll-time", instancePollTime, "Timer duration on which VTOrc refreshes MySQL information")
- fs.DurationVar(&snapshotTopologyInterval, "snapshot-topology-interval", snapshotTopologyInterval, "Timer duration on which VTOrc takes a snapshot of the current MySQL information it has in the database. Should be in multiple of hours")
- fs.DurationVar(&reasonableReplicationLag, "reasonable-replication-lag", reasonableReplicationLag, "Maximum replication lag on replicas which is deemed to be acceptable")
- fs.StringVar(&auditFileLocation, "audit-file-location", auditFileLocation, "File location where the audit logs are to be stored")
- fs.BoolVar(&auditToBackend, "audit-to-backend", auditToBackend, "Whether to store the audit log in the VTOrc database")
- fs.BoolVar(&auditToSyslog, "audit-to-syslog", auditToSyslog, "Whether to store the audit log in the syslog")
- fs.DurationVar(&auditPurgeDuration, "audit-purge-duration", auditPurgeDuration, "Duration for which audit logs are held before being purged. Should be in multiples of days")
- fs.DurationVar(&recoveryPeriodBlockDuration, "recovery-period-block-duration", recoveryPeriodBlockDuration, "Duration for which a new recovery is blocked on an instance after running a recovery")
- fs.MarkDeprecated("recovery-period-block-duration", "As of v20 this is ignored and will be removed in a future release.")
- fs.BoolVar(&preventCrossCellFailover, "prevent-cross-cell-failover", preventCrossCellFailover, "Prevent VTOrc from promoting a primary in a different cell than the current primary in case of a failover")
- fs.DurationVar(&waitReplicasTimeout, "wait-replicas-timeout", waitReplicasTimeout, "Duration for which to wait for replica's to respond when issuing RPCs")
- fs.DurationVar(&tolerableReplicationLag, "tolerable-replication-lag", tolerableReplicationLag, "Amount of replication lag that is considered acceptable for a tablet to be eligible for promotion when Vitess makes the choice of a new primary in PRS")
- fs.DurationVar(&topoInformationRefreshDuration, "topo-information-refresh-duration", topoInformationRefreshDuration, "Timer duration on which VTOrc refreshes the keyspace and vttablet records from the topology server")
- fs.DurationVar(&recoveryPollDuration, "recovery-poll-duration", recoveryPollDuration, "Timer duration on which VTOrc polls its database to run a recovery")
- fs.BoolVar(&ersEnabled, "allow-emergency-reparent", ersEnabled, "Whether VTOrc should be allowed to run emergency reparent operation when it detects a dead primary")
- fs.BoolVar(&convertTabletsWithErrantGTIDs, "change-tablets-with-errant-gtid-to-drained", convertTabletsWithErrantGTIDs, "Whether VTOrc should be changing the type of tablets with errant GTIDs to DRAINED")
+func init() {
+ servenv.OnParseFor("vtorc", registerFlags)
}
-// Configuration makes for vtorc configuration input, which can be provided by user via JSON formatted file.
-// Some of the parameters have reasonable default values, and some (like database credentials) are
-// strictly expected from user.
-// TODO(sougou): change this to yaml parsing, and possible merge with tabletenv.
-type Configuration struct {
- SQLite3DataFile string // full path to sqlite3 datafile
- InstancePollSeconds uint // Number of seconds between instance reads
- SnapshotTopologiesIntervalHours uint // Interval in hour between snapshot-topologies invocation. Default: 0 (disabled)
- ReasonableReplicationLagSeconds int // Above this value is considered a problem
- AuditLogFile string // Name of log file for audit operations. Disabled when empty.
- AuditToSyslog bool // If true, audit messages are written to syslog
- AuditToBackendDB bool // If true, audit messages are written to the backend DB's `audit` table (default: true)
- AuditPurgeDays uint // Days after which audit entries are purged from the database
- RecoveryPeriodBlockSeconds int // (overrides `RecoveryPeriodBlockMinutes`) The time for which an instance's recovery is kept "active", so as to avoid concurrent recoveries on smae instance as well as flapping
- PreventCrossDataCenterPrimaryFailover bool // When true (default: false), cross-DC primary failover are not allowed, vtorc will do all it can to only fail over within same DC, or else not fail over at all.
- WaitReplicasTimeoutSeconds int // Timeout on amount of time to wait for the replicas in case of ERS. Should be a small value because we should fail-fast. Should not be larger than LockTimeout since that is the total time we use for an ERS.
- TolerableReplicationLagSeconds int // Amount of replication lag that is considered acceptable for a tablet to be eligible for promotion when Vitess makes the choice of a new primary in PRS.
- TopoInformationRefreshSeconds int // Timer duration on which VTOrc refreshes the keyspace and vttablet records from the topo-server.
- RecoveryPollSeconds int // Timer duration on which VTOrc recovery analysis runs
+// registerFlags registers the flags required by VTOrc
+func registerFlags(fs *pflag.FlagSet) {
+ fs.String("sqlite-data-file", sqliteDataFile.Default(), "SQLite Datafile to use as VTOrc's database")
+ fs.Duration("instance-poll-time", instancePollTime.Default(), "Timer duration on which VTOrc refreshes MySQL information")
+ fs.Duration("snapshot-topology-interval", snapshotTopologyInterval.Default(), "Timer duration on which VTOrc takes a snapshot of the current MySQL information it has in the database. Should be in multiple of hours")
+ fs.Duration("reasonable-replication-lag", reasonableReplicationLag.Default(), "Maximum replication lag on replicas which is deemed to be acceptable")
+ fs.String("audit-file-location", auditFileLocation.Default(), "File location where the audit logs are to be stored")
+ fs.Bool("audit-to-backend", auditToBackend.Default(), "Whether to store the audit log in the VTOrc database")
+ fs.Bool("audit-to-syslog", auditToSyslog.Default(), "Whether to store the audit log in the syslog")
+ fs.Duration("audit-purge-duration", auditPurgeDuration.Default(), "Duration for which audit logs are held before being purged. Should be in multiples of days")
+ fs.Bool("prevent-cross-cell-failover", preventCrossCellFailover.Default(), "Prevent VTOrc from promoting a primary in a different cell than the current primary in case of a failover")
+ fs.Duration("wait-replicas-timeout", waitReplicasTimeout.Default(), "Duration for which to wait for replica's to respond when issuing RPCs")
+ fs.Duration("tolerable-replication-lag", tolerableReplicationLag.Default(), "Amount of replication lag that is considered acceptable for a tablet to be eligible for promotion when Vitess makes the choice of a new primary in PRS")
+ fs.Duration("topo-information-refresh-duration", topoInformationRefreshDuration.Default(), "Timer duration on which VTOrc refreshes the keyspace and vttablet records from the topology server")
+ fs.Duration("recovery-poll-duration", recoveryPollDuration.Default(), "Timer duration on which VTOrc polls its database to run a recovery")
+ fs.Bool("allow-emergency-reparent", ersEnabled.Default(), "Whether VTOrc should be allowed to run emergency reparent operation when it detects a dead primary")
+ fs.Bool("change-tablets-with-errant-gtid-to-drained", convertTabletsWithErrantGTIDs.Default(), "Whether VTOrc should be changing the type of tablets with errant GTIDs to DRAINED")
+
+ viperutil.BindFlags(fs,
+ instancePollTime,
+ preventCrossCellFailover,
+ sqliteDataFile,
+ snapshotTopologyInterval,
+ reasonableReplicationLag,
+ auditFileLocation,
+ auditToBackend,
+ auditToSyslog,
+ auditPurgeDuration,
+ waitReplicasTimeout,
+ tolerableReplicationLag,
+ topoInformationRefreshDuration,
+ recoveryPollDuration,
+ ersEnabled,
+ convertTabletsWithErrantGTIDs,
+ )
}
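The `viperutil.Configure`/`BindFlags` pairing replaces the old JSON `Configuration` struct: each setting becomes a typed handle with a `Default()`, a `Get()`, and, when marked `Dynamic`, a `Set()`. A condensed sketch of the pattern using only calls that appear in this file — in VTOrc itself, servenv drives flag parsing and config loading before values are read:

```go
package main

import (
	"fmt"
	"time"

	"github.com/spf13/pflag"

	"vitess.io/vitess/go/viperutil"
)

var pollTime = viperutil.Configure(
	"instance-poll-time",
	viperutil.Options[time.Duration]{
		FlagName: "instance-poll-time",
		Default:  5 * time.Second,
		Dynamic:  true, // value may be updated at runtime via Set
	},
)

func main() {
	fs := pflag.NewFlagSet("demo", pflag.ExitOnError)
	fs.Duration("instance-poll-time", pollTime.Default(), "poll interval")
	viperutil.BindFlags(fs, pollTime)

	fmt.Println(pollTime.Get()) // the default, unless overridden by flag or config
	pollTime.Set(10 * time.Second)
	fmt.Println(pollTime.Get())
}
```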
-// ToJSONString will marshal this configuration as JSON
-func (config *Configuration) ToJSONString() string {
- b, _ := json.Marshal(config)
- return string(b)
+// GetInstancePollTime is a getter function.
+func GetInstancePollTime() time.Duration {
+ return instancePollTime.Get()
}
-// Config is *the* configuration instance, used globally to get configuration data
-var Config = newConfiguration()
-var readFileNames []string
-
-// UpdateConfigValuesFromFlags is used to update the config values from the flags defined.
-// This is done before we read any configuration files from the user. So the config files take precedence.
-func UpdateConfigValuesFromFlags() {
- Config.SQLite3DataFile = sqliteDataFile
- Config.InstancePollSeconds = uint(instancePollTime / time.Second)
- Config.InstancePollSeconds = uint(instancePollTime / time.Second)
- Config.SnapshotTopologiesIntervalHours = uint(snapshotTopologyInterval / time.Hour)
- Config.ReasonableReplicationLagSeconds = int(reasonableReplicationLag / time.Second)
- Config.AuditLogFile = auditFileLocation
- Config.AuditToBackendDB = auditToBackend
- Config.AuditToSyslog = auditToSyslog
- Config.AuditPurgeDays = uint(auditPurgeDuration / (time.Hour * 24))
- Config.RecoveryPeriodBlockSeconds = int(recoveryPeriodBlockDuration / time.Second)
- Config.PreventCrossDataCenterPrimaryFailover = preventCrossCellFailover
- Config.WaitReplicasTimeoutSeconds = int(waitReplicasTimeout / time.Second)
- Config.TolerableReplicationLagSeconds = int(tolerableReplicationLag / time.Second)
- Config.TopoInformationRefreshSeconds = int(topoInformationRefreshDuration / time.Second)
- Config.RecoveryPollSeconds = int(recoveryPollDuration / time.Second)
+// SetInstancePollTime is a setter function.
+func SetInstancePollTime(v time.Duration) {
+ instancePollTime.Set(v)
}
-// ERSEnabled reports whether VTOrc is allowed to run ERS or not.
-func ERSEnabled() bool {
- return ersEnabled
+// GetInstancePollSeconds gets the instance poll time but in seconds.
+func GetInstancePollSeconds() uint {
+ return uint(instancePollTime.Get() / time.Second)
}
-// SetERSEnabled sets the value for the ersEnabled variable. This should only be used from tests.
-func SetERSEnabled(val bool) {
- ersEnabled = val
+// GetPreventCrossCellFailover is a getter function.
+func GetPreventCrossCellFailover() bool {
+ return preventCrossCellFailover.Get()
}
-// ConvertTabletWithErrantGTIDs reports whether VTOrc is allowed to change the tablet type of tablets with errant GTIDs to DRAINED.
-func ConvertTabletWithErrantGTIDs() bool {
- return convertTabletsWithErrantGTIDs
+// GetSQLiteDataFile is a getter function.
+func GetSQLiteDataFile() string {
+ return sqliteDataFile.Get()
}
-// SetConvertTabletWithErrantGTIDs sets the value for the convertTabletWithErrantGTIDs variable. This should only be used from tests.
-func SetConvertTabletWithErrantGTIDs(val bool) {
- convertTabletsWithErrantGTIDs = val
+// GetReasonableReplicationLagSeconds gets the reasonable replication lag in seconds.
+func GetReasonableReplicationLagSeconds() int64 {
+ return int64(reasonableReplicationLag.Get() / time.Second)
+}
+
+// GetSnapshotTopologyInterval is a getter function.
+func GetSnapshotTopologyInterval() time.Duration {
+ return snapshotTopologyInterval.Get()
}
-// LogConfigValues is used to log the config values.
-func LogConfigValues() {
- b, _ := json.MarshalIndent(Config, "", "\t")
- log.Infof("Running with Configuration - %v", string(b))
+// GetAuditFileLocation is a getter function.
+func GetAuditFileLocation() string {
+ return auditFileLocation.Get()
}
-func newConfiguration() *Configuration {
- return &Configuration{
- SQLite3DataFile: "file::memory:?mode=memory&cache=shared",
- InstancePollSeconds: 5,
- SnapshotTopologiesIntervalHours: 0,
- ReasonableReplicationLagSeconds: 10,
- AuditLogFile: "",
- AuditToSyslog: false,
- AuditToBackendDB: false,
- AuditPurgeDays: 7,
- RecoveryPeriodBlockSeconds: 30,
- PreventCrossDataCenterPrimaryFailover: false,
- WaitReplicasTimeoutSeconds: 30,
- TopoInformationRefreshSeconds: 15,
- RecoveryPollSeconds: 1,
- }
+// SetAuditFileLocation is a setter function.
+func SetAuditFileLocation(v string) {
+ auditFileLocation.Set(v)
}
-func (config *Configuration) postReadAdjustments() error {
- if config.SQLite3DataFile == "" {
- return fmt.Errorf("SQLite3DataFile must be set")
- }
+// GetAuditToSyslog is a getter function.
+func GetAuditToSyslog() bool {
+ return auditToSyslog.Get()
+}
+
+// SetAuditToSyslog is a setter function.
+func SetAuditToSyslog(v bool) {
+ auditToSyslog.Set(v)
+}
+
+// GetAuditToBackend is a getter function.
+func GetAuditToBackend() bool {
+ return auditToBackend.Get()
+}
+
+// SetAuditToBackend is a setter function.
+func SetAuditToBackend(v bool) {
+ auditToBackend.Set(v)
+}
- return nil
+// GetAuditPurgeDays gets the audit purge duration in days.
+func GetAuditPurgeDays() int64 {
+ return int64(auditPurgeDuration.Get() / (24 * time.Hour))
}
-// read reads configuration from given file, or silently skips if the file does not exist.
-// If the file does exist, then it is expected to be in valid JSON format or the function bails out.
-func read(fileName string) (*Configuration, error) {
- if fileName == "" {
- return Config, fmt.Errorf("Empty file name")
- }
- file, err := os.Open(fileName)
- if err != nil {
- return Config, err
- }
- decoder := json.NewDecoder(file)
- err = decoder.Decode(Config)
- if err == nil {
- log.Infof("Read config: %s", fileName)
- } else {
- log.Fatal("Cannot read config file:", fileName, err)
- }
- if err := Config.postReadAdjustments(); err != nil {
- log.Fatal(err)
- }
- return Config, err
+// SetAuditPurgeDays sets the audit purge duration.
+func SetAuditPurgeDays(days int64) {
+ auditPurgeDuration.Set(time.Duration(days) * 24 * time.Hour)
}
-// Read reads configuration from zero, either, some or all given files, in order of input.
-// A file can override configuration provided in previous file.
-func Read(fileNames ...string) *Configuration {
- for _, fileName := range fileNames {
- _, _ = read(fileName)
- }
- readFileNames = fileNames
- return Config
+// GetWaitReplicasTimeout is a getter function.
+func GetWaitReplicasTimeout() time.Duration {
+ return waitReplicasTimeout.Get()
}
-// ForceRead reads configuration from given file name or bails out if it fails
-func ForceRead(fileName string) *Configuration {
- _, err := read(fileName)
- if err != nil {
- log.Fatal("Cannot read config file:", fileName, err)
- }
- readFileNames = []string{fileName}
- return Config
+// GetTolerableReplicationLag is a getter function.
+func GetTolerableReplicationLag() time.Duration {
+ return tolerableReplicationLag.Get()
}
-// Reload re-reads configuration from last used files
-func Reload(extraFileNames ...string) *Configuration {
- for _, fileName := range readFileNames {
- _, _ = read(fileName)
- }
- for _, fileName := range extraFileNames {
- _, _ = read(fileName)
- }
- return Config
+// GetTopoInformationRefreshDuration is a getter function.
+func GetTopoInformationRefreshDuration() time.Duration {
+ return topoInformationRefreshDuration.Get()
+}
+
+// GetRecoveryPollDuration is a getter function.
+func GetRecoveryPollDuration() time.Duration {
+ return recoveryPollDuration.Get()
+}
+
+// ERSEnabled reports whether VTOrc is allowed to run ERS.
+func ERSEnabled() bool {
+ return ersEnabled.Get()
+}
+
+// SetERSEnabled sets the value for the ersEnabled variable. This should only be used from tests.
+func SetERSEnabled(val bool) {
+ ersEnabled.Set(val)
+}
+
+// ConvertTabletWithErrantGTIDs reports whether VTOrc is allowed to change the tablet type of tablets with errant GTIDs to DRAINED.
+func ConvertTabletWithErrantGTIDs() bool {
+ return convertTabletsWithErrantGTIDs.Get()
+}
+
+// SetConvertTabletWithErrantGTIDs sets the value for the convertTabletWithErrantGTIDs variable. This should only be used from tests.
+func SetConvertTabletWithErrantGTIDs(val bool) {
+ convertTabletsWithErrantGTIDs.Set(val)
}
// MarkConfigurationLoaded is called once configuration has first been loaded.
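The config.go changes above replace the mutable global Config struct with package-level values exposed through typed getters and setters, with derived units (seconds, days) computed from a single duration-typed source of truth. A minimal sketch of that accessor pattern, assuming a simple generic thread-safe value in place of the real viperutil implementation:

package config

import (
	"sync"
	"time"
)

// value is a hypothetical stand-in for a viperutil-backed setting.
type value[T any] struct {
	mu sync.RWMutex
	v  T
}

func (c *value[T]) Get() T {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.v
}

func (c *value[T]) Set(v T) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.v = v
}

var instancePollTime = &value[time.Duration]{v: 5 * time.Second}

// GetInstancePollSeconds derives the seconds form on demand, as the
// getters above do, instead of storing a duplicate seconds field.
func GetInstancePollSeconds() uint {
	return uint(instancePollTime.Get() / time.Second)
}

Keeping one canonical duration and deriving the integer forms in getters removes the old risk of the Seconds/Hours fields drifting out of sync with the flag values.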
diff --git a/go/vt/vtorc/config/config_test.go b/go/vt/vtorc/config/config_test.go
deleted file mode 100644
index 2009b476f1d..00000000000
--- a/go/vt/vtorc/config/config_test.go
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
-Copyright 2022 The Vitess Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package config
-
-import (
- "testing"
- "time"
-
- "github.com/stretchr/testify/require"
-)
-
-func TestUpdateConfigValuesFromFlags(t *testing.T) {
- t.Run("defaults", func(t *testing.T) {
- // Restore the changes we make to the Config parameter
- defer func() {
- Config = newConfiguration()
- }()
- defaultConfig := newConfiguration()
- UpdateConfigValuesFromFlags()
- require.Equal(t, defaultConfig, Config)
- })
-
- t.Run("override auditPurgeDuration", func(t *testing.T) {
- oldAuditPurgeDuration := auditPurgeDuration
- auditPurgeDuration = 8 * time.Hour * 24
- auditPurgeDuration += time.Second + 4*time.Minute
- // Restore the changes we make
- defer func() {
- Config = newConfiguration()
- auditPurgeDuration = oldAuditPurgeDuration
- }()
-
- testConfig := newConfiguration()
- // auditPurgeDuration is supposed to be in multiples of days.
- // If it is not, then we round down to the nearest number of days.
- testConfig.AuditPurgeDays = 8
- UpdateConfigValuesFromFlags()
- require.Equal(t, testConfig, Config)
- })
-
- t.Run("override sqliteDataFile", func(t *testing.T) {
- oldSqliteDataFile := sqliteDataFile
- sqliteDataFile = "newVal"
- // Restore the changes we make
- defer func() {
- Config = newConfiguration()
- sqliteDataFile = oldSqliteDataFile
- }()
-
- testConfig := newConfiguration()
- testConfig.SQLite3DataFile = "newVal"
- UpdateConfigValuesFromFlags()
- require.Equal(t, testConfig, Config)
- })
-
- t.Run("override instancePollTime", func(t *testing.T) {
- oldInstancePollTime := instancePollTime
- instancePollTime = 7 * time.Second
- // Restore the changes we make
- defer func() {
- Config = newConfiguration()
- instancePollTime = oldInstancePollTime
- }()
-
- testConfig := newConfiguration()
- testConfig.InstancePollSeconds = 7
- UpdateConfigValuesFromFlags()
- require.Equal(t, testConfig, Config)
- })
-
- t.Run("override snapshotTopologyInterval", func(t *testing.T) {
- oldSnapshotTopologyInterval := snapshotTopologyInterval
- snapshotTopologyInterval = 1 * time.Hour
- // Restore the changes we make
- defer func() {
- Config = newConfiguration()
- snapshotTopologyInterval = oldSnapshotTopologyInterval
- }()
-
- testConfig := newConfiguration()
- testConfig.SnapshotTopologiesIntervalHours = 1
- UpdateConfigValuesFromFlags()
- require.Equal(t, testConfig, Config)
- })
-
- t.Run("override reasonableReplicationLag", func(t *testing.T) {
- oldReasonableReplicationLag := reasonableReplicationLag
- reasonableReplicationLag = 15 * time.Second
- // Restore the changes we make
- defer func() {
- Config = newConfiguration()
- reasonableReplicationLag = oldReasonableReplicationLag
- }()
-
- testConfig := newConfiguration()
- testConfig.ReasonableReplicationLagSeconds = 15
- UpdateConfigValuesFromFlags()
- require.Equal(t, testConfig, Config)
- })
-
- t.Run("override auditFileLocation", func(t *testing.T) {
- oldAuditFileLocation := auditFileLocation
- auditFileLocation = "newFile"
- // Restore the changes we make
- defer func() {
- Config = newConfiguration()
- auditFileLocation = oldAuditFileLocation
- }()
-
- testConfig := newConfiguration()
- testConfig.AuditLogFile = "newFile"
- UpdateConfigValuesFromFlags()
- require.Equal(t, testConfig, Config)
- })
-
- t.Run("override auditToBackend", func(t *testing.T) {
- oldAuditToBackend := auditToBackend
- auditToBackend = true
- // Restore the changes we make
- defer func() {
- Config = newConfiguration()
- auditToBackend = oldAuditToBackend
- }()
-
- testConfig := newConfiguration()
- testConfig.AuditToBackendDB = true
- UpdateConfigValuesFromFlags()
- require.Equal(t, testConfig, Config)
- })
-
- t.Run("override auditToSyslog", func(t *testing.T) {
- oldAuditToSyslog := auditToSyslog
- auditToSyslog = true
- // Restore the changes we make
- defer func() {
- Config = newConfiguration()
- auditToSyslog = oldAuditToSyslog
- }()
-
- testConfig := newConfiguration()
- testConfig.AuditToSyslog = true
- UpdateConfigValuesFromFlags()
- require.Equal(t, testConfig, Config)
- })
-
- t.Run("override recoveryPeriodBlockDuration", func(t *testing.T) {
- oldRecoveryPeriodBlockDuration := recoveryPeriodBlockDuration
- recoveryPeriodBlockDuration = 5 * time.Minute
- // Restore the changes we make
- defer func() {
- Config = newConfiguration()
- recoveryPeriodBlockDuration = oldRecoveryPeriodBlockDuration
- }()
-
- testConfig := newConfiguration()
- testConfig.RecoveryPeriodBlockSeconds = 300
- UpdateConfigValuesFromFlags()
- require.Equal(t, testConfig, Config)
- })
-
- t.Run("override preventCrossCellFailover", func(t *testing.T) {
- oldPreventCrossCellFailover := preventCrossCellFailover
- preventCrossCellFailover = true
- // Restore the changes we make
- defer func() {
- Config = newConfiguration()
- preventCrossCellFailover = oldPreventCrossCellFailover
- }()
-
- testConfig := newConfiguration()
- testConfig.PreventCrossDataCenterPrimaryFailover = true
- UpdateConfigValuesFromFlags()
- require.Equal(t, testConfig, Config)
- })
-
- t.Run("override waitReplicasTimeout", func(t *testing.T) {
- oldWaitReplicasTimeout := waitReplicasTimeout
- waitReplicasTimeout = 3*time.Minute + 4*time.Second
- // Restore the changes we make
- defer func() {
- Config = newConfiguration()
- waitReplicasTimeout = oldWaitReplicasTimeout
- }()
-
- testConfig := newConfiguration()
- testConfig.WaitReplicasTimeoutSeconds = 184
- UpdateConfigValuesFromFlags()
- require.Equal(t, testConfig, Config)
- })
-
- t.Run("override topoInformationRefreshDuration", func(t *testing.T) {
- oldTopoInformationRefreshDuration := topoInformationRefreshDuration
- topoInformationRefreshDuration = 1 * time.Second
- // Restore the changes we make
- defer func() {
- Config = newConfiguration()
- topoInformationRefreshDuration = oldTopoInformationRefreshDuration
- }()
-
- testConfig := newConfiguration()
- testConfig.TopoInformationRefreshSeconds = 1
- UpdateConfigValuesFromFlags()
- require.Equal(t, testConfig, Config)
- })
-
- t.Run("override recoveryPollDuration", func(t *testing.T) {
- oldRecoveryPollDuration := recoveryPollDuration
- recoveryPollDuration = 15 * time.Second
- // Restore the changes we make
- defer func() {
- Config = newConfiguration()
- recoveryPollDuration = oldRecoveryPollDuration
- }()
-
- testConfig := newConfiguration()
- testConfig.RecoveryPollSeconds = 15
- UpdateConfigValuesFromFlags()
- require.Equal(t, testConfig, Config)
- })
-}
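The deleted table-driven tests mutated the global Config and compared whole structs. Under the new API, an equivalent override test can exercise one setter/getter pair at a time; a hypothetical sketch (the test name and values are illustrative):

package config

import (
	"testing"
	"time"

	"github.com/stretchr/testify/require"
)

func TestAuditPurgeDaysRoundTrip(t *testing.T) {
	oldVal := GetAuditPurgeDays()
	defer SetAuditPurgeDays(oldVal) // restore the package-level value

	// Durations that are not whole days round down, matching the
	// integer division in GetAuditPurgeDays.
	auditPurgeDuration.Set(8*24*time.Hour + 4*time.Minute)
	require.EqualValues(t, 8, GetAuditPurgeDays())

	SetAuditPurgeDays(10)
	require.EqualValues(t, 10, GetAuditPurgeDays())
}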
diff --git a/go/vt/vtorc/db/db.go b/go/vt/vtorc/db/db.go
index 64143477645..870a3d15949 100644
--- a/go/vt/vtorc/db/db.go
+++ b/go/vt/vtorc/db/db.go
@@ -44,10 +44,12 @@ func (m *vtorcDB) QueryVTOrc(query string, argsArray []any, onRow func(sqlutils.
// OpenTopology returns the DB instance for the vtorc backed database
func OpenVTOrc() (db *sql.DB, err error) {
var fromCache bool
- db, fromCache, err = sqlutils.GetSQLiteDB(config.Config.SQLite3DataFile)
+ db, fromCache, err = sqlutils.GetSQLiteDB(config.GetSQLiteDataFile())
if err == nil && !fromCache {
- log.Infof("Connected to vtorc backend: sqlite on %v", config.Config.SQLite3DataFile)
- _ = initVTOrcDB(db)
+ log.Infof("Connected to vtorc backend: sqlite on %v", config.GetSQLiteDataFile())
+ if err := initVTOrcDB(db); err != nil {
+ log.Fatalf("Cannot initiate vtorc: %+v", err)
+ }
}
if db != nil {
db.SetMaxOpenConns(1)
@@ -58,13 +60,13 @@ func OpenVTOrc() (db *sql.DB, err error) {
// registerVTOrcDeployment updates the vtorc_db_deployments table upon successful deployment
func registerVTOrcDeployment(db *sql.DB) error {
- query := `
- replace into vtorc_db_deployments (
- deployed_version, deployed_timestamp
- ) values (
- ?, datetime('now')
- )
- `
+ query := `REPLACE INTO vtorc_db_deployments (
+ deployed_version,
+ deployed_timestamp
+ ) VALUES (
+ ?,
+ DATETIME('now')
+ )`
if _, err := execInternal(db, query, ""); err != nil {
log.Fatalf("Unable to write to vtorc_db_deployments: %+v", err)
}
@@ -76,27 +78,24 @@ func registerVTOrcDeployment(db *sql.DB) error {
func deployStatements(db *sql.DB, queries []string) error {
tx, err := db.Begin()
if err != nil {
- log.Fatal(err.Error())
return err
}
for _, query := range queries {
if _, err := tx.Exec(query); err != nil {
- log.Fatalf("Cannot initiate vtorc: %+v; query=%+v", err, query)
return err
}
}
- if err := tx.Commit(); err != nil {
- log.Fatal(err.Error())
- }
- return nil
+ return tx.Commit()
}
// ClearVTOrcDatabase is used to clear the VTOrc database. This function is meant to be used by tests to clear the
// database to get a clean slate without starting a new one.
func ClearVTOrcDatabase() {
- db, _, _ := sqlutils.GetSQLiteDB(config.Config.SQLite3DataFile)
+ db, _, _ := sqlutils.GetSQLiteDB(config.GetSQLiteDataFile())
if db != nil {
- _ = initVTOrcDB(db)
+ if err := initVTOrcDB(db); err != nil {
+ log.Fatalf("Cannot re-initiate vtorc: %+v", err)
+ }
}
}
@@ -105,20 +104,24 @@ func ClearVTOrcDatabase() {
func initVTOrcDB(db *sql.DB) error {
log.Info("Initializing vtorc")
log.Info("Migrating database schema")
- _ = deployStatements(db, vtorcBackend)
- _ = registerVTOrcDeployment(db)
-
- _, _ = ExecVTOrc(`PRAGMA journal_mode = WAL`)
- _, _ = ExecVTOrc(`PRAGMA synchronous = NORMAL`)
-
+ if err := deployStatements(db, vtorcBackend); err != nil {
+ return err
+ }
+ if err := registerVTOrcDeployment(db); err != nil {
+ return err
+ }
+ if _, err := ExecVTOrc(`PRAGMA journal_mode = WAL`); err != nil {
+ return err
+ }
+ if _, err := ExecVTOrc(`PRAGMA synchronous = NORMAL`); err != nil {
+ return err
+ }
return nil
}
// execInternal
func execInternal(db *sql.DB, query string, args ...any) (sql.Result, error) {
- var err error
- res, err := sqlutils.ExecNoPrepare(db, query, args...)
- return res, err
+ return sqlutils.ExecNoPrepare(db, query, args...)
}
// ExecVTOrc will execute given query on the vtorc backend database.
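deployStatements now propagates errors instead of calling log.Fatal mid-transaction, leaving initVTOrcDB to decide what is fatal. A minimal sketch of that shape using database/sql; the explicit Rollback is a defensive addition in this sketch, not part of the diff:

package main

import "database/sql"

func deploy(db *sql.DB, queries []string) error {
	tx, err := db.Begin()
	if err != nil {
		return err
	}
	defer tx.Rollback() // no-op once Commit has succeeded
	for _, q := range queries {
		if _, err := tx.Exec(q); err != nil {
			return err // the caller decides whether this is fatal
		}
	}
	return tx.Commit()
}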
diff --git a/go/vt/vtorc/discovery/queue.go b/go/vt/vtorc/discovery/queue.go
index 95751c6ae25..4b18303959b 100644
--- a/go/vt/vtorc/discovery/queue.go
+++ b/go/vt/vtorc/discovery/queue.go
@@ -153,7 +153,7 @@ func (q *Queue) Consume() string {
// alarm if we have been waiting too long
timeOnQueue := time.Since(q.queuedKeys[key])
- if timeOnQueue > time.Duration(config.Config.InstancePollSeconds)*time.Second {
+ if timeOnQueue > config.GetInstancePollTime() {
log.Warningf("key %v spent %.4fs waiting on a discoveryQueue", key, timeOnQueue.Seconds())
}
diff --git a/go/vt/vtorc/inst/analysis.go b/go/vt/vtorc/inst/analysis.go
index 66d6c6dd9ce..3e9e81c5c9f 100644
--- a/go/vt/vtorc/inst/analysis.go
+++ b/go/vt/vtorc/inst/analysis.go
@@ -144,5 +144,5 @@ func (replicationAnalysis *ReplicationAnalysis) MarshalJSON() ([]byte, error) {
// ValidSecondsFromSeenToLastAttemptedCheck returns the maximum allowed elapsed time
// between last_attempted_check and last_checked before we consider the instance invalid.
func ValidSecondsFromSeenToLastAttemptedCheck() uint {
- return config.Config.InstancePollSeconds + 1
+ return config.GetInstancePollSeconds()
}
diff --git a/go/vt/vtorc/inst/analysis_dao.go b/go/vt/vtorc/inst/analysis_dao.go
index 25d93a6864b..07830bf7dda 100644
--- a/go/vt/vtorc/inst/analysis_dao.go
+++ b/go/vt/vtorc/inst/analysis_dao.go
@@ -47,7 +47,7 @@ func init() {
func initializeAnalysisDaoPostConfiguration() {
config.WaitForConfigurationToBeLoaded()
- recentInstantAnalysis = cache.New(time.Duration(config.Config.RecoveryPollSeconds*2)*time.Second, time.Second)
+ recentInstantAnalysis = cache.New(config.GetRecoveryPollDuration()*2, time.Second)
}
type clusterAnalysis struct {
@@ -68,9 +68,8 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
}
// TODO(sougou): deprecate ReduceReplicationAnalysisCount
- args := sqlutils.Args(config.Config.ReasonableReplicationLagSeconds, ValidSecondsFromSeenToLastAttemptedCheck(), config.Config.ReasonableReplicationLagSeconds, keyspace, shard)
- query := `
- SELECT
+ args := sqlutils.Args(config.GetReasonableReplicationLagSeconds(), ValidSecondsFromSeenToLastAttemptedCheck(), config.GetReasonableReplicationLagSeconds(), keyspace, shard)
+ query := `SELECT
vitess_tablet.info AS tablet_info,
vitess_tablet.tablet_type,
vitess_tablet.primary_timestamp,
@@ -91,13 +90,13 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
IFNULL(
primary_instance.binary_log_file = database_instance_stale_binlog_coordinates.binary_log_file
AND primary_instance.binary_log_pos = database_instance_stale_binlog_coordinates.binary_log_pos
- AND database_instance_stale_binlog_coordinates.first_seen < datetime('now', printf('-%d second', ?)),
+ AND database_instance_stale_binlog_coordinates.first_seen < DATETIME('now', PRINTF('-%d SECOND', ?)),
0
)
) AS is_stale_binlog_coordinates,
MIN(
primary_instance.last_checked <= primary_instance.last_seen
- and primary_instance.last_attempted_check <= datetime(primary_instance.last_seen, printf('+%d second', ?))
+ and primary_instance.last_attempted_check <= DATETIME(primary_instance.last_seen, PRINTF('+%d SECOND', ?))
) = 1 AS is_last_check_valid,
/* To be considered a primary, traditional async replication must not be present/valid AND the host should either */
/* not be a replication group member OR be the primary of the replication group */
@@ -655,13 +654,13 @@ func auditInstanceAnalysisInChangelog(tabletAlias string, analysisCode AnalysisC
// Find if the lastAnalysisHasChanged or not while updating the row if it has.
lastAnalysisChanged := false
{
- sqlResult, err := db.ExecVTOrc(`
- update database_instance_last_analysis set
+ sqlResult, err := db.ExecVTOrc(`UPDATE database_instance_last_analysis
+ SET
analysis = ?,
- analysis_timestamp = datetime('now')
- where
+ analysis_timestamp = DATETIME('now')
+ WHERE
alias = ?
- and analysis != ?
+ AND analysis != ?
`,
string(analysisCode), tabletAlias, string(analysisCode),
)
@@ -682,13 +681,16 @@ func auditInstanceAnalysisInChangelog(tabletAlias string, analysisCode AnalysisC
firstInsertion := false
if !lastAnalysisChanged {
// The insert only returns more than 1 row changed if this is the first insertion.
- sqlResult, err := db.ExecVTOrc(`
- insert or ignore into database_instance_last_analysis (
- alias, analysis_timestamp, analysis
- ) values (
- ?, datetime('now'), ?
- )
- `,
+ sqlResult, err := db.ExecVTOrc(`INSERT OR IGNORE
+ INTO database_instance_last_analysis (
+ alias,
+ analysis_timestamp,
+ analysis
+ ) VALUES (
+ ?,
+ DATETIME('now'),
+ ?
+ )`,
tabletAlias, string(analysisCode),
)
if err != nil {
@@ -708,13 +710,16 @@ func auditInstanceAnalysisInChangelog(tabletAlias string, analysisCode AnalysisC
return nil
}
- _, err := db.ExecVTOrc(`
- insert into database_instance_analysis_changelog (
- alias, analysis_timestamp, analysis
- ) values (
- ?, datetime('now'), ?
- )
- `,
+ _, err := db.ExecVTOrc(`INSERT
+ INTO database_instance_analysis_changelog (
+ alias,
+ analysis_timestamp,
+ analysis
+ ) VALUES (
+ ?,
+ DATETIME('now'),
+ ?
+ )`,
tabletAlias, string(analysisCode),
)
if err == nil {
@@ -727,12 +732,11 @@ func auditInstanceAnalysisInChangelog(tabletAlias string, analysisCode AnalysisC
// ExpireInstanceAnalysisChangelog removes old-enough analysis entries from the changelog
func ExpireInstanceAnalysisChangelog() error {
- _, err := db.ExecVTOrc(`
- delete
- from database_instance_analysis_changelog
- where
- analysis_timestamp < datetime('now', printf('-%d hour', ?))
- `,
+ _, err := db.ExecVTOrc(`DELETE
+ FROM database_instance_analysis_changelog
+ WHERE
+ analysis_timestamp < DATETIME('now', PRINTF('-%d HOUR', ?))
+ `,
config.UnseenInstanceForgetHours,
)
if err != nil {
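SQLite cannot bind an interval parameter directly, which is why these queries build the modifier string with PRINTF from a bound integer ('-24 HOUR' and so on). A sketch of the same pattern as a standalone helper, assuming the mattn/go-sqlite3 driver; the table name comes from the query above:

package main

import (
	"database/sql"
	"log"

	_ "github.com/mattn/go-sqlite3" // assumed driver behind sqlutils' sqlite backend
)

func expireChangelog(db *sql.DB, hours int) error {
	// PRINTF('-%d HOUR', ?) turns the bound integer into a DATETIME modifier.
	_, err := db.Exec(`DELETE
		FROM database_instance_analysis_changelog
		WHERE
			analysis_timestamp < DATETIME('now', PRINTF('-%d HOUR', ?))
		`, hours)
	return err
}

func main() {
	db, err := sql.Open("sqlite3", ":memory:")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
	// Errors here would only mean the table doesn't exist in this toy DB;
	// the call is shown for the query shape.
	_ = expireChangelog(db, 24)
}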
diff --git a/go/vt/vtorc/inst/audit_dao.go b/go/vt/vtorc/inst/audit_dao.go
index 642fb187509..7ae60fba927 100644
--- a/go/vt/vtorc/inst/audit_dao.go
+++ b/go/vt/vtorc/inst/audit_dao.go
@@ -38,10 +38,10 @@ func AuditOperation(auditType string, tabletAlias string, message string) error
}
auditWrittenToFile := false
- if config.Config.AuditLogFile != "" {
+ if config.GetAuditFileLocation() != "" {
auditWrittenToFile = true
go func() {
- f, err := os.OpenFile(config.Config.AuditLogFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0640)
+ f, err := os.OpenFile(config.GetAuditFileLocation(), os.O_RDWR|os.O_CREATE|os.O_APPEND, 0640)
if err != nil {
log.Error(err)
return
@@ -54,15 +54,23 @@ func AuditOperation(auditType string, tabletAlias string, message string) error
}
}()
}
- if config.Config.AuditToBackendDB {
- _, err := db.ExecVTOrc(`
- insert
- into audit (
- audit_timestamp, audit_type, alias, keyspace, shard, message
- ) VALUES (
- datetime('now'), ?, ?, ?, ?, ?
- )
- `,
+ if config.GetAuditToBackend() {
+ _, err := db.ExecVTOrc(`INSERT
+ INTO audit (
+ audit_timestamp,
+ audit_type,
+ alias,
+ keyspace,
+ shard,
+ message
+ ) VALUES (
+ DATETIME('now'),
+ ?,
+ ?,
+ ?,
+ ?,
+ ?
+ )`,
auditType,
tabletAlias,
keyspace,
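The file-audit branch above opens the log with O_APPEND|O_CREATE inside a goroutine and writes one entry per operation. A synchronous sketch of that path; the tab-separated line format is an assumption, not the format AuditOperation uses:

package main

import (
	"fmt"
	"os"
	"time"
)

func appendAudit(path, auditType, alias, message string) error {
	f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0640)
	if err != nil {
		return err
	}
	defer f.Close()
	_, err = fmt.Fprintf(f, "%s\t%s\t%s\t%s\n",
		time.Now().Format(time.RFC3339), auditType, alias, message)
	return err
}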
diff --git a/go/vt/vtorc/inst/audit_dao_test.go b/go/vt/vtorc/inst/audit_dao_test.go
index 1d50de4c146..d22e9177dc3 100644
--- a/go/vt/vtorc/inst/audit_dao_test.go
+++ b/go/vt/vtorc/inst/audit_dao_test.go
@@ -35,13 +35,13 @@ import (
// This test also verifies that we are able to read the recent audits that are written to the databases.
func TestAuditOperation(t *testing.T) {
// Restore original configurations
- originalAuditSysLog := config.Config.AuditToSyslog
- originalAuditLogFile := config.Config.AuditLogFile
- originalAuditBackend := config.Config.AuditToBackendDB
+ originalAuditSysLog := config.GetAuditToSyslog()
+ originalAuditLogFile := config.GetAuditFileLocation()
+ originalAuditBackend := config.GetAuditToBackend()
defer func() {
- config.Config.AuditToSyslog = originalAuditSysLog
- config.Config.AuditLogFile = originalAuditLogFile
- config.Config.AuditToBackendDB = originalAuditBackend
+ config.SetAuditToSyslog(originalAuditSysLog)
+ config.SetAuditFileLocation(originalAuditLogFile)
+ config.SetAuditToBackend(originalAuditBackend)
}()
orcDb, err := db.OpenVTOrc()
@@ -78,9 +78,9 @@ func TestAuditOperation(t *testing.T) {
message := "test-message"
t.Run("audit to backend", func(t *testing.T) {
- config.Config.AuditLogFile = ""
- config.Config.AuditToSyslog = false
- config.Config.AuditToBackendDB = true
+ config.SetAuditFileLocation("")
+ config.SetAuditToSyslog(false)
+ config.SetAuditToBackend(true)
// Auditing should succeed as expected
err = AuditOperation(auditType, tab100Alias, message)
@@ -106,13 +106,13 @@ func TestAuditOperation(t *testing.T) {
})
t.Run("audit to File", func(t *testing.T) {
- config.Config.AuditToBackendDB = false
- config.Config.AuditToSyslog = false
+ config.SetAuditToBackend(false)
+ config.SetAuditToSyslog(false)
file, err := os.CreateTemp("", "test-auditing-*")
require.NoError(t, err)
defer os.Remove(file.Name())
- config.Config.AuditLogFile = file.Name()
+ config.SetAuditFileLocation(file.Name())
err = AuditOperation(auditType, tab100Alias, message)
require.NoError(t, err)
diff --git a/go/vt/vtorc/inst/instance_dao.go b/go/vt/vtorc/inst/instance_dao.go
index bd4438dd05f..d1421dbc91d 100644
--- a/go/vt/vtorc/inst/instance_dao.go
+++ b/go/vt/vtorc/inst/instance_dao.go
@@ -80,7 +80,7 @@ func init() {
func initializeInstanceDao() {
config.WaitForConfigurationToBeLoaded()
- forgetAliases = cache.New(time.Duration(config.Config.InstancePollSeconds*3)*time.Second, time.Second)
+ forgetAliases = cache.New(config.GetInstancePollTime()*3, time.Second)
cacheInitializationCompleted.Store(true)
}
@@ -114,10 +114,15 @@ func ExecDBWriteFunc(f func() error) error {
func ExpireTableData(tableName string, timestampColumn string) error {
writeFunc := func() error {
- _, err := db.ExecVTOrc(
- fmt.Sprintf("delete from %s where %s < datetime('now', printf('-%%d DAY', ?))", tableName, timestampColumn),
- config.Config.AuditPurgeDays,
+ query := fmt.Sprintf(`DELETE
+ FROM %s
+ WHERE
+ %s < DATETIME('now', PRINTF('-%%d DAY', ?))
+ `,
+ tableName,
+ timestampColumn,
)
+ _, err := db.ExecVTOrc(query, config.GetAuditPurgeDays())
return err
}
return ExecDBWriteFunc(writeFunc)
@@ -357,35 +362,7 @@ Cleanup:
// Add replication group ancestry UUID as well. Otherwise, VTOrc thinks there are errant GTIDs in group
// members and its replicas, even though they are not.
instance.AncestryUUID = strings.Trim(instance.AncestryUUID, ",")
- if instance.ExecutedGtidSet != "" && instance.primaryExecutedGtidSet != "" {
- // Compare primary & replica GTID sets, but ignore the sets that present the primary's UUID.
- // This is because vtorc may pool primary and replica at an inconvenient timing,
- // such that the replica may _seems_ to have more entries than the primary, when in fact
- // it's just that the primary's probing is stale.
- redactedExecutedGtidSet, _ := NewOracleGtidSet(instance.ExecutedGtidSet)
- for _, uuid := range strings.Split(instance.AncestryUUID, ",") {
- if uuid != instance.ServerUUID {
- redactedExecutedGtidSet.RemoveUUID(uuid)
- }
- if instance.IsCoPrimary && uuid == instance.ServerUUID {
- // If this is a co-primary, then this server is likely to show its own generated GTIDs as errant,
- // because its co-primary has not applied them yet
- redactedExecutedGtidSet.RemoveUUID(uuid)
- }
- }
- // Avoid querying the database if there's no point:
- if !redactedExecutedGtidSet.IsEmpty() {
- redactedPrimaryExecutedGtidSet, _ := NewOracleGtidSet(instance.primaryExecutedGtidSet)
- redactedPrimaryExecutedGtidSet.RemoveUUID(instance.SourceUUID)
-
- instance.GtidErrant, err = replication.Subtract(redactedExecutedGtidSet.String(), redactedPrimaryExecutedGtidSet.String())
- if err == nil {
- var gtidCount int64
- gtidCount, err = replication.GTIDCount(instance.GtidErrant)
- currentErrantGTIDCount.Set(tabletAlias, gtidCount)
- }
- }
- }
+ err = detectErrantGTIDs(instance, tablet)
}
latency.Stop("instance")
@@ -412,6 +389,63 @@ Cleanup:
return nil, err
}
+// detectErrantGTIDs detects the errant GTIDs on an instance.
+func detectErrantGTIDs(instance *Instance, tablet *topodatapb.Tablet) (err error) {
+ // If the tablet is not replicating from anyone, then it could be the previous primary.
+ // We should check for errant GTIDs by finding the difference with the shard's current primary.
+ if instance.primaryExecutedGtidSet == "" && instance.SourceHost == "" {
+ var primaryInstance *Instance
+ primaryAlias, _, _ := ReadShardPrimaryInformation(tablet.Keyspace, tablet.Shard)
+ if primaryAlias != "" {
+ // Check if the current tablet is the primary.
+ // If it is, then we don't need to run errant gtid detection on it.
+ if primaryAlias == instance.InstanceAlias {
+ return nil
+ }
+ primaryInstance, _, _ = ReadInstance(primaryAlias)
+ }
+ // Only run errant GTID detection if we are sure that the data read for the current primary
+ // is up-to-date enough to reflect that it has been promoted. This is needed to prevent
+ // flagging incorrect errant GTIDs. If we were to use old data, we could have some GTIDs
+ // accepted by the old primary (this tablet) that don't show in the new primary's set.
+ if primaryInstance != nil {
+ if primaryInstance.SourceHost == "" {
+ instance.primaryExecutedGtidSet = primaryInstance.ExecutedGtidSet
+ }
+ }
+ }
+ if instance.ExecutedGtidSet != "" && instance.primaryExecutedGtidSet != "" {
+ // Compare primary & replica GTID sets, but ignore the sets that present the primary's UUID.
+ // This is because vtorc may poll the primary and replica at an inconvenient time,
+ // such that the replica may _seem_ to have more entries than the primary, when in fact
+ // it's just that the primary's probing is stale.
+ redactedExecutedGtidSet, _ := NewOracleGtidSet(instance.ExecutedGtidSet)
+ for _, uuid := range strings.Split(instance.AncestryUUID, ",") {
+ if uuid != instance.ServerUUID {
+ redactedExecutedGtidSet.RemoveUUID(uuid)
+ }
+ if instance.IsCoPrimary && uuid == instance.ServerUUID {
+ // If this is a co-primary, then this server is likely to show its own generated GTIDs as errant,
+ // because its co-primary has not applied them yet
+ redactedExecutedGtidSet.RemoveUUID(uuid)
+ }
+ }
+ // Avoid querying the database if there's no point:
+ if !redactedExecutedGtidSet.IsEmpty() {
+ redactedPrimaryExecutedGtidSet, _ := NewOracleGtidSet(instance.primaryExecutedGtidSet)
+ redactedPrimaryExecutedGtidSet.RemoveUUID(instance.SourceUUID)
+
+ instance.GtidErrant, err = replication.Subtract(redactedExecutedGtidSet.String(), redactedPrimaryExecutedGtidSet.String())
+ if err == nil {
+ var gtidCount int64
+ gtidCount, err = replication.GTIDCount(instance.GtidErrant)
+ currentErrantGTIDCount.Set(instance.InstanceAlias, gtidCount)
+ }
+ }
+ }
+ return err
+}
+
// getKeyspaceShardName returns a single string having both the keyspace and shard
func getKeyspaceShardName(keyspace, shard string) string {
return fmt.Sprintf("%v:%v", keyspace, shard)
@@ -439,16 +473,16 @@ func ReadInstanceClusterAttributes(instance *Instance) (err error) {
var primaryExecutedGtidSet string
primaryDataFound := false
- query := `
- select
- replication_depth,
- source_host,
- source_port,
- ancestry_uuid,
- executed_gtid_set
- from database_instance
- where hostname=? and port=?
- `
+ query := `SELECT
+ replication_depth,
+ source_host,
+ source_port,
+ ancestry_uuid,
+ executed_gtid_set
+ FROM database_instance
+ WHERE
+ hostname = ?
+ AND port = ?`
primaryHostname := instance.SourceHost
primaryPort := instance.SourcePort
args := sqlutils.Args(primaryHostname, primaryPort)
@@ -544,8 +578,8 @@ func readInstanceRow(m sqlutils.RowMap) *Instance {
instance.ReplicationDepth = m.GetUint("replication_depth")
instance.IsCoPrimary = m.GetBool("is_co_primary")
instance.HasReplicationCredentials = m.GetBool("has_replication_credentials")
- instance.IsUpToDate = (m.GetUint("seconds_since_last_checked") <= config.Config.InstancePollSeconds)
- instance.IsRecentlyChecked = (m.GetUint("seconds_since_last_checked") <= config.Config.InstancePollSeconds*5)
+ instance.IsUpToDate = m.GetUint("seconds_since_last_checked") <= config.GetInstancePollSeconds()
+ instance.IsRecentlyChecked = m.GetUint("seconds_since_last_checked") <= config.GetInstancePollSeconds()*5
instance.LastSeenTimestamp = m.GetString("last_seen")
instance.IsLastCheckValid = m.GetBool("is_last_check_valid")
instance.SecondsSinceLastSeen = m.GetNullInt64("seconds_since_last_seen")
@@ -562,7 +596,7 @@ func readInstanceRow(m sqlutils.RowMap) *Instance {
instance.Problems = append(instance.Problems, "not_recently_checked")
} else if instance.ReplicationThreadsExist() && !instance.ReplicaRunning() {
instance.Problems = append(instance.Problems, "not_replicating")
- } else if instance.ReplicationLagSeconds.Valid && util.AbsInt64(instance.ReplicationLagSeconds.Int64-int64(instance.SQLDelay)) > int64(config.Config.ReasonableReplicationLagSeconds) {
+ } else if instance.ReplicationLagSeconds.Valid && util.AbsInt64(instance.ReplicationLagSeconds.Int64-int64(instance.SQLDelay)) > int64(config.GetReasonableReplicationLagSeconds()) {
instance.Problems = append(instance.Problems, "replication_lag")
}
if instance.GtidErrant != "" {
@@ -580,20 +614,22 @@ func readInstancesByCondition(condition string, args []any, sort string) ([](*In
if sort == "" {
sort = `alias`
}
- query := fmt.Sprintf(`
- select
- *,
- strftime('%%s', 'now') - strftime('%%s', last_checked) as seconds_since_last_checked,
- ifnull(last_checked <= last_seen, 0) as is_last_check_valid,
- strftime('%%s', 'now') - strftime('%%s', last_seen) as seconds_since_last_seen
- from
- vitess_tablet
- left join database_instance using (alias, hostname, port)
- where
- %s
- order by
- %s
- `, condition, sort)
+ query := fmt.Sprintf(`SELECT
+ *,
+ STRFTIME('%%s', 'now') - STRFTIME('%%s', last_checked) AS seconds_since_last_checked,
+ IFNULL(last_checked <= last_seen, 0) AS is_last_check_valid,
+ STRFTIME('%%s', 'now') - STRFTIME('%%s', last_seen) AS seconds_since_last_seen
+ FROM
+ vitess_tablet
+ LEFT JOIN database_instance USING (alias, hostname, port)
+ WHERE
+ %s
+ ORDER BY
+ %s
+ `,
+ condition,
+ sort,
+ )
err := db.QueryVTOrc(query, args, func(m sqlutils.RowMap) error {
instance := readInstanceRow(m)
@@ -614,9 +650,7 @@ func readInstancesByCondition(condition string, args []any, sort string) ([](*In
// ReadInstance reads an instance from the vtorc backend database
func ReadInstance(tabletAlias string) (*Instance, bool, error) {
- condition := `
- alias = ?
- `
+ condition := `alias = ?`
instances, err := readInstancesByCondition(condition, sqlutils.Args(tabletAlias), "")
// We know there will be at most one (alias is the PK).
// And we expect to find one.
@@ -633,30 +667,28 @@ func ReadInstance(tabletAlias string) (*Instance, bool, error) {
// ReadProblemInstances reads all instances with problems
func ReadProblemInstances(keyspace string, shard string) ([](*Instance), error) {
condition := `
- keyspace LIKE (CASE WHEN ? = '' THEN '%' ELSE ? END)
- and shard LIKE (CASE WHEN ? = '' THEN '%' ELSE ? END)
- and (
- (last_seen < last_checked)
- or (strftime('%%s', 'now') - strftime('%%s', last_checked) > ?)
- or (replication_sql_thread_state not in (-1 ,1))
- or (replication_io_thread_state not in (-1 ,1))
- or (abs(cast(replication_lag_seconds as integer) - cast(sql_delay as integer)) > ?)
- or (abs(cast(replica_lag_seconds as integer) - cast(sql_delay as integer)) > ?)
- or (gtid_errant != '')
- )
- `
-
- args := sqlutils.Args(keyspace, keyspace, shard, shard, config.Config.InstancePollSeconds*5, config.Config.ReasonableReplicationLagSeconds, config.Config.ReasonableReplicationLagSeconds)
+ keyspace LIKE (CASE WHEN ? = '' THEN '%' ELSE ? END)
+ AND shard LIKE (CASE WHEN ? = '' THEN '%' ELSE ? END)
+ AND (
+ (last_seen < last_checked)
+ OR (STRFTIME('%%s', 'now') - STRFTIME('%%s', last_checked) > ?)
+ OR (replication_sql_thread_state NOT IN (-1 ,1))
+ OR (replication_io_thread_state NOT IN (-1 ,1))
+ OR (ABS(CAST(replication_lag_seconds AS integer) - CAST(sql_delay AS integer)) > ?)
+ OR (ABS(CAST(replica_lag_seconds AS integer) - CAST(sql_delay AS integer)) > ?)
+ OR (gtid_errant != '')
+ )`
+
+ args := sqlutils.Args(keyspace, keyspace, shard, shard, config.GetInstancePollSeconds()*5, config.GetReasonableReplicationLagSeconds(), config.GetReasonableReplicationLagSeconds())
return readInstancesByCondition(condition, args, "")
}
// ReadInstancesWithErrantGTIds reads all instances with errant GTIDs
func ReadInstancesWithErrantGTIds(keyspace string, shard string) ([]*Instance, error) {
condition := `
- keyspace LIKE (CASE WHEN ? = '' THEN '%' ELSE ? END)
- and shard LIKE (CASE WHEN ? = '' THEN '%' ELSE ? END)
- and gtid_errant != ''
- `
+ keyspace LIKE (CASE WHEN ? = '' THEN '%' ELSE ? END)
+ AND shard LIKE (CASE WHEN ? = '' THEN '%' ELSE ? END)
+ AND gtid_errant != ''`
args := sqlutils.Args(keyspace, keyspace, shard, shard)
return readInstancesByCondition(condition, args, "")
@@ -664,15 +696,14 @@ func ReadInstancesWithErrantGTIds(keyspace string, shard string) ([]*Instance, e
// GetKeyspaceShardName gets the keyspace shard name for the given instance key
func GetKeyspaceShardName(tabletAlias string) (keyspace string, shard string, err error) {
- query := `
- select
- keyspace,
- shard
- from
- vitess_tablet
- where
- alias = ?
- `
+ query := `SELECT
+ keyspace,
+ shard
+ FROM
+ vitess_tablet
+ WHERE
+ alias = ?
+ `
err = db.QueryVTOrc(query, sqlutils.Args(tabletAlias), func(m sqlutils.RowMap) error {
keyspace = m.GetString("keyspace")
shard = m.GetString("shard")
@@ -695,28 +726,27 @@ func GetKeyspaceShardName(tabletAlias string) (keyspace string, shard string, er
// the instance.
func ReadOutdatedInstanceKeys() ([]string, error) {
var res []string
- query := `
- SELECT
- alias
- FROM
- database_instance
- WHERE
- CASE
- WHEN last_attempted_check <= last_checked
- THEN last_checked < datetime('now', printf('-%d second', ?))
- ELSE last_checked < datetime('now', printf('-%d second', ?))
- END
- UNION
- SELECT
- vitess_tablet.alias
- FROM
- vitess_tablet LEFT JOIN database_instance ON (
- vitess_tablet.alias = database_instance.alias
- )
- WHERE
- database_instance.alias IS NULL
- `
- args := sqlutils.Args(config.Config.InstancePollSeconds, 2*config.Config.InstancePollSeconds)
+ query := `SELECT
+ alias
+ FROM
+ database_instance
+ WHERE
+ CASE
+ WHEN last_attempted_check <= last_checked
+ THEN last_checked < DATETIME('now', PRINTF('-%d SECOND', ?))
+ ELSE last_checked < DATETIME('now', PRINTF('-%d SECOND', ?))
+ END
+ UNION
+ SELECT
+ vitess_tablet.alias
+ FROM
+ vitess_tablet LEFT JOIN database_instance ON (
+ vitess_tablet.alias = database_instance.alias
+ )
+ WHERE
+ database_instance.alias IS NULL
+ `
+ args := sqlutils.Args(config.GetInstancePollSeconds(), 2*config.GetInstancePollSeconds())
err := db.QueryVTOrc(query, args, func(m sqlutils.RowMap) error {
tabletAlias := m.GetString("alias")
@@ -758,12 +788,17 @@ func mkInsert(table string, columns []string, values []string, nrRows int, inser
}
col := strings.Join(columns, ", ")
- q.WriteString(fmt.Sprintf(`%s %s
- (%s)
- VALUES
- %s
- `,
- insertStr, table, col, val.String()))
+ query := fmt.Sprintf(`%s %s
+ (%s)
+ VALUES
+ %s
+ `,
+ insertStr,
+ table,
+ col,
+ val.String(),
+ )
+ q.WriteString(query)
return q.String(), nil
}
@@ -849,13 +884,13 @@ func mkInsertForInstances(instances []*Instance, instanceWasActuallyFound bool,
for i := range columns {
values[i] = "?"
}
- values[3] = "datetime('now')" // last_checked
- values[4] = "datetime('now')" // last_attempted_check
+ values[3] = "DATETIME('now')" // last_checked
+ values[4] = "DATETIME('now')" // last_attempted_check
values[5] = "1" // last_check_partial_success
if updateLastSeen {
columns = append(columns, "last_seen")
- values = append(values, "datetime('now')")
+ values = append(values, "DATETIME('now')")
}
var args []any
@@ -971,14 +1006,13 @@ func WriteInstance(instance *Instance, instanceWasActuallyFound bool, lastError
// for a given instance
func UpdateInstanceLastChecked(tabletAlias string, partialSuccess bool) error {
writeFunc := func() error {
- _, err := db.ExecVTOrc(`
- update
- database_instance
- set
- last_checked = datetime('now'),
- last_check_partial_success = ?
- where
- alias = ?`,
+ _, err := db.ExecVTOrc(`UPDATE database_instance
+ SET
+ last_checked = DATETIME('now'),
+ last_check_partial_success = ?
+ WHERE
+ alias = ?
+ `,
partialSuccess,
tabletAlias,
)
@@ -1000,13 +1034,12 @@ func UpdateInstanceLastChecked(tabletAlias string, partialSuccess bool) error {
// we have a "hanging" issue.
func UpdateInstanceLastAttemptedCheck(tabletAlias string) error {
writeFunc := func() error {
- _, err := db.ExecVTOrc(`
- update
- database_instance
- set
- last_attempted_check = datetime('now')
- where
- alias = ?`,
+ _, err := db.ExecVTOrc(`UPDATE database_instance
+ SET
+ last_attempted_check = DATETIME('now')
+ WHERE
+ alias = ?
+ `,
tabletAlias,
)
if err != nil {
@@ -1037,11 +1070,11 @@ func ForgetInstance(tabletAlias string) error {
currentErrantGTIDCount.Reset(tabletAlias)
// Delete from the 'vitess_tablet' table.
- _, err := db.ExecVTOrc(`
- delete
- from vitess_tablet
- where
- alias = ?`,
+ _, err := db.ExecVTOrc(`DELETE
+ FROM vitess_tablet
+ WHERE
+ alias = ?
+ `,
tabletAlias,
)
if err != nil {
@@ -1050,11 +1083,11 @@ func ForgetInstance(tabletAlias string) error {
}
// Also delete from the 'database_instance' table.
- sqlResult, err := db.ExecVTOrc(`
- delete
- from database_instance
- where
- alias = ?`,
+ sqlResult, err := db.ExecVTOrc(`DELETE
+ FROM database_instance
+ WHERE
+ alias = ?
+ `,
tabletAlias,
)
if err != nil {
@@ -1078,11 +1111,11 @@ func ForgetInstance(tabletAlias string) error {
// ForgetLongUnseenInstances will remove entries of all instances that have long since been last seen.
func ForgetLongUnseenInstances() error {
- sqlResult, err := db.ExecVTOrc(`
- delete
- from database_instance
- where
- last_seen < datetime('now', printf('-%d hour', ?))`,
+ sqlResult, err := db.ExecVTOrc(`DELETE
+ FROM database_instance
+ WHERE
+ last_seen < DATETIME('now', PRINTF('-%d HOUR', ?))
+ `,
config.UnseenInstanceForgetHours,
)
if err != nil {
@@ -1103,18 +1136,26 @@ func ForgetLongUnseenInstances() error {
// SnapshotTopologies records topology graph for all existing topologies
func SnapshotTopologies() error {
writeFunc := func() error {
- _, err := db.ExecVTOrc(`
- insert or ignore into
- database_instance_topology_history (snapshot_unix_timestamp,
- alias, hostname, port, source_host, source_port, keyspace, shard, version)
- select
- strftime('%s', 'now'),
- vitess_tablet.alias, vitess_tablet.hostname, vitess_tablet.port,
- database_instance.source_host, database_instance.source_port,
+ _, err := db.ExecVTOrc(`INSERT OR IGNORE
+ INTO database_instance_topology_history (
+ snapshot_unix_timestamp,
+ alias,
+ hostname,
+ port,
+ source_host,
+ source_port,
+ keyspace,
+ shard,
+ version
+ )
+ SELECT
+ STRFTIME('%s', 'now'),
+ vitess_tablet.alias, vitess_tablet.hostname, vitess_tablet.port,
+ database_instance.source_host, database_instance.source_port,
vitess_tablet.keyspace, vitess_tablet.shard, database_instance.version
- from
- vitess_tablet left join database_instance using (alias, hostname, port)
- `,
+ FROM
+ vitess_tablet LEFT JOIN database_instance USING (alias, hostname, port)
+ `,
)
if err != nil {
log.Error(err)
@@ -1127,15 +1168,17 @@ func SnapshotTopologies() error {
}
func ExpireStaleInstanceBinlogCoordinates() error {
- expireSeconds := config.Config.ReasonableReplicationLagSeconds * 2
+ expireSeconds := config.GetReasonableReplicationLagSeconds() * 2
if expireSeconds < config.StaleInstanceCoordinatesExpireSeconds {
expireSeconds = config.StaleInstanceCoordinatesExpireSeconds
}
writeFunc := func() error {
- _, err := db.ExecVTOrc(`
- delete from database_instance_stale_binlog_coordinates
- where first_seen < datetime('now', printf('-%d second', ?))
- `, expireSeconds,
+ _, err := db.ExecVTOrc(`DELETE
+ FROM database_instance_stale_binlog_coordinates
+ WHERE
+ first_seen < DATETIME('now', PRINTF('-%d SECOND', ?))
+ `,
+ expireSeconds,
)
if err != nil {
log.Error(err)
@@ -1157,7 +1200,7 @@ func GetDatabaseState() (string, error) {
ts := tableState{
TableName: tableName,
}
- err := db.QueryVTOrc("select * from "+tableName, nil, func(rowMap sqlutils.RowMap) error {
+ err := db.QueryVTOrc("SELECT * FROM "+tableName, nil, func(rowMap sqlutils.RowMap) error {
ts.Rows = append(ts.Rows, rowMap)
return nil
})
diff --git a/go/vt/vtorc/inst/instance_dao_test.go b/go/vt/vtorc/inst/instance_dao_test.go
index 2416c1abb90..cc3217442ed 100644
--- a/go/vt/vtorc/inst/instance_dao_test.go
+++ b/go/vt/vtorc/inst/instance_dao_test.go
@@ -14,6 +14,7 @@ import (
"vitess.io/vitess/go/vt/external/golib/sqlutils"
"vitess.io/vitess/go/vt/log"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
+ "vitess.io/vitess/go/vt/topo"
"vitess.io/vitess/go/vt/topo/topoproto"
"vitess.io/vitess/go/vt/vtorc/config"
"vitess.io/vitess/go/vt/vtorc/db"
@@ -65,7 +66,7 @@ func TestMkInsertSingle(t *testing.T) {
replica_sql_running, replica_io_running, replication_sql_thread_state, replication_io_thread_state, has_replication_filters, supports_oracle_gtid, oracle_gtid, source_uuid, ancestry_uuid, executed_gtid_set, gtid_mode, gtid_purged, gtid_errant, mariadb_gtid, pseudo_gtid,
source_log_file, read_source_log_pos, relay_source_log_file, exec_source_log_pos, relay_log_file, relay_log_pos, last_sql_error, last_io_error, replication_lag_seconds, replica_lag_seconds, sql_delay, data_center, region, physical_environment, replication_depth, is_co_primary, has_replication_credentials, allow_tls, semi_sync_enforced, semi_sync_primary_enabled, semi_sync_primary_timeout, semi_sync_primary_wait_for_replica_count, semi_sync_replica_enabled, semi_sync_primary_status, semi_sync_primary_clients, semi_sync_replica_status, last_discovery_latency, last_seen)
VALUES
- (?, ?, ?, datetime('now'), datetime('now'), 1, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))
+ (?, ?, ?, DATETIME('now'), DATETIME('now'), 1, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, DATETIME('now'))
`
a1 := `zone1-i710, i710, 3306, 710, , 5.6.7, 5.6, MySQL, false, false, STATEMENT,
FULL, false, false, , 0, , 0, 0, 0,
@@ -88,9 +89,9 @@ func TestMkInsertThree(t *testing.T) {
replica_sql_running, replica_io_running, replication_sql_thread_state, replication_io_thread_state, has_replication_filters, supports_oracle_gtid, oracle_gtid, source_uuid, ancestry_uuid, executed_gtid_set, gtid_mode, gtid_purged, gtid_errant, mariadb_gtid, pseudo_gtid,
source_log_file, read_source_log_pos, relay_source_log_file, exec_source_log_pos, relay_log_file, relay_log_pos, last_sql_error, last_io_error, replication_lag_seconds, replica_lag_seconds, sql_delay, data_center, region, physical_environment, replication_depth, is_co_primary, has_replication_credentials, allow_tls, semi_sync_enforced, semi_sync_primary_enabled, semi_sync_primary_timeout, semi_sync_primary_wait_for_replica_count, semi_sync_replica_enabled, semi_sync_primary_status, semi_sync_primary_clients, semi_sync_replica_status, last_discovery_latency, last_seen)
VALUES
- (?, ?, ?, datetime('now'), datetime('now'), 1, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now')),
- (?, ?, ?, datetime('now'), datetime('now'), 1, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now')),
- (?, ?, ?, datetime('now'), datetime('now'), 1, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))
+ (?, ?, ?, DATETIME('now'), DATETIME('now'), 1, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, DATETIME('now')),
+ (?, ?, ?, DATETIME('now'), DATETIME('now'), 1, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, DATETIME('now')),
+ (?, ?, ?, DATETIME('now'), DATETIME('now'), 1, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, DATETIME('now'))
`
a3 := `
zone1-i710, i710, 3306, 710, , 5.6.7, 5.6, MySQL, false, false, STATEMENT, FULL, false, false, , 0, , 0, 0, 0, false, false, 0, 0, false, false, false, , , , , , , false, false, , 0, mysql.000007, 10, , 0, , , {0 false}, {0 false}, 0, , , , 0, false, false, false, false, false, 0, 0, false, false, 0, false, 0,
@@ -241,11 +242,11 @@ func TestReadProblemInstances(t *testing.T) {
// We need to set the instance poll time to a large value; otherwise all the instances are reported as having problems since their last_checked is very old.
// Setting this value to a hundred years, we ensure that this test doesn't fail with this issue for the next hundred years.
- oldVal := config.Config.InstancePollSeconds
+ oldVal := config.GetInstancePollTime()
defer func() {
- config.Config.InstancePollSeconds = oldVal
+ config.SetInstancePollTime(oldVal)
}()
- config.Config.InstancePollSeconds = 60 * 60 * 24 * 365 * 100
+ config.SetInstancePollTime(60 * 60 * 24 * 365 * 100 * time.Second)
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
@@ -325,11 +326,11 @@ func TestReadInstancesWithErrantGTIds(t *testing.T) {
// We need to set the instance poll time to a large value; otherwise all the instances are reported as having problems since their last_checked is very old.
// Setting this value to a hundred years, we ensure that this test doesn't fail with this issue for the next hundred years.
- oldVal := config.Config.InstancePollSeconds
+ oldVal := config.GetInstancePollTime()
defer func() {
- config.Config.InstancePollSeconds = oldVal
+ config.SetInstancePollTime(oldVal)
}()
- config.Config.InstancePollSeconds = 60 * 60 * 24 * 365 * 100
+ config.SetInstancePollTime(60 * 60 * 24 * 365 * 100 * time.Second)
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
@@ -428,27 +429,27 @@ func TestReadOutdatedInstanceKeys(t *testing.T) {
}{
{
name: "No problems",
- sql: []string{"update database_instance set last_checked = datetime('now')"},
+ sql: []string{"update database_instance set last_checked = DATETIME('now')"},
instancesRequired: nil,
}, {
name: "One instance is outdated",
sql: []string{
- "update database_instance set last_checked = datetime('now')",
- "update database_instance set last_checked = datetime('now', '-1 hour') where alias = 'zone1-0000000100'",
+ "update database_instance set last_checked = DATETIME('now')",
+ "update database_instance set last_checked = DATETIME('now', '-1 hour') where alias = 'zone1-0000000100'",
},
instancesRequired: []string{"zone1-0000000100"},
}, {
name: "One instance doesn't have myql data",
sql: []string{
- "update database_instance set last_checked = datetime('now')",
+ "update database_instance set last_checked = DATETIME('now')",
`INSERT INTO vitess_tablet VALUES('zone1-0000000103','localhost',7706,'ks','0','zone1',2,'0001-01-01 00:00:00+00:00','');`,
},
instancesRequired: []string{"zone1-0000000103"},
}, {
name: "One instance doesn't have myql data and one is outdated",
sql: []string{
- "update database_instance set last_checked = datetime('now')",
- "update database_instance set last_checked = datetime('now', '-1 hour') where alias = 'zone1-0000000100'",
+ "update database_instance set last_checked = DATETIME('now')",
+ "update database_instance set last_checked = DATETIME('now', '-1 hour') where alias = 'zone1-0000000100'",
`INSERT INTO vitess_tablet VALUES('zone1-0000000103','localhost',7706,'ks','0','zone1',2,'0001-01-01 00:00:00+00:00','');`,
},
instancesRequired: []string{"zone1-0000000103", "zone1-0000000100"},
@@ -459,13 +460,13 @@ func TestReadOutdatedInstanceKeys(t *testing.T) {
waitForCacheInitialization()
// We are setting the instance poll time to 25 minutes, just for the test.
- oldVal := config.Config.InstancePollSeconds
+ oldVal := config.GetInstancePollTime()
oldCache := forgetAliases
defer func() {
forgetAliases = oldCache
- config.Config.InstancePollSeconds = oldVal
+ config.SetInstancePollTime(oldVal)
}()
- config.Config.InstancePollSeconds = 60 * 25
+ config.SetInstancePollTime(60 * 25 * time.Second)
forgetAliases = cache.New(time.Minute, time.Minute)
for _, tt := range tests {
@@ -485,10 +486,10 @@ func TestReadOutdatedInstanceKeys(t *testing.T) {
errInDataCollection := db.QueryVTOrcRowsMap(`select alias,
last_checked,
last_attempted_check,
-ROUND((JULIANDAY(datetime('now')) - JULIANDAY(last_checked)) * 86400) AS difference,
+ROUND((JULIANDAY(DATETIME('now')) - JULIANDAY(last_checked)) * 86400) AS difference,
last_attempted_check <= last_checked as use1,
-last_checked < datetime('now', '-1500 second') as is_outdated1,
-last_checked < datetime('now', '-3000 second') as is_outdated2
+last_checked < DATETIME('now', '-1500 second') as is_outdated1,
+last_checked < DATETIME('now', '-3000 second') as is_outdated2
from database_instance`, func(rowMap sqlutils.RowMap) error {
log.Errorf("Row in database_instance - %+v", rowMap)
return nil
@@ -512,12 +513,12 @@ func TestUpdateInstanceLastChecked(t *testing.T) {
name: "Verify updated last checked",
tabletAlias: "zone1-0000000100",
partialSuccess: false,
- conditionToCheck: "last_checked >= datetime('now', '-30 second') and last_check_partial_success = false",
+ conditionToCheck: "last_checked >= DATETIME('now', '-30 second') and last_check_partial_success = false",
}, {
name: "Verify partial success",
tabletAlias: "zone1-0000000100",
partialSuccess: true,
- conditionToCheck: "last_checked >= datetime('now', '-30 second') and last_check_partial_success = true",
+ conditionToCheck: "last_checked >= DATETIME('now', '-30 second') and last_check_partial_success = true",
}, {
name: "Verify no error on unknown tablet",
tabletAlias: "unknown tablet",
@@ -563,7 +564,7 @@ func TestUpdateInstanceLastAttemptedCheck(t *testing.T) {
{
name: "Verify updated last checked",
tabletAlias: "zone1-0000000100",
- conditionToCheck: "last_attempted_check >= datetime('now', '-30 second')",
+ conditionToCheck: "last_attempted_check >= DATETIME('now', '-30 second')",
}, {
name: "Verify no error on unknown tablet",
tabletAlias: "unknown tablet",
@@ -718,10 +719,10 @@ func TestGetDatabaseState(t *testing.T) {
}
func TestExpireTableData(t *testing.T) {
- oldVal := config.Config.AuditPurgeDays
- config.Config.AuditPurgeDays = 10
+ oldVal := config.GetAuditPurgeDays()
+ config.SetAuditPurgeDays(10)
defer func() {
- config.Config.AuditPurgeDays = oldVal
+ config.SetAuditPurgeDays(oldVal)
}()
tests := []struct {
@@ -736,19 +737,19 @@ func TestExpireTableData(t *testing.T) {
tableName: "audit",
timestampColumn: "audit_timestamp",
expectedRowCount: 1,
- insertQuery: `insert into audit (audit_id, audit_timestamp, audit_type, alias, message, keyspace, shard) values
-(1, datetime('now', '-50 DAY'), 'a','a','a','a','a'),
-(2, datetime('now', '-5 DAY'), 'a','a','a','a','a')`,
+ insertQuery: `INSERT INTO audit (audit_id, audit_timestamp, audit_type, alias, message, keyspace, shard) VALUES
+(1, DATETIME('now', '-50 DAY'), 'a','a','a','a','a'),
+(2, DATETIME('now', '-5 DAY'), 'a','a','a','a','a')`,
},
{
name: "ExpireRecoveryDetectionHistory",
tableName: "recovery_detection",
timestampColumn: "detection_timestamp",
expectedRowCount: 2,
- insertQuery: `insert into recovery_detection (detection_id, detection_timestamp, alias, analysis, keyspace, shard) values
-(1, datetime('now', '-3 DAY'),'a','a','a','a'),
-(2, datetime('now', '-5 DAY'),'a','a','a','a'),
-(3, datetime('now', '-15 DAY'),'a','a','a','a')`,
+ insertQuery: `INSERT INTO recovery_detection (detection_id, detection_timestamp, alias, analysis, keyspace, shard) VALUES
+(1, DATETIME('now', '-3 DAY'),'a','a','a','a'),
+(2, DATETIME('now', '-5 DAY'),'a','a','a','a'),
+(3, DATETIME('now', '-15 DAY'),'a','a','a','a')`,
},
}
for _, tt := range tests {
@@ -773,3 +774,165 @@ func TestExpireTableData(t *testing.T) {
})
}
}
+
+func TestDetectErrantGTIDs(t *testing.T) {
+ tests := []struct {
+ name string
+ instance *Instance
+ primaryInstance *Instance
+ wantErr bool
+ wantErrantGTID string
+ }{
+ {
+ name: "No errant GTIDs",
+ instance: &Instance{
+ ExecutedGtidSet: "230ea8ea-81e3-11e4-972a-e25ec4bd140a:1-10539,8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-34",
+ primaryExecutedGtidSet: "230ea8ea-81e3-11e4-972a-e25ec4bd140a:1-10591,8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-34",
+ AncestryUUID: "316d193c-70e5-11e5-adb2-ecf4bb2262ff,230ea8ea-81e3-11e4-972a-e25ec4bd140a",
+ ServerUUID: "316d193c-70e5-11e5-adb2-ecf4bb2262ff",
+ SourceUUID: "230ea8ea-81e3-11e4-972a-e25ec4bd140a",
+ },
+ }, {
+ name: "Errant GTIDs on replica",
+ instance: &Instance{
+ ExecutedGtidSet: "230ea8ea-81e3-11e4-972a-e25ec4bd140a:1-10539,8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-34,316d193c-70e5-11e5-adb2-ecf4bb2262ff:34",
+ primaryExecutedGtidSet: "230ea8ea-81e3-11e4-972a-e25ec4bd140a:1-10591,8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-34",
+ AncestryUUID: "316d193c-70e5-11e5-adb2-ecf4bb2262ff,230ea8ea-81e3-11e4-972a-e25ec4bd140a",
+ ServerUUID: "316d193c-70e5-11e5-adb2-ecf4bb2262ff",
+ SourceUUID: "230ea8ea-81e3-11e4-972a-e25ec4bd140a",
+ },
+ wantErrantGTID: "316d193c-70e5-11e5-adb2-ecf4bb2262ff:34",
+ },
+ {
+ name: "No errant GTIDs on old primary",
+ instance: &Instance{
+ ExecutedGtidSet: "230ea8ea-81e3-11e4-972a-e25ec4bd140a:1-10539,8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-34,316d193c-70e5-11e5-adb2-ecf4bb2262ff:1-341",
+ AncestryUUID: "316d193c-70e5-11e5-adb2-ecf4bb2262ff",
+ ServerUUID: "316d193c-70e5-11e5-adb2-ecf4bb2262ff",
+ },
+ primaryInstance: &Instance{
+ SourceHost: "",
+ ExecutedGtidSet: "230ea8ea-81e3-11e4-972a-e25ec4bd140a:1-10589,8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-34,316d193c-70e5-11e5-adb2-ecf4bb2262ff:1-341",
+ },
+ },
+ {
+ name: "Errant GTIDs on old primary",
+ instance: &Instance{
+ ExecutedGtidSet: "230ea8ea-81e3-11e4-972a-e25ec4bd140a:1-10539,8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-34,316d193c-70e5-11e5-adb2-ecf4bb2262ff:1-342",
+ AncestryUUID: "316d193c-70e5-11e5-adb2-ecf4bb2262ff",
+ ServerUUID: "316d193c-70e5-11e5-adb2-ecf4bb2262ff",
+ },
+ primaryInstance: &Instance{
+ SourceHost: "",
+ ExecutedGtidSet: "230ea8ea-81e3-11e4-972a-e25ec4bd140a:1-10589,8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-34,316d193c-70e5-11e5-adb2-ecf4bb2262ff:1-341",
+ },
+ wantErrantGTID: "316d193c-70e5-11e5-adb2-ecf4bb2262ff:342",
+ }, {
+ name: "Old information for new primary",
+ instance: &Instance{
+ ExecutedGtidSet: "230ea8ea-81e3-11e4-972a-e25ec4bd140a:1-10539,8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-34,316d193c-70e5-11e5-adb2-ecf4bb2262ff:1-342",
+ AncestryUUID: "316d193c-70e5-11e5-adb2-ecf4bb2262ff",
+ ServerUUID: "316d193c-70e5-11e5-adb2-ecf4bb2262ff",
+ },
+ primaryInstance: &Instance{
+ SourceHost: "localhost",
+ ExecutedGtidSet: "230ea8ea-81e3-11e4-972a-e25ec4bd140a:1-10539,8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-34,316d193c-70e5-11e5-adb2-ecf4bb2262ff:1-311",
+ },
+ },
+ }
+
+ keyspaceName := "ks"
+ shardName := "0"
+ tablet := &topodatapb.Tablet{
+ Alias: &topodatapb.TabletAlias{
+ Cell: "zone-1",
+ Uid: 100,
+ },
+ Keyspace: keyspaceName,
+ Shard: shardName,
+ }
+ primaryTablet := &topodatapb.Tablet{
+ Alias: &topodatapb.TabletAlias{
+ Cell: "zone-1",
+ Uid: 101,
+ },
+ Keyspace: keyspaceName,
+ Shard: shardName,
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ // Clear the database after the test. The easiest way to do that is to run all the initialization commands again.
+ defer func() {
+ db.ClearVTOrcDatabase()
+ }()
+ db.ClearVTOrcDatabase()
+
+ // Save shard record for the primary tablet.
+ err := SaveShard(topo.NewShardInfo(keyspaceName, shardName, &topodatapb.Shard{
+ PrimaryAlias: primaryTablet.Alias,
+ }, nil))
+ require.NoError(t, err)
+
+ if tt.primaryInstance != nil {
+ tt.primaryInstance.InstanceAlias = topoproto.TabletAliasString(primaryTablet.Alias)
+ err = SaveTablet(primaryTablet)
+ require.NoError(t, err)
+ err = WriteInstance(tt.primaryInstance, true, nil)
+ require.NoError(t, err)
+ }
+
+ tt.instance.InstanceAlias = topoproto.TabletAliasString(tablet.Alias)
+ err = detectErrantGTIDs(tt.instance, tablet)
+ if tt.wantErr {
+ require.Error(t, err)
+ return
+ }
+ require.NoError(t, err)
+ require.EqualValues(t, tt.wantErrantGTID, tt.instance.GtidErrant)
+ })
+ }
+}
+
+// TestPrimaryErrantGTIDs tests that we don't run Errant GTID detection on the primary tablet itself!
+func TestPrimaryErrantGTIDs(t *testing.T) {
+ // Clear the database after the test. The easiest way to do that is to run all the initialization commands again.
+ defer func() {
+ db.ClearVTOrcDatabase()
+ }()
+ db.ClearVTOrcDatabase()
+ keyspaceName := "ks"
+ shardName := "0"
+ tablet := &topodatapb.Tablet{
+ Alias: &topodatapb.TabletAlias{
+ Cell: "zone-1",
+ Uid: 100,
+ },
+ Keyspace: keyspaceName,
+ Shard: shardName,
+ }
+ instance := &Instance{
+ SourceHost: "",
+ ExecutedGtidSet: "230ea8ea-81e3-11e4-972a-e25ec4bd140a:1-10589,8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-34,316d193c-70e5-11e5-adb2-ecf4bb2262ff:1-341",
+ InstanceAlias: topoproto.TabletAliasString(tablet.Alias),
+ }
+
+ // Save shard record for the primary tablet.
+ err := SaveShard(topo.NewShardInfo(keyspaceName, shardName, &topodatapb.Shard{
+ PrimaryAlias: tablet.Alias,
+ }, nil))
+ require.NoError(t, err)
+
+ // Store the tablet record and the instance.
+ err = SaveTablet(tablet)
+ require.NoError(t, err)
+ err = WriteInstance(instance, true, nil)
+ require.NoError(t, err)
+
+	// If we now read new information for this record that advances its GTID
+	// set even further, we shouldn't detect errant GTIDs on it, since it is the primary.
+	// We must not compare the primary against a previous version of itself.
+ instance.ExecutedGtidSet = "230ea8ea-81e3-11e4-972a-e25ec4bd140a:1-10589,8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-34,316d193c-70e5-11e5-adb2-ecf4bb2262ff:1-351"
+ err = detectErrantGTIDs(instance, tablet)
+ require.NoError(t, err)
+ require.EqualValues(t, "", instance.GtidErrant)
+}
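
For intuition on what the test cases above encode: errant GTIDs are transactions a replica executed under its own server UUID that the primary never saw. Below is a minimal, self-contained sketch of that comparison; the toy GTID parsing is an illustrative assumption, not Vitess's real replication.Position machinery, and detectErrantGTIDs itself additionally consults the topo so it can skip primaries and stale primary records.

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// intervalFor extracts the [start, end] transaction range recorded for one
// server UUID in a simplified GTID set string such as
// "230ea8ea-...:1-10539,316d193c-...:34".
func intervalFor(set, uuid string) (start, end int64) {
	for _, part := range strings.Split(set, ",") {
		u, rng, ok := strings.Cut(part, ":")
		if !ok || u != uuid {
			continue
		}
		lo, hi, found := strings.Cut(rng, "-")
		start, _ = strconv.ParseInt(lo, 10, 64)
		end = start
		if found {
			end, _ = strconv.ParseInt(hi, 10, 64)
		}
		return start, end
	}
	return 0, 0
}

// errantOn reports the transactions the replica executed under its own
// ServerUUID that the primary never executed.
func errantOn(replicaSet, primarySet, serverUUID string) string {
	rs, re := intervalFor(replicaSet, serverUUID)
	_, pe := intervalFor(primarySet, serverUUID)
	if re == 0 || re <= pe {
		return "" // nothing beyond the primary's high-water mark
	}
	if pe+1 > rs {
		rs = pe + 1
	}
	if rs == re {
		return fmt.Sprintf("%s:%d", serverUUID, re)
	}
	return fmt.Sprintf("%s:%d-%d", serverUUID, rs, re)
}

func main() {
	// Mirrors the "Errant GTIDs on old primary" case above: the replica is at
	// 1-342, the primary only reached 1-341, so transaction 342 is errant.
	fmt.Println(errantOn(
		"316d193c-70e5-11e5-adb2-ecf4bb2262ff:1-342",
		"316d193c-70e5-11e5-adb2-ecf4bb2262ff:1-341",
		"316d193c-70e5-11e5-adb2-ecf4bb2262ff",
	)) // 316d193c-70e5-11e5-adb2-ecf4bb2262ff:342
}
```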
diff --git a/go/vt/vtorc/inst/tablet_dao.go b/go/vt/vtorc/inst/tablet_dao.go
index af304292a70..f48f2b97370 100644
--- a/go/vt/vtorc/inst/tablet_dao.go
+++ b/go/vt/vtorc/inst/tablet_dao.go
@@ -56,13 +56,13 @@ func fullStatus(tabletAlias string) (*replicationdatapb.FullStatus, error) {
// ReadTablet reads the vitess tablet record.
func ReadTablet(tabletAlias string) (*topodatapb.Tablet, error) {
- query := `
- select
- info
- from
- vitess_tablet
- where alias = ?
- `
+ query := `SELECT
+ info
+ FROM
+ vitess_tablet
+ WHERE
+ alias = ?
+ `
args := sqlutils.Args(tabletAlias)
tablet := &topodatapb.Tablet{}
opts := prototext.UnmarshalOptions{DiscardUnknown: true}
@@ -84,14 +84,28 @@ func SaveTablet(tablet *topodatapb.Tablet) error {
if err != nil {
return err
}
- _, err = db.ExecVTOrc(`
- replace
- into vitess_tablet (
- alias, hostname, port, cell, keyspace, shard, tablet_type, primary_timestamp, info
- ) values (
- ?, ?, ?, ?, ?, ?, ?, ?, ?
- )
- `,
+ _, err = db.ExecVTOrc(`REPLACE
+ INTO vitess_tablet (
+ alias,
+ hostname,
+ port,
+ cell,
+ keyspace,
+ shard,
+ tablet_type,
+ primary_timestamp,
+ info
+ ) VALUES (
+ ?,
+ ?,
+ ?,
+ ?,
+ ?,
+ ?,
+ ?,
+ ?,
+ ?
+ )`,
topoproto.TabletAliasString(tablet.Alias),
tablet.MysqlHostname,
int(tablet.MysqlPort),
diff --git a/go/vt/vtorc/logic/disable_recovery.go b/go/vt/vtorc/logic/disable_recovery.go
index 60650798876..c5446eeb9ff 100644
--- a/go/vt/vtorc/logic/disable_recovery.go
+++ b/go/vt/vtorc/logic/disable_recovery.go
@@ -40,14 +40,13 @@ import (
// IsRecoveryDisabled returns true if Recoveries are disabled globally
func IsRecoveryDisabled() (disabled bool, err error) {
- query := `
- SELECT
- COUNT(*) as mycount
- FROM
- global_recovery_disable
- WHERE
- disable_recovery=?
- `
+ query := `SELECT
+ COUNT(*) AS mycount
+ FROM
+ global_recovery_disable
+ WHERE
+ disable_recovery = ?
+ `
err = db.QueryVTOrc(query, sqlutils.Args(1), func(m sqlutils.RowMap) error {
mycount := m.GetInt("mycount")
disabled = (mycount > 0)
@@ -63,21 +62,19 @@ func IsRecoveryDisabled() (disabled bool, err error) {
// DisableRecovery ensures recoveries are disabled globally
func DisableRecovery() error {
- _, err := db.ExecVTOrc(`
- INSERT OR IGNORE INTO global_recovery_disable
- (disable_recovery)
- VALUES (1)
- `,
- )
+ _, err := db.ExecVTOrc(`INSERT OR IGNORE
+ INTO global_recovery_disable (
+ disable_recovery
+ ) VALUES (1)`)
return err
}
// EnableRecovery ensures recoveries are enabled globally
func EnableRecovery() error {
// The "WHERE" clause is just to avoid full-scan reports by monitoring tools
- _, err := db.ExecVTOrc(`
- DELETE FROM global_recovery_disable WHERE disable_recovery >= 0
- `,
- )
+ _, err := db.ExecVTOrc(`DELETE
+ FROM global_recovery_disable
+ WHERE
+ disable_recovery >= 0`)
return err
}
diff --git a/go/vt/vtorc/logic/tablet_discovery.go b/go/vt/vtorc/logic/tablet_discovery.go
index e9bbcee35cb..e62c0652e62 100644
--- a/go/vt/vtorc/logic/tablet_discovery.go
+++ b/go/vt/vtorc/logic/tablet_discovery.go
@@ -66,12 +66,12 @@ func OpenTabletDiscovery() <-chan time.Time {
ts = topo.Open()
tmc = inst.InitializeTMC()
// Clear existing cache and perform a new refresh.
- if _, err := db.ExecVTOrc("delete from vitess_tablet"); err != nil {
+ if _, err := db.ExecVTOrc("DELETE FROM vitess_tablet"); err != nil {
log.Error(err)
}
// We refresh all information from the topo once before we start the ticks to do it on a timer.
populateAllInformation()
- return time.Tick(time.Second * time.Duration(config.Config.TopoInformationRefreshSeconds)) //nolint SA1015: using time.Tick leaks the underlying ticker
+ return time.Tick(config.GetTopoInformationRefreshDuration()) //nolint SA1015: using time.Tick leaks the underlying ticker
}
// populateAllInformation initializes all the information for VTOrc to function.
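
The call site above now takes a ready-made time.Duration from a typed getter instead of multiplying config.Config.TopoInformationRefreshSeconds by time.Second inline. A minimal sketch of such a getter follows, assuming an atomically stored seconds value; the actual registration, default, and reload wiring live in the vtorc config package.

```go
package config

import (
	"sync/atomic"
	"time"
)

// topoInformationRefreshSeconds stands in for the reloadable setting; the
// default of 15 here is chosen for illustration only.
var topoInformationRefreshSeconds atomic.Int64

func init() { topoInformationRefreshSeconds.Store(15) }

// SetTopoInformationRefreshSeconds would be invoked by the config layer on
// startup and on dynamic reloads.
func SetTopoInformationRefreshSeconds(seconds int64) {
	topoInformationRefreshSeconds.Store(seconds)
}

// GetTopoInformationRefreshDuration converts the stored seconds into the
// time.Duration that call sites such as OpenTabletDiscovery consume directly.
func GetTopoInformationRefreshDuration() time.Duration {
	return time.Duration(topoInformationRefreshSeconds.Load()) * time.Second
}
```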
diff --git a/go/vt/vtorc/logic/topology_recovery.go b/go/vt/vtorc/logic/topology_recovery.go
index aec137a45b4..f14eca624c9 100644
--- a/go/vt/vtorc/logic/topology_recovery.go
+++ b/go/vt/vtorc/logic/topology_recovery.go
@@ -21,7 +21,6 @@ import (
"encoding/json"
"fmt"
"math/rand/v2"
- "time"
"vitess.io/vitess/go/stats"
"vitess.io/vitess/go/vt/log"
@@ -235,8 +234,8 @@ func runEmergencyReparentOp(ctx context.Context, analysisEntry *inst.Replication
tablet.Shard,
reparentutil.EmergencyReparentOptions{
IgnoreReplicas: nil,
- WaitReplicasTimeout: time.Duration(config.Config.WaitReplicasTimeoutSeconds) * time.Second,
- PreventCrossCellPromotion: config.Config.PreventCrossDataCenterPrimaryFailover,
+ WaitReplicasTimeout: config.GetWaitReplicasTimeout(),
+ PreventCrossCellPromotion: config.GetPreventCrossCellFailover(),
WaitAllTablets: waitForAllTablets,
},
)
@@ -703,8 +702,8 @@ func electNewPrimary(ctx context.Context, analysisEntry *inst.ReplicationAnalysi
analyzedTablet.Keyspace,
analyzedTablet.Shard,
reparentutil.PlannedReparentOptions{
- WaitReplicasTimeout: time.Duration(config.Config.WaitReplicasTimeoutSeconds) * time.Second,
- TolerableReplLag: time.Duration(config.Config.TolerableReplicationLagSeconds) * time.Second,
+ WaitReplicasTimeout: config.GetWaitReplicasTimeout(),
+ TolerableReplLag: config.GetTolerableReplicationLag(),
},
)
diff --git a/go/vt/vtorc/logic/topology_recovery_dao.go b/go/vt/vtorc/logic/topology_recovery_dao.go
index 730e6b2a158..137251c4fc8 100644
--- a/go/vt/vtorc/logic/topology_recovery_dao.go
+++ b/go/vt/vtorc/logic/topology_recovery_dao.go
@@ -30,21 +30,20 @@ import (
// InsertRecoveryDetection inserts the recovery analysis that has been detected.
func InsertRecoveryDetection(analysisEntry *inst.ReplicationAnalysis) error {
- sqlResult, err := db.ExecVTOrc(`
- insert or ignore
- into recovery_detection (
- alias,
- analysis,
- keyspace,
- shard,
- detection_timestamp
- ) values (
- ?,
- ?,
- ?,
- ?,
- datetime('now')
- )`,
+ sqlResult, err := db.ExecVTOrc(`INSERT OR IGNORE
+ INTO recovery_detection (
+ alias,
+ analysis,
+ keyspace,
+ shard,
+ detection_timestamp
+ ) VALUES (
+ ?,
+ ?,
+ ?,
+ ?,
+ DATETIME('now')
+ )`,
analysisEntry.AnalyzedInstanceAlias,
string(analysisEntry.Analysis),
analysisEntry.ClusterDetails.Keyspace,
@@ -65,26 +64,24 @@ func InsertRecoveryDetection(analysisEntry *inst.ReplicationAnalysis) error {
func writeTopologyRecovery(topologyRecovery *TopologyRecovery) (*TopologyRecovery, error) {
analysisEntry := topologyRecovery.AnalysisEntry
- sqlResult, err := db.ExecVTOrc(`
- insert or ignore
- into topology_recovery (
- recovery_id,
- alias,
- start_recovery,
- analysis,
- keyspace,
- shard,
- detection_id
- ) values (
- ?,
- ?,
- datetime('now'),
- ?,
- ?,
- ?,
- ?
- )
- `,
+ sqlResult, err := db.ExecVTOrc(`INSERT OR IGNORE
+ INTO topology_recovery (
+ recovery_id,
+ alias,
+ start_recovery,
+ analysis,
+ keyspace,
+ shard,
+ detection_id
+ ) VALUES (
+ ?,
+ ?,
+ DATETIME('now'),
+ ?,
+ ?,
+ ?,
+ ?
+ )`,
sqlutils.NilIfZero(topologyRecovery.ID),
analysisEntry.AnalyzedInstanceAlias,
string(analysisEntry.Analysis),
@@ -138,15 +135,16 @@ func AttemptRecoveryRegistration(analysisEntry *inst.ReplicationAnalysis) (*Topo
// ResolveRecovery is called on completion of a recovery process and updates the recovery status.
// It does not clear the "active period" as this still takes place in order to avoid flapping.
func writeResolveRecovery(topologyRecovery *TopologyRecovery) error {
- _, err := db.ExecVTOrc(`
- update topology_recovery set
- is_successful = ?,
- successor_alias = ?,
- all_errors = ?,
- end_recovery = datetime('now')
- where
- recovery_id = ?
- `, topologyRecovery.IsSuccessful,
+ _, err := db.ExecVTOrc(`UPDATE topology_recovery
+ SET
+ is_successful = ?,
+ successor_alias = ?,
+ all_errors = ?,
+ end_recovery = DATETIME('now')
+ WHERE
+ recovery_id = ?
+ `,
+ topologyRecovery.IsSuccessful,
topologyRecovery.SuccessorAlias,
strings.Join(topologyRecovery.AllErrors, "\n"),
topologyRecovery.ID,
@@ -160,26 +158,27 @@ func writeResolveRecovery(topologyRecovery *TopologyRecovery) error {
// readRecoveries reads recovery entry/audit entries from topology_recovery
func readRecoveries(whereCondition string, limit string, args []any) ([]*TopologyRecovery, error) {
res := []*TopologyRecovery{}
- query := fmt.Sprintf(`
- select
- recovery_id,
- alias,
- start_recovery,
- IFNULL(end_recovery, '') AS end_recovery,
- is_successful,
- ifnull(successor_alias, '') as successor_alias,
- analysis,
- keyspace,
- shard,
- all_errors,
- detection_id
- from
+ query := fmt.Sprintf(`SELECT
+ recovery_id,
+ alias,
+ start_recovery,
+ IFNULL(end_recovery, '') AS end_recovery,
+ is_successful,
+ IFNULL(successor_alias, '') AS successor_alias,
+ analysis,
+ keyspace,
+ shard,
+ all_errors,
+ detection_id
+ FROM
topology_recovery
%s
- order by
- recovery_id desc
+ ORDER BY recovery_id DESC
%s
- `, whereCondition, limit)
+ `,
+ whereCondition,
+ limit,
+ )
err := db.QueryVTOrc(query, args, func(m sqlutils.RowMap) error {
topologyRecovery := *NewTopologyRecovery(inst.ReplicationAnalysis{})
topologyRecovery.ID = m.GetInt64("recovery_id")
@@ -211,11 +210,10 @@ func readRecoveries(whereCondition string, limit string, args []any) ([]*Topolog
// ReadActiveClusterRecoveries reads recoveries that are ongoing for the given cluster.
func ReadActiveClusterRecoveries(keyspace string, shard string) ([]*TopologyRecovery, error) {
- whereClause := `
- where
- end_recovery IS NULL
- and keyspace=?
- and shard=?`
+ whereClause := `WHERE
+ end_recovery IS NULL
+ AND keyspace = ?
+ AND shard = ?`
return readRecoveries(whereClause, ``, sqlutils.Args(keyspace, shard))
}
@@ -225,23 +223,30 @@ func ReadRecentRecoveries(page int) ([]*TopologyRecovery, error) {
whereClause := ""
var args []any
if len(whereConditions) > 0 {
- whereClause = fmt.Sprintf("where %s", strings.Join(whereConditions, " and "))
+ whereClause = fmt.Sprintf("WHERE %s", strings.Join(whereConditions, " AND "))
}
- limit := `
- limit ?
- offset ?`
+ limit := `LIMIT ? OFFSET ?`
args = append(args, config.AuditPageSize, page*config.AuditPageSize)
return readRecoveries(whereClause, limit, args)
}
// writeTopologyRecoveryStep writes down a single step in a recovery process
func writeTopologyRecoveryStep(topologyRecoveryStep *TopologyRecoveryStep) error {
- sqlResult, err := db.ExecVTOrc(`
- insert or ignore
- into topology_recovery_steps (
- recovery_step_id, recovery_id, audit_at, message
- ) values (?, ?, datetime('now'), ?)
- `, sqlutils.NilIfZero(topologyRecoveryStep.ID), topologyRecoveryStep.RecoveryID, topologyRecoveryStep.Message,
+ sqlResult, err := db.ExecVTOrc(`INSERT OR IGNORE
+ INTO topology_recovery_steps (
+ recovery_step_id,
+ recovery_id,
+ audit_at,
+ message
+ ) VALUES (
+ ?,
+ ?,
+ DATETIME('now'),
+ ?
+ )`,
+ sqlutils.NilIfZero(topologyRecoveryStep.ID),
+ topologyRecoveryStep.RecoveryID,
+ topologyRecoveryStep.Message,
)
if err != nil {
log.Error(err)
diff --git a/go/vt/vtorc/logic/topology_recovery_dao_test.go b/go/vt/vtorc/logic/topology_recovery_dao_test.go
index 20dfb7e91e2..6a1d7c4c48f 100644
--- a/go/vt/vtorc/logic/topology_recovery_dao_test.go
+++ b/go/vt/vtorc/logic/topology_recovery_dao_test.go
@@ -70,10 +70,10 @@ func TestTopologyRecovery(t *testing.T) {
}
func TestExpireTableData(t *testing.T) {
- oldVal := config.Config.AuditPurgeDays
- config.Config.AuditPurgeDays = 10
+ oldVal := config.GetAuditPurgeDays()
+ config.SetAuditPurgeDays(10)
defer func() {
- config.Config.AuditPurgeDays = oldVal
+ config.SetAuditPurgeDays(oldVal)
}()
tests := []struct {
diff --git a/go/vt/vtorc/logic/vtorc.go b/go/vt/vtorc/logic/vtorc.go
index 9a468d1508a..b8cf404d050 100644
--- a/go/vt/vtorc/logic/vtorc.go
+++ b/go/vt/vtorc/logic/vtorc.go
@@ -17,11 +17,8 @@
package logic
import (
- "os"
- "os/signal"
"sync"
"sync/atomic"
- "syscall"
"time"
"github.com/patrickmn/go-cache"
@@ -73,26 +70,6 @@ func init() {
})
}
-// used in several places
-func instancePollSecondsDuration() time.Duration {
- return time.Duration(config.Config.InstancePollSeconds) * time.Second
-}
-
-// acceptSighupSignal registers for SIGHUP signal from the OS to reload the configuration files.
-func acceptSighupSignal() {
- c := make(chan os.Signal, 1)
-
- signal.Notify(c, syscall.SIGHUP)
- go func() {
- for range c {
- log.Infof("Received SIGHUP. Reloading configuration")
- _ = inst.AuditOperation("reload-configuration", "", "Triggered via SIGHUP")
- config.Reload()
- discoveryMetrics.SetExpirePeriod(time.Duration(config.DiscoveryCollectionRetentionSeconds) * time.Second)
- }
- }()
-}
-
// closeVTOrc runs all the operations required to cleanly shutdown VTOrc
func closeVTOrc() {
log.Infof("Starting VTOrc shutdown")
@@ -161,7 +138,7 @@ func DiscoverInstance(tabletAlias string, forceDiscovery bool) {
defer func() {
latency.Stop("total")
discoveryTime := latency.Elapsed("total")
- if discoveryTime > instancePollSecondsDuration() {
+ if discoveryTime > config.GetInstancePollTime() {
instancePollSecondsExceededCounter.Add(1)
log.Warningf("discoverInstance exceeded InstancePollSeconds for %+v, took %.4fs", tabletAlias, discoveryTime.Seconds())
if metric != nil {
@@ -177,7 +154,7 @@ func DiscoverInstance(tabletAlias string, forceDiscovery bool) {
// Calculate the expiry period each time as InstancePollSeconds
// _may_ change during the run of the process (via SIGHUP) and
// it is not possible to change the cache's default expiry..
- if existsInCacheError := recentDiscoveryOperationKeys.Add(tabletAlias, true, instancePollSecondsDuration()); existsInCacheError != nil && !forceDiscovery {
+ if existsInCacheError := recentDiscoveryOperationKeys.Add(tabletAlias, true, config.GetInstancePollTime()); existsInCacheError != nil && !forceDiscovery {
// Just recently attempted
return
}
@@ -271,24 +248,23 @@ func onHealthTick() {
// nolint SA1015: using time.Tick leaks the underlying ticker
func ContinuousDiscovery() {
log.Infof("continuous discovery: setting up")
- recentDiscoveryOperationKeys = cache.New(instancePollSecondsDuration(), time.Second)
+ recentDiscoveryOperationKeys = cache.New(config.GetInstancePollTime(), time.Second)
go handleDiscoveryRequests()
healthTick := time.Tick(config.HealthPollSeconds * time.Second)
caretakingTick := time.Tick(time.Minute)
- recoveryTick := time.Tick(time.Duration(config.Config.RecoveryPollSeconds) * time.Second)
+ recoveryTick := time.Tick(config.GetRecoveryPollDuration())
tabletTopoTick := OpenTabletDiscovery()
var recoveryEntrance int64
var snapshotTopologiesTick <-chan time.Time
- if config.Config.SnapshotTopologiesIntervalHours > 0 {
- snapshotTopologiesTick = time.Tick(time.Duration(config.Config.SnapshotTopologiesIntervalHours) * time.Hour)
+ if config.GetSnapshotTopologyInterval() > 0 {
+ snapshotTopologiesTick = time.Tick(config.GetSnapshotTopologyInterval())
}
go func() {
_ = ometrics.InitMetrics()
}()
- go acceptSighupSignal()
// On termination of the server, we should close VTOrc cleanly
servenv.OnTermSync(closeVTOrc)
diff --git a/go/vt/vtorc/process/health.go b/go/vt/vtorc/process/health.go
index 87a11733f66..f72d7b05210 100644
--- a/go/vt/vtorc/process/health.go
+++ b/go/vt/vtorc/process/health.go
@@ -35,12 +35,17 @@ var ThisNodeHealth = &NodeHealth{}
// writeHealthToDatabase writes to the database and returns if it was successful.
func writeHealthToDatabase() bool {
- _, err := db.ExecVTOrc("delete from node_health")
+ _, err := db.ExecVTOrc("DELETE FROM node_health")
if err != nil {
log.Error(err)
return false
}
- sqlResult, err := db.ExecVTOrc(`insert into node_health (last_seen_active) values (datetime('now'))`)
+ sqlResult, err := db.ExecVTOrc(`INSERT
+ INTO node_health (
+ last_seen_active
+ ) VALUES (
+ DATETIME('now')
+ )`)
if err != nil {
log.Error(err)
return false
diff --git a/go/vt/vtorc/server/api.go b/go/vt/vtorc/server/api.go
index 5e9a84c0a29..177f2c80333 100644
--- a/go/vt/vtorc/server/api.go
+++ b/go/vt/vtorc/server/api.go
@@ -25,6 +25,7 @@ import (
"time"
"vitess.io/vitess/go/acl"
+ "vitess.io/vitess/go/viperutil/debug"
"vitess.io/vitess/go/vt/servenv"
"vitess.io/vitess/go/vt/vtorc/collection"
"vitess.io/vitess/go/vt/vtorc/discovery"
@@ -46,6 +47,7 @@ const (
enableGlobalRecoveriesAPI = "/api/enable-global-recoveries"
replicationAnalysisAPI = "/api/replication-analysis"
databaseStateAPI = "/api/database-state"
+ configAPI = "/api/config"
healthAPI = "/debug/health"
AggregatedDiscoveryMetricsAPI = "/api/aggregated-discovery-metrics"
@@ -62,6 +64,7 @@ var (
enableGlobalRecoveriesAPI,
replicationAnalysisAPI,
databaseStateAPI,
+ configAPI,
healthAPI,
AggregatedDiscoveryMetricsAPI,
}
@@ -90,6 +93,8 @@ func (v *vtorcAPI) ServeHTTP(response http.ResponseWriter, request *http.Request
replicationAnalysisAPIHandler(response, request)
case databaseStateAPI:
databaseStateAPIHandler(response)
+ case configAPI:
+ configAPIHandler(response)
case AggregatedDiscoveryMetricsAPI:
AggregatedDiscoveryMetricsAPIHandler(response, request)
default:
@@ -106,7 +111,7 @@ func getACLPermissionLevelForAPI(apiEndpoint string) string {
return acl.MONITORING
case disableGlobalRecoveriesAPI, enableGlobalRecoveriesAPI:
return acl.ADMIN
- case replicationAnalysisAPI:
+ case replicationAnalysisAPI, configAPI:
return acl.MONITORING
case healthAPI, databaseStateAPI:
return acl.MONITORING
@@ -180,6 +185,17 @@ func databaseStateAPIHandler(response http.ResponseWriter) {
writePlainTextResponse(response, ds, http.StatusOK)
}
+// configAPIHandler is the handler for the configAPI endpoint
+func configAPIHandler(response http.ResponseWriter) {
+ settingsMap := debug.AllSettings()
+ jsonOut, err := json.MarshalIndent(settingsMap, "", "\t")
+ if err != nil {
+ http.Error(response, err.Error(), http.StatusInternalServerError)
+ return
+ }
+ writePlainTextResponse(response, string(jsonOut), http.StatusOK)
+}
+
// AggregatedDiscoveryMetricsAPIHandler is the handler for the discovery metrics endpoint
func AggregatedDiscoveryMetricsAPIHandler(response http.ResponseWriter, request *http.Request) {
// return metrics for last x seconds
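
A quick way to exercise the new endpoint from a client; the host and port here are illustrative, not part of the change:

```go
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// /api/config returns the viper settings map as indented JSON and is
	// guarded by the MONITORING ACL like the other read-only endpoints.
	resp, err := http.Get("http://localhost:15000/api/config")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body))
}
```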
diff --git a/go/vt/vtorc/server/api_test.go b/go/vt/vtorc/server/api_test.go
index c352d1e600f..ab6b9eed9af 100644
--- a/go/vt/vtorc/server/api_test.go
+++ b/go/vt/vtorc/server/api_test.go
@@ -31,6 +31,9 @@ func TestGetACLPermissionLevelForAPI(t *testing.T) {
}, {
apiEndpoint: healthAPI,
want: acl.MONITORING,
+ }, {
+ apiEndpoint: configAPI,
+ want: acl.MONITORING,
}, {
apiEndpoint: "gibberish",
want: acl.ADMIN,
diff --git a/go/vt/vttablet/endtoend/config_test.go b/go/vt/vttablet/endtoend/config_test.go
index 4abf5b36c21..c3ad5f8a9db 100644
--- a/go/vt/vttablet/endtoend/config_test.go
+++ b/go/vt/vttablet/endtoend/config_test.go
@@ -36,7 +36,7 @@ import (
)
func TestPoolSize(t *testing.T) {
- revert := changeVar(t, "PoolSize", "1")
+ revert := changeVar(t, "ReadPoolSize", "1")
defer revert()
vstart := framework.DebugVars()
@@ -92,7 +92,7 @@ func TestTxPoolSize(t *testing.T) {
defer client2.Rollback()
verifyIntValue(t, framework.DebugVars(), "FoundRowsPoolAvailable", framework.FetchInt(vstart, "FoundRowsPoolAvailable")-1)
- revert := changeVar(t, "TxPoolSize", "1")
+ revert := changeVar(t, "TransactionPoolSize", "1")
defer revert()
vend := framework.DebugVars()
verifyIntValue(t, vend, "TransactionPoolAvailable", 0)
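
These tests drive the renamed variables through the /debug/env endpoint (changeVar presumably posts a varname/value pair). Doing the same by hand looks like the sketch below; the host and port are illustrative:

```go
package main

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
)

func main() {
	// POST the renamed variable ("ReadPoolSize" replaces the old "PoolSize")
	// and ask for the JSON rendering of the resulting vars.
	resp, err := http.PostForm("http://localhost:15100/debug/env?format=json", url.Values{
		"varname": {"ReadPoolSize"},
		"value":   {"1"},
	})
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body)) // includes "ResponseMessage": "Setting ReadPoolSize to: 1"
}
```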
diff --git a/go/vt/vttablet/grpctmserver/server.go b/go/vt/vttablet/grpctmserver/server.go
index 777f641b1fc..889448a7cd3 100644
--- a/go/vt/vttablet/grpctmserver/server.go
+++ b/go/vt/vttablet/grpctmserver/server.go
@@ -354,7 +354,7 @@ func (s *server) MysqlHostMetrics(ctx context.Context, request *tabletmanagerdat
func (s *server) ReplicationStatus(ctx context.Context, request *tabletmanagerdatapb.ReplicationStatusRequest) (response *tabletmanagerdatapb.ReplicationStatusResponse, err error) {
defer s.tm.HandleRPCPanic(ctx, "ReplicationStatus", request, response, false /*verbose*/, &err)
ctx = callinfo.GRPCCallInfo(ctx)
- response = &tabletmanagerdatapb.ReplicationStatusResponse{}
+ response = &tabletmanagerdatapb.ReplicationStatusResponse{BackupRunning: s.tm.IsBackupRunning()}
status, err := s.tm.ReplicationStatus(ctx)
if err == nil {
response.Status = status
@@ -637,6 +637,9 @@ func (s *server) StopReplicationAndGetStatus(ctx context.Context, request *table
if err == nil {
response.Status = statusResponse.Status
}
+
+ response.BackupRunning = s.tm.IsBackupRunning()
+
return response, err
}
diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go
index 555cadd53ea..f8b5cfd9b8d 100644
--- a/go/vt/vttablet/onlineddl/executor.go
+++ b/go/vt/vttablet/onlineddl/executor.go
@@ -94,13 +94,18 @@ var (
ptOSCBinaryPath = "/usr/bin/pt-online-schema-change"
migrationCheckInterval = 1 * time.Minute
retainOnlineDDLTables = 24 * time.Hour
- defaultCutOverThreshold = 10 * time.Second
maxConcurrentOnlineDDLs = 256
migrationNextCheckIntervals = []time.Duration{1 * time.Second, 5 * time.Second, 10 * time.Second, 20 * time.Second}
cutoverIntervals = []time.Duration{0, 1 * time.Minute, 5 * time.Minute, 10 * time.Minute, 30 * time.Minute}
)
+const (
+ defaultCutOverThreshold = 10 * time.Second
+ minCutOverThreshold = 5 * time.Second
+ maxCutOverThreshold = 30 * time.Second
+)
+
func init() {
servenv.OnParseFor("vtcombo", registerOnlineDDLFlags)
servenv.OnParseFor("vttablet", registerOnlineDDLFlags)
@@ -199,13 +204,19 @@ func newGCTableRetainTime() time.Time {
return time.Now().UTC().Add(retainOnlineDDLTables)
}
-// getMigrationCutOverThreshold returns the cut-over threshold for the given migration. The migration's
-// DDL Strategy may explicitly set the threshold; otherwise, we return the default cut-over threshold.
-func getMigrationCutOverThreshold(onlineDDL *schema.OnlineDDL) time.Duration {
- if threshold, _ := onlineDDL.StrategySetting().CutOverThreshold(); threshold != 0 {
- return threshold
+// safeMigrationCutOverThreshold receives a desired threshold and returns a
+// cut-over threshold that is reasonable to use.
+func safeMigrationCutOverThreshold(threshold time.Duration) (time.Duration, error) {
+ switch {
+ case threshold == 0:
+ return defaultCutOverThreshold, nil
+ case threshold < minCutOverThreshold:
+ return defaultCutOverThreshold, vterrors.Errorf(vtrpcpb.Code_FAILED_PRECONDITION, "cut-over min value is %v", minCutOverThreshold)
+ case threshold > maxCutOverThreshold:
+ return defaultCutOverThreshold, vterrors.Errorf(vtrpcpb.Code_FAILED_PRECONDITION, "cut-over max value is %v", maxCutOverThreshold)
+ default:
+ return threshold, nil
}
- return defaultCutOverThreshold
}
// NewExecutor creates a new gh-ost executor.
@@ -890,8 +901,6 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh
var sentryTableName string
- migrationCutOverThreshold := getMigrationCutOverThreshold(onlineDDL)
-
waitForPos := func(s *VReplStream, pos replication.Position, timeout time.Duration) error {
ctx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()
@@ -951,8 +960,8 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh
}
defer preparationsConn.Recycle()
// Set large enough `@@lock_wait_timeout` so that it does not interfere with the cut-over operation.
- // The code will ensure everything that needs to be terminated by `migrationCutOverThreshold` will be terminated.
- preparationConnRestoreLockWaitTimeout, err := e.initConnectionLockWaitTimeout(ctx, preparationsConn.Conn, 3*migrationCutOverThreshold)
+ // The code will ensure everything that needs to be terminated by `onlineDDL.CutOverThreshold` will be terminated.
+ preparationConnRestoreLockWaitTimeout, err := e.initConnectionLockWaitTimeout(ctx, preparationsConn.Conn, 3*onlineDDL.CutOverThreshold)
if err != nil {
return vterrors.Wrap(err, "failed setting lock_wait_timeout on locking connection")
}
@@ -989,7 +998,7 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh
// impacts query serving so we wait for a multiple of the cutover threshold here, with
// that variable primarily serving to limit the max time we later spend waiting for
// a position again AFTER we've taken the locks and table access is blocked.
- if err := waitForPos(s, postSentryPos, migrationCutOverThreshold*3); err != nil {
+ if err := waitForPos(s, postSentryPos, onlineDDL.CutOverThreshold*3); err != nil {
return vterrors.Wrapf(err, "failed waiting for pos after sentry creation")
}
e.updateMigrationStage(ctx, onlineDDL.UUID, "post-sentry pos reached")
@@ -1001,8 +1010,8 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh
}
defer lockConn.Recycle()
// Set large enough `@@lock_wait_timeout` so that it does not interfere with the cut-over operation.
- // The code will ensure everything that needs to be terminated by `migrationCutOverThreshold` will be terminated.
- lockConnRestoreLockWaitTimeout, err := e.initConnectionLockWaitTimeout(ctx, lockConn.Conn, 5*migrationCutOverThreshold)
+ // The code will ensure everything that needs to be terminated by `onlineDDL.CutOverThreshold` will be terminated.
+ lockConnRestoreLockWaitTimeout, err := e.initConnectionLockWaitTimeout(ctx, lockConn.Conn, 5*onlineDDL.CutOverThreshold)
if err != nil {
return vterrors.Wrapf(err, "failed setting lock_wait_timeout on locking connection")
}
@@ -1016,8 +1025,8 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh
return vterrors.Wrapf(err, "failed getting rename connection")
}
// Set large enough `@@lock_wait_timeout` so that it does not interfere with the cut-over operation.
- // The code will ensure everything that needs to be terminated by `migrationCutOverThreshold` will be terminated.
- renameConnRestoreLockWaitTimeout, err := e.initConnectionLockWaitTimeout(ctx, renameConn.Conn, 5*migrationCutOverThreshold*4)
+ // The code will ensure everything that needs to be terminated by `onlineDDL.CutOverThreshold` will be terminated.
+ renameConnRestoreLockWaitTimeout, err := e.initConnectionLockWaitTimeout(ctx, renameConn.Conn, 5*onlineDDL.CutOverThreshold*4)
if err != nil {
return vterrors.Wrapf(err, "failed setting lock_wait_timeout on rename connection")
}
@@ -1052,7 +1061,7 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh
// This function waits until it finds the RENAME TABLE... query running in MySQL's PROCESSLIST, or until timeout
// The function assumes that one of the renamed tables is locked, thus causing the RENAME to block. If nothing
// is locked, then the RENAME will be near-instantaneous and it's unlikely that the function will find it.
- renameWaitCtx, cancel := context.WithTimeout(ctx, migrationCutOverThreshold)
+ renameWaitCtx, cancel := context.WithTimeout(ctx, onlineDDL.CutOverThreshold)
defer cancel()
for {
@@ -1081,7 +1090,7 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh
// Preparation is complete. We proceed to cut-over.
toggleBuffering := func(bufferQueries bool) error {
log.Infof("toggling buffering: %t in migration %v", bufferQueries, onlineDDL.UUID)
- timeout := migrationCutOverThreshold + qrBufferExtraTimeout
+ timeout := onlineDDL.CutOverThreshold + qrBufferExtraTimeout
e.toggleBufferTableFunc(bufferingCtx, onlineDDL.Table, timeout, bufferQueries)
if !bufferQueries {
@@ -1147,7 +1156,7 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh
// real production
e.updateMigrationStage(ctx, onlineDDL.UUID, "locking tables")
- lockCtx, cancel := context.WithTimeout(ctx, migrationCutOverThreshold)
+ lockCtx, cancel := context.WithTimeout(ctx, onlineDDL.CutOverThreshold)
defer cancel()
lockTableQuery := sqlparser.BuildParsedQuery(sqlLockTwoTablesWrite, sentryTableName, onlineDDL.Table)
if _, err := lockConn.Conn.Exec(lockCtx, lockTableQuery.Query, 1, false); err != nil {
@@ -1187,7 +1196,7 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh
}
e.updateMigrationStage(ctx, onlineDDL.UUID, "waiting for post-lock pos: %v", replication.EncodePosition(postWritesPos))
- if err := waitForPos(s, postWritesPos, migrationCutOverThreshold); err != nil {
+ if err := waitForPos(s, postWritesPos, onlineDDL.CutOverThreshold); err != nil {
e.updateMigrationStage(ctx, onlineDDL.UUID, "timeout while waiting for post-lock pos: %v", err)
return vterrors.Wrapf(err, "failed waiting for pos after locking")
}
@@ -1220,14 +1229,14 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh
{
dropTableQuery := sqlparser.BuildParsedQuery(sqlDropTable, sentryTableName)
- lockCtx, cancel := context.WithTimeout(ctx, migrationCutOverThreshold)
+ lockCtx, cancel := context.WithTimeout(ctx, onlineDDL.CutOverThreshold)
defer cancel()
if _, err := lockConn.Conn.Exec(lockCtx, dropTableQuery.Query, 1, false); err != nil {
return vterrors.Wrapf(err, "failed dropping sentry table")
}
}
{
- lockCtx, cancel := context.WithTimeout(ctx, migrationCutOverThreshold)
+ lockCtx, cancel := context.WithTimeout(ctx, onlineDDL.CutOverThreshold)
defer cancel()
e.updateMigrationStage(ctx, onlineDDL.UUID, "unlocking tables")
if _, err := lockConn.Conn.Exec(lockCtx, sqlUnlockTables, 1, false); err != nil {
@@ -1235,7 +1244,7 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh
}
}
{
- lockCtx, cancel := context.WithTimeout(ctx, migrationCutOverThreshold)
+ lockCtx, cancel := context.WithTimeout(ctx, onlineDDL.CutOverThreshold)
defer cancel()
e.updateMigrationStage(lockCtx, onlineDDL.UUID, "waiting for RENAME to complete")
if err := <-renameCompleteChan; err != nil {
@@ -2034,7 +2043,9 @@ func (e *Executor) readMigration(ctx context.Context, uuid string) (onlineDDL *s
WasReadyToComplete: row.AsInt64("was_ready_to_complete", 0),
TabletAlias: row["tablet"].ToString(),
MigrationContext: row["migration_context"].ToString(),
+ CutOverThreshold: time.Second * time.Duration(row.AsInt64("cutover_threshold_seconds", 0)),
}
+ onlineDDL.CutOverThreshold, _ = safeMigrationCutOverThreshold(onlineDDL.CutOverThreshold)
return onlineDDL, row, nil
}
@@ -3572,55 +3583,36 @@ func (e *Executor) isPreserveForeignKeySupported(ctx context.Context) (isSupport
// and is up to date with the binlogs.
func (e *Executor) isVReplMigrationReadyToCutOver(ctx context.Context, onlineDDL *schema.OnlineDDL, s *VReplStream) (isReady bool, err error) {
// Check all the cases where migration is still running:
- {
- // when ready to cut-over, pos must have some value
- if s.pos == "" {
- return false, nil
- }
+ // when ready to cut-over, pos must have some value
+ if s.pos == "" {
+ return false, nil
}
- {
- // Both time_updated and transaction_timestamp must be in close proximity to each
- // other and to the time now, otherwise that means we're lagging and it's not a good time
- // to cut-over
- durationDiff := func(t1, t2 time.Time) time.Duration {
- return t1.Sub(t2).Abs()
- }
- migrationCutOverThreshold := getMigrationCutOverThreshold(onlineDDL)
-
- timeNow := time.Now()
- timeUpdated := time.Unix(s.timeUpdated, 0)
- if durationDiff(timeNow, timeUpdated) > migrationCutOverThreshold {
- return false, nil
- }
- // Let's look at transaction timestamp. This gets written by any ongoing
- // writes on the server (whether on this table or any other table)
- transactionTimestamp := time.Unix(s.transactionTimestamp, 0)
- if durationDiff(timeNow, transactionTimestamp) > migrationCutOverThreshold {
- return false, nil
- }
+	// Both time_updated and transaction_timestamp must be in close proximity to each
+	// other and to the time now; otherwise it means we're lagging and it's not a good
+	// time to cut over.
+ if s.Lag() > onlineDDL.CutOverThreshold {
+ return false, nil
}
- {
- // copy_state must have no entries for this vreplication id: if entries are
- // present that means copy is still in progress
- query, err := sqlparser.ParseAndBind(sqlReadCountCopyState,
- sqltypes.Int32BindVariable(s.id),
- )
- if err != nil {
- return false, err
- }
- r, err := e.execQuery(ctx, query)
- if err != nil {
- return false, err
- }
- csRow := r.Named().Row()
- if csRow == nil {
- return false, err
- }
- count := csRow.AsInt64("cnt", 0)
- if count > 0 {
- // Still copying
- return false, nil
- }
+ // copy_state must have no entries for this vreplication id: if entries are
+ // present that means copy is still in progress
+ query, err := sqlparser.ParseAndBind(sqlReadCountCopyState,
+ sqltypes.Int32BindVariable(s.id),
+ )
+ if err != nil {
+ return false, err
+ }
+ r, err := e.execQuery(ctx, query)
+ if err != nil {
+ return false, err
+ }
+ csRow := r.Named().Row()
+ if csRow == nil {
+ return false, err
+ }
+ count := csRow.AsInt64("cnt", 0)
+ if count > 0 {
+ // Still copying
+ return false, nil
}
return true, nil
@@ -3767,6 +3759,7 @@ func (e *Executor) reviewRunningMigrations(ctx context.Context) (countRunnning i
}
_ = e.updateRowsCopied(ctx, uuid, s.rowsCopied)
_ = e.updateMigrationProgressByRowsCopied(ctx, uuid, s.rowsCopied)
+ _ = e.updateMigrationVreplicationLagSeconds(ctx, uuid, int64(s.Lag().Seconds()))
_ = e.updateMigrationETASecondsByProgress(ctx, uuid)
if s.timeThrottled != 0 {
// Avoid creating a 0000-00-00 00:00:00 timestamp
@@ -4525,6 +4518,18 @@ func (e *Executor) updateRowsCopied(ctx context.Context, uuid string, rowsCopied
return err
}
+func (e *Executor) updateMigrationVreplicationLagSeconds(ctx context.Context, uuid string, vreplicationLagSeconds int64) error {
+ query, err := sqlparser.ParseAndBind(sqlUpdateMigrationVreplicationLagSeconds,
+ sqltypes.Int64BindVariable(vreplicationLagSeconds),
+ sqltypes.StringBindVariable(uuid),
+ )
+ if err != nil {
+ return err
+ }
+ _, err = e.execQuery(ctx, query)
+ return err
+}
+
func (e *Executor) updateVitessLivenessIndicator(ctx context.Context, uuid string, livenessIndicator int64) error {
query, err := sqlparser.ParseAndBind(sqlUpdateMigrationVitessLivenessIndicator,
sqltypes.Int64BindVariable(livenessIndicator),
@@ -4747,6 +4752,42 @@ func (e *Executor) ForceCutOverPendingMigrations(ctx context.Context) (result *s
return result, nil
}
+func (e *Executor) SetMigrationCutOverThreshold(ctx context.Context, uuid string, thresholdString string) (result *sqltypes.Result, err error) {
+ if atomic.LoadInt64(&e.isOpen) == 0 {
+ return nil, vterrors.New(vtrpcpb.Code_FAILED_PRECONDITION, schema.ErrOnlineDDLDisabled.Error())
+ }
+ if !schema.IsOnlineDDLUUID(uuid) {
+		return nil, vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "Not a valid migration ID in CUTOVER_THRESHOLD: %s", uuid)
+ }
+ threshold, err := time.ParseDuration(thresholdString)
+ if err != nil {
+ return nil, vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "invalid cut-over threshold value: %s. Try '5s' to '30s'", thresholdString)
+ }
+
+ log.Infof("SetMigrationCutOverThreshold: request to set cut-over threshold to %v on migration %s", threshold, uuid)
+ e.migrationMutex.Lock()
+ defer e.migrationMutex.Unlock()
+
+ threshold, err = safeMigrationCutOverThreshold(threshold)
+ if err != nil {
+ return nil, err
+ }
+ query, err := sqlparser.ParseAndBind(sqlUpdateCutOverThresholdSeconds,
+ sqltypes.Int64BindVariable(int64(threshold.Seconds())),
+ sqltypes.StringBindVariable(uuid),
+ )
+ if err != nil {
+ return nil, err
+ }
+ rs, err := e.execQuery(ctx, query)
+ if err != nil {
+ return nil, err
+ }
+ e.triggerNextCheckInterval()
+	log.Infof("SetMigrationCutOverThreshold: migration %s cut-over threshold was set to %v", uuid, threshold)
+ return rs, nil
+}
+
// CompleteMigration clears the postpone_completion flag for a given migration, assuming it was set in the first place
func (e *Executor) CompleteMigration(ctx context.Context, uuid string) (result *sqltypes.Result, err error) {
if atomic.LoadInt64(&e.isOpen) == 0 {
@@ -5032,7 +5073,14 @@ func (e *Executor) SubmitMigration(
// Explicit retention indicated by `--retain-artifact` DDL strategy flag for this migration. Override!
retainArtifactsSeconds = int64((retainArtifacts).Seconds())
}
-
+ cutoverThreshold, err := onlineDDL.StrategySetting().CutOverThreshold()
+ if err != nil {
+ return nil, vterrors.Wrapf(err, "parsing cut-over threshold in migration %v", onlineDDL.UUID)
+ }
+ cutoverThreshold, err = safeMigrationCutOverThreshold(cutoverThreshold)
+ if err != nil {
+ return nil, vterrors.Wrapf(err, "validating cut-over threshold in migration %v", onlineDDL.UUID)
+ }
_, allowConcurrentMigration := e.allowConcurrentMigration(onlineDDL)
submitQuery, err := sqlparser.ParseAndBind(sqlInsertMigration,
sqltypes.StringBindVariable(onlineDDL.UUID),
@@ -5048,6 +5096,7 @@ func (e *Executor) SubmitMigration(
sqltypes.StringBindVariable(string(schema.OnlineDDLStatusQueued)),
sqltypes.StringBindVariable(e.TabletAliasString()),
sqltypes.Int64BindVariable(retainArtifactsSeconds),
+ sqltypes.Int64BindVariable(int64(cutoverThreshold.Seconds())),
sqltypes.BoolBindVariable(onlineDDL.StrategySetting().IsPostponeLaunch()),
sqltypes.BoolBindVariable(onlineDDL.StrategySetting().IsPostponeCompletion()),
sqltypes.BoolBindVariable(allowConcurrentMigration),
diff --git a/go/vt/vttablet/onlineddl/executor_test.go b/go/vt/vttablet/onlineddl/executor_test.go
index 2533f3a4b48..105da7fc1e3 100644
--- a/go/vt/vttablet/onlineddl/executor_test.go
+++ b/go/vt/vttablet/onlineddl/executor_test.go
@@ -25,6 +25,7 @@ import (
"time"
"github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
)
func TestShouldCutOverAccordingToBackoff(t *testing.T) {
@@ -164,3 +165,59 @@ func TestShouldCutOverAccordingToBackoff(t *testing.T) {
})
}
}
+
+func TestSafeMigrationCutOverThreshold(t *testing.T) {
+ require.NotZero(t, defaultCutOverThreshold)
+ require.GreaterOrEqual(t, defaultCutOverThreshold, minCutOverThreshold)
+ require.LessOrEqual(t, defaultCutOverThreshold, maxCutOverThreshold)
+
+ tcases := []struct {
+ threshold time.Duration
+ expect time.Duration
+ isErr bool
+ }{
+ {
+ threshold: 0,
+ expect: defaultCutOverThreshold,
+ },
+ {
+ threshold: 2 * time.Second,
+ expect: defaultCutOverThreshold,
+ isErr: true,
+ },
+ {
+ threshold: 75 * time.Second,
+ expect: defaultCutOverThreshold,
+ isErr: true,
+ },
+ {
+ threshold: defaultCutOverThreshold,
+ expect: defaultCutOverThreshold,
+ },
+ {
+ threshold: 5 * time.Second,
+ expect: 5 * time.Second,
+ },
+ {
+ threshold: 15 * time.Second,
+ expect: 15 * time.Second,
+ },
+ {
+ threshold: 25 * time.Second,
+ expect: 25 * time.Second,
+ },
+ }
+ for _, tcase := range tcases {
+ t.Run(tcase.threshold.String(), func(t *testing.T) {
+ threshold, err := safeMigrationCutOverThreshold(tcase.threshold)
+ if tcase.isErr {
+ assert.Error(t, err)
+ require.Equal(t, tcase.expect, defaultCutOverThreshold)
+				// Keep testing even on error: the returned threshold must fall back to the default.
+ } else {
+ assert.NoError(t, err)
+ }
+ assert.Equal(t, tcase.expect, threshold)
+ })
+ }
+}
diff --git a/go/vt/vttablet/onlineddl/schema.go b/go/vt/vttablet/onlineddl/schema.go
index 1b120dfa58c..943a3b1df07 100644
--- a/go/vt/vttablet/onlineddl/schema.go
+++ b/go/vt/vttablet/onlineddl/schema.go
@@ -32,13 +32,14 @@ const (
migration_status,
tablet,
retain_artifacts_seconds,
+ cutover_threshold_seconds,
postpone_launch,
postpone_completion,
allow_concurrent,
reverted_uuid,
is_view
) VALUES (
- %a, %a, %a, %a, %a, %a, %a, %a, %a, NOW(6), %a, %a, %a, %a, %a, %a, %a, %a, %a
+ %a, %a, %a, %a, %a, %a, %a, %a, %a, NOW(6), %a, %a, %a, %a, %a, %a, %a, %a, %a, %a
)`
sqlSelectQueuedMigrations = `SELECT
@@ -86,6 +87,11 @@ const (
WHERE
migration_uuid=%a
`
+ sqlUpdateMigrationVreplicationLagSeconds = `UPDATE _vt.schema_migrations
+ SET vreplication_lag_seconds=%a
+ WHERE
+ migration_uuid=%a
+ `
sqlUpdateMigrationIsView = `UPDATE _vt.schema_migrations
SET is_view=%a
WHERE
@@ -181,6 +187,11 @@ const (
WHERE
migration_uuid=%a
`
+ sqlUpdateCutOverThresholdSeconds = `UPDATE _vt.schema_migrations
+ SET cutover_threshold_seconds=%a
+ WHERE
+ migration_uuid=%a
+ `
sqlUpdateLaunchMigration = `UPDATE _vt.schema_migrations
SET postpone_launch=0
WHERE
@@ -429,6 +440,7 @@ const (
removed_unique_keys,
migration_context,
retain_artifacts_seconds,
+ cutover_threshold_seconds,
is_view,
ready_to_complete,
ready_to_complete_timestamp is not null as was_ready_to_complete,
diff --git a/go/vt/vttablet/onlineddl/vrepl.go b/go/vt/vttablet/onlineddl/vrepl.go
index 26eb614e95a..2761c27c801 100644
--- a/go/vt/vttablet/onlineddl/vrepl.go
+++ b/go/vt/vttablet/onlineddl/vrepl.go
@@ -30,6 +30,7 @@ import (
"net/url"
"strconv"
"strings"
+ "time"
"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/mysql/collations/charset"
@@ -96,6 +97,19 @@ func (v *VReplStream) hasError() (isTerminal bool, vreplError error) {
return false, nil
}
+// Lag returns the vreplication lag: the larger of the elapsed times since the stream's time-updated and transaction timestamps.
+func (s *VReplStream) Lag() time.Duration {
+ durationDiff := func(t1, t2 time.Time) time.Duration {
+ return t1.Sub(t2).Abs()
+ }
+ timeNow := time.Now()
+ timeUpdated := time.Unix(s.timeUpdated, 0)
+ // Let's look at transaction timestamp. This gets written by any ongoing
+ // writes on the server (whether on this table or any other table)
+ transactionTimestamp := time.Unix(s.transactionTimestamp, 0)
+ return max(durationDiff(timeNow, timeUpdated), durationDiff(timeNow, transactionTimestamp))
+}
+
// VRepl is an online DDL helper for VReplication based migrations (ddl_strategy="online")
type VRepl struct {
workflow string
diff --git a/go/vt/vttablet/tabletmanager/rpc_agent.go b/go/vt/vttablet/tabletmanager/rpc_agent.go
index 203e1d006ab..445d74cb930 100644
--- a/go/vt/vttablet/tabletmanager/rpc_agent.go
+++ b/go/vt/vttablet/tabletmanager/rpc_agent.go
@@ -166,6 +166,8 @@ type RPCTM interface {
RestoreFromBackup(ctx context.Context, logger logutil.Logger, request *tabletmanagerdatapb.RestoreFromBackupRequest) error
+ IsBackupRunning() bool
+
// HandleRPCPanic is to be called in a defer statement in each
// RPC input point.
HandleRPCPanic(ctx context.Context, name string, args, reply any, verbose bool, err *error)
diff --git a/go/vt/vttablet/tabletmanager/rpc_backup.go b/go/vt/vttablet/tabletmanager/rpc_backup.go
index a66264d98af..22fe72716dd 100644
--- a/go/vt/vttablet/tabletmanager/rpc_backup.go
+++ b/go/vt/vttablet/tabletmanager/rpc_backup.go
@@ -205,6 +205,10 @@ func (tm *TabletManager) RestoreFromBackup(ctx context.Context, logger logutil.L
return err
}
+func (tm *TabletManager) IsBackupRunning() bool {
+ return tm._isBackupRunning
+}
+
func (tm *TabletManager) beginBackup(backupMode string) error {
tm.mutex.Lock()
defer tm.mutex.Unlock()
diff --git a/go/vt/vttablet/tabletmanager/vdiff/framework_test.go b/go/vt/vttablet/tabletmanager/vdiff/framework_test.go
index 33a0da8e23f..7d4cdb78c20 100644
--- a/go/vt/vttablet/tabletmanager/vdiff/framework_test.go
+++ b/go/vt/vttablet/tabletmanager/vdiff/framework_test.go
@@ -396,6 +396,10 @@ func (dbc *realDBClient) Close() {
dbc.conn.Close()
}
+func (dbc *realDBClient) IsClosed() bool {
+ return dbc.conn.IsClosed()
+}
+
func (dbc *realDBClient) ExecuteFetch(query string, maxrows int) (*sqltypes.Result, error) {
// Use Clone() because the contents of memory region referenced by
// string can change when clients (e.g. vcopier) use unsafe string methods.
diff --git a/go/vt/vttablet/tabletmanager/vreplication/framework_test.go b/go/vt/vttablet/tabletmanager/vreplication/framework_test.go
index 12d20e3a867..fe8b62d3cef 100644
--- a/go/vt/vttablet/tabletmanager/vreplication/framework_test.go
+++ b/go/vt/vttablet/tabletmanager/vreplication/framework_test.go
@@ -479,6 +479,10 @@ func (dbc *realDBClient) Close() {
dbc.conn.Close()
}
+func (dbc *realDBClient) IsClosed() bool {
+ return dbc.conn.IsClosed()
+}
+
func (dbc *realDBClient) ExecuteFetch(query string, maxrows int) (*sqltypes.Result, error) {
// Use Clone() because the contents of memory region referenced by
// string can change when clients (e.g. vcopier) use unsafe string methods.
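
Both test fakes gain IsClosed, which implies the binlogplayer DBClient contract now carries it. A hedged sketch of the relevant slice of that interface follows; any method set beyond what these files show is an assumption, and the authoritative definition lives in go/vt/binlog/binlogplayer.

```go
package binlogplayer

import "vitess.io/vitess/go/sqltypes"

// DBClient sketches the client contract these fakes satisfy.
type DBClient interface {
	Connect() error
	Close()
	// IsClosed lets callers detect a server-side disconnect so they can
	// reconnect before recording an error state (see vreplicator.Replicate).
	IsClosed() bool
	ExecuteFetch(query string, maxrows int) (*sqltypes.Result, error)
}
```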
diff --git a/go/vt/vttablet/tabletmanager/vreplication/utils.go b/go/vt/vttablet/tabletmanager/vreplication/utils.go
index bb1c469cc93..67b52c56261 100644
--- a/go/vt/vttablet/tabletmanager/vreplication/utils.go
+++ b/go/vt/vttablet/tabletmanager/vreplication/utils.go
@@ -232,6 +232,24 @@ func isUnrecoverableError(err error) bool {
sqlerror.ERWrongValueCountOnRow:
log.Errorf("Got unrecoverable error: %v", sqlErr)
return true
+ case sqlerror.ERErrorDuringCommit:
+ switch sqlErr.HaErrorCode() {
+ case
+			0, // Not really an HA error.
+ sqlerror.HaErrLockDeadlock,
+ sqlerror.HaErrLockTableFull,
+ sqlerror.HaErrLockWaitTimeout,
+ sqlerror.HaErrNotInLockPartitions,
+ sqlerror.HaErrQueryInterrupted,
+ sqlerror.HaErrRolledBack,
+ sqlerror.HaErrTooManyConcurrentTrxs,
+ sqlerror.HaErrUndoRecTooBig:
+ // These are recoverable errors.
+ return false
+ default:
+ log.Errorf("Got unrecoverable error: %v", sqlErr)
+ return true
+ }
}
return false
}
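
The new ERErrorDuringCommit branch dispatches on sqlErr.HaErrorCode(), the storage-engine error embedded in the commit message, as the test cases below illustrate. Here is a standalone sketch of extracting such a code from the textual message; the regexp is an illustrative assumption, and the real extraction lives in go/mysql/sqlerror.

```go
package main

import (
	"fmt"
	"regexp"
	"strconv"
)

// haErrPattern matches messages like
// "Got error 149 - 'Lock deadlock; Retry transaction' during COMMIT".
var haErrPattern = regexp.MustCompile(`Got error (\d+)`)

// haErrorCode returns the embedded handler (HA_ERR_*) code, or 0 when the
// message carries none, mirroring the "0, // Not really an HA error" case.
func haErrorCode(msg string) int {
	m := haErrPattern.FindStringSubmatch(msg)
	if m == nil {
		return 0
	}
	code, _ := strconv.Atoi(m[1])
	return code
}

func main() {
	fmt.Println(haErrorCode("Got error 149 - 'Lock deadlock; Retry transaction' during COMMIT")) // 149: recoverable
	fmt.Println(haErrorCode("Got error 204 - 'No more room in disk' during COMMIT"))             // 204: unrecoverable
}
```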
diff --git a/go/vt/vttablet/tabletmanager/vreplication/utils_test.go b/go/vt/vttablet/tabletmanager/vreplication/utils_test.go
index 69a57c34341..15093e299fc 100644
--- a/go/vt/vttablet/tabletmanager/vreplication/utils_test.go
+++ b/go/vt/vttablet/tabletmanager/vreplication/utils_test.go
@@ -152,6 +152,16 @@ func TestIsUnrecoverableError(t *testing.T) {
err: sqlerror.NewSQLError(sqlerror.ERDataOutOfRange, "data out of range", "test"),
expected: true,
},
+ {
+ name: "SQL error with HaErrDiskFullNowait error",
+ err: sqlerror.NewSQLError(sqlerror.ERErrorDuringCommit, "unknown", "ERROR HY000: Got error 204 - 'No more room in disk' during COMMIT"),
+ expected: true,
+ },
+ {
+ name: "SQL error with HaErrLockDeadlock error",
+ err: sqlerror.NewSQLError(sqlerror.ERErrorDuringCommit, "unknown", "ERROR HY000: Got error 149 - 'Lock deadlock; Retry transaction' during COMMIT"),
+ expected: false,
+ },
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
diff --git a/go/vt/vttablet/tabletmanager/vreplication/vreplicator.go b/go/vt/vttablet/tabletmanager/vreplication/vreplicator.go
index 0c5c0b5b334..9ec274ab0ea 100644
--- a/go/vt/vttablet/tabletmanager/vreplication/vreplicator.go
+++ b/go/vt/vttablet/tabletmanager/vreplication/vreplicator.go
@@ -186,11 +186,18 @@ func newVReplicator(id int32, source *binlogdatapb.BinlogSource, sourceVStreamer
// code.
func (vr *vreplicator) Replicate(ctx context.Context) error {
err := vr.replicate(ctx)
- if err != nil {
- if err := vr.setMessage(err.Error()); err != nil {
- binlogplayer.LogError("Failed to set error state", err)
+ if err == nil {
+ return nil
+ }
+ if vr.dbClient.IsClosed() {
+		// Connection was possibly terminated by the server. We should renew it.
+ if cerr := vr.dbClient.Connect(); cerr != nil {
+ return vterrors.Wrapf(err, "failed to reconnect to the database: %v", cerr)
}
}
+ if err := vr.setMessage(err.Error()); err != nil {
+ binlogplayer.LogError("Failed to set error state", err)
+ }
return err
}
diff --git a/go/vt/vttablet/tabletserver/debugenv.go b/go/vt/vttablet/tabletserver/debugenv.go
index 54cf09db7d6..6f1ea854ea9 100644
--- a/go/vt/vttablet/tabletserver/debugenv.go
+++ b/go/vt/vttablet/tabletserver/debugenv.go
@@ -23,9 +23,10 @@ import (
"html"
"net/http"
"strconv"
- "text/template"
"time"
+ "github.com/google/safehtml/template"
+
"vitess.io/vitess/go/acl"
"vitess.io/vitess/go/vt/log"
)
@@ -70,90 +71,131 @@ func debugEnvHandler(tsv *TabletServer, w http.ResponseWriter, r *http.Request)
return
}
+ switch r.Method {
+ case http.MethodPost:
+ handlePost(tsv, w, r)
+ case http.MethodGet:
+ handleGet(tsv, w, r)
+ default:
+ http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+ }
+}
+
+func handlePost(tsv *TabletServer, w http.ResponseWriter, r *http.Request) {
+ varname := r.FormValue("varname")
+ value := r.FormValue("value")
+
var msg string
- if r.Method == "POST" {
- varname := r.FormValue("varname")
- value := r.FormValue("value")
- setIntVal := func(f func(int)) {
- ival, err := strconv.Atoi(value)
- if err != nil {
- msg = fmt.Sprintf("Failed setting value for %v: %v", varname, err)
- return
- }
- f(ival)
- msg = fmt.Sprintf("Setting %v to: %v", varname, value)
+ if varname == "" || value == "" {
+ http.Error(w, "Missing varname or value", http.StatusBadRequest)
+ return
+ }
+
+ setIntVal := func(f func(int)) error {
+ ival, err := strconv.Atoi(value)
+ if err != nil {
+ return fmt.Errorf("invalid int value for %v: %v", varname, err)
}
- setIntValCtx := func(f func(context.Context, int) error) {
- ival, err := strconv.Atoi(value)
- if err == nil {
- err = f(r.Context(), ival)
- if err == nil {
- msg = fmt.Sprintf("Setting %v to: %v", varname, value)
- return
- }
- }
- msg = fmt.Sprintf("Failed setting value for %v: %v", varname, err)
+ f(ival)
+ msg = fmt.Sprintf("Setting %v to: %v", varname, value)
+ return nil
+ }
+
+ setIntValCtx := func(f func(context.Context, int) error) error {
+ ival, err := strconv.Atoi(value)
+ if err == nil {
+ err = f(r.Context(), ival)
}
- setInt64Val := func(f func(int64)) {
- ival, err := strconv.ParseInt(value, 10, 64)
- if err != nil {
- msg = fmt.Sprintf("Failed setting value for %v: %v", varname, err)
- return
- }
- f(ival)
- msg = fmt.Sprintf("Setting %v to: %v", varname, value)
+ if err != nil {
+ return fmt.Errorf("failed setting value for %v: %v", varname, err)
}
- setDurationVal := func(f func(time.Duration)) {
- durationVal, err := time.ParseDuration(value)
- if err != nil {
- msg = fmt.Sprintf("Failed setting value for %v: %v", varname, err)
- return
- }
- f(durationVal)
- msg = fmt.Sprintf("Setting %v to: %v", varname, value)
+ msg = fmt.Sprintf("Setting %v to: %v", varname, value)
+ return nil
+ }
+
+ setInt64Val := func(f func(int64)) error {
+ ival, err := strconv.ParseInt(value, 10, 64)
+ if err != nil {
+ return fmt.Errorf("invalid int64 value for %v: %v", varname, err)
}
- setFloat64Val := func(f func(float64)) {
- fval, err := strconv.ParseFloat(value, 64)
- if err != nil {
- msg = fmt.Sprintf("Failed setting value for %v: %v", varname, err)
- return
- }
- f(fval)
- msg = fmt.Sprintf("Setting %v to: %v", varname, value)
+ f(ival)
+ msg = fmt.Sprintf("Setting %v to: %v", varname, value)
+ return nil
+ }
+
+ setDurationVal := func(f func(time.Duration)) error {
+ durationVal, err := time.ParseDuration(value)
+ if err != nil {
+ return fmt.Errorf("invalid duration value for %v: %v", varname, err)
}
- switch varname {
- case "PoolSize":
- setIntValCtx(tsv.SetPoolSize)
- case "StreamPoolSize":
- setIntValCtx(tsv.SetStreamPoolSize)
- case "TxPoolSize":
- setIntValCtx(tsv.SetTxPoolSize)
- case "MaxResultSize":
- setIntVal(tsv.SetMaxResultSize)
- case "WarnResultSize":
- setIntVal(tsv.SetWarnResultSize)
- case "RowStreamerMaxInnoDBTrxHistLen":
- setInt64Val(func(val int64) { tsv.Config().RowStreamer.MaxInnoDBTrxHistLen = val })
- case "RowStreamerMaxMySQLReplLagSecs":
- setInt64Val(func(val int64) { tsv.Config().RowStreamer.MaxMySQLReplLagSecs = val })
- case "UnhealthyThreshold":
- setDurationVal(func(d time.Duration) { tsv.Config().Healthcheck.UnhealthyThreshold = d })
- setDurationVal(tsv.hs.SetUnhealthyThreshold)
- setDurationVal(tsv.sm.SetUnhealthyThreshold)
- case "ThrottleMetricThreshold":
- setFloat64Val(tsv.SetThrottleMetricThreshold)
- case "Consolidator":
- tsv.SetConsolidatorMode(value)
- msg = fmt.Sprintf("Setting %v to: %v", varname, value)
+ f(durationVal)
+ msg = fmt.Sprintf("Setting %v to: %v", varname, value)
+ return nil
+ }
+
+ setFloat64Val := func(f func(float64)) error {
+ fval, err := strconv.ParseFloat(value, 64)
+ if err != nil {
+ return fmt.Errorf("invalid float64 value for %v: %v", varname, err)
}
+ f(fval)
+ msg = fmt.Sprintf("Setting %v to: %v", varname, value)
+ return nil
+ }
+
+ var err error
+ switch varname {
+ case "ReadPoolSize":
+ err = setIntValCtx(tsv.SetPoolSize)
+ case "StreamPoolSize":
+ err = setIntValCtx(tsv.SetStreamPoolSize)
+ case "TransactionPoolSize":
+ err = setIntValCtx(tsv.SetTxPoolSize)
+ case "MaxResultSize":
+ err = setIntVal(tsv.SetMaxResultSize)
+ case "WarnResultSize":
+ err = setIntVal(tsv.SetWarnResultSize)
+ case "RowStreamerMaxInnoDBTrxHistLen":
+ err = setInt64Val(func(val int64) { tsv.Config().RowStreamer.MaxInnoDBTrxHistLen = val })
+ case "RowStreamerMaxMySQLReplLagSecs":
+ err = setInt64Val(func(val int64) { tsv.Config().RowStreamer.MaxMySQLReplLagSecs = val })
+ case "UnhealthyThreshold":
+ err = setDurationVal(func(d time.Duration) { tsv.Config().Healthcheck.UnhealthyThreshold = d })
+ case "ThrottleMetricThreshold":
+ err = setFloat64Val(tsv.SetThrottleMetricThreshold)
+ case "Consolidator":
+ tsv.SetConsolidatorMode(value)
+ msg = fmt.Sprintf("Setting %v to: %v", varname, value)
+ }
+
+ if err != nil {
+ http.Error(w, err.Error(), http.StatusBadRequest)
+ return
}
+ vars := getVars(tsv)
+ sendResponse(r, w, vars, msg)
+}
+
+func handleGet(tsv *TabletServer, w http.ResponseWriter, r *http.Request) {
+ vars := getVars(tsv)
+ sendResponse(r, w, vars, "")
+}
+
+func sendResponse(r *http.Request, w http.ResponseWriter, vars []envValue, msg string) {
+ format := r.FormValue("format")
+ if format == "json" {
+ respondWithJSON(w, vars, msg)
+ return
+ }
+ respondWithHTML(w, vars, msg)
+}
+
+func getVars(tsv *TabletServer) []envValue {
var vars []envValue
- vars = addVar(vars, "PoolSize", tsv.PoolSize)
+ vars = addVar(vars, "ReadPoolSize", tsv.PoolSize)
vars = addVar(vars, "StreamPoolSize", tsv.StreamPoolSize)
- vars = addVar(vars, "TxPoolSize", tsv.TxPoolSize)
- vars = addVar(vars, "QueryCacheCapacity", tsv.QueryPlanCacheCap) // QueryCacheCapacity is deprecated in v21, it is replaced by QueryEnginePlanCacheCapacity
- vars = addVar(vars, "QueryEnginePlanCacheCapacity", tsv.QueryPlanCacheCap)
+ vars = addVar(vars, "TransactionPoolSize", tsv.TxPoolSize)
vars = addVar(vars, "MaxResultSize", tsv.MaxResultSize)
vars = addVar(vars, "WarnResultSize", tsv.WarnResultSize)
vars = addVar(vars, "RowStreamerMaxInnoDBTrxHistLen", func() int64 { return tsv.Config().RowStreamer.MaxInnoDBTrxHistLen })
@@ -165,18 +207,22 @@ func debugEnvHandler(tsv *TabletServer, w http.ResponseWriter, r *http.Request)
Value: tsv.ConsolidatorMode(),
})
- format := r.FormValue("format")
- if format == "json" {
- mvars := make(map[string]string)
- for _, v := range vars {
- mvars[v.Name] = v.Value
- }
- w.Header().Set("Content-Type", "application/json")
- _ = json.NewEncoder(w).Encode(mvars)
- return
+ return vars
+}
+
+func respondWithJSON(w http.ResponseWriter, vars []envValue, msg string) {
+ mvars := make(map[string]string)
+ for _, v := range vars {
+ mvars[v.Name] = v.Value
}
+ if msg != "" {
+ mvars["ResponseMessage"] = msg
+ }
+ w.Header().Set("Content-Type", "application/json")
+ _ = json.NewEncoder(w).Encode(mvars)
+}
- // gridTable is reused from twopcz.go.
+func respondWithHTML(w http.ResponseWriter, vars []envValue, msg string) {
w.Write(gridTable)
w.Write([]byte("Internal Variables
\n"))
if msg != "" {
diff --git a/go/vt/vttablet/tabletserver/health_streamer.go b/go/vt/vttablet/tabletserver/health_streamer.go
index f9f65d197b2..eaeba6315e3 100644
--- a/go/vt/vttablet/tabletserver/health_streamer.go
+++ b/go/vt/vttablet/tabletserver/health_streamer.go
@@ -293,8 +293,10 @@ func (hs *healthStreamer) SetUnhealthyThreshold(v time.Duration) {
// so it can read and write to the MySQL instance for schema-tracking.
func (hs *healthStreamer) MakePrimary(serving bool) {
hs.fieldsMu.Lock()
- defer hs.fieldsMu.Unlock()
hs.isServingPrimary = serving
+ // We release the lock here because we don't want to hold it while calling RegisterNotifier.
+ // Holding it across that call can lead to a deadlock.
+ hs.fieldsMu.Unlock()
// We register for notifications from the schema Engine only when schema tracking is enabled,
// and we are going to a serving primary state.
if serving && hs.signalWhenSchemaChange {
diff --git a/go/vt/vttablet/tabletserver/health_streamer_test.go b/go/vt/vttablet/tabletserver/health_streamer_test.go
index 3421141ff80..9561518eed6 100644
--- a/go/vt/vttablet/tabletserver/health_streamer_test.go
+++ b/go/vt/vttablet/tabletserver/health_streamer_test.go
@@ -592,13 +592,14 @@ func TestDeadlockBwCloseAndReload(t *testing.T) {
wg := sync.WaitGroup{}
wg.Add(2)
- // Try running Close and reload in parallel multiple times.
+ // Try running Close & MakePrimary and reload in parallel multiple times.
// This reproduces the deadlock quite readily.
go func() {
defer wg.Done()
for i := 0; i < 100; i++ {
hs.Close()
hs.Open()
+ hs.MakePrimary(true)
}
}()
diff --git a/go/vt/vttablet/tabletserver/query_executor.go b/go/vt/vttablet/tabletserver/query_executor.go
index abf296c0583..519b60b79d6 100644
--- a/go/vt/vttablet/tabletserver/query_executor.go
+++ b/go/vt/vttablet/tabletserver/query_executor.go
@@ -991,6 +991,8 @@ func (qre *QueryExecutor) execAlterMigration() (*sqltypes.Result, error) {
return qre.tsv.onlineDDLExecutor.ForceCutOverMigration(qre.ctx, alterMigration.UUID)
case sqlparser.ForceCutOverAllMigrationType:
return qre.tsv.onlineDDLExecutor.ForceCutOverPendingMigrations(qre.ctx)
+ case sqlparser.SetCutOverThresholdMigrationType:
+ return qre.tsv.onlineDDLExecutor.SetMigrationCutOverThreshold(qre.ctx, alterMigration.UUID, alterMigration.Threshold)
}
return nil, vterrors.New(vtrpcpb.Code_UNIMPLEMENTED, "ALTER VITESS_MIGRATION not implemented")
}
diff --git a/go/vt/vttablet/tabletserver/querylogz.go b/go/vt/vttablet/tabletserver/querylogz.go
index 33341d1641b..09f375aa329 100644
--- a/go/vt/vttablet/tabletserver/querylogz.go
+++ b/go/vt/vttablet/tabletserver/querylogz.go
@@ -20,9 +20,10 @@ import (
"net/http"
"strconv"
"strings"
- "text/template"
"time"
+ "github.com/google/safehtml/template"
+
"vitess.io/vitess/go/acl"
"vitess.io/vitess/go/vt/log"
"vitess.io/vitess/go/vt/logz"
diff --git a/go/vt/vttablet/tabletserver/querylogz_test.go b/go/vt/vttablet/tabletserver/querylogz_test.go
index 25f03c762c7..ee26437f330 100644
--- a/go/vt/vttablet/tabletserver/querylogz_test.go
+++ b/go/vt/vttablet/tabletserver/querylogz_test.go
@@ -37,7 +37,7 @@ func TestQuerylogzHandler(t *testing.T) {
req, _ := http.NewRequest("GET", "/querylogz?timeout=10&limit=1", nil)
logStats := tabletenv.NewLogStats(context.Background(), "Execute")
logStats.PlanType = planbuilder.PlanSelect.String()
- logStats.OriginalSQL = "select name from test_table limit 1000"
+ logStats.OriginalSQL = "select name, 'inject <script>alert();</script>' from test_table limit 1000"
logStats.RowsAffected = 1000
logStats.NumberOfQueries = 1
logStats.StartTime, _ = time.Parse("Jan 2 15:04:05", "Nov 29 13:33:09")
@@ -64,7 +64,7 @@ func TestQuerylogzHandler(t *testing.T) {
`<td>0.001</td>`,
`<td>1e-08</td>`,
`<td>Select</td>`,
- `<td>select name from test_table limit 1000</td>`,
+ regexp.QuoteMeta(`<td>select name,​ 'inject <script>alert()​;</script>' from test_table limit 1000</td>`),
`<td>1</td>`,
`<td>none</td>`,
`<td>1000</td>`,
@@ -95,7 +95,7 @@ func TestQuerylogzHandler(t *testing.T) {
`<td>0.001</td>`,
`<td>1e-08</td>`,
`<td>Select</td>`,
- `<td>select name from test_table limit 1000</td>`,
+ regexp.QuoteMeta(`<td>select name,​ 'inject <script>alert()​;</script>' from test_table limit 1000</td>`),
`<td>1</td>`,
`<td>none</td>`,
`<td>1000</td>`,
@@ -126,7 +126,7 @@ func TestQuerylogzHandler(t *testing.T) {
`<td>0.001</td>`,
`<td>1e-08</td>`,
`<td>Select</td>`,
- `<td>select name from test_table limit 1000</td>`,
+ regexp.QuoteMeta(`<td>select name,​ 'inject <script>alert()​;</script>' from test_table limit 1000</td>`),
`<td>1</td>`,
`<td>none</td>`,
`<td>1000</td>`,
diff --git a/go/vt/vttablet/tabletserver/throttle/base/metric_cache.go b/go/vt/vttablet/tabletserver/throttle/base/metric_cache.go
index 8695cb83229..faad65ca79e 100644
--- a/go/vt/vttablet/tabletserver/throttle/base/metric_cache.go
+++ b/go/vt/vttablet/tabletserver/throttle/base/metric_cache.go
@@ -49,6 +49,7 @@ import (
"github.com/patrickmn/go-cache"
"vitess.io/vitess/go/stats"
+ "vitess.io/vitess/go/vt/vttablet/tmclient"
)
// MetricsQueryType indicates the type of metrics query on MySQL backend. See following.
@@ -142,13 +143,13 @@ func (metric *ThrottleMetric) WithError(err error) *ThrottleMetric {
// ReadThrottleMetrics returns a metric for the given probe. Either by explicit query
// or via SHOW REPLICA STATUS
-func ReadThrottleMetrics(ctx context.Context, probe *Probe, metricsFunc func(context.Context) ThrottleMetrics) ThrottleMetrics {
+func ReadThrottleMetrics(ctx context.Context, probe *Probe, tmClient tmclient.TabletManagerClient, metricsFunc func(context.Context, tmclient.TabletManagerClient) ThrottleMetrics) ThrottleMetrics {
if metrics := getCachedThrottleMetrics(probe); metrics != nil {
return metrics
}
started := time.Now()
- throttleMetrics := metricsFunc(ctx)
+ throttleMetrics := metricsFunc(ctx, tmClient)
go func(metrics ThrottleMetrics, started time.Time) {
stats.GetOrNewGauge("ThrottlerProbesLatency", "probes latency").Set(time.Since(started).Nanoseconds())
diff --git a/go/vt/vttablet/tabletserver/throttle/base/metric_name.go b/go/vt/vttablet/tabletserver/throttle/base/metric_name.go
index 98e1288fb23..43bd2d17a8c 100644
--- a/go/vt/vttablet/tabletserver/throttle/base/metric_name.go
+++ b/go/vt/vttablet/tabletserver/throttle/base/metric_name.go
@@ -60,11 +60,14 @@ func (names MetricNames) Unique() MetricNames {
}
const (
- DefaultMetricName MetricName = "default"
- LagMetricName MetricName = "lag"
- ThreadsRunningMetricName MetricName = "threads_running"
- CustomMetricName MetricName = "custom"
- LoadAvgMetricName MetricName = "loadavg"
+ DefaultMetricName MetricName = "default"
+ LagMetricName MetricName = "lag"
+ ThreadsRunningMetricName MetricName = "threads_running"
+ CustomMetricName MetricName = "custom"
+ LoadAvgMetricName MetricName = "loadavg"
+ HistoryListLengthMetricName MetricName = "history_list_length"
+ MysqldLoadAvgMetricName MetricName = "mysqld-loadavg"
+ MysqldDatadirUsedRatioMetricName MetricName = "mysqld-datadir-used-ratio"
)
func (metric MetricName) DefaultScope() Scope {
diff --git a/go/vt/vttablet/tabletserver/throttle/base/metric_name_test.go b/go/vt/vttablet/tabletserver/throttle/base/metric_name_test.go
index 9867ca18db3..c2e2b44b36f 100644
--- a/go/vt/vttablet/tabletserver/throttle/base/metric_name_test.go
+++ b/go/vt/vttablet/tabletserver/throttle/base/metric_name_test.go
@@ -21,6 +21,9 @@ import (
"testing"
"github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+
+ "vitess.io/vitess/go/textutil"
)
func TestAggregateName(t *testing.T) {
@@ -238,4 +241,27 @@ func TestKnownMetricNames(t *testing.T) {
assert.Contains(t, KnownMetricNames, LoadAvgMetricName)
assert.Contains(t, KnownMetricNames, CustomMetricName)
assert.Contains(t, KnownMetricNames, DefaultMetricName)
+ assert.Contains(t, KnownMetricNames, HistoryListLengthMetricName)
+ assert.Contains(t, KnownMetricNames, MysqldLoadAvgMetricName)
+ assert.Contains(t, KnownMetricNames, MysqldDatadirUsedRatioMetricName)
+}
+
+func TestKnownMetricNamesPascalCase(t *testing.T) {
+ expectCases := map[MetricName]string{
+ LagMetricName: "Lag",
+ ThreadsRunningMetricName: "ThreadsRunning",
+ LoadAvgMetricName: "Loadavg",
+ HistoryListLengthMetricName: "HistoryListLength",
+ CustomMetricName: "Custom",
+ DefaultMetricName: "Default",
+ MysqldLoadAvgMetricName: "MysqldLoadavg",
+ MysqldDatadirUsedRatioMetricName: "MysqldDatadirUsedRatio",
+ }
+ for _, metricName := range KnownMetricNames {
+ t.Run(metricName.String(), func(t *testing.T) {
+ expect, ok := expectCases[metricName]
+ require.True(t, ok)
+ assert.Equal(t, expect, textutil.PascalCase(metricName.String()))
+ })
+ }
}
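
This test pins down how textutil.PascalCase (which also replaces SingleWordCamel in the stats names in check.go further below) renders metric names: both underscores and hyphens act as word separators. A hedged reimplementation that reproduces the mapping asserted above:

package main

import (
	"fmt"
	"strings"
)

// pascalCase mimics the asserted behavior: split on '_' and '-', then
// capitalize each word ("mysqld-loadavg" -> "MysqldLoadavg").
func pascalCase(s string) string {
	words := strings.FieldsFunc(s, func(r rune) bool {
		return r == '_' || r == '-'
	})
	var b strings.Builder
	for _, w := range words {
		b.WriteString(strings.ToUpper(w[:1]))
		b.WriteString(w[1:])
	}
	return b.String()
}

func main() {
	for _, s := range []string{"lag", "history_list_length", "mysqld-datadir-used-ratio"} {
		fmt.Println(s, "->", pascalCase(s)) // Lag, HistoryListLength, MysqldDatadirUsedRatio
	}
}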
diff --git a/go/vt/vttablet/tabletserver/throttle/base/self_metric.go b/go/vt/vttablet/tabletserver/throttle/base/self_metric.go
index 220dfa6bf60..88fbe2bdd13 100644
--- a/go/vt/vttablet/tabletserver/throttle/base/self_metric.go
+++ b/go/vt/vttablet/tabletserver/throttle/base/self_metric.go
@@ -21,15 +21,24 @@ import (
"fmt"
"strconv"
+ "vitess.io/vitess/go/vt/topo"
"vitess.io/vitess/go/vt/vttablet/tabletserver/connpool"
+ "vitess.io/vitess/go/vt/vttablet/tmclient"
)
+type SelfMetricReadParams struct {
+ Throttler metricsPublisher
+ Conn *connpool.Conn
+ TmClient tmclient.TabletManagerClient
+ TabletInfo *topo.TabletInfo
+}
+
type SelfMetric interface {
Name() MetricName
DefaultScope() Scope
DefaultThreshold() float64
RequiresConn() bool
- Read(ctx context.Context, throttler ThrottlerMetricsPublisher, conn *connpool.Conn) *ThrottleMetric
+ Read(ctx context.Context, params *SelfMetricReadParams) *ThrottleMetric
}
var (
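
Folding the (throttler, conn) arguments into a single SelfMetricReadParams struct lets new metrics pull whatever they need (a pooled connection, the tablet manager client, or the tablet record) without widening the SelfMetric interface again. A toy implementation against stubbed types, to show the calling convention rather than the real registration flow:

package main

import (
	"context"
	"fmt"
)

// Stubbed shapes of the real types.
type throttleMetric struct {
	Value float64
	Err   error
}
type selfMetricReadParams struct {
	Conn any // would be *connpool.Conn in the real struct
}
type selfMetric interface {
	Name() string
	RequiresConn() bool
	Read(ctx context.Context, params *selfMetricReadParams) *throttleMetric
}

type uptimeSelfMetric struct{} // hypothetical metric

func (m *uptimeSelfMetric) Name() string       { return "uptime" }
func (m *uptimeSelfMetric) RequiresConn() bool { return false }

// Read only touches the params fields it needs; callers populate Conn
// solely when RequiresConn() returns true.
func (m *uptimeSelfMetric) Read(ctx context.Context, params *selfMetricReadParams) *throttleMetric {
	return &throttleMetric{Value: 42}
}

func main() {
	var m selfMetric = &uptimeSelfMetric{}
	fmt.Println(m.Name(), m.Read(context.Background(), &selfMetricReadParams{}).Value)
}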
diff --git a/go/vt/vttablet/tabletserver/throttle/base/self_metric_custom_query.go b/go/vt/vttablet/tabletserver/throttle/base/self_metric_custom_query.go
index 585e63ea285..88f789e5dcd 100644
--- a/go/vt/vttablet/tabletserver/throttle/base/self_metric_custom_query.go
+++ b/go/vt/vttablet/tabletserver/throttle/base/self_metric_custom_query.go
@@ -18,8 +18,6 @@ package base
import (
"context"
-
- "vitess.io/vitess/go/vt/vttablet/tabletserver/connpool"
)
var _ SelfMetric = registerSelfMetric(&CustomQuerySelfMetric{})
@@ -43,6 +41,6 @@ func (m *CustomQuerySelfMetric) RequiresConn() bool {
return true
}
-func (m *CustomQuerySelfMetric) Read(ctx context.Context, throttler ThrottlerMetricsPublisher, conn *connpool.Conn) *ThrottleMetric {
- return ReadSelfMySQLThrottleMetric(ctx, conn, throttler.GetCustomMetricsQuery())
+func (m *CustomQuerySelfMetric) Read(ctx context.Context, params *SelfMetricReadParams) *ThrottleMetric {
+ return ReadSelfMySQLThrottleMetric(ctx, params.Conn, params.Throttler.GetCustomMetricsQuery())
}
diff --git a/go/vt/vttablet/tabletserver/throttle/base/self_metric_default.go b/go/vt/vttablet/tabletserver/throttle/base/self_metric_default.go
index 8bce295da7c..97309fa6ea9 100644
--- a/go/vt/vttablet/tabletserver/throttle/base/self_metric_default.go
+++ b/go/vt/vttablet/tabletserver/throttle/base/self_metric_default.go
@@ -19,8 +19,6 @@ package base
import (
"context"
"fmt"
-
- "vitess.io/vitess/go/vt/vttablet/tabletserver/connpool"
)
var _ SelfMetric = registerSelfMetric(&DefaultSelfMetric{})
@@ -44,7 +42,7 @@ func (m *DefaultSelfMetric) RequiresConn() bool {
return false
}
-func (m *DefaultSelfMetric) Read(ctx context.Context, throttler ThrottlerMetricsPublisher, conn *connpool.Conn) *ThrottleMetric {
+func (m *DefaultSelfMetric) Read(ctx context.Context, params *SelfMetricReadParams) *ThrottleMetric {
return &ThrottleMetric{
Err: fmt.Errorf("unexpected direct call to DefaultSelfMetric.Read"),
}
diff --git a/go/vt/vttablet/tabletserver/throttle/base/self_metric_innodb_history_list_length.go b/go/vt/vttablet/tabletserver/throttle/base/self_metric_innodb_history_list_length.go
new file mode 100644
index 00000000000..2696b1750ea
--- /dev/null
+++ b/go/vt/vttablet/tabletserver/throttle/base/self_metric_innodb_history_list_length.go
@@ -0,0 +1,68 @@
+/*
+Copyright 2024 The Vitess Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package base
+
+import (
+ "context"
+ "math"
+ "sync/atomic"
+ "time"
+)
+
+var (
+ historyListLengthQuery = "select count as history_len from information_schema.INNODB_METRICS where name = 'trx_rseg_history_len'"
+
+ cachedHistoryListLengthMetric atomic.Pointer[ThrottleMetric]
+ historyListLengthCacheDuration = 5 * time.Second
+ historyListLengthDefaultThreshold = math.Pow10(9)
+)
+
+var _ SelfMetric = registerSelfMetric(&HistoryListLengthSelfMetric{})
+
+type HistoryListLengthSelfMetric struct {
+}
+
+func (m *HistoryListLengthSelfMetric) Name() MetricName {
+ return HistoryListLengthMetricName
+}
+
+func (m *HistoryListLengthSelfMetric) DefaultScope() Scope {
+ return SelfScope
+}
+
+func (m *HistoryListLengthSelfMetric) DefaultThreshold() float64 {
+ return historyListLengthDefaultThreshold
+}
+
+func (m *HistoryListLengthSelfMetric) RequiresConn() bool {
+ return true
+}
+
+func (m *HistoryListLengthSelfMetric) Read(ctx context.Context, params *SelfMetricReadParams) *ThrottleMetric {
+ // This function will be called sequentially, and therefore does not need strong mutex protection. Still, we use atomics
+ // to ensure correctness in case an external goroutine tries to read the metric concurrently.
+ metric := cachedHistoryListLengthMetric.Load()
+ if metric != nil {
+ return metric
+ }
+ metric = ReadSelfMySQLThrottleMetric(ctx, params.Conn, historyListLengthQuery)
+ cachedHistoryListLengthMetric.Store(metric)
+ time.AfterFunc(historyListLengthCacheDuration, func() {
+ cachedHistoryListLengthMetric.Store(nil)
+ })
+ return metric
+}
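
The caching idiom in Read above, an atomic.Pointer that is populated on first read and cleared by a time.AfterFunc, reappears in the loadavg metric below. Isolated from the throttler, the pattern looks like this (expensiveRead and the TTL are invented for illustration; the real code caches a *ThrottleMetric for 5 seconds):

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

var cached atomic.Pointer[string]

func expensiveRead() *string { // stand-in for the SQL query
	s := fmt.Sprintf("read at %s", time.Now().Format(time.RFC3339Nano))
	return &s
}

// cachedRead returns the cached value if present; otherwise it reads,
// stores, and schedules the cache entry to be dropped after the TTL.
// Atomics keep this safe even if another goroutine reads concurrently.
func cachedRead(ttl time.Duration) *string {
	if v := cached.Load(); v != nil {
		return v
	}
	v := expensiveRead()
	cached.Store(v)
	time.AfterFunc(ttl, func() { cached.Store(nil) })
	return v
}

func main() {
	fmt.Println(*cachedRead(50 * time.Millisecond)) // fresh read
	fmt.Println(*cachedRead(50 * time.Millisecond)) // served from cache
	time.Sleep(80 * time.Millisecond)
	fmt.Println(*cachedRead(50 * time.Millisecond)) // cache expired, fresh read
}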
diff --git a/go/vt/vttablet/tabletserver/throttle/base/self_metric_lag.go b/go/vt/vttablet/tabletserver/throttle/base/self_metric_lag.go
index dc25ee5622a..3d0e4beebe1 100644
--- a/go/vt/vttablet/tabletserver/throttle/base/self_metric_lag.go
+++ b/go/vt/vttablet/tabletserver/throttle/base/self_metric_lag.go
@@ -23,7 +23,6 @@ import (
"vitess.io/vitess/go/constants/sidecar"
"vitess.io/vitess/go/vt/sqlparser"
- "vitess.io/vitess/go/vt/vttablet/tabletserver/connpool"
)
var (
@@ -65,6 +64,6 @@ func (m *LagSelfMetric) RequiresConn() bool {
return true
}
-func (m *LagSelfMetric) Read(ctx context.Context, throttler ThrottlerMetricsPublisher, conn *connpool.Conn) *ThrottleMetric {
- return ReadSelfMySQLThrottleMetric(ctx, conn, m.GetQuery())
+func (m *LagSelfMetric) Read(ctx context.Context, params *SelfMetricReadParams) *ThrottleMetric {
+ return ReadSelfMySQLThrottleMetric(ctx, params.Conn, m.GetQuery())
}
diff --git a/go/vt/vttablet/tabletserver/throttle/base/self_metric_loadavg.go b/go/vt/vttablet/tabletserver/throttle/base/self_metric_loadavg.go
index 40a2878421a..2d880169020 100644
--- a/go/vt/vttablet/tabletserver/throttle/base/self_metric_loadavg.go
+++ b/go/vt/vttablet/tabletserver/throttle/base/self_metric_loadavg.go
@@ -18,20 +18,16 @@ package base
import (
"context"
- "fmt"
- "os"
"runtime"
- "strconv"
- "strings"
+ "sync/atomic"
+ "time"
- "vitess.io/vitess/go/vt/vttablet/tabletserver/connpool"
+ "vitess.io/vitess/go/osutil"
)
var (
- loadavgOnlyAvailableOnLinuxMetric = &ThrottleMetric{
- Scope: SelfScope,
- Err: fmt.Errorf("loadavg metric is only available on Linux"),
- }
+ cachedLoadAvgMetric atomic.Pointer[ThrottleMetric]
+ loadAvgCacheDuration = 1 * time.Second
)
var _ SelfMetric = registerSelfMetric(&LoadAvgSelfMetric{})
@@ -55,27 +51,26 @@ func (m *LoadAvgSelfMetric) RequiresConn() bool {
return false
}
-func (m *LoadAvgSelfMetric) Read(ctx context.Context, throttler ThrottlerMetricsPublisher, conn *connpool.Conn) *ThrottleMetric {
- if runtime.GOOS != "linux" {
- return loadavgOnlyAvailableOnLinuxMetric
+func (m *LoadAvgSelfMetric) Read(ctx context.Context, params *SelfMetricReadParams) *ThrottleMetric {
+ // This function will be called sequentially, and therefore does not need strong mutex protection. Still, we use atomics
+ // to ensure correctness in case an external goroutine tries to read the metric concurrently.
+ metric := cachedLoadAvgMetric.Load()
+ if metric != nil {
+ return metric
}
- metric := &ThrottleMetric{
+ metric = &ThrottleMetric{
Scope: SelfScope,
}
- {
- content, err := os.ReadFile("/proc/loadavg")
- if err != nil {
- return metric.WithError(err)
- }
- fields := strings.Fields(string(content))
- if len(fields) == 0 {
- return metric.WithError(fmt.Errorf("unexpected /proc/loadavg content"))
- }
- loadAvg, err := strconv.ParseFloat(fields[0], 64)
- if err != nil {
- return metric.WithError(err)
- }
- metric.Value = loadAvg / float64(runtime.NumCPU())
+ val, err := osutil.LoadAvg()
+ if err != nil {
+ return metric.WithError(err)
}
+ metric.Value = val / float64(runtime.NumCPU())
+
+ cachedLoadAvgMetric.Store(metric)
+ time.AfterFunc(loadAvgCacheDuration, func() {
+ cachedLoadAvgMetric.Store(nil)
+ })
+
return metric
}
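
osutil.LoadAvg replaces the inline /proc/loadavg parsing deleted above, and the metric still divides by NumCPU so that a value of 1.0 means all cores are busy. For reference, the removed logic reconstructed as a standalone, Linux-only sketch:

package main

import (
	"fmt"
	"os"
	"runtime"
	"strconv"
	"strings"
)

// loadAvgPerCPU reads the 1-minute load average from /proc/loadavg and
// normalizes it by the number of logical CPUs.
func loadAvgPerCPU() (float64, error) {
	content, err := os.ReadFile("/proc/loadavg")
	if err != nil {
		return 0, err
	}
	fields := strings.Fields(string(content))
	if len(fields) == 0 {
		return 0, fmt.Errorf("unexpected /proc/loadavg content")
	}
	loadAvg, err := strconv.ParseFloat(fields[0], 64)
	if err != nil {
		return 0, err
	}
	return loadAvg / float64(runtime.NumCPU()), nil
}

func main() {
	v, err := loadAvgPerCPU()
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Printf("loadavg per cpu: %.2f\n", v)
}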
diff --git a/go/vt/vttablet/tabletserver/throttle/base/self_metric_mysqld.go b/go/vt/vttablet/tabletserver/throttle/base/self_metric_mysqld.go
new file mode 100644
index 00000000000..321837d86b4
--- /dev/null
+++ b/go/vt/vttablet/tabletserver/throttle/base/self_metric_mysqld.go
@@ -0,0 +1,156 @@
+/*
+Copyright 2024 The Vitess Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package base
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "sync/atomic"
+ "time"
+
+ "vitess.io/vitess/go/timer"
+
+ tabletmanagerdatapb "vitess.io/vitess/go/vt/proto/tabletmanagerdata"
+)
+
+var (
+ mysqlHostMetricsRpcTimeout = 5 * time.Second
+ mysqlHostMetricsRateLimit = 10 * time.Second
+ mysqlHostMetricsRateLimiter atomic.Pointer[timer.RateLimiter]
+ lastMySQLHostMetricsResponse atomic.Pointer[tabletmanagerdatapb.MysqlHostMetricsResponse]
+)
+
+// getMysqlMetricsRateLimiter returns a rate limiter that is active until the given context is cancelled.
+// This function will be called sequentially, but nonetheless it offers _some_ concurrent safety. Namely,
+// a created rate limiter is guaranteed to be cleaned up once the context is cancelled.
+func getMysqlMetricsRateLimiter(ctx context.Context, rateLimit time.Duration) *timer.RateLimiter {
+ rateLimiter := mysqlHostMetricsRateLimiter.Load()
+ if rateLimiter == nil {
+ rateLimiter = timer.NewRateLimiter(rateLimit)
+ go func() {
+ defer mysqlHostMetricsRateLimiter.Store(nil)
+ defer rateLimiter.Stop()
+ <-ctx.Done()
+ }()
+ mysqlHostMetricsRateLimiter.Store(rateLimiter)
+ }
+ return rateLimiter
+}
+
+// readMysqlHostMetrics reads MySQL host metrics sporadically from the tablet manager (which in turn reads
+// them from the mysqld daemon). The metrics are then cached, whether the read was successful or not.
+// The idea is that it is very wasteful to read these metrics for every single query: the throttler can
+// currently issue 4 reads per second, and going through two RPCs each time, e.g. just to get the disk
+// space usage, is excessive. Even the load average on the MySQL server does not change that quickly.
+func readMysqlHostMetrics(ctx context.Context, params *SelfMetricReadParams) error {
+ if params.TmClient == nil {
+ return fmt.Errorf("tmClient is nil")
+ }
+ if params.TabletInfo == nil {
+ return fmt.Errorf("tabletInfo is nil")
+ }
+ rateLimiter := getMysqlMetricsRateLimiter(ctx, mysqlHostMetricsRateLimit)
+ err := rateLimiter.Do(func() error {
+ ctx, cancel := context.WithTimeout(ctx, mysqlHostMetricsRpcTimeout)
+ defer cancel()
+
+ resp, err := params.TmClient.MysqlHostMetrics(ctx, params.TabletInfo.Tablet, &tabletmanagerdatapb.MysqlHostMetricsRequest{})
+ if err != nil {
+ return err
+ }
+ lastMySQLHostMetricsResponse.Store(resp)
+ return nil
+ })
+ return err
+}
+
+// getMysqlHostMetric gets a metric from the last-read MySQL host metrics. The metric is either read directly
+// from the tablet manager (which in turn reads it from the mysqld daemon), or served from the cache.
+func getMysqlHostMetric(ctx context.Context, params *SelfMetricReadParams, mysqlHostMetricName string) *ThrottleMetric {
+ metric := &ThrottleMetric{
+ Scope: SelfScope,
+ }
+ if err := readMysqlHostMetrics(ctx, params); err != nil {
+ return metric.WithError(err)
+ }
+ resp := lastMySQLHostMetricsResponse.Load()
+ if resp == nil {
+ return metric.WithError(ErrNoResultYet)
+ }
+ mysqlMetric := resp.HostMetrics.Metrics[mysqlHostMetricName]
+ if mysqlMetric == nil {
+ return metric.WithError(ErrNoSuchMetric)
+ }
+ metric.Value = mysqlMetric.Value
+ if mysqlMetric.Error != nil {
+ metric.Err = errors.New(mysqlMetric.Error.Message)
+ }
+ return metric
+}
+
+var _ SelfMetric = registerSelfMetric(&MysqldLoadAvgSelfMetric{})
+var _ SelfMetric = registerSelfMetric(&MysqldDatadirUsedRatioSelfMetric{})
+
+// MysqldLoadAvgSelfMetric stands for the load average per cpu, on the MySQL host.
+type MysqldLoadAvgSelfMetric struct {
+}
+
+func (m *MysqldLoadAvgSelfMetric) Name() MetricName {
+ return MysqldLoadAvgMetricName
+}
+
+func (m *MysqldLoadAvgSelfMetric) DefaultScope() Scope {
+ return SelfScope
+}
+
+func (m *MysqldLoadAvgSelfMetric) DefaultThreshold() float64 {
+ return 1.0
+}
+
+func (m *MysqldLoadAvgSelfMetric) RequiresConn() bool {
+ return false
+}
+
+func (m *MysqldLoadAvgSelfMetric) Read(ctx context.Context, params *SelfMetricReadParams) *ThrottleMetric {
+ return getMysqlHostMetric(ctx, params, "loadavg")
+}
+
+// MysqldDatadirUsedRatioSelfMetric stands for the disk space usage of the mount where MySQL's datadir is located.
+// Range: 0.0 (empty) - 1.0 (full)
+type MysqldDatadirUsedRatioSelfMetric struct {
+}
+
+func (m *MysqldDatadirUsedRatioSelfMetric) Name() MetricName {
+ return MysqldDatadirUsedRatioMetricName
+}
+
+func (m *MysqldDatadirUsedRatioSelfMetric) DefaultScope() Scope {
+ return SelfScope
+}
+
+func (m *MysqldDatadirUsedRatioSelfMetric) DefaultThreshold() float64 {
+ return 0.98
+}
+
+func (m *MysqldDatadirUsedRatioSelfMetric) RequiresConn() bool {
+ return false
+}
+
+func (m *MysqldDatadirUsedRatioSelfMetric) Read(ctx context.Context, params *SelfMetricReadParams) *ThrottleMetric {
+ return getMysqlHostMetric(ctx, params, "datadir-used-ratio")
+}
diff --git a/go/vt/vttablet/tabletserver/throttle/base/self_metric_mysqld_test.go b/go/vt/vttablet/tabletserver/throttle/base/self_metric_mysqld_test.go
new file mode 100644
index 00000000000..39d3f3f5ec2
--- /dev/null
+++ b/go/vt/vttablet/tabletserver/throttle/base/self_metric_mysqld_test.go
@@ -0,0 +1,72 @@
+/*
+Copyright 2024 The Vitess Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package base
+
+import (
+ "context"
+ "fmt"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestGetMysqlMetricsRateLimiter(t *testing.T) {
+ rateLimit := 10 * time.Millisecond
+ for i := range 3 {
+ testName := fmt.Sprintf("iteration %d", i)
+ t.Run(testName, func(t *testing.T) {
+ ctx, cancel := context.WithCancel(context.Background())
+ defer cancel()
+ {
+ rateLimiter := mysqlHostMetricsRateLimiter.Load()
+ assert.Nil(t, rateLimiter)
+ }
+ rateLimiter := getMysqlMetricsRateLimiter(ctx, rateLimit)
+ assert.NotNil(t, rateLimiter)
+ for range 5 {
+ r := getMysqlMetricsRateLimiter(ctx, rateLimit)
+ // Returning the same rate limiter
+ assert.Equal(t, rateLimiter, r)
+ }
+ val := 0
+ incr := func() error {
+ val++
+ return nil
+ }
+ for range 10 {
+ rateLimiter.Do(incr)
+ time.Sleep(2 * rateLimit)
+ }
+ assert.EqualValues(t, 10, val)
+ cancel()
+ // There can be a race condition where the rate limiter still emits one final tick after the context is cancelled.
+ // So we wait enough time to ensure that tick is "wasted".
+ time.Sleep(2 * rateLimit)
+ // Now that the rate limiter was stopped (we invoked `cancel()`), its `Do()` should not invoke the function anymore.
+ for range 7 {
+ rateLimiter.Do(incr)
+ time.Sleep(time.Millisecond)
+ }
+ assert.EqualValues(t, 10, val) // Same "10" value as before.
+ {
+ rateLimiter := mysqlHostMetricsRateLimiter.Load()
+ assert.Nil(t, rateLimiter)
+ }
+ })
+ }
+}
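
This test effectively documents the timer.RateLimiter contract that the mysqld metrics rely on: Do runs the callback at most once per interval, and a stopped limiter never runs it again. A minimal limiter with those semantics, written only to illustrate the inferred contract (it is not Vitess's implementation):

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

type rateLimiter struct {
	interval time.Duration
	lastRun  atomic.Int64 // unix nanos of the last executed callback
	stopped  atomic.Bool
}

func newRateLimiter(d time.Duration) *rateLimiter { return &rateLimiter{interval: d} }

// Do runs f only if the limiter is running and at least one interval has
// elapsed since the previous execution; otherwise it is a no-op.
func (r *rateLimiter) Do(f func() error) error {
	if r.stopped.Load() {
		return nil
	}
	now := time.Now().UnixNano()
	last := r.lastRun.Load()
	if now-last < int64(r.interval) {
		return nil
	}
	if !r.lastRun.CompareAndSwap(last, now) {
		return nil // another goroutine won the race
	}
	return f()
}

func (r *rateLimiter) Stop() { r.stopped.Store(true) }

func main() {
	rl := newRateLimiter(10 * time.Millisecond)
	val := 0
	for range 10 {
		rl.Do(func() error { val++; return nil })
		time.Sleep(20 * time.Millisecond)
	}
	rl.Stop()
	rl.Do(func() error { val++; return nil })
	fmt.Println(val) // 10: every spaced call ran, none after Stop
}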
diff --git a/go/vt/vttablet/tabletserver/throttle/base/self_metric_threads_running.go b/go/vt/vttablet/tabletserver/throttle/base/self_metric_threads_running.go
index 08f7d408d1c..cb59547a768 100644
--- a/go/vt/vttablet/tabletserver/throttle/base/self_metric_threads_running.go
+++ b/go/vt/vttablet/tabletserver/throttle/base/self_metric_threads_running.go
@@ -18,8 +18,6 @@ package base
import (
"context"
-
- "vitess.io/vitess/go/vt/vttablet/tabletserver/connpool"
)
var (
@@ -47,6 +45,6 @@ func (m *ThreadsRunningSelfMetric) RequiresConn() bool {
return true
}
-func (m *ThreadsRunningSelfMetric) Read(ctx context.Context, throttler ThrottlerMetricsPublisher, conn *connpool.Conn) *ThrottleMetric {
- return ReadSelfMySQLThrottleMetric(ctx, conn, threadsRunningMetricQuery)
+func (m *ThreadsRunningSelfMetric) Read(ctx context.Context, params *SelfMetricReadParams) *ThrottleMetric {
+ return ReadSelfMySQLThrottleMetric(ctx, params.Conn, threadsRunningMetricQuery)
}
diff --git a/go/vt/vttablet/tabletserver/throttle/base/throttler_metrics_publisher.go b/go/vt/vttablet/tabletserver/throttle/base/throttler_metrics_publisher.go
index 1d2d4d0652c..10020af27e6 100644
--- a/go/vt/vttablet/tabletserver/throttle/base/throttler_metrics_publisher.go
+++ b/go/vt/vttablet/tabletserver/throttle/base/throttler_metrics_publisher.go
@@ -16,8 +16,8 @@ limitations under the License.
package base
-// ThrottlerMetricsPublisher is implemented by throttler.Throttler and is used by SelfMetric
+// metricsPublisher is implemented by throttler.Throttler and is used by SelfMetric
// implementations to query the throttler.
-type ThrottlerMetricsPublisher interface {
+type metricsPublisher interface {
GetCustomMetricsQuery() string
}
diff --git a/go/vt/vttablet/tabletserver/throttle/check.go b/go/vt/vttablet/tabletserver/throttle/check.go
index ccdfcb2ce23..d7f43d85e9d 100644
--- a/go/vt/vttablet/tabletserver/throttle/check.go
+++ b/go/vt/vttablet/tabletserver/throttle/check.go
@@ -188,9 +188,9 @@ func (check *ThrottlerCheck) Check(ctx context.Context, appName string, scope ba
// Out of abundance of caution, we will protect against such a scenario.
return
}
- stats.GetOrNewCounter(fmt.Sprintf("ThrottlerCheck%s%sTotal", textutil.SingleWordCamel(metricScope.String()), textutil.SingleWordCamel(metricName.String())), "").Add(1)
+ stats.GetOrNewCounter(fmt.Sprintf("ThrottlerCheck%s%sTotal", textutil.PascalCase(metricScope.String()), textutil.PascalCase(metricName.String())), "").Add(1)
if !metricCheckResult.IsOK() {
- stats.GetOrNewCounter(fmt.Sprintf("ThrottlerCheck%s%sError", textutil.SingleWordCamel(metricScope.String()), textutil.SingleWordCamel(metricName.String())), "").Add(1)
+ stats.GetOrNewCounter(fmt.Sprintf("ThrottlerCheck%s%sError", textutil.PascalCase(metricScope.String()), textutil.PascalCase(metricName.String())), "").Add(1)
}
}(metricCheckResult)
}
@@ -249,7 +249,7 @@ func (check *ThrottlerCheck) localCheck(ctx context.Context, aggregatedMetricNam
check.throttler.markMetricHealthy(aggregatedMetricName)
}
if timeSinceHealthy, found := check.throttler.timeSinceMetricHealthy(aggregatedMetricName); found {
- go stats.GetOrNewGauge(fmt.Sprintf("ThrottlerCheck%sSecondsSinceHealthy", textutil.SingleWordCamel(scope.String())), fmt.Sprintf("seconds since last healthy check for %v", scope)).Set(int64(timeSinceHealthy.Seconds()))
+ go stats.GetOrNewGauge(fmt.Sprintf("ThrottlerCheck%sSecondsSinceHealthy", textutil.PascalCase(scope.String())), fmt.Sprintf("seconds since last healthy check for %v", scope)).Set(int64(timeSinceHealthy.Seconds()))
}
return checkResult
@@ -261,7 +261,7 @@ func (check *ThrottlerCheck) reportAggregated(aggregatedMetricName string, metri
return
}
if value, err := metricResult.Get(); err == nil {
- stats.GetOrNewGaugeFloat64(fmt.Sprintf("ThrottlerAggregated%s%s", textutil.SingleWordCamel(scope.String()), textutil.SingleWordCamel(metricName.String())), fmt.Sprintf("aggregated value for %v", scope)).Set(value)
+ stats.GetOrNewGaugeFloat64(fmt.Sprintf("ThrottlerAggregated%s%s", textutil.PascalCase(scope.String()), textutil.PascalCase(metricName.String())), fmt.Sprintf("aggregated value for %v", scope)).Set(value)
}
}
diff --git a/go/vt/vttablet/tabletserver/throttle/throttler.go b/go/vt/vttablet/tabletserver/throttle/throttler.go
index af7f59abb7e..839ba9d43b8 100644
--- a/go/vt/vttablet/tabletserver/throttle/throttler.go
+++ b/go/vt/vttablet/tabletserver/throttle/throttler.go
@@ -95,7 +95,6 @@ const (
DefaultThrottleRatio = 1.0
defaultReplicationLagQuery = "select unix_timestamp(now(6))-max(ts/1000000000) as replication_lag from %s.heartbeat"
- threadsRunningQuery = "show global status like 'threads_running'"
inventoryPrefix = "inventory/"
throttlerConfigPrefix = "config/"
@@ -137,6 +136,7 @@ type Throttler struct {
keyspace string
shard string
tabletAlias *topodatapb.TabletAlias
+ tabletInfo atomic.Pointer[topo.TabletInfo]
check *ThrottlerCheck
isEnabled atomic.Bool
@@ -190,7 +190,7 @@ type Throttler struct {
cancelEnableContext context.CancelFunc
throttledAppsMutex sync.Mutex
- readSelfThrottleMetrics func(context.Context) base.ThrottleMetrics // overwritten by unit test
+ readSelfThrottleMetrics func(context.Context, tmclient.TabletManagerClient) base.ThrottleMetrics // overwritten by unit test
}
// ThrottlerStatus published some status values from the throttler
@@ -262,8 +262,8 @@ func NewThrottler(env tabletenv.Env, srvTopoServer srvtopo.Server, ts *topo.Serv
}
throttler.StoreMetricsThreshold(base.RegisteredSelfMetrics[base.LagMetricName].DefaultThreshold())
- throttler.readSelfThrottleMetrics = func(ctx context.Context) base.ThrottleMetrics {
- return throttler.readSelfThrottleMetricsInternal(ctx)
+ throttler.readSelfThrottleMetrics = func(ctx context.Context, tmClient tmclient.TabletManagerClient) base.ThrottleMetrics {
+ return throttler.readSelfThrottleMetricsInternal(ctx, tmClient)
}
return throttler
}
@@ -338,6 +338,15 @@ func (throttler *Throttler) initConfig() {
// readThrottlerConfig proactively reads the throttler's config from SrvKeyspace in local topo
func (throttler *Throttler) readThrottlerConfig(ctx context.Context) (*topodatapb.ThrottlerConfig, error) {
+ // since we're reading from topo, let's seize this opportunity to read tablet info as well
+ if throttler.tabletInfo.Load() == nil {
+ if ti, err := throttler.ts.GetTablet(ctx, throttler.tabletAlias); err == nil {
+ throttler.tabletInfo.Store(ti)
+ } else {
+ log.Errorf("Throttler: error reading tablet info: %v", err)
+ }
+ }
+
srvks, err := throttler.ts.GetSrvKeyspace(ctx, throttler.tabletAlias.Cell, throttler.keyspace)
if err != nil {
return nil, err
@@ -804,7 +813,7 @@ func (throttler *Throttler) Operate(ctx context.Context, wg *sync.WaitGroup) {
if throttler.IsOpen() {
// frequent
// Always collect self metrics:
- throttler.collectSelfMetrics(ctx)
+ throttler.collectSelfMetrics(ctx, tmClient)
if !throttler.isDormant() {
throttler.collectShardMetrics(ctx, tmClient)
}
@@ -869,7 +878,7 @@ func (throttler *Throttler) Operate(ctx context.Context, wg *sync.WaitGroup) {
}()
}
-func (throttler *Throttler) generateTabletProbeFunction(scope base.Scope, tmClient tmclient.TabletManagerClient, probe *base.Probe) (probeFunc func(context.Context) base.ThrottleMetrics) {
+func (throttler *Throttler) generateTabletProbeFunction(scope base.Scope, probe *base.Probe) (probeFunc func(context.Context, tmclient.TabletManagerClient) base.ThrottleMetrics) {
metricsWithError := func(err error) base.ThrottleMetrics {
metrics := base.ThrottleMetrics{}
for _, metricName := range base.KnownMetricNames {
@@ -882,7 +891,7 @@ func (throttler *Throttler) generateTabletProbeFunction(scope base.Scope, tmClie
}
return metrics
}
- return func(ctx context.Context) base.ThrottleMetrics {
+ return func(ctx context.Context, tmClient tmclient.TabletManagerClient) base.ThrottleMetrics {
// Some reasonable timeout, to ensure we release connections even if they're hanging (otherwise grpc-go keeps polling those connections forever)
ctx, cancel := context.WithTimeout(ctx, 4*activeCollectInterval)
defer cancel()
@@ -940,7 +949,7 @@ func (throttler *Throttler) generateTabletProbeFunction(scope base.Scope, tmClie
// readSelfThrottleMetricsInternal reads all registered self metrics on this tablet (or backend MySQL server).
// This is the actual place where metrics are read, to be later aggregated and/or propagated to other tablets.
-func (throttler *Throttler) readSelfThrottleMetricsInternal(ctx context.Context) base.ThrottleMetrics {
+func (throttler *Throttler) readSelfThrottleMetricsInternal(ctx context.Context, tmClient tmclient.TabletManagerClient) base.ThrottleMetrics {
result := make(base.ThrottleMetrics, len(base.RegisteredSelfMetrics))
writeMetric := func(metric *base.ThrottleMetric) {
select {
@@ -950,15 +959,20 @@ func (throttler *Throttler) readSelfThrottleMetricsInternal(ctx context.Context)
}
}
readMetric := func(selfMetric base.SelfMetric) *base.ThrottleMetric {
- if !selfMetric.RequiresConn() {
- return selfMetric.Read(ctx, throttler, nil)
+ params := &base.SelfMetricReadParams{
+ Throttler: throttler,
+ TmClient: tmClient,
+ TabletInfo: throttler.tabletInfo.Load(),
}
- conn, err := throttler.pool.Get(ctx, nil)
- if err != nil {
- return &base.ThrottleMetric{Err: err}
+ if selfMetric.RequiresConn() {
+ conn, err := throttler.pool.Get(ctx, nil)
+ if err != nil {
+ return &base.ThrottleMetric{Err: err}
+ }
+ defer conn.Recycle()
+ params.Conn = conn.Conn
}
- defer conn.Recycle()
- return selfMetric.Read(ctx, throttler, conn.Conn)
+ return selfMetric.Read(ctx, params)
}
for metricName, selfMetric := range base.RegisteredSelfMetrics {
if metricName == base.DefaultMetricName {
@@ -975,7 +989,7 @@ func (throttler *Throttler) readSelfThrottleMetricsInternal(ctx context.Context)
return result
}
-func (throttler *Throttler) collectSelfMetrics(ctx context.Context) {
+func (throttler *Throttler) collectSelfMetrics(ctx context.Context, tmClient tmclient.TabletManagerClient) {
probe := throttler.inventory.ClustersProbes[throttler.tabletAliasString()]
if probe == nil {
// probe not created yet
@@ -990,7 +1004,7 @@ func (throttler *Throttler) collectSelfMetrics(ctx context.Context) {
defer atomic.StoreInt64(&probe.QueryInProgress, 0)
// Throttler is probing its own tablet's metrics:
- _ = base.ReadThrottleMetrics(ctx, probe, throttler.readSelfThrottleMetrics)
+ _ = base.ReadThrottleMetrics(ctx, probe, tmClient, throttler.readSelfThrottleMetrics)
}()
}
@@ -1011,9 +1025,9 @@ func (throttler *Throttler) collectShardMetrics(ctx context.Context, tmClient tm
defer atomic.StoreInt64(&probe.QueryInProgress, 0)
// Throttler probing other tablets:
- throttleMetricFunc := throttler.generateTabletProbeFunction(base.ShardScope, tmClient, probe)
+ throttleMetricFunc := throttler.generateTabletProbeFunction(base.ShardScope, probe)
- throttleMetrics := base.ReadThrottleMetrics(ctx, probe, throttleMetricFunc)
+ throttleMetrics := base.ReadThrottleMetrics(ctx, probe, tmClient, throttleMetricFunc)
for _, metric := range throttleMetrics {
select {
case <-ctx.Done():
diff --git a/go/vt/vttablet/tabletserver/throttle/throttler_test.go b/go/vt/vttablet/tabletserver/throttle/throttler_test.go
index fd7921899da..352e641fa35 100644
--- a/go/vt/vttablet/tabletserver/throttle/throttler_test.go
+++ b/go/vt/vttablet/tabletserver/throttle/throttler_test.go
@@ -71,6 +71,24 @@ var (
Value: 2.718,
Err: nil,
},
+ base.HistoryListLengthMetricName: &base.ThrottleMetric{
+ Scope: base.SelfScope,
+ Alias: "",
+ Value: 5,
+ Err: nil,
+ },
+ base.MysqldLoadAvgMetricName: &base.ThrottleMetric{
+ Scope: base.SelfScope,
+ Alias: "",
+ Value: 0.3311,
+ Err: nil,
+ },
+ base.MysqldDatadirUsedRatioMetricName: &base.ThrottleMetric{
+ Scope: base.SelfScope,
+ Alias: "",
+ Value: 0.85,
+ Err: nil,
+ },
}
replicaMetrics = map[string]*MetricResult{
base.LagMetricName.String(): {
@@ -93,6 +111,21 @@ var (
ResponseCode: tabletmanagerdatapb.CheckThrottlerResponseCode_OK,
Value: 5.1,
},
+ base.HistoryListLengthMetricName.String(): {
+ StatusCode: http.StatusOK,
+ ResponseCode: tabletmanagerdatapb.CheckThrottlerResponseCode_OK,
+ Value: 6,
+ },
+ base.MysqldLoadAvgMetricName.String(): {
+ StatusCode: http.StatusOK,
+ ResponseCode: tabletmanagerdatapb.CheckThrottlerResponseCode_OK,
+ Value: 0.2211,
+ },
+ base.MysqldDatadirUsedRatioMetricName.String(): {
+ StatusCode: http.StatusOK,
+ ResponseCode: tabletmanagerdatapb.CheckThrottlerResponseCode_OK,
+ Value: 0.87,
+ },
}
nonPrimaryTabletType atomic.Int32
)
@@ -283,7 +316,7 @@ func newTestThrottler() *Throttler {
throttler.recentCheckDormantDiff = int64(throttler.dormantPeriod / recentCheckRateLimiterInterval)
throttler.recentCheckDiff = int64(3 * time.Second / recentCheckRateLimiterInterval)
- throttler.readSelfThrottleMetrics = func(ctx context.Context) base.ThrottleMetrics {
+ throttler.readSelfThrottleMetrics = func(ctx context.Context, tmClient tmclient.TabletManagerClient) base.ThrottleMetrics {
for _, metric := range selfMetrics {
go func() {
select {
@@ -1827,10 +1860,13 @@ func TestChecks(t *testing.T) {
assert.Equal(t, testAppName.String(), checkResult.AppName)
assert.Equal(t, len(base.KnownMetricNames), len(checkResult.Metrics))
- assert.EqualValues(t, 0.3, checkResult.Metrics[base.LagMetricName.String()].Value) // self lag value, because flags.Scope is set
- assert.EqualValues(t, 26, checkResult.Metrics[base.ThreadsRunningMetricName.String()].Value) // self value, because flags.Scope is set
- assert.EqualValues(t, 17, checkResult.Metrics[base.CustomMetricName.String()].Value) // self value, because flags.Scope is set
- assert.EqualValues(t, 2.718, checkResult.Metrics[base.LoadAvgMetricName.String()].Value) // self value, because flags.Scope is set
+ assert.EqualValues(t, 0.3, checkResult.Metrics[base.LagMetricName.String()].Value) // self lag value, because flags.Scope is set
+ assert.EqualValues(t, 26, checkResult.Metrics[base.ThreadsRunningMetricName.String()].Value) // self value, because flags.Scope is set
+ assert.EqualValues(t, 17, checkResult.Metrics[base.CustomMetricName.String()].Value) // self value, because flags.Scope is set
+ assert.EqualValues(t, 2.718, checkResult.Metrics[base.LoadAvgMetricName.String()].Value) // self value, because flags.Scope is set
+ assert.EqualValues(t, 5, checkResult.Metrics[base.HistoryListLengthMetricName.String()].Value) // self value, because flags.Scope is set
+ assert.EqualValues(t, 0.3311, checkResult.Metrics[base.MysqldLoadAvgMetricName.String()].Value) // self value, because flags.Scope is set
+ assert.EqualValues(t, 0.85, checkResult.Metrics[base.MysqldDatadirUsedRatioMetricName.String()].Value) // self value, because flags.Scope is set
for _, metric := range checkResult.Metrics {
assert.EqualValues(t, base.SelfScope.String(), metric.Scope)
}
@@ -1886,10 +1922,13 @@ func TestChecks(t *testing.T) {
assert.Equal(t, testAppName.String(), checkResult.AppName)
assert.Equal(t, len(base.KnownMetricNames), len(checkResult.Metrics))
- assert.EqualValues(t, 0.9, checkResult.Metrics[base.LagMetricName.String()].Value) // shard lag value, because flags.Scope is set
- assert.EqualValues(t, 26, checkResult.Metrics[base.ThreadsRunningMetricName.String()].Value) // shard value, because flags.Scope is set
- assert.EqualValues(t, 17, checkResult.Metrics[base.CustomMetricName.String()].Value) // shard value, because flags.Scope is set
- assert.EqualValues(t, 5.1, checkResult.Metrics[base.LoadAvgMetricName.String()].Value) // shard value, because flags.Scope is set
+ assert.EqualValues(t, 0.9, checkResult.Metrics[base.LagMetricName.String()].Value) // shard lag value, because flags.Scope is set
+ assert.EqualValues(t, 26, checkResult.Metrics[base.ThreadsRunningMetricName.String()].Value) // shard value, because flags.Scope is set
+ assert.EqualValues(t, 17, checkResult.Metrics[base.CustomMetricName.String()].Value) // shard value, because flags.Scope is set
+ assert.EqualValues(t, 5.1, checkResult.Metrics[base.LoadAvgMetricName.String()].Value) // shard value, because flags.Scope is set
+ assert.EqualValues(t, 6, checkResult.Metrics[base.HistoryListLengthMetricName.String()].Value) // shard value, because flags.Scope is set
+ assert.EqualValues(t, 0.3311, checkResult.Metrics[base.MysqldLoadAvgMetricName.String()].Value) // shard value, because flags.Scope is set
+ assert.EqualValues(t, 0.87, checkResult.Metrics[base.MysqldDatadirUsedRatioMetricName.String()].Value) // shard value, because flags.Scope is set
for _, metric := range checkResult.Metrics {
assert.EqualValues(t, base.ShardScope.String(), metric.Scope)
}
@@ -1918,14 +1957,19 @@ func TestChecks(t *testing.T) {
assert.ErrorIs(t, checkResult.Error, base.ErrThresholdExceeded)
assert.Equal(t, len(base.KnownMetricNames), len(checkResult.Metrics))
- assert.EqualValues(t, 0.9, checkResult.Metrics[base.LagMetricName.String()].Value) // shard lag value, because "shard" is the default scope for lag
- assert.EqualValues(t, 26, checkResult.Metrics[base.ThreadsRunningMetricName.String()].Value) // self value, because "self" is the default scope for threads_running
- assert.EqualValues(t, 17, checkResult.Metrics[base.CustomMetricName.String()].Value) // self value, because "self" is the default scope for custom
- assert.EqualValues(t, 2.718, checkResult.Metrics[base.LoadAvgMetricName.String()].Value) // self value, because "self" is the default scope for loadavg
+ assert.EqualValues(t, 0.9, checkResult.Metrics[base.LagMetricName.String()].Value) // shard lag value, because "shard" is the default scope for lag
+ assert.EqualValues(t, 26, checkResult.Metrics[base.ThreadsRunningMetricName.String()].Value) // self value, because "self" is the default scope for threads_running
+ assert.EqualValues(t, 17, checkResult.Metrics[base.CustomMetricName.String()].Value) // self value, because "self" is the default scope for custom
+ assert.EqualValues(t, 2.718, checkResult.Metrics[base.LoadAvgMetricName.String()].Value) // self value, because "self" is the default scope for loadavg
+ assert.EqualValues(t, 5, checkResult.Metrics[base.HistoryListLengthMetricName.String()].Value) // self value, because "self" is the default scope for history_list_length
+ assert.EqualValues(t, 0.3311, checkResult.Metrics[base.MysqldLoadAvgMetricName.String()].Value) // self value, because "self" is the default scope for mysqld-loadavg
+ assert.EqualValues(t, 0.85, checkResult.Metrics[base.MysqldDatadirUsedRatioMetricName.String()].Value) // self value, because "self" is the default scope for mysqld-datadir-used-ratio
assert.EqualValues(t, base.ShardScope.String(), checkResult.Metrics[base.LagMetricName.String()].Scope)
assert.EqualValues(t, base.SelfScope.String(), checkResult.Metrics[base.ThreadsRunningMetricName.String()].Scope)
assert.EqualValues(t, base.SelfScope.String(), checkResult.Metrics[base.CustomMetricName.String()].Scope)
assert.EqualValues(t, base.SelfScope.String(), checkResult.Metrics[base.LoadAvgMetricName.String()].Scope)
+ assert.EqualValues(t, base.SelfScope.String(), checkResult.Metrics[base.MysqldLoadAvgMetricName.String()].Scope)
+ assert.EqualValues(t, base.SelfScope.String(), checkResult.Metrics[base.MysqldDatadirUsedRatioMetricName.String()].Scope)
})
})
t.Run("checks, defined scope masks explicit scope metrics", func(t *testing.T) {
@@ -1939,6 +1983,9 @@ func TestChecks(t *testing.T) {
base.MetricName("self/threads_running"),
base.MetricName("custom"),
base.MetricName("shard/loadavg"),
+ base.MetricName("shard/mysqld-loadavg"),
+ base.MetricName("self/history_list_length"),
+ base.MetricName("self/mysqld-datadir-used-ratio"),
base.MetricName("default"),
}
checkResult := throttler.Check(ctx, testAppName.String(), metricNames, flags)
@@ -1950,10 +1997,13 @@ func TestChecks(t *testing.T) {
assert.ErrorIs(t, checkResult.Error, base.ErrThresholdExceeded)
assert.Equal(t, len(metricNames), len(checkResult.Metrics))
- assert.EqualValues(t, 0.9, checkResult.Metrics[base.LagMetricName.String()].Value) // shard lag value, even though scope name is in metric name
- assert.EqualValues(t, 26, checkResult.Metrics[base.ThreadsRunningMetricName.String()].Value) // shard value, even though scope name is in metric name
- assert.EqualValues(t, 17, checkResult.Metrics[base.CustomMetricName.String()].Value) // shard value because flags.Scope is set
- assert.EqualValues(t, 5.1, checkResult.Metrics[base.LoadAvgMetricName.String()].Value) // shard value, not because scope name is in metric name but because flags.Scope is set
+ assert.EqualValues(t, 0.9, checkResult.Metrics[base.LagMetricName.String()].Value) // shard lag value, even though scope name is in metric name
+ assert.EqualValues(t, 26, checkResult.Metrics[base.ThreadsRunningMetricName.String()].Value) // shard value, even though scope name is in metric name
+ assert.EqualValues(t, 17, checkResult.Metrics[base.CustomMetricName.String()].Value) // shard value because flags.Scope is set
+ assert.EqualValues(t, 5.1, checkResult.Metrics[base.LoadAvgMetricName.String()].Value) // shard value, not because scope name is in metric name but because flags.Scope is set
+ assert.EqualValues(t, 6, checkResult.Metrics[base.HistoryListLengthMetricName.String()].Value) // shard value, even though scope name is in metric name
+ assert.EqualValues(t, 0.3311, checkResult.Metrics[base.MysqldLoadAvgMetricName.String()].Value) // shard value, not because scope name is in metric name but because flags.Scope is set
+ assert.EqualValues(t, 0.87, checkResult.Metrics[base.MysqldDatadirUsedRatioMetricName.String()].Value) // shard value, even though scope name is in metric name
for _, metric := range checkResult.Metrics {
assert.EqualValues(t, base.ShardScope.String(), metric.Scope)
}
@@ -2222,8 +2272,13 @@ func TestReplica(t *testing.T) {
base.DefaultMetricName:
assert.Error(t, metricResult.Error, "metricName=%v, value=%v, threshold=%v", metricName, metricResult.Value, metricResult.Threshold)
assert.ErrorIs(t, metricResult.Error, base.ErrThresholdExceeded)
- case base.ThreadsRunningMetricName:
+ case base.ThreadsRunningMetricName,
+ base.HistoryListLengthMetricName,
+ base.MysqldLoadAvgMetricName,
+ base.MysqldDatadirUsedRatioMetricName:
assert.NoError(t, metricResult.Error, "metricName=%v, value=%v, threshold=%v", metricName, metricResult.Value, metricResult.Threshold)
+ default:
+ assert.Fail(t, "unexpected metric", "name=%v", metricName)
}
}
})
diff --git a/go/vt/vttablet/tabletserver/tx_pool.go b/go/vt/vttablet/tabletserver/tx_pool.go
index ca8a0ea34b2..302a3d41050 100644
--- a/go/vt/vttablet/tabletserver/tx_pool.go
+++ b/go/vt/vttablet/tabletserver/tx_pool.go
@@ -40,9 +40,8 @@ import (
)
const (
- txLogInterval = 1 * time.Minute
- beginWithCSRO = "start transaction with consistent snapshot, read only"
- trackGtidQuery = "set session session_track_gtids = START_GTID"
+ txLogInterval = 1 * time.Minute
+ beginWithCSRO = "start transaction with consistent snapshot, read only"
)
var txIsolations = map[querypb.ExecuteOptions_TransactionIsolation]string{
@@ -394,16 +393,6 @@ func createStartTxStmt(options *querypb.ExecuteOptions, readOnly bool) (string,
}
func handleConsistentSnapshotCase(ctx context.Context, conn *StatefulConnection) (beginSQL string, sessionStateChanges string, err error) {
- _, err = conn.execWithRetry(ctx, trackGtidQuery, 1, false)
- // We allow this to fail since this is a custom MySQL extension, but we return
- // then if this query was executed or not.
- //
- // Callers also can know because the sessionStateChanges will be empty for a snapshot
- // transaction and get GTID information in another (less efficient) way.
- if err == nil {
- beginSQL = trackGtidQuery + "; "
- }
-
isolationLevel := txIsolations[querypb.ExecuteOptions_CONSISTENT_SNAPSHOT_READ_ONLY]
execSQL, err := setIsolationLevel(ctx, conn, isolationLevel)
diff --git a/go/vt/vttablet/tabletserver/tx_pool_test.go b/go/vt/vttablet/tabletserver/tx_pool_test.go
index c03cac92878..22810d4c422 100644
--- a/go/vt/vttablet/tabletserver/tx_pool_test.go
+++ b/go/vt/vttablet/tabletserver/tx_pool_test.go
@@ -701,11 +701,11 @@ func TestTxPoolBeginStatements(t *testing.T) {
expBeginSQL: "set transaction isolation level serializable; start transaction read only",
}, {
txIsolationLevel: querypb.ExecuteOptions_CONSISTENT_SNAPSHOT_READ_ONLY,
- expBeginSQL: "set session session_track_gtids = START_GTID; set transaction isolation level repeatable read; start transaction with consistent snapshot, read only",
+ expBeginSQL: "set transaction isolation level repeatable read; start transaction with consistent snapshot, read only",
}, {
txIsolationLevel: querypb.ExecuteOptions_CONSISTENT_SNAPSHOT_READ_ONLY,
readOnly: true,
- expBeginSQL: "set session session_track_gtids = START_GTID; set transaction isolation level repeatable read; start transaction with consistent snapshot, read only",
+ expBeginSQL: "set transaction isolation level repeatable read; start transaction with consistent snapshot, read only",
}, {
txIsolationLevel: querypb.ExecuteOptions_AUTOCOMMIT,
expBeginSQL: "",
diff --git a/go/vt/vttablet/tabletserver/vstreamer/planbuilder.go b/go/vt/vttablet/tabletserver/vstreamer/planbuilder.go
index 9bbc98ca2bd..e5115afe6d3 100644
--- a/go/vt/vttablet/tabletserver/vstreamer/planbuilder.go
+++ b/go/vt/vttablet/tabletserver/vstreamer/planbuilder.go
@@ -89,6 +89,8 @@ const (
NotEqual
// IsNotNull is used to filter a column if it is NULL
IsNotNull
+ // In is used to filter a comparable column if it equals any of the values in a given tuple
+ In
)
// Filter contains opcodes for filtering.
@@ -97,6 +99,9 @@ type Filter struct {
ColNum int
Value sqltypes.Value
+ // Values will be used to store tuple/list values.
+ Values []sqltypes.Value
+
// Parameters for VindexMatch.
// Vindex, VindexColumns and KeyRange, if set, will be used
// to filter the row.
@@ -166,6 +171,8 @@ func getOpcode(comparison *sqlparser.ComparisonExpr) (Opcode, error) {
opcode = GreaterThanEqual
case sqlparser.NotEqualOp:
opcode = NotEqual
+ case sqlparser.InOp:
+ opcode = In
default:
return -1, fmt.Errorf("comparison operator %s not supported", comparison.Operator.ToString())
}
@@ -238,6 +245,24 @@ func (plan *Plan) filter(values, result []sqltypes.Value, charsets []collations.
if values[filter.ColNum].IsNull() {
return false, nil
}
+ case In:
+ if filter.Values == nil {
+ return false, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "unexpected empty filter values when performing IN operator")
+ }
+ found := false
+ for _, filterValue := range filter.Values {
+ match, err := compare(Equal, values[filter.ColNum], filterValue, plan.env.CollationEnv(), charsets[filter.ColNum])
+ if err != nil {
+ return false, err
+ }
+ if match {
+ found = true
+ break
+ }
+ }
+ if !found {
+ return false, nil
+ }
default:
match, err := compare(filter.Opcode, values[filter.ColNum], filter.Value, plan.env.CollationEnv(), charsets[filter.ColNum])
if err != nil {
@@ -514,6 +539,27 @@ func (plan *Plan) getColumnFuncExpr(columnName string) *sqlparser.FuncExpr {
return nil
}
+func (plan *Plan) appendTupleFilter(values sqlparser.ValTuple, opcode Opcode, colnum int) error {
+ pv, err := evalengine.Translate(values, &evalengine.Config{
+ Collation: plan.env.CollationEnv().DefaultConnectionCharset(),
+ Environment: plan.env,
+ })
+ if err != nil {
+ return err
+ }
+ env := evalengine.EmptyExpressionEnv(plan.env)
+ resolved, err := env.Evaluate(pv)
+ if err != nil {
+ return err
+ }
+ plan.Filters = append(plan.Filters, Filter{
+ Opcode: opcode,
+ ColNum: colnum,
+ Values: resolved.TupleValues(),
+ })
+ return nil
+}
+
func (plan *Plan) analyzeWhere(vschema *localVSchema, where *sqlparser.Where) error {
if where == nil {
return nil
@@ -537,6 +583,20 @@ func (plan *Plan) analyzeWhere(vschema *localVSchema, where *sqlparser.Where) er
if err != nil {
return err
}
+ // The Right Expr is typically expected to be a Literal value,
+ // except for the IN operator, where a Tuple value is expected.
+ // Handle the IN operator case first.
+ if opcode == In {
+ values, ok := expr.Right.(sqlparser.ValTuple)
+ if !ok {
+ return fmt.Errorf("unexpected: %v", sqlparser.String(expr))
+ }
+ err := plan.appendTupleFilter(values, opcode, colnum)
+ if err != nil {
+ return err
+ }
+ continue
+ }
val, ok := expr.Right.(*sqlparser.Literal)
if !ok {
return fmt.Errorf("unexpected: %v", sqlparser.String(expr))
diff --git a/go/vt/vttablet/tabletserver/vstreamer/planbuilder_test.go b/go/vt/vttablet/tabletserver/vstreamer/planbuilder_test.go
index ba345b2a00b..aba74368802 100644
--- a/go/vt/vttablet/tabletserver/vstreamer/planbuilder_test.go
+++ b/go/vt/vttablet/tabletserver/vstreamer/planbuilder_test.go
@@ -710,9 +710,15 @@ func TestPlanBuilderFilterComparison(t *testing.T) {
outFilters: []Filter{{Opcode: LessThan, ColNum: 0, Value: sqltypes.NewInt64(2)},
{Opcode: LessThanEqual, ColNum: 1, Value: sqltypes.NewVarChar("xyz")},
},
+ }, {
+ name: "in-operator",
+ inFilter: "select * from t1 where id in (1, 2)",
+ outFilters: []Filter{
+ {Opcode: In, ColNum: 0, Values: []sqltypes.Value{sqltypes.NewInt64(1), sqltypes.NewInt64(2)}},
+ },
}, {
name: "vindex-and-operators",
- inFilter: "select * from t1 where in_keyrange(id, 'hash', '-80') and id = 2 and val <> 'xyz'",
+ inFilter: "select * from t1 where in_keyrange(id, 'hash', '-80') and id = 2 and val <> 'xyz' and id in (100, 30)",
outFilters: []Filter{
{
Opcode: VindexMatch,
@@ -727,6 +733,7 @@ func TestPlanBuilderFilterComparison(t *testing.T) {
},
{Opcode: Equal, ColNum: 0, Value: sqltypes.NewInt64(2)},
{Opcode: NotEqual, ColNum: 1, Value: sqltypes.NewVarChar("xyz")},
+ {Opcode: In, ColNum: 0, Values: []sqltypes.Value{sqltypes.NewInt64(100), sqltypes.NewInt64(30)}},
},
}}
diff --git a/go/vt/vttablet/tabletserver/vstreamer/snapshot_conn.go b/go/vt/vttablet/tabletserver/vstreamer/snapshot_conn.go
index ec326cc4159..ee141ce9859 100644
--- a/go/vt/vttablet/tabletserver/vstreamer/snapshot_conn.go
+++ b/go/vt/vttablet/tabletserver/vstreamer/snapshot_conn.go
@@ -132,29 +132,6 @@ func (conn *snapshotConn) startSnapshot(ctx context.Context, table string) (gtid
return replication.EncodePosition(mpos), nil
}
-// startSnapshotWithConsistentGTID performs the snapshotting without locking tables. This assumes
-// session_track_gtids = START_GTID, which is a contribution to MySQL and is not in vanilla MySQL at the
-// time of this writing.
-func (conn *snapshotConn) startSnapshotWithConsistentGTID(ctx context.Context) (gtid string, err error) {
- if _, err := conn.ExecuteFetch("set transaction isolation level repeatable read", 1, false); err != nil {
- return "", err
- }
- result, err := conn.ExecuteFetch("start transaction with consistent snapshot, read only", 1, false)
- if err != nil {
- return "", err
- }
- // The "session_track_gtids = START_GTID" patch is only applicable to MySQL56 GTID, which is
- // why we hardcode the position as mysql.Mysql56FlavorID
- mpos, err := replication.ParsePosition(replication.Mysql56FlavorID, result.SessionStateChanges)
- if err != nil {
- return "", err
- }
- if _, err := conn.ExecuteFetch("set @@session.time_zone = '+00:00'", 1, false); err != nil {
- return "", err
- }
- return replication.EncodePosition(mpos), nil
-}
-
// Close rolls back any open transactions and closes the connection.
func (conn *snapshotConn) Close() {
_, _ = conn.ExecuteFetch("rollback", 1, false)
diff --git a/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go b/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go
index 846d62202e7..5282b5f372d 100644
--- a/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go
+++ b/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go
@@ -1966,7 +1966,7 @@ func TestFilteredMultipleWhere(t *testing.T) {
filter: &binlogdatapb.Filter{
Rules: []*binlogdatapb.Rule{{
Match: "t1",
- Filter: "select id1, val from t1 where in_keyrange('-80') and id2 = 200 and id3 = 1000 and val = 'newton'",
+ Filter: "select id1, val from t1 where in_keyrange('-80') and id2 = 200 and id3 = 1000 and val = 'newton' and id1 in (1, 2, 129)",
}},
},
customFieldEvents: true,
@@ -1988,9 +1988,7 @@ func TestFilteredMultipleWhere(t *testing.T) {
{spec: &TestRowEventSpec{table: "t1", changes: []TestRowChange{{after: []string{"2", "newton"}}}}},
}},
{"insert into t1 values (3, 100, 2000, 'kepler')", noEvents},
- {"insert into t1 values (128, 200, 1000, 'newton')", []TestRowEvent{
- {spec: &TestRowEventSpec{table: "t1", changes: []TestRowChange{{after: []string{"128", "newton"}}}}},
- }},
+ {"insert into t1 values (128, 200, 1000, 'newton')", noEvents},
{"insert into t1 values (5, 200, 2000, 'kepler')", noEvents},
{"insert into t1 values (129, 200, 1000, 'kepler')", noEvents},
{"commit", nil},
@@ -2080,3 +2078,33 @@ func TestGeneratedInvisiblePrimaryKey(t *testing.T) {
}}
ts.Run()
}
+
+func TestFilteredInOperator(t *testing.T) {
+ ts := &TestSpec{
+ t: t,
+ ddls: []string{
+ "create table t1(id1 int, id2 int, val varbinary(128), primary key(id1))",
+ },
+ options: &TestSpecOptions{
+ filter: &binlogdatapb.Filter{
+ Rules: []*binlogdatapb.Rule{{
+ Match: "t1",
+ Filter: "select id1, val from t1 where val in ('eee', 'bbb', 'ddd') and id1 in (4, 5)",
+ }},
+ },
+ },
+ }
+ defer ts.Close()
+ ts.Init()
+ ts.fieldEvents["t1"].cols[1].skip = true
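+ // A row produces an event only when both IN predicates match: rows 4 ('ddd')
+ // and 5 ('eee') qualify, while row 2 ('bbb') fails id1 in (4, 5).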
+ ts.tests = [][]*TestQuery{{
+ {"begin", nil},
+ {"insert into t1 values (1, 100, 'aaa')", noEvents},
+ {"insert into t1 values (2, 200, 'bbb')", noEvents},
+ {"insert into t1 values (3, 100, 'ccc')", noEvents},
+ {"insert into t1 values (4, 200, 'ddd')", nil},
+ {"insert into t1 values (5, 200, 'eee')", nil},
+ {"commit", nil},
+ }}
+ ts.Run()
+}
diff --git a/go/vt/vttablet/tmrpctest/test_tm_rpc.go b/go/vt/vttablet/tmrpctest/test_tm_rpc.go
index 88ba3506ba5..a106b43bf2c 100644
--- a/go/vt/vttablet/tmrpctest/test_tm_rpc.go
+++ b/go/vt/vttablet/tmrpctest/test_tm_rpc.go
@@ -1428,6 +1428,10 @@ func (fra *fakeRPCTM) Backup(ctx context.Context, logger logutil.Logger, request
return nil
}
+func (fra *fakeRPCTM) IsBackupRunning() bool {
+ return false
+}
+
func tmRPCTestBackup(ctx context.Context, t *testing.T, client tmclient.TabletManagerClient, tablet *topodatapb.Tablet) {
req := &tabletmanagerdatapb.BackupRequest{Concurrency: testBackupConcurrency, AllowPrimary: testBackupAllowPrimary}
stream, err := client.Backup(ctx, tablet, req)
diff --git a/go/vt/vttest/local_cluster.go b/go/vt/vttest/local_cluster.go
index 576a78bb761..fa7e40feb12 100644
--- a/go/vt/vttest/local_cluster.go
+++ b/go/vt/vttest/local_cluster.go
@@ -660,6 +660,7 @@ func (db *LocalCluster) JSONConfig() any {
config := map[string]any{
"bind_address": db.vt.BindAddress,
"port": db.vt.Port,
+ "grpc_bind_address": db.vt.BindAddressGprc,
"socket": db.mysql.UnixSocket(),
"vtcombo_mysql_port": db.Env.PortForProtocol("vtcombo_mysql_port", ""),
"mysql": db.Env.PortForProtocol("mysql", ""),
diff --git a/go/vt/vttest/vtprocess.go b/go/vt/vttest/vtprocess.go
index 3f34994bb75..6371811a60e 100644
--- a/go/vt/vttest/vtprocess.go
+++ b/go/vt/vttest/vtprocess.go
@@ -44,16 +44,17 @@ type HealthChecker func(addr string) bool
// It can be spawned manually or through one of the available
// helper methods.
type VtProcess struct {
- Name string
- Directory string
- LogDirectory string
- Binary string
- ExtraArgs []string
- Env []string
- BindAddress string
- Port int
- PortGrpc int
- HealthCheck HealthChecker
+ Name string
+ Directory string
+ LogDirectory string
+ Binary string
+ ExtraArgs []string
+ Env []string
+ BindAddress string
+ BindAddressGrpc string
+ Port int
+ PortGrpc int
+ HealthCheck HealthChecker
proc *exec.Cmd
exit chan error
@@ -139,6 +140,11 @@ func (vtp *VtProcess) WaitStart() (err error) {
vtp.proc.Args = append(vtp.proc.Args, fmt.Sprintf("%d", vtp.PortGrpc))
}
+ if vtp.BindAddressGrpc != "" {
+ vtp.proc.Args = append(vtp.proc.Args, "--grpc_bind_address")
+ vtp.proc.Args = append(vtp.proc.Args, vtp.BindAddressGrpc)
+ }
+
vtp.proc.Args = append(vtp.proc.Args, vtp.ExtraArgs...)
vtp.proc.Env = append(vtp.proc.Env, os.Environ()...)
vtp.proc.Env = append(vtp.proc.Env, vtp.Env...)
@@ -199,16 +205,22 @@ func VtcomboProcess(environment Environment, args *Config, mysql MySQLManager) (
if args.VtComboBindAddress != "" {
vtcomboBindAddress = args.VtComboBindAddress
}
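+ // Default the gRPC bind address to loopback; use the servenv
+ // --grpc_bind_address flag value when one was set.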
+ grpcBindAddress := "127.0.0.1"
+ if servenv.GRPCBindAddress() != "" {
+ grpcBindAddress = servenv.GRPCBindAddress()
+ }
+
vt := &VtProcess{
- Name: "vtcombo",
- Directory: environment.Directory(),
- LogDirectory: environment.LogDirectory(),
- Binary: environment.BinaryPath("vtcombo"),
- BindAddress: vtcomboBindAddress,
- Port: environment.PortForProtocol("vtcombo", ""),
- PortGrpc: environment.PortForProtocol("vtcombo", "grpc"),
- HealthCheck: environment.ProcessHealthCheck("vtcombo"),
- Env: environment.EnvVars(),
+ Name: "vtcombo",
+ Directory: environment.Directory(),
+ LogDirectory: environment.LogDirectory(),
+ Binary: environment.BinaryPath("vtcombo"),
+ BindAddress: vtcomboBindAddress,
+ BindAddressGrpc: grpcBindAddress,
+ Port: environment.PortForProtocol("vtcombo", ""),
+ PortGrpc: environment.PortForProtocol("vtcombo", "grpc"),
+ HealthCheck: environment.ProcessHealthCheck("vtcombo"),
+ Env: environment.EnvVars(),
}
user, pass := mysql.Auth()
diff --git a/go/vt/wrangler/fake_dbclient_test.go b/go/vt/wrangler/fake_dbclient_test.go
index 14ef0913383..02ee79210d7 100644
--- a/go/vt/wrangler/fake_dbclient_test.go
+++ b/go/vt/wrangler/fake_dbclient_test.go
@@ -153,6 +153,10 @@ func (dc *fakeDBClient) Rollback() error {
func (dc *fakeDBClient) Close() {
}
+func (dc *fakeDBClient) IsClosed() bool {
+ return false
+}
+
// ExecuteFetch is part of the DBClient interface
func (dc *fakeDBClient) ExecuteFetch(query string, maxrows int) (*sqltypes.Result, error) {
dc.mu.Lock()
diff --git a/proto/replicationdata.proto b/proto/replicationdata.proto
index 7107332233b..76ca3e02103 100644
--- a/proto/replicationdata.proto
+++ b/proto/replicationdata.proto
@@ -50,6 +50,7 @@ message Status {
bool has_replication_filters = 22;
bool ssl_allowed = 23;
bool replication_lag_unknown = 24;
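+ // backup_running indicates whether a backup is currently running on the tablet.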
+ bool backup_running = 25;
}
// Configuration holds replication configuration information gathered from performance_schema and global variables.
@@ -65,6 +66,7 @@ message Configuration {
message StopReplicationStatus {
replicationdata.Status before = 1;
replicationdata.Status after = 2;
+ bool backup_running = 3;
}
// StopReplicationMode is used to provide controls over how replication is stopped.
diff --git a/proto/tabletmanagerdata.proto b/proto/tabletmanagerdata.proto
index bb20e712e7f..9bb60184267 100644
--- a/proto/tabletmanagerdata.proto
+++ b/proto/tabletmanagerdata.proto
@@ -360,6 +360,7 @@ message ReplicationStatusRequest {
message ReplicationStatusResponse {
replicationdata.Status status = 1;
+ bool backup_running = 2;
}
message PrimaryStatusRequest {
@@ -548,6 +549,7 @@ message StopReplicationAndGetStatusResponse {
// Status represents the replication status call right before, and right after telling the replica to stop.
replicationdata.StopReplicationStatus status = 2;
+ bool backup_running = 3;
}
message PromoteReplicaRequest {
diff --git a/proto/vtadmin.proto b/proto/vtadmin.proto
index 78f086ec345..963d1fa5779 100644
--- a/proto/vtadmin.proto
+++ b/proto/vtadmin.proto
@@ -388,7 +388,11 @@ message WorkflowSwitchTrafficRequest {
message ApplySchemaRequest {
string cluster_id = 1;
- vtctldata.ApplySchemaRequest request = 2;
+ // Request.Sql will be overridden by this Sql field.
+ string sql = 2;
+ // Request.CallerId will be overridden by this CallerId field.
+ string caller_id = 3;
+ vtctldata.ApplySchemaRequest request = 4;
}
message CancelSchemaMigrationRequest {
diff --git a/test/config.json b/test/config.json
index c911232ce74..1e278546c7a 100644
--- a/test/config.json
+++ b/test/config.json
@@ -1238,6 +1238,17 @@
"RetryMax": 1,
"Tags": []
},
+ "vtop_example": {
+ "File": "",
+ "Args": [],
+ "Command": [
+ "test/vtop_example.sh"
+ ],
+ "Manual": false,
+ "Shard": "",
+ "RetryMax": 1,
+ "Tags": []
+ },
"vtorc_primary_failure": {
"File": "unused.go",
"Args": ["vitess.io/vitess/go/test/endtoend/vtorc/primaryfailure"],
diff --git a/test/templates/cluster_endtoend_test.tpl b/test/templates/cluster_endtoend_test.tpl
index 01f4555e303..6fe58fae361 100644
--- a/test/templates/cluster_endtoend_test.tpl
+++ b/test/templates/cluster_endtoend_test.tpl
@@ -14,7 +14,7 @@ env:
jobs:
build:
name: Run endtoend tests on {{.Name}}
- runs-on: {{if .Cores16}}gh-hosted-runners-16cores-1{{else}}ubuntu-latest{{end}}
+ runs-on: {{if .Cores16}}gh-hosted-runners-16cores-1-24.04{{else}}ubuntu-24.04{{end}}
steps:
- name: Skip CI
@@ -124,10 +124,19 @@ jobs:
# Get key to latest MySQL repo
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A8D3785C
# Setup MySQL 8.0
- wget -c https://dev.mysql.com/get/mysql-apt-config_0.8.32-1_all.deb
+ wget -c https://dev.mysql.com/get/mysql-apt-config_0.8.33-1_all.deb
echo mysql-apt-config mysql-apt-config/select-server select mysql-8.0 | sudo debconf-set-selections
sudo DEBIAN_FRONTEND="noninteractive" dpkg -i mysql-apt-config*
sudo apt-get -qq update
+
+ # We have to install this old version of libaio1 in case we end up testing with MySQL 5.7. See also:
+ # https://bugs.launchpad.net/ubuntu/+source/libaio/+bug/2067501
+ curl -L -O http://mirrors.kernel.org/ubuntu/pool/main/liba/libaio/libaio1_0.3.112-13build1_amd64.deb
+ sudo dpkg -i libaio1_0.3.112-13build1_amd64.deb
+ # libtinfo5 is also needed for older MySQL 5.7 builds.
+ curl -L -O http://mirrors.kernel.org/ubuntu/pool/universe/n/ncurses/libtinfo5_6.3-2ubuntu0.1_amd64.deb
+ sudo dpkg -i libtinfo5_6.3-2ubuntu0.1_amd64.deb
+
# Install everything else we need, and configure
sudo apt-get -qq install -y mysql-server mysql-shell mysql-client make unzip g++ etcd-client etcd-server curl git wget eatmydata xz-utils libncurses6
diff --git a/test/templates/cluster_endtoend_test_docker.tpl b/test/templates/cluster_endtoend_test_docker.tpl
index f68e4223c75..f7e8aa2c1d8 100644
--- a/test/templates/cluster_endtoend_test_docker.tpl
+++ b/test/templates/cluster_endtoend_test_docker.tpl
@@ -6,7 +6,7 @@ permissions: read-all
jobs:
build:
name: Run endtoend tests on {{.Name}}
- runs-on: {{if .Cores16}}gh-hosted-runners-16cores-1{{else}}ubuntu-latest{{end}}
+ runs-on: {{if .Cores16}}gh-hosted-runners-16cores-1-24.04{{else}}ubuntu-24.04{{end}}
steps:
- name: Skip CI
diff --git a/test/templates/cluster_endtoend_test_mysql57.tpl b/test/templates/cluster_endtoend_test_mysql57.tpl
index b51ffc9110e..f4152c939b0 100644
--- a/test/templates/cluster_endtoend_test_mysql57.tpl
+++ b/test/templates/cluster_endtoend_test_mysql57.tpl
@@ -19,7 +19,7 @@ env:
jobs:
build:
name: Run endtoend tests on {{.Name}}
- runs-on: {{if .Cores16}}gh-hosted-runners-16cores-1{{else}}ubuntu-latest{{end}}
+ runs-on: {{if .Cores16}}gh-hosted-runners-16cores-1-24.04{{else}}ubuntu-24.04{{end}}
steps:
- name: Skip CI
@@ -126,13 +126,17 @@ jobs:
# Get key to latest MySQL repo
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A8D3785C
- wget -c https://dev.mysql.com/get/mysql-apt-config_0.8.32-1_all.deb
+ wget -c https://dev.mysql.com/get/mysql-apt-config_0.8.33-1_all.deb
# Bionic packages are still compatible for Jammy since there's no MySQL 5.7
# packages for Jammy.
echo mysql-apt-config mysql-apt-config/repo-codename select bionic | sudo debconf-set-selections
echo mysql-apt-config mysql-apt-config/select-server select mysql-5.7 | sudo debconf-set-selections
sudo DEBIAN_FRONTEND="noninteractive" dpkg -i mysql-apt-config*
sudo apt-get update
+ # We have to install this old version of libaio1. See also:
+ # https://bugs.launchpad.net/ubuntu/+source/libaio/+bug/2067501
+ curl -L -O http://mirrors.kernel.org/ubuntu/pool/main/liba/libaio/libaio1_0.3.112-13build1_amd64.deb
+ sudo dpkg -i libaio1_0.3.112-13build1_amd64.deb
sudo DEBIAN_FRONTEND="noninteractive" apt-get install -y mysql-client=5.7* mysql-community-server=5.7* mysql-server=5.7* libncurses6
sudo apt-get install -y make unzip g++ etcd-client etcd-server curl git wget eatmydata
diff --git a/test/templates/cluster_vitess_tester.tpl b/test/templates/cluster_vitess_tester.tpl
index f0b5838d8e8..b8d77754ba6 100644
--- a/test/templates/cluster_vitess_tester.tpl
+++ b/test/templates/cluster_vitess_tester.tpl
@@ -14,7 +14,7 @@ env:
jobs:
build:
name: Run endtoend tests on {{.Name}}
- runs-on: ubuntu-latest
+ runs-on: ubuntu-24.04
steps:
- name: Skip CI
@@ -93,7 +93,7 @@ jobs:
# Get key to latest MySQL repo
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A8D3785C
# Setup MySQL 8.0
- wget -c https://dev.mysql.com/get/mysql-apt-config_0.8.32-1_all.deb
+ wget -c https://dev.mysql.com/get/mysql-apt-config_0.8.33-1_all.deb
echo mysql-apt-config mysql-apt-config/select-server select mysql-8.0 | sudo debconf-set-selections
sudo DEBIAN_FRONTEND="noninteractive" dpkg -i mysql-apt-config*
sudo apt-get -qq update
diff --git a/test/templates/unit_test.tpl b/test/templates/unit_test.tpl
index c47b7a1d796..3704aebac4e 100644
--- a/test/templates/unit_test.tpl
+++ b/test/templates/unit_test.tpl
@@ -14,7 +14,7 @@ env:
jobs:
test:
name: {{.Name}}
- runs-on: ubuntu-latest
+ runs-on: ubuntu-24.04
steps:
- name: Skip CI
@@ -87,20 +87,20 @@ jobs:
if: steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.unit_tests == 'true'
run: |
export DEBIAN_FRONTEND="noninteractive"
- sudo apt-get -qq update
+ sudo apt-get update
# Uninstall any previously installed MySQL first
sudo systemctl stop apparmor
- sudo DEBIAN_FRONTEND="noninteractive" apt-get -qq remove -y --purge mysql-server mysql-client mysql-common
- sudo apt-get -qq -y autoremove
- sudo apt-get -qq -y autoclean
+ sudo DEBIAN_FRONTEND="noninteractive" apt-get remove -y --purge mysql-server mysql-client mysql-common
+ sudo apt-get -y autoremove
+ sudo apt-get -y autoclean
sudo deluser mysql
sudo rm -rf /var/lib/mysql
sudo rm -rf /etc/mysql
# Get key to latest MySQL repo
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A8D3785C
- wget -c https://dev.mysql.com/get/mysql-apt-config_0.8.32-1_all.deb
+ wget -c https://dev.mysql.com/get/mysql-apt-config_0.8.33-1_all.deb
{{if (eq .Platform "mysql57")}}
# Bionic packages are still compatible for Jammy since there's no MySQL 5.7
@@ -108,25 +108,32 @@ jobs:
echo mysql-apt-config mysql-apt-config/repo-codename select bionic | sudo debconf-set-selections
echo mysql-apt-config mysql-apt-config/select-server select mysql-5.7 | sudo debconf-set-selections
sudo DEBIAN_FRONTEND="noninteractive" dpkg -i mysql-apt-config*
- sudo apt-get -qq update
- sudo DEBIAN_FRONTEND="noninteractive" apt-get -qq install -y mysql-client=5.7* mysql-community-server=5.7* mysql-server=5.7* libncurses6
+ sudo apt-get update
+ # We have to install this old version of libaio1. See also:
+ # https://bugs.launchpad.net/ubuntu/+source/libaio/+bug/2067501
+ curl -L -O http://mirrors.kernel.org/ubuntu/pool/main/liba/libaio/libaio1_0.3.112-13build1_amd64.deb
+ sudo dpkg -i libaio1_0.3.112-13build1_amd64.deb
+ # libtinfo5 is also needed for older MySQL 5.7 builds.
+ curl -L -O http://mirrors.kernel.org/ubuntu/pool/universe/n/ncurses/libtinfo5_6.3-2ubuntu0.1_amd64.deb
+ sudo dpkg -i libtinfo5_6.3-2ubuntu0.1_amd64.deb
+ sudo DEBIAN_FRONTEND="noninteractive" apt-get install -y mysql-client=5.7* mysql-community-server=5.7* mysql-server=5.7* libncurses6
{{end}}
{{if (eq .Platform "mysql80")}}
echo mysql-apt-config mysql-apt-config/select-server select mysql-8.0 | sudo debconf-set-selections
sudo DEBIAN_FRONTEND="noninteractive" dpkg -i mysql-apt-config*
- sudo apt-get -qq update
- sudo DEBIAN_FRONTEND="noninteractive" apt-get -qq install -y mysql-server mysql-client
+ sudo apt-get update
+ sudo DEBIAN_FRONTEND="noninteractive" apt-get install -y mysql-server mysql-client
{{end}}
{{if (eq .Platform "mysql84")}}
echo mysql-apt-config mysql-apt-config/select-server select mysql-8.4-lts | sudo debconf-set-selections
sudo DEBIAN_FRONTEND="noninteractive" dpkg -i mysql-apt-config*
- sudo apt-get -qq update
- sudo DEBIAN_FRONTEND="noninteractive" apt-get -qq install -y mysql-server mysql-client
+ sudo apt-get update
+ sudo DEBIAN_FRONTEND="noninteractive" apt-get install -y mysql-server mysql-client
{{end}}
- sudo apt-get -qq install -y make unzip g++ curl git wget ant openjdk-11-jdk eatmydata
+ sudo apt-get install -y make unzip g++ curl git wget ant openjdk-11-jdk eatmydata
sudo service mysql stop
sudo bash -c "echo '/usr/sbin/mysqld { }' > /etc/apparmor.d/usr.sbin.mysqld" # https://bugs.launchpad.net/ubuntu/+source/mariadb-10.1/+bug/1806263
sudo ln -s /etc/apparmor.d/usr.sbin.mysqld /etc/apparmor.d/disable/
diff --git a/test/vtop_example.sh b/test/vtop_example.sh
index 5ff90a2be7e..c537c0f844c 100755
--- a/test/vtop_example.sh
+++ b/test/vtop_example.sh
@@ -482,11 +482,12 @@ EOF
waitForKeyspaceToBeServing customer 80- 1
}
+kind delete cluster --name kind || true
# Build the docker image for vitess/lite using the local code
docker build -f docker/lite/Dockerfile -t vitess/lite:pr .
# Build the docker image for vitess/vtadmin using the local code
-docker build -f docker/binaries/vtadmin/Dockerfile --build-arg VT_BASE_VER=pr -t vitess/vtadmin:pr .
+docker build -f docker/binaries/vtadmin/Dockerfile --build-arg VT_BASE_VER=pr -t vitess/vtadmin:pr ./docker/binaries/vtadmin
# Print the docker images available
docker image ls
diff --git a/tools/get_kubectl_kind.sh b/tools/get_kubectl_kind.sh
index 57df414fdd8..169b120aaa0 100755
--- a/tools/get_kubectl_kind.sh
+++ b/tools/get_kubectl_kind.sh
@@ -12,7 +12,7 @@ source build.env
mkdir -p "$VTROOT/bin"
cd "$VTROOT/bin"
-KUBE_VERSION="${KUBE_VERSION:-v1.21.1}"
+KUBE_VERSION="${KUBE_VERSION:-v1.31.0}"
KUBERNETES_RELEASE_URL="${KUBERNETES_RELEASE_URL:-https://dl.k8s.io}"
# Download kubectl if needed.
@@ -28,7 +28,7 @@ ln -sf "kubectl-${KUBE_VERSION}" kubectl
if ! command -v kind &> /dev/null
then
echo "Downloading kind..."
- curl -L https://kind.sigs.k8s.io/dl/v0.12.0/kind-linux-amd64 > "kind"
+ curl -L https://kind.sigs.k8s.io/dl/v0.22.0/kind-linux-amd64 > "kind"
chmod +x "kind"
echo "Installed kind"
else
diff --git a/tools/map-shard-for-value/Makefile b/tools/map-shard-for-value/Makefile
new file mode 100644
index 00000000000..61bc88ac0ed
--- /dev/null
+++ b/tools/map-shard-for-value/Makefile
@@ -0,0 +1,22 @@
+# Copyright 2024 The Vitess Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+build:
+ go build map-shard-for-value.go
+
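+# Smoke test: prints a value,keyspaceID,shard CSV row for each of three sample values.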
+test:
+ echo "1\n-1\n99" | go run map-shard-for-value.go --total_shards=4 --vindex=xxhash
+
+clean:
+ rm -f map-shard-for-value
diff --git a/tools/map-shard-for-value/map-shard-for-value.go b/tools/map-shard-for-value/map-shard-for-value.go
new file mode 100755
index 00000000000..18a092d1371
--- /dev/null
+++ b/tools/map-shard-for-value/map-shard-for-value.go
@@ -0,0 +1,207 @@
+/*
+Copyright 2024 The Vitess Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package main
+
+import (
+ "bufio"
+ "context"
+ "encoding/hex"
+ "fmt"
+ "log"
+ "os"
+ "strconv"
+ "strings"
+
+ flag "github.com/spf13/pflag"
+
+ "vitess.io/vitess/go/vt/topo"
+
+ "vitess.io/vitess/go/sqltypes"
+ "vitess.io/vitess/go/vt/key"
+ "vitess.io/vitess/go/vt/proto/topodata"
+ "vitess.io/vitess/go/vt/vtgate/vindexes"
+)
+
+/*
+ * This tool reads a list of values from stdin and prints the
+ * corresponding keyspace ID and shard for each value. It uses the given vindex
+ * and shard ranges to determine the shard. The vindex is expected to be a
+ * single-column vindex. The shard ranges are specified as a comma-separated
+ * list of key ranges, example "-80,80-".
+ * If you have uniformly distributed shards, you can specify the total number
+ * of shards using the --total_shards flag, and the tool will generate the shard ranges
+ * using the same logic as the Vitess operator does (using the key.GenerateShardRanges() function).
+ *
+ * Example usage:
+ * echo "1\n2\n3" | go run map-shard-for-value.go --vindex=hash --shards="-80,80-"
+ *
+ * Currently tested only for integer values and hash/xxhash vindexes.
+ */
+
+func mapShard(allShards []*topodata.ShardReference, ksid key.DestinationKeyspaceID) (string, error) {
+ foundShard := ""
+ addShard := func(shard string) error {
+ foundShard = shard
+ return nil
+ }
+ if err := ksid.Resolve(allShards, addShard); err != nil {
+ return "", fmt.Errorf("failed to resolve keyspace ID: %v:: %s", ksid.String(), err)
+ }
+
+ if foundShard == "" {
+ return "", fmt.Errorf("no shard found for keyspace ID: %v", ksid)
+ }
+ return foundShard, nil
+}
+
+func selectShard(vindex vindexes.Vindex, value sqltypes.Value, allShards []*topodata.ShardReference) (string, key.DestinationKeyspaceID, error) {
+ ctx := context.Background()
+
+ destinations, err := vindexes.Map(ctx, vindex, nil, [][]sqltypes.Value{{value}})
+ if err != nil {
+ return "", nil, fmt.Errorf("failed to map value to keyspace ID: %w", err)
+ }
+
+ if len(destinations) != 1 {
+ return "", nil, fmt.Errorf("unexpected number of destinations: %d", len(destinations))
+ }
+
+ ksid, ok := destinations[0].(key.DestinationKeyspaceID)
+ if !ok {
+ return "", nil, fmt.Errorf("unexpected destination type: %T", destinations[0])
+ }
+
+ foundShard, err := mapShard(allShards, ksid)
+ if err != nil {
+ return "", nil, fmt.Errorf("failed to map shard, original value %v, keyspace id %s: %w", value, ksid, err)
+ }
+ return foundShard, ksid, nil
+}
+
+func getValue(valueStr, valueType string) (sqltypes.Value, error) {
+ var value sqltypes.Value
+
+ switch valueType {
+ case "int":
+ valueInt, err := strconv.ParseInt(valueStr, 10, 64)
+ if err != nil {
+ return value, fmt.Errorf("failed to parse int value: %w", err)
+ }
+ value = sqltypes.NewInt64(valueInt)
+ case "uint":
+ valueUint, err := strconv.ParseUint(valueStr, 10, 64)
+ if err != nil {
+ return value, fmt.Errorf("failed to parse uint value: %w", err)
+ }
+ value = sqltypes.NewUint64(valueUint)
+ case "string":
+ value = sqltypes.NewVarChar(valueStr)
+ default:
+ return value, fmt.Errorf("unsupported value type: %s", valueType)
+ }
+
+ return value, nil
+}
+
+func getShardMap(shardsCSV *string) []*topodata.ShardReference {
+ var allShards []*topodata.ShardReference
+
+ for _, shard := range strings.Split(*shardsCSV, ",") {
+ _, keyRange, err := topo.ValidateShardName(shard)
+ if err != nil {
+ log.Fatalf("invalid shard range: %s", shard)
+ }
+ allShards = append(allShards, &topodata.ShardReference{
+ Name: shard,
+ KeyRange: keyRange,
+ })
+ }
+ return allShards
+}
+
+type output struct {
+ Value string
+ KeyspaceID string
+ Shard string
+}
+
+func processValues(scanner *bufio.Scanner, shardsCSV *string, vindexName string, valueType string) ([]output, error) {
+ allShards := getShardMap(shardsCSV)
+
+ vindex, err := vindexes.CreateVindex(vindexName, vindexName, nil)
+ if err != nil {
+ return nil, fmt.Errorf("failed to create vindex: %v", err)
+ }
+ var outputs []output
+ for scanner.Scan() {
+ valueStr := scanner.Text()
+ if valueStr == "" {
+ continue
+ }
+ value, err := getValue(valueStr, valueType)
+ if err != nil {
+ return nil, fmt.Errorf("failed to get value for: %v, value_type %s:: %v", valueStr, valueType, err)
+ }
+ shard, ksid, err := selectShard(vindex, value, allShards)
+ if err != nil {
+ // Ignore errors so that processing can continue for the remaining values.
+ continue
+ }
+ outputs = append(outputs, output{Value: valueStr, KeyspaceID: hex.EncodeToString(ksid), Shard: shard})
+ }
+ return outputs, nil
+}
+
+func printOutput(outputs []output) {
+ fmt.Println("value,keyspaceID,shard")
+ for _, output := range outputs {
+ fmt.Printf("%s,%s,%s\n", output.Value, output.KeyspaceID, output.Shard)
+ }
+}
+
+func main() {
+ // Explicitly configure the logger; without this, log output was flaky when running locally.
+ log.SetOutput(os.Stderr)
+ log.SetFlags(log.LstdFlags)
+ log.SetPrefix("LOG: ")
+
+ vindexName := flag.String("vindex", "xxhash", "name of the vindex")
+ shardsCSV := flag.String("shards", "", "comma-separated list of shard ranges")
+ totalShards := flag.Int("total_shards", 0, "total number of uniformly distributed shards")
+ valueType := flag.String("value_type", "int", "type of the value (int, uint, or string)")
+ flag.Parse()
+
+ if *totalShards > 0 {
+ if *shardsCSV != "" {
+ log.Fatalf("cannot specify both total_shards and shards")
+ }
+ shardArr, err := key.GenerateShardRanges(*totalShards)
+ if err != nil {
+ log.Fatalf("failed to generate shard ranges: %v", err)
+ }
+ *shardsCSV = strings.Join(shardArr, ",")
+ }
+ if *shardsCSV == "" {
+ log.Fatal("shards or total_shards must be specified")
+ }
+ scanner := bufio.NewScanner(os.Stdin)
+ outputs, err := processValues(scanner, shardsCSV, *vindexName, *valueType)
+ if err != nil {
+ log.Fatalf("failed to process values: %v", err)
+ }
+ printOutput(outputs)
+}
diff --git a/tools/map-shard-for-value/map-shard-for-value.md b/tools/map-shard-for-value/map-shard-for-value.md
new file mode 100644
index 00000000000..17daf7f5fe5
--- /dev/null
+++ b/tools/map-shard-for-value/map-shard-for-value.md
@@ -0,0 +1,47 @@
+## Map Shard for Value Tool
+
+### Overview
+
+The `map-shard-for-value` tool determines which shard a given value belongs to,
+based on the vindex algorithm and the shard ranges.
+
+### Features
+
+- Allows specifying the vindex type (e.g., `hash`, `xxhash`).
+- Allows specifying the shard list or (for uniformly distributed shard ranges) the total number of shards to generate.
+- Designed as a _filter_: Reads input values from `stdin` and outputs the corresponding shard information, so it can be
+ used to map values from a file or another program.
+
+### Usage
+
+```sh
+make build
+```
+
+```sh
+echo "1\n-1\n99" | ./map-shard-for-value --total_shards=4 --vindex=xxhash
+value,keyspaceID,shard
+1,d46405367612b4b7,c0-
+-1,d8e2a6a7c8c7623d,c0-
+99,200533312244abca,-40
+
+echo "1\n-1\n99" | ./map-shard-for-value --vindex=hash --shards="-80,80-"
+value,keyspaceID,shard
+1,166b40b44aba4bd6,-80
+-1,355550b2150e2451,-80
+99,2c40ad56f4593c47,-80
+```
+
+#### Flags
+
+- `--vindex`: Specifies the name of the vindex to use (e.g., `hash`, `xxhash`) (default `xxhash`)
+
+One (and only one) of these is required:
+
+- `--shards`: Comma-separated list of shard ranges
+- `--total_shards`: Total number of shards, only if shards are uniformly distributed
+
+Optional:
+- `--value_type`: Type of the value to map, one of int, uint, string (default `int`)
+
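+Because the tool is a filter, it composes with other commands. A hypothetical
+example (an `ids.txt` file with one value per line is assumed):
+
+```sh
+# Map each id in ids.txt to its shard across 8 uniformly distributed shards,
+# using the default xxhash vindex and int value type:
+./map-shard-for-value --total_shards=8 < ids.txt
+```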
diff --git a/tools/map-shard-for-value/map-shard-for-value_test.go b/tools/map-shard-for-value/map-shard-for-value_test.go
new file mode 100644
index 00000000000..ca014818bb9
--- /dev/null
+++ b/tools/map-shard-for-value/map-shard-for-value_test.go
@@ -0,0 +1,90 @@
+/*
+Copyright 2024 The Vitess Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package main
+
+import (
+ "bufio"
+ "fmt"
+ "strings"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestProcess(t *testing.T) {
+ type testCase struct {
+ name string
+ shardsCSV string
+ vindexType string
+ values []int
+ valueType string
+ expected []output
+ }
+ testCases := []testCase{
+ {
+ name: "hash,2 shards",
+ shardsCSV: "-80,80-",
+ vindexType: "hash",
+ values: []int{1, 99},
+ valueType: "int",
+ expected: []output{
+ {
+ Value: "1",
+ KeyspaceID: "166b40b44aba4bd6",
+ Shard: "-80",
+ },
+ {
+ Value: "99",
+ KeyspaceID: "2c40ad56f4593c47",
+ Shard: "-80",
+ },
+ },
+ },
+ {
+ name: "xxhash,4 shards",
+ shardsCSV: "-40,40-80,80-c0,c0-",
+ vindexType: "xxhash",
+ values: []int{1, 99},
+ valueType: "int",
+ expected: []output{
+ {
+ Value: "1",
+ KeyspaceID: "d46405367612b4b7",
+ Shard: "c0-",
+ },
+ {
+ Value: "99",
+ KeyspaceID: "200533312244abca",
+ Shard: "-40",
+ },
+ },
+ },
+ }
+ for _, tc := range testCases {
+ t.Run(tc.name, func(t *testing.T) {
+ var input strings.Builder
+ for _, num := range tc.values {
+ fmt.Fprintf(&input, "%d\n", num)
+ }
+ reader := strings.NewReader(input.String())
+ scanner := bufio.NewScanner(reader)
+ got, err := processValues(scanner, &tc.shardsCSV, tc.vindexType, tc.valueType)
+ require.NoError(t, err)
+ require.EqualValues(t, tc.expected, got)
+ })
+ }
+}
diff --git a/web/vtadmin/src/api/http.ts b/web/vtadmin/src/api/http.ts
index 3f75330d240..674df961ef0 100644
--- a/web/vtadmin/src/api/http.ts
+++ b/web/vtadmin/src/api/http.ts
@@ -1068,3 +1068,41 @@ export const showVDiff = async ({ clusterID, request }: ShowVDiffParams) => {
return vtadmin.VDiffShowResponse.create(result);
};
+
+export const fetchSchemaMigrations = async (request: vtadmin.IGetSchemaMigrationsRequest) => {
+ const { result } = await vtfetch(`/api/migrations/`, {
+ body: JSON.stringify(request),
+ method: 'post',
+ });
+
+ const err = vtadmin.GetSchemaMigrationsResponse.verify(result);
+ if (err) throw Error(err);
+
+ return vtadmin.GetSchemaMigrationsResponse.create(result);
+};
+
+export interface ApplySchemaParams {
+ clusterID: string;
+ keyspace: string;
+ callerID: string;
+ sql: string;
+ request: vtctldata.IApplySchemaRequest;
+}
+
+export const applySchema = async ({ clusterID, keyspace, callerID, sql, request }: ApplySchemaParams) => {
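+ // Mirrors vtadmin.ApplySchemaRequest: the top-level sql and caller_id
+ // fields override the corresponding fields of the embedded request.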
+ const body = {
+ sql,
+ caller_id: callerID,
+ request,
+ };
+
+ const { result } = await vtfetch(`/api/migration/${clusterID}/${keyspace}`, {
+ body: JSON.stringify(body),
+ method: 'post',
+ });
+
+ const err = vtctldata.ApplySchemaResponse.verify(result);
+ if (err) throw Error(err);
+
+ return vtctldata.ApplySchemaResponse.create(result);
+};
diff --git a/web/vtadmin/src/components/App.tsx b/web/vtadmin/src/components/App.tsx
index ef27a35dc95..3bb41ea35f0 100644
--- a/web/vtadmin/src/components/App.tsx
+++ b/web/vtadmin/src/components/App.tsx
@@ -45,6 +45,8 @@ import { Transactions } from './routes/Transactions';
import { Transaction } from './routes/transaction/Transaction';
import { CreateReshard } from './routes/createWorkflow/CreateReshard';
import { CreateMaterialize } from './routes/createWorkflow/CreateMaterialize';
+import { SchemaMigrations } from './routes/SchemaMigrations';
+import { CreateSchemaMigration } from './routes/createSchemaMigration/CreateSchemaMigration';
export const App = () => {
return (
@@ -140,6 +142,16 @@ export const App = () => {
+
+
+
+
+ {!isReadOnlyMode() && (
+
+
+
+ )}
+
diff --git a/web/vtadmin/src/components/NavRail.tsx b/web/vtadmin/src/components/NavRail.tsx
index 9f9e1bf1681..b30cd165684 100644
--- a/web/vtadmin/src/components/NavRail.tsx
+++ b/web/vtadmin/src/components/NavRail.tsx
@@ -65,6 +65,9 @@ export const NavRail = () => {
+ -
+
+
-
diff --git a/web/vtadmin/src/components/routes/createWorkflow/ErrorDialog.tsx b/web/vtadmin/src/components/dialog/ErrorDialog.tsx
similarity index 94%
rename from web/vtadmin/src/components/routes/createWorkflow/ErrorDialog.tsx
rename to web/vtadmin/src/components/dialog/ErrorDialog.tsx
index 25ac5dedb0b..087876e4cd2 100644
--- a/web/vtadmin/src/components/routes/createWorkflow/ErrorDialog.tsx
+++ b/web/vtadmin/src/components/dialog/ErrorDialog.tsx
@@ -14,8 +14,8 @@
* limitations under the License.
*/
import React from 'react';
-import Dialog from '../../dialog/Dialog';
-import { Icon, Icons } from '../../Icon';
+import Dialog from './Dialog';
+import { Icon, Icons } from '../Icon';
export interface ErrorDialogProps {
errorTitle?: string;
diff --git a/web/vtadmin/src/components/routes/SchemaMigrations.tsx b/web/vtadmin/src/components/routes/SchemaMigrations.tsx
new file mode 100644
index 00000000000..1761d26de49
--- /dev/null
+++ b/web/vtadmin/src/components/routes/SchemaMigrations.tsx
@@ -0,0 +1,195 @@
+/**
+ * Copyright 2024 The Vitess Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import { useEffect, useState } from 'react';
+import { useKeyspaces, useSchemaMigrations } from '../../hooks/api';
+import { DataCell } from '../dataTable/DataCell';
+import { DataTable } from '../dataTable/DataTable';
+import { ContentContainer } from '../layout/ContentContainer';
+import { WorkspaceHeader } from '../layout/WorkspaceHeader';
+import { WorkspaceTitle } from '../layout/WorkspaceTitle';
+import { QueryLoadingPlaceholder } from '../placeholders/QueryLoadingPlaceholder';
+import { useDocumentTitle } from '../../hooks/useDocumentTitle';
+import { vtadmin } from '../../proto/vtadmin';
+import { Select } from '../inputs/Select';
+import { ShardLink } from '../links/ShardLink';
+import { formatDateTime } from '../../util/time';
+import { ReadOnlyGate } from '../ReadOnlyGate';
+import { formatSchemaMigrationStatus } from '../../util/schemaMigrations';
+import { Link } from 'react-router-dom';
+import { TabletLink } from '../links/TabletLink';
+import { formatAlias } from '../../util/tablets';
+import { useURLQuery } from '../../hooks/useURLQuery';
+
+const COLUMNS = ['UUID', 'Status', 'DDL Action', 'Timestamps', 'Stage', 'Progress'];
+
+export const SchemaMigrations = () => {
+ useDocumentTitle('Schema Migrations');
+
+ const { query, replaceQuery } = useURLQuery();
+ const urlKeyspace = query['keyspace'];
+ const urlCluster = query['cluster'];
+
+ const keyspacesQuery = useKeyspaces();
+ const { data: keyspaces = [], ...ksQuery } = keyspacesQuery;
+
+ const [selectedKeyspace, setSelectedKeyspace] = useState<vtadmin.Keyspace | null | undefined>();
+
+ const request: vtadmin.IGetSchemaMigrationsRequest = {
+ cluster_requests: [
+ {
+ cluster_id: selectedKeyspace && selectedKeyspace.cluster?.id,
+ request: {
+ keyspace: selectedKeyspace && selectedKeyspace.keyspace?.name,
+ },
+ },
+ ],
+ };
+
+ const schemaMigrationsQuery = useSchemaMigrations(request, {
+ enabled: !!selectedKeyspace,
+ });
+
+ const schemaMigrations = schemaMigrationsQuery.data ? schemaMigrationsQuery.data.schema_migrations : [];
+
+ const handleKeyspaceChange = (ks: vtadmin.Keyspace | null | undefined) => {
+ setSelectedKeyspace(ks);
+
+ if (ks) {
+ replaceQuery({ keyspace: ks.keyspace?.name, cluster: ks.cluster?.id });
+ } else {
+ replaceQuery({ keyspace: undefined, cluster: undefined });
+ }
+ };
+
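+ // Keep the selected keyspace in sync with the ?keyspace and ?cluster URL params.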
+ useEffect(() => {
+ if (urlKeyspace && urlCluster) {
+ const keyspace = keyspaces.find(
+ (ks) => ks.cluster?.id === String(urlCluster) && ks.keyspace?.name === String(urlKeyspace)
+ );
+
+ if (keyspace) {
+ setSelectedKeyspace(keyspace);
+ } else if (!ksQuery.isLoading) {
+ replaceQuery({ keyspace: undefined, cluster: undefined });
+ }
+ } else {
+ setSelectedKeyspace(undefined);
+ }
+ }, [urlKeyspace, urlCluster, keyspaces, ksQuery.isLoading, replaceQuery]);
+
+ const renderRows = (rows: vtadmin.ISchemaMigration[]) => {
+ return rows.map((row) => {
+ const migrationInfo = row.schema_migration;
+
+ if (!migrationInfo) return <></>;
+
+ return (
+
+
+ {migrationInfo.uuid}
+
+ Tablet{' '}
+
+ {formatAlias(migrationInfo.tablet)}
+
+
+
+ Shard{' '}
+
+ {`${migrationInfo.keyspace}/${migrationInfo.shard}`}
+
+
+
+
+ {formatSchemaMigrationStatus(migrationInfo)}
+
+ {migrationInfo.ddl_action ? migrationInfo.ddl_action : '-'}
+
+ {migrationInfo.added_at && (
+
+ Added
+ {formatDateTime(migrationInfo.added_at?.seconds)}
+
+ )}
+ {migrationInfo.requested_at && (
+
+ Requested
+ {formatDateTime(migrationInfo.requested_at?.seconds)}
+
+ )}
+ {migrationInfo.started_at && (
+
+ Started
+ {formatDateTime(migrationInfo.started_at?.seconds)}
+
+ )}
+ {migrationInfo.completed_at && (
+
+ Completed
+ {formatDateTime(migrationInfo.completed_at?.seconds)}
+
+ )}
+
+ {migrationInfo.stage ? migrationInfo.stage : '-'}
+ {migrationInfo.progress ? `${migrationInfo.progress}%` : '-'}
+
+ );
+ });
+ };
+
+ return (
+
+
+
+
Schema Migrations
+
+
+
+ Create Schema Migration Request
+
+
+
+
+
+
+
+
+
+
+
+
+
+ );
+};
diff --git a/web/vtadmin/src/components/routes/createSchemaMigration/CreateSchemaMigration.module.scss b/web/vtadmin/src/components/routes/createSchemaMigration/CreateSchemaMigration.module.scss
new file mode 100644
index 00000000000..51f5fdca04e
--- /dev/null
+++ b/web/vtadmin/src/components/routes/createSchemaMigration/CreateSchemaMigration.module.scss
@@ -0,0 +1,30 @@
+/**
+ * Copyright 2024 The Vitess Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+.sqlInput {
+ border: solid 2px var(--colorDisabled);
+ border-radius: 6px;
+ display: block;
+ font-family: var(--fontFamilyMonospace);
+ line-height: var(--lineHeightBody);
+ padding: 0.8rem;
+ resize: vertical;
+ width: 100%;
+}
+
+.sqlInput:focus {
+ border-color: var(--colorPrimary);
+ outline: none;
+}
diff --git a/web/vtadmin/src/components/routes/createSchemaMigration/CreateSchemaMigration.tsx b/web/vtadmin/src/components/routes/createSchemaMigration/CreateSchemaMigration.tsx
new file mode 100644
index 00000000000..0f7326d2ae1
--- /dev/null
+++ b/web/vtadmin/src/components/routes/createSchemaMigration/CreateSchemaMigration.tsx
@@ -0,0 +1,270 @@
+/**
+ * Copyright 2024 The Vitess Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import { useEffect, useState } from 'react';
+import { Link, useHistory } from 'react-router-dom';
+
+import style from './CreateSchemaMigration.module.scss';
+import { useApplySchema, useClusters, useKeyspaces } from '../../../hooks/api';
+import { useDocumentTitle } from '../../../hooks/useDocumentTitle';
+import { Label } from '../../inputs/Label';
+import { Select } from '../../inputs/Select';
+import { ContentContainer } from '../../layout/ContentContainer';
+import { NavCrumbs } from '../../layout/NavCrumbs';
+import { WorkspaceHeader } from '../../layout/WorkspaceHeader';
+import { WorkspaceTitle } from '../../layout/WorkspaceTitle';
+import { TextInput } from '../../TextInput';
+import { success } from '../../Snackbar';
+import { FormError } from '../../forms/FormError';
+import { vtadmin } from '../../../proto/vtadmin';
+import ErrorDialog from '../../dialog/ErrorDialog';
+
+interface FormData {
+ clusterID: string;
+ keyspace: string;
+ ddlStrategy: string;
+ sql: string;
+ batchSize: number;
+ callerID: string;
+ migrationContext: string;
+ uuidList: string;
+}
+
+const DEFAULT_FORM_DATA: FormData = {
+ clusterID: '',
+ keyspace: '',
+ // Default DDL Strategy set to "vitess".
+ ddlStrategy: 'vitess',
+ sql: '',
+ batchSize: 0,
+ callerID: '',
+ migrationContext: '',
+ uuidList: '',
+};
+
+const DDL_STRATEGY_HELP_TEXT = `Online DDL strategy, compatible with @@ddl_strategy session variable (default "vitess")`;
+
+const MIGRATION_CONTEXT_HELP_TEXT =
+ 'For Online DDL, optionally supply a custom unique string used as context for the migration(s) in this command. By default a unique context is auto-generated by Vitess.';
+
+const CALLER_ID_HELP_TEXT =
+ 'Effective caller ID used for the operation and should map to an ACL name which grants this identity the necessary permissions to perform the operation (this is only necessary when strict table ACLs are used)';
+
+export const CreateSchemaMigration = () => {
+ useDocumentTitle('Create Schema Migration Request');
+
+ const history = useHistory();
+
+ const [formData, setFormData] = useState(DEFAULT_FORM_DATA);
+
+ const [clusterKeyspaces, setClusterKeyspaces] = useState<vtadmin.Keyspace[]>([]);
+
+ const [errorDialogOpen, setErrorDialogOpen] = useState(false);
+
+ const { data: clusters = [], ...clustersQuery } = useClusters();
+
+ const { data: keyspaces = [], ...keyspacesQuery } = useKeyspaces();
+
+ const mutation = useApplySchema(
+ {
+ clusterID: formData.clusterID,
+ keyspace: formData.keyspace,
+ callerID: formData.callerID,
+ sql: formData.sql,
+ request: {
+ ddl_strategy: formData.ddlStrategy,
+ batch_size: formData.batchSize,
+ migration_context: formData.migrationContext,
+ uuid_list: (formData.uuidList && formData.uuidList.split(',').map((uuid) => uuid.trim())) || undefined,
+ },
+ },
+ {
+ onSuccess: () => {
+ success(`Successfully created schema migration request.`, { autoClose: 1600 });
+
+ history.push({
+ pathname: `/migrations`,
+ search: `?keyspace=${formData.keyspace}&cluster=${formData.clusterID}`,
+ });
+ },
+ onError: () => {
+ setErrorDialogOpen(true);
+ },
+ }
+ );
+
+ let selectedCluster = null;
+ if (!!formData.clusterID) {
+ selectedCluster = clusters.find((c) => c.id === formData.clusterID);
+ }
+
+ let selectedKeyspace = null;
+ if (!!formData.keyspace) {
+ selectedKeyspace = keyspaces.find((ks) => ks.keyspace?.name === formData.keyspace);
+ }
+
+ const isValid = !!selectedCluster && !!formData.keyspace && !!formData.sql && !!formData.ddlStrategy;
+
+ const isDisabled = !isValid || mutation.isLoading;
+
+ const onSubmit: React.FormEventHandler = (e) => {
+ e.preventDefault();
+ mutation.mutate();
+ };
+
+ useEffect(() => {
+ // Clear out the selected keyspaces if selected cluster is changed.
+ setFormData((prevFormData) => ({ ...prevFormData, keyspace: '' }));
+ setClusterKeyspaces(keyspaces.filter((ks) => ks.cluster?.id === formData.clusterID));
+ }, [formData.clusterID, keyspaces]);
+
+ useEffect(() => {
+ if (clusters.length === 1) {
+ setFormData((prevFormData) => ({ ...prevFormData, clusterID: clusters[0].id }));
+ }
+ }, [clusters]);
+
+ return (
+
+
+
+ Schema Migrations
+
+
+ Create Schema Migration Request
+
+
+
+
+
+ {mutation.isError && !mutation.isLoading && (
+ <ErrorDialog
+ isOpen={errorDialogOpen}
+ onClose={() => {
+ setErrorDialogOpen(false);
+ }}
+ />
+ )}
+
+
+ );
+};
diff --git a/web/vtadmin/src/components/routes/createWorkflow/CreateMaterialize.tsx b/web/vtadmin/src/components/routes/createWorkflow/CreateMaterialize.tsx
index c5d688a1fb7..81447cd0e6d 100644
--- a/web/vtadmin/src/components/routes/createWorkflow/CreateMaterialize.tsx
+++ b/web/vtadmin/src/components/routes/createWorkflow/CreateMaterialize.tsx
@@ -31,7 +31,7 @@ import Toggle from '../../toggle/Toggle';
import { tabletmanagerdata, vtadmin, vtctldata } from '../../../proto/vtadmin';
import { MultiSelect } from '../../inputs/MultiSelect';
import { TABLET_TYPES } from '../../../util/tablets';
-import ErrorDialog from './ErrorDialog';
+import ErrorDialog from '../../dialog/ErrorDialog';
interface FormData {
clusterID: string;
diff --git a/web/vtadmin/src/components/routes/createWorkflow/CreateMoveTables.tsx b/web/vtadmin/src/components/routes/createWorkflow/CreateMoveTables.tsx
index bca84cda4fa..1852d85b848 100644
--- a/web/vtadmin/src/components/routes/createWorkflow/CreateMoveTables.tsx
+++ b/web/vtadmin/src/components/routes/createWorkflow/CreateMoveTables.tsx
@@ -31,7 +31,7 @@ import Toggle from '../../toggle/Toggle';
import { vtadmin } from '../../../proto/vtadmin';
import { MultiSelect } from '../../inputs/MultiSelect';
import { TABLET_TYPES } from '../../../util/tablets';
-import ErrorDialog from './ErrorDialog';
+import ErrorDialog from '../../dialog/ErrorDialog';
interface FormData {
clusterID: string;
diff --git a/web/vtadmin/src/components/routes/createWorkflow/CreateReshard.tsx b/web/vtadmin/src/components/routes/createWorkflow/CreateReshard.tsx
index 4977c59e46b..05a33825174 100644
--- a/web/vtadmin/src/components/routes/createWorkflow/CreateReshard.tsx
+++ b/web/vtadmin/src/components/routes/createWorkflow/CreateReshard.tsx
@@ -31,7 +31,7 @@ import Toggle from '../../toggle/Toggle';
import { tabletmanagerdata, vtadmin } from '../../../proto/vtadmin';
import { MultiSelect } from '../../inputs/MultiSelect';
import { TABLET_TYPES } from '../../../util/tablets';
-import ErrorDialog from './ErrorDialog';
+import ErrorDialog from '../../dialog/ErrorDialog';
interface FormData {
clusterID: string;
diff --git a/web/vtadmin/src/hooks/api.ts b/web/vtadmin/src/hooks/api.ts
index 9261f4f0eb0..18ab3b60a53 100644
--- a/web/vtadmin/src/hooks/api.ts
+++ b/web/vtadmin/src/hooks/api.ts
@@ -95,6 +95,8 @@ import {
showVDiff,
ShowVDiffParams,
createMaterialize,
+ fetchSchemaMigrations,
+ applySchema,
} from '../api/http';
import { vtadmin as pb, vtctldata } from '../proto/vtadmin';
import { formatAlias } from '../util/tablets';
@@ -796,3 +798,25 @@ export const useShowVDiff = (
) => {
return useQuery(['vdiff_show', params], () => showVDiff(params), { ...options });
};
+
+/**
+ * useSchemaMigrations is a query hook that fetches schema migrations.
+ */
+export const useSchemaMigrations = (
+ request: pb.IGetSchemaMigrationsRequest,
+ options?: UseQueryOptions | undefined
+) => {
+ return useQuery(['migrations', request], () => fetchSchemaMigrations(request), { ...options });
+};
+
+/**
+ * useApplySchema is a mutation query hook that creates ApplySchema request.
+ */
+export const useApplySchema = (
+ params: Parameters<typeof applySchema>[0],
+ options: UseMutationOptions<Awaited<ReturnType<typeof applySchema>>, Error>
+) => {
+ return useMutation<Awaited<ReturnType<typeof applySchema>>, Error>(() => {
+ return applySchema(params);
+ }, options);
+};
diff --git a/web/vtadmin/src/proto/vtadmin.d.ts b/web/vtadmin/src/proto/vtadmin.d.ts
index 9e11f5f37d9..410aaa644ff 100644
--- a/web/vtadmin/src/proto/vtadmin.d.ts
+++ b/web/vtadmin/src/proto/vtadmin.d.ts
@@ -3720,6 +3720,12 @@ export namespace vtadmin {
/** ApplySchemaRequest cluster_id */
cluster_id?: (string|null);
+ /** ApplySchemaRequest sql */
+ sql?: (string|null);
+
+ /** ApplySchemaRequest caller_id */
+ caller_id?: (string|null);
+
/** ApplySchemaRequest request */
request?: (vtctldata.IApplySchemaRequest|null);
}
@@ -3736,6 +3742,12 @@ export namespace vtadmin {
/** ApplySchemaRequest cluster_id. */
public cluster_id: string;
+ /** ApplySchemaRequest sql. */
+ public sql: string;
+
+ /** ApplySchemaRequest caller_id. */
+ public caller_id: string;
+
/** ApplySchemaRequest request. */
public request?: (vtctldata.IApplySchemaRequest|null);
@@ -26578,6 +26590,9 @@ export namespace tabletmanagerdata {
/** ReplicationStatusResponse status */
status?: (replicationdata.IStatus|null);
+
+ /** ReplicationStatusResponse backup_running */
+ backup_running?: (boolean|null);
}
/** Represents a ReplicationStatusResponse. */
@@ -26592,6 +26607,9 @@ export namespace tabletmanagerdata {
/** ReplicationStatusResponse status. */
public status?: (replicationdata.IStatus|null);
+ /** ReplicationStatusResponse backup_running. */
+ public backup_running: boolean;
+
/**
* Creates a new ReplicationStatusResponse instance using the specified properties.
* @param [properties] Properties to set
@@ -30986,6 +31004,9 @@ export namespace tabletmanagerdata {
/** StopReplicationAndGetStatusResponse status */
status?: (replicationdata.IStopReplicationStatus|null);
+
+ /** StopReplicationAndGetStatusResponse backup_running */
+ backup_running?: (boolean|null);
}
/** Represents a StopReplicationAndGetStatusResponse. */
@@ -31000,6 +31021,9 @@ export namespace tabletmanagerdata {
/** StopReplicationAndGetStatusResponse status. */
public status?: (replicationdata.IStopReplicationStatus|null);
+ /** StopReplicationAndGetStatusResponse backup_running. */
+ public backup_running: boolean;
+
/**
* Creates a new StopReplicationAndGetStatusResponse instance using the specified properties.
* @param [properties] Properties to set
@@ -47923,6 +47947,9 @@ export namespace replicationdata {
/** Status replication_lag_unknown */
replication_lag_unknown?: (boolean|null);
+
+ /** Status backup_running */
+ backup_running?: (boolean|null);
}
/** Represents a Status. */
@@ -48000,6 +48027,9 @@ export namespace replicationdata {
/** Status replication_lag_unknown. */
public replication_lag_unknown: boolean;
+ /** Status backup_running. */
+ public backup_running: boolean;
+
/**
* Creates a new Status instance using the specified properties.
* @param [properties] Properties to set
@@ -48189,6 +48219,9 @@ export namespace replicationdata {
/** StopReplicationStatus after */
after?: (replicationdata.IStatus|null);
+
+ /** StopReplicationStatus backup_running */
+ backup_running?: (boolean|null);
}
/** Represents a StopReplicationStatus. */
@@ -48206,6 +48239,9 @@ export namespace replicationdata {
/** StopReplicationStatus after. */
public after?: (replicationdata.IStatus|null);
+ /** StopReplicationStatus backup_running. */
+ public backup_running: boolean;
+
/**
* Creates a new StopReplicationStatus instance using the specified properties.
* @param [properties] Properties to set
diff --git a/web/vtadmin/src/proto/vtadmin.js b/web/vtadmin/src/proto/vtadmin.js
index d0ce20ccfc2..b8ab0c1186a 100644
--- a/web/vtadmin/src/proto/vtadmin.js
+++ b/web/vtadmin/src/proto/vtadmin.js
@@ -7838,6 +7838,8 @@ export const vtadmin = $root.vtadmin = (() => {
* @memberof vtadmin
* @interface IApplySchemaRequest
* @property {string|null} [cluster_id] ApplySchemaRequest cluster_id
+ * @property {string|null} [sql] ApplySchemaRequest sql
+ * @property {string|null} [caller_id] ApplySchemaRequest caller_id
* @property {vtctldata.IApplySchemaRequest|null} [request] ApplySchemaRequest request
*/
@@ -7864,6 +7866,22 @@ export const vtadmin = $root.vtadmin = (() => {
*/
ApplySchemaRequest.prototype.cluster_id = "";
+ /**
+ * ApplySchemaRequest sql.
+ * @member {string} sql
+ * @memberof vtadmin.ApplySchemaRequest
+ * @instance
+ */
+ ApplySchemaRequest.prototype.sql = "";
+
+ /**
+ * ApplySchemaRequest caller_id.
+ * @member {string} caller_id
+ * @memberof vtadmin.ApplySchemaRequest
+ * @instance
+ */
+ ApplySchemaRequest.prototype.caller_id = "";
+
/**
* ApplySchemaRequest request.
* @member {vtctldata.IApplySchemaRequest|null|undefined} request
@@ -7898,8 +7916,12 @@ export const vtadmin = $root.vtadmin = (() => {
writer = $Writer.create();
if (message.cluster_id != null && Object.hasOwnProperty.call(message, "cluster_id"))
writer.uint32(/* id 1, wireType 2 =*/10).string(message.cluster_id);
+ if (message.sql != null && Object.hasOwnProperty.call(message, "sql"))
+ writer.uint32(/* id 2, wireType 2 =*/18).string(message.sql);
+ if (message.caller_id != null && Object.hasOwnProperty.call(message, "caller_id"))
+ writer.uint32(/* id 3, wireType 2 =*/26).string(message.caller_id);
if (message.request != null && Object.hasOwnProperty.call(message, "request"))
- $root.vtctldata.ApplySchemaRequest.encode(message.request, writer.uint32(/* id 2, wireType 2 =*/18).fork()).ldelim();
+ $root.vtctldata.ApplySchemaRequest.encode(message.request, writer.uint32(/* id 4, wireType 2 =*/34).fork()).ldelim();
return writer;
};
@@ -7939,6 +7961,14 @@ export const vtadmin = $root.vtadmin = (() => {
break;
}
case 2: {
+ message.sql = reader.string();
+ break;
+ }
+ case 3: {
+ message.caller_id = reader.string();
+ break;
+ }
+ case 4: {
message.request = $root.vtctldata.ApplySchemaRequest.decode(reader, reader.uint32());
break;
}
@@ -7980,6 +8010,12 @@ export const vtadmin = $root.vtadmin = (() => {
if (message.cluster_id != null && message.hasOwnProperty("cluster_id"))
if (!$util.isString(message.cluster_id))
return "cluster_id: string expected";
+ if (message.sql != null && message.hasOwnProperty("sql"))
+ if (!$util.isString(message.sql))
+ return "sql: string expected";
+ if (message.caller_id != null && message.hasOwnProperty("caller_id"))
+ if (!$util.isString(message.caller_id))
+ return "caller_id: string expected";
if (message.request != null && message.hasOwnProperty("request")) {
let error = $root.vtctldata.ApplySchemaRequest.verify(message.request);
if (error)
@@ -8002,6 +8038,10 @@ export const vtadmin = $root.vtadmin = (() => {
let message = new $root.vtadmin.ApplySchemaRequest();
if (object.cluster_id != null)
message.cluster_id = String(object.cluster_id);
+ if (object.sql != null)
+ message.sql = String(object.sql);
+ if (object.caller_id != null)
+ message.caller_id = String(object.caller_id);
if (object.request != null) {
if (typeof object.request !== "object")
throw TypeError(".vtadmin.ApplySchemaRequest.request: object expected");
@@ -8025,10 +8065,16 @@ export const vtadmin = $root.vtadmin = (() => {
let object = {};
if (options.defaults) {
object.cluster_id = "";
+ object.sql = "";
+ object.caller_id = "";
object.request = null;
}
if (message.cluster_id != null && message.hasOwnProperty("cluster_id"))
object.cluster_id = message.cluster_id;
+ if (message.sql != null && message.hasOwnProperty("sql"))
+ object.sql = message.sql;
+ if (message.caller_id != null && message.hasOwnProperty("caller_id"))
+ object.caller_id = message.caller_id;
if (message.request != null && message.hasOwnProperty("request"))
object.request = $root.vtctldata.ApplySchemaRequest.toObject(message.request, options);
return object;
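One thing worth flagging in the hunks above: `request` moved from field id 2 to id 4 to make room for `sql` (2) and `caller_id` (3). That is a wire-incompatible renumbering; a payload written by the old encoder carries the `vtctldata.ApplySchemaRequest` submessage under tag 2, which the new decoder now reads as the `sql` string. A round-trip sketch against the regenerated module (import path assumed):

import { vtadmin } from '../proto/vtadmin';

// Build a request using the two new scalar fields.
const req = vtadmin.ApplySchemaRequest.create({
    cluster_id: 'local',
    sql: 'ALTER TABLE t ADD COLUMN note VARCHAR(64)',
    caller_id: 'vtadmin-web',
});

// Encode to bytes and decode back; sql and caller_id survive the trip.
const bytes = vtadmin.ApplySchemaRequest.encode(req).finish();
const back = vtadmin.ApplySchemaRequest.decode(bytes);
console.log(back.sql, back.caller_id);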
@@ -61346,6 +61392,7 @@ export const tabletmanagerdata = $root.tabletmanagerdata = (() => {
* @memberof tabletmanagerdata
* @interface IReplicationStatusResponse
* @property {replicationdata.IStatus|null} [status] ReplicationStatusResponse status
+ * @property {boolean|null} [backup_running] ReplicationStatusResponse backup_running
*/
/**
@@ -61371,6 +61418,14 @@ export const tabletmanagerdata = $root.tabletmanagerdata = (() => {
*/
ReplicationStatusResponse.prototype.status = null;
+ /**
+ * ReplicationStatusResponse backup_running.
+ * @member {boolean} backup_running
+ * @memberof tabletmanagerdata.ReplicationStatusResponse
+ * @instance
+ */
+ ReplicationStatusResponse.prototype.backup_running = false;
+
/**
* Creates a new ReplicationStatusResponse instance using the specified properties.
* @function create
@@ -61397,6 +61452,8 @@ export const tabletmanagerdata = $root.tabletmanagerdata = (() => {
writer = $Writer.create();
if (message.status != null && Object.hasOwnProperty.call(message, "status"))
$root.replicationdata.Status.encode(message.status, writer.uint32(/* id 1, wireType 2 =*/10).fork()).ldelim();
+ if (message.backup_running != null && Object.hasOwnProperty.call(message, "backup_running"))
+ writer.uint32(/* id 2, wireType 0 =*/16).bool(message.backup_running);
return writer;
};
@@ -61435,6 +61492,10 @@ export const tabletmanagerdata = $root.tabletmanagerdata = (() => {
message.status = $root.replicationdata.Status.decode(reader, reader.uint32());
break;
}
+ case 2: {
+ message.backup_running = reader.bool();
+ break;
+ }
default:
reader.skipType(tag & 7);
break;
@@ -61475,6 +61536,9 @@ export const tabletmanagerdata = $root.tabletmanagerdata = (() => {
if (error)
return "status." + error;
}
+ if (message.backup_running != null && message.hasOwnProperty("backup_running"))
+ if (typeof message.backup_running !== "boolean")
+ return "backup_running: boolean expected";
return null;
};
@@ -61495,6 +61559,8 @@ export const tabletmanagerdata = $root.tabletmanagerdata = (() => {
throw TypeError(".tabletmanagerdata.ReplicationStatusResponse.status: object expected");
message.status = $root.replicationdata.Status.fromObject(object.status);
}
+ if (object.backup_running != null)
+ message.backup_running = Boolean(object.backup_running);
return message;
};
@@ -61511,10 +61577,14 @@ export const tabletmanagerdata = $root.tabletmanagerdata = (() => {
if (!options)
options = {};
let object = {};
- if (options.defaults)
+ if (options.defaults) {
object.status = null;
+ object.backup_running = false;
+ }
if (message.status != null && message.hasOwnProperty("status"))
object.status = $root.replicationdata.Status.toObject(message.status, options);
+ if (message.backup_running != null && message.hasOwnProperty("backup_running"))
+ object.backup_running = message.backup_running;
return object;
};
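The `toObject` tweak above is the usual protobufjs pattern once a message has more than one field with a default: with `{ defaults: true }` every known field is materialized, so the new flag appears as `false` even on an empty message. A quick sketch:

import { tabletmanagerdata } from '../proto/vtadmin';

const empty = tabletmanagerdata.ReplicationStatusResponse.create({});

// Without defaults, unset fields are simply omitted.
console.log(tabletmanagerdata.ReplicationStatusResponse.toObject(empty));
// {}

// With defaults, the new flag is filled in alongside status.
console.log(tabletmanagerdata.ReplicationStatusResponse.toObject(empty, { defaults: true }));
// { status: null, backup_running: false }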
@@ -70518,6 +70588,7 @@ export const tabletmanagerdata = $root.tabletmanagerdata = (() => {
* @memberof tabletmanagerdata
* @interface IStopReplicationAndGetStatusResponse
* @property {replicationdata.IStopReplicationStatus|null} [status] StopReplicationAndGetStatusResponse status
+ * @property {boolean|null} [backup_running] StopReplicationAndGetStatusResponse backup_running
*/
/**
@@ -70543,6 +70614,14 @@ export const tabletmanagerdata = $root.tabletmanagerdata = (() => {
*/
StopReplicationAndGetStatusResponse.prototype.status = null;
+ /**
+ * StopReplicationAndGetStatusResponse backup_running.
+ * @member {boolean} backup_running
+ * @memberof tabletmanagerdata.StopReplicationAndGetStatusResponse
+ * @instance
+ */
+ StopReplicationAndGetStatusResponse.prototype.backup_running = false;
+
/**
* Creates a new StopReplicationAndGetStatusResponse instance using the specified properties.
* @function create
@@ -70569,6 +70648,8 @@ export const tabletmanagerdata = $root.tabletmanagerdata = (() => {
writer = $Writer.create();
if (message.status != null && Object.hasOwnProperty.call(message, "status"))
$root.replicationdata.StopReplicationStatus.encode(message.status, writer.uint32(/* id 2, wireType 2 =*/18).fork()).ldelim();
+ if (message.backup_running != null && Object.hasOwnProperty.call(message, "backup_running"))
+ writer.uint32(/* id 3, wireType 0 =*/24).bool(message.backup_running);
return writer;
};
@@ -70607,6 +70688,10 @@ export const tabletmanagerdata = $root.tabletmanagerdata = (() => {
message.status = $root.replicationdata.StopReplicationStatus.decode(reader, reader.uint32());
break;
}
+ case 3: {
+ message.backup_running = reader.bool();
+ break;
+ }
default:
reader.skipType(tag & 7);
break;
@@ -70647,6 +70732,9 @@ export const tabletmanagerdata = $root.tabletmanagerdata = (() => {
if (error)
return "status." + error;
}
+ if (message.backup_running != null && message.hasOwnProperty("backup_running"))
+ if (typeof message.backup_running !== "boolean")
+ return "backup_running: boolean expected";
return null;
};
@@ -70667,6 +70755,8 @@ export const tabletmanagerdata = $root.tabletmanagerdata = (() => {
throw TypeError(".tabletmanagerdata.StopReplicationAndGetStatusResponse.status: object expected");
message.status = $root.replicationdata.StopReplicationStatus.fromObject(object.status);
}
+ if (object.backup_running != null)
+ message.backup_running = Boolean(object.backup_running);
return message;
};
@@ -70683,10 +70773,14 @@ export const tabletmanagerdata = $root.tabletmanagerdata = (() => {
if (!options)
options = {};
let object = {};
- if (options.defaults)
+ if (options.defaults) {
object.status = null;
+ object.backup_running = false;
+ }
if (message.status != null && message.hasOwnProperty("status"))
object.status = $root.replicationdata.StopReplicationStatus.toObject(message.status, options);
+ if (message.backup_running != null && message.hasOwnProperty("backup_running"))
+ object.backup_running = message.backup_running;
return object;
};
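`fromObject`, by contrast, coerces loosely typed input with `Boolean(...)`, while `verify` and `decode` stay strict. A sketch:

import { tabletmanagerdata } from '../proto/vtadmin';

// A truthy non-boolean (say, 1 from hand-written JSON) is coerced to true.
const msg = tabletmanagerdata.StopReplicationAndGetStatusResponse.fromObject({
    backup_running: 1,
});
console.log(msg.backup_running); // true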
@@ -116245,6 +116339,7 @@ export const replicationdata = $root.replicationdata = (() => {
* @property {boolean|null} [has_replication_filters] Status has_replication_filters
* @property {boolean|null} [ssl_allowed] Status ssl_allowed
* @property {boolean|null} [replication_lag_unknown] Status replication_lag_unknown
+ * @property {boolean|null} [backup_running] Status backup_running
*/
/**
@@ -116438,6 +116533,14 @@ export const replicationdata = $root.replicationdata = (() => {
*/
Status.prototype.replication_lag_unknown = false;
+ /**
+ * Status backup_running.
+ * @member {boolean} backup_running
+ * @memberof replicationdata.Status
+ * @instance
+ */
+ Status.prototype.backup_running = false;
+
/**
* Creates a new Status instance using the specified properties.
* @function create
@@ -116506,6 +116609,8 @@ export const replicationdata = $root.replicationdata = (() => {
writer.uint32(/* id 23, wireType 0 =*/184).bool(message.ssl_allowed);
if (message.replication_lag_unknown != null && Object.hasOwnProperty.call(message, "replication_lag_unknown"))
writer.uint32(/* id 24, wireType 0 =*/192).bool(message.replication_lag_unknown);
+ if (message.backup_running != null && Object.hasOwnProperty.call(message, "backup_running"))
+ writer.uint32(/* id 25, wireType 0 =*/200).bool(message.backup_running);
return writer;
};
@@ -116628,6 +116733,10 @@ export const replicationdata = $root.replicationdata = (() => {
message.replication_lag_unknown = reader.bool();
break;
}
+ case 25: {
+ message.backup_running = reader.bool();
+ break;
+ }
default:
reader.skipType(tag & 7);
break;
@@ -116729,6 +116838,9 @@ export const replicationdata = $root.replicationdata = (() => {
if (message.replication_lag_unknown != null && message.hasOwnProperty("replication_lag_unknown"))
if (typeof message.replication_lag_unknown !== "boolean")
return "replication_lag_unknown: boolean expected";
+ if (message.backup_running != null && message.hasOwnProperty("backup_running"))
+ if (typeof message.backup_running !== "boolean")
+ return "backup_running: boolean expected";
return null;
};
@@ -116788,6 +116900,8 @@ export const replicationdata = $root.replicationdata = (() => {
message.ssl_allowed = Boolean(object.ssl_allowed);
if (object.replication_lag_unknown != null)
message.replication_lag_unknown = Boolean(object.replication_lag_unknown);
+ if (object.backup_running != null)
+ message.backup_running = Boolean(object.backup_running);
return message;
};
@@ -116827,6 +116941,7 @@ export const replicationdata = $root.replicationdata = (() => {
object.has_replication_filters = false;
object.ssl_allowed = false;
object.replication_lag_unknown = false;
+ object.backup_running = false;
}
if (message.position != null && message.hasOwnProperty("position"))
object.position = message.position;
@@ -116872,6 +116987,8 @@ export const replicationdata = $root.replicationdata = (() => {
object.ssl_allowed = message.ssl_allowed;
if (message.replication_lag_unknown != null && message.hasOwnProperty("replication_lag_unknown"))
object.replication_lag_unknown = message.replication_lag_unknown;
+ if (message.backup_running != null && message.hasOwnProperty("backup_running"))
+ object.backup_running = message.backup_running;
return object;
};
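The magic constant in the writer call follows from the protobuf key encoding, `(field_id << 3) | wire_type`: for `backup_running` that is `(25 << 3) | 0 = 200`, a one-byte varint key followed by the boolean value. The arithmetic, spelled out:

// Protobuf field key: (id << 3) | wireType; wire type 0 is varint (used for bool).
const fieldId = 25;
const wireType = 0;
const key = (fieldId << 3) | wireType;
console.log(key); // 200, matching writer.uint32(/* id 25, wireType 0 =*/200)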
@@ -117139,6 +117256,7 @@ export const replicationdata = $root.replicationdata = (() => {
* @interface IStopReplicationStatus
* @property {replicationdata.IStatus|null} [before] StopReplicationStatus before
* @property {replicationdata.IStatus|null} [after] StopReplicationStatus after
+ * @property {boolean|null} [backup_running] StopReplicationStatus backup_running
*/
/**
@@ -117172,6 +117290,14 @@ export const replicationdata = $root.replicationdata = (() => {
*/
StopReplicationStatus.prototype.after = null;
+ /**
+ * StopReplicationStatus backup_running.
+ * @member {boolean} backup_running
+ * @memberof replicationdata.StopReplicationStatus
+ * @instance
+ */
+ StopReplicationStatus.prototype.backup_running = false;
+
/**
* Creates a new StopReplicationStatus instance using the specified properties.
* @function create
@@ -117200,6 +117326,8 @@ export const replicationdata = $root.replicationdata = (() => {
$root.replicationdata.Status.encode(message.before, writer.uint32(/* id 1, wireType 2 =*/10).fork()).ldelim();
if (message.after != null && Object.hasOwnProperty.call(message, "after"))
$root.replicationdata.Status.encode(message.after, writer.uint32(/* id 2, wireType 2 =*/18).fork()).ldelim();
+ if (message.backup_running != null && Object.hasOwnProperty.call(message, "backup_running"))
+ writer.uint32(/* id 3, wireType 0 =*/24).bool(message.backup_running);
return writer;
};
@@ -117242,6 +117370,10 @@ export const replicationdata = $root.replicationdata = (() => {
message.after = $root.replicationdata.Status.decode(reader, reader.uint32());
break;
}
+ case 3: {
+ message.backup_running = reader.bool();
+ break;
+ }
default:
reader.skipType(tag & 7);
break;
@@ -117287,6 +117419,9 @@ export const replicationdata = $root.replicationdata = (() => {
if (error)
return "after." + error;
}
+ if (message.backup_running != null && message.hasOwnProperty("backup_running"))
+ if (typeof message.backup_running !== "boolean")
+ return "backup_running: boolean expected";
return null;
};
@@ -117312,6 +117447,8 @@ export const replicationdata = $root.replicationdata = (() => {
throw TypeError(".replicationdata.StopReplicationStatus.after: object expected");
message.after = $root.replicationdata.Status.fromObject(object.after);
}
+ if (object.backup_running != null)
+ message.backup_running = Boolean(object.backup_running);
return message;
};
@@ -117331,11 +117468,14 @@ export const replicationdata = $root.replicationdata = (() => {
if (options.defaults) {
object.before = null;
object.after = null;
+ object.backup_running = false;
}
if (message.before != null && message.hasOwnProperty("before"))
object.before = $root.replicationdata.Status.toObject(message.before, options);
if (message.after != null && message.hasOwnProperty("after"))
object.after = $root.replicationdata.Status.toObject(message.after, options);
+ if (message.backup_running != null && message.hasOwnProperty("backup_running"))
+ object.backup_running = message.backup_running;
return object;
};
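As with the other messages, the generated `verify` only type-checks plain objects and reports the first failure as a string. A sketch of the new guard (import path assumed):

import { replicationdata } from '../proto/vtadmin';

// verify returns null on success...
console.log(replicationdata.StopReplicationStatus.verify({ backup_running: true }));
// null

// ...and the field-qualified message from the guard above on a type mismatch.
console.log(replicationdata.StopReplicationStatus.verify({ backup_running: 'yes' }));
// "backup_running: boolean expected"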
diff --git a/web/vtadmin/src/util/schemaMigrations.ts b/web/vtadmin/src/util/schemaMigrations.ts
new file mode 100644
index 00000000000..c405c4dbecf
--- /dev/null
+++ b/web/vtadmin/src/util/schemaMigrations.ts
@@ -0,0 +1,31 @@
+/**
+ * Copyright 2024 The Vitess Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import { invertBy } from 'lodash-es';
+import { vtctldata } from '../proto/vtadmin';
+
+/**
+ * SCHEMA_MIGRATION_STATUS maps numeric schema migration statuses back to human-readable strings.
+ */
+export const SCHEMA_MIGRATION_STATUS = Object.entries(invertBy(vtctldata.SchemaMigration.Status)).reduce(
+ (acc, [k, vs]) => {
+ acc[k] = vs[0];
+ return acc;
+ },
+ {} as { [k: string]: string }
+);
+
+export const formatSchemaMigrationStatus = (schemaMigration: vtctldata.ISchemaMigration) =>
+ schemaMigration.status && SCHEMA_MIGRATION_STATUS[schemaMigration.status];
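Usage-wise, the keys of `SCHEMA_MIGRATION_STATUS` are stringified enum values (a side effect of `Object.entries`), so a numeric status indexes it directly. Note also that `formatSchemaMigrationStatus` short-circuits on a falsy status, so a status of `0` yields the falsy value itself rather than a name. A sketch, assuming the enum exposes a `RUNNING` member (exact names and values live in `vtctldata.SchemaMigration.Status`):

import { vtctldata } from '../proto/vtadmin';
import { SCHEMA_MIGRATION_STATUS, formatSchemaMigrationStatus } from './schemaMigrations';

// Hypothetical migration object; RUNNING is assumed to be a member of the enum.
const migration: vtctldata.ISchemaMigration = {
    uuid: 'abc-123',
    status: vtctldata.SchemaMigration.Status.RUNNING,
};

console.log(formatSchemaMigrationStatus(migration)); // "RUNNING"
console.log(SCHEMA_MIGRATION_STATUS[migration.status!]); // same lookup, done directly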