diff --git a/go/cmd/vttablet/vttablet.go b/go/cmd/vttablet/vttablet.go index 86780ec72fe..072a3d7f70e 100644 --- a/go/cmd/vttablet/vttablet.go +++ b/go/cmd/vttablet/vttablet.go @@ -106,7 +106,7 @@ func main() { VREngine: vreplication.NewEngine(config, ts, tabletAlias.Cell, mysqld, qsc.LagThrottler()), MetadataManager: &mysqlctl.MetadataManager{}, } - if err := tm.Start(tablet, config.Healthcheck.IntervalSeconds.Get()); err != nil { + if err := tm.Start(tablet, config.Healthcheck.ReplicationIntervalSeconds.Get()); err != nil { log.Exitf("failed to parse -tablet-path or initialize DB credentials: %v", err) } servenv.OnClose(func() { diff --git a/go/vt/vttablet/tabletserver/tabletenv/config.go b/go/vt/vttablet/tabletserver/tabletenv/config.go index 652a21eb0e3..b5adbbe30cd 100644 --- a/go/vt/vttablet/tabletserver/tabletenv/config.go +++ b/go/vt/vttablet/tabletserver/tabletenv/config.go @@ -40,6 +40,8 @@ const ( NotOnMaster = "notOnMaster" Polling = "polling" Heartbeat = "heartbeat" + + ReplicationHealthCheckIntervalSecondsFlagName = "replication_health_check_interval" ) var ( @@ -69,17 +71,18 @@ var ( deprecatedFoundRowsPoolSize int // The following vars are used for custom initialization of Tabletconfig. 
- enableHotRowProtection bool - enableHotRowProtectionDryRun bool - enableConsolidator bool - enableConsolidatorReplicas bool - enableHeartbeat bool - heartbeatInterval time.Duration - healthCheckInterval time.Duration - degradedThreshold time.Duration - unhealthyThreshold time.Duration - transitionGracePeriod time.Duration - enableReplicationReporter bool + enableHotRowProtection bool + enableHotRowProtectionDryRun bool + enableConsolidator bool + enableConsolidatorReplicas bool + enableHeartbeat bool + heartbeatInterval time.Duration + healthCheckInterval time.Duration + replicationHealthCheckInterval time.Duration + degradedThreshold time.Duration + unhealthyThreshold time.Duration + transitionGracePeriod time.Duration + enableReplicationReporter bool ) func init() { @@ -156,6 +159,7 @@ func init() { flagutil.DualFormatBoolVar(&currentConfig.CacheResultFields, "enable_query_plan_field_caching", defaultConfig.CacheResultFields, "This option fetches & caches fields (columns) when storing query plans") flag.DurationVar(&healthCheckInterval, "health_check_interval", 20*time.Second, "Interval between health checks") + flag.DurationVar(&replicationHealthCheckInterval, ReplicationHealthCheckIntervalSecondsFlagName, 20*time.Second, "Interval at which ReplicationManager checks replication health") flag.DurationVar(&degradedThreshold, "degraded_threshold", 30*time.Second, "replication lag after which a replica is considered degraded") flag.DurationVar(&unhealthyThreshold, "unhealthy_threshold", 2*time.Hour, "replication lag after which a replica is considered unhealthy") flag.DurationVar(&transitionGracePeriod, "serving_state_grace_period", 0, "how long to pause after broadcasting health to vtgate, before enforcing a new serving state") @@ -207,6 +211,13 @@ func Init() { } currentConfig.Healthcheck.IntervalSeconds.Set(healthCheckInterval) + + var actualReplHealthCheckInterval = healthCheckInterval + if flagWasPassed(ReplicationHealthCheckIntervalSecondsFlagName) { + 
actualReplHealthCheckInterval = replicationHealthCheckInterval + } + currentConfig.Healthcheck.ReplicationIntervalSeconds.Set(actualReplHealthCheckInterval) + currentConfig.Healthcheck.DegradedThresholdSeconds.Set(degradedThreshold) currentConfig.Healthcheck.UnhealthyThresholdSeconds.Set(unhealthyThreshold) currentConfig.GracePeriods.TransitionSeconds.Set(transitionGracePeriod) @@ -309,9 +320,10 @@ type HotRowProtectionConfig struct { // HealthcheckConfig contains the config for healthcheck. type HealthcheckConfig struct { - IntervalSeconds Seconds `json:"intervalSeconds,omitempty"` - DegradedThresholdSeconds Seconds `json:"degradedThresholdSeconds,omitempty"` - UnhealthyThresholdSeconds Seconds `json:"unhealthyThresholdSeconds,omitempty"` + IntervalSeconds Seconds `json:"intervalSeconds,omitempty"` + ReplicationIntervalSeconds Seconds `json:"replicationIntervalSeconds,omitempty"` + DegradedThresholdSeconds Seconds `json:"degradedThresholdSeconds,omitempty"` + UnhealthyThresholdSeconds Seconds `json:"unhealthyThresholdSeconds,omitempty"` } // GracePeriodsConfig contains various grace periods. 
@@ -433,9 +445,10 @@ var defaultConfig = TabletConfig{ MaxRows: 10000, }, Healthcheck: HealthcheckConfig{ - IntervalSeconds: 20, - DegradedThresholdSeconds: 30, - UnhealthyThresholdSeconds: 7200, + IntervalSeconds: 20, + ReplicationIntervalSeconds: 20, + DegradedThresholdSeconds: 30, + UnhealthyThresholdSeconds: 7200, }, ReplicationTracker: ReplicationTrackerConfig{ Mode: Disable, @@ -508,3 +521,13 @@ func defaultTransactionLimitConfig() TransactionLimitConfig { TransactionLimitBySubcomponent: false, } } + +func flagWasPassed(name string) bool { + found := false + flag.Visit(func(f *flag.Flag) { + if f.Name == name { + found = true + } + }) + return found +} diff --git a/go/vt/vttablet/tabletserver/tabletenv/config_test.go b/go/vt/vttablet/tabletserver/tabletenv/config_test.go index 71e4ad7f079..aa82d82a5e8 100644 --- a/go/vt/vttablet/tabletserver/tabletenv/config_test.go +++ b/go/vt/vttablet/tabletserver/tabletenv/config_test.go @@ -117,6 +117,7 @@ gracePeriods: {} healthcheck: degradedThresholdSeconds: 30 intervalSeconds: 20 + replicationIntervalSeconds: 20 unhealthyThresholdSeconds: 7200 hotRowProtection: maxConcurrency: 5 @@ -226,6 +227,7 @@ func TestFlags(t *testing.T) { want.HotRowProtection.Mode = Disable want.Consolidator = Enable want.Healthcheck.IntervalSeconds = 20 + want.Healthcheck.ReplicationIntervalSeconds = 20 want.Healthcheck.DegradedThresholdSeconds = 30 want.Healthcheck.UnhealthyThresholdSeconds = 7200 want.ReplicationTracker.HeartbeatIntervalSeconds = 1 @@ -309,9 +311,12 @@ func TestFlags(t *testing.T) { assert.Equal(t, want, currentConfig) healthCheckInterval = 1 * time.Second + replicationHealthCheckInterval = 2 * time.Second currentConfig.Healthcheck.IntervalSeconds = 0 + currentConfig.Healthcheck.ReplicationIntervalSeconds = 0 Init() want.Healthcheck.IntervalSeconds = 1 + want.Healthcheck.ReplicationIntervalSeconds = 1 assert.Equal(t, want, currentConfig) degradedThreshold = 2 * time.Second