From d356459fb6eb8e4b247890b0991ec8eefdeee7b6 Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Thu, 28 Sep 2023 11:13:27 +0200 Subject: [PATCH 1/2] `vtctld`/`vtorc`: improve reparenting stats (#13723) Signed-off-by: Tim Vaillancourt Signed-off-by: Manan Gupta Co-authored-by: Manan Gupta --- go/stats/timings.go | 4 +- go/test/endtoend/vtorc/general/vtorc_test.go | 20 ++ .../primaryfailure/primary_failure_test.go | 230 +++++++++++++- go/test/endtoend/vtorc/utils/utils.go | 141 +++++++++ .../reparentutil/emergency_reparenter.go | 24 +- .../reparentutil/emergency_reparenter_test.go | 30 +- .../vtctl/reparentutil/planned_reparenter.go | 16 + .../planned_reparenter_flaky_test.go | 291 ++++++++++++++++++ go/vt/vtctl/reparentutil/util.go | 7 + 9 files changed, 744 insertions(+), 19 deletions(-) diff --git a/go/stats/timings.go b/go/stats/timings.go index 9048bedee11..39d53bf3498 100644 --- a/go/stats/timings.go +++ b/go/stats/timings.go @@ -62,10 +62,12 @@ func NewTimings(name, help, label string, categories ...string) *Timings { return t } -// Reset will clear histograms: used during testing +// Reset will clear histograms and counters: used during testing func (t *Timings) Reset() { t.mu.RLock() t.histograms = make(map[string]*Histogram) + t.totalCount.Store(0) + t.totalTime.Store(0) t.mu.RUnlock() } diff --git a/go/test/endtoend/vtorc/general/vtorc_test.go b/go/test/endtoend/vtorc/general/vtorc_test.go index 560e351fb31..547c3618099 100644 --- a/go/test/endtoend/vtorc/general/vtorc_test.go +++ b/go/test/endtoend/vtorc/general/vtorc_test.go @@ -28,6 +28,7 @@ import ( "vitess.io/vitess/go/test/endtoend/cluster" "vitess.io/vitess/go/test/endtoend/vtorc/utils" "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/vtorc/logic" ) // Cases to test: @@ -72,6 +73,8 @@ func TestSingleKeyspace(t *testing.T) { utils.CheckPrimaryTablet(t, clusterInfo, shard0.Vttablets[0], true) utils.CheckReplication(t, clusterInfo, shard0.Vttablets[0], shard0.Vttablets[1:], 10*time.Second) + utils.WaitForSuccessfulRecoveryCount(t, clusterInfo.ClusterInstance.VTOrcProcesses[0], logic.ElectNewPrimaryRecoveryName, 1) + utils.WaitForSuccessfulPRSCount(t, clusterInfo.ClusterInstance.VTOrcProcesses[0], keyspace.Name, shard0.Name, 1) } // Cases to test: @@ -88,6 +91,8 @@ func TestKeyspaceShard(t *testing.T) { utils.CheckPrimaryTablet(t, clusterInfo, shard0.Vttablets[0], true) utils.CheckReplication(t, clusterInfo, shard0.Vttablets[0], shard0.Vttablets[1:], 10*time.Second) + utils.WaitForSuccessfulRecoveryCount(t, clusterInfo.ClusterInstance.VTOrcProcesses[0], logic.ElectNewPrimaryRecoveryName, 1) + utils.WaitForSuccessfulPRSCount(t, clusterInfo.ClusterInstance.VTOrcProcesses[0], keyspace.Name, shard0.Name, 1) } // 3. 
make primary readonly, let orc repair @@ -192,6 +197,9 @@ func TestReplicationFromOtherReplica(t *testing.T) { // TODO(deepthi): we should not need to do this, the DB should be created automatically _, err := curPrimary.VttabletProcess.QueryTablet(fmt.Sprintf("create database IF NOT EXISTS vt_%s", keyspace.Name), keyspace.Name, false) require.NoError(t, err) + vtOrcProcess := clusterInfo.ClusterInstance.VTOrcProcesses[0] + utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.ElectNewPrimaryRecoveryName, 1) + utils.WaitForSuccessfulPRSCount(t, vtOrcProcess, keyspace.Name, shard0.Name, 1) var replica, otherReplica *cluster.Vttablet for _, tablet := range shard0.Vttablets { @@ -378,6 +386,9 @@ func TestVtorcWithPrs(t *testing.T) { // find primary from topo curPrimary := utils.ShardPrimaryTablet(t, clusterInfo, keyspace, shard0) assert.NotNil(t, curPrimary, "should have elected a primary") + vtOrcProcess := clusterInfo.ClusterInstance.VTOrcProcesses[0] + utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.ElectNewPrimaryRecoveryName, 1) + utils.WaitForSuccessfulPRSCount(t, vtOrcProcess, keyspace.Name, shard0.Name, 1) // find any replica tablet other than the current primary var replica *cluster.Vttablet @@ -403,6 +414,15 @@ func TestVtorcWithPrs(t *testing.T) { // check that the replica gets promoted utils.CheckPrimaryTablet(t, clusterInfo, replica, true) + + // Verify that VTOrc didn't run any other recovery + utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.ElectNewPrimaryRecoveryName, 1) + utils.WaitForSuccessfulPRSCount(t, vtOrcProcess, keyspace.Name, shard0.Name, 1) + utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.RecoverDeadPrimaryRecoveryName, 0) + utils.WaitForSuccessfulERSCount(t, vtOrcProcess, keyspace.Name, shard0.Name, 0) + utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.FixPrimaryRecoveryName, 0) + utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.FixReplicaRecoveryName, 0) + utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.RecoverPrimaryHasPrimaryRecoveryName, 0) utils.VerifyWritesSucceed(t, clusterInfo, replica, shard0.Vttablets, 10*time.Second) } diff --git a/go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go b/go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go index a922db9f010..fb86db1607c 100644 --- a/go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go +++ b/go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go @@ -17,6 +17,8 @@ limitations under the License. 
package primaryfailure import ( + "fmt" + "path" "testing" "time" @@ -40,6 +42,70 @@ func TestDownPrimary(t *testing.T) { // find primary from topo curPrimary := utils.ShardPrimaryTablet(t, clusterInfo, keyspace, shard0) assert.NotNil(t, curPrimary, "should have elected a primary") + vtOrcProcess := clusterInfo.ClusterInstance.VTOrcProcesses[0] + utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.ElectNewPrimaryRecoveryName, 1) + utils.WaitForSuccessfulPRSCount(t, vtOrcProcess, keyspace.Name, shard0.Name, 1) + + // find the replica and rdonly tablets + var replica, rdonly *cluster.Vttablet + for _, tablet := range shard0.Vttablets { + // we know we have only two replcia tablets, so the one not the primary must be the other replica + if tablet.Alias != curPrimary.Alias && tablet.Type == "replica" { + replica = tablet + } + if tablet.Type == "rdonly" { + rdonly = tablet + } + } + assert.NotNil(t, replica, "could not find replica tablet") + assert.NotNil(t, rdonly, "could not find rdonly tablet") + + // Start a cross-cell replica + crossCellReplica := utils.StartVttablet(t, clusterInfo, utils.Cell2, false) + + // check that the replication is setup correctly before we failover + utils.CheckReplication(t, clusterInfo, curPrimary, []*cluster.Vttablet{rdonly, replica, crossCellReplica}, 10*time.Second) + // since all tablets are up and running, InstancePollSecondsExceeded should have `0` zero value + utils.WaitForInstancePollSecondsExceededCount(t, vtOrcProcess, "InstancePollSecondsExceeded", 0, true) + // Make the rdonly vttablet unavailable + err := rdonly.VttabletProcess.TearDown() + require.NoError(t, err) + err = rdonly.MysqlctlProcess.Stop() + require.NoError(t, err) + // We have bunch of Vttablets down. Therefore we expect at least 1 occurrence of InstancePollSecondsExceeded + utils.WaitForInstancePollSecondsExceededCount(t, vtOrcProcess, "InstancePollSecondsExceeded", 1, false) + // Make the current primary vttablet unavailable. + err = curPrimary.VttabletProcess.TearDown() + require.NoError(t, err) + err = curPrimary.MysqlctlProcess.Stop() + require.NoError(t, err) + defer func() { + // we remove the tablet from our global list + utils.PermanentlyRemoveVttablet(clusterInfo, curPrimary) + utils.PermanentlyRemoveVttablet(clusterInfo, rdonly) + }() + + // check that the replica gets promoted + utils.CheckPrimaryTablet(t, clusterInfo, replica, true) + + // also check that the replication is working correctly after failover + utils.VerifyWritesSucceed(t, clusterInfo, replica, []*cluster.Vttablet{crossCellReplica}, 10*time.Second) + utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.RecoverDeadPrimaryRecoveryName, 1) + utils.WaitForSuccessfulERSCount(t, vtOrcProcess, keyspace.Name, shard0.Name, 1) +} + +// bring down primary before VTOrc has started, let vtorc repair. 
+func TestDownPrimaryBeforeVTOrc(t *testing.T) { + defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance) + defer cluster.PanicHandler(t) + utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, nil, cluster.VTOrcConfiguration{}, 0, "none") + keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] + shard0 := &keyspace.Shards[0] + curPrimary := shard0.Vttablets[0] + + // Promote the first tablet as the primary + err := clusterInfo.ClusterInstance.VtctlclientProcess.InitializeShard(keyspace.Name, shard0.Name, clusterInfo.ClusterInstance.Cell, curPrimary.TabletUID) + require.NoError(t, err) // find the replica and rdonly tablets var replica, rdonly *cluster.Vttablet @@ -58,7 +124,140 @@ func TestDownPrimary(t *testing.T) { // check that the replication is setup correctly before we failover utils.CheckReplication(t, clusterInfo, curPrimary, []*cluster.Vttablet{rdonly, replica}, 10*time.Second) - // Make the current primary database unavailable. + // Make the current primary vttablet unavailable. + _ = curPrimary.VttabletProcess.TearDown() + err = curPrimary.MysqlctlProcess.Stop() + require.NoError(t, err) + + // Start a VTOrc instance + utils.StartVTOrcs(t, clusterInfo, []string{"--remote_operation_timeout=10s"}, cluster.VTOrcConfiguration{ + PreventCrossDataCenterPrimaryFailover: true, + }, 1) + + vtOrcProcess := clusterInfo.ClusterInstance.VTOrcProcesses[0] + + defer func() { + // we remove the tablet from our global list + utils.PermanentlyRemoveVttablet(clusterInfo, curPrimary) + }() + + // check that the replica gets promoted + utils.CheckPrimaryTablet(t, clusterInfo, replica, true) + + // also check that the replication is working correctly after failover + utils.VerifyWritesSucceed(t, clusterInfo, replica, []*cluster.Vttablet{rdonly}, 10*time.Second) + utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.RecoverDeadPrimaryRecoveryName, 1) + utils.WaitForSuccessfulERSCount(t, vtOrcProcess, keyspace.Name, shard0.Name, 1) +} + +// delete the primary record and let vtorc repair. 
+func TestDeletedPrimaryTablet(t *testing.T) { + defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance) + defer cluster.PanicHandler(t) + utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, []string{"--remote_operation_timeout=10s"}, cluster.VTOrcConfiguration{}, 1, "none") + keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] + shard0 := &keyspace.Shards[0] + // find primary from topo + curPrimary := utils.ShardPrimaryTablet(t, clusterInfo, keyspace, shard0) + assert.NotNil(t, curPrimary, "should have elected a primary") + vtOrcProcess := clusterInfo.ClusterInstance.VTOrcProcesses[0] + utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.ElectNewPrimaryRecoveryName, 1) + utils.WaitForSuccessfulPRSCount(t, vtOrcProcess, keyspace.Name, shard0.Name, 1) + + // find the replica and rdonly tablets + var replica, rdonly *cluster.Vttablet + for _, tablet := range shard0.Vttablets { + // we know we have only two replcia tablets, so the one not the primary must be the other replica + if tablet.Alias != curPrimary.Alias && tablet.Type == "replica" { + replica = tablet + } + if tablet.Type == "rdonly" { + rdonly = tablet + } + } + assert.NotNil(t, replica, "could not find replica tablet") + assert.NotNil(t, rdonly, "could not find rdonly tablet") + + // check that the replication is setup correctly before we failover + utils.CheckReplication(t, clusterInfo, curPrimary, []*cluster.Vttablet{replica, rdonly}, 10*time.Second) + + // Disable VTOrc recoveries + vtOrcProcess.DisableGlobalRecoveries(t) + // use vtctlclient to stop replication on the replica + _, err := clusterInfo.ClusterInstance.VtctldClientProcess.ExecuteCommandWithOutput("StopReplication", replica.Alias) + require.NoError(t, err) + // insert a write that is not available on the replica. + utils.VerifyWritesSucceed(t, clusterInfo, curPrimary, []*cluster.Vttablet{rdonly}, 10*time.Second) + + // Make the current primary vttablet unavailable and delete its tablet record. + _ = curPrimary.VttabletProcess.TearDown() + err = curPrimary.MysqlctlProcess.Stop() + require.NoError(t, err) + // use vtctlclient to start replication on the replica back + _, err = clusterInfo.ClusterInstance.VtctldClientProcess.ExecuteCommandWithOutput("StartReplication", replica.Alias) + require.NoError(t, err) + err = clusterInfo.ClusterInstance.VtctldClientProcess.ExecuteCommand("DeleteTablets", "--allow-primary", curPrimary.Alias) + require.NoError(t, err) + // Enable VTOrc recoveries now + vtOrcProcess.EnableGlobalRecoveries(t) + + defer func() { + // we remove the tablet from our global list + utils.PermanentlyRemoveVttablet(clusterInfo, curPrimary) + }() + + // check that the replica gets promoted. Also verify that it has all the writes. + utils.CheckPrimaryTablet(t, clusterInfo, replica, true) + utils.CheckTabletUptoDate(t, clusterInfo, replica) + + // also check that the replication is working correctly after failover + utils.VerifyWritesSucceed(t, clusterInfo, replica, []*cluster.Vttablet{rdonly}, 10*time.Second) + utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.RecoverPrimaryTabletDeletedRecoveryName, 1) +} + +// TestDeadPrimaryRecoversImmediately test Vtorc ability to recover immediately if primary is dead. +// Reason is, unlike other recoveries, in DeadPrimary we don't call DiscoverInstance since we know +// that primary is unreachable. This help us save few seconds depending on value of `RemoteOperationTimeout` flag. 
+func TestDeadPrimaryRecoversImmediately(t *testing.T) { + defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance) + defer cluster.PanicHandler(t) + // We specify the --wait-replicas-timeout to a small value because we spawn a cross-cell replica later in the test. + // If that replica is more advanced than the same-cell-replica, then we try to promote the cross-cell replica as an intermediate source. + // If we don't specify a small value of --wait-replicas-timeout, then we would end up waiting for 30 seconds for the dead-primary to respond, failing this test. + utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, []string{"--remote_operation_timeout=10s", "--wait-replicas-timeout=5s"}, cluster.VTOrcConfiguration{ + PreventCrossDataCenterPrimaryFailover: true, + }, 1, "semi_sync") + keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] + shard0 := &keyspace.Shards[0] + // find primary from topo + curPrimary := utils.ShardPrimaryTablet(t, clusterInfo, keyspace, shard0) + assert.NotNil(t, curPrimary, "should have elected a primary") + vtOrcProcess := clusterInfo.ClusterInstance.VTOrcProcesses[0] + utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.ElectNewPrimaryRecoveryName, 1) + utils.WaitForSuccessfulPRSCount(t, vtOrcProcess, keyspace.Name, shard0.Name, 1) + + // find the replica and rdonly tablets + var replica, rdonly *cluster.Vttablet + for _, tablet := range shard0.Vttablets { + // we know we have only two replcia tablets, so the one not the primary must be the other replica + if tablet.Alias != curPrimary.Alias && tablet.Type == "replica" { + replica = tablet + } + if tablet.Type == "rdonly" { + rdonly = tablet + } + } + assert.NotNil(t, replica, "could not find replica tablet") + assert.NotNil(t, rdonly, "could not find rdonly tablet") + + // Start a cross-cell replica + crossCellReplica := utils.StartVttablet(t, clusterInfo, utils.Cell2, false) + + // check that the replication is setup correctly before we failover + utils.CheckReplication(t, clusterInfo, curPrimary, []*cluster.Vttablet{rdonly, replica, crossCellReplica}, 10*time.Second) + + // Make the current primary vttablet unavailable. + curPrimary.VttabletProcess.Kill() err := curPrimary.MysqlctlProcess.Stop() require.NoError(t, err) defer func() { @@ -69,7 +268,34 @@ func TestDownPrimary(t *testing.T) { // check that the replica gets promoted utils.CheckPrimaryTablet(t, clusterInfo, replica, true) // also check that the replication is working correctly after failover - utils.VerifyWritesSucceed(t, clusterInfo, replica, []*cluster.Vttablet{rdonly}, 10*time.Second) + utils.VerifyWritesSucceed(t, clusterInfo, replica, []*cluster.Vttablet{crossCellReplica}, 10*time.Second) + utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.RecoverDeadPrimaryRecoveryName, 1) + utils.WaitForSuccessfulERSCount(t, vtOrcProcess, keyspace.Name, shard0.Name, 1) + + // Parse log file and find out how much time it took for DeadPrimary to recover. 
+ logFile := path.Join(vtOrcProcess.LogDir, vtOrcProcess.LogFileName) + // log prefix printed at the end of analysis where we conclude we have DeadPrimary + t1 := extractTimeFromLog(t, logFile, "Proceeding with DeadPrimary recovery") + // log prefix printed at the end of recovery + t2 := extractTimeFromLog(t, logFile, "auditType:RecoverDeadPrimary") + curr := time.Now().Format("2006-01-02") + timeLayout := "2006-01-02 15:04:05.000000" + timeStr1 := fmt.Sprintf("%s %s", curr, t1) + timeStr2 := fmt.Sprintf("%s %s", curr, t2) + time1, err := time.Parse(timeLayout, timeStr1) + if err != nil { + t.Errorf("unable to parse time %s", err.Error()) + } + time2, err := time.Parse(timeLayout, timeStr2) + if err != nil { + t.Errorf("unable to parse time %s", err.Error()) + } + diff := time2.Sub(time1) + fmt.Printf("The difference between %s and %s is %v seconds.\n", t1, t2, diff.Seconds()) + // assert that it takes less than `remote_operation_timeout` to recover from `DeadPrimary` + // use the value provided in `remote_operation_timeout` flag to compare with. + // We are testing against 9.5 seconds to be safe and prevent flakiness. + assert.Less(t, diff.Seconds(), 9.5) } // Failover should not be cross data centers, according to the configuration file diff --git a/go/test/endtoend/vtorc/utils/utils.go b/go/test/endtoend/vtorc/utils/utils.go index 96075aef3c0..c7c4d5390b6 100644 --- a/go/test/endtoend/vtorc/utils/utils.go +++ b/go/test/endtoend/vtorc/utils/utils.go @@ -18,12 +18,16 @@ package utils import ( "context" + "encoding/json" "fmt" "io" + "math" "net/http" "os" "os/exec" "path" + "reflect" + "strconv" "strings" "testing" "time" @@ -915,3 +919,140 @@ func WaitForReadOnlyValue(t *testing.T, curPrimary *cluster.Vttablet, expectValu } return false } + +// WaitForSuccessfulRecoveryCount waits until the given recovery name's count of successful runs matches the count expected +func WaitForSuccessfulRecoveryCount(t *testing.T, vtorcInstance *cluster.VTOrcProcess, recoveryName string, countExpected int) { + t.Helper() + timeout := 15 * time.Second + startTime := time.Now() + for time.Since(startTime) < timeout { + vars := vtorcInstance.GetVars() + successfulRecoveriesMap := vars["SuccessfulRecoveries"].(map[string]interface{}) + successCount := getIntFromValue(successfulRecoveriesMap[recoveryName]) + if successCount == countExpected { + return + } + time.Sleep(time.Second) + } + vars := vtorcInstance.GetVars() + successfulRecoveriesMap := vars["SuccessfulRecoveries"].(map[string]interface{}) + successCount := getIntFromValue(successfulRecoveriesMap[recoveryName]) + assert.EqualValues(t, countExpected, successCount) +} + +// WaitForSuccessfulPRSCount waits until the given keyspace-shard's count of successful prs runs matches the count expected. 
+func WaitForSuccessfulPRSCount(t *testing.T, vtorcInstance *cluster.VTOrcProcess, keyspace, shard string, countExpected int) { + t.Helper() + timeout := 15 * time.Second + startTime := time.Now() + mapKey := fmt.Sprintf("%v.%v.success", keyspace, shard) + for time.Since(startTime) < timeout { + vars := vtorcInstance.GetVars() + prsCountsMap := vars["planned_reparent_counts"].(map[string]interface{}) + successCount := getIntFromValue(prsCountsMap[mapKey]) + if successCount == countExpected { + return + } + time.Sleep(time.Second) + } + vars := vtorcInstance.GetVars() + prsCountsMap := vars["planned_reparent_counts"].(map[string]interface{}) + successCount := getIntFromValue(prsCountsMap[mapKey]) + assert.EqualValues(t, countExpected, successCount) +} + +// WaitForSuccessfulERSCount waits until the given keyspace-shard's count of successful ers runs matches the count expected. +func WaitForSuccessfulERSCount(t *testing.T, vtorcInstance *cluster.VTOrcProcess, keyspace, shard string, countExpected int) { + t.Helper() + timeout := 15 * time.Second + startTime := time.Now() + mapKey := fmt.Sprintf("%v.%v.success", keyspace, shard) + for time.Since(startTime) < timeout { + vars := vtorcInstance.GetVars() + ersCountsMap := vars["emergency_reparent_counts"].(map[string]interface{}) + successCount := getIntFromValue(ersCountsMap[mapKey]) + if successCount == countExpected { + return + } + time.Sleep(time.Second) + } + vars := vtorcInstance.GetVars() + ersCountsMap := vars["emergency_reparent_counts"].(map[string]interface{}) + successCount := getIntFromValue(ersCountsMap[mapKey]) + assert.EqualValues(t, countExpected, successCount) +} + +// getIntFromValue is a helper function to get an integer from the given value. +// If it is convertible to a float, then we round the number to the nearest integer. +// If the value is not numeric at all, we return 0. +func getIntFromValue(val any) int { + value := reflect.ValueOf(val) + if value.CanFloat() { + return int(math.Round(value.Float())) + } + if value.CanInt() { + return int(value.Int()) + } + return 0 +} + +// WaitForTabletType waits for the tablet to reach a certain type. +func WaitForTabletType(t *testing.T, tablet *cluster.Vttablet, expectedTabletType string) { + t.Helper() + err := tablet.VttabletProcess.WaitForTabletTypes([]string{expectedTabletType}) + require.NoError(t, err) +} + +// WaitForInstancePollSecondsExceededCount waits for 30 seconds and then queries api/aggregated-discovery-metrics. +// It expects to find minimum occurrence or exact count of `keyName` provided. 
+func WaitForInstancePollSecondsExceededCount(t *testing.T, vtorcInstance *cluster.VTOrcProcess, keyName string, minCountExpected float64, enforceEquality bool) { + t.Helper() + var sinceInSeconds = 30 + duration := time.Duration(sinceInSeconds) + time.Sleep(duration * time.Second) + + statusCode, res, err := vtorcInstance.MakeAPICall("api/aggregated-discovery-metrics?seconds=" + strconv.Itoa(sinceInSeconds)) + if err != nil { + assert.Fail(t, "Not able to call api/aggregated-discovery-metrics") + } + if statusCode == 200 { + resultMap := make(map[string]any) + err := json.Unmarshal([]byte(res), &resultMap) + if err != nil { + assert.Fail(t, "invalid response from api/aggregated-discovery-metrics") + } + successCount := resultMap[keyName] + if iSuccessCount, ok := successCount.(float64); ok { + if enforceEquality { + assert.Equal(t, iSuccessCount, minCountExpected) + } else { + assert.GreaterOrEqual(t, iSuccessCount, minCountExpected) + } + return + } + } + assert.Fail(t, "invalid response from api/aggregated-discovery-metrics") +} + +// PrintVTOrcLogsOnFailure prints the VTOrc logs on failure of the test. +// This function is supposed to be called as the first defer command from the vtorc tests. +func PrintVTOrcLogsOnFailure(t *testing.T, clusterInstance *cluster.LocalProcessCluster) { + // If the test has not failed, then we don't need to print anything. + if !t.Failed() { + return + } + + log.Errorf("Printing VTOrc logs") + for _, vtorc := range clusterInstance.VTOrcProcesses { + if vtorc == nil || vtorc.LogFileName == "" { + continue + } + filePath := path.Join(vtorc.LogDir, vtorc.LogFileName) + log.Errorf("Printing file - %s", filePath) + content, err := os.ReadFile(filePath) + if err != nil { + log.Errorf("Error while reading the file - %v", err) + } + log.Errorf("%s", string(content)) + } +} diff --git a/go/vt/vtctl/reparentutil/emergency_reparenter.go b/go/vt/vtctl/reparentutil/emergency_reparenter.go index ba846ebc147..f3b2ce8d2b4 100644 --- a/go/vt/vtctl/reparentutil/emergency_reparenter.go +++ b/go/vt/vtctl/reparentutil/emergency_reparenter.go @@ -68,9 +68,14 @@ type EmergencyReparentOptions struct { // counters for Emergency Reparent Shard var ( - ersCounter = stats.NewGauge("ers_counter", "Number of times Emergency Reparent Shard has been run") - ersSuccessCounter = stats.NewGauge("ers_success_counter", "Number of times Emergency Reparent Shard has succeeded") - ersFailureCounter = stats.NewGauge("ers_failure_counter", "Number of times Emergency Reparent Shard has failed") + // TODO(timvaillancourt): remove legacyERS* gauges in v19+. + legacyERSCounter = stats.NewGauge("ers_counter", "Number of times Emergency Reparent Shard has been run") + legacyERSSuccessCounter = stats.NewGauge("ers_success_counter", "Number of times Emergency Reparent Shard has succeeded") + legacyERSFailureCounter = stats.NewGauge("ers_failure_counter", "Number of times Emergency Reparent Shard has failed") + + ersCounter = stats.NewCountersWithMultiLabels("emergency_reparent_counts", "Number of times Emergency Reparent Shard has been run", + []string{"Keyspace", "Shard", "Result"}, + ) ) // NewEmergencyReparenter returns a new EmergencyReparenter object, ready to @@ -98,26 +103,33 @@ func NewEmergencyReparenter(ts *topo.Server, tmc tmclient.TabletManagerClient, l // keyspace and shard. 
func (erp *EmergencyReparenter) ReparentShard(ctx context.Context, keyspace string, shard string, opts EmergencyReparentOptions) (*events.Reparent, error) { var err error + statsLabels := []string{keyspace, shard} + // First step is to lock the shard for the given operation, if not already locked if err = topo.CheckShardLocked(ctx, keyspace, shard); err != nil { var unlock func(*error) opts.lockAction = erp.getLockAction(opts.NewPrimaryAlias) ctx, unlock, err = erp.ts.LockShard(ctx, keyspace, shard, opts.lockAction) if err != nil { + ersCounter.Add(append(statsLabels, failureResult), 1) return nil, err } defer unlock(&err) } // dispatch success or failure of ERS + startTime := time.Now() ev := &events.Reparent{} defer func() { + reparentShardOpTimings.Add("EmergencyReparentShard", time.Since(startTime)) switch err { case nil: - ersSuccessCounter.Add(1) + legacyERSSuccessCounter.Add(1) + ersCounter.Add(append(statsLabels, successResult), 1) event.DispatchUpdate(ev, "finished EmergencyReparentShard") default: - ersFailureCounter.Add(1) + legacyERSFailureCounter.Add(1) + ersCounter.Add(append(statsLabels, failureResult), 1) event.DispatchUpdate(ev, "failed EmergencyReparentShard: "+err.Error()) } }() @@ -141,7 +153,7 @@ func (erp *EmergencyReparenter) getLockAction(newPrimaryAlias *topodatapb.Tablet func (erp *EmergencyReparenter) reparentShardLocked(ctx context.Context, ev *events.Reparent, keyspace, shard string, opts EmergencyReparentOptions) (err error) { // log the starting of the operation and increment the counter erp.logger.Infof("will initiate emergency reparent shard in keyspace - %s, shard - %s", keyspace, shard) - ersCounter.Add(1) + legacyERSCounter.Add(1) var ( stoppedReplicationSnapshot *replicationSnapshot diff --git a/go/vt/vtctl/reparentutil/emergency_reparenter_test.go b/go/vt/vtctl/reparentutil/emergency_reparenter_test.go index af7b33a3151..40c3c295f30 100644 --- a/go/vt/vtctl/reparentutil/emergency_reparenter_test.go +++ b/go/vt/vtctl/reparentutil/emergency_reparenter_test.go @@ -2747,10 +2747,12 @@ func TestEmergencyReparenter_waitForAllRelayLogsToApply(t *testing.T) { } } -func TestEmergencyReparenterCounters(t *testing.T) { - ersCounter.Set(0) - ersSuccessCounter.Set(0) - ersFailureCounter.Set(0) +func TestEmergencyReparenterStats(t *testing.T) { + ersCounter.ResetAll() + legacyERSCounter.Reset() + legacyERSSuccessCounter.Reset() + legacyERSFailureCounter.Reset() + reparentShardOpTimings.Reset() emergencyReparentOps := EmergencyReparentOptions{} tmc := &testutil.TabletManagerClient{ @@ -2879,9 +2881,13 @@ func TestEmergencyReparenterCounters(t *testing.T) { require.NoError(t, err) // check the counter values - require.EqualValues(t, 1, ersCounter.Get()) - require.EqualValues(t, 1, ersSuccessCounter.Get()) - require.EqualValues(t, 0, ersFailureCounter.Get()) + require.EqualValues(t, map[string]int64{"testkeyspace.-.success": 1}, ersCounter.Counts()) + require.EqualValues(t, map[string]int64{"All": 1, "EmergencyReparentShard": 1}, reparentShardOpTimings.Counts()) + + // check the legacy counter values + require.EqualValues(t, 1, legacyERSCounter.Get()) + require.EqualValues(t, 1, legacyERSSuccessCounter.Get()) + require.EqualValues(t, 0, legacyERSFailureCounter.Get()) // set emergencyReparentOps to request a non existent tablet emergencyReparentOps.NewPrimaryAlias = &topodatapb.TabletAlias{ @@ -2894,9 +2900,13 @@ func TestEmergencyReparenterCounters(t *testing.T) { require.Error(t, err) // check the counter values - require.EqualValues(t, 2, ersCounter.Get()) - 
require.EqualValues(t, 1, ersSuccessCounter.Get()) - require.EqualValues(t, 1, ersFailureCounter.Get()) + require.EqualValues(t, map[string]int64{"testkeyspace.-.success": 1, "testkeyspace.-.failure": 1}, ersCounter.Counts()) + require.EqualValues(t, map[string]int64{"All": 2, "EmergencyReparentShard": 2}, reparentShardOpTimings.Counts()) + + // check the legacy counter values + require.EqualValues(t, 2, legacyERSCounter.Get()) + require.EqualValues(t, 1, legacyERSSuccessCounter.Get()) + require.EqualValues(t, 1, legacyERSFailureCounter.Get()) } func TestEmergencyReparenter_findMostAdvanced(t *testing.T) { diff --git a/go/vt/vtctl/reparentutil/planned_reparenter.go b/go/vt/vtctl/reparentutil/planned_reparenter.go index 4f46d234e25..31b222f9df5 100644 --- a/go/vt/vtctl/reparentutil/planned_reparenter.go +++ b/go/vt/vtctl/reparentutil/planned_reparenter.go @@ -26,6 +26,7 @@ import ( "vitess.io/vitess/go/event" "vitess.io/vitess/go/mysql" + "vitess.io/vitess/go/stats" "vitess.io/vitess/go/vt/concurrency" "vitess.io/vitess/go/vt/logutil" "vitess.io/vitess/go/vt/topo" @@ -39,6 +40,13 @@ import ( "vitess.io/vitess/go/vt/proto/vtrpc" ) +// counters for Planned Reparent Shard +var ( + prsCounter = stats.NewCountersWithMultiLabels("planned_reparent_counts", "Number of times Planned Reparent Shard has been run", + []string{"Keyspace", "Shard", "Result"}, + ) +) + // PlannedReparenter performs PlannedReparentShard operations. type PlannedReparenter struct { ts *topo.Server @@ -88,11 +96,14 @@ func NewPlannedReparenter(ts *topo.Server, tmc tmclient.TabletManagerClient, log // both the current and desired primary are reachable and in a good state. func (pr *PlannedReparenter) ReparentShard(ctx context.Context, keyspace string, shard string, opts PlannedReparentOptions) (*events.Reparent, error) { var err error + statsLabels := []string{keyspace, shard} + if err = topo.CheckShardLocked(ctx, keyspace, shard); err != nil { var unlock func(*error) opts.lockAction = pr.getLockAction(opts) ctx, unlock, err = pr.ts.LockShard(ctx, keyspace, shard, opts.lockAction) if err != nil { + prsCounter.Add(append(statsLabels, failureResult), 1) return nil, err } defer unlock(&err) @@ -101,18 +112,23 @@ func (pr *PlannedReparenter) ReparentShard(ctx context.Context, keyspace string, if opts.NewPrimaryAlias == nil && opts.AvoidPrimaryAlias == nil { shardInfo, err := pr.ts.GetShard(ctx, keyspace, shard) if err != nil { + prsCounter.Add(append(statsLabels, failureResult), 1) return nil, err } opts.AvoidPrimaryAlias = shardInfo.PrimaryAlias } + startTime := time.Now() ev := &events.Reparent{} defer func() { + reparentShardOpTimings.Add("PlannedReparentShard", time.Since(startTime)) switch err { case nil: + prsCounter.Add(append(statsLabels, successResult), 1) event.DispatchUpdate(ev, "finished PlannedReparentShard") default: + prsCounter.Add(append(statsLabels, failureResult), 1) event.DispatchUpdate(ev, "failed PlannedReparentShard: "+err.Error()) } }() diff --git a/go/vt/vtctl/reparentutil/planned_reparenter_flaky_test.go b/go/vt/vtctl/reparentutil/planned_reparenter_flaky_test.go index 09900c4a624..b186f68b0f2 100644 --- a/go/vt/vtctl/reparentutil/planned_reparenter_flaky_test.go +++ b/go/vt/vtctl/reparentutil/planned_reparenter_flaky_test.go @@ -3719,3 +3719,294 @@ func AssertReparentEventsEqual(t *testing.T, expected *events.Reparent, actual * AssertReparentEventsEqualWithMessage(t, expected, actual, "") } + +// TestPlannedReparenter_verifyAllTabletsReachable tests the functionality of verifyAllTabletsReachable. 
+func TestPlannedReparenter_verifyAllTabletsReachable(t *testing.T) { + tests := []struct { + name string + tmc tmclient.TabletManagerClient + tabletMap map[string]*topo.TabletInfo + remoteOpTime time.Duration + wantErr string + }{ + { + name: "Success", + tmc: &testutil.TabletManagerClient{ + PrimaryStatusResults: map[string]struct { + Status *replicationdatapb.PrimaryStatus + Error error + }{ + "zone1-0000000200": { + Status: &replicationdatapb.PrimaryStatus{}, + }, + "zone1-0000000201": { + Status: &replicationdatapb.PrimaryStatus{}, + }, + "zone1-0000000100": { + Status: &replicationdatapb.PrimaryStatus{}, + }, + }, + }, + tabletMap: map[string]*topo.TabletInfo{ + "zone1-0000000100": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 100, + }, + Type: topodatapb.TabletType_PRIMARY, + }, + }, + "zone1-0000000200": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 200, + }, + Type: topodatapb.TabletType_REPLICA, + }, + }, + "zone1-0000000201": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 201, + }, + Type: topodatapb.TabletType_REPLICA, + }, + }, + }, + }, { + name: "Failure", + tmc: &testutil.TabletManagerClient{ + PrimaryStatusResults: map[string]struct { + Status *replicationdatapb.PrimaryStatus + Error error + }{ + "zone1-0000000200": { + Error: fmt.Errorf("primary status failed"), + }, + "zone1-0000000201": { + Status: &replicationdatapb.PrimaryStatus{}, + }, + "zone1-0000000100": { + Status: &replicationdatapb.PrimaryStatus{}, + }, + }, + }, + tabletMap: map[string]*topo.TabletInfo{ + "zone1-0000000100": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 100, + }, + Type: topodatapb.TabletType_PRIMARY, + }, + }, + "zone1-0000000200": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 200, + }, + Type: topodatapb.TabletType_REPLICA, + }, + }, + "zone1-0000000201": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 201, + }, + Type: topodatapb.TabletType_REPLICA, + }, + }, + }, + wantErr: "primary status failed", + }, { + name: "Timeout", + tmc: &testutil.TabletManagerClient{ + PrimaryStatusDelays: map[string]time.Duration{ + "zone1-0000000100": 20 * time.Second, + }, + PrimaryStatusResults: map[string]struct { + Status *replicationdatapb.PrimaryStatus + Error error + }{ + "zone1-0000000200": { + Status: &replicationdatapb.PrimaryStatus{}, + }, + "zone1-0000000201": { + Status: &replicationdatapb.PrimaryStatus{}, + }, + "zone1-0000000100": { + Status: &replicationdatapb.PrimaryStatus{}, + }, + }, + }, + remoteOpTime: 100 * time.Millisecond, + tabletMap: map[string]*topo.TabletInfo{ + "zone1-0000000100": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 100, + }, + Type: topodatapb.TabletType_PRIMARY, + }, + }, + "zone1-0000000200": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 200, + }, + Type: topodatapb.TabletType_REPLICA, + }, + }, + "zone1-0000000201": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 201, + }, + Type: topodatapb.TabletType_REPLICA, + }, + }, + }, + wantErr: "context deadline exceeded", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + ts := memorytopo.NewServer(ctx, "zone1") + defer ts.Close() + + pr := 
&PlannedReparenter{ + ts: ts, + tmc: tt.tmc, + } + if tt.remoteOpTime != 0 { + oldTime := topo.RemoteOperationTimeout + topo.RemoteOperationTimeout = tt.remoteOpTime + defer func() { + topo.RemoteOperationTimeout = oldTime + }() + } + err := pr.verifyAllTabletsReachable(context.Background(), tt.tabletMap) + if tt.wantErr == "" { + require.NoError(t, err) + return + } + require.ErrorContains(t, err, tt.wantErr) + }) + } +} + +func TestPlannedReparenterStats(t *testing.T) { + prsCounter.ResetAll() + reparentShardOpTimings.Reset() + + tmc := &testutil.TabletManagerClient{ + PrimaryPositionResults: map[string]struct { + Position string + Error error + }{ + "zone1-0000000100": { + Position: "position1", + Error: nil, + }, + }, + PopulateReparentJournalResults: map[string]error{ + "zone1-0000000100": nil, + }, + SetReplicationSourceResults: map[string]error{ + "zone1-0000000101": nil, + }, + SetReadWriteResults: map[string]error{ + "zone1-0000000100": nil, + }, + // This is only needed to verify reachability, so empty results are fine. + PrimaryStatusResults: map[string]struct { + Status *replicationdatapb.PrimaryStatus + Error error + }{ + "zone1-0000000101": { + Status: &replicationdatapb.PrimaryStatus{}, + }, + "zone1-0000000100": { + Status: &replicationdatapb.PrimaryStatus{}, + }, + }, + } + shards := []*vtctldatapb.Shard{ + { + Keyspace: "testkeyspace", + Name: "-", + }, + } + tablets := []*topodatapb.Tablet{ + { + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 100, + }, + Type: topodatapb.TabletType_PRIMARY, + Keyspace: "testkeyspace", + Shard: "-", + }, + { + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 101, + }, + Type: topodatapb.TabletType_REPLICA, + Keyspace: "testkeyspace", + Shard: "-", + }, + } + plannedReparentOps := PlannedReparentOptions{ + NewPrimaryAlias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 100, + }, + } + keyspace := "testkeyspace" + shard := "-" + ts := memorytopo.NewServer(context.Background(), "zone1") + + ctx := context.Background() + logger := logutil.NewMemoryLogger() + + testutil.AddShards(ctx, t, ts, shards...) + testutil.AddTablets(ctx, t, ts, &testutil.AddTabletOptions{ + AlsoSetShardPrimary: true, + SkipShardCreation: false, + }, tablets...) 
+ + prp := NewPlannedReparenter(ts, tmc, logger) + // run a successful prs + _, err := prp.ReparentShard(ctx, keyspace, shard, plannedReparentOps) + require.NoError(t, err) + + // check the counter values + require.EqualValues(t, map[string]int64{"testkeyspace.-.success": 1}, prsCounter.Counts()) + require.EqualValues(t, map[string]int64{"All": 1, "PlannedReparentShard": 1}, reparentShardOpTimings.Counts()) + + // set plannedReparentOps to request a non existent tablet + plannedReparentOps.NewPrimaryAlias = &topodatapb.TabletAlias{ + Cell: "bogus", + Uid: 100, + } + + // run a failing prs + _, err = prp.ReparentShard(ctx, keyspace, shard, plannedReparentOps) + require.Error(t, err) + + // check the counter values + require.EqualValues(t, map[string]int64{"testkeyspace.-.success": 1, "testkeyspace.-.failure": 1}, prsCounter.Counts()) + require.EqualValues(t, map[string]int64{"All": 2, "PlannedReparentShard": 2}, reparentShardOpTimings.Counts()) +} diff --git a/go/vt/vtctl/reparentutil/util.go b/go/vt/vtctl/reparentutil/util.go index f4cebc3dd7d..ac51a45c65f 100644 --- a/go/vt/vtctl/reparentutil/util.go +++ b/go/vt/vtctl/reparentutil/util.go @@ -23,6 +23,7 @@ import ( "time" "vitess.io/vitess/go/mysql" + "vitess.io/vitess/go/stats" "vitess.io/vitess/go/vt/concurrency" "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/logutil" @@ -38,6 +39,12 @@ import ( "vitess.io/vitess/go/vt/proto/vtrpc" ) +var ( + reparentShardOpTimings = stats.NewTimings("reparent_shard_operation_timings", "Timings of reparent shard operations", "Operation") + failureResult = "failure" + successResult = "success" +) + // ChooseNewPrimary finds a tablet that should become a primary after reparent. // The criteria for the new primary-elect are (preferably) to be in the same // cell as the current primary, and to be different from avoidPrimaryAlias. The From da805ffa80363fe96d98d9256cdec355895a390f Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Fri, 6 Oct 2023 19:06:06 +0200 Subject: [PATCH 2/2] Adjust for v14 go/sync2 Signed-off-by: Tim Vaillancourt --- go/stats/timings.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go/stats/timings.go b/go/stats/timings.go index 39d53bf3498..cd51ec77a66 100644 --- a/go/stats/timings.go +++ b/go/stats/timings.go @@ -66,8 +66,8 @@ func NewTimings(name, help, label string, categories ...string) *Timings { func (t *Timings) Reset() { t.mu.RLock() t.histograms = make(map[string]*Histogram) - t.totalCount.Store(0) - t.totalTime.Store(0) + t.totalCount.Set(0) + t.totalTime.Set(0) t.mu.RUnlock() }