From b126df0664e6608307d0988dd94da41c44f67d6b Mon Sep 17 00:00:00 2001 From: Manan Gupta Date: Wed, 27 Nov 2024 12:58:13 +0530 Subject: [PATCH 1/2] feat: add a test and fix the problem of running gtid detection on the primary tablet Signed-off-by: Manan Gupta --- go/vt/vtorc/inst/instance_dao.go | 5 +++ go/vt/vtorc/inst/instance_dao_test.go | 46 ++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/go/vt/vtorc/inst/instance_dao.go b/go/vt/vtorc/inst/instance_dao.go index c8ff218710f..cd0ee89e9c8 100644 --- a/go/vt/vtorc/inst/instance_dao.go +++ b/go/vt/vtorc/inst/instance_dao.go @@ -397,6 +397,11 @@ func detectErrantGTIDs(tabletAlias string, instance *Instance, tablet *topodatap var primaryInstance *Instance primaryAlias, _, _ := ReadShardPrimaryInformation(tablet.Keyspace, tablet.Shard) if primaryAlias != "" { + // Check if the current tablet is the primary. + // If it is, then we don't need to run errant gtid detection on it. + if primaryAlias == tabletAlias { + return nil + } primaryInstance, _, _ = ReadInstance(primaryAlias) } // Only run errant GTID detection, if we are sure that the data read of the current primary diff --git a/go/vt/vtorc/inst/instance_dao_test.go b/go/vt/vtorc/inst/instance_dao_test.go index f248ded5e2b..7cf589100f8 100644 --- a/go/vt/vtorc/inst/instance_dao_test.go +++ b/go/vt/vtorc/inst/instance_dao_test.go @@ -854,7 +854,7 @@ func TestDetectErrantGTIDs(t *testing.T) { primaryTablet := &topodatapb.Tablet{ Alias: &topodatapb.TabletAlias{ Cell: "zone-1", - Uid: 100, + Uid: 101, }, Keyspace: keyspaceName, Shard: shardName, @@ -891,3 +891,47 @@ func TestDetectErrantGTIDs(t *testing.T) { }) } } + +// TestPrimaryErrantGTIDs tests that we don't run Errant GTID detection on the primary tablet itself! +func TestPrimaryErrantGTIDs(t *testing.T) { + // Clear the database after the test. The easiest way to do that is to run all the initialization commands again. + defer func() { + db.ClearVTOrcDatabase() + }() + db.ClearVTOrcDatabase() + keyspaceName := "ks" + shardName := "0" + tablet := &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone-1", + Uid: 100, + }, + Keyspace: keyspaceName, + Shard: shardName, + } + instance := &Instance{ + SourceHost: "", + ExecutedGtidSet: "230ea8ea-81e3-11e4-972a-e25ec4bd140a:1-10589,8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-34,316d193c-70e5-11e5-adb2-ecf4bb2262ff:1-341", + } + + // Save shard record for the primary tablet. + err := SaveShard(topo.NewShardInfo(keyspaceName, shardName, &topodatapb.Shard{ + PrimaryAlias: tablet.Alias, + }, nil)) + require.NoError(t, err) + + // Store the tablet record and the instance. + instance.InstanceAlias = topoproto.TabletAliasString(tablet.Alias) + err = SaveTablet(tablet) + require.NoError(t, err) + err = WriteInstance(instance, true, nil) + require.NoError(t, err) + + // After this if we read a new information for the record that updates its + // gtid set further, we shouldn't be detecting errant GTIDs on it since it is the primary! + // We shouldn't be comparing it with a previous version of itself! + instance.ExecutedGtidSet = "230ea8ea-81e3-11e4-972a-e25ec4bd140a:1-10589,8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-34,316d193c-70e5-11e5-adb2-ecf4bb2262ff:1-351" + err = detectErrantGTIDs(topoproto.TabletAliasString(tablet.Alias), instance, tablet) + require.NoError(t, err) + require.EqualValues(t, "", instance.GtidErrant) +} From 36eda8201067a4596e102a050f07f980aab66bdd Mon Sep 17 00:00:00 2001 From: Manan Gupta Date: Wed, 27 Nov 2024 13:00:25 +0530 Subject: [PATCH 2/2] feat: remove an unrequired parameter in errant GTID detection Signed-off-by: Manan Gupta --- go/vt/vtorc/inst/instance_dao.go | 8 ++++---- go/vt/vtorc/inst/instance_dao_test.go | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/go/vt/vtorc/inst/instance_dao.go b/go/vt/vtorc/inst/instance_dao.go index cd0ee89e9c8..45291394c56 100644 --- a/go/vt/vtorc/inst/instance_dao.go +++ b/go/vt/vtorc/inst/instance_dao.go @@ -362,7 +362,7 @@ Cleanup: // Add replication group ancestry UUID as well. Otherwise, VTOrc thinks there are errant GTIDs in group // members and its replicas, even though they are not. instance.AncestryUUID = strings.Trim(instance.AncestryUUID, ",") - err = detectErrantGTIDs(tabletAlias, instance, tablet) + err = detectErrantGTIDs(instance, tablet) } latency.Stop("instance") @@ -390,7 +390,7 @@ Cleanup: } // detectErrantGTIDs detects the errant GTIDs on an instance. -func detectErrantGTIDs(tabletAlias string, instance *Instance, tablet *topodatapb.Tablet) (err error) { +func detectErrantGTIDs(instance *Instance, tablet *topodatapb.Tablet) (err error) { // If the tablet is not replicating from anyone, then it could be the previous primary. // We should check for errant GTIDs by finding the difference with the shard's current primary. if instance.primaryExecutedGtidSet == "" && instance.SourceHost == "" { @@ -399,7 +399,7 @@ func detectErrantGTIDs(tabletAlias string, instance *Instance, tablet *topodatap if primaryAlias != "" { // Check if the current tablet is the primary. // If it is, then we don't need to run errant gtid detection on it. - if primaryAlias == tabletAlias { + if primaryAlias == instance.InstanceAlias { return nil } primaryInstance, _, _ = ReadInstance(primaryAlias) @@ -439,7 +439,7 @@ func detectErrantGTIDs(tabletAlias string, instance *Instance, tablet *topodatap if err == nil { var gtidCount int64 gtidCount, err = replication.GTIDCount(instance.GtidErrant) - currentErrantGTIDCount.Set(tabletAlias, gtidCount) + currentErrantGTIDCount.Set(instance.InstanceAlias, gtidCount) } } } diff --git a/go/vt/vtorc/inst/instance_dao_test.go b/go/vt/vtorc/inst/instance_dao_test.go index 7cf589100f8..0d59de0588f 100644 --- a/go/vt/vtorc/inst/instance_dao_test.go +++ b/go/vt/vtorc/inst/instance_dao_test.go @@ -881,7 +881,8 @@ func TestDetectErrantGTIDs(t *testing.T) { require.NoError(t, err) } - err = detectErrantGTIDs(topoproto.TabletAliasString(tablet.Alias), tt.instance, tablet) + tt.instance.InstanceAlias = topoproto.TabletAliasString(tablet.Alias) + err = detectErrantGTIDs(tt.instance, tablet) if tt.wantErr { require.Error(t, err) return @@ -912,6 +913,7 @@ func TestPrimaryErrantGTIDs(t *testing.T) { instance := &Instance{ SourceHost: "", ExecutedGtidSet: "230ea8ea-81e3-11e4-972a-e25ec4bd140a:1-10589,8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-34,316d193c-70e5-11e5-adb2-ecf4bb2262ff:1-341", + InstanceAlias: topoproto.TabletAliasString(tablet.Alias), } // Save shard record for the primary tablet. @@ -921,7 +923,6 @@ func TestPrimaryErrantGTIDs(t *testing.T) { require.NoError(t, err) // Store the tablet record and the instance. - instance.InstanceAlias = topoproto.TabletAliasString(tablet.Alias) err = SaveTablet(tablet) require.NoError(t, err) err = WriteInstance(instance, true, nil) @@ -931,7 +932,7 @@ func TestPrimaryErrantGTIDs(t *testing.T) { // gtid set further, we shouldn't be detecting errant GTIDs on it since it is the primary! // We shouldn't be comparing it with a previous version of itself! instance.ExecutedGtidSet = "230ea8ea-81e3-11e4-972a-e25ec4bd140a:1-10589,8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-34,316d193c-70e5-11e5-adb2-ecf4bb2262ff:1-351" - err = detectErrantGTIDs(topoproto.TabletAliasString(tablet.Alias), instance, tablet) + err = detectErrantGTIDs(instance, tablet) require.NoError(t, err) require.EqualValues(t, "", instance.GtidErrant) }