Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remember all tablets in VTOrc #16888

Merged
merged 2 commits into from
Oct 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions go/test/endtoend/vtorc/general/vtorc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,46 @@ func TestMultipleDurabilities(t *testing.T) {
assert.NotNil(t, primary, "should have elected a primary")
}

// TestDrainedTablet tests that we don't forget drained tablets and they still show up in the vtorc output.
//
// The test picks a tablet of the shard that is not the current primary, changes
// its type to DRAINED through vtctldclient and waits until VTOrc reports one
// drained tablet. It then changes the tablet back to REPLICA and waits until
// VTOrc reports zero drained tablets.
func TestDrainedTablet(t *testing.T) {
	defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
	defer cluster.PanicHandler(t)

	// Setup a normal cluster and start vtorc
	utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 0, nil, cluster.VTOrcConfiguration{}, 1, "")
	keyspace := &clusterInfo.ClusterInstance.Keyspaces[0]
	shard0 := &keyspace.Shards[0]

	// find primary from topo
	curPrimary := utils.ShardPrimaryTablet(t, clusterInfo, keyspace, shard0)
	// curPrimary.Alias is dereferenced below, so a missing primary must stop the
	// test right here instead of letting it run into a nil pointer panic.
	require.NotNil(t, curPrimary, "should have elected a primary")
	vtOrcProcess := clusterInfo.ClusterInstance.VTOrcProcesses[0]

	// find any replica tablet other than the current primary
	var replica *cluster.Vttablet
	for _, tablet := range shard0.Vttablets {
		if tablet.Alias != curPrimary.Alias {
			replica = tablet
			break
		}
	}
	require.NotNil(t, replica, "could not find any replica tablet")

	output, err := clusterInfo.ClusterInstance.VtctldClientProcess.ExecuteCommandWithOutput(
		"ChangeTabletType", replica.Alias, "DRAINED")
	require.NoError(t, err, "error in changing tablet type output - %s", output)

	// Make sure VTOrc sees the drained tablets and doesn't forget them.
	utils.WaitForDrainedTabletInVTOrc(t, vtOrcProcess, 1)

	output, err = clusterInfo.ClusterInstance.VtctldClientProcess.ExecuteCommandWithOutput(
		"ChangeTabletType", replica.Alias, "REPLICA")
	require.NoError(t, err, "error in changing tablet type output - %s", output)

	// We have no drained tablets anymore. Wait for VTOrc to have processed that.
	utils.WaitForDrainedTabletInVTOrc(t, vtOrcProcess, 0)
}

// TestDurabilityPolicySetLater tests that VTOrc works even if the durability policy of the keyspace is
// set after VTOrc has been started.
func TestDurabilityPolicySetLater(t *testing.T) {
Expand Down
26 changes: 26 additions & 0 deletions go/test/endtoend/vtorc/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -1204,3 +1204,29 @@ func SemiSyncExtensionLoaded(t *testing.T, tablet *cluster.Vttablet) (mysql.Semi

return conn.SemiSyncExtensionLoaded()
}

// WaitForDrainedTabletInVTOrc waits for VTOrc to see the specified number of drained tablets.
//
// It polls the "api/database-state" endpoint of vtorcInstance every 100ms and
// counts the occurrences of the DRAINED tablet type in the response. It returns
// as soon as that number equals count. If this does not happen within 15
// seconds, the test is marked as failed (without aborting it) and the failure
// message reports what was observed on the last poll.
func WaitForDrainedTabletInVTOrc(t *testing.T, vtorcInstance *cluster.VTOrcProcess, count int) {
	t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()
	ticker := time.NewTicker(100 * time.Millisecond)
	defer ticker.Stop()

	// The text we search for never changes between polls, so build it once.
	// The tablet type is matched by its numeric enum value.
	drainedMarker := fmt.Sprintf(`"tablet_type": "%d"`, topodatapb.TabletType_DRAINED)
	// lastObserved describes the outcome of the most recent poll so that a
	// timeout can explain why the wait never succeeded.
	lastObserved := "no API call completed"

	for {
		select {
		case <-ctx.Done():
			t.Errorf("timed out waiting for drained tablet in VTOrc: wanted %d, %s", count, lastObserved)
			return
		case <-ticker.C:
			statusCode, res, err := vtorcInstance.MakeAPICall("api/database-state")
			if err != nil {
				// Keep polling; the failure is only reported if we time out.
				lastObserved = fmt.Sprintf("last API call failed: %v", err)
				continue
			}
			if statusCode != 200 {
				lastObserved = fmt.Sprintf("last API call returned status %v", statusCode)
				continue
			}
			found := strings.Count(res, drainedMarker)
			if found == count {
				return
			}
			lastObserved = fmt.Sprintf("last poll found %d", found)
		}
	}
}
5 changes: 5 additions & 0 deletions go/vt/vtorc/inst/analysis_dao.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,11 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
return nil
}

// We don't want to run any fixes on any non-replica type tablet.
if tablet.Type != topodatapb.TabletType_PRIMARY && !topo.IsReplicaType(tablet.Type) {
return nil
}

primaryTablet := &topodatapb.Tablet{}
if str := m.GetString("primary_tablet_info"); str != "" {
if err := opts.Unmarshal([]byte(str), primaryTablet); err != nil {
Expand Down
41 changes: 41 additions & 0 deletions go/vt/vtorc/inst/analysis_dao_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,47 @@ func TestGetReplicationAnalysisDecision(t *testing.T) {
keyspaceWanted: "ks",
shardWanted: "0",
codeWanted: ReplicationStopped,
}, {
name: "No recoveries on drained tablets",
info: []*test.InfoForRecoveryAnalysis{{
TabletInfo: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101},
Hostname: "localhost",
Keyspace: "ks",
Shard: "0",
Type: topodatapb.TabletType_PRIMARY,
MysqlHostname: "localhost",
MysqlPort: 6708,
},
DurabilityPolicy: "none",
LastCheckValid: 1,
CountReplicas: 4,
CountValidReplicas: 4,
CountValidReplicatingReplicas: 3,
CountValidOracleGTIDReplicas: 4,
CountLoggingReplicas: 2,
IsPrimary: 1,
}, {
TabletInfo: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100},
Hostname: "localhost",
Keyspace: "ks",
Shard: "0",
Type: topodatapb.TabletType_DRAINED,
MysqlHostname: "localhost",
MysqlPort: 6709,
},
DurabilityPolicy: "none",
PrimaryTabletInfo: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101},
},
LastCheckValid: 1,
ReadOnly: 1,
ReplicationStopped: 1,
}},
keyspaceWanted: "ks",
shardWanted: "0",
codeWanted: NoProblem,
}, {
name: "ReplicaMisconfigured",
info: []*test.InfoForRecoveryAnalysis{{
Expand Down
3 changes: 0 additions & 3 deletions go/vt/vtorc/logic/tablet_discovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,9 +204,6 @@ func refreshTablets(tablets map[string]*topo.TabletInfo, query string, args []an
var wg sync.WaitGroup
for _, tabletInfo := range tablets {
tablet := tabletInfo.Tablet
if tablet.Type != topodatapb.TabletType_PRIMARY && !topo.IsReplicaType(tablet.Type) {
continue
}
tabletAliasString := topoproto.TabletAliasString(tablet.Alias)
latestInstances[tabletAliasString] = true
old, err := inst.ReadTablet(tabletAliasString)
Expand Down
22 changes: 22 additions & 0 deletions go/vt/vtorc/logic/tablet_discovery_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,8 @@ func TestRefreshTabletsInKeyspaceShard(t *testing.T) {
return nil
})
tab100.MysqlPort = 100
// We refresh once more to ensure we don't affect the next tests since we've made a change again.
refreshTabletsInKeyspaceShard(ctx, keyspace, shard, func(tabletAlias string) {}, false, nil)
}()
// Let's assume tab100 restarted on a different pod. This would change its tablet hostname and port
_, err = ts.UpdateTabletFields(context.Background(), tab100.Alias, func(tablet *topodatapb.Tablet) error {
Expand All @@ -212,6 +214,26 @@ func TestRefreshTabletsInKeyspaceShard(t *testing.T) {
// Also the old tablet should be forgotten
verifyRefreshTabletsInKeyspaceShard(t, false, 1, tablets, nil)
})

t.Run("Replica becomes a drained tablet", func(t *testing.T) {
defer func() {
_, err = ts.UpdateTabletFields(context.Background(), tab101.Alias, func(tablet *topodatapb.Tablet) error {
tablet.Type = topodatapb.TabletType_REPLICA
return nil
})
tab101.Type = topodatapb.TabletType_REPLICA
// We refresh once more to ensure we don't affect the next tests since we've made a change again.
refreshTabletsInKeyspaceShard(ctx, keyspace, shard, func(tabletAlias string) {}, false, nil)
}()
// A replica tablet can be converted to drained type if it has an errant GTID.
_, err = ts.UpdateTabletFields(context.Background(), tab101.Alias, func(tablet *topodatapb.Tablet) error {
tablet.Type = topodatapb.TabletType_DRAINED
return nil
})
tab101.Type = topodatapb.TabletType_DRAINED
// We expect 1 tablet to be refreshed since its type has been changed.
verifyRefreshTabletsInKeyspaceShard(t, false, 1, tablets, nil)
})
}

func TestShardPrimary(t *testing.T) {
Expand Down
Loading