Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PRS and ERS don't promote replicas taking backups #16997

Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
b2c0b5c
PRS and ERS don't promote replicas taking backups
ejortegau Oct 18, 2024
e9fda77
Rebuild vtadmin proto files
ejortegau Oct 18, 2024
8a196c9
Fix assignment to nil map
ejortegau Oct 18, 2024
d7b8ddd
Fix TestGRPCTMServer
ejortegau Oct 18, 2024
2d22cf4
Improve error message.
ejortegau Oct 22, 2024
54bdcc0
Address PR comments
ejortegau Oct 30, 2024
fc69610
Reorder some code to skip sorting not preferred tablets when we won't…
ejortegau Oct 30, 2024
f7bbd32
Merge branch 'main' into ejortegau/reparenting-skips-tablets-running-…
ejortegau Oct 31, 2024
67f901d
Regenerate some code from proto after fixing merge conflicts
ejortegau Oct 31, 2024
2819bf1
Add change info to changelog
ejortegau Oct 31, 2024
92a11e0
Make linter happy
ejortegau Oct 31, 2024
6db5006
Address PR comments
ejortegau Nov 4, 2024
30114e5
Improve testing for skipping promoting host running backup
ejortegau Nov 4, 2024
ca7078e
Fix imports some more
ejortegau Nov 4, 2024
6840803
Address PR comments
ejortegau Nov 5, 2024
8b41ff1
Fix vtadmin proto stuff
ejortegau Nov 5, 2024
a29efa7
Fix more proto stuff
ejortegau Nov 5, 2024
09331a9
Fix missing fields coming from renaming of proto fields +
ejortegau Nov 5, 2024
e315d12
More field renaming for betetr consistency
ejortegau Nov 5, 2024
3405351
Address PR comment
ejortegau Nov 5, 2024
86f0182
Merge branch 'main' into ejortegau/reparenting-skips-tablets-running-…
ejortegau Nov 8, 2024
61ad1b6
Address PR comments
ejortegau Nov 8, 2024
83f7b62
Run go run ./go/tools/releases/releases.go to make CI check happy
ejortegau Nov 8, 2024
97c0271
Address PR comment
ejortegau Nov 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
216 changes: 118 additions & 98 deletions go/vt/proto/replicationdata/replicationdata.pb.go

Large diffs are not rendered by default.

70 changes: 70 additions & 0 deletions go/vt/proto/replicationdata/replicationdata_vtproto.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1,566 changes: 793 additions & 773 deletions go/vt/proto/tabletmanagerdata/tabletmanagerdata.pb.go

Large diffs are not rendered by default.

68 changes: 68 additions & 0 deletions go/vt/proto/tabletmanagerdata/tabletmanagerdata_vtproto.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions go/vt/vtctl/grpcvtctldserver/testutil/test_tmclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ type TabletManagerClient struct {
EventJitter time.Duration
ErrorAfter time.Duration
}
// Backing Up - keyed by tablet alias.
BackingUp map[string]bool
// keyed by tablet alias.
ChangeTagsResult map[string]struct {
Response *tabletmanagerdatapb.ChangeTagsResponse
Expand Down Expand Up @@ -1052,6 +1054,9 @@ func (fake *TabletManagerClient) ReplicationStatus(ctx context.Context, tablet *
}

if result, ok := fake.ReplicationStatusResults[key]; ok {
if _, ok = fake.BackingUp[key]; ok {
result.Position.BackingUp = fake.BackingUp[key]
}
return result.Position, result.Error
}

Expand Down
13 changes: 11 additions & 2 deletions go/vt/vtctl/reparentutil/emergency_reparenter.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ func (erp *EmergencyReparenter) reparentShardLocked(ctx context.Context, ev *eve
// 2. Remove the tablets with the Must_not promote rule
// 3. Remove cross-cell tablets if PreventCrossCellPromotion is specified
// Our final primary candidate MUST belong to this list of valid candidates
validCandidateTablets, err = erp.filterValidCandidates(validCandidateTablets, stoppedReplicationSnapshot.reachableTablets, prevPrimary, opts)
validCandidateTablets, err = erp.filterValidCandidates(validCandidateTablets, stoppedReplicationSnapshot.reachableTablets, stoppedReplicationSnapshot.backingUpTablets, prevPrimary, opts)
if err != nil {
return err
}
Expand Down Expand Up @@ -729,7 +729,7 @@ func (erp *EmergencyReparenter) identifyPrimaryCandidate(
}

// filterValidCandidates filters valid tablets, keeping only the ones which can successfully be promoted without any constraint failures and can make forward progress on being promoted
func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb.Tablet, tabletsReachable []*topodatapb.Tablet, prevPrimary *topodatapb.Tablet, opts EmergencyReparentOptions) ([]*topodatapb.Tablet, error) {
func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb.Tablet, tabletsReachable []*topodatapb.Tablet, tabletsBackingUp map[string]bool, prevPrimary *topodatapb.Tablet, opts EmergencyReparentOptions) ([]*topodatapb.Tablet, error) {
var restrictedValidTablets []*topodatapb.Tablet
for _, tablet := range validTablets {
tabletAliasStr := topoproto.TabletAliasString(tablet.Alias)
Expand Down Expand Up @@ -757,6 +757,15 @@ func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb
}
continue
}
// Remove candidates that are running a backup.
backingUp, ok := tabletsBackingUp[tabletAliasStr]
if ok && backingUp {
erp.logger.Infof("Removing %s from list of valid candidates for promotion because it is running a backup", tabletAliasStr)
if opts.NewPrimaryAlias != nil && topoproto.TabletAliasEqual(opts.NewPrimaryAlias, tablet.Alias) {
return nil, vterrors.Errorf(vtrpc.Code_ABORTED, "proposed primary %s will not be able to make forward progress on being promoted", topoproto.TabletAliasString(opts.NewPrimaryAlias))
}
continue
}
restrictedValidTablets = append(restrictedValidTablets, tablet)
}
return restrictedValidTablets, nil
Expand Down
26 changes: 25 additions & 1 deletion go/vt/vtctl/reparentutil/emergency_reparenter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4460,11 +4460,22 @@ func TestEmergencyReparenter_filterValidCandidates(t *testing.T) {
}
)
allTablets := []*topodatapb.Tablet{primaryTablet, replicaTablet, rdonlyTablet, replicaCrossCellTablet, rdonlyCrossCellTablet}
noTabletsBackingUp := map[string]bool{
topoproto.TabletAliasString(primaryTablet.Alias): false, topoproto.TabletAliasString(replicaTablet.Alias): false,
topoproto.TabletAliasString(rdonlyTablet.Alias): false, topoproto.TabletAliasString(replicaCrossCellTablet.Alias): false,
topoproto.TabletAliasString(rdonlyCrossCellTablet.Alias): false,
}
replicaBackingUp := map[string]bool{
topoproto.TabletAliasString(primaryTablet.Alias): false, topoproto.TabletAliasString(replicaTablet.Alias): true,
topoproto.TabletAliasString(rdonlyTablet.Alias): false, topoproto.TabletAliasString(replicaCrossCellTablet.Alias): false,
topoproto.TabletAliasString(rdonlyCrossCellTablet.Alias): false,
}
tests := []struct {
name string
durability string
validTablets []*topodatapb.Tablet
tabletsReachable []*topodatapb.Tablet
tabletsBackingUp map[string]bool
prevPrimary *topodatapb.Tablet
opts EmergencyReparentOptions
filteredTablets []*topodatapb.Tablet
Expand All @@ -4475,12 +4486,21 @@ func TestEmergencyReparenter_filterValidCandidates(t *testing.T) {
durability: "none",
validTablets: allTablets,
tabletsReachable: allTablets,
tabletsBackingUp: noTabletsBackingUp,
filteredTablets: []*topodatapb.Tablet{primaryTablet, replicaTablet, replicaCrossCellTablet},
}, {
name: "filter backing up",
durability: "none",
validTablets: allTablets,
tabletsReachable: allTablets,
tabletsBackingUp: replicaBackingUp,
filteredTablets: []*topodatapb.Tablet{primaryTablet, replicaCrossCellTablet},
}, {
name: "filter cross cell",
durability: "none",
validTablets: allTablets,
tabletsReachable: allTablets,
tabletsBackingUp: noTabletsBackingUp,
prevPrimary: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{
Cell: "zone-1",
Expand All @@ -4495,6 +4515,7 @@ func TestEmergencyReparenter_filterValidCandidates(t *testing.T) {
durability: "cross_cell",
validTablets: []*topodatapb.Tablet{primaryTablet, replicaTablet},
tabletsReachable: []*topodatapb.Tablet{primaryTablet, replicaTablet, rdonlyTablet, rdonlyCrossCellTablet},
tabletsBackingUp: noTabletsBackingUp,
filteredTablets: nil,
}, {
name: "filter mixed",
Expand All @@ -4509,12 +4530,14 @@ func TestEmergencyReparenter_filterValidCandidates(t *testing.T) {
},
validTablets: allTablets,
tabletsReachable: allTablets,
tabletsBackingUp: noTabletsBackingUp,
filteredTablets: []*topodatapb.Tablet{replicaCrossCellTablet},
}, {
name: "error - requested primary must not",
durability: "none",
validTablets: allTablets,
tabletsReachable: allTablets,
tabletsBackingUp: noTabletsBackingUp,
opts: EmergencyReparentOptions{
NewPrimaryAlias: rdonlyTablet.Alias,
},
Expand All @@ -4535,6 +4558,7 @@ func TestEmergencyReparenter_filterValidCandidates(t *testing.T) {
durability: "cross_cell",
validTablets: allTablets,
tabletsReachable: []*topodatapb.Tablet{primaryTablet, replicaTablet, rdonlyTablet, rdonlyCrossCellTablet},
tabletsBackingUp: noTabletsBackingUp,
opts: EmergencyReparentOptions{
NewPrimaryAlias: primaryTablet.Alias,
},
Expand All @@ -4548,7 +4572,7 @@ func TestEmergencyReparenter_filterValidCandidates(t *testing.T) {
tt.opts.durability = durability
logger := logutil.NewMemoryLogger()
erp := NewEmergencyReparenter(nil, nil, logger)
tabletList, err := erp.filterValidCandidates(tt.validTablets, tt.tabletsReachable, tt.prevPrimary, tt.opts)
tabletList, err := erp.filterValidCandidates(tt.validTablets, tt.tabletsReachable, tt.tabletsBackingUp, tt.prevPrimary, tt.opts)
if tt.errShouldContain != "" {
require.Error(t, err)
require.Contains(t, err.Error(), tt.errShouldContain)
Expand Down
Loading
Loading