Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

slack-15.0: forward ports slack specific v14 PRS fix #391

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion go/vt/vttablet/tabletmanager/rpc_replication.go
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,26 @@ func (tm *TabletManager) setReplicationSourceSemiSyncNoAction(ctx context.Contex
return tm.setReplicationSourceLocked(ctx, parentAlias, timeCreatedNS, waitPosition, forceStartReplication, SemiSyncActionNone)
}

// isSetReplicationSourceLockedRunning reports whether a call to
// setReplicationSourceLocked is currently in progress.
// The flag is read while holding tm.mutex because
// _isSetReplicationSourceLockedRunning is accessed concurrently.
func (tm *TabletManager) isSetReplicationSourceLockedRunning() bool {
	tm.mutex.Lock()
	// Copy the flag while the lock is held, then release before returning.
	inProgress := tm._isSetReplicationSourceLockedRunning
	tm.mutex.Unlock()
	return inProgress
}

// setIsSetReplicationSourceLockedRunning stores running into
// _isSetReplicationSourceLockedRunning.
// The write happens while holding tm.mutex because the flag is accessed
// concurrently.
func (tm *TabletManager) setIsSetReplicationSourceLockedRunning(running bool) {
	tm.mutex.Lock()
	tm._isSetReplicationSourceLockedRunning = running
	tm.mutex.Unlock()
}

func (tm *TabletManager) setReplicationSourceLocked(ctx context.Context, parentAlias *topodatapb.TabletAlias, timeCreatedNS int64, waitPosition string, forceStartReplication bool, semiSync SemiSyncAction) (err error) {
tm.setIsSetReplicationSourceLockedRunning(true)
defer tm.setIsSetReplicationSourceLockedRunning(false)

// End orchestrator maintenance at the end of fixing replication.
// This is a best effort operation, so it should happen in a goroutine
defer func() {
Expand Down Expand Up @@ -1079,7 +1098,7 @@ func (tm *TabletManager) fixSemiSyncAndReplication(tabletType topodatapb.TabletT
return nil
}

//shouldAck := semiSync == SemiSyncActionSet
// shouldAck := semiSync == SemiSyncActionSet
shouldAck := isPrimaryEligible(tabletType)
acking, err := tm.MysqlDaemon.SemiSyncReplicationStatus()
if err != nil {
Expand Down Expand Up @@ -1123,6 +1142,15 @@ func (tm *TabletManager) handleRelayLogError(err error) error {
// repairReplication tries to connect this server to whoever is
// the current primary of the shard, and start replicating.
func (tm *TabletManager) repairReplication(ctx context.Context) error {
if tm.isSetReplicationSourceLockedRunning() {
// we are actively setting replication source,
// repairReplication will block due to higher
// authority holding a shard lock (PRS on vtctld)
log.Infof("slack-debug: we are actively setting replication source, exiting")

return nil
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vmogilev `return nil` leaves the client unaware that repairReplication never ran. Should we instead return an error to the RPC caller that explains what happened?

}

tablet := tm.Tablet()

si, err := tm.TopoServer.GetShard(ctx, tablet.Keyspace, tablet.Shard)
Expand Down
2 changes: 2 additions & 0 deletions go/vt/vttablet/tabletmanager/tm_init.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,8 @@ type TabletManager struct {
_lockTablesTimer *time.Timer
// _isBackupRunning tells us whether there is a backup that is currently running
_isBackupRunning bool
// _isSetReplicationSourceLockedRunning indicates we are actively running setReplicationSourceLocked
_isSetReplicationSourceLockedRunning bool
}

// BuildTabletFromInput builds a tablet record from input parameters.
Expand Down
Loading