Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delay the next campaign if the node lost the vote #169

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -1680,6 +1680,17 @@ func stepCandidate(r *raft, m pb.Message) error {
// pb.MsgPreVoteResp contains future term of pre-candidate
// m.Term > r.Term; reuse r.Term
r.becomeFollower(r.Term, None)
// Delay the next campaign if the node lost the vote. A node that lost
// the vote is highly likely to lose the next campaign as well, so it
// makes more sense to prioritize campaigns by other nodes within the
// current term. Normally the randomized election timeout is in the
// range [electiontimeout, 2*electiontimeout - 1]; after a lost vote it
// becomes [2*electiontimeout, 3*electiontimeout - 1]. Note that all
// time parameters, including `randomizedElectionTimeout`, will be
// automatically reset in the next term.
if myVoteRespType == pb.MsgVoteResp {
r.randomizedElectionTimeout += r.electionTimeout
ahrtr marked this conversation as resolved.
Show resolved Hide resolved
}
}
case pb.MsgTimeoutNow:
r.logger.Debugf("%x [term %d state %v] ignored MsgTimeoutNow from %x", r.id, r.Term, r.state, m.From)
Expand Down
30 changes: 30 additions & 0 deletions raft_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1069,6 +1069,36 @@ func TestPastElectionTimeout(t *testing.T) {
}
}

// TestRandomizedElectionTimeoutOnLostVote verifies how a candidate's
// randomizedElectionTimeout reacts to rejected vote requests in a three-peer
// group: after the rejection from peer 2 alone it must still be below
// 2*electionTimeout, and after the additional rejection from peer 3 it must be
// at least 2*electionTimeout.
func TestRandomizedElectionTimeoutOnLostVote(t *testing.T) {
	ms := newTestMemoryStorage(withPeers(1, 2, 3))
	ms.Append(index(1).terms(1, 2, 3, 4, 5))
	r := newTestRaft(1, 10, 1, ms)

	// Start from the term of the last log entry, then campaign; the vote
	// request and the responses below all carry the following term.
	lastTerm, lastIndex := r.raftLog.lastEntryID().term, r.raftLog.lastEntryID().index
	r.Term = lastTerm
	voteTerm := lastTerm + 1

	r.becomeCandidate()
	r.stepOrSend([]pb.Message{{
		From: 1,
		To:   2,
		Type: pb.MsgVote,
		Term: voteTerm,
		// NOTE(review): LogTerm is filled from the last log *index* and Index
		// is a fixed 42 — confirm this is intentional.
		LogTerm: lastIndex,
		Index:   42,
	}})

	// rejectFrom delivers a rejecting MsgVoteResp from the given peer to r.
	rejectFrom := func(peer uint64) {
		rejection := pb.Message{
			From:   peer,
			To:     1,
			Term:   voteTerm,
			Type:   pb.MsgVoteResp,
			Reject: true,
		}
		require.NoError(t, r.Step(rejection))
	}

	// The MsgVote is rejected by r2: the timeout must not be extended yet.
	rejectFrom(2)
	require.Less(t, r.randomizedElectionTimeout, r.electionTimeout*2)

	// The MsgVote is rejected by r3 as well: the timeout must now be extended.
	rejectFrom(3)
	require.GreaterOrEqual(t, r.randomizedElectionTimeout, r.electionTimeout*2)
}

// TestStepIgnoreOldTermMsg to ensure that the Step function ignores the message
// from old term and does not pass it to the actual stepX function.
func TestStepIgnoreOldTermMsg(t *testing.T) {
Expand Down
7 changes: 7 additions & 0 deletions rafttest/interaction_env_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,13 @@ func (env *InteractionEnv) Handle(t *testing.T, d datadriven.TestData) string {
//
// tick-heartbeat 3
err = env.handleTickHeartbeat(t, d)
case "tick":
// Tick a specified interval on the given node.
//
// Example:
//
// tick 3 5
err = env.handleTick(t, d)
case "transfer-leadership":
// Transfer the Raft leader.
//
Expand Down
4 changes: 4 additions & 0 deletions rafttest/interaction_env_handler_add_nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ func (env *InteractionEnv) handleAddNodes(t *testing.T, d datadriven.TestData) e
}
case "step-down-on-removal":
arg.Scan(t, i, &cfg.StepDownOnRemoval)
case "heartbeat-tick":
arg.Scan(t, i, &cfg.HeartbeatTick)
case "election-tick":
arg.Scan(t, i, &cfg.ElectionTick)
}
}
}
Expand Down
16 changes: 16 additions & 0 deletions rafttest/interaction_env_handler_tick.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package rafttest

import (
"strconv"
"testing"

"github.com/cockroachdb/datadriven"
Expand All @@ -30,6 +31,21 @@ func (env *InteractionEnv) handleTickHeartbeat(t *testing.T, d datadriven.TestDa
return env.Tick(idx, env.Nodes[idx].Config.HeartbeatTick)
}

// handleTick implements the "tick" directive, which ticks one node a
// caller-chosen number of times, e.g. "tick 3 5".
//
// The node index is resolved by firstAsNodeIdx. Exactly one further argument
// is required: a bare key (no values) that parses as a decimal integer and is
// used as the tick count. Any violation fails the test immediately.
func (env *InteractionEnv) handleTick(t *testing.T, d datadriven.TestData) error {
	nodeIdx := firstAsNodeIdx(t, d)

	args := d.CmdArgs
	wellFormed := len(args) == 2 && len(args[1].Vals) == 0
	if !wellFormed {
		t.Fatalf("expected exactly one key with no vals: %+v", args[1:])
	}

	// The tick count is carried in the key of the second argument.
	ticks, convErr := strconv.Atoi(args[1].Key)
	if convErr != nil {
		t.Fatal(convErr)
	}

	return env.Tick(nodeIdx, ticks)
}

// Tick the node at the given index the given number of times.
func (env *InteractionEnv) Tick(idx int, num int) error {
for i := 0; i < num; i++ {
Expand Down
227 changes: 227 additions & 0 deletions testdata/vote.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
# Tests that a node that is behind on the log will lose the vote, and that other
# nodes will then have higher priority to start the next campaign within the
# current term.

log-level none
----
ok

add-nodes 2 voters=(1,2) index=10 heartbeat-tick=1 election-tick=5
----
ok

campaign 1
----
ok

stabilize
----
ok

log-level debug
----
ok

raft-state
----
1: StateLeader (Voter) Term:1 Lead:1
2: StateFollower (Voter) Term:1 Lead:1

# Propose a command on 1
propose 1 foo
----
ok

# 2 is now behind on its log.
raft-log 2
----
1/11 EntryNormal ""

# Let 2 campaign. It should fail, because it's behind on its log.
campaign 2
----
INFO 2 is starting a new election at term 1
INFO 2 became candidate at term 2
INFO 2 [logterm: 1, index: 11] sent MsgVote request to 1 at term 2

process-ready 2
----
Ready MustSync=true:
Lead:0 State:StateCandidate
HardState Term:2 Vote:2 Commit:11
Messages:
2->1 MsgVote Term:2 Log:1/11
INFO 2 received MsgVoteResp from 2 at term 2
INFO 2 has received 1 MsgVoteResp votes and 0 vote rejections

deliver-msgs 1
----
2->1 MsgVote Term:2 Log:1/11
INFO 1 [term: 1] received a MsgVote message with higher term from 2 [term: 2]
INFO 1 became follower at term 2
INFO 1 [logterm: 1, index: 12, vote: 0] rejected MsgVote from 2 [logterm: 1, index: 11] at term 2

stabilize
----
> 1 handling Ready
Ready MustSync=true:
Lead:0 State:StateFollower
HardState Term:2 Commit:11
Entries:
1/12 EntryNormal "foo"
Messages:
1->2 MsgApp Term:1 Log:1/11 Commit:11 Entries:[1/12 EntryNormal "foo"]
1->2 MsgVoteResp Term:2 Log:0/0 Rejected (Hint: 0)
INFO 1 [term: 2] ignored a MsgAppResp message with lower term from 1 [term: 1]
> 2 receiving messages
1->2 MsgApp Term:1 Log:1/11 Commit:11 Entries:[1/12 EntryNormal "foo"]
INFO 2 [term: 2] ignored a MsgApp message with lower term from 1 [term: 1]
1->2 MsgVoteResp Term:2 Log:0/0 Rejected (Hint: 0)
INFO 2 received MsgVoteResp rejection from 1 at term 2
INFO 2 has received 1 MsgVoteResp votes and 1 vote rejections
INFO 2 became follower at term 2
> 2 handling Ready
Ready MustSync=false:
Lead:0 State:StateFollower

raft-state
----
1: StateFollower (Voter) Term:2 Lead:0
2: StateFollower (Voter) Term:2 Lead:0

ahrtr marked this conversation as resolved.
Show resolved Hide resolved
raft-log 1
----
1/11 EntryNormal ""
1/12 EntryNormal "foo"

raft-log 2
----
1/11 EntryNormal ""

# Tick both nodes 9 times, and let node 2 try first.
# Node 2 is de-prioritized for the next campaign: its randomized election timeout
# is in the range [2*electionTimeout, 3*electionTimeout), namely [10, 15), so 9
# ticks shouldn't trigger a campaign.
tick 2 9
----
ok

# Node 1's randomized election timeout is unchanged; it is still in the range
# [electionTimeout, 2*electionTimeout), namely [5, 10), so 9 ticks will definitely
# trigger a campaign.
tick 1 9
----
INFO 1 is starting a new election at term 2
INFO 1 became candidate at term 3
INFO 1 [logterm: 1, index: 12] sent MsgVote request to 2 at term 3

# let node 2 try first
process-ready 2
----
<empty Ready>

deliver-msgs 1
----
no messages

stabilize
----
> 1 handling Ready
Ready MustSync=true:
Lead:0 State:StateCandidate
HardState Term:3 Vote:1 Commit:11
Messages:
1->2 MsgVote Term:3 Log:1/12
INFO 1 received MsgVoteResp from 1 at term 3
INFO 1 has received 1 MsgVoteResp votes and 0 vote rejections
> 2 receiving messages
1->2 MsgVote Term:3 Log:1/12
INFO 2 [term: 2] received a MsgVote message with higher term from 1 [term: 3]
INFO 2 became follower at term 3
INFO 2 [logterm: 1, index: 11, vote: 0] cast MsgVote for 1 [logterm: 1, index: 12] at term 3
> 2 handling Ready
Ready MustSync=true:
HardState Term:3 Vote:1 Commit:11
Messages:
2->1 MsgVoteResp Term:3 Log:0/0
> 1 receiving messages
2->1 MsgVoteResp Term:3 Log:0/0
INFO 1 received MsgVoteResp from 2 at term 3
INFO 1 has received 2 MsgVoteResp votes and 0 vote rejections
INFO 1 became leader at term 3
> 1 handling Ready
Ready MustSync=true:
Lead:1 State:StateLeader
Entries:
3/13 EntryNormal ""
Messages:
1->2 MsgApp Term:3 Log:1/12 Commit:11 Entries:[3/13 EntryNormal ""]
> 2 receiving messages
1->2 MsgApp Term:3 Log:1/12 Commit:11 Entries:[3/13 EntryNormal ""]
DEBUG 2 [logterm: 0, index: 12] rejected MsgApp [logterm: 1, index: 12] from 1
> 2 handling Ready
Ready MustSync=false:
Lead:1 State:StateFollower
Messages:
2->1 MsgAppResp Term:3 Log:1/12 Rejected (Hint: 11)
> 1 receiving messages
2->1 MsgAppResp Term:3 Log:1/12 Rejected (Hint: 11)
DEBUG 1 received MsgAppResp(rejected, hint: (index 11, term 1)) from 2 for index 12
DEBUG 1 decreased progress of 2 to [StateProbe match=0 next=12]
> 1 handling Ready
Ready MustSync=false:
Messages:
1->2 MsgApp Term:3 Log:1/11 Commit:11 Entries:[
1/12 EntryNormal "foo"
3/13 EntryNormal ""
]
> 2 receiving messages
1->2 MsgApp Term:3 Log:1/11 Commit:11 Entries:[
1/12 EntryNormal "foo"
3/13 EntryNormal ""
]
> 2 handling Ready
Ready MustSync=true:
Entries:
1/12 EntryNormal "foo"
3/13 EntryNormal ""
Messages:
2->1 MsgAppResp Term:3 Log:0/13
> 1 receiving messages
2->1 MsgAppResp Term:3 Log:0/13
> 1 handling Ready
Ready MustSync=false:
HardState Term:3 Vote:1 Commit:13
CommittedEntries:
1/12 EntryNormal "foo"
3/13 EntryNormal ""
Messages:
1->2 MsgApp Term:3 Log:3/13 Commit:13
> 2 receiving messages
1->2 MsgApp Term:3 Log:3/13 Commit:13
> 2 handling Ready
Ready MustSync=false:
HardState Term:3 Vote:1 Commit:13
CommittedEntries:
1/12 EntryNormal "foo"
3/13 EntryNormal ""
Messages:
2->1 MsgAppResp Term:3 Log:0/13
> 1 receiving messages
2->1 MsgAppResp Term:3 Log:0/13

raft-state
----
1: StateLeader (Voter) Term:3 Lead:1
2: StateFollower (Voter) Term:3 Lead:1

raft-log 1
----
1/11 EntryNormal ""
1/12 EntryNormal "foo"
3/13 EntryNormal ""

raft-log 2
----
1/11 EntryNormal ""
1/12 EntryNormal "foo"
3/13 EntryNormal ""
Loading