Simulate new block in timeout recovery check
moshababo committed Dec 4, 2023
1 parent bb67407 commit 8bdfdba
Showing 2 changed files with 124 additions and 75 deletions.
60 changes: 38 additions & 22 deletions node/actors/bft/src/testonly/ut_harness.rs
@@ -42,7 +42,7 @@ impl UTHarness {
let num_validators = 6;
assert_matches!(crate::misc::faulty_replicas(num_validators), res if res > 0);
let mut util = UTHarness::new_with(num_validators).await;
util.set_replica_view(util.owner_as_view_leader_next());
util.set_view(util.owner_as_view_leader_next());
util
}

@@ -71,26 +71,46 @@ impl UTHarness {
}
}

pub(crate) fn check_recovery_after_timeout(&mut self) {
self.set_replica_view(self.owner_as_view_leader_next());

let base_replica_view = self.replica_view();
let base_leader_view = self.leader_view();
assert!(base_leader_view < base_replica_view);
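/// Advances the consensus by one block: builds a procedural `LeaderCommit`, dispatches it,
/// and consumes the resulting `ReplicaPrepare` for the next view.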
pub(crate) async fn iterate_next(&mut self) {
let leader_commit = self.new_procedural_leader_commit_many().await;
self.dispatch_leader_commit(leader_commit).await.unwrap();
self.recv_signed()
.await
.unwrap()
.cast::<ReplicaPrepare>()
.unwrap();
}

assert_eq!(self.replica_phase(), Phase::Prepare);
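/// Verifies that the node recovers after a timeout: the replica is expected to broadcast a
/// `ReplicaPrepare` carrying the given view, `high_vote` view and `high_qc` view, after which
/// a new block is simulated to confirm liveness.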
pub(crate) async fn check_recovery_after_timeout(
&mut self,
view: ViewNumber,
high_vote_view: ViewNumber,
high_qc_view: ViewNumber,
) {
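// The timed-out replica should have broadcast a `ReplicaPrepare` for its new view.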
let replica_prepare = self
.recv_signed()
.await
.unwrap()
.cast::<ReplicaPrepare>()
.unwrap()
.msg;

let replica_prepare = self.new_current_replica_prepare(|_| {}).cast().unwrap().msg;
self.dispatch_replica_prepare_many(
vec![replica_prepare; self.consensus_threshold()],
self.keys(),
)
.unwrap();
assert_matches!(
replica_prepare,
ReplicaPrepare {
view: _view,
high_vote,
high_qc,
..
} => {
assert_eq!(_view, view);
assert_eq!(high_vote.view, high_vote_view);
assert_eq!(high_qc.message.view, high_qc_view);
}
);

assert_eq!(self.replica_view(), base_replica_view);
assert_eq!(self.leader_view(), base_replica_view);
assert_eq!(self.replica_phase(), Phase::Prepare);
assert_eq!(self.leader_phase(), Phase::Commit);
self.set_replica_view(self.owner_as_view_leader_next());
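// Simulate a new block to confirm that consensus is live again after the recovery.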
self.iterate_next().await;
}

pub(crate) fn consensus_threshold(&self) -> usize {
@@ -447,10 +467,6 @@ impl UTHarness {
self.consensus.replica.view
}

pub(crate) fn leader_view(&self) -> ViewNumber {
self.consensus.leader.view
}

pub(crate) fn replica_phase(&self) -> Phase {
self.consensus.replica.phase
}
139 changes: 86 additions & 53 deletions node/actors/bft/src/tests.rs
@@ -5,7 +5,9 @@ use crate::{
};
use assert_matches::assert_matches;
use zksync_concurrency::{ctx, testonly::abort_on_panic};
use zksync_consensus_roles::validator::Phase;
use zksync_consensus_roles::validator::{
LeaderCommit, LeaderPrepare, Phase, ReplicaCommit, ReplicaPrepare,
};

async fn run_test(behavior: Behavior, network: Network) {
abort_on_panic();
@@ -66,22 +68,25 @@ async fn byzantine_real_network() {
run_test(Behavior::Byzantine, Network::Real).await
}

/// Testing liveness after the network becomes idle with leader having no cached prepare messages for the current view.
// Testing liveness after the network becomes idle with leader having no cached prepare messages for the current view.
#[tokio::test]
async fn timeout_leader_no_prepares() {
let mut util = UTHarness::new_many().await;

let base_replica_view = util.replica_view();
let base_leader_view = util.leader_view();
let base_rp = util
.new_current_replica_prepare(|_| {})
.cast::<ReplicaPrepare>()
.unwrap()
.msg;
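// Snapshot the pre-timeout ReplicaPrepare so the post-timeout message can be checked against it.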

util.sim_timeout().await;

assert_eq!(util.replica_view(), base_replica_view.next());
assert_eq!(util.replica_phase(), Phase::Prepare);
assert_eq!(util.leader_view(), base_leader_view);
assert_eq!(util.leader_phase(), Phase::Prepare);

util.check_recovery_after_timeout();
util.check_recovery_after_timeout(
base_rp.view.next(),
base_rp.high_vote.view,
base_rp.high_qc.message.view,
)
.await;
}

/// Testing liveness after the network becomes idle with leader having some cached prepare messages for the current view.
@@ -90,7 +95,7 @@ async fn timeout_leader_some_prepares() {
let mut util = UTHarness::new_many().await;

let replica_prepare = util.new_current_replica_prepare(|_| {});
let res = util.dispatch_replica_prepare_one(replica_prepare);
let res = util.dispatch_replica_prepare_one(replica_prepare.clone());
assert_matches!(
res,
Err(ReplicaPrepareError::NumReceivedBelowThreshold {
@@ -101,74 +106,95 @@ async fn timeout_leader_some_prepares() {
assert_eq!(threshold, util.consensus_threshold())
}
);

let base_replica_view = util.replica_view();
let base_leader_view = util.leader_view();
let base_rp = replica_prepare.cast::<ReplicaPrepare>().unwrap().msg;

util.sim_timeout().await;

assert_eq!(util.replica_view(), base_replica_view.next());
assert_eq!(util.replica_phase(), Phase::Prepare);
assert_eq!(util.leader_view(), base_leader_view);
assert_eq!(util.leader_phase(), Phase::Prepare);

util.check_recovery_after_timeout();
util.check_recovery_after_timeout(
base_rp.view.next(),
base_rp.high_vote.view,
base_rp.high_qc.message.view,
)
.await;
}

/// Testing liveness after the network becomes idle with leader in commit phase.
#[tokio::test]
async fn timeout_leader_in_commit() {
let mut util = UTHarness::new_many().await;

let replica_prepare = util.new_current_replica_prepare(|_| {}).cast().unwrap().msg;
let base_rp = util
.new_current_replica_prepare(|_| {})
.cast::<ReplicaPrepare>()
.unwrap()
.msg;
util.dispatch_replica_prepare_many(
vec![replica_prepare; util.consensus_threshold()],
vec![base_rp.clone(); util.consensus_threshold()],
util.keys(),
)
.unwrap();

let base_replica_view = util.replica_view();
let base_leader_view = util.leader_view();
util.recv_signed()
.await
.unwrap()
.cast::<LeaderPrepare>()
.unwrap();

util.sim_timeout().await;

assert_eq!(util.replica_view(), base_replica_view.next());
assert_eq!(util.replica_phase(), Phase::Prepare);
assert_eq!(util.leader_view(), base_leader_view);
// Leader is in `Phase::Commit`, but should still accept prepares from newer views.
assert_eq!(util.leader_phase(), Phase::Commit);

util.check_recovery_after_timeout();
util.check_recovery_after_timeout(
base_rp.view.next(),
base_rp.high_vote.view,
base_rp.high_qc.message.view,
)
.await;
}

/// Testing liveness after the network becomes idle with replica in commit phase.
#[tokio::test]
async fn timeout_replica_in_commit() {
let mut util = UTHarness::new_many().await;

let base_rp = util
.new_current_replica_prepare(|_| {})
.cast::<ReplicaPrepare>()
.unwrap()
.msg;

let leader_prepare = util.new_procedural_leader_prepare_many().await;
util.dispatch_leader_prepare(leader_prepare).await.unwrap();
assert_eq!(util.replica_phase(), Phase::Commit);

let base_replica_view = util.replica_view();
let base_leader_view = util.leader_view();
util.recv_signed()
.await
.unwrap()
.cast::<ReplicaCommit>()
.unwrap();

util.sim_timeout().await;

assert_eq!(util.replica_view(), base_replica_view.next());
assert_eq!(util.replica_phase(), Phase::Prepare);
assert_eq!(util.leader_view(), base_leader_view);
// Leader is in `Phase::Commit`, but should still accept prepares from newer views.
assert_eq!(util.leader_phase(), Phase::Commit);
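
// The replica committed to the current view's proposal before the timeout, so its
// high_vote should now be at `base_rp.view` rather than at the earlier high_vote view.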

util.check_recovery_after_timeout();
util.check_recovery_after_timeout(
base_rp.view.next(),
base_rp.view,
base_rp.high_qc.message.view,
)
.await;
}

/// Testing liveness after the network becomes idle with leader having some cached commit messages for the current view.
#[tokio::test]
async fn timeout_leader_some_commits() {
let mut util = UTHarness::new_many().await;

let base_rp = util
.new_current_replica_prepare(|_| {})
.cast::<ReplicaPrepare>()
.unwrap()
.msg;

let replica_commit = util.new_procedural_replica_commit_many().await;
let res = util.dispatch_replica_commit_one(replica_commit);
assert_matches!(
@@ -182,25 +208,30 @@ async fn timeout_leader_some_commits() {
}
);

let base_replica_view = util.replica_view();
let base_leader_view = util.leader_view();

util.sim_timeout().await;

assert_eq!(util.replica_view(), base_replica_view.next());
assert_eq!(util.replica_phase(), Phase::Prepare);
assert_eq!(util.leader_view(), base_leader_view);
// Leader is in `Phase::Commit`, but should still accept prepares from newer views.
assert_eq!(util.leader_phase(), Phase::Commit);

util.check_recovery_after_timeout();
util.check_recovery_after_timeout(
base_rp.view.next(),
base_rp.view,
base_rp.high_qc.message.view,
)
.await;
}

/// Testing liveness after the network becomes idle with leader in a consecutive prepare phase.
#[tokio::test]
async fn timeout_leader_in_consecutive_prepare() {
let mut util = UTHarness::new_many().await;

let base_rp = util
.new_current_replica_prepare(|_| {})
.cast::<ReplicaPrepare>()
.unwrap()
.msg;

let replica_commit = util
.new_procedural_replica_commit_many()
.await
@@ -212,16 +243,18 @@ async fn timeout_leader_in_consecutive_prepare() {
util.keys(),
)
.unwrap();

let base_replica_view = util.replica_view();
let base_leader_view = util.leader_view();
util.recv_signed()
.await
.unwrap()
.cast::<LeaderCommit>()
.unwrap();

util.sim_timeout().await;

assert_eq!(util.replica_view(), base_replica_view.next());
assert_eq!(util.replica_phase(), Phase::Prepare);
assert_eq!(util.leader_view(), base_leader_view);
assert_eq!(util.leader_phase(), Phase::Prepare);

util.check_recovery_after_timeout();
util.check_recovery_after_timeout(
base_rp.view.next(),
base_rp.view,
base_rp.high_qc.message.view,
)
.await;
}
