From 28beef6a363b9f7c903cac560b4fd342fc45cd7e Mon Sep 17 00:00:00 2001
From: Akosh Farkash
Date: Mon, 29 Jul 2024 14:48:41 +0100
Subject: [PATCH] BFT-498: Add genesis to BatchVotes operations (#160)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## What ❔

Adds `genesis: GenesisHash` to `Batch` and rejects votes in `BatchVotes` unless they carry the same genesis hash as the local node (see the sketch after this description).

## Why ❔

To prevent replay attacks, either from other chains or after a fork resulting from an L1 batch reorg.

The expected behaviour now is that `BatchVotes` accepts only a single vote per attester, and in the event of an L1 batch reorg the following will have to happen:

* The main node is stopped and the blocks are reverted, which creates a [fork](https://github.com/matter-labs/zksync-era/blob/0c0d10af703d3f8958c49d0ed46d6cda64945fa1/core/node/block_reverter/src/lib.rs#L371) and a new genesis; the main node is then restarted and serves the new batch height from which voting resumes.
* The external nodes [detect the new genesis](https://github.com/matter-labs/zksync-era/blob/0c0d10af703d3f8958c49d0ed46d6cda64945fa1/core/node/consensus/src/en.rs#L75) through the main node API and stop with an error; upon restart they fetch the new genesis and adjust their database.

Once the nodes are restarted they will have the new genesis, which prevents any accidental or malicious replay of old batch votes to them via gossip. They will pick up from the main node the new batch number on which they have to vote, and cast fresh votes on it with the correct genesis hash. The main node won't have to adjust `BatchVotes::min_batch_number` down while running, because it will be restarted and will re-initialise the whole stack from scratch.

### What if we don't fix this?

Without this change, a reorg can play out like this:

1. The main node is stopped, blocks are reverted, and the next batch to commit goes backwards.
2. The main node is restarted and reconnects via gossip to some external node which, before realising that there is a new genesis (which would cause it to restart), treats it as a new gossip connection and replays all the votes in its cache. This causes the main node to re-establish QCs with a gap, but it will fail to save them because the payload won't match, and the saving routine will eventually fail and stop the node.

I could be wrong on this: maybe the nodes won't connect via gossip at all because their genesis hashes differ. A malicious node could certainly do it, but perhaps it won't happen by accident.
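To make the acceptance rule concrete, here is a minimal, self-contained sketch of what the patch enforces: a register keeps at most one vote per attester and rejects any vote whose genesis hash differs from the local node's. The types and names (`VoteRegister`, `BatchVote`, `AttesterKey`) are illustrative placeholders, not the actual consensus API; the real check is the `anyhow::ensure!` added to `BatchVotes::update` in `node/actors/network/src/gossip/batch_votes.rs` in the diff below.

```rust
// Simplified model of the replay-protection rule added by this PR.
// All types here are hypothetical stand-ins for the real roles/network types.
use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct GenesisHash([u8; 32]);

#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct AttesterKey(u64);

#[derive(Clone, Debug)]
struct BatchVote {
    attester: AttesterKey,
    batch_number: u64,
    genesis: GenesisHash,
}

#[derive(Default)]
struct VoteRegister {
    // Latest accepted vote per attester.
    votes: HashMap<AttesterKey, BatchVote>,
}

impl VoteRegister {
    /// Accept a vote only if it was produced for the same genesis as ours;
    /// older votes from the same attester are ignored rather than stored.
    fn update(&mut self, local_genesis: &GenesisHash, vote: BatchVote) -> Result<(), String> {
        if vote.genesis != *local_genesis {
            // A mismatch indicates a different chain or an earlier fork
            // (e.g. a pre-reorg genesis); reject it outright.
            return Err(format!(
                "vote for batch with different genesis hash: {:?}",
                vote.genesis
            ));
        }
        let stale = self
            .votes
            .get(&vote.attester)
            .map_or(false, |existing| existing.batch_number >= vote.batch_number);
        if !stale {
            self.votes.insert(vote.attester, vote);
        }
        Ok(())
    }
}

fn main() {
    let local = GenesisHash([0; 32]);
    let old_fork = GenesisHash([1; 32]);
    let mut register = VoteRegister::default();

    // A vote carrying the current genesis hash is accepted.
    let ok = register.update(
        &local,
        BatchVote { attester: AttesterKey(1), batch_number: 7, genesis: local },
    );
    assert!(ok.is_ok());

    // A vote replayed from before the reorg (old genesis) is rejected.
    let err = register.update(
        &local,
        BatchVote { attester: AttesterKey(2), batch_number: 7, genesis: old_fork },
    );
    assert!(err.is_err());
}
```

As in the patch, a genesis mismatch is a hard error rather than a silent skip, so a peer gossiping pre-reorg votes is surfaced immediately instead of having its stale votes quietly accumulate.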
--- node/actors/executor/src/attestation.rs | 4 +- node/actors/network/src/gossip/batch_votes.rs | 20 ++++++- node/actors/network/src/gossip/mod.rs | 3 +- node/actors/network/src/gossip/runner.rs | 1 + node/actors/network/src/gossip/tests/mod.rs | 59 +++++++++++++++---- node/libs/roles/src/attester/conv.rs | 4 +- .../libs/roles/src/attester/messages/batch.rs | 8 ++- node/libs/roles/src/attester/messages/mod.rs | 1 + node/libs/roles/src/attester/testonly.rs | 1 + .../roles/src/proto/attester/messages.proto | 1 + node/libs/storage/src/testonly/in_memory.rs | 5 +- node/libs/storage/src/testonly/mod.rs | 2 +- node/libs/storage/src/tests.rs | 2 +- 13 files changed, 90 insertions(+), 21 deletions(-) diff --git a/node/actors/executor/src/attestation.rs b/node/actors/executor/src/attestation.rs index d7d21918..3a5146fc 100644 --- a/node/actors/executor/src/attestation.rs +++ b/node/actors/executor/src/attestation.rs @@ -46,6 +46,8 @@ impl AttesterRunner { return Ok(()); } + let genesis = self.block_store.genesis().hash(); + // Find the initial range of batches that we want to (re)sign after a (re)start. let last_batch_number = self .batch_store @@ -79,7 +81,7 @@ impl AttesterRunner { // We only have to publish a vote once; future peers can pull it from the register. self.publisher - .publish(attesters, &self.attester.key, batch) + .publish(attesters, &genesis, &self.attester.key, batch) .await .context("publish")?; diff --git a/node/actors/network/src/gossip/batch_votes.rs b/node/actors/network/src/gossip/batch_votes.rs index 1a9f0f3c..1f5ad613 100644 --- a/node/actors/network/src/gossip/batch_votes.rs +++ b/node/actors/network/src/gossip/batch_votes.rs @@ -78,6 +78,7 @@ impl BatchVotes { pub(super) fn update( &mut self, attesters: &attester::Committee, + genesis: &attester::GenesisHash, data: &[Arc>], ) -> anyhow::Result { let mut stats = BatchUpdateStats::default(); @@ -92,6 +93,14 @@ impl BatchVotes { } done.insert(d.key.clone()); + // Disallow votes from different genesis. It might indicate a reorg, + // in which case either this node or the remote peer has to be restarted. 
+ anyhow::ensure!( + d.msg.genesis == *genesis, + "vote for batch with different genesis hash: {:?}", + d.msg.genesis + ); + if d.msg.number < self.min_batch_number { continue; } @@ -129,6 +138,7 @@ impl BatchVotes { pub(super) fn find_quorums( &self, attesters: &attester::Committee, + genesis: &attester::GenesisHash, skip: impl Fn(attester::BatchNumber) -> bool, ) -> Vec { let threshold = attesters.threshold(); @@ -153,6 +163,8 @@ impl BatchVotes { message: attester::Batch { number: *number, hash: *hash, + // This was checked during insertion; we could look up the first in `votes` + genesis: *genesis, }, signatures: sigs, } @@ -229,11 +241,12 @@ impl BatchVotesWatch { pub(crate) async fn update( &self, attesters: &attester::Committee, + genesis: &attester::GenesisHash, data: &[Arc>], ) -> anyhow::Result<()> { let this = self.0.lock().await; let mut votes = this.borrow().clone(); - let stats = votes.update(attesters, data)?; + let stats = votes.update(attesters, genesis, data)?; if let Some(last_added) = stats.last_added { this.send_replace(votes); @@ -287,6 +300,7 @@ impl BatchVotesPublisher { pub async fn publish( &self, attesters: &attester::Committee, + genesis: &attester::GenesisHash, attester: &attester::SecretKey, batch: attester::Batch, ) -> anyhow::Result<()> { @@ -299,6 +313,8 @@ impl BatchVotesPublisher { .last_signed_batch_number .set(attestation.msg.number.0); - self.0.update(attesters, &[Arc::new(attestation)]).await + self.0 + .update(attesters, genesis, &[Arc::new(attestation)]) + .await } } diff --git a/node/actors/network/src/gossip/mod.rs b/node/actors/network/src/gossip/mod.rs index d3af5f4a..f5afd92b 100644 --- a/node/actors/network/src/gossip/mod.rs +++ b/node/actors/network/src/gossip/mod.rs @@ -154,6 +154,7 @@ impl Network { let Some(attesters) = self.genesis().attesters.as_ref() else { return Ok(()); }; + let genesis = self.genesis().hash(); let mut sub = self.batch_votes.subscribe(); loop { // In the future when we might be gossiping about multiple batches at the same time, @@ -161,7 +162,7 @@ impl Network { // on L1 and we can finally increase the minimum as well. 
let quorums = { let votes = sync::changed(ctx, &mut sub).await?; - votes.find_quorums(attesters, |_| false) + votes.find_quorums(attesters, &genesis, |_| false) }; for qc in quorums { diff --git a/node/actors/network/src/gossip/runner.rs b/node/actors/network/src/gossip/runner.rs index ff36888c..2596ccd7 100644 --- a/node/actors/network/src/gossip/runner.rs +++ b/node/actors/network/src/gossip/runner.rs @@ -68,6 +68,7 @@ impl rpc::Handler for &PushServer<'_> { .batch_votes .update( self.net.genesis().attesters.as_ref().context("attesters")?, + &self.net.genesis().hash(), &req.0, ) .await?; diff --git a/node/actors/network/src/gossip/tests/mod.rs b/node/actors/network/src/gossip/tests/mod.rs index 7a11f093..6811071f 100644 --- a/node/actors/network/src/gossip/tests/mod.rs +++ b/node/actors/network/src/gossip/tests/mod.rs @@ -116,10 +116,12 @@ fn mk_batch( rng: &mut R, key: &attester::SecretKey, number: attester::BatchNumber, + genesis: attester::GenesisHash, ) -> attester::Signed { key.sign_msg(attester::Batch { number, hash: rng.gen(), + genesis, }) } @@ -138,10 +140,12 @@ fn random_netaddr( fn random_batch_vote( rng: &mut R, key: &attester::SecretKey, + genesis: attester::GenesisHash, ) -> Arc> { let batch = attester::Batch { number: attester::BatchNumber(rng.gen_range(0..1000)), hash: rng.gen(), + genesis, }; Arc::new(key.sign_msg(batch.to_owned())) } @@ -170,6 +174,7 @@ fn update_signature( let batch = attester::Batch { number: attester::BatchNumber((batch.number.0 as i64 + batch_number_diff) as u64), hash: rng.gen(), + genesis: batch.genesis, }; Arc::new(key.sign_msg(batch.to_owned())) } @@ -554,12 +559,17 @@ async fn test_batch_votes() { let votes = BatchVotesWatch::default(); let mut sub = votes.subscribe(); + let genesis = rng.gen::(); + // Initial values. let mut want = Signatures::default(); for k in &keys[0..6] { - want.insert(random_batch_vote(rng, k)); + want.insert(random_batch_vote(rng, k, genesis)); } - votes.update(&attesters, &want.as_vec()).await.unwrap(); + votes + .update(&attesters, &genesis, &want.as_vec()) + .await + .unwrap(); assert_eq!(want.0, sub.borrow_and_update().votes); // newer batch number, should be updated @@ -569,10 +579,10 @@ async fn test_batch_votes() { // older batch number, should be ignored let k4v2 = update_signature(rng, &want.get(&keys[4]).msg, &keys[4], -1); // first entry for a key in the config, should be inserted - let k6v1 = random_batch_vote(rng, &keys[6]); + let k6v1 = random_batch_vote(rng, &keys[6], genesis); // entry for a key outside of the config, should be ignored let k8 = rng.gen(); - let k8v1 = random_batch_vote(rng, &k8); + let k8v1 = random_batch_vote(rng, &k8, genesis); // Update the ones we expect to succeed want.insert(k0v2.clone()); @@ -588,7 +598,7 @@ async fn test_batch_votes() { // no entry at all for keys[7] k8v1.clone(), ]; - votes.update(&attesters, &update).await.unwrap(); + votes.update(&attesters, &genesis, &update).await.unwrap(); assert_eq!(want.0, sub.borrow_and_update().votes); // Invalid signature, should be rejected. @@ -596,14 +606,32 @@ async fn test_batch_votes() { rng, &keys[1], attester::BatchNumber(want.get(&keys[0]).msg.number.0 + 1), + genesis, ); k0v3.key = keys[0].public(); - assert!(votes.update(&attesters, &[Arc::new(k0v3)]).await.is_err()); + assert!(votes + .update(&attesters, &genesis, &[Arc::new(k0v3)]) + .await + .is_err()); + + // Invalid genesis, should be rejected. 
+ let other_genesis = rng.gen(); + let k1v3 = mk_batch( + rng, + &keys[1], + attester::BatchNumber(want.get(&keys[1]).msg.number.0 + 1), + other_genesis, + ); + assert!(votes + .update(&attesters, &genesis, &[Arc::new(k1v3)]) + .await + .is_err()); + assert_eq!(want.0, sub.borrow_and_update().votes); // Duplicate entry in the update, should be rejected. assert!(votes - .update(&attesters, &[k8v1.clone(), k8v1]) + .update(&attesters, &genesis, &[k8v1.clone(), k8v1]) .await .is_err()); assert_eq!(want.0, sub.borrow_and_update().votes); @@ -627,7 +655,9 @@ fn test_batch_votes_quorum() { let batch1 = attester::Batch { number: batch0.number.next(), hash: rng.gen(), + genesis: batch0.genesis, }; + let genesis = batch0.genesis; let mut batches = [(batch0, 0u64), (batch1, 0u64)]; let mut votes = BatchVotes::default(); @@ -636,12 +666,14 @@ fn test_batch_votes_quorum() { let b = usize::from(rng.gen_range(0..100) < 80); let batch = &batches[b].0; let vote = sk.sign_msg(batch.clone()); - votes.update(&attesters, &[Arc::new(vote)]).unwrap(); + votes + .update(&attesters, &genesis, &[Arc::new(vote)]) + .unwrap(); batches[b].1 += attesters.weight(&sk.public()).unwrap(); // Check that as soon as we have quorum it's found. if batches[b].1 >= attesters.threshold() { - let qs = votes.find_quorums(&attesters, |_| false); + let qs = votes.find_quorums(&attesters, &genesis, |_| false); assert!(!qs.is_empty(), "should find quorum"); assert!(qs[0].message == *batch); assert!(qs[0].signatures.keys().count() > 0); @@ -655,11 +687,13 @@ fn test_batch_votes_quorum() { { // Check that a quorum can be skipped assert!(votes - .find_quorums(&attesters, |b| b == quorum.number) + .find_quorums(&attesters, &genesis, |b| b == quorum.number) .is_empty()); } else { // Check that if there was no quoroum then we don't find any. - assert!(votes.find_quorums(&attesters, |_| false).is_empty()); + assert!(votes + .find_quorums(&attesters, &genesis, |_| false) + .is_empty()); } // Check that the minimum batch number prunes data. @@ -707,6 +741,7 @@ async fn test_batch_votes_propagation() { let batch = attester::Batch { number: batch.number, hash: rng.gen(), + genesis: setup.genesis.hash(), }; // Start all nodes. 
@@ -730,7 +765,7 @@ async fn test_batch_votes_propagation() { for (node, key) in nodes.iter().zip(setup.attester_keys.iter()) { node.net .batch_vote_publisher() - .publish(attesters, key, batch.clone()) + .publish(attesters, &setup.genesis.hash(), key, batch.clone()) .await .unwrap(); } diff --git a/node/libs/roles/src/attester/conv.rs b/node/libs/roles/src/attester/conv.rs index e3beec41..7eb3c065 100644 --- a/node/libs/roles/src/attester/conv.rs +++ b/node/libs/roles/src/attester/conv.rs @@ -27,14 +27,16 @@ impl ProtoFmt for Batch { type Proto = proto::Batch; fn read(r: &Self::Proto) -> anyhow::Result { Ok(Self { - hash: read_required(&r.hash).context("hash")?, number: BatchNumber(*required(&r.number).context("number")?), + hash: read_required(&r.hash).context("hash")?, + genesis: read_required(&r.genesis).context("genesis")?, }) } fn build(&self) -> Self::Proto { Self::Proto { number: Some(self.number.0), hash: Some(self.hash.build()), + genesis: Some(self.genesis.build()), } } } diff --git a/node/libs/roles/src/attester/messages/batch.rs b/node/libs/roles/src/attester/messages/batch.rs index 0b88b709..e9554314 100644 --- a/node/libs/roles/src/attester/messages/batch.rs +++ b/node/libs/roles/src/attester/messages/batch.rs @@ -1,4 +1,4 @@ -use super::Signed; +use super::{GenesisHash, Signed}; use crate::{ attester, validator::{Genesis, Payload}, @@ -92,6 +92,12 @@ pub struct Batch { pub number: BatchNumber, /// Hash of the batch. pub hash: BatchHash, + /// Hash of the genesis. + /// + /// This includes the chain ID and the current fork number, which prevents + /// replay attacks from other chains where the same attesters might operate, + /// or from earlier forks, which are created after a revert of L1 batches. + pub genesis: GenesisHash, } /// A certificate for a batch of L2 blocks to be sent to L1. diff --git a/node/libs/roles/src/attester/messages/mod.rs b/node/libs/roles/src/attester/messages/mod.rs index bd68233d..3c28e750 100644 --- a/node/libs/roles/src/attester/messages/mod.rs +++ b/node/libs/roles/src/attester/messages/mod.rs @@ -2,5 +2,6 @@ mod batch; mod msg; +pub use crate::validator::GenesisHash; pub use batch::*; pub use msg::*; diff --git a/node/libs/roles/src/attester/testonly.rs b/node/libs/roles/src/attester/testonly.rs index b4e53f79..ce56f3ad 100644 --- a/node/libs/roles/src/attester/testonly.rs +++ b/node/libs/roles/src/attester/testonly.rs @@ -52,6 +52,7 @@ impl Distribution for Standard { Batch { number: rng.gen(), hash: rng.gen(), + genesis: rng.gen(), } } } diff --git a/node/libs/roles/src/proto/attester/messages.proto b/node/libs/roles/src/proto/attester/messages.proto index 1d117cdd..b6b40ec8 100644 --- a/node/libs/roles/src/proto/attester/messages.proto +++ b/node/libs/roles/src/proto/attester/messages.proto @@ -18,6 +18,7 @@ message BatchHash { message Batch { optional uint64 number = 1; // required optional BatchHash hash = 2; // required + optional validator.GenesisHash genesis = 3; // required } message BatchQC { diff --git a/node/libs/storage/src/testonly/in_memory.rs b/node/libs/storage/src/testonly/in_memory.rs index 9cb583a4..084da660 100644 --- a/node/libs/storage/src/testonly/in_memory.rs +++ b/node/libs/storage/src/testonly/in_memory.rs @@ -20,6 +20,7 @@ struct BlockStoreInner { #[derive(Debug)] struct BatchStoreInner { + genesis: validator::Genesis, persisted: sync::watch::Sender, batches: Mutex>, certs: Mutex>, @@ -69,8 +70,9 @@ impl BlockStore { impl BatchStore { /// New In-memory `BatchStore`. 
- pub fn new(first: attester::BatchNumber) -> Self { + pub fn new(genesis: validator::Genesis, first: attester::BatchNumber) -> Self { Self(Arc::new(BatchStoreInner { + genesis, persisted: sync::watch::channel(BatchStoreState { first, last: None }).0, batches: Mutex::default(), certs: Mutex::default(), @@ -167,6 +169,7 @@ impl PersistentBatchStore for BatchStore { Ok(Some(attester::Batch { number, hash: attester::BatchHash(hash), + genesis: self.0.genesis.hash(), })) } diff --git a/node/libs/storage/src/testonly/mod.rs b/node/libs/storage/src/testonly/mod.rs index 38c4a4ba..a6329ba8 100644 --- a/node/libs/storage/src/testonly/mod.rs +++ b/node/libs/storage/src/testonly/mod.rs @@ -89,7 +89,7 @@ impl TestMemoryStorage { first: validator::BlockNumber, ) -> Self { let im_blocks = in_memory::BlockStore::new(genesis.clone(), first); - let im_batches = in_memory::BatchStore::new(attester::BatchNumber(0)); + let im_batches = in_memory::BatchStore::new(genesis.clone(), attester::BatchNumber(0)); Self::new_with_im(ctx, im_blocks, im_batches).await } diff --git a/node/libs/storage/src/tests.rs b/node/libs/storage/src/tests.rs index 36266bba..11fa5431 100644 --- a/node/libs/storage/src/tests.rs +++ b/node/libs/storage/src/tests.rs @@ -32,7 +32,7 @@ async fn test_inmemory_batch_store() { let mut setup = Setup::new(rng, 3); setup.push_batches(rng, 5); - let store = &testonly::in_memory::BatchStore::new(BatchNumber(0)); + let store = &testonly::in_memory::BatchStore::new(setup.genesis.clone(), BatchNumber(0)); let mut want = vec![]; for batch in &setup.batches { store.queue_next_batch(ctx, batch.clone()).await.unwrap();