From b35a979530296b4bb72bc1b74c56dfb9116adbbe Mon Sep 17 00:00:00 2001 From: Akosh Farkash Date: Thu, 1 Aug 2024 12:59:23 +0100 Subject: [PATCH] BFT-496: Move GenesisHash check to AttestationStatusWatch --- node/actors/executor/src/attestation.rs | 19 ++------ node/actors/executor/src/tests.rs | 13 +++-- .../network/src/gossip/attestation_status.rs | 47 ++++++++++++------- node/actors/network/src/testonly.rs | 10 ++-- 4 files changed, 51 insertions(+), 38 deletions(-) diff --git a/node/actors/executor/src/attestation.rs b/node/actors/executor/src/attestation.rs index aca4eeec..42abc62c 100644 --- a/node/actors/executor/src/attestation.rs +++ b/node/actors/executor/src/attestation.rs @@ -136,7 +136,6 @@ pub struct AttestationStatusRunner { status: Arc, client: Box, poll_interval: time::Duration, - genesis: attester::GenesisHash, } impl AttestationStatusRunner { @@ -149,12 +148,14 @@ impl AttestationStatusRunner { poll_interval: time::Duration, genesis: attester::GenesisHash, ) -> ctx::Result<(Arc, Self)> { - let status = Arc::new(AttestationStatusWatch::new(attester::BatchNumber::default())); + let status = Arc::new(AttestationStatusWatch::new( + genesis, + attester::BatchNumber::default(), + )); let mut runner = Self { status: status.clone(), client, poll_interval, - genesis, }; runner.poll_until_some(ctx).await?; Ok((status, runner)) @@ -197,17 +198,7 @@ impl AttestationStatusRunner { loop { match self.client.attestation_status(ctx).await { Ok(Some((genesis, batch_number))) => { - // A change in the genesis wouldn't necessarily be a problem, but at the moment - // the machinery the relies on the status notification cannot handle reorgs - // without restarting the application. On external nodes this happens by polling - // the main node API for any update in the genesis; on the main node it shouldn't - // happen at the moment because `zksync-era` first adjusts the genesis and then - // creates this component. If handing genesis changes on the fly becomes possible - // we can remove this and let the updated status propagate through the system. - if self.genesis != genesis { - return Err(anyhow::format_err!("the attestation status genesis changed since we started the runner: {:?} -> {:?}", self.genesis, genesis).into()); - } - self.status.update(batch_number).await; + self.status.update(genesis, batch_number).await?; return Ok(()); } Ok(None) => { diff --git a/node/actors/executor/src/tests.rs b/node/actors/executor/src/tests.rs index a071daed..a13157a8 100644 --- a/node/actors/executor/src/tests.rs +++ b/node/actors/executor/src/tests.rs @@ -32,8 +32,11 @@ fn config(cfg: &network::Config) -> Config { /// The test executors below are not running with attesters, so we just create an [AttestationStatusWatch] /// that will never be updated. -fn never_attest() -> Arc { - Arc::new(AttestationStatusWatch::new(attester::BatchNumber(0))) +fn never_attest(genesis: &validator::Genesis) -> Arc { + Arc::new(AttestationStatusWatch::new( + genesis.hash(), + attester::BatchNumber::default(), + )) } fn validator( @@ -42,6 +45,7 @@ fn validator( batch_store: Arc, replica_store: impl ReplicaStore, ) -> Executor { + let attestation_status = never_attest(block_store.genesis()); Executor { config: config(cfg), block_store, @@ -52,7 +56,7 @@ fn validator( payload_manager: Box::new(bft::testonly::RandomPayload(1000)), }), attester: None, - attestation_status: never_attest(), + attestation_status, } } @@ -61,13 +65,14 @@ fn fullnode( block_store: Arc, batch_store: Arc, ) -> Executor { + let attestation_status = never_attest(block_store.genesis()); Executor { config: config(cfg), block_store, batch_store, validator: None, attester: None, - attestation_status: never_attest(), + attestation_status, } } diff --git a/node/actors/network/src/gossip/attestation_status.rs b/node/actors/network/src/gossip/attestation_status.rs index bea3a0ae..e6a06f67 100644 --- a/node/actors/network/src/gossip/attestation_status.rs +++ b/node/actors/network/src/gossip/attestation_status.rs @@ -13,20 +13,13 @@ pub struct AttestationStatus { /// The node is expected to poll the main node during initialization until /// the batch to start from is established. pub next_batch_to_attest: attester::BatchNumber, - // The hash of the genesis of the chain to which the L1 batches belong. - // - // A change in this value would indicate a reorg on the main node. - // On the main node itself this is not expected to change because - // a reorg involves a restart, and a regenesis happens before the - // executor is started. If it could happen, it could be used to - // signal to the `BatchVotes` that votes need to be cleared and - // potentially discarded votes received over gossip would need - // to be re-acquired (which doesn't happen at the moment unless - // the connection is re-established). - // - // It is not added yet as the system is not expected to be able - // to handle changes in the value. - //pub genesis: attester::GenesisHash, + /// The hash of the genesis of the chain to which the L1 batches belong. + /// + /// We don't expect to handle a regenesis on the fly without restarting the + /// node, so this value is not expected to change; it's here only to stop + /// any attempt at updating the status with a batch number that refers + /// to a different fork. + pub genesis: attester::GenesisHash, } /// The subscription over the attestation status which voters can monitor for change. @@ -45,8 +38,12 @@ impl fmt::Debug for AttestationStatusWatch { impl AttestationStatusWatch { /// Create a new watch going from a specific batch number. - pub fn new(next_batch_to_attest: attester::BatchNumber) -> Self { + pub fn new( + genesis: attester::GenesisHash, + next_batch_to_attest: attester::BatchNumber, + ) -> Self { Self(Watch::new(AttestationStatus { + genesis, next_batch_to_attest, })) } @@ -57,8 +54,25 @@ impl AttestationStatusWatch { } /// Set the next batch number to attest on and notify subscribers it changed. - pub async fn update(&self, next_batch_to_attest: attester::BatchNumber) { + /// + /// Fails if the genesis we want to update to is not the same as the watch was started with, + /// because the rest of the system is not expected to be able to handle reorgs without a + /// restart of the node. + pub async fn update( + &self, + genesis: attester::GenesisHash, + next_batch_to_attest: attester::BatchNumber, + ) -> anyhow::Result<()> { let this = self.0.lock().await; + { + let status = this.borrow(); + anyhow::ensure!( + status.genesis == genesis, + "the attestation status genesis changed: {:?} -> {:?}", + status.genesis, + genesis + ); + } this.send_if_modified(|status| { if status.next_batch_to_attest == next_batch_to_attest { return false; @@ -66,5 +80,6 @@ impl AttestationStatusWatch { status.next_batch_to_attest = next_batch_to_attest; true }); + Ok(()) } } diff --git a/node/actors/network/src/testonly.rs b/node/actors/network/src/testonly.rs index 7d72dd95..2e43a79d 100644 --- a/node/actors/network/src/testonly.rs +++ b/node/actors/network/src/testonly.rs @@ -173,8 +173,8 @@ impl InstanceRunner { s.spawn_bg(self.net_runner.run(ctx)); s.spawn_bg(async { loop { - if let Ok(Some((_, bn))) = self.batch_store.attestation_status(ctx).await { - self.attestation_status.update(bn).await; + if let Ok(Some((g, bn))) = self.batch_store.attestation_status(ctx).await { + self.attestation_status.update(g, bn).await?; } if ctx.sleep(time::Duration::seconds(1)).await.is_err() { return Ok(()); @@ -199,8 +199,10 @@ impl Instance { ) -> (Self, InstanceRunner) { // Semantically we'd want this to be created at the same level as the stores, // but doing so would introduce a lot of extra cruft in setting up tests. - let attestation_status = - Arc::new(AttestationStatusWatch::new(attester::BatchNumber::default())); + let attestation_status = Arc::new(AttestationStatusWatch::new( + block_store.genesis().hash(), + attester::BatchNumber::default(), + )); let (actor_pipe, dispatcher_pipe) = pipe::new(); let (net, net_runner) = Network::new(