From cedbfd627cdca018df2867677e75c923f778e5cb Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Fri, 3 Jan 2025 15:03:04 -0500 Subject: [PATCH 01/28] feat: add a way to query the highest available tenure --- .../nakamoto/download_state_machine.rs | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/stackslib/src/net/download/nakamoto/download_state_machine.rs b/stackslib/src/net/download/nakamoto/download_state_machine.rs index 4c509ed5c1..c23a3c91ae 100644 --- a/stackslib/src/net/download/nakamoto/download_state_machine.rs +++ b/stackslib/src/net/download/nakamoto/download_state_machine.rs @@ -958,14 +958,14 @@ impl NakamotoDownloadStateMachine { return false; } - let (unconfirmed_tenure_opt, confirmed_tenure_opt) = Self::find_unconfirmed_tenure_ids( + let (confirmed_tenure_opt, unconfirmed_tenure_opt) = Self::find_unconfirmed_tenure_ids( wanted_tenures, prev_wanted_tenures, available_tenures, ); debug!( "Check unconfirmed tenures: highest two available tenures are {:?}, {:?}", - &unconfirmed_tenure_opt, &confirmed_tenure_opt + &confirmed_tenure_opt, &unconfirmed_tenure_opt ); // see if we need any tenures still @@ -980,11 +980,11 @@ impl NakamotoDownloadStateMachine { }); if !is_available_and_processed { - let is_unconfirmed = unconfirmed_tenure_opt + let is_unconfirmed = confirmed_tenure_opt .as_ref() .map(|ch| *ch == wt.tenure_id_consensus_hash) .unwrap_or(false) - || confirmed_tenure_opt + || unconfirmed_tenure_opt .as_ref() .map(|ch| *ch == wt.tenure_id_consensus_hash) .unwrap_or(false); @@ -1553,6 +1553,24 @@ impl NakamotoDownloadStateMachine { } } + /// Find the highest available tenure ID. + /// Returns Some(consensus_hash) for the highest tenure available from at least one node. + /// Returns None if no tenures are available from any peer. + pub fn find_highest_available_tenure(&self) -> Option { + let (t1, t2) = Self::find_unconfirmed_tenure_ids( + &self.wanted_tenures, + self.prev_wanted_tenures.as_ref().unwrap_or(&vec![]), + &self.available_tenures, + ); + if let Some(ch) = t2 { + return Some(ch); + } else if let Some(ch) = t1 { + return Some(ch); + } else { + return None; + } + } + /// Go and get tenures. Returns list of blocks per tenure, identified by consensus hash. /// The blocks will be sorted by height, but may not be contiguous. pub fn run( From 8e283e672a33f7c46d29b7df0acfbff6efec270c Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Fri, 3 Jan 2025 15:03:26 -0500 Subject: [PATCH 02/28] chore: improve documentation on downloader state --- .../net/download/nakamoto/tenure_downloader_set.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/stackslib/src/net/download/nakamoto/tenure_downloader_set.rs b/stackslib/src/net/download/nakamoto/tenure_downloader_set.rs index e5b796181a..e36a0a931e 100644 --- a/stackslib/src/net/download/nakamoto/tenure_downloader_set.rs +++ b/stackslib/src/net/download/nakamoto/tenure_downloader_set.rs @@ -113,9 +113,11 @@ pub struct NakamotoTenureDownloaderSet { /// The set of tenures that have been successfully downloaded (but possibly not yet stored or /// processed) pub(crate) completed_tenures: HashSet, - /// Number of times a tenure download was attempted + /// Number of times a tenure download was attempted. This counter is incremented before the + /// downloader starts pub(crate) attempted_tenures: HashMap, - /// Number of times a tenure download failed + /// Number of times a tenure download failed. This counter is incremented after the downloader + /// finishes in an error state. pub(crate) attempt_failed_tenures: HashMap, /// Peers that should be deprioritized because they're dead (maps to when they can be used /// again) @@ -451,7 +453,13 @@ impl NakamotoTenureDownloaderSet { continue; }; if tenure_info.processed { - // we already have this tenure + // we already have tried to download this tenure, + // but do remove it from `self.completed_tenures` in order to (1) avoid a memory + // leak, and (2) account for the chance that the end-block has changed due to a + // Bitcoin reorg. This way, a subsequent call with the same tenure in `schedule` + // will succeed in starting a downloader. Since `schedule` is derived from on-disk + // state, the only way a "completed" tenure will show up in `schedule` again is if + // it is later determined that the tenure we stored is incomplete or not canonical. debug!("Already have processed tenure {ch}"); self.completed_tenures .remove(&CompletedTenure::from(tenure_info)); From 93fdd22e13bd545e96a02e9cb5b1d2b80f0408c9 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Fri, 3 Jan 2025 15:03:40 -0500 Subject: [PATCH 03/28] chore: report highest available tenure from downloader via NetworkResult --- stackslib/src/net/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/stackslib/src/net/mod.rs b/stackslib/src/net/mod.rs index 37d67cfa61..fa1f697069 100644 --- a/stackslib/src/net/mod.rs +++ b/stackslib/src/net/mod.rs @@ -1523,6 +1523,8 @@ pub struct NetworkResult { pub rc_consensus_hash: ConsensusHash, /// The current StackerDB configs pub stacker_db_configs: HashMap, + /// Highest available tenure, if known + pub highest_available_tenure: Option, } impl NetworkResult { @@ -1537,6 +1539,7 @@ impl NetworkResult { stacks_tip_height: u64, rc_consensus_hash: ConsensusHash, stacker_db_configs: HashMap, + highest_available_tenure: Option, ) -> NetworkResult { NetworkResult { stacks_tip, @@ -1567,6 +1570,7 @@ impl NetworkResult { stacks_tip_height, rc_consensus_hash, stacker_db_configs, + highest_available_tenure, } } From b90d5d5d43aec34884d207fcff0e3cc790310021 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Fri, 3 Jan 2025 15:03:57 -0500 Subject: [PATCH 04/28] chore: pass through highest available tenure --- stackslib/src/net/p2p.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/stackslib/src/net/p2p.rs b/stackslib/src/net/p2p.rs index 8d85291fb6..fabd7b43f4 100644 --- a/stackslib/src/net/p2p.rs +++ b/stackslib/src/net/p2p.rs @@ -5266,6 +5266,10 @@ impl PeerNetwork { self.stacks_tip.height, self.chain_view.rc_consensus_hash.clone(), self.get_stacker_db_configs_owned(), + self.block_downloader_nakamoto + .as_ref() + .map(|dler| dler.find_highest_available_tenure()) + .flatten(), ); network_result.consume_unsolicited(unsolicited_buffered_messages); From 2429f50f5ae8e7658fc059e1315793c3cebc8452 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Fri, 3 Jan 2025 15:04:12 -0500 Subject: [PATCH 05/28] chore: API sync --- stackslib/src/net/tests/mod.rs | 6 ++++++ stackslib/src/net/tests/relay/epoch2x.rs | 1 + 2 files changed, 7 insertions(+) diff --git a/stackslib/src/net/tests/mod.rs b/stackslib/src/net/tests/mod.rs index 6034027ae5..c073071b06 100644 --- a/stackslib/src/net/tests/mod.rs +++ b/stackslib/src/net/tests/mod.rs @@ -1172,6 +1172,7 @@ fn test_network_result_update() { 1, ConsensusHash([0x11; 20]), HashMap::new(), + None, ); let mut network_result_2 = NetworkResult::new( @@ -1185,6 +1186,7 @@ fn test_network_result_update() { 2, ConsensusHash([0x22; 20]), HashMap::new(), + None, ); let nk1 = NeighborKey { @@ -1622,6 +1624,7 @@ fn test_network_result_update() { 10, ConsensusHash([0xaa; 20]), HashMap::new(), + None, ); let mut new = old.clone(); @@ -1681,6 +1684,7 @@ fn test_network_result_update() { 10, ConsensusHash([0xaa; 20]), HashMap::new(), + None, ); let mut new = old.clone(); @@ -1740,6 +1744,7 @@ fn test_network_result_update() { 11, ConsensusHash([0xbb; 20]), HashMap::new(), + None, ); old.nakamoto_blocks.insert(nblk1.block_id(), nblk1.clone()); old.pushed_nakamoto_blocks.insert( @@ -1764,6 +1769,7 @@ fn test_network_result_update() { 11, ConsensusHash([0xbb; 20]), HashMap::new(), + None, ); let mut new_pushed = new.clone(); diff --git a/stackslib/src/net/tests/relay/epoch2x.rs b/stackslib/src/net/tests/relay/epoch2x.rs index c4d4f7ee31..c2c0b19abb 100644 --- a/stackslib/src/net/tests/relay/epoch2x.rs +++ b/stackslib/src/net/tests/relay/epoch2x.rs @@ -3101,6 +3101,7 @@ fn process_new_blocks_rejects_problematic_asts() { 0, ConsensusHash([0x01; 20]), HashMap::new(), + None, ); network_result.consume_unsolicited(unsolicited); From 2b45248d6445152099a0046987f0bd4b86017f65 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Fri, 3 Jan 2025 15:04:25 -0500 Subject: [PATCH 06/28] feat: add way to set IBD --- testnet/stacks-node/src/globals.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/testnet/stacks-node/src/globals.rs b/testnet/stacks-node/src/globals.rs index 2a9a601723..516389ae67 100644 --- a/testnet/stacks-node/src/globals.rs +++ b/testnet/stacks-node/src/globals.rs @@ -129,12 +129,17 @@ impl Globals { } } - /// Does the inventory sync watcher think we still need to - /// catch up to the chain tip? + /// Are we still in the initial block download period? As in, are there more sortitions to + /// process and/or more tenure-start blocks to process? pub fn in_initial_block_download(&self) -> bool { self.sync_comms.get_ibd() } + /// Flag whether or not the node is in IBD + pub fn set_initial_block_download(&mut self, ibd: bool) { + self.sync_comms.set_ibd(ibd); + } + /// Get the last sortition processed by the relayer thread pub fn get_last_sortition(&self) -> Option { self.last_sortition From 6979a6488fa4244f280fb3945e36cfb8dbcf5775 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Fri, 3 Jan 2025 15:04:39 -0500 Subject: [PATCH 07/28] feat: infer IBD from burnchain IBD and stacks IBD --- testnet/stacks-node/src/nakamoto_node/peer.rs | 2 +- .../stacks-node/src/nakamoto_node/relayer.rs | 36 ++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/peer.rs b/testnet/stacks-node/src/nakamoto_node/peer.rs index 3c4e6a98f4..9f6ef7ebbc 100644 --- a/testnet/stacks-node/src/nakamoto_node/peer.rs +++ b/testnet/stacks-node/src/nakamoto_node/peer.rs @@ -235,7 +235,7 @@ impl PeerThread { fee_estimator: Option<&Box>, ) -> bool { // initial block download? - let ibd = self.globals.sync_comms.get_ibd(); + let ibd = self.globals.in_initial_block_download(); let download_backpressure = self .results_with_data .as_ref() diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index 8cc1293acd..ed08cb6438 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -64,6 +64,7 @@ use crate::neon_node::{ }; use crate::run_loop::nakamoto::{Globals, RunLoop}; use crate::run_loop::RegisteredKey; +use crate::syncctl::PoxSyncWatchdog; use crate::BitcoinRegtestController; /// Command types for the Nakamoto relayer thread, issued to it by other threads @@ -325,6 +326,39 @@ impl RelayerThread { net_result.burn_height ); + let cur_sn = SortitionDB::get_canonical_burn_chain_tip(self.sortdb.conn()) + .expect("FATAL: failed to query sortition DB"); + + let headers_height = self.bitcoin_controller.get_headers_height(); + + // are we still processing sortitions? + let burnchain_ibd = PoxSyncWatchdog::infer_initial_burnchain_block_download( + &self.burnchain, + cur_sn.block_height, + headers_height, + ); + + // if the highest available tenure is known, then is it the same as the ongoing stacks + // tenure? + let stacks_ibd = net_result + .highest_available_tenure + .as_ref() + .map(|ch| *ch == net_result.rc_consensus_hash) + .unwrap_or(false); + + debug!("Relayer: set initial block download inference ({})", burnchain_ibd || stacks_ibd; + "burnchain_ibd" => %burnchain_ibd, + "stacks_ibd" => %stacks_ibd, + "highest_available_tenure" => ?net_result.highest_available_tenure, + "rc_consensus_hash" => %net_result.rc_consensus_hash, + "cur_sn.block_height" => cur_sn.block_height, + "burnchain_headers_height" => headers_height); + + // we're in IBD if we're either still processing sortitions, or the highest available + // tenure is different from the highest processed tenure. + self.globals + .set_initial_block_download(burnchain_ibd || stacks_ibd); + if self.last_network_block_height != net_result.burn_height { // burnchain advanced; disable mining until we also do a download pass. self.last_network_block_height = net_result.burn_height; @@ -342,7 +376,7 @@ impl RelayerThread { &mut self.sortdb, &mut self.chainstate, &mut self.mempool, - self.globals.sync_comms.get_ibd(), + self.globals.in_initial_block_download(), Some(&self.globals.coord_comms), Some(&self.event_dispatcher), ) From 9331e250063d958e5c5f6e88f9e06f464b45eda5 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Fri, 3 Jan 2025 15:04:54 -0500 Subject: [PATCH 08/28] fix: load IBD from globals --- testnet/stacks-node/src/run_loop/nakamoto.rs | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/testnet/stacks-node/src/run_loop/nakamoto.rs b/testnet/stacks-node/src/run_loop/nakamoto.rs index 335fb325d8..bb52a0a05e 100644 --- a/testnet/stacks-node/src/run_loop/nakamoto.rs +++ b/testnet/stacks-node/src/run_loop/nakamoto.rs @@ -545,18 +545,7 @@ impl RunLoop { } let remote_chain_height = burnchain.get_headers_height() - 1; - - // wait for the p2p state-machine to do at least one pass - debug!("Runloop: Wait until Stacks block downloads reach a quiescent state before processing more burnchain blocks"; "remote_chain_height" => remote_chain_height, "local_chain_height" => burnchain_height); - - // TODO: for now, we just set initial block download false. - // I think that the sync watchdog probably needs to change a fair bit - // for nakamoto. There may be some opportunity to refactor this runloop - // as well (e.g., the `mine_start` should be integrated with the - // watchdog so that there's just one source of truth about ibd), - // but I think all of this can be saved for post-neon work. - let ibd = false; - self.pox_watchdog_comms.set_ibd(ibd); + let ibd = globals.in_initial_block_download(); // calculate burnchain sync percentage let percent: f64 = if remote_chain_height > 0 { From 6d3903344f1a45a5ff562da05c401c2e55e9d5d1 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Fri, 3 Jan 2025 15:05:02 -0500 Subject: [PATCH 09/28] chore: document pox_sync_wait() better --- testnet/stacks-node/src/syncctl.rs | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/testnet/stacks-node/src/syncctl.rs b/testnet/stacks-node/src/syncctl.rs index 488234d21d..82fda5b405 100644 --- a/testnet/stacks-node/src/syncctl.rs +++ b/testnet/stacks-node/src/syncctl.rs @@ -130,7 +130,7 @@ impl PoxSyncWatchdog { /// Are we in the initial burnchain block download? i.e. is the burn tip snapshot far enough away /// from the burnchain height that we should be eagerly downloading snapshots? - fn infer_initial_burnchain_block_download( + pub fn infer_initial_burnchain_block_download( burnchain: &Burnchain, last_processed_height: u64, burnchain_height: u64, @@ -151,9 +151,18 @@ impl PoxSyncWatchdog { ibd } - /// Wait until the next PoX anchor block arrives. - /// We know for a fact that they all exist for Epochs 2.5 and earlier, in both mainnet and - /// testnet. + /// This code path is only used for Epoch 2.5 and earlier. + /// + /// Wait to poll the burnchain for its height, and compute the maximum height up to which we + /// should process sortitions. + /// + /// This code used to be much more elaborate, and would use a set of heuristics to determine + /// whether or not there could be an outstanding PoX anchor block to try waiting for before + /// attempting to process sortitions without it. However, we now know for a fact that in epoch + /// 2.5 and earlier, in both mainnet and testnet, there are no missing anchor blocks, so this + /// code instead just sleeps for `[burnchain].poll_time_secs` and computes the burn block height of + /// the start of the first reward cycle for which we don't yet have an anchor block. + /// /// Return (still-in-ibd?, maximum-burnchain-sync-height) on success. pub fn pox_sync_wait( &mut self, From b8011da7838cc9116b2c8ecb96ef082a7ba2ec4b Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Fri, 3 Jan 2025 23:59:42 -0500 Subject: [PATCH 10/28] fix: immediately compute highest-available tenure since available_tenures is mutated over the course of the state machine's lifetime --- .../net/download/nakamoto/download_state_machine.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/stackslib/src/net/download/nakamoto/download_state_machine.rs b/stackslib/src/net/download/nakamoto/download_state_machine.rs index c23a3c91ae..67da0597ad 100644 --- a/stackslib/src/net/download/nakamoto/download_state_machine.rs +++ b/stackslib/src/net/download/nakamoto/download_state_machine.rs @@ -109,6 +109,8 @@ pub struct NakamotoDownloadStateMachine { tenure_block_ids: HashMap, /// Who can serve a given tenure pub(crate) available_tenures: HashMap>, + /// What is the highest available tenure, if known? + pub(crate) highest_available_tenure: Option, /// Confirmed tenure download schedule pub(crate) tenure_download_schedule: VecDeque, /// Unconfirmed tenure download schedule @@ -140,6 +142,7 @@ impl NakamotoDownloadStateMachine { state: NakamotoDownloadState::Confirmed, tenure_block_ids: HashMap::new(), available_tenures: HashMap::new(), + highest_available_tenure: None, tenure_download_schedule: VecDeque::new(), unconfirmed_tenure_download_schedule: VecDeque::new(), tenure_downloads: NakamotoTenureDownloaderSet::new(), @@ -862,6 +865,14 @@ impl NakamotoDownloadStateMachine { self.tenure_download_schedule = schedule; self.tenure_block_ids = tenure_block_ids; self.available_tenures = available; + + let highest_available_tenure = self.find_highest_available_tenure(); + self.highest_available_tenure = highest_available_tenure; + + test_debug!( + "new highest_available_tenure: {:?}", + &self.highest_available_tenure + ); } /// Update our tenure download state machines, given our download schedule, our peers' tenure @@ -1556,7 +1567,7 @@ impl NakamotoDownloadStateMachine { /// Find the highest available tenure ID. /// Returns Some(consensus_hash) for the highest tenure available from at least one node. /// Returns None if no tenures are available from any peer. - pub fn find_highest_available_tenure(&self) -> Option { + fn find_highest_available_tenure(&self) -> Option { let (t1, t2) = Self::find_unconfirmed_tenure_ids( &self.wanted_tenures, self.prev_wanted_tenures.as_ref().unwrap_or(&vec![]), From e1feabe9b7f50c415b22009f88fb3214c63edc0e Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Sat, 4 Jan 2025 00:00:12 -0500 Subject: [PATCH 11/28] chore: pass ongoing stacks tenure ID to NetworkResult --- stackslib/src/net/mod.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/stackslib/src/net/mod.rs b/stackslib/src/net/mod.rs index 0e8472457e..815aba2a15 100644 --- a/stackslib/src/net/mod.rs +++ b/stackslib/src/net/mod.rs @@ -1469,6 +1469,8 @@ pub const DENY_MIN_BAN_DURATION: u64 = 2; pub struct NetworkResult { /// Stacks chain tip when we began this pass pub stacks_tip: StacksBlockId, + /// Stacks chain tip's tenure ID when we began this pass + pub stacks_tip_tenure_id: ConsensusHash, /// PoX ID as it was when we begin downloading blocks (set if we have downloaded new blocks) pub download_pox_id: Option, /// Network messages we received but did not handle @@ -1519,7 +1521,11 @@ pub struct NetworkResult { pub coinbase_height: u64, /// The observed stacks tip height (different in Nakamoto from coinbase height) pub stacks_tip_height: u64, - /// The consensus hash of the stacks tip (prefixed `rc_` for historical reasons) + /// The consensus hash of the highest complete Stacks tenure at the time the canonical + /// sortition tip was processed. Not guaranteed to be the same across all nodes for the same + /// given sortition tip. + /// + /// TODO: remove this and use canonical Stacks tenure ID instead. pub rc_consensus_hash: ConsensusHash, /// The current StackerDB configs pub stacker_db_configs: HashMap, @@ -1530,6 +1536,7 @@ pub struct NetworkResult { impl NetworkResult { pub fn new( stacks_tip: StacksBlockId, + stacks_tip_tenure_id: ConsensusHash, num_state_machine_passes: u64, num_inv_sync_passes: u64, num_download_passes: u64, @@ -1543,6 +1550,7 @@ impl NetworkResult { ) -> NetworkResult { NetworkResult { stacks_tip, + stacks_tip_tenure_id, unhandled_messages: HashMap::new(), download_pox_id: None, blocks: vec![], From fb619898c5633f711594e4f0330aa96ff9d8fbc4 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Sat, 4 Jan 2025 00:00:35 -0500 Subject: [PATCH 12/28] chore: pass highest available tenure from downloader to NetworkResult --- stackslib/src/net/p2p.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stackslib/src/net/p2p.rs b/stackslib/src/net/p2p.rs index 0a30c8e00b..15542e9d40 100644 --- a/stackslib/src/net/p2p.rs +++ b/stackslib/src/net/p2p.rs @@ -5257,6 +5257,7 @@ impl PeerNetwork { ); let mut network_result = NetworkResult::new( self.stacks_tip.block_id(), + self.stacks_tip.consensus_hash.clone(), self.num_state_machine_passes, self.num_inv_sync_passes, self.num_downloader_passes, @@ -5268,7 +5269,7 @@ impl PeerNetwork { self.get_stacker_db_configs_owned(), self.block_downloader_nakamoto .as_ref() - .map(|dler| dler.find_highest_available_tenure()) + .map(|dler| dler.highest_available_tenure.clone()) .flatten(), ); From 4102306ebe4a84ef5bd4a8fe76fc55b24f6723dd Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Sat, 4 Jan 2025 00:00:52 -0500 Subject: [PATCH 13/28] chore: API sync --- stackslib/src/net/tests/mod.rs | 6 ++++++ stackslib/src/net/tests/relay/epoch2x.rs | 1 + 2 files changed, 7 insertions(+) diff --git a/stackslib/src/net/tests/mod.rs b/stackslib/src/net/tests/mod.rs index b555d012e8..b2bc84694a 100644 --- a/stackslib/src/net/tests/mod.rs +++ b/stackslib/src/net/tests/mod.rs @@ -1163,6 +1163,7 @@ fn test_boot_nakamoto_peer() { fn test_network_result_update() { let mut network_result_1 = NetworkResult::new( StacksBlockId([0x11; 32]), + ConsensusHash([0x01; 20]), 1, 1, 1, @@ -1177,6 +1178,7 @@ fn test_network_result_update() { let mut network_result_2 = NetworkResult::new( StacksBlockId([0x22; 32]), + ConsensusHash([0x01; 20]), 2, 2, 2, @@ -1615,6 +1617,7 @@ fn test_network_result_update() { // stackerdb uploaded chunks get consolidated correctly let mut old = NetworkResult::new( StacksBlockId([0xaa; 32]), + ConsensusHash([0x01; 20]), 10, 10, 10, @@ -1675,6 +1678,7 @@ fn test_network_result_update() { // stackerdb pushed chunks get consolidated correctly let mut old = NetworkResult::new( StacksBlockId([0xaa; 32]), + ConsensusHash([0x01; 20]), 10, 10, 10, @@ -1735,6 +1739,7 @@ fn test_network_result_update() { // nakamoto blocks obtained via download, upload, or pushed get consoldated let mut old = NetworkResult::new( StacksBlockId([0xbb; 32]), + ConsensusHash([0x01; 20]), 11, 11, 11, @@ -1760,6 +1765,7 @@ fn test_network_result_update() { let new = NetworkResult::new( StacksBlockId([0xbb; 32]), + ConsensusHash([0x01; 20]), 11, 11, 11, diff --git a/stackslib/src/net/tests/relay/epoch2x.rs b/stackslib/src/net/tests/relay/epoch2x.rs index 025ad7a2cb..54fe7deebe 100644 --- a/stackslib/src/net/tests/relay/epoch2x.rs +++ b/stackslib/src/net/tests/relay/epoch2x.rs @@ -3092,6 +3092,7 @@ fn process_new_blocks_rejects_problematic_asts() { let mut network_result = NetworkResult::new( peer.network.stacks_tip.block_id(), + ConsensusHash([0x01; 20]), 0, 0, 0, From 5049ee4f7871d813a9d66769767a4db564b5f97d Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Sat, 4 Jan 2025 00:01:12 -0500 Subject: [PATCH 14/28] docs: get_headers_height() is 1-indexed --- testnet/stacks-node/src/burnchains/bitcoin_regtest_controller.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/testnet/stacks-node/src/burnchains/bitcoin_regtest_controller.rs b/testnet/stacks-node/src/burnchains/bitcoin_regtest_controller.rs index f3aaa95ab5..852ed71e83 100644 --- a/testnet/stacks-node/src/burnchains/bitcoin_regtest_controller.rs +++ b/testnet/stacks-node/src/burnchains/bitcoin_regtest_controller.rs @@ -2179,6 +2179,7 @@ impl BurnchainController for BitcoinRegtestController { } } + /// NOTE: this is 1-indexed. If there are 10 headers, then this returns 11 fn get_headers_height(&self) -> u64 { let (_, network_id) = self.config.burnchain.get_bitcoin_network(); let spv_client = SpvClient::new( From 4506b46576a4af9874f532699fdd91d1da911146 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Sat, 4 Jan 2025 00:01:30 -0500 Subject: [PATCH 15/28] fix: the highest available tenure may be lower than the ongoing stacks tenure, so check this when inferring IBD --- .../stacks-node/src/nakamoto_node/relayer.rs | 76 +++++++++++++------ 1 file changed, 52 insertions(+), 24 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index ed08cb6438..dcfb48cb8c 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -64,7 +64,6 @@ use crate::neon_node::{ }; use crate::run_loop::nakamoto::{Globals, RunLoop}; use crate::run_loop::RegisteredKey; -use crate::syncctl::PoxSyncWatchdog; use crate::BitcoinRegtestController; /// Command types for the Nakamoto relayer thread, issued to it by other threads @@ -316,41 +315,58 @@ impl RelayerThread { || !self.config.miner.wait_for_block_download } - /// Handle a NetworkResult from the p2p/http state machine. Usually this is the act of - /// * preprocessing and storing new blocks and microblocks - /// * relaying blocks, microblocks, and transacctions - /// * updating unconfirmed state views - pub fn process_network_result(&mut self, mut net_result: NetworkResult) { - debug!( - "Relayer: Handle network result (from {})", - net_result.burn_height - ); - + /// Compute and set the global IBD flag from a NetworkResult + pub fn infer_ibd(&mut self, net_result: &NetworkResult) { let cur_sn = SortitionDB::get_canonical_burn_chain_tip(self.sortdb.conn()) .expect("FATAL: failed to query sortition DB"); - let headers_height = self.bitcoin_controller.get_headers_height(); + // (remember, get_headers_height() is 1-indexed, so account for that with -1) + let headers_height = self + .bitcoin_controller + .get_headers_height() + .saturating_sub(1); // are we still processing sortitions? - let burnchain_ibd = PoxSyncWatchdog::infer_initial_burnchain_block_download( - &self.burnchain, - cur_sn.block_height, - headers_height, - ); + let burnchain_ibd = cur_sn.block_height != headers_height; // if the highest available tenure is known, then is it the same as the ongoing stacks - // tenure? - let stacks_ibd = net_result - .highest_available_tenure - .as_ref() - .map(|ch| *ch == net_result.rc_consensus_hash) - .unwrap_or(false); + // tenure? If so, then we're not IBD. If not, then we're IBD. + // If it is not known, then we're not in IBD. + let stacks_ibd = + if let Some(highest_available_tenure) = net_result.highest_available_tenure.as_ref() { + if *highest_available_tenure != net_result.stacks_tip_tenure_id { + // in IBD if the highest available tenure comes after the stacks tip (not always a + // given, because neighbors may not report all the data we have). + let highest_available_tenure_sn = SortitionDB::get_block_snapshot_consensus( + self.sortdb.conn(), + highest_available_tenure, + ) + .expect("FATAL: failed to query sortition DB") + .expect("FATAL: highest available tenure not in sortition DB"); + + let stacks_tip_tenure_sn = SortitionDB::get_block_snapshot_consensus( + self.sortdb.conn(), + &net_result.stacks_tip_tenure_id, + ) + .expect("FATAL: failed to query sortition DB") + .expect("FATAL: highest available tenure not in sortition DB"); + + highest_available_tenure_sn.block_height > stacks_tip_tenure_sn.block_height + } else { + // they're the same, so not in IBD + false + } + } else { + // we don't know the highest available tenure, so assume that we have it (and thus are + // not in IBD) + false + }; debug!("Relayer: set initial block download inference ({})", burnchain_ibd || stacks_ibd; "burnchain_ibd" => %burnchain_ibd, "stacks_ibd" => %stacks_ibd, "highest_available_tenure" => ?net_result.highest_available_tenure, - "rc_consensus_hash" => %net_result.rc_consensus_hash, + "stacks_tip_tenure_id" => %net_result.stacks_tip_tenure_id, "cur_sn.block_height" => cur_sn.block_height, "burnchain_headers_height" => headers_height); @@ -358,6 +374,18 @@ impl RelayerThread { // tenure is different from the highest processed tenure. self.globals .set_initial_block_download(burnchain_ibd || stacks_ibd); + } + + /// Handle a NetworkResult from the p2p/http state machine. Usually this is the act of + /// * preprocessing and storing new blocks and microblocks + /// * relaying blocks, microblocks, and transacctions + /// * updating unconfirmed state views + pub fn process_network_result(&mut self, mut net_result: NetworkResult) { + debug!( + "Relayer: Handle network result (from {})", + net_result.burn_height + ); + self.infer_ibd(&net_result); if self.last_network_block_height != net_result.burn_height { // burnchain advanced; disable mining until we also do a download pass. From 6dabc04fe95cf47a7ca17f5485fb09ada4ce2a89 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Sat, 4 Jan 2025 00:01:52 -0500 Subject: [PATCH 16/28] chore: make method private again --- testnet/stacks-node/src/syncctl.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testnet/stacks-node/src/syncctl.rs b/testnet/stacks-node/src/syncctl.rs index 82fda5b405..dd2157508e 100644 --- a/testnet/stacks-node/src/syncctl.rs +++ b/testnet/stacks-node/src/syncctl.rs @@ -130,7 +130,7 @@ impl PoxSyncWatchdog { /// Are we in the initial burnchain block download? i.e. is the burn tip snapshot far enough away /// from the burnchain height that we should be eagerly downloading snapshots? - pub fn infer_initial_burnchain_block_download( + fn infer_initial_burnchain_block_download( burnchain: &Burnchain, last_processed_height: u64, burnchain_height: u64, From 4474f2df6810d564e1aad608bac1de0cb1997826 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Sat, 4 Jan 2025 00:02:11 -0500 Subject: [PATCH 17/28] chore: expand follower_bootup_simple() to test is_fully_synced flag in /v2/info --- testnet/stacks-node/src/tests/nakamoto_integrations.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/testnet/stacks-node/src/tests/nakamoto_integrations.rs b/testnet/stacks-node/src/tests/nakamoto_integrations.rs index 1b84b9c0cd..56c4b8b76a 100644 --- a/testnet/stacks-node/src/tests/nakamoto_integrations.rs +++ b/testnet/stacks-node/src/tests/nakamoto_integrations.rs @@ -3592,6 +3592,7 @@ fn follower_bootup_simple() { thread::sleep(Duration::from_millis(100)); continue; }; + assert!(info.is_fully_synced, "{:?}", &info); let Ok(follower_info) = get_chain_info_result(&follower_conf) else { debug!("follower_bootup: Could not get follower chain info"); @@ -3601,6 +3602,7 @@ fn follower_bootup_simple() { if follower_info.burn_block_height < info.burn_block_height { debug!("follower_bootup: Follower is behind miner's burnchain view"); + assert!(!follower_info.is_fully_synced, "{:?}", &follower_info); thread::sleep(Duration::from_millis(100)); continue; } @@ -3634,6 +3636,7 @@ fn follower_bootup_simple() { thread::sleep(Duration::from_millis(100)); continue; }; + assert!(info.is_fully_synced, "{:?}", &info); let Ok(follower_info) = get_chain_info_result(&follower_conf) else { debug!("follower_bootup: Could not get follower chain info"); @@ -3645,11 +3648,13 @@ fn follower_bootup_simple() { "follower_bootup: Follower has advanced to miner's tip {}", &info.stacks_tip ); + assert!(follower_info.is_fully_synced, "{:?}", &follower_info); } else { debug!( "follower_bootup: Follower has NOT advanced to miner's tip: {} != {}", &info.stacks_tip, follower_info.stacks_tip ); + assert!(!follower_info.is_fully_synced, "{:?}", &follower_info); } last_tip = info.stacks_tip; @@ -3697,8 +3702,10 @@ fn follower_bootup_simple() { if follower_node_info.stacks_tip_consensus_hash == tip.consensus_hash && follower_node_info.stacks_tip == tip.anchored_header.block_hash() { + assert!(follower_node_info.is_fully_synced); break; } + assert!(!follower_node_info.is_fully_synced); } coord_channel From bd254b75d28116fc50b4bac7f05404a33b87ee8c Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Tue, 7 Jan 2025 17:16:05 -0500 Subject: [PATCH 18/28] chore: more structured logging on why a block proposal is rejected --- stacks-signer/src/v0/signer.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/stacks-signer/src/v0/signer.rs b/stacks-signer/src/v0/signer.rs index 988cc8f4a5..040946d967 100644 --- a/stacks-signer/src/v0/signer.rs +++ b/stacks-signer/src/v0/signer.rs @@ -618,6 +618,11 @@ impl Signer { "proposed_block_signer_sighash" => %signer_signature_hash, "proposed_chain_length" => proposed_block.header.chain_length, "expected_at_least" => last_block_info.block.header.chain_length + 1, + "last_block_info.block.header.consensus_hash" => %last_block_info.block.header.consensus_hash, + "proposed_block_consensus_hash" => %proposed_block_consensus_hash, + "last_block_info.state" => %last_block_info.state, + "non_reorgable_block" => non_reorgable_block, + "reorg_timeout_exceeded" => reorg_timeout_exceeded ); return Some(self.create_block_rejection( RejectCode::SortitionViewMismatch, From f632795bcafefe2c55a2b3d7bd608e19ba2f9889 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Tue, 7 Jan 2025 17:16:22 -0500 Subject: [PATCH 19/28] chore: log ibd in debug mode --- stackslib/src/net/p2p.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stackslib/src/net/p2p.rs b/stackslib/src/net/p2p.rs index 9300fa9150..4551909396 100644 --- a/stackslib/src/net/p2p.rs +++ b/stackslib/src/net/p2p.rs @@ -5219,7 +5219,7 @@ impl PeerNetwork { poll_timeout: u64, handler_args: &RPCHandlerArgs, ) -> Result { - debug!(">>>>>>>>>>>>>>>>>>>>>>> Begin Network Dispatch (poll for {}) >>>>>>>>>>>>>>>>>>>>>>>>>>>>", poll_timeout); + debug!(">>>>>>>>>>>>>>>>>>>>>>> Begin Network Dispatch (poll for {}, ibd={}) >>>>>>>>>>>>>>>>>>>>>>>>>>>>", poll_timeout, ibd); let mut poll_states = match self.network { None => { debug!("{:?}: network not connected", &self.local_peer); From 83aee9259f83b67bc33caccd0c977453b702b922 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Tue, 7 Jan 2025 17:16:37 -0500 Subject: [PATCH 20/28] feat: move test flags for stalling and skipping stacks block announces to net::relay since we need to be able to signal to the relayer to _not_ process Stacks blocks --- stackslib/src/net/relay.rs | 48 +++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/stackslib/src/net/relay.rs b/stackslib/src/net/relay.rs index 9121bac2c9..05e08df71c 100644 --- a/stackslib/src/net/relay.rs +++ b/stackslib/src/net/relay.rs @@ -75,6 +75,10 @@ pub mod fault_injection { use std::path::Path; static IGNORE_BLOCK: std::sync::Mutex> = std::sync::Mutex::new(None); + pub static TEST_BLOCK_ANNOUNCE_STALL: std::sync::Mutex> = + std::sync::Mutex::new(None); + pub static TEST_BLOCK_ANNOUNCE_SKIP: std::sync::Mutex> = + std::sync::Mutex::new(None); pub fn ignore_block(height: u64, working_dir: &str) -> bool { if let Some((ignore_height, ignore_dir)) = &*IGNORE_BLOCK.lock().unwrap() { @@ -102,6 +106,34 @@ pub mod fault_injection { warn!("Fault injection: clear ignore block"); *IGNORE_BLOCK.lock().unwrap() = None; } + + pub fn stacks_announce_is_blocked() -> bool { + *TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap() == Some(true) + } + + pub fn stacks_announce_is_skipped() -> bool { + *TEST_BLOCK_ANNOUNCE_SKIP.lock().unwrap() == Some(true) + } + + pub fn no_stacks_announce() -> bool { + stacks_announce_is_blocked() || stacks_announce_is_skipped() + } + + pub fn block_stacks_announce() { + *TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap() = Some(true); + } + + pub fn unblock_stacks_announce() { + *TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap() = None; + } + + pub fn skip_stacks_announce() { + *TEST_BLOCK_ANNOUNCE_SKIP.lock().unwrap() = Some(true); + } + + pub fn unskip_stacks_announce() { + *TEST_BLOCK_ANNOUNCE_SKIP.lock().unwrap() = None; + } } #[cfg(not(any(test, feature = "testing")))] @@ -113,6 +145,10 @@ pub mod fault_injection { pub fn set_ignore_block(_height: u64, _working_dir: &str) {} pub fn clear_ignore_block() {} + + pub fn no_stacks_announce() -> bool { + false + } } pub struct Relayer { @@ -1085,7 +1121,9 @@ impl Relayer { if accepted { info!("{}", &accept_msg); if let Some(coord_comms) = coord_comms { - if !coord_comms.announce_new_stacks_block() { + if !fault_injection::no_stacks_announce() + && !coord_comms.announce_new_stacks_block() + { return Err(chainstate_error::NetError(net_error::CoordinatorClosed)); } } @@ -2089,7 +2127,9 @@ impl Relayer { new_confirmed_microblocks.len() ); if let Some(coord_comms) = coord_comms { - if !coord_comms.announce_new_stacks_block() { + if !fault_injection::no_stacks_announce() + && !coord_comms.announce_new_stacks_block() + { return Err(net_error::CoordinatorClosed); } } @@ -2178,7 +2218,9 @@ impl Relayer { } if !http_uploaded_blocks.is_empty() { coord_comms.inspect(|comm| { - comm.announce_new_stacks_block(); + if !fault_injection::no_stacks_announce() { + comm.announce_new_stacks_block(); + } }); } From ed37e6753cc873567049296dca6e914a985471bc Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Tue, 7 Jan 2025 17:17:08 -0500 Subject: [PATCH 21/28] chore: remove unused ibd variable and don't announce stacks blocks if instructed not to by fault injection --- testnet/stacks-node/src/nakamoto_node.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node.rs b/testnet/stacks-node/src/nakamoto_node.rs index 09f8c7285f..d6d55ae442 100644 --- a/testnet/stacks-node/src/nakamoto_node.rs +++ b/testnet/stacks-node/src/nakamoto_node.rs @@ -309,14 +309,12 @@ impl StacksNode { /// Process a state coming from the burnchain, by extracting the validated KeyRegisterOp /// and inspecting if a sortition was won. - /// `ibd`: boolean indicating whether or not we are in the initial block download /// Called from the main thread. pub fn process_burnchain_state( &mut self, config: &Config, sortdb: &SortitionDB, sort_id: &SortitionId, - ibd: bool, ) -> Result<(), Error> { let ic = sortdb.index_conn(); @@ -370,7 +368,6 @@ impl StacksNode { "burn_height" => block_height, "leader_keys_count" => num_key_registers, "block_commits_count" => num_block_commits, - "in_initial_block_download?" => ibd, ); self.globals.set_last_sortition(block_snapshot.clone()); From b53c4c62619d4d7d51c1da6a4c806afc2bd236d5 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Tue, 7 Jan 2025 17:17:42 -0500 Subject: [PATCH 22/28] chore: use relayer fault injection logic --- testnet/stacks-node/src/nakamoto_node/miner.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index d9edf97e90..9c3dfe2fe0 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -39,7 +39,7 @@ use stacks::chainstate::stacks::{ }; use stacks::net::p2p::NetworkHandle; use stacks::net::stackerdb::StackerDBs; -use stacks::net::{NakamotoBlocksData, StacksMessageType}; +use stacks::net::{relay, NakamotoBlocksData, StacksMessageType}; use stacks::util::get_epoch_time_secs; use stacks::util::secp256k1::MessageSignature; use stacks_common::types::chainstate::{StacksAddress, StacksBlockId}; @@ -59,8 +59,6 @@ pub static TEST_MINE_STALL: std::sync::Mutex> = std::sync::Mutex::n #[cfg(test)] pub static TEST_BROADCAST_STALL: std::sync::Mutex> = std::sync::Mutex::new(None); #[cfg(test)] -pub static TEST_BLOCK_ANNOUNCE_STALL: std::sync::Mutex> = std::sync::Mutex::new(None); -#[cfg(test)] pub static TEST_SKIP_P2P_BROADCAST: std::sync::Mutex> = std::sync::Mutex::new(None); /// If the miner was interrupted while mining a block, how long should the @@ -219,7 +217,7 @@ impl BlockMinerThread { #[cfg(test)] fn fault_injection_block_announce_stall(new_block: &NakamotoBlock) { - if *TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap() == Some(true) { + if relay::fault_injection::stacks_announce_is_blocked() { // Do an extra check just so we don't log EVERY time. warn!("Fault injection: Block announcement is stalled due to testing directive."; "stacks_block_id" => %new_block.block_id(), @@ -227,7 +225,7 @@ impl BlockMinerThread { "height" => new_block.header.chain_length, "consensus_hash" => %new_block.header.consensus_hash ); - while *TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap() == Some(true) { + while relay::fault_injection::stacks_announce_is_blocked() { std::thread::sleep(std::time::Duration::from_millis(10)); } info!("Fault injection: Block announcement is no longer stalled due to testing directive."; @@ -495,7 +493,11 @@ impl BlockMinerThread { // wake up chains coordinator Self::fault_injection_block_announce_stall(&new_block); - self.globals.coord().announce_new_stacks_block(); + if !relay::fault_injection::stacks_announce_is_skipped() { + self.globals.coord().announce_new_stacks_block(); + } else { + info!("Miner: skip block announce due to fault injection directive"); + } self.last_block_mined = Some(new_block); self.mined_blocks += 1; From 08d3028ea4caadbe0c99f3721eb9f00e3835ecdb Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Tue, 7 Jan 2025 17:17:57 -0500 Subject: [PATCH 23/28] chore: don't announce a stacks block if fault injection is active --- testnet/stacks-node/src/nakamoto_node/relayer.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index 8cc1293acd..c924f2c3d9 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -43,7 +43,7 @@ use stacks::monitoring::increment_stx_blocks_mined_counter; use stacks::net::db::LocalPeer; use stacks::net::p2p::NetworkHandle; use stacks::net::relay::Relayer; -use stacks::net::NetworkResult; +use stacks::net::{relay, NetworkResult}; use stacks_common::types::chainstate::{ BlockHeaderHash, BurnchainHeaderHash, StacksBlockId, StacksPublicKey, VRFSeed, }; @@ -489,7 +489,9 @@ impl RelayerThread { self.globals.counters.bump_blocks_processed(); // there may be a bufferred stacks block to process, so wake up the coordinator to check - self.globals.coord_comms.announce_new_stacks_block(); + if !relay::fault_injection::no_stacks_announce() { + self.globals.coord_comms.announce_new_stacks_block(); + } info!( "Relayer: Process sortition"; From b5b60385c1bcfed9c1cf817b1147d013c20d0bea Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Tue, 7 Jan 2025 17:18:14 -0500 Subject: [PATCH 24/28] chore: log if we're still sync'ing --- testnet/stacks-node/src/run_loop/nakamoto.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/testnet/stacks-node/src/run_loop/nakamoto.rs b/testnet/stacks-node/src/run_loop/nakamoto.rs index 335fb325d8..e179712e66 100644 --- a/testnet/stacks-node/src/run_loop/nakamoto.rs +++ b/testnet/stacks-node/src/run_loop/nakamoto.rs @@ -648,7 +648,6 @@ impl RunLoop { self.config(), burnchain.sortdb_mut(), sortition_id, - ibd, ) { // relayer errored, exit. error!("Runloop: Block relayer and miner errored, exiting."; "err" => ?e); @@ -725,6 +724,11 @@ impl RunLoop { globals.raise_initiative("runloop-synced".to_string()); } } + } else { + info!("Runloop: still synchronizing"; + "sortition_db_height" => sortition_db_height, + "burnchain_height" => burnchain_height, + "ibd" => ibd); } } } From 0f42b629b82340d1ab8298c5e69cd4ced50b9f0f Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Tue, 7 Jan 2025 17:19:06 -0500 Subject: [PATCH 25/28] chore: don't set ibd based on burnchain block download --- testnet/stacks-node/src/syncctl.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/testnet/stacks-node/src/syncctl.rs b/testnet/stacks-node/src/syncctl.rs index 488234d21d..c38e811ea9 100644 --- a/testnet/stacks-node/src/syncctl.rs +++ b/testnet/stacks-node/src/syncctl.rs @@ -186,7 +186,6 @@ impl PoxSyncWatchdog { .max(burnchain_height) }; - self.relayer_comms.set_ibd(ibbd); if !self.unconditionally_download { self.relayer_comms .interruptable_sleep(self.steady_state_burnchain_sync_interval)?; From 315c4d80b101d3a1803c0ff2c6d6764d66db4d04 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Tue, 7 Jan 2025 17:19:51 -0500 Subject: [PATCH 26/28] fix: don't allow tenure b to process until after we've mined the block for tenure c --- .../src/tests/nakamoto_integrations.rs | 9 ++--- testnet/stacks-node/src/tests/signer/v0.rs | 39 ++++++++++++++----- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/testnet/stacks-node/src/tests/nakamoto_integrations.rs b/testnet/stacks-node/src/tests/nakamoto_integrations.rs index 1b84b9c0cd..0a4ac414a7 100644 --- a/testnet/stacks-node/src/tests/nakamoto_integrations.rs +++ b/testnet/stacks-node/src/tests/nakamoto_integrations.rs @@ -73,6 +73,7 @@ use stacks::net::api::getstackers::GetStackersResponse; use stacks::net::api::postblock_proposal::{ BlockValidateReject, BlockValidateResponse, NakamotoBlockProposal, ValidateRejectCode, }; +use stacks::net::relay; use stacks::types::chainstate::{ConsensusHash, StacksBlockId}; use stacks::util::hash::hex_bytes; use stacks::util_lib::boot::boot_code_id; @@ -96,9 +97,7 @@ use stacks_signer::signerdb::{BlockInfo, BlockState, ExtraBlockInfo, SignerDb}; use stacks_signer::v0::SpawnedSigner; use super::bitcoin_regtest::BitcoinCoreController; -use crate::nakamoto_node::miner::{ - TEST_BLOCK_ANNOUNCE_STALL, TEST_BROADCAST_STALL, TEST_MINE_STALL, TEST_SKIP_P2P_BROADCAST, -}; +use crate::nakamoto_node::miner::{TEST_BROADCAST_STALL, TEST_MINE_STALL, TEST_SKIP_P2P_BROADCAST}; use crate::neon::{Counters, RunLoopCounter}; use crate::operations::BurnchainOpSigner; use crate::run_loop::boot_nakamoto; @@ -4980,7 +4979,7 @@ fn forked_tenure_is_ignored() { // For the next tenure, submit the commit op but do not allow any stacks blocks to be broadcasted. // Stall the miner thread; only wait until the number of submitted commits increases. TEST_BROADCAST_STALL.lock().unwrap().replace(true); - TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap().replace(true); + relay::fault_injection::block_stacks_announce(); let blocks_before = mined_blocks.load(Ordering::SeqCst); let commits_before = commits_submitted.load(Ordering::SeqCst); @@ -5050,7 +5049,7 @@ fn forked_tenure_is_ignored() { .get_stacks_blocks_processed(); next_block_and(&mut btc_regtest_controller, 60, || { test_skip_commit_op.set(false); - TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap().replace(false); + relay::fault_injection::unblock_stacks_announce(); let commits_count = commits_submitted.load(Ordering::SeqCst); let blocks_count = mined_blocks.load(Ordering::SeqCst); let blocks_processed = coord_channel diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index 044e5f0cbc..4bb48ed572 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -45,6 +45,7 @@ use stacks::net::api::postblock_proposal::{ BlockValidateResponse, ValidateRejectCode, TEST_VALIDATE_DELAY_DURATION_SECS, TEST_VALIDATE_STALL, }; +use stacks::net::relay; use stacks::net::relay::fault_injection::set_ignore_block; use stacks::types::chainstate::{StacksAddress, StacksBlockId, StacksPrivateKey, StacksPublicKey}; use stacks::types::PublicKey; @@ -57,7 +58,7 @@ use stacks::util_lib::signed_structured_data::pox4::{ }; use stacks_common::bitvec::BitVec; use stacks_common::types::chainstate::TrieHash; -use stacks_common::util::sleep_ms; +use stacks_common::util::{get_epoch_time_ms, sleep_ms}; use stacks_signer::chainstate::{ProposalEvalConfig, SortitionsView}; use stacks_signer::client::{SignerSlotID, StackerDB}; use stacks_signer::config::{build_signer_config_tomls, GlobalConfig as SignerConfig, Network}; @@ -71,9 +72,7 @@ use tracing_subscriber::{fmt, EnvFilter}; use super::SignerTest; use crate::event_dispatcher::{MinedNakamotoBlockEvent, TEST_SKIP_BLOCK_ANNOUNCEMENT}; -use crate::nakamoto_node::miner::{ - TEST_BLOCK_ANNOUNCE_STALL, TEST_BROADCAST_STALL, TEST_MINE_STALL, -}; +use crate::nakamoto_node::miner::{TEST_BROADCAST_STALL, TEST_MINE_STALL}; use crate::nakamoto_node::stackerdb_listener::TEST_IGNORE_SIGNERS; use crate::neon::Counters; use crate::run_loop::boot_nakamoto; @@ -952,6 +951,8 @@ fn forked_tenure_testing( |config| { // make the duration long enough that the reorg attempt will definitely be accepted config.first_proposal_burn_block_timing = proposal_limit; + config.tenure_last_block_proposal_timeout = Duration::from_secs(0); + // don't allow signers to post signed blocks (limits the amount of fault injection we // need) TEST_SKIP_BLOCK_BROADCAST.set(true); @@ -1019,7 +1020,7 @@ fn forked_tenure_testing( // For the next tenure, submit the commit op but do not allow any stacks blocks to be broadcasted TEST_BROADCAST_STALL.lock().unwrap().replace(true); - TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap().replace(true); + relay::fault_injection::block_stacks_announce(); let blocks_before = mined_blocks.load(Ordering::SeqCst); let commits_before = commits_submitted.load(Ordering::SeqCst); @@ -1094,14 +1095,14 @@ fn forked_tenure_testing( ); assert_ne!(tip_b, tip_a); + info!("Starting Tenure C."); if !expect_tenure_c { + info!("Process Tenure B"); // allow B to process, so it'll be distinct from C - TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap().replace(false); + relay::fault_injection::unblock_stacks_announce(); sleep_ms(1000); } - info!("Starting Tenure C."); - // Submit a block commit op for tenure C let commits_before = commits_submitted.load(Ordering::SeqCst); let blocks_before = if expect_tenure_c { @@ -1115,14 +1116,29 @@ fn forked_tenure_testing( .nakamoto_test_skip_commit_op .set(false); + let mut tenure_b_deadline = None; + + // have the current miner thread just skip block-processing, so tenure C can start + relay::fault_injection::skip_stacks_announce(); + relay::fault_injection::unblock_stacks_announce(); + next_block_and( &mut signer_test.running_nodes.btc_regtest_controller, 60, || { let commits_count = commits_submitted.load(Ordering::SeqCst); if commits_count > commits_before { - // now allow block B to process if it hasn't already. - TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap().replace(false); + // once the commit happens, give a grace period for tenure C to be mined and + // procesed + if let Some(dl) = tenure_b_deadline.as_mut() { + if *dl < get_epoch_time_ms() { + // unblock miner thread, but don't process tenure B just yet + coord_channel.lock().unwrap().announce_new_stacks_block(); + *dl = get_epoch_time_ms() + 100_000_000_000; + } + } else { + tenure_b_deadline = Some(get_epoch_time_ms() + 10_000); + } } let rejected_count = rejected_blocks.load(Ordering::SeqCst); let (blocks_count, rbf_count, has_reject_count) = if expect_tenure_c { @@ -1171,6 +1187,9 @@ fn forked_tenure_testing( panic!(); }); + relay::fault_injection::unskip_stacks_announce(); + coord_channel.lock().unwrap().announce_new_stacks_block(); + // allow blocks B and C to be processed sleep_ms(1000); From 77c2db11303b3994c026b940f7f4efbc7adb642c Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Tue, 7 Jan 2025 17:51:19 -0500 Subject: [PATCH 27/28] fix: compile issue --- stackslib/src/net/relay.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/stackslib/src/net/relay.rs b/stackslib/src/net/relay.rs index 05e08df71c..5c887c372f 100644 --- a/stackslib/src/net/relay.rs +++ b/stackslib/src/net/relay.rs @@ -149,6 +149,10 @@ pub mod fault_injection { pub fn no_stacks_announce() -> bool { false } + + pub fn stacks_announce_is_skipped() -> bool { + false + } } pub struct Relayer { From 321d890ca006db6b189b0b330a9689a8dfc27db1 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Tue, 7 Jan 2025 18:26:54 -0500 Subject: [PATCH 28/28] chore: cargo fmt --- stackslib/src/net/relay.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stackslib/src/net/relay.rs b/stackslib/src/net/relay.rs index 5c887c372f..505a6dfe4d 100644 --- a/stackslib/src/net/relay.rs +++ b/stackslib/src/net/relay.rs @@ -149,7 +149,7 @@ pub mod fault_injection { pub fn no_stacks_announce() -> bool { false } - + pub fn stacks_announce_is_skipped() -> bool { false }