From fb306ea9a5cc591bcded9753970b2a2400537e47 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Sun, 1 Dec 2024 07:34:59 -0500 Subject: [PATCH 1/4] chore: fix #5502 by reducing the PoX sync watchdog --- testnet/stacks-node/src/run_loop/neon.rs | 48 +- testnet/stacks-node/src/syncctl.rs | 552 ++---------------- .../src/tests/nakamoto_integrations.rs | 2 +- 3 files changed, 48 insertions(+), 554 deletions(-) diff --git a/testnet/stacks-node/src/run_loop/neon.rs b/testnet/stacks-node/src/run_loop/neon.rs index 5e021e50ab..475265869a 100644 --- a/testnet/stacks-node/src/run_loop/neon.rs +++ b/testnet/stacks-node/src/run_loop/neon.rs @@ -1156,19 +1156,8 @@ impl RunLoop { let mut sortition_db_height = rc_aligned_height; let mut burnchain_height = sortition_db_height; - let mut num_sortitions_in_last_cycle = 1; // prepare to fetch the first reward cycle! - let mut target_burnchain_block_height = cmp::min( - burnchain_config.reward_cycle_to_block_height( - burnchain_config - .block_height_to_reward_cycle(burnchain_height) - .expect("BUG: block height is not in a reward cycle") - + 1, - ), - burnchain.get_headers_height() - 1, - ); - debug!("Runloop: Begin main runloop starting a burnchain block {sortition_db_height}"); let mut last_tenure_sortition_height = 0; @@ -1196,17 +1185,13 @@ impl RunLoop { let remote_chain_height = burnchain.get_headers_height() - 1; - // wait for the p2p state-machine to do at least one pass - debug!("Runloop: Wait until Stacks block downloads reach a quiescent state before processing more burnchain blocks"; "remote_chain_height" => remote_chain_height, "local_chain_height" => burnchain_height); - - // wait until it's okay to process the next reward cycle's sortitions - let ibd = match self.get_pox_watchdog().pox_sync_wait( + // wait until it's okay to process the next reward cycle's sortitions. + let (ibd, target_burnchain_block_height) = match self.get_pox_watchdog().pox_sync_wait( &burnchain_config, &burnchain_tip, remote_chain_height, - num_sortitions_in_last_cycle, ) { - Ok(ibd) => ibd, + Ok(x) => x, Err(e) => { debug!("Runloop: PoX sync wait routine aborted: {e:?}"); continue; @@ -1220,9 +1205,6 @@ impl RunLoop { 0.0 }; - // will recalculate this in the following loop - num_sortitions_in_last_cycle = 0; - // Download each burnchain block and process their sortitions. This, in turn, will // cause the node's p2p and relayer threads to go fetch and download Stacks blocks and // process them. This loop runs for one reward cycle, so that the next pass of the @@ -1270,8 +1252,6 @@ impl RunLoop { "Runloop: New burnchain block height {next_sortition_height} > {sortition_db_height}" ); - let mut sort_count = 0; - debug!("Runloop: block mining until we process all sortitions"); signal_mining_blocked(globals.get_miner_status()); @@ -1289,9 +1269,6 @@ impl RunLoop { "Failed to find block in fork processed by burnchain indexer", ) }; - if block.sortition { - sort_count += 1; - } let sortition_id = &block.sortition_id; @@ -1338,9 +1315,8 @@ impl RunLoop { debug!("Runloop: enable miner after processing sortitions"); signal_mining_ready(globals.get_miner_status()); - num_sortitions_in_last_cycle = sort_count; debug!( - "Runloop: Synchronized sortitions up to block height {next_sortition_height} from {sortition_db_height} (chain tip height is {burnchain_height}); {num_sortitions_in_last_cycle} sortitions" + "Runloop: Synchronized sortitions up to block height {next_sortition_height} from {sortition_db_height} (chain tip height is {burnchain_height})" ); sortition_db_height = next_sortition_height; @@ -1359,22 +1335,6 @@ impl RunLoop { } } - // advance one reward cycle at a time. - // If we're still downloading, then this is simply target_burnchain_block_height + reward_cycle_len. - // Otherwise, this is burnchain_tip + reward_cycle_len - let next_target_burnchain_block_height = cmp::min( - burnchain_config.reward_cycle_to_block_height( - burnchain_config - .block_height_to_reward_cycle(target_burnchain_block_height) - .expect("FATAL: burnchain height before system start") - + 1, - ), - remote_chain_height, - ); - - debug!("Runloop: Advance target burnchain block height from {target_burnchain_block_height} to {next_target_burnchain_block_height} (sortition height {sortition_db_height})"); - target_burnchain_block_height = next_target_burnchain_block_height; - if sortition_db_height >= burnchain_height && !ibd { let canonical_stacks_tip_height = SortitionDB::get_canonical_burn_chain_tip(burnchain.sortdb_ref().conn()) diff --git a/testnet/stacks-node/src/syncctl.rs b/testnet/stacks-node/src/syncctl.rs index 395d829c8f..488234d21d 100644 --- a/testnet/stacks-node/src/syncctl.rs +++ b/testnet/stacks-node/src/syncctl.rs @@ -1,20 +1,28 @@ -use std::collections::VecDeque; +// Copyright (C) 2013-2020 Blockstack PBC, a public benefit corporation +// Copyright (C) 2020-2024 Stacks Open Internet Foundation +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; use std::sync::Arc; use stacks::burnchains::{Burnchain, Error as burnchain_error}; -use stacks::chainstate::stacks::db::StacksChainState; use stacks_common::util::{get_epoch_time_secs, sleep_ms}; use crate::burnchains::BurnchainTip; use crate::Config; -// amount of time to wait for an inv or download sync to complete. -// These _really should_ complete before the PoX sync watchdog permits processing the next reward -// cycle, so this number is intentionally high (like, there's something really wrong with your -// network if your node is actualy waiting a day in-between reward cycles). -const SYNC_WAIT_SECS: u64 = 24 * 3600; - #[derive(Clone)] pub struct PoxSyncWatchdogComms { /// how many passes in the p2p state machine have taken place since startup? @@ -56,22 +64,6 @@ impl PoxSyncWatchdogComms { self.last_ibd.load(Ordering::SeqCst) } - /// Wait for at least one inv-sync state-machine passes - pub fn wait_for_inv_sync_pass(&self, timeout: u64) -> Result { - let current = self.get_inv_sync_passes(); - - let now = get_epoch_time_secs(); - while current >= self.get_inv_sync_passes() { - if now + timeout < get_epoch_time_secs() { - debug!("PoX watchdog comms: timed out waiting for one inv-sync pass"); - return Ok(false); - } - self.interruptable_sleep(1)?; - std::hint::spin_loop(); - } - Ok(true) - } - fn interruptable_sleep(&self, secs: u64) -> Result<(), burnchain_error> { let deadline = secs + get_epoch_time_secs(); while get_epoch_time_secs() < deadline { @@ -83,21 +75,6 @@ impl PoxSyncWatchdogComms { Ok(()) } - pub fn wait_for_download_pass(&self, timeout: u64) -> Result { - let current = self.get_download_passes(); - - let now = get_epoch_time_secs(); - while current >= self.get_download_passes() { - if now + timeout < get_epoch_time_secs() { - debug!("PoX watchdog comms: timed out waiting for one download pass"); - return Ok(false); - } - self.interruptable_sleep(1)?; - std::hint::spin_loop(); - } - Ok(true) - } - pub fn should_keep_running(&self) -> bool { self.should_keep_running.load(Ordering::SeqCst) } @@ -124,82 +101,25 @@ impl PoxSyncWatchdogComms { /// unless it's reasonably sure that it has processed all Stacks blocks for this reward cycle. /// This struct monitors the Stacks chainstate to make this determination. pub struct PoxSyncWatchdog { - /// number of attachable but unprocessed staging blocks over time - new_attachable_blocks: VecDeque, - /// number of newly-processed staging blocks over time - new_processed_blocks: VecDeque, - /// last time we asked for attachable blocks - last_attachable_query: u64, - /// last time we asked for processed blocks - last_processed_query: u64, - /// number of samples to take - max_samples: u64, - /// maximum number of blocks to count per query (affects performance!) - max_staging: u64, - /// when did we first start watching? - watch_start_ts: u64, - /// when did we first see a flatline in block-processing rate? - last_block_processed_ts: u64, - /// estimated time for a block to get downloaded. Used to infer how long to wait for the first - /// blocks to show up when waiting for this reward cycle. - estimated_block_download_time: f64, - /// estimated time for a block to get processed -- from when it shows up as attachable to when - /// it shows up as processed. Used to infer how long to wait for the last block to get - /// processed before unblocking burnchain sync for the next reward cycle. - estimated_block_process_time: f64, - /// time between burnchain syncs in stead state + /// time between burnchain syncs in steady state steady_state_burnchain_sync_interval: u64, - /// when to re-sync under steady state - steady_state_resync_ts: u64, - /// chainstate handle - chainstate: StacksChainState, /// handle to relayer thread that informs the watchdog when the P2P state-machine does stuff relayer_comms: PoxSyncWatchdogComms, /// should this sync watchdog always download? used in integration tests. unconditionally_download: bool, } -const PER_SAMPLE_WAIT_MS: u64 = 1000; - impl PoxSyncWatchdog { pub fn new( config: &Config, watchdog_comms: PoxSyncWatchdogComms, ) -> Result { - let mainnet = config.is_mainnet(); - let chain_id = config.burnchain.chain_id; - let chainstate_path = config.get_chainstate_path_str(); let burnchain_poll_time = config.burnchain.poll_time_secs; - let download_timeout = config.connection_options.timeout; - let max_samples = config.node.pox_sync_sample_secs; let unconditionally_download = config.node.pox_sync_sample_secs == 0; - let marf_opts = config.node.get_marf_opts(); - - let (chainstate, _) = - match StacksChainState::open(mainnet, chain_id, &chainstate_path, Some(marf_opts)) { - Ok(cs) => cs, - Err(e) => { - return Err(format!( - "Failed to open chainstate at '{chainstate_path}': {e:?}" - )); - } - }; Ok(PoxSyncWatchdog { unconditionally_download, - new_attachable_blocks: VecDeque::new(), - new_processed_blocks: VecDeque::new(), - last_attachable_query: 0, - last_processed_query: 0, - max_samples, - max_staging: 10, - watch_start_ts: 0, - last_block_processed_ts: 0, - estimated_block_download_time: download_timeout as f64, - estimated_block_process_time: 5.0, steady_state_burnchain_sync_interval: burnchain_poll_time, - steady_state_resync_ts: 0, - chainstate, relayer_comms: watchdog_comms, }) } @@ -208,39 +128,9 @@ impl PoxSyncWatchdog { self.relayer_comms.clone() } - /// How many recently-added Stacks blocks are in an attachable state, up to $max_staging? - fn count_attachable_stacks_blocks(&mut self) -> Result { - // number of staging blocks that have arrived since the last sortition - let cnt = StacksChainState::count_attachable_staging_blocks( - self.chainstate.db(), - self.max_staging, - self.last_attachable_query, - ) - .map_err(|e| format!("Failed to count attachable staging blocks: {e:?}"))?; - - self.last_attachable_query = get_epoch_time_secs(); - Ok(cnt) - } - - /// How many recently-processed Stacks blocks are there, up to $max_staging? - /// ($max_staging is necessary to limit the runtime of this method, since the underlying SQL - /// uses COUNT(*), which in Sqlite is a _O(n)_ operation for _n_ rows) - fn count_processed_stacks_blocks(&mut self) -> Result { - // number of staging blocks that have arrived since the last sortition - let cnt = StacksChainState::count_processed_staging_blocks( - self.chainstate.db(), - self.max_staging, - self.last_processed_query, - ) - .map_err(|e| format!("Failed to count attachable staging blocks: {e:?}"))?; - - self.last_processed_query = get_epoch_time_secs(); - Ok(cnt) - } - /// Are we in the initial burnchain block download? i.e. is the burn tip snapshot far enough away /// from the burnchain height that we should be eagerly downloading snapshots? - pub fn infer_initial_burnchain_block_download( + fn infer_initial_burnchain_block_download( burnchain: &Burnchain, last_processed_height: u64, burnchain_height: u64, @@ -261,182 +151,23 @@ impl PoxSyncWatchdog { ibd } - /// Calculate the first derivative of a list of points - fn derivative(sample_list: &VecDeque) -> Vec { - let mut deltas = vec![]; - let mut prev = 0; - for (i, sample) in sample_list.iter().enumerate() { - if i == 0 { - prev = *sample; - continue; - } - let delta = *sample - prev; - prev = *sample; - deltas.push(delta); - } - deltas - } - - /// Is a derivative approximately flat, with a maximum absolute deviation from 0? - /// Return whether or not the sample is mostly flat, and how many points were over the given - /// error bar in either direction. - fn is_mostly_flat(deriv: &[i64], error: i64) -> (bool, usize) { - let mut total_deviates = 0; - let mut ret = true; - for d in deriv.iter() { - if d.abs() > error { - total_deviates += 1; - ret = false; - } - } - (ret, total_deviates) - } - - /// low and high pass filter average -- take average without the smallest and largest values - fn hilo_filter_avg(samples: &[i64]) -> f64 { - // take average with low and high pass - let mut min = i64::MAX; - let mut max = i64::MIN; - for s in samples.iter() { - if *s < 0 { - // nonsensical result (e.g. due to clock drift?) - continue; - } - if *s < min { - min = *s; - } - if *s > max { - max = *s; - } - } - - let mut count = 0; - let mut sum = 0; - for s in samples.iter() { - if *s < 0 { - // nonsensical result - continue; - } - if *s == min { - continue; - } - if *s == max { - continue; - } - count += 1; - sum += *s; - } - - if count == 0 { - // no viable samples - 1.0 - } else { - (sum as f64) / (count as f64) - } - } - - /// estimate how long a block remains in an unprocessed state - fn estimate_block_process_time( - chainstate: &StacksChainState, - burnchain: &Burnchain, - tip_height: u64, - ) -> f64 { - let this_reward_cycle = burnchain - .block_height_to_reward_cycle(tip_height) - .unwrap_or_else(|| panic!("BUG: no reward cycle for {tip_height}")); - let prev_reward_cycle = this_reward_cycle.saturating_sub(1); - - let start_height = burnchain.reward_cycle_to_block_height(prev_reward_cycle); - let end_height = burnchain.reward_cycle_to_block_height(this_reward_cycle); - - if this_reward_cycle > 0 { - assert!(start_height < end_height); - } else { - // no samples yet - return 1.0; - } - - let block_wait_times = - StacksChainState::measure_block_wait_time(chainstate.db(), start_height, end_height) - .expect("BUG: failed to query chainstate block-processing times"); - - PoxSyncWatchdog::hilo_filter_avg(&block_wait_times) - } - - /// estimate how long a block takes to download - fn estimate_block_download_time( - chainstate: &StacksChainState, - burnchain: &Burnchain, - tip_height: u64, - ) -> f64 { - let this_reward_cycle = burnchain - .block_height_to_reward_cycle(tip_height) - .unwrap_or_else(|| panic!("BUG: no reward cycle for {tip_height}")); - let prev_reward_cycle = this_reward_cycle.saturating_sub(1); - - let start_height = burnchain.reward_cycle_to_block_height(prev_reward_cycle); - let end_height = burnchain.reward_cycle_to_block_height(this_reward_cycle); - - if this_reward_cycle > 0 { - assert!(start_height < end_height); - } else { - // no samples yet - return 1.0; - } - - let block_download_times = StacksChainState::measure_block_download_time( - chainstate.db(), - start_height, - end_height, - ) - .expect("BUG: failed to query chainstate block-download times"); - - PoxSyncWatchdog::hilo_filter_avg(&block_download_times) - } - - /// Reset internal state. Performed when it's okay to begin syncing the burnchain. - /// Updates estimate for block-processing time and block-downloading time. - fn reset(&mut self, burnchain: &Burnchain, tip_height: u64) { - // find the average (with low/high pass filter) time a block spends in the DB without being - // processed, during this reward cycle - self.estimated_block_process_time = - PoxSyncWatchdog::estimate_block_process_time(&self.chainstate, burnchain, tip_height); - - // find the average (with low/high pass filter) time a block spends downloading - self.estimated_block_download_time = - PoxSyncWatchdog::estimate_block_download_time(&self.chainstate, burnchain, tip_height); - - debug!( - "Estimated block download time: {}s. Estimated block processing time: {}s", - self.estimated_block_download_time, self.estimated_block_process_time - ); - - self.new_attachable_blocks.clear(); - self.new_processed_blocks.clear(); - self.last_block_processed_ts = 0; - self.watch_start_ts = 0; - self.steady_state_resync_ts = 0; - } - - /// Wait until all of the Stacks blocks for the given reward cycle are seemingly downloaded and - /// processed. Do so by watching the _rate_ at which attachable Stacks blocks arrive and get - /// processed. - /// Returns whether or not we're still in the initial block download -- i.e. true if we're - /// still downloading burnchain blocks, or we haven't reached steady-state block-processing. + /// Wait until the next PoX anchor block arrives. + /// We know for a fact that they all exist for Epochs 2.5 and earlier, in both mainnet and + /// testnet. + /// Return (still-in-ibd?, maximum-burnchain-sync-height) on success. pub fn pox_sync_wait( &mut self, burnchain: &Burnchain, burnchain_tip: &BurnchainTip, // this is the highest burnchain snapshot we've sync'ed to burnchain_height: u64, // this is the absolute burnchain block height - num_sortitions_in_last_cycle: u64, - ) -> Result { - if self.watch_start_ts == 0 { - self.watch_start_ts = get_epoch_time_secs(); - } - if self.steady_state_resync_ts == 0 { - self.steady_state_resync_ts = - get_epoch_time_secs() + self.steady_state_burnchain_sync_interval; - } + ) -> Result<(bool, u64), burnchain_error> { + let burnchain_rc = burnchain + .block_height_to_reward_cycle(burnchain_height) + .expect("FATAL: burnchain height is before system start"); + + let sortition_rc = burnchain + .block_height_to_reward_cycle(burnchain_tip.block_snapshot.block_height) + .expect("FATAL: sortition height is before system start"); let ibbd = PoxSyncWatchdog::infer_initial_burnchain_block_download( burnchain, @@ -444,220 +175,23 @@ impl PoxSyncWatchdog { burnchain_height, ); - // unconditionally download the first reward cycle - if burnchain_tip.block_snapshot.block_height - < burnchain.first_block_height + (burnchain.pox_constants.reward_cycle_length as u64) - { - debug!("PoX watchdog in first reward cycle -- sync immediately"); - self.relayer_comms.set_ibd(ibbd); + let max_sync_height = if sortition_rc < burnchain_rc { + burnchain + .reward_cycle_to_block_height(sortition_rc + 1) + .min(burnchain_height) + } else { + burnchain_tip + .block_snapshot + .block_height + .max(burnchain_height) + }; + self.relayer_comms.set_ibd(ibbd); + if !self.unconditionally_download { self.relayer_comms .interruptable_sleep(self.steady_state_burnchain_sync_interval)?; - - return Ok(ibbd); - } - - if self.unconditionally_download { - debug!("PoX watchdog set to unconditionally download (ibd={ibbd})"); - self.relayer_comms.set_ibd(ibbd); - return Ok(ibbd); - } - - let mut waited = false; - if ibbd { - // we are far behind the burnchain tip (i.e. not in the last reward cycle), - // so make sure the downloader knows about blocks it doesn't have yet so we can go and - // fetch its blocks before proceeding. - if num_sortitions_in_last_cycle > 0 { - debug!("PoX watchdog: Wait for at least one inventory state-machine pass..."); - self.relayer_comms.wait_for_inv_sync_pass(SYNC_WAIT_SECS)?; - waited = true; - } else { - debug!("PoX watchdog: In initial block download, and no sortitions to consider in this reward cycle -- sync immediately"); - self.relayer_comms.set_ibd(ibbd); - return Ok(ibbd); - } - } else { - debug!("PoX watchdog: not in initial burn block download, so not waiting for an inventory state-machine pass"); } - if burnchain_tip.block_snapshot.block_height - + (burnchain.pox_constants.reward_cycle_length as u64) - >= burnchain_height - { - // unconditionally download if we're within the last reward cycle (after the poll timeout) - if !waited { - debug!( - "PoX watchdog in last reward cycle -- sync after {} seconds", - self.steady_state_burnchain_sync_interval - ); - self.relayer_comms.set_ibd(ibbd); - - self.relayer_comms - .interruptable_sleep(self.steady_state_burnchain_sync_interval)?; - } else { - debug!("PoX watchdog in last reward cycle -- sync immediately"); - self.relayer_comms.set_ibd(ibbd); - } - return Ok(ibbd); - } - - // have we reached steady-state behavior? i.e. have we stopped processing both burnchain - // and Stacks blocks? - let mut steady_state = false; - debug!("PoX watchdog: Wait until chainstate reaches steady-state block-processing..."); - - let ibbd = loop { - if !self.relayer_comms.should_keep_running() { - break false; - } - let ibbd = PoxSyncWatchdog::infer_initial_burnchain_block_download( - burnchain, - burnchain_tip.block_snapshot.block_height, - burnchain_height, - ); - - let expected_first_block_deadline = - self.watch_start_ts + (self.estimated_block_download_time as u64); - let expected_last_block_deadline = self.last_block_processed_ts - + (self.estimated_block_download_time as u64) - + (self.estimated_block_process_time as u64); - - match ( - self.count_attachable_stacks_blocks(), - self.count_processed_stacks_blocks(), - ) { - (Ok(num_available), Ok(num_processed)) => { - self.new_attachable_blocks.push_back(num_available as i64); - self.new_processed_blocks.push_back(num_processed as i64); - - if (self.new_attachable_blocks.len() as u64) > self.max_samples { - self.new_attachable_blocks.pop_front(); - } - if (self.new_processed_blocks.len() as u64) > self.max_samples { - self.new_processed_blocks.pop_front(); - } - - if (self.new_attachable_blocks.len() as u64) < self.max_samples - || (self.new_processed_blocks.len() as u64) < self.max_samples - { - // still getting initial samples - if self.new_processed_blocks.len() % 10 == 0 { - debug!( - "PoX watchdog: Still warming up: {} out of {} samples...", - &self.new_attachable_blocks.len(), - &self.max_samples - ); - } - sleep_ms(PER_SAMPLE_WAIT_MS); - continue; - } - - if self.watch_start_ts > 0 - && get_epoch_time_secs() < expected_first_block_deadline - { - // still waiting for that first block in this reward cycle - debug!("PoX watchdog: Still warming up: waiting until {expected_first_block_deadline}s for first Stacks block download (estimated download time: {}s)...", self.estimated_block_download_time); - sleep_ms(PER_SAMPLE_WAIT_MS); - continue; - } - - if self.watch_start_ts > 0 - && (self.new_attachable_blocks.len() as u64) < self.max_samples - && self.watch_start_ts - + self.max_samples - + self.steady_state_burnchain_sync_interval - * (burnchain.stable_confirmations as u64) - < get_epoch_time_secs() - { - debug!( - "PoX watchdog: could not calculate {} samples in {} seconds. Assuming suspend/resume, or assuming load is too high.", - self.max_samples, - self.max_samples + self.steady_state_burnchain_sync_interval * (burnchain.stable_confirmations as u64) - ); - self.reset(burnchain, burnchain_tip.block_snapshot.block_height); - - self.watch_start_ts = get_epoch_time_secs(); - self.steady_state_resync_ts = - get_epoch_time_secs() + self.steady_state_burnchain_sync_interval; - continue; - } - - // take first derivative of samples -- see if the download and processing rate has gone to 0 - let attachable_delta = PoxSyncWatchdog::derivative(&self.new_attachable_blocks); - let processed_delta = PoxSyncWatchdog::derivative(&self.new_processed_blocks); - - let (flat_attachable, attachable_deviants) = - PoxSyncWatchdog::is_mostly_flat(&attachable_delta, 0); - let (flat_processed, processed_deviants) = - PoxSyncWatchdog::is_mostly_flat(&processed_delta, 0); - - debug!("PoX watchdog: flat-attachable?: {flat_attachable}, flat-processed?: {flat_processed}, estimated block-download time: {}s, estimated block-processing time: {}s", - self.estimated_block_download_time, self.estimated_block_process_time); - - if flat_attachable && flat_processed && self.last_block_processed_ts == 0 { - // we're flat-lining -- this may be the end of this cycle - self.last_block_processed_ts = get_epoch_time_secs(); - } - - if self.last_block_processed_ts > 0 - && get_epoch_time_secs() < expected_last_block_deadline - { - debug!("PoX watchdog: Still processing blocks; waiting until at least min({},{expected_last_block_deadline})s before burnchain synchronization (estimated block-processing time: {}s)", - get_epoch_time_secs() + 1, self.estimated_block_process_time); - sleep_ms(PER_SAMPLE_WAIT_MS); - continue; - } - - if ibbd { - // doing initial burnchain block download right now. - // only proceed to fetch the next reward cycle's burnchain blocks if we're neither downloading nor - // attaching blocks recently - debug!("PoX watchdog: In initial burnchain block download: flat-attachable = {flat_attachable}, flat-processed = {flat_processed}, min-attachable: {attachable_deviants}, min-processed: {processed_deviants}"); - - if !flat_attachable || !flat_processed { - sleep_ms(PER_SAMPLE_WAIT_MS); - continue; - } - } else { - let now = get_epoch_time_secs(); - if now < self.steady_state_resync_ts { - // steady state - if !steady_state { - debug!("PoX watchdog: In steady-state; waiting until at least {} before burnchain synchronization", self.steady_state_resync_ts); - steady_state = flat_attachable && flat_processed; - } - sleep_ms(PER_SAMPLE_WAIT_MS); - continue; - } else { - // steady state - if !steady_state { - debug!("PoX watchdog: In steady-state, but ready burnchain synchronization as of {}", self.steady_state_resync_ts); - steady_state = flat_attachable && flat_processed; - } - } - } - } - (err_attach, err_processed) => { - // can only happen on DB query failure - error!("PoX watchdog: Failed to count recently attached ('{err_attach:?}') and/or processed ('{err_processed:?}') staging blocks"); - panic!(); - } - }; - - if ibbd || !steady_state { - debug!("PoX watchdog: Wait for at least one downloader state-machine pass before resetting..."); - self.relayer_comms.wait_for_download_pass(SYNC_WAIT_SECS)?; - } else { - debug!("PoX watchdog: in steady-state, so not waiting for download pass"); - } - - self.reset(burnchain, burnchain_tip.block_snapshot.block_height); - break ibbd; - }; - - let ret = ibbd || !steady_state; - self.relayer_comms.set_ibd(ret); - Ok(ret) + Ok((ibbd, max_sync_height)) } } diff --git a/testnet/stacks-node/src/tests/nakamoto_integrations.rs b/testnet/stacks-node/src/tests/nakamoto_integrations.rs index ef6199d331..a479e39a36 100644 --- a/testnet/stacks-node/src/tests/nakamoto_integrations.rs +++ b/testnet/stacks-node/src/tests/nakamoto_integrations.rs @@ -3396,7 +3396,7 @@ fn vote_for_aggregate_key_burn_op() { /// This test boots a follower node using the block downloader #[test] #[ignore] -fn follower_bootup() { +fn follower_bootup_simple() { if env::var("BITCOIND_TEST") != Ok("1".into()) { return; } From 4f6b1906bebe65fd32e279ea7d603c6d66d478a0 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Sun, 1 Dec 2024 21:53:09 -0500 Subject: [PATCH 2/4] chore: force require anchor blocks --- stackslib/src/chainstate/coordinator/mod.rs | 20 +++++++++++++++++++- testnet/stacks-node/src/config.rs | 7 +++++++ testnet/stacks-node/src/run_loop/nakamoto.rs | 1 + testnet/stacks-node/src/run_loop/neon.rs | 1 + 4 files changed, 28 insertions(+), 1 deletion(-) diff --git a/stackslib/src/chainstate/coordinator/mod.rs b/stackslib/src/chainstate/coordinator/mod.rs index 5b7c7e89b6..3c78e02cef 100644 --- a/stackslib/src/chainstate/coordinator/mod.rs +++ b/stackslib/src/chainstate/coordinator/mod.rs @@ -198,6 +198,9 @@ pub trait BlockEventDispatcher { } pub struct ChainsCoordinatorConfig { + /// true: assume all anchor blocks are present, and block chain sync until they arrive + /// false: process sortitions in reward cycles without anchor blocks + pub assume_present_anchor_blocks: bool, /// true: use affirmation maps before 2.1 /// false: only use affirmation maps in 2.1 or later pub always_use_affirmation_maps: bool, @@ -209,8 +212,9 @@ pub struct ChainsCoordinatorConfig { impl ChainsCoordinatorConfig { pub fn new() -> ChainsCoordinatorConfig { ChainsCoordinatorConfig { - always_use_affirmation_maps: false, + always_use_affirmation_maps: true, require_affirmed_anchor_blocks: true, + assume_present_anchor_blocks: true, } } } @@ -2336,6 +2340,20 @@ impl< panic!("BUG: no epoch defined at height {}", header.block_height) }); + if self.config.assume_present_anchor_blocks { + // anchor blocks are always assumed to be present in the chain history, + // so report its absence if we don't have it. + if let PoxAnchorBlockStatus::SelectedAndUnknown(missing_anchor_block, _) = + &rc_info.anchor_status + { + info!( + "Currently missing PoX anchor block {}, which is assumed to be present", + &missing_anchor_block + ); + return Ok(Some(missing_anchor_block.clone())); + } + } + if cur_epoch.epoch_id >= StacksEpochId::Epoch21 || self.config.always_use_affirmation_maps { // potentially have an anchor block, but only process the next reward cycle (and // subsequent reward cycles) with it if the prepare-phase block-commits affirm its diff --git a/testnet/stacks-node/src/config.rs b/testnet/stacks-node/src/config.rs index 785ce057e5..2831b9c756 100644 --- a/testnet/stacks-node/src/config.rs +++ b/testnet/stacks-node/src/config.rs @@ -1638,6 +1638,7 @@ pub struct NodeConfig { pub use_test_genesis_chainstate: Option, pub always_use_affirmation_maps: bool, pub require_affirmed_anchor_blocks: bool, + pub assume_present_anchor_blocks: bool, /// Fault injection for failing to push blocks pub fault_injection_block_push_fail_probability: Option, // fault injection for hiding blocks. @@ -1921,6 +1922,7 @@ impl Default for NodeConfig { use_test_genesis_chainstate: None, always_use_affirmation_maps: true, require_affirmed_anchor_blocks: true, + assume_present_anchor_blocks: true, fault_injection_block_push_fail_probability: None, fault_injection_hide_blocks: false, chain_liveness_poll_time_secs: 300, @@ -2393,6 +2395,7 @@ pub struct NodeConfigFile { pub use_test_genesis_chainstate: Option, pub always_use_affirmation_maps: Option, pub require_affirmed_anchor_blocks: Option, + pub assume_present_anchor_blocks: Option, /// At most, how often should the chain-liveness thread /// wake up the chains-coordinator. Defaults to 300s (5 min). pub chain_liveness_poll_time_secs: Option, @@ -2474,6 +2477,10 @@ impl NodeConfigFile { // miners should always try to mine, even if they don't have the anchored // blocks in the canonical affirmation map. Followers, however, can stall. require_affirmed_anchor_blocks: self.require_affirmed_anchor_blocks.unwrap_or(!miner), + // as of epoch 3.0, all prepare phases have anchor blocks. + // at the start of epoch 3.0, the chain stalls without anchor blocks. + // only set this to false if you're doing some very extreme testing. + assume_present_anchor_blocks: true, // chainstate fault_injection activation for hide_blocks. // you can't set this in the config file. fault_injection_hide_blocks: false, diff --git a/testnet/stacks-node/src/run_loop/nakamoto.rs b/testnet/stacks-node/src/run_loop/nakamoto.rs index 16f5a12b2d..335fb325d8 100644 --- a/testnet/stacks-node/src/run_loop/nakamoto.rs +++ b/testnet/stacks-node/src/run_loop/nakamoto.rs @@ -319,6 +319,7 @@ impl RunLoop { let mut fee_estimator = moved_config.make_fee_estimator(); let coord_config = ChainsCoordinatorConfig { + assume_present_anchor_blocks: moved_config.node.assume_present_anchor_blocks, always_use_affirmation_maps: moved_config.node.always_use_affirmation_maps, require_affirmed_anchor_blocks: moved_config .node diff --git a/testnet/stacks-node/src/run_loop/neon.rs b/testnet/stacks-node/src/run_loop/neon.rs index 475265869a..94db4ef162 100644 --- a/testnet/stacks-node/src/run_loop/neon.rs +++ b/testnet/stacks-node/src/run_loop/neon.rs @@ -625,6 +625,7 @@ impl RunLoop { let mut fee_estimator = moved_config.make_fee_estimator(); let coord_config = ChainsCoordinatorConfig { + assume_present_anchor_blocks: moved_config.node.assume_present_anchor_blocks, always_use_affirmation_maps: moved_config.node.always_use_affirmation_maps, require_affirmed_anchor_blocks: moved_config .node From 045fcce3aeb09561cdc111a181fc477829e40ee3 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Mon, 9 Dec 2024 13:32:08 -0500 Subject: [PATCH 3/4] chore: use test-specific chains coordinator settings to allow tests to handle (now legacy) cases where the PoX anchor block does not arrive in order (or at all) --- stackslib/src/chainstate/coordinator/mod.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/stackslib/src/chainstate/coordinator/mod.rs b/stackslib/src/chainstate/coordinator/mod.rs index 3c78e02cef..c78c68a4a3 100644 --- a/stackslib/src/chainstate/coordinator/mod.rs +++ b/stackslib/src/chainstate/coordinator/mod.rs @@ -217,6 +217,14 @@ impl ChainsCoordinatorConfig { assume_present_anchor_blocks: true, } } + + pub fn test_new() -> ChainsCoordinatorConfig { + ChainsCoordinatorConfig { + always_use_affirmation_maps: false, + require_affirmed_anchor_blocks: false, + assume_present_anchor_blocks: false, + } + } } pub struct ChainsCoordinator< @@ -704,7 +712,7 @@ impl<'a, T: BlockEventDispatcher, U: RewardSetProvider, B: BurnchainHeaderReader notifier: (), atlas_config, atlas_db: Some(atlas_db), - config: ChainsCoordinatorConfig::new(), + config: ChainsCoordinatorConfig::test_new(), burnchain_indexer, refresh_stacker_db: Arc::new(AtomicBool::new(false)), in_nakamoto_epoch: false, From e486b9e19e9bc2c73835d3f7cb6c9c1a3f5e6153 Mon Sep 17 00:00:00 2001 From: Jude Nelson Date: Mon, 16 Dec 2024 14:44:31 -0500 Subject: [PATCH 4/4] chore: add changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f85ed6526b..1bf801607d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to the versioning scheme outlined in the [README.md](RE ### Changed +- Nodes will assume that all PoX anchor blocks exist by default, and stall initial block download indefinitely to await their arrival (#5502) + ## [3.1.0.0.1] ### Added