From 8fc6fb68e1892d5c138318c78e3b77e383e2410b Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 23 Nov 2023 10:41:38 +1000 Subject: [PATCH 01/24] Provide access to DiskDb and DiskWriteBatch outside the state using a feature --- zebra-state/Cargo.toml | 3 +++ zebra-state/src/lib.rs | 8 +++++++- zebra-state/src/service/finalized_state.rs | 12 ++++++------ 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/zebra-state/Cargo.toml b/zebra-state/Cargo.toml index aa3601ec1cd..7d033b1a6b1 100644 --- a/zebra-state/Cargo.toml +++ b/zebra-state/Cargo.toml @@ -35,6 +35,9 @@ proptest-impl = [ "zebra-chain/proptest-impl" ] +# Experimental shielded blockchain scanning +shielded-scan = [] + # Experimental elasticsearch support elasticsearch = [ "dep:elasticsearch", diff --git a/zebra-state/src/lib.rs b/zebra-state/src/lib.rs index 2855483406b..fe4ccbcf271 100644 --- a/zebra-state/src/lib.rs +++ b/zebra-state/src/lib.rs @@ -59,6 +59,12 @@ pub use service::{ OutputIndex, OutputLocation, TransactionLocation, }; +#[cfg(feature = "shielded-scan")] +pub use service::finalized_state::{DiskDb, ReadDisk}; + +#[cfg(any(test, feature = "proptest-impl", feature = "shielded-scan"))] +pub use service::finalized_state::{DiskWriteBatch, WriteDisk}; + #[cfg(feature = "getblocktemplate-rpcs")] pub use response::GetBlockTemplateChainInfo; @@ -66,7 +72,7 @@ pub use response::GetBlockTemplateChainInfo; pub use service::{ arbitrary::{populated_state, CHAIN_TIP_UPDATE_WAIT_LIMIT}, chain_tip::{ChainTipBlock, ChainTipSender}, - finalized_state::{DiskWriteBatch, MAX_ON_DISK_HEIGHT}, + finalized_state::MAX_ON_DISK_HEIGHT, init_test, init_test_services, ReadStateService, }; diff --git a/zebra-state/src/service/finalized_state.rs b/zebra-state/src/service/finalized_state.rs index 55012649e4d..18a88004b7b 100644 --- a/zebra-state/src/service/finalized_state.rs +++ b/zebra-state/src/service/finalized_state.rs @@ -3,7 +3,7 @@ //! Zebra's database is implemented in 4 layers: //! 
- [`FinalizedState`]: queues, validates, and commits blocks, using... //! - [`ZebraDb`]: reads and writes [`zebra_chain`] types to the database, using... -//! - [`DiskDb`](disk_db::DiskDb): reads and writes format-specific types +//! - [`DiskDb`]: reads and writes format-specific types //! to the database, using... //! - [`disk_format`]: converts types to raw database bytes. //! @@ -38,18 +38,18 @@ mod arbitrary; #[cfg(test)] mod tests; +#[allow(unused_imports)] +pub use disk_db::{DiskWriteBatch, WriteDisk}; pub use disk_format::{OutputIndex, OutputLocation, TransactionLocation}; +#[cfg(feature = "shielded-scan")] +pub use disk_db::{DiskDb, ReadDisk}; + #[cfg(any(test, feature = "proptest-impl"))] pub use disk_format::MAX_ON_DISK_HEIGHT; pub(super) use zebra_db::ZebraDb; -#[cfg(any(test, feature = "proptest-impl"))] -pub use disk_db::DiskWriteBatch; -#[cfg(not(any(test, feature = "proptest-impl")))] -use disk_db::DiskWriteBatch; - /// The finalized part of the chain state, stored in the db. 
/// /// `rocksdb` allows concurrent writes through a shared reference, From 304bad946eb09a512f75a7058cc8ab56f1220c68 Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 23 Nov 2023 10:55:53 +1000 Subject: [PATCH 02/24] Actually let's export ZebraDb for the format upgrade code --- zebra-state/src/lib.rs | 2 +- zebra-state/src/service/finalized_state.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/zebra-state/src/lib.rs b/zebra-state/src/lib.rs index fe4ccbcf271..7957e2481f3 100644 --- a/zebra-state/src/lib.rs +++ b/zebra-state/src/lib.rs @@ -60,7 +60,7 @@ pub use service::{ }; #[cfg(feature = "shielded-scan")] -pub use service::finalized_state::{DiskDb, ReadDisk}; +pub use service::finalized_state::{ReadDisk, ZebraDb}; #[cfg(any(test, feature = "proptest-impl", feature = "shielded-scan"))] pub use service::finalized_state::{DiskWriteBatch, WriteDisk}; diff --git a/zebra-state/src/service/finalized_state.rs b/zebra-state/src/service/finalized_state.rs index 18a88004b7b..408d632206b 100644 --- a/zebra-state/src/service/finalized_state.rs +++ b/zebra-state/src/service/finalized_state.rs @@ -39,16 +39,16 @@ mod arbitrary; mod tests; #[allow(unused_imports)] -pub use disk_db::{DiskWriteBatch, WriteDisk}; +pub use disk_db::{DiskDb, DiskWriteBatch, WriteDisk}; pub use disk_format::{OutputIndex, OutputLocation, TransactionLocation}; +pub use zebra_db::ZebraDb; #[cfg(feature = "shielded-scan")] -pub use disk_db::{DiskDb, ReadDisk}; +pub use disk_db::ReadDisk; #[cfg(any(test, feature = "proptest-impl"))] pub use disk_format::MAX_ON_DISK_HEIGHT; -pub(super) use zebra_db::ZebraDb; /// The finalized part of the chain state, stored in the db. 
/// From 2e2a99394740b02e6d84d32ee9579255b9ebb0a2 Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 23 Nov 2023 11:05:35 +1000 Subject: [PATCH 03/24] Pass column families to ZebraDb as an argument --- zebra-state/src/service/finalized_state.rs | 42 +++++++++++++++++- .../src/service/finalized_state/disk_db.rs | 44 +++---------------- .../src/service/finalized_state/zebra_db.rs | 9 +++- .../zebra_db/block/tests/vectors.rs | 5 ++- zebra-state/src/service/read/tests/vectors.rs | 23 ++++++++-- 5 files changed, 77 insertions(+), 46 deletions(-) diff --git a/zebra-state/src/service/finalized_state.rs b/zebra-state/src/service/finalized_state.rs index 408d632206b..e0e4e786412 100644 --- a/zebra-state/src/service/finalized_state.rs +++ b/zebra-state/src/service/finalized_state.rs @@ -49,6 +49,39 @@ pub use disk_db::ReadDisk; #[cfg(any(test, feature = "proptest-impl"))] pub use disk_format::MAX_ON_DISK_HEIGHT; +/// The column families supported by the running `zebra-state` database code. +pub const STATE_COLUMN_FAMILIES_IN_CODE: &[&str] = &[ + // Blocks + "hash_by_height", + "height_by_hash", + "block_header_by_height", + // Transactions + "tx_by_loc", + "hash_by_tx_loc", + "tx_loc_by_hash", + // Transparent + "balance_by_transparent_addr", + "tx_loc_by_transparent_addr_loc", + "utxo_by_out_loc", + "utxo_loc_by_transparent_addr_loc", + // Sprout + "sprout_nullifiers", + "sprout_anchors", + "sprout_note_commitment_tree", + // Sapling + "sapling_nullifiers", + "sapling_anchors", + "sapling_note_commitment_tree", + "sapling_note_commitment_subtree", + // Orchard + "orchard_nullifiers", + "orchard_anchors", + "orchard_note_commitment_tree", + "orchard_note_commitment_subtree", + // Chain + "history_tree", + "tip_chain_value_pool", +]; /// The finalized part of the chain state, stored in the db. 
/// @@ -120,7 +153,14 @@ impl FinalizedState { debug_skip_format_upgrades: bool, #[cfg(feature = "elasticsearch")] elastic_db: Option, ) -> Self { - let db = ZebraDb::new(config, network, debug_skip_format_upgrades); + let db = ZebraDb::new( + config, + network, + debug_skip_format_upgrades, + STATE_COLUMN_FAMILIES_IN_CODE + .iter() + .map(ToString::to_string), + ); #[cfg(feature = "elasticsearch")] let new_state = Self { diff --git a/zebra-state/src/service/finalized_state/disk_db.rs b/zebra-state/src/service/finalized_state/disk_db.rs index e0abc3ce049..935e43de760 100644 --- a/zebra-state/src/service/finalized_state/disk_db.rs +++ b/zebra-state/src/service/finalized_state/disk_db.rs @@ -617,43 +617,13 @@ impl DiskDb { /// const MEMTABLE_RAM_CACHE_MEGABYTES: usize = 128; - /// The column families supported by the running database code. - const COLUMN_FAMILIES_IN_CODE: &'static [&'static str] = &[ - // Blocks - "hash_by_height", - "height_by_hash", - "block_header_by_height", - // Transactions - "tx_by_loc", - "hash_by_tx_loc", - "tx_loc_by_hash", - // Transparent - "balance_by_transparent_addr", - "tx_loc_by_transparent_addr_loc", - "utxo_by_out_loc", - "utxo_loc_by_transparent_addr_loc", - // Sprout - "sprout_nullifiers", - "sprout_anchors", - "sprout_note_commitment_tree", - // Sapling - "sapling_nullifiers", - "sapling_anchors", - "sapling_note_commitment_tree", - "sapling_note_commitment_subtree", - // Orchard - "orchard_nullifiers", - "orchard_anchors", - "orchard_note_commitment_tree", - "orchard_note_commitment_subtree", - // Chain - "history_tree", - "tip_chain_value_pool", - ]; - /// Opens or creates the database at `config.path` for `network`, /// and returns a shared low-level database wrapper. 
- pub fn new(config: &Config, network: Network) -> DiskDb { + pub fn new( + config: &Config, + network: Network, + column_families_in_code: impl IntoIterator, + ) -> DiskDb { let path = config.db_path(network); let db_options = DiskDb::options(); @@ -666,9 +636,7 @@ impl DiskDb { // // let column_families_on_disk = DB::list_cf(&db_options, &path).unwrap_or_default(); - let column_families_in_code = Self::COLUMN_FAMILIES_IN_CODE - .iter() - .map(ToString::to_string); + let column_families_in_code = column_families_in_code.into_iter(); let column_families = column_families_on_disk .into_iter() diff --git a/zebra-state/src/service/finalized_state/zebra_db.rs b/zebra-state/src/service/finalized_state/zebra_db.rs index c370c980f5f..7ab20281e54 100644 --- a/zebra-state/src/service/finalized_state/zebra_db.rs +++ b/zebra-state/src/service/finalized_state/zebra_db.rs @@ -80,7 +80,12 @@ impl ZebraDb { /// /// If `debug_skip_format_upgrades` is true, don't do any format upgrades or format checks. /// This argument is only used when running tests, it is ignored in production code. - pub fn new(config: &Config, network: Network, debug_skip_format_upgrades: bool) -> ZebraDb { + pub fn new( + config: &Config, + network: Network, + debug_skip_format_upgrades: bool, + column_families_in_code: impl IntoIterator, + ) -> ZebraDb { let running_version = database_format_version_in_code(); let disk_version = database_format_version_on_disk(config, network) .expect("unable to read database format version file"); @@ -97,7 +102,7 @@ impl ZebraDb { // changes to the default database version. Then we set the correct version in the // upgrade thread. We need to do the version change in this order, because the version // file can only be changed while we hold the RocksDB database lock. 
- db: DiskDb::new(config, network), + db: DiskDb::new(config, network, column_families_in_code), }; db.spawn_format_change(config, network, format_change); diff --git a/zebra-state/src/service/finalized_state/zebra_db/block/tests/vectors.rs b/zebra-state/src/service/finalized_state/zebra_db/block/tests/vectors.rs index 639ad7e5461..022feb3f7bc 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/block/tests/vectors.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/block/tests/vectors.rs @@ -27,7 +27,7 @@ use zebra_test::vectors::{MAINNET_BLOCKS, TESTNET_BLOCKS}; use crate::{ request::{FinalizedBlock, Treestate}, - service::finalized_state::{disk_db::DiskWriteBatch, ZebraDb}, + service::finalized_state::{disk_db::DiskWriteBatch, ZebraDb, STATE_COLUMN_FAMILIES_IN_CODE}, CheckpointVerifiedBlock, Config, }; @@ -83,6 +83,9 @@ fn test_block_db_round_trip_with( network, // The raw database accesses in this test create invalid database formats. true, + STATE_COLUMN_FAMILIES_IN_CODE + .iter() + .map(ToString::to_string), ); // Check that each block round-trips to the database diff --git a/zebra-state/src/service/read/tests/vectors.rs b/zebra-state/src/service/read/tests/vectors.rs index 54d668d2f63..94ca0f55974 100644 --- a/zebra-state/src/service/read/tests/vectors.rs +++ b/zebra-state/src/service/read/tests/vectors.rs @@ -21,7 +21,7 @@ use crate::{ init_test_services, populated_state, response::MinedTx, service::{ - finalized_state::{DiskWriteBatch, ZebraDb}, + finalized_state::{DiskWriteBatch, ZebraDb, STATE_COLUMN_FAMILIES_IN_CODE}, non_finalized_state::Chain, read::{orchard_subtrees, sapling_subtrees}, }, @@ -122,7 +122,8 @@ async fn test_read_subtrees() -> Result<()> { // Prepare the finalized state. 
let db = { - let db = ZebraDb::new(&Config::ephemeral(), Mainnet, true); + let db = new_ephemeral_db(); + let db_subtrees = db_height_range.enumerate().map(dummy_subtree); for db_subtree in db_subtrees { let mut db_batch = DiskWriteBatch::new(); @@ -206,7 +207,8 @@ async fn test_sapling_subtrees() -> Result<()> { // Prepare the finalized state. let db_subtree = NoteCommitmentSubtree::new(0, Height(1), dummy_subtree_root); - let db = ZebraDb::new(&Config::ephemeral(), Mainnet, true); + + let db = new_ephemeral_db(); let mut db_batch = DiskWriteBatch::new(); db_batch.insert_sapling_subtree(&db, &db_subtree); db.write(db_batch) @@ -271,7 +273,8 @@ async fn test_orchard_subtrees() -> Result<()> { // Prepare the finalized state. let db_subtree = NoteCommitmentSubtree::new(0, Height(1), dummy_subtree_root); - let db = ZebraDb::new(&Config::ephemeral(), Mainnet, true); + + let db = new_ephemeral_db(); let mut db_batch = DiskWriteBatch::new(); db_batch.insert_orchard_subtree(&db, &db_subtree); db.write(db_batch) @@ -361,3 +364,15 @@ where { index == &subtree.index && subtree_data == &subtree.into_data() } + +/// Returns a new ephemeral database with no consistency checks. 
+fn new_ephemeral_db() -> ZebraDb { + ZebraDb::new( + &Config::ephemeral(), + Mainnet, + true, + STATE_COLUMN_FAMILIES_IN_CODE + .iter() + .map(ToString::to_string), + ) +} From 37a1e532314dcc977fa1dda127b787058fc2f943 Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 27 Nov 2023 10:37:35 +1000 Subject: [PATCH 04/24] Allow the database kind to be changed in config.rs --- zebra-state/src/config.rs | 176 +++++++++++++++++++++-------------- zebra-state/src/constants.rs | 21 ++++- zebra-state/src/lib.rs | 5 +- 3 files changed, 125 insertions(+), 77 deletions(-) diff --git a/zebra-state/src/config.rs b/zebra-state/src/config.rs index 5defecc74c0..27d0d7d70a0 100644 --- a/zebra-state/src/config.rs +++ b/zebra-state/src/config.rs @@ -14,13 +14,7 @@ use tracing::Span; use zebra_chain::parameters::Network; -use crate::{ - constants::{ - DATABASE_FORMAT_MINOR_VERSION, DATABASE_FORMAT_PATCH_VERSION, DATABASE_FORMAT_VERSION, - DATABASE_FORMAT_VERSION_FILE_NAME, - }, - BoxError, -}; +use crate::{constants::DATABASE_FORMAT_VERSION_FILE_NAME, BoxError}; /// Configuration for the state service. #[derive(Clone, Debug, Deserialize, Serialize)] @@ -128,27 +122,37 @@ fn gen_temp_path(prefix: &str) -> PathBuf { } impl Config { - /// Returns the path for the finalized state database - pub fn db_path(&self, network: Network) -> PathBuf { + /// Returns the path for the database, based on the kind, major version and network. + /// Each incompatible database format or network gets its own unique path. 
+ pub fn db_path( + &self, + db_kind: impl AsRef, + major_version: u64, + network: Network, + ) -> PathBuf { + let db_kind = db_kind.as_ref(); + let major_version = format!("v{}", major_version); let net_dir = network.lowercase_name(); if self.ephemeral { - gen_temp_path(&format!( - "zebra-state-v{}-{}-", - crate::constants::DATABASE_FORMAT_VERSION, - net_dir - )) + gen_temp_path(&format!("zebra-{db_kind}-{major_version}-{net_dir}-")) } else { self.cache_dir - .join("state") - .join(format!("v{}", crate::constants::DATABASE_FORMAT_VERSION)) + .join(db_kind) + .join(major_version) .join(net_dir) } } - /// Returns the path of the database format version file. - pub fn version_file_path(&self, network: Network) -> PathBuf { - let mut version_path = self.db_path(network); + /// Returns the path for the database format minor/patch version file, + /// based on the kind, major version and network. + pub fn version_file_path( + &self, + db_kind: impl AsRef, + major_version: u64, + network: Network, + ) -> PathBuf { + let mut version_path = self.db_path(db_kind, major_version, network); version_path.push(DATABASE_FORMAT_VERSION_FILE_NAME); @@ -193,17 +197,31 @@ impl Default for Config { /// and deletes them from the filesystem. /// /// Iterate over the files and directories in the databases folder and delete if: -/// - The state directory exists. -/// - The entry is a directory. +/// - The `db_kind` directory exists. +/// - The entry in `db_kind` is a directory. /// - The directory name has a prefix `v`. /// - The directory name without the prefix can be parsed as an unsigned number. -/// - The parsed number is lower than the hardcoded `DATABASE_FORMAT_VERSION`. -pub fn check_and_delete_old_databases(config: Config) -> JoinHandle<()> { +/// - The parsed number is lower than the `major_version`. +/// +/// The network is used to generate the path, then ignored. +/// If `config` is an ephemeral database, no databases are deleted. 
+/// +/// # Panics +/// +/// If the path doesn't match the expected `db_kind/major_version/network` format. +pub fn check_and_delete_old_databases( + config: &Config, + db_kind: impl AsRef, + major_version: u64, + network: Network, +) -> JoinHandle<()> { let current_span = Span::current(); + let config = config.clone(); + let db_kind = db_kind.as_ref().to_string(); spawn_blocking(move || { current_span.in_scope(|| { - delete_old_databases(config); + delete_old_databases(config, db_kind, major_version, network); info!("finished old database version cleanup task"); }) }) @@ -212,20 +230,47 @@ pub fn check_and_delete_old_databases(config: Config) -> JoinHandle<()> { /// Check if there are old database folders and delete them from the filesystem. /// /// See [`check_and_delete_old_databases`] for details. -fn delete_old_databases(config: Config) { +fn delete_old_databases(config: Config, db_kind: String, major_version: u64, network: Network) { if config.ephemeral || !config.delete_old_database { return; } - info!("checking for old database versions"); - - let state_dir = config.cache_dir.join("state"); - if let Some(state_dir) = read_dir(&state_dir) { - for entry in state_dir.flatten() { - let deleted_state = check_and_delete_database(&config, &entry); - - if let Some(deleted_state) = deleted_state { - info!(?deleted_state, "deleted outdated state directory"); + info!(db_kind, "checking for old database versions"); + + let mut db_path = config.db_path(&db_kind, major_version, network); + // Check and remove the network path. + assert_eq!( + db_path.file_name(), + Some(network.lowercase_name().as_ref()), + "unexpected database network path structure" + ); + assert!(db_path.pop()); + + // Check and remove the major version path, we'll iterate over them all below. 
+ assert_eq!( + db_path.file_name(), + Some(format!("v{major_version}").as_ref()), + "unexpected database version path structure" + ); + assert!(db_path.pop()); + + // Check for the correct database kind to iterate within. + assert_eq!( + db_path.file_name(), + Some(db_kind.as_ref()), + "unexpected database kind path structure" + ); + + if let Some(db_kind_dir) = read_dir(&db_path) { + for entry in db_kind_dir.flatten() { + let deleted_db_version = check_and_delete_database(&config, major_version, &entry); + + if let Some(deleted_db_version) = deleted_db_version { + info!( + ?deleted_db_version, + ?db_kind, + "deleted outdated database directory" + ); } } } @@ -247,11 +292,15 @@ fn read_dir(dir: &Path) -> Option { /// See [`check_and_delete_old_databases`] for details. /// /// If the directory was deleted, returns its path. -fn check_and_delete_database(config: &Config, entry: &DirEntry) -> Option { +fn check_and_delete_database( + config: &Config, + major_version: u64, + entry: &DirEntry, +) -> Option { let dir_name = parse_dir_name(entry)?; - let version_number = parse_version_number(&dir_name)?; + let dir_major_version = parse_major_version(&dir_name)?; - if version_number >= crate::constants::DATABASE_FORMAT_VERSION { + if dir_major_version >= major_version { return None; } @@ -296,10 +345,10 @@ fn parse_dir_name(entry: &DirEntry) -> Option { None } -/// Parse the state version number from `dir_name`. +/// Parse the database major version number from `dir_name`. /// /// Returns `None` if parsing fails, or the directory name is not in the expected format. -fn parse_version_number(dir_name: &str) -> Option { +fn parse_major_version(dir_name: &str) -> Option { dir_name .strip_prefix('v') .and_then(|version| version.parse().ok()) @@ -307,23 +356,13 @@ fn parse_version_number(dir_name: &str) -> Option { // TODO: move these to the format upgrade module -/// Returns the full semantic version of the currently running database format code. 
-/// -/// This is the version implemented by the Zebra code that's currently running, -/// the minor and patch versions on disk can be different. -pub fn database_format_version_in_code() -> Version { - Version::new( - DATABASE_FORMAT_VERSION, - DATABASE_FORMAT_MINOR_VERSION, - DATABASE_FORMAT_PATCH_VERSION, - ) -} - -/// Returns the full semantic version of the on-disk database. +/// Returns the full semantic version of the on-disk database, based on its kind, major version, +/// and network. /// /// Typically, the version is read from a version text file. /// -/// If there is an existing on-disk database, but no version file, returns `Ok(Some(major.0.0))`. +/// If there is an existing on-disk database, but no version file, +/// returns `Ok(Some(major_version.0.0))`. /// (This happens even if the database directory was just newly created.) /// /// If there is no existing on-disk database, returns `Ok(None)`. @@ -332,12 +371,14 @@ pub fn database_format_version_in_code() -> Version { /// implemented by the running Zebra code can be different. pub fn database_format_version_on_disk( config: &Config, + db_kind: impl AsRef, + major_version: u64, network: Network, ) -> Result, BoxError> { - let version_path = config.version_file_path(network); - let db_path = config.db_path(network); + let version_path = config.version_file_path(&db_kind, major_version, network); + let db_path = config.db_path(db_kind, major_version, network); - database_format_version_at_path(&version_path, &db_path) + database_format_version_at_path(&version_path, &db_path, major_version) } /// Returns the full semantic version of the on-disk database at `version_path`. 
@@ -346,6 +387,7 @@ pub fn database_format_version_on_disk( pub(crate) fn database_format_version_at_path( version_path: &Path, db_path: &Path, + major_version: u64, ) -> Result, BoxError> { let disk_version_file = match fs::read_to_string(version_path) { Ok(version) => Some(version), @@ -363,7 +405,7 @@ pub(crate) fn database_format_version_at_path( .ok_or("invalid database format version file")?; return Ok(Some(Version::new( - DATABASE_FORMAT_VERSION, + major_version, minor.parse()?, patch.parse()?, ))); @@ -374,7 +416,7 @@ pub(crate) fn database_format_version_at_path( match fs::metadata(db_path) { // But there is a database on disk, so it has the current major version with no upgrades. // If the database directory was just newly created, we also return this version. - Ok(_metadata) => Ok(Some(Version::new(DATABASE_FORMAT_VERSION, 0, 0))), + Ok(_metadata) => Ok(Some(Version::new(major_version, 0, 0))), // There's no version file and no database on disk, so it's a new database. // It will be created with the current version, @@ -386,6 +428,7 @@ pub(crate) fn database_format_version_at_path( } // Hide this destructive method from the public API, except in tests. +#[allow(unused_imports)] pub(crate) use hidden::write_database_format_version_to_disk; pub(crate) mod hidden { @@ -395,6 +438,8 @@ pub(crate) mod hidden { /// Writes `changed_version` to the on-disk database after the format is changed. /// (Or a new database is created.) /// + /// The database path is based on its kind, `changed_version.major`, and network. + /// /// # Correctness /// /// This should only be called: @@ -407,22 +452,13 @@ pub(crate) mod hidden { /// This must only be called while RocksDB has an open database for `config`. /// Otherwise, multiple Zebra processes could write the version at the same time, /// corrupting the file. - /// - /// # Panics - /// - /// If the major versions do not match. (The format is incompatible.) 
pub fn write_database_format_version_to_disk( - changed_version: &Version, config: &Config, + db_kind: impl AsRef, + changed_version: &Version, network: Network, ) -> Result<(), BoxError> { - let version_path = config.version_file_path(network); - - // The major version is already in the directory path. - assert_eq!( - changed_version.major, DATABASE_FORMAT_VERSION, - "tried to do in-place database format change to an incompatible version" - ); + let version_path = config.version_file_path(db_kind, changed_version.major, network); let version = format!("{}.{}", changed_version.minor, changed_version.patch); diff --git a/zebra-state/src/constants.rs b/zebra-state/src/constants.rs index af232e0dbfe..0481484fa89 100644 --- a/zebra-state/src/constants.rs +++ b/zebra-state/src/constants.rs @@ -27,6 +27,9 @@ pub use zebra_chain::transparent::MIN_TRANSPARENT_COINBASE_MATURITY; // TODO: change to HeightDiff pub const MAX_BLOCK_REORG_HEIGHT: u32 = MIN_TRANSPARENT_COINBASE_MATURITY - 1; +/// The directory name used to distinguish the state database from Zebra's other databases or flat files. +pub const STATE_DATABASE_KIND: &str = "state"; + /// The database format major version, incremented each time the on-disk database format has a /// breaking data format change. /// @@ -40,7 +43,7 @@ pub const MAX_BLOCK_REORG_HEIGHT: u32 = MIN_TRANSPARENT_COINBASE_MATURITY - 1; /// /// Use [`config::database_format_version_in_code()`] or /// [`config::database_format_version_on_disk()`] to get the full semantic format version. -pub(crate) const DATABASE_FORMAT_VERSION: u64 = 25; +const DATABASE_FORMAT_VERSION: u64 = 25; /// The database format minor version, incremented each time the on-disk database format has a /// significant data format change. 
@@ -49,11 +52,23 @@ pub(crate) const DATABASE_FORMAT_VERSION: u64 = 25; /// - adding new column families, /// - changing the format of a column family in a compatible way, or /// - breaking changes with compatibility code in all supported Zebra versions. -pub(crate) const DATABASE_FORMAT_MINOR_VERSION: u64 = 3; +const DATABASE_FORMAT_MINOR_VERSION: u64 = 3; /// The database format patch version, incremented each time the on-disk database format has a /// significant format compatibility fix. -pub(crate) const DATABASE_FORMAT_PATCH_VERSION: u64 = 0; +const DATABASE_FORMAT_PATCH_VERSION: u64 = 0; + +/// Returns the full semantic version of the currently running state database format code. +/// +/// This is the version implemented by the Zebra code that's currently running, +/// the minor and patch versions on disk can be different. +pub fn state_database_format_version_in_code() -> Version { + Version::new( + DATABASE_FORMAT_VERSION, + DATABASE_FORMAT_MINOR_VERSION, + DATABASE_FORMAT_PATCH_VERSION, + ) +} /// Returns the highest database version that modifies the subtree index format. 
/// diff --git a/zebra-state/src/lib.rs b/zebra-state/src/lib.rs index 7957e2481f3..4e82ebc6b2c 100644 --- a/zebra-state/src/lib.rs +++ b/zebra-state/src/lib.rs @@ -39,10 +39,7 @@ mod service; #[cfg(test)] mod tests; -pub use config::{ - check_and_delete_old_databases, database_format_version_in_code, - database_format_version_on_disk, Config, -}; +pub use config::{check_and_delete_old_databases, database_format_version_on_disk, Config}; pub use constants::MAX_BLOCK_REORG_HEIGHT; pub use error::{ BoxError, CloneError, CommitSemanticallyVerifiedError, DuplicateNullifierError, From cc13b8a63fa282dfc54a1140c4d1c3433b97ac34 Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 27 Nov 2023 10:38:09 +1000 Subject: [PATCH 05/24] Use the state kind in finalized_state.rs --- zebra-state/src/service/finalized_state.rs | 34 +++++++++++----------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/zebra-state/src/service/finalized_state.rs b/zebra-state/src/service/finalized_state.rs index e0e4e786412..8d2c7e794a7 100644 --- a/zebra-state/src/service/finalized_state.rs +++ b/zebra-state/src/service/finalized_state.rs @@ -2,9 +2,8 @@ //! //! Zebra's database is implemented in 4 layers: //! - [`FinalizedState`]: queues, validates, and commits blocks, using... -//! - [`ZebraDb`]: reads and writes [`zebra_chain`] types to the database, using... -//! - [`DiskDb`]: reads and writes format-specific types -//! to the database, using... +//! - [`ZebraDb`]: reads and writes [`zebra_chain`] types to the state database, using... +//! - [`DiskDb`]: reads and writes generic types to any column family in the database, using... //! - [`disk_format`]: converts types to raw database bytes. //! //! These layers allow us to split [`zebra_chain`] types for efficient database storage. @@ -12,8 +11,8 @@ //! //! # Correctness //! -//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must -//! be incremented each time the database format (column, serialization, etc) changes. +//! 
[`crate::constants::state_database_format_version_in_code()`] must be incremented +//! each time the database format (column, serialization, etc) changes. use std::{ io::{stderr, stdout, Write}, @@ -23,6 +22,7 @@ use std::{ use zebra_chain::{block, parallel::tree::NoteCommitmentTrees, parameters::Network}; use crate::{ + constants::{state_database_format_version_in_code, STATE_DATABASE_KIND}, request::{FinalizableBlock, FinalizedBlock, Treestate}, service::{check, QueuedCheckpointVerified}, BoxError, CheckpointVerifiedBlock, CloneError, Config, @@ -98,9 +98,6 @@ pub struct FinalizedState { // This configuration cannot be modified after the database is initialized, // because some clones would have different values. // - /// The configured network. - network: Network, - /// The configured stop height. /// /// Commit blocks to the finalized state up to this height, then exit Zebra. @@ -155,6 +152,8 @@ impl FinalizedState { ) -> Self { let db = ZebraDb::new( config, + STATE_DATABASE_KIND, + &state_database_format_version_in_code(), network, debug_skip_format_upgrades, STATE_COLUMN_FAMILIES_IN_CODE @@ -164,7 +163,6 @@ impl FinalizedState { #[cfg(feature = "elasticsearch")] let new_state = Self { - network, debug_stop_at_height: config.debug_stop_at_height.map(block::Height), db, elastic_db, @@ -173,7 +171,6 @@ impl FinalizedState { #[cfg(not(feature = "elasticsearch"))] let new_state = Self { - network, debug_stop_at_height: config.debug_stop_at_height.map(block::Height), db, }; @@ -222,7 +219,7 @@ impl FinalizedState { /// Returns the configured network for this database. pub fn network(&self) -> Network { - self.network + self.db.network() } /// Commit a checkpoint-verified block to the state. 
@@ -333,7 +330,7 @@ impl FinalizedState { // thread, if it shows up in profiles check::block_commitment_is_valid_for_chain_history( block.clone(), - self.network, + self.network(), &history_tree, )?; @@ -399,9 +396,12 @@ impl FinalizedState { let finalized_inner_block = finalized.block.clone(); let note_commitment_trees = finalized.treestate.note_commitment_trees.clone(); - let result = - self.db - .write_block(finalized, prev_note_commitment_trees, self.network, source); + let result = self.db.write_block( + finalized, + prev_note_commitment_trees, + self.network(), + source, + ); if result.is_ok() { // Save blocks to elasticsearch if the feature is enabled. @@ -461,7 +461,7 @@ impl FinalizedState { // less than this number of seconds. const CLOSE_TO_TIP_SECONDS: i64 = 14400; // 4 hours - let mut blocks_size_to_dump = match self.network { + let mut blocks_size_to_dump = match self.network() { Network::Mainnet => MAINNET_AWAY_FROM_TIP_BULK_SIZE, Network::Testnet => TESTNET_AWAY_FROM_TIP_BULK_SIZE, }; @@ -491,7 +491,7 @@ impl FinalizedState { let rt = tokio::runtime::Runtime::new() .expect("runtime creation for elasticsearch should not fail."); let blocks = self.elastic_blocks.clone(); - let network = self.network; + let network = self.network(); rt.block_on(async move { let response = client From b5bf38600a7926a84789d3f566212f54b60ff95b Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 27 Nov 2023 10:39:20 +1000 Subject: [PATCH 06/24] Allow different database kinds in ZebraDb, but don't move the upgrade code yet --- .../finalized_state/disk_format/upgrade.rs | 2 + .../src/service/finalized_state/zebra_db.rs | 125 +++++++++++++----- 2 files changed, 94 insertions(+), 33 deletions(-) diff --git a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs index d2981e62a53..76a70ad90bb 100644 --- a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs +++ 
b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs @@ -392,6 +392,8 @@ impl DbFormatChange { Ok(()) } + // TODO: Move state-specific upgrade code to a finalized_state/* module. + /// Apply any required format updates to the database. /// Format changes should be launched in an independent `std::thread`. /// diff --git a/zebra-state/src/service/finalized_state/zebra_db.rs b/zebra-state/src/service/finalized_state/zebra_db.rs index 7ab20281e54..0e7ae142a88 100644 --- a/zebra-state/src/service/finalized_state/zebra_db.rs +++ b/zebra-state/src/service/finalized_state/zebra_db.rs @@ -14,10 +14,11 @@ use std::{ sync::{mpsc, Arc}, }; +use semver::Version; use zebra_chain::parameters::Network; use crate::{ - config::{database_format_version_in_code, database_format_version_on_disk}, + config::database_format_version_on_disk, service::finalized_state::{ disk_db::DiskDb, disk_format::{ @@ -25,7 +26,7 @@ use crate::{ upgrade::{DbFormatChange, DbFormatChangeThreadHandle}, }, }, - Config, + write_database_format_version_to_disk, BoxError, Config, }; pub mod block; @@ -37,7 +38,7 @@ pub mod transparent; #[cfg(any(test, feature = "proptest-impl"))] pub mod arbitrary; -/// Wrapper struct to ensure high-level typed database access goes through the correct API. +/// Wrapper struct to ensure high-level `zebra-state` database access goes through the correct API. /// /// `rocksdb` allows concurrent writes through a shared reference, /// so database instances are cloneable. When the final clone is dropped, @@ -51,11 +52,13 @@ pub struct ZebraDb { // /// The configuration for the database. // - // TODO: use the database and version paths instead, and refactor the upgrade code to use paths + // TODO: move the config to DiskDb config: Arc, /// Should format upgrades and format checks be skipped for this instance? /// Only used in test code. 
+ // + // TODO: move this to DiskDb debug_skip_format_upgrades: bool, // Owned State @@ -68,6 +71,8 @@ pub struct ZebraDb { /// /// This field should be dropped before the database field, so the format upgrade task is /// cancelled before the database is dropped. This helps avoid some kinds of deadlocks. + // + // TODO: move the generic upgrade code and fields to DiskDb format_change_handle: Option, /// The inner low-level database wrapper for the RocksDB database. @@ -75,23 +80,32 @@ pub struct ZebraDb { } impl ZebraDb { - /// Opens or creates the database at `config.path` for `network`, + /// Opens or creates the database at a path based on the kind, major version and network, + /// with the supplied column families, preserving any existing column families, /// and returns a shared high-level typed database wrapper. /// /// If `debug_skip_format_upgrades` is true, don't do any format upgrades or format checks. /// This argument is only used when running tests, it is ignored in production code. + // + // TODO: rename to StateDb and remove the db_kind and column_families_in_code arguments pub fn new( config: &Config, + db_kind: impl AsRef, + format_version_in_code: &Version, network: Network, debug_skip_format_upgrades: bool, column_families_in_code: impl IntoIterator, ) -> ZebraDb { - let running_version = database_format_version_in_code(); - let disk_version = database_format_version_on_disk(config, network) - .expect("unable to read database format version file"); + let disk_version = database_format_version_on_disk( + config, + &db_kind, + format_version_in_code.major, + network, + ) + .expect("unable to read database format version file"); // Log any format changes before opening the database, in case opening fails. - let format_change = DbFormatChange::open_database(running_version, disk_version); + let format_change = DbFormatChange::open_database(format_version_in_code, disk_version); // Open the database and do initial checks. 
let mut db = ZebraDb { @@ -102,21 +116,22 @@ impl ZebraDb { // changes to the default database version. Then we set the correct version in the // upgrade thread. We need to do the version change in this order, because the version // file can only be changed while we hold the RocksDB database lock. - db: DiskDb::new(config, network, column_families_in_code), + db: DiskDb::new( + config, + db_kind, + format_version_in_code, + network, + column_families_in_code, + ), }; - db.spawn_format_change(config, network, format_change); + db.spawn_format_change(format_change); db } /// Launch any required format changes or format checks, and store their thread handle. - pub fn spawn_format_change( - &mut self, - config: &Config, - network: Network, - format_change: DbFormatChange, - ) { + pub fn spawn_format_change(&mut self, format_change: DbFormatChange) { // Always do format upgrades & checks in production code. if cfg!(test) && self.debug_skip_format_upgrades { return; @@ -133,18 +148,59 @@ impl ZebraDb { // TODO: // - should debug_stop_at_height wait for the upgrade task to finish? - // - if needed, make upgrade_db into a FinalizedState, - // or move the FinalizedState methods needed for upgrades to ZebraDb. - let format_change_handle = format_change.spawn_format_change( - config.clone(), - network, - initial_tip_height, - upgrade_db, - ); + let format_change_handle = + format_change.spawn_format_change(upgrade_db, initial_tip_height); self.format_change_handle = Some(format_change_handle); } + /// Returns config for this database. + pub fn config(&self) -> &Config { + &self.config + } + + /// Returns the configured database kind for this database. + pub fn db_kind(&self) -> String { + self.db.db_kind() + } + + /// Returns the format version of the running code that created this `ZebraDb` instance in memory. + pub fn format_version_in_code(&self) -> Version { + self.db.format_version_in_code() + } + + /// Returns the fixed major version for this database. 
+ pub fn major_version(&self) -> u64 { + self.format_version_in_code().major + } + + /// Returns the format version of this database on disk. + /// + /// See `database_format_version_on_disk()` for details. + pub fn format_version_on_disk(&self) -> Result<Option<Version>, BoxError> { + database_format_version_on_disk( + self.config(), + self.db_kind(), + self.major_version(), + self.network(), + ) + } + + /// Updates the format of this database on disk to the supplied version. + /// + /// See `write_database_format_version_to_disk()` for details. + pub(crate) fn update_format_version_on_disk( + &self, + new_version: &Version, + ) -> Result<(), BoxError> { + write_database_format_version_to_disk( + self.config(), + self.db_kind(), + new_version, + self.network(), + ) + } + /// Returns the configured network for this database. pub fn network(&self) -> Network { self.db.network() @@ -196,8 +252,13 @@ impl ZebraDb { // which would then make unrelated PRs fail when Zebra starts up. // If the upgrade has completed, or we've done a downgrade, check the state is valid. - let disk_version = database_format_version_on_disk(&self.config, self.network()) - .expect("unexpected invalid or unreadable database version file"); + let disk_version = database_format_version_on_disk( + &self.config, + self.db_kind(), + self.major_version(), + self.network(), + ) + .expect("unexpected invalid or unreadable database version file"); if let Some(disk_version) = disk_version { // We need to keep the cancel handle until the format check has finished, @@ -206,14 +267,12 @@ impl ZebraDb { // We block here because the checks are quick and database validity is // consensus-critical. - if disk_version >= database_format_version_in_code() { - DbFormatChange::check_new_blocks() + if disk_version >= self.db.format_version_in_code() { + DbFormatChange::check_new_blocks(self) .run_format_change_or_check( - &self.config, - self.network(), - // This argument is not used by the format check. 
- None, self, + // The initial tip height is not used by the new blocks format check. + None, &never_cancel_receiver, ) .expect("cancel handle is never used"); From c1e89fcc06826046b932b354906e2a83771685c4 Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 27 Nov 2023 10:39:41 +1000 Subject: [PATCH 07/24] Allow different database kinds in DiskDb --- .../src/service/finalized_state/disk_db.rs | 36 +++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/zebra-state/src/service/finalized_state/disk_db.rs b/zebra-state/src/service/finalized_state/disk_db.rs index 935e43de760..509c9bff0fd 100644 --- a/zebra-state/src/service/finalized_state/disk_db.rs +++ b/zebra-state/src/service/finalized_state/disk_db.rs @@ -22,6 +22,7 @@ use itertools::Itertools; use rlimit::increase_nofile_limit; use rocksdb::ReadOptions; +use semver::Version; use zebra_chain::parameters::Network; use crate::{ @@ -65,6 +66,12 @@ pub struct DiskDb { // This configuration cannot be modified after the database is initialized, // because some clones would have different values. // + /// The configured database kind for this database. + db_kind: String, + + /// The format version of the running Zebra code. + format_version_in_code: Version, + /// The configured network for this database. network: Network, @@ -93,10 +100,6 @@ pub struct DiskDb { /// /// [`rocksdb::WriteBatch`] is a batched set of database updates, /// which must be written to the database using `DiskDb::write(batch)`. 
-// -// TODO: move DiskDb, FinalizedBlock, and the source String into this struct, -// (DiskDb can be cloned), -// and make them accessible via read-only methods #[must_use = "batches must be written to the database"] #[derive(Default)] pub struct DiskWriteBatch { @@ -617,14 +620,18 @@ impl DiskDb { /// const MEMTABLE_RAM_CACHE_MEGABYTES: usize = 128; - /// Opens or creates the database at `config.path` for `network`, + /// Opens or creates the database at a path based on the kind, major version and network, + /// with the supplied column families, preserving any existing column families, /// and returns a shared low-level database wrapper. pub fn new( config: &Config, + db_kind: impl AsRef, + format_version_in_code: &Version, network: Network, column_families_in_code: impl IntoIterator, ) -> DiskDb { - let path = config.db_path(network); + let db_kind = db_kind.as_ref(); + let path = config.db_path(db_kind, format_version_in_code.major, network); let db_options = DiskDb::options(); @@ -651,6 +658,8 @@ impl DiskDb { info!("Opened Zebra state cache at {}", path.display()); let db = DiskDb { + db_kind: db_kind.to_string(), + format_version_in_code: format_version_in_code.clone(), network, ephemeral: config.ephemeral, db: Arc::new(db), @@ -672,6 +681,21 @@ impl DiskDb { // Accessor methods + /// Returns the configured database kind for this database. + pub fn db_kind(&self) -> String { + self.db_kind.clone() + } + + /// Returns the format version of the running code that created this `DiskDb` instance in memory. + pub fn format_version_in_code(&self) -> Version { + self.format_version_in_code.clone() + } + + /// Returns the fixed major version for this database. + pub fn major_version(&self) -> u64 { + self.format_version_in_code().major + } + /// Returns the configured network for this database. 
pub fn network(&self) -> Network { self.network From 3afb3f8f73e3e1421c2112129cb92786684bf62e Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 27 Nov 2023 10:40:21 +1000 Subject: [PATCH 08/24] Allow different database kinds in upgrade.rs, but don't split the upgrade code out yet --- .../finalized_state/disk_format/upgrade.rs | 111 +++++++----------- 1 file changed, 42 insertions(+), 69 deletions(-) diff --git a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs index 76a70ad90bb..7dc157b2f05 100644 --- a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs +++ b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs @@ -15,17 +15,13 @@ use zebra_chain::{ task::{CheckForPanics, WaitForPanics}, CodeTimer, }, - parameters::Network, }; use DbFormatChange::*; use crate::{ - config::write_database_format_version_to_disk, - constants::{latest_version_for_adding_subtrees, DATABASE_FORMAT_VERSION}, - database_format_version_in_code, database_format_version_on_disk, + constants::latest_version_for_adding_subtrees, service::finalized_state::{DiskWriteBatch, ZebraDb}, - Config, }; pub(crate) mod add_subtrees; @@ -113,7 +109,9 @@ impl DbFormatChange { /// Also logs that change at info level. /// /// If `disk_version` is `None`, Zebra is creating a new database. - pub fn open_database(running_version: Version, disk_version: Option<Version>) -> Self { + pub fn open_database(running_version: &Version, disk_version: Option<Version>) -> Self { + let running_version = running_version.clone(); + let Some(disk_version) = disk_version else { info!( %running_version, @@ -160,8 +158,8 @@ impl DbFormatChange { /// This check makes sure the upgrade and new block code produce the same data. /// /// Also logs the check at info level. 
- pub fn check_new_blocks() -> Self { - let running_version = database_format_version_in_code(); + pub fn check_new_blocks(db: &ZebraDb) -> Self { + let running_version = db.format_version_in_code(); info!(%running_version, "checking new blocks were written in current database format"); CheckNewBlocksCurrent { running_version } @@ -219,14 +217,12 @@ impl DbFormatChange { /// Launch a `std::thread` that applies this format change to the database, /// then continues running to perform periodic format checks. /// - /// `initial_tip_height` is the database height when it was opened, and `upgrade_db` is the + /// `initial_tip_height` is the database height when it was opened, and `db` is the /// database instance to upgrade or check. pub fn spawn_format_change( self, - config: Config, - network: Network, + db: ZebraDb, initial_tip_height: Option, - upgrade_db: ZebraDb, ) -> DbFormatChangeThreadHandle { // # Correctness // @@ -237,13 +233,7 @@ impl DbFormatChange { let span = Span::current(); let update_task = thread::spawn(move || { span.in_scope(move || { - self.format_change_run_loop( - config, - network, - initial_tip_height, - upgrade_db, - cancel_receiver, - ) + self.format_change_run_loop(db, initial_tip_height, cancel_receiver) }) }); @@ -264,21 +254,13 @@ impl DbFormatChange { /// newly added blocks matches the current format. It will run until it is cancelled or panics. 
fn format_change_run_loop( self, - config: Config, - network: Network, + db: ZebraDb, initial_tip_height: Option, - upgrade_db: ZebraDb, cancel_receiver: mpsc::Receiver, ) -> Result<(), CancelFormatChange> { - self.run_format_change_or_check( - &config, - network, - initial_tip_height, - &upgrade_db, - &cancel_receiver, - )?; - - let Some(debug_validity_check_interval) = config.debug_validity_check_interval else { + self.run_format_change_or_check(&db, initial_tip_height, &cancel_receiver)?; + + let Some(debug_validity_check_interval) = db.config().debug_validity_check_interval else { return Ok(()); }; @@ -292,11 +274,9 @@ impl DbFormatChange { return Err(CancelFormatChange); } - Self::check_new_blocks().run_format_change_or_check( - &config, - network, + Self::check_new_blocks(&db).run_format_change_or_check( + &db, initial_tip_height, - &upgrade_db, &cancel_receiver, )?; } @@ -306,24 +286,16 @@ impl DbFormatChange { #[allow(clippy::unwrap_in_result)] pub(crate) fn run_format_change_or_check( &self, - config: &Config, - network: Network, + db: &ZebraDb, initial_tip_height: Option, - upgrade_db: &ZebraDb, cancel_receiver: &mpsc::Receiver, ) -> Result<(), CancelFormatChange> { match self { // Perform any required upgrades, then mark the state as upgraded. - Upgrade { .. } => self.apply_format_upgrade( - config, - network, - initial_tip_height, - upgrade_db, - cancel_receiver, - )?, + Upgrade { .. } => self.apply_format_upgrade(db, initial_tip_height, cancel_receiver)?, NewlyCreated { .. } => { - Self::mark_as_newly_created(config, network); + Self::mark_as_newly_created(db); } Downgrade { .. } => { @@ -332,7 +304,7 @@ impl DbFormatChange { // At the start of a format downgrade, the database must be marked as partially or // fully downgraded. This lets newer Zebra versions know that some blocks with older // formats have been added to the database. 
- Self::mark_as_downgraded(config, network); + Self::mark_as_downgraded(db); // Older supported versions just assume they can read newer formats, // because they can't predict all changes a newer Zebra version could make. @@ -373,10 +345,10 @@ impl DbFormatChange { // (unless a future upgrade breaks these format checks) // - re-opening the current version should be valid, regardless of whether the upgrade // or new block code created the format (or any combination). - Self::format_validity_checks_detailed(upgrade_db, cancel_receiver)?.unwrap_or_else(|_| { + Self::format_validity_checks_detailed(db, cancel_receiver)?.unwrap_or_else(|_| { panic!( "unexpected invalid database format: delete and re-sync the database at '{:?}'", - upgrade_db.path() + db.path() ) }); @@ -407,10 +379,8 @@ impl DbFormatChange { #[allow(clippy::unwrap_in_result)] fn apply_format_upgrade( &self, - config: &Config, - network: Network, - initial_tip_height: Option, db: &ZebraDb, + initial_tip_height: Option, cancel_receiver: &mpsc::Receiver, ) -> Result<(), CancelFormatChange> { let Upgrade { @@ -432,7 +402,7 @@ impl DbFormatChange { "marking empty database as upgraded" ); - Self::mark_as_upgraded_to(&database_format_version_in_code(), config, network); + Self::mark_as_upgraded_to(db, newer_running_version); info!( %newer_running_version, @@ -512,7 +482,7 @@ impl DbFormatChange { // Mark the database as upgraded. Zebra won't repeat the upgrade anymore once the // database is marked, so the upgrade MUST be complete at this point. - Self::mark_as_upgraded_to(&version_for_pruning_trees, config, network); + Self::mark_as_upgraded_to(db, &version_for_pruning_trees); timer.finish(module_path!(), line!(), "deduplicate trees upgrade"); } @@ -540,7 +510,7 @@ impl DbFormatChange { // Mark the database as upgraded. Zebra won't repeat the upgrade anymore once the // database is marked, so the upgrade MUST be complete at this point. 
- Self::mark_as_upgraded_to(&latest_version_for_adding_subtrees, config, network); + Self::mark_as_upgraded_to(db, &latest_version_for_adding_subtrees); timer.finish(module_path!(), line!(), "add subtrees upgrade"); } @@ -566,7 +536,7 @@ impl DbFormatChange { // Mark the database as upgraded. Zebra won't repeat the upgrade anymore once the // database is marked, so the upgrade MUST be complete at this point. - Self::mark_as_upgraded_to(&version_for_tree_keys_and_caches, config, network); + Self::mark_as_upgraded_to(db, &version_for_tree_keys_and_caches); timer.finish(module_path!(), line!(), "tree keys and caches upgrade"); } @@ -723,12 +693,13 @@ impl DbFormatChange { /// # Panics /// /// If the format should not have been upgraded, because the database is not newly created. - fn mark_as_newly_created(config: &Config, network: Network) { - let disk_version = database_format_version_on_disk(config, network) + fn mark_as_newly_created(db: &ZebraDb) { + let running_version = db.format_version_in_code(); + let disk_version = db + .format_version_on_disk() .expect("unable to read database format version file path"); - let running_version = database_format_version_in_code(); - let default_new_version = Some(Version::new(DATABASE_FORMAT_VERSION, 0, 0)); + let default_new_version = Some(Version::new(running_version.major, 0, 0)); // The database version isn't empty any more, because we've created the RocksDB database // and acquired its lock. (If it is empty, we have a database locking bug.) 
@@ -739,7 +710,7 @@ impl DbFormatChange { running: {running_version}" ); - write_database_format_version_to_disk(&running_version, config, network) + db.update_format_version_on_disk(&running_version) .expect("unable to write database format version file to disk"); info!( @@ -770,11 +741,12 @@ impl DbFormatChange { /// - older or the same as the disk version /// (multiple upgrades to the same version are not allowed) /// - greater than the running version (that's a logic bug) - fn mark_as_upgraded_to(format_upgrade_version: &Version, config: &Config, network: Network) { - let disk_version = database_format_version_on_disk(config, network) + fn mark_as_upgraded_to(db: &ZebraDb, format_upgrade_version: &Version) { + let running_version = db.format_version_in_code(); + let disk_version = db + .format_version_on_disk() .expect("unable to read database format version file") .expect("tried to upgrade a newly created database"); - let running_version = database_format_version_in_code(); assert!( running_version > disk_version, @@ -800,7 +772,7 @@ impl DbFormatChange { running: {running_version}" ); - write_database_format_version_to_disk(format_upgrade_version, config, network) + db.update_format_version_on_disk(format_upgrade_version) .expect("unable to write database format version file to disk"); info!( @@ -828,11 +800,12 @@ impl DbFormatChange { /// If the state is newly created, because the running version should be the same. /// /// Multiple downgrades are allowed, because they all downgrade to the same running version. 
- fn mark_as_downgraded(config: &Config, network: Network) { - let disk_version = database_format_version_on_disk(config, network) + fn mark_as_downgraded(db: &ZebraDb) { + let running_version = db.format_version_in_code(); + let disk_version = db + .format_version_on_disk() .expect("unable to read database format version file") .expect("can't downgrade a newly created database"); - let running_version = database_format_version_in_code(); assert!( disk_version >= running_version, @@ -841,7 +814,7 @@ impl DbFormatChange { running: {running_version}" ); - write_database_format_version_to_disk(&running_version, config, network) + db.update_format_version_on_disk(&running_version) .expect("unable to write database format version file to disk"); info!( From 8ba666e840bc34f8d542eebd993685353d869b6c Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 27 Nov 2023 11:18:01 +1000 Subject: [PATCH 09/24] Add new arguments to raw database tests --- .../service/finalized_state/zebra_db/block/tests/vectors.rs | 3 +++ zebra-state/src/service/read/tests/vectors.rs | 3 +++ 2 files changed, 6 insertions(+) diff --git a/zebra-state/src/service/finalized_state/zebra_db/block/tests/vectors.rs b/zebra-state/src/service/finalized_state/zebra_db/block/tests/vectors.rs index 022feb3f7bc..4a3385ebe8a 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/block/tests/vectors.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/block/tests/vectors.rs @@ -26,6 +26,7 @@ use zebra_chain::{ use zebra_test::vectors::{MAINNET_BLOCKS, TESTNET_BLOCKS}; use crate::{ + constants::{state_database_format_version_in_code, STATE_DATABASE_KIND}, request::{FinalizedBlock, Treestate}, service::finalized_state::{disk_db::DiskWriteBatch, ZebraDb, STATE_COLUMN_FAMILIES_IN_CODE}, CheckpointVerifiedBlock, Config, @@ -80,6 +81,8 @@ fn test_block_db_round_trip_with( let state = ZebraDb::new( &Config::ephemeral(), + STATE_DATABASE_KIND, + &state_database_format_version_in_code(), network, // The raw database 
accesses in this test create invalid database formats. true, diff --git a/zebra-state/src/service/read/tests/vectors.rs b/zebra-state/src/service/read/tests/vectors.rs index 94ca0f55974..38d0887aaec 100644 --- a/zebra-state/src/service/read/tests/vectors.rs +++ b/zebra-state/src/service/read/tests/vectors.rs @@ -18,6 +18,7 @@ use zebra_test::{ }; use crate::{ + constants::{state_database_format_version_in_code, STATE_DATABASE_KIND}, init_test_services, populated_state, response::MinedTx, service::{ @@ -369,6 +370,8 @@ where fn new_ephemeral_db() -> ZebraDb { ZebraDb::new( &Config::ephemeral(), + STATE_DATABASE_KIND, + &state_database_format_version_in_code(), Mainnet, true, STATE_COLUMN_FAMILIES_IN_CODE From 9a4c1942b1a9ebbfcd7c82e82ce19559c575df96 Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 27 Nov 2023 11:25:23 +1000 Subject: [PATCH 10/24] Fix doc links --- zebra-state/src/constants.rs | 9 ++++++--- zebra-state/src/service/finalized_state/disk_db.rs | 4 ++-- zebra-state/src/service/finalized_state/disk_format.rs | 4 ++-- .../src/service/finalized_state/disk_format/block.rs | 4 ++-- .../src/service/finalized_state/disk_format/chain.rs | 4 ++-- .../src/service/finalized_state/disk_format/shielded.rs | 4 ++-- .../service/finalized_state/disk_format/transparent.rs | 4 ++-- zebra-state/src/service/finalized_state/zebra_db.rs | 4 ++-- .../src/service/finalized_state/zebra_db/block.rs | 4 ++-- .../src/service/finalized_state/zebra_db/chain.rs | 4 ++-- .../src/service/finalized_state/zebra_db/shielded.rs | 4 ++-- .../src/service/finalized_state/zebra_db/transparent.rs | 4 ++-- 12 files changed, 28 insertions(+), 25 deletions(-) diff --git a/zebra-state/src/constants.rs b/zebra-state/src/constants.rs index 0481484fa89..1cbe05e8342 100644 --- a/zebra-state/src/constants.rs +++ b/zebra-state/src/constants.rs @@ -6,7 +6,10 @@ use semver::Version; // For doc comment links #[allow(unused_imports)] -use crate::config::{self, Config}; +use crate::{ + config::{self, Config}, + 
constants, +}; pub use zebra_chain::transparent::MIN_TRANSPARENT_COINBASE_MATURITY; @@ -41,8 +44,8 @@ pub const STATE_DATABASE_KIND: &str = "state"; /// - we previously added compatibility code, and /// - it's available in all supported Zebra versions. /// -/// Use [`config::database_format_version_in_code()`] or -/// [`config::database_format_version_on_disk()`] to get the full semantic format version. +/// Instead of using this constant directly, use [`constants::state_database_format_version_in_code()`] +/// or [`config::database_format_version_on_disk()`] to get the full semantic format version. const DATABASE_FORMAT_VERSION: u64 = 25; /// The database format minor version, incremented each time the on-disk database format has a diff --git a/zebra-state/src/service/finalized_state/disk_db.rs b/zebra-state/src/service/finalized_state/disk_db.rs index 509c9bff0fd..64bace06213 100644 --- a/zebra-state/src/service/finalized_state/disk_db.rs +++ b/zebra-state/src/service/finalized_state/disk_db.rs @@ -7,8 +7,8 @@ //! //! # Correctness //! -//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constants must -//! be incremented each time the database format (column, serialization, etc) changes. +//! [`crate::constants::state_database_format_version_in_code()`] must be incremented +//! each time the database format (column, serialization, etc) changes. use std::{ collections::{BTreeMap, HashMap}, diff --git a/zebra-state/src/service/finalized_state/disk_format.rs b/zebra-state/src/service/finalized_state/disk_format.rs index 072841d6635..f35bcd0027d 100644 --- a/zebra-state/src/service/finalized_state/disk_format.rs +++ b/zebra-state/src/service/finalized_state/disk_format.rs @@ -2,8 +2,8 @@ //! //! # Correctness //! -//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must -//! be incremented each time the database format (column, serialization, etc) changes. +//! [`crate::constants::state_database_format_version_in_code()`] must be incremented +//! 
each time the database format (column, serialization, etc) changes. use std::sync::Arc; diff --git a/zebra-state/src/service/finalized_state/disk_format/block.rs b/zebra-state/src/service/finalized_state/disk_format/block.rs index c3db39b4423..4b5ed0e83fe 100644 --- a/zebra-state/src/service/finalized_state/disk_format/block.rs +++ b/zebra-state/src/service/finalized_state/disk_format/block.rs @@ -2,8 +2,8 @@ //! //! # Correctness //! -//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must -//! be incremented each time the database format (column, serialization, etc) changes. +//! [`crate::constants::state_database_format_version_in_code()`] must be incremented +//! each time the database format (column, serialization, etc) changes. use zebra_chain::{ block::{self, Height}, diff --git a/zebra-state/src/service/finalized_state/disk_format/chain.rs b/zebra-state/src/service/finalized_state/disk_format/chain.rs index 512424ec16f..f728d6e26c9 100644 --- a/zebra-state/src/service/finalized_state/disk_format/chain.rs +++ b/zebra-state/src/service/finalized_state/disk_format/chain.rs @@ -2,8 +2,8 @@ //! //! # Correctness //! -//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must -//! be incremented each time the database format (column, serialization, etc) changes. +//! [`crate::constants::state_database_format_version_in_code()`] must be incremented +//! each time the database format (column, serialization, etc) changes. use std::collections::BTreeMap; diff --git a/zebra-state/src/service/finalized_state/disk_format/shielded.rs b/zebra-state/src/service/finalized_state/disk_format/shielded.rs index d9716847632..bcd24d5c604 100644 --- a/zebra-state/src/service/finalized_state/disk_format/shielded.rs +++ b/zebra-state/src/service/finalized_state/disk_format/shielded.rs @@ -2,8 +2,8 @@ //! //! # Correctness //! -//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must -//! 
be incremented each time the database format (column, serialization, etc) changes. +//! [`crate::constants::state_database_format_version_in_code()`] must be incremented +//! each time the database format (column, serialization, etc) changes. use bincode::Options; diff --git a/zebra-state/src/service/finalized_state/disk_format/transparent.rs b/zebra-state/src/service/finalized_state/disk_format/transparent.rs index d3fb01c390f..534883c267e 100644 --- a/zebra-state/src/service/finalized_state/disk_format/transparent.rs +++ b/zebra-state/src/service/finalized_state/disk_format/transparent.rs @@ -2,8 +2,8 @@ //! //! # Correctness //! -//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must -//! be incremented each time the database format (column, serialization, etc) changes. +//! [`crate::constants::state_database_format_version_in_code()`] must be incremented +//! each time the database format (column, serialization, etc) changes. use std::{cmp::max, fmt::Debug}; diff --git a/zebra-state/src/service/finalized_state/zebra_db.rs b/zebra-state/src/service/finalized_state/zebra_db.rs index 0e7ae142a88..7d70b80942f 100644 --- a/zebra-state/src/service/finalized_state/zebra_db.rs +++ b/zebra-state/src/service/finalized_state/zebra_db.rs @@ -6,8 +6,8 @@ //! //! # Correctness //! -//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must -//! be incremented each time the database format (column, serialization, etc) changes. +//! [`crate::constants::state_database_format_version_in_code()`] must be incremented +//! each time the database format (column, serialization, etc) changes. use std::{ path::Path, diff --git a/zebra-state/src/service/finalized_state/zebra_db/block.rs b/zebra-state/src/service/finalized_state/zebra_db/block.rs index 7d4d4006405..37365c77c00 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/block.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/block.rs @@ -6,8 +6,8 @@ //! //! # Correctness //! -//! 
The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must -//! be incremented each time the database format (column, serialization, etc) changes. +//! [`crate::constants::state_database_format_version_in_code()`] must be incremented +//! each time the database format (column, serialization, etc) changes. use std::{ collections::{BTreeMap, HashMap, HashSet}, diff --git a/zebra-state/src/service/finalized_state/zebra_db/chain.rs b/zebra-state/src/service/finalized_state/zebra_db/chain.rs index 1708f9da437..43f0d1ae1df 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/chain.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/chain.rs @@ -8,8 +8,8 @@ //! //! # Correctness //! -//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must -//! be incremented each time the database format (column, serialization, etc) changes. +//! [`crate::constants::state_database_format_version_in_code()`] must be incremented +//! each time the database format (column, serialization, etc) changes. use std::{borrow::Borrow, collections::HashMap, sync::Arc}; diff --git a/zebra-state/src/service/finalized_state/zebra_db/shielded.rs b/zebra-state/src/service/finalized_state/zebra_db/shielded.rs index cb4029b4a50..efeaedb0c0d 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/shielded.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/shielded.rs @@ -9,8 +9,8 @@ //! //! # Correctness //! -//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must -//! be incremented each time the database format (column, serialization, etc) changes. +//! [`crate::constants::state_database_format_version_in_code()`] must be incremented +//! each time the database format (column, serialization, etc) changes. 
use std::{ collections::{BTreeMap, HashMap}, diff --git a/zebra-state/src/service/finalized_state/zebra_db/transparent.rs b/zebra-state/src/service/finalized_state/zebra_db/transparent.rs index e9b41639f18..bbf15ea9111 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/transparent.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/transparent.rs @@ -8,8 +8,8 @@ //! //! # Correctness //! -//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must -//! be incremented each time the database format (column, serialization, etc) changes. +//! [`crate::constants::state_database_format_version_in_code()`] must be incremented +//! each time the database format (column, serialization, etc) changes. use std::{ collections::{BTreeMap, BTreeSet, HashMap, HashSet}, From d39a571bfec4d4a0b1af940be4c4f873248e6840 Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 27 Nov 2023 11:27:53 +1000 Subject: [PATCH 11/24] Fix internal imports --- zebra-state/src/lib.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/zebra-state/src/lib.rs b/zebra-state/src/lib.rs index 4e82ebc6b2c..2c64088b829 100644 --- a/zebra-state/src/lib.rs +++ b/zebra-state/src/lib.rs @@ -73,6 +73,9 @@ pub use service::{ init_test, init_test_services, ReadStateService, }; +#[cfg(not(any(test, feature = "proptest-impl")))] +pub(crate) use config::hidden::write_database_format_version_to_disk; + #[cfg(any(test, feature = "proptest-impl"))] pub use config::hidden::write_database_format_version_to_disk; From a5693a2be8ea8fd7822232bffed06ba867045554 Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 27 Nov 2023 11:29:59 +1000 Subject: [PATCH 12/24] Fix unused code --- zebra-state/src/service/finalized_state/zebra_db.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zebra-state/src/service/finalized_state/zebra_db.rs b/zebra-state/src/service/finalized_state/zebra_db.rs index 7d70b80942f..155d19686aa 100644 --- a/zebra-state/src/service/finalized_state/zebra_db.rs +++ 
b/zebra-state/src/service/finalized_state/zebra_db.rs @@ -171,7 +171,7 @@ impl ZebraDb { /// Returns the fixed major version for this database. pub fn major_version(&self) -> u64 { - self.format_version_in_code().major + self.db.major_version() } /// Returns the format version of this database on disk. From bf2e6b0c492ff856a3ee011ee7c8cd05d5ec2872 Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 27 Nov 2023 11:36:58 +1000 Subject: [PATCH 13/24] Update zebrad version metadata --- zebra-state/src/config.rs | 20 ++++++++++++++++++-- zebra-state/src/lib.rs | 7 +++++-- zebrad/src/application.rs | 7 ++++--- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/zebra-state/src/config.rs b/zebra-state/src/config.rs index 27d0d7d70a0..29a00650496 100644 --- a/zebra-state/src/config.rs +++ b/zebra-state/src/config.rs @@ -14,7 +14,10 @@ use tracing::Span; use zebra_chain::parameters::Network; -use crate::{constants::DATABASE_FORMAT_VERSION_FILE_NAME, BoxError}; +use crate::{ + constants::{DATABASE_FORMAT_VERSION_FILE_NAME, STATE_DATABASE_KIND}, + state_database_format_version_in_code, BoxError, +}; /// Configuration for the state service. #[derive(Clone, Debug, Deserialize, Serialize)] @@ -356,7 +359,20 @@ fn parse_major_version(dir_name: &str) -> Option { // TODO: move these to the format upgrade module -/// Returns the full semantic version of the on-disk database, based on its kind, major version, +/// Returns the full semantic version of the on-disk state database, based on its config and network. +pub fn state_database_format_version_on_disk( + config: &Config, + network: Network, +) -> Result<Option<Version>, BoxError> { + database_format_version_on_disk( + config, + STATE_DATABASE_KIND, + state_database_format_version_in_code().major, + network, + ) +} + +/// Returns the full semantic version of the on-disk database, based on its config, kind, major version, /// and network. /// /// Typically, the version is read from a version text file. 
diff --git a/zebra-state/src/lib.rs b/zebra-state/src/lib.rs index 2c64088b829..00bc6e44160 100644 --- a/zebra-state/src/lib.rs +++ b/zebra-state/src/lib.rs @@ -39,8 +39,11 @@ mod service; #[cfg(test)] mod tests; -pub use config::{check_and_delete_old_databases, database_format_version_on_disk, Config}; -pub use constants::MAX_BLOCK_REORG_HEIGHT; +pub use config::{ + check_and_delete_old_databases, database_format_version_on_disk, + state_database_format_version_on_disk, Config, +}; +pub use constants::{state_database_format_version_in_code, MAX_BLOCK_REORG_HEIGHT}; pub use error::{ BoxError, CloneError, CommitSemanticallyVerifiedError, DuplicateNullifierError, ValidateContextError, diff --git a/zebrad/src/application.rs b/zebrad/src/application.rs index f0ce1a204d8..24c8ac9b784 100644 --- a/zebrad/src/application.rs +++ b/zebrad/src/application.rs @@ -16,7 +16,8 @@ use semver::{BuildMetadata, Version}; use zebra_network::constants::PORT_IN_USE_ERROR; use zebra_state::{ - constants::LOCK_FILE_ERROR, database_format_version_in_code, database_format_version_on_disk, + constants::LOCK_FILE_ERROR, state_database_format_version_in_code, + state_database_format_version_on_disk, }; use crate::{ @@ -267,7 +268,7 @@ impl Application for ZebradApp { // reads state disk version file, doesn't open RocksDB database let disk_db_version = - match database_format_version_on_disk(&config.state, config.network.network) { + match state_database_format_version_on_disk(&config.state, config.network.network) { Ok(Some(version)) => version.to_string(), // This "version" is specially formatted to match a relaxed version regex in CI Ok(None) => "creating.new.database".to_string(), @@ -286,7 +287,7 @@ impl Application for ZebradApp { // code constant ( "running state version", - database_format_version_in_code().to_string(), + state_database_format_version_in_code().to_string(), ), // state disk file, doesn't open database ("initial disk state version", disk_db_version), From 
055a74618ec8d59232c6c189b38faa7d82dc376d Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 27 Nov 2023 12:54:37 +1000 Subject: [PATCH 14/24] Create a specific state database delete function --- zebra-state/src/config.rs | 13 +++++++++++++ zebrad/src/commands/start.rs | 6 ++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/zebra-state/src/config.rs b/zebra-state/src/config.rs index 29a00650496..49b45ca936a 100644 --- a/zebra-state/src/config.rs +++ b/zebra-state/src/config.rs @@ -196,6 +196,19 @@ impl Default for Config { // Cleaning up old database versions // TODO: put this in a different module? +/// Spawns a task that checks if there are old state database folders, +/// and deletes them from the filesystem. +/// +/// See `check_and_delete_old_databases()` for details. +pub fn check_and_delete_old_state_databases(config: &Config, network: Network) -> JoinHandle<()> { + check_and_delete_old_databases( + config, + STATE_DATABASE_KIND, + state_database_format_version_in_code().major, + network, + ) +} + /// Spawns a task that checks if there are old database folders, /// and deletes them from the filesystem. 
/// diff --git a/zebrad/src/commands/start.rs b/zebrad/src/commands/start.rs index 371d8f2fb0c..fbb7730008e 100644 --- a/zebrad/src/commands/start.rs +++ b/zebrad/src/commands/start.rs @@ -249,8 +249,10 @@ impl StartCmd { ); info!("spawning delete old databases task"); - let mut old_databases_task_handle = - zebra_state::check_and_delete_old_databases(config.state.clone()); + let mut old_databases_task_handle = zebra_state::check_and_delete_old_state_databases( + config.state.clone(), + config.network.network, + ); info!("spawning progress logging task"); let progress_task_handle = tokio::spawn( From 841ad504db165e14986f1043f0f10bb7e1de2145 Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 27 Nov 2023 13:02:00 +1000 Subject: [PATCH 15/24] Fix state exports --- zebra-state/src/lib.rs | 4 ++-- zebrad/src/commands/start.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/zebra-state/src/lib.rs b/zebra-state/src/lib.rs index 00bc6e44160..168e5910b4c 100644 --- a/zebra-state/src/lib.rs +++ b/zebra-state/src/lib.rs @@ -40,8 +40,8 @@ mod service; mod tests; pub use config::{ - check_and_delete_old_databases, database_format_version_on_disk, - state_database_format_version_on_disk, Config, + check_and_delete_old_databases, check_and_delete_old_state_databases, + database_format_version_on_disk, state_database_format_version_on_disk, Config, }; pub use constants::{state_database_format_version_in_code, MAX_BLOCK_REORG_HEIGHT}; pub use error::{ diff --git a/zebrad/src/commands/start.rs b/zebrad/src/commands/start.rs index fbb7730008e..64a0cbe9669 100644 --- a/zebrad/src/commands/start.rs +++ b/zebrad/src/commands/start.rs @@ -250,7 +250,7 @@ impl StartCmd { info!("spawning delete old databases task"); let mut old_databases_task_handle = zebra_state::check_and_delete_old_state_databases( - config.state.clone(), + &config.state, config.network.network, ); From 68a4b6c6ac11fdd51e21c25b1e61e8ba41a40009 Mon Sep 17 00:00:00 2001 From: teor Date: Tue, 28 Nov 2023 
05:49:36 +1000 Subject: [PATCH 16/24] Fix zebrad tests --- zebrad/tests/acceptance.rs | 20 +++++++++---------- zebrad/tests/common/checkpoints.rs | 4 ++-- .../common/lightwalletd/wallet_grpc_test.rs | 6 +++--- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index ecc60961408..de6b297c122 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -159,7 +159,7 @@ use zebra_chain::{ }; use zebra_network::constants::PORT_IN_USE_ERROR; use zebra_node_services::rpc_client::RpcRequestClient; -use zebra_state::{constants::LOCK_FILE_ERROR, database_format_version_in_code}; +use zebra_state::{constants::LOCK_FILE_ERROR, state_database_format_version_in_code}; use zebra_test::{ args, @@ -1856,7 +1856,7 @@ fn lightwalletd_integration_test(test_type: TestType) -> Result<()> { wait_for_state_version_upgrade( &mut zebrad, &state_version_message, - database_format_version_in_code(), + state_database_format_version_in_code(), [format!( "Opened RPC endpoint at {}", zebra_rpc_address.expect("lightwalletd test must have RPC port") @@ -1866,7 +1866,7 @@ fn lightwalletd_integration_test(test_type: TestType) -> Result<()> { wait_for_state_version_upgrade( &mut zebrad, &state_version_message, - database_format_version_in_code(), + state_database_format_version_in_code(), None, )?; } @@ -1978,7 +1978,7 @@ fn lightwalletd_integration_test(test_type: TestType) -> Result<()> { wait_for_state_version_upgrade( &mut zebrad, &state_version_message, - database_format_version_in_code(), + state_database_format_version_in_code(), None, )?; } @@ -2004,7 +2004,7 @@ fn lightwalletd_integration_test(test_type: TestType) -> Result<()> { wait_for_state_version_upgrade( &mut zebrad, &state_version_message, - database_format_version_in_code(), + state_database_format_version_in_code(), None, )?; } @@ -2192,7 +2192,7 @@ fn zebra_state_conflict() -> Result<()> { dir_conflict_full.push("state"); 
dir_conflict_full.push(format!( "v{}", - zebra_state::database_format_version_in_code().major, + zebra_state::state_database_format_version_in_code().major, )); dir_conflict_full.push(config.network.network.to_string().to_lowercase()); format!( @@ -2526,7 +2526,7 @@ async fn new_state_format() -> Result<()> { /// (or just add a delay during tests) #[tokio::test] async fn update_state_format() -> Result<()> { - let mut fake_version = database_format_version_in_code(); + let mut fake_version = state_database_format_version_in_code(); fake_version.minor = 0; fake_version.patch = 0; @@ -2543,7 +2543,7 @@ async fn update_state_format() -> Result<()> { /// Future version compatibility is a best-effort attempt, this test can be disabled if it fails. #[tokio::test] async fn downgrade_state_format() -> Result<()> { - let mut fake_version = database_format_version_in_code(); + let mut fake_version = state_database_format_version_in_code(); fake_version.minor = u16::MAX.into(); fake_version.patch = 0; @@ -2631,7 +2631,7 @@ async fn state_format_test( // Give zebra_state enough time to actually write the database version to disk. 
tokio::time::sleep(Duration::from_secs(1)).await; - let running_version = database_format_version_in_code(); + let running_version = state_database_format_version_in_code(); match fake_version.cmp(&running_version) { Ordering::Less => expect_older_version = true, @@ -2737,7 +2737,7 @@ async fn fully_synced_rpc_z_getsubtreesbyindex_snapshot_test() -> Result<()> { wait_for_state_version_upgrade( &mut zebrad, &state_version_message, - database_format_version_in_code(), + state_database_format_version_in_code(), None, )?; diff --git a/zebrad/tests/common/checkpoints.rs b/zebrad/tests/common/checkpoints.rs index cb1db8d0530..c67130845e4 100644 --- a/zebrad/tests/common/checkpoints.rs +++ b/zebrad/tests/common/checkpoints.rs @@ -20,7 +20,7 @@ use zebra_chain::{ }; use zebra_consensus::MAX_CHECKPOINT_HEIGHT_GAP; use zebra_node_services::rpc_client::RpcRequestClient; -use zebra_state::database_format_version_in_code; +use zebra_state::state_database_format_version_in_code; use zebra_test::{ args, command::{Arguments, TestDirExt, NO_MATCHES_REGEX_ITER}, @@ -98,7 +98,7 @@ pub async fn run(network: Network) -> Result<()> { wait_for_state_version_upgrade( &mut zebrad, &state_version_message, - database_format_version_in_code(), + state_database_format_version_in_code(), None, )?; } diff --git a/zebrad/tests/common/lightwalletd/wallet_grpc_test.rs b/zebrad/tests/common/lightwalletd/wallet_grpc_test.rs index 3030f1c63ba..e341f1a2cd5 100644 --- a/zebrad/tests/common/lightwalletd/wallet_grpc_test.rs +++ b/zebrad/tests/common/lightwalletd/wallet_grpc_test.rs @@ -43,7 +43,7 @@ use zebra_chain::{ parameters::NetworkUpgrade::{Nu5, Sapling}, serialization::ZcashDeserializeInto, }; -use zebra_state::database_format_version_in_code; +use zebra_state::state_database_format_version_in_code; use crate::common::{ cached_state::{ @@ -122,7 +122,7 @@ pub async fn run() -> Result<()> { wait_for_state_version_upgrade( &mut zebrad, &state_version_message, - database_format_version_in_code(), + 
state_database_format_version_in_code(), [format!("Opened RPC endpoint at {zebra_rpc_address}")], )?; } @@ -159,7 +159,7 @@ pub async fn run() -> Result<()> { wait_for_state_version_upgrade( &mut zebrad, &state_version_message, - database_format_version_in_code(), + state_database_format_version_in_code(), None, )?; } From fcef8d991b0c52eff92870a3352a1d11102d7bd4 Mon Sep 17 00:00:00 2001 From: teor Date: Tue, 28 Nov 2023 05:53:40 +1000 Subject: [PATCH 17/24] Fix zebrad state write tests --- zebra-state/src/config.rs | 16 +++++++++++++++- zebra-state/src/lib.rs | 8 ++++++-- zebrad/tests/acceptance.rs | 8 ++++++-- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/zebra-state/src/config.rs b/zebra-state/src/config.rs index 49b45ca936a..d90ae8c42d0 100644 --- a/zebra-state/src/config.rs +++ b/zebra-state/src/config.rs @@ -458,12 +458,26 @@ pub(crate) fn database_format_version_at_path( // Hide this destructive method from the public API, except in tests. #[allow(unused_imports)] -pub(crate) use hidden::write_database_format_version_to_disk; +pub(crate) use hidden::{ + write_database_format_version_to_disk, write_state_database_format_version_to_disk, +}; pub(crate) mod hidden { use super::*; + /// Writes `changed_version` to the on-disk state database after the format is changed. + /// (Or a new database is created.) + /// + /// See `write_database_format_version_to_disk()` for details. + pub fn write_state_database_format_version_to_disk( + config: &Config, + changed_version: &Version, + network: Network, + ) -> Result<(), BoxError> { + write_database_format_version_to_disk(config, STATE_DATABASE_KIND, changed_version, network) + } + /// Writes `changed_version` to the on-disk database after the format is changed. /// (Or a new database is created.) 
/// diff --git a/zebra-state/src/lib.rs b/zebra-state/src/lib.rs index 168e5910b4c..a50d2a7b11d 100644 --- a/zebra-state/src/lib.rs +++ b/zebra-state/src/lib.rs @@ -77,10 +77,14 @@ pub use service::{ }; #[cfg(not(any(test, feature = "proptest-impl")))] -pub(crate) use config::hidden::write_database_format_version_to_disk; +pub(crate) use config::hidden::{ + write_database_format_version_to_disk, write_state_database_format_version_to_disk, +}; #[cfg(any(test, feature = "proptest-impl"))] -pub use config::hidden::write_database_format_version_to_disk; +pub use config::hidden::{ + write_database_format_version_to_disk, write_state_database_format_version_to_disk, +}; #[cfg(any(test, feature = "proptest-impl"))] pub use constants::latest_version_for_adding_subtrees; diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index de6b297c122..244edd8e334 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -2625,8 +2625,12 @@ async fn state_format_test( .zebrad_config(test_name, false, Some(dir.path()), network) .expect("already checked config")?; - zebra_state::write_database_format_version_to_disk(fake_version, &config.state, network) - .expect("can't write fake database version to disk"); + zebra_state::write_state_database_format_version_to_disk( + &config.state, + fake_version, + network, + ) + .expect("can't write fake database version to disk"); // Give zebra_state enough time to actually write the database version to disk. 
tokio::time::sleep(Duration::from_secs(1)).await; From 93ad4a5cebe834b6813b12f55eff182fb3e4a51c Mon Sep 17 00:00:00 2001 From: teor Date: Tue, 28 Nov 2023 08:45:16 +1000 Subject: [PATCH 18/24] Make CI run again --- zebra-state/src/service/finalized_state.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zebra-state/src/service/finalized_state.rs b/zebra-state/src/service/finalized_state.rs index 8d2c7e794a7..72e575e5e83 100644 --- a/zebra-state/src/service/finalized_state.rs +++ b/zebra-state/src/service/finalized_state.rs @@ -50,6 +50,8 @@ pub use disk_db::ReadDisk; pub use disk_format::MAX_ON_DISK_HEIGHT; /// The column families supported by the running `zebra-state` database code. +/// +/// Existing column families that aren't listed here are preserved when the database is opened. pub const STATE_COLUMN_FAMILIES_IN_CODE: &[&str] = &[ // Blocks "hash_by_height", From d0448352b25aeef0e4dc698c36db52d102c7a668 Mon Sep 17 00:00:00 2001 From: teor Date: Tue, 28 Nov 2023 10:30:31 +1000 Subject: [PATCH 19/24] Fix dead code warnings for test methods --- zebra-state/src/config.rs | 1 + zebra-state/src/lib.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/zebra-state/src/config.rs b/zebra-state/src/config.rs index d90ae8c42d0..990aac83863 100644 --- a/zebra-state/src/config.rs +++ b/zebra-state/src/config.rs @@ -463,6 +463,7 @@ pub(crate) use hidden::{ }; pub(crate) mod hidden { + #![allow(dead_code)] use super::*; diff --git a/zebra-state/src/lib.rs b/zebra-state/src/lib.rs index a50d2a7b11d..3ceb08035f7 100644 --- a/zebra-state/src/lib.rs +++ b/zebra-state/src/lib.rs @@ -77,6 +77,7 @@ pub use service::{ }; #[cfg(not(any(test, feature = "proptest-impl")))] +#[allow(unused_imports)] pub(crate) use config::hidden::{ write_database_format_version_to_disk, write_state_database_format_version_to_disk, }; From ee157d0948827261347bdcd25e05d36ed8827134 Mon Sep 17 00:00:00 2001 From: teor Date: Tue, 28 Nov 2023 12:45:16 +1000 Subject: [PATCH 20/24] Remove unnecessary 
async on some tests --- zebrad/tests/acceptance.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index 244edd8e334..aae3aa1fc1b 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -2331,8 +2331,8 @@ async fn fully_synced_rpc_test() -> Result<()> { Ok(()) } -#[tokio::test] -async fn delete_old_databases() -> Result<()> { +#[test] +fn delete_old_databases() -> Result<()> { use std::fs::{canonicalize, create_dir}; let _init_guard = zebra_test::init(); @@ -2804,10 +2804,10 @@ async fn fully_synced_rpc_z_getsubtreesbyindex_snapshot_test() -> Result<()> { Ok(()) } -#[cfg(feature = "zebra-scan")] /// Test that the scanner gets started when the node starts. -#[tokio::test] -async fn scan_task_starts() -> Result<()> { +#[cfg(feature = "zebra-scan")] +#[test] +fn scan_task_starts() -> Result<()> { use indexmap::IndexMap; const ZECPAGES_VIEWING_KEY: &str = "zxviews1q0duytgcqqqqpqre26wkl45gvwwwd706xw608hucmvfalr759ejwf7qshjf5r9aa7323zulvz6plhttp5mltqcgs9t039cx2d09mgq05ts63n8u35hyv6h9nc9ctqqtue2u7cer2mqegunuulq2luhq3ywjcz35yyljewa4mgkgjzyfwh6fr6jd0dzd44ghk0nxdv2hnv4j5nxfwv24rwdmgllhe0p8568sgqt9ckt02v2kxf5ahtql6s0ltjpkckw8gtymxtxuu9gcr0swvz"; From 9995f5bc9fa7a0fb4668d5430a766af67cd29f5c Mon Sep 17 00:00:00 2001 From: teor Date: Tue, 28 Nov 2023 12:52:16 +1000 Subject: [PATCH 21/24] Fix logging required by tests --- zebra-state/src/config.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/zebra-state/src/config.rs b/zebra-state/src/config.rs index 990aac83863..947157b2519 100644 --- a/zebra-state/src/config.rs +++ b/zebra-state/src/config.rs @@ -283,9 +283,8 @@ fn delete_old_databases(config: Config, db_kind: String, major_version: u64, net if let Some(deleted_db_version) = deleted_db_version { info!( - ?deleted_db_version, - ?db_kind, - "deleted outdated database directory" + ?deleted_db, + "deleted outdated {db_kind} database directory" ); } } 
From cecc351672fa969f52067b055b55920ba4e2fade Mon Sep 17 00:00:00 2001 From: teor Date: Tue, 28 Nov 2023 12:52:48 +1000 Subject: [PATCH 22/24] Fix logging required in test itself --- zebrad/tests/acceptance.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index aae3aa1fc1b..3be477c8b74 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -2379,7 +2379,7 @@ fn delete_old_databases() -> Result<()> { // inside dir was deleted child.expect_stdout_line_matches(format!( - "deleted outdated state directory deleted_state={canonicalized_inside_dir:?}" + "deleted outdated state directory deleted_db={canonicalized_inside_dir:?}" ))?; assert!(!inside_dir.as_path().exists()); From c501532eeb944c963cc68df2615e4fab0d6f1e41 Mon Sep 17 00:00:00 2001 From: teor Date: Tue, 28 Nov 2023 12:58:31 +1000 Subject: [PATCH 23/24] Fix variable names --- zebra-state/src/config.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/zebra-state/src/config.rs b/zebra-state/src/config.rs index 947157b2519..47996756583 100644 --- a/zebra-state/src/config.rs +++ b/zebra-state/src/config.rs @@ -279,13 +279,10 @@ fn delete_old_databases(config: Config, db_kind: String, major_version: u64, net if let Some(db_kind_dir) = read_dir(&db_path) { for entry in db_kind_dir.flatten() { - let deleted_db_version = check_and_delete_database(&config, major_version, &entry); + let deleted_db = check_and_delete_database(&config, major_version, &entry); - if let Some(deleted_db_version) = deleted_db_version { - info!( - ?deleted_db, - "deleted outdated {db_kind} database directory" - ); + if let Some(deleted_db) = deleted_db { + info!(?deleted_db, "deleted outdated {db_kind} database directory"); } } } From 0b7f5b921d8daf0decfc639c5dc05df14988962f Mon Sep 17 00:00:00 2001 From: teor Date: Tue, 28 Nov 2023 17:20:07 +1000 Subject: [PATCH 24/24] Try to copy the message and add regexes --- 
zebrad/tests/acceptance.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index 3be477c8b74..2acad35b30f 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -2379,7 +2379,7 @@ fn delete_old_databases() -> Result<()> { // inside dir was deleted child.expect_stdout_line_matches(format!( - "deleted outdated state directory deleted_db={canonicalized_inside_dir:?}" + "deleted outdated state database directory.*deleted_db.*=.*{canonicalized_inside_dir:?}" ))?; assert!(!inside_dir.as_path().exists());