From f4e8e10f4a6c6d0cacc43e208f9c41da5a194041 Mon Sep 17 00:00:00 2001 From: Matt Bell Date: Thu, 5 Sep 2024 14:03:12 -0400 Subject: [PATCH 1/3] Add doc comments --- src/lib.rs | 11 +++++ src/merk/mod.rs | 29 ++--------- src/merk/snapshot.rs | 103 +++++++++++++++++++++++++++++++++------- src/proofs/query/map.rs | 20 +++++++- src/proofs/tree.rs | 6 +++ src/test_utils/mod.rs | 2 + src/tree/ops.rs | 2 + 7 files changed, 129 insertions(+), 44 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fe2864a..9f7cd46 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,14 @@ +//! A high-performance Merkle key/value store. +//! +//! Merk is a crypto key/value store - more specifically, it's a Merkle AVL tree +//! built on top of RocksDB (Facebook's fork of LevelDB). +//! +//! Its priorities are performance and reliability. While Merk was designed to +//! be the state database for blockchains, it can also be used anywhere an +//! auditable key/value store is needed. + +#![warn(missing_docs)] +#![warn(clippy::missing_docs_in_private_items)] #![feature(trivial_bounds)] #[global_allocator] diff --git a/src/merk/mod.rs b/src/merk/mod.rs index 9a3cad3..6417e97 100644 --- a/src/merk/mod.rs +++ b/src/merk/mod.rs @@ -11,7 +11,7 @@ use rocksdb::DB; use rocksdb::{checkpoint::Checkpoint, ColumnFamilyDescriptor, WriteBatch}; use crate::error::{Error, Result}; -use crate::proofs::{encode_into, query::QueryItem, Query}; +use crate::proofs::{encode_into, query::QueryItem}; use crate::tree::{Batch, Commit, Fetch, GetResult, Hash, Op, RefWalker, Tree, Walker, NULL_HASH}; pub use self::snapshot::Snapshot; @@ -272,33 +272,12 @@ impl Merk { /// /// The proof returned is in an encoded format which can be verified with /// `merk::verify`. - /// - /// This will fail if the keys in `query` are not sorted and unique. This - /// check adds some overhead, so if you are sure your batch is sorted and - /// unique you can use the unsafe `prove_unchecked` for a small performance - /// gain. - pub fn prove(&self, query: Query) -> Result> { - self.prove_unchecked(query) - } - - /// Creates a Merkle proof for the list of queried keys. For each key in the - /// query, if the key is found in the store then the value will be proven to - /// be in the tree. For each key in the query that does not exist in the - /// tree, its absence will be proven by including boundary keys. - /// - /// The proof returned is in an encoded format which can be verified with - /// `merk::verify`. - /// - /// This is unsafe because the keys in `query` must be sorted and unique - - /// if they are not, there will be undefined behavior. For a safe version of - /// this method which checks to ensure the batch is sorted and unique, see - /// `prove`. - pub fn prove_unchecked(&self, query: I) -> Result> + pub fn prove(&self, query: I) -> Result> where Q: Into, I: IntoIterator, { - self.use_tree_mut(move |maybe_tree| prove_unchecked(maybe_tree, self.source(), query)) + self.use_tree_mut(move |maybe_tree| prove(maybe_tree, self.source(), query)) } pub fn flush(&self) -> Result<()> { @@ -480,7 +459,7 @@ fn root_hash(maybe_tree: Option<&Tree>) -> Hash { maybe_tree.map_or(NULL_HASH, |tree| tree.hash()) } -fn prove_unchecked(maybe_tree: Option<&mut Tree>, source: F, query: I) -> Result> +fn prove(maybe_tree: Option<&mut Tree>, source: F, query: I) -> Result> where Q: Into, I: IntoIterator, diff --git a/src/merk/snapshot.rs b/src/merk/snapshot.rs index bf7f043..061075a 100644 --- a/src/merk/snapshot.rs +++ b/src/merk/snapshot.rs @@ -1,18 +1,38 @@ +//! In-memory snapshots of database state. +//! +//! Snapshots are read-only views of the database state at a particular point in +//! time. This can be useful for retaining recent versions of history which can +//! be queried against. Merk snapshots are backed by the similar RocksDB +//! snapshot, but with the added ability to create proofs. + use std::cell::Cell; use crate::{ - proofs::{query::QueryItem, Query}, + proofs::query::QueryItem, tree::{Fetch, RefWalker, Tree, NULL_HASH}, Hash, Result, }; +/// A read-only view of the database state at a particular point in time. +/// +/// `Snapshot`s are cheap to create since they are just a handle and don't copy +/// any data - they instead just prevent the underlying replaced data from being +/// compacted in RocksDB until they are dropped. They are only held in memory, +/// and will not be persisted after the process exits. pub struct Snapshot<'a> { + /// The underlying RocksDB snapshot. ss: Option>, + /// The Merk tree at the time the snapshot was created. tree: Cell>, + /// Whether the underlying RocksDB snapshot should be dropped when the + /// `Snapshot` is dropped. should_drop_ss: bool, } impl<'a> Snapshot<'a> { + /// Creates a new `Snapshot` from a RocksDB snapshot and a Merk tree. + /// + /// The RocksDB snapshot will be dropped when the [Snapshot] is dropped. pub fn new(db: rocksdb::Snapshot<'a>, tree: Option) -> Self { Snapshot { ss: Some(db), @@ -21,6 +41,8 @@ impl<'a> Snapshot<'a> { } } + /// Converts the [Snapshot] into a [StaticSnapshot], an alternative which + /// has easier (but more dangerous) lifetime requirements. pub fn staticize(mut self) -> StaticSnapshot { let ss: RocksDBSnapshot = unsafe { std::mem::transmute(self.ss.take().unwrap()) }; StaticSnapshot { @@ -30,6 +52,8 @@ impl<'a> Snapshot<'a> { } } + /// Gets the value associated with the given key, from the time the snapshot + /// was created. pub fn get(&self, key: &[u8]) -> Result>> { self.use_tree(|maybe_tree| { maybe_tree @@ -38,24 +62,23 @@ impl<'a> Snapshot<'a> { }) } + /// Gets the root hash of the tree at the time the snapshot was created. pub fn root_hash(&self) -> Hash { self.use_tree(|tree| tree.map_or(NULL_HASH, |tree| tree.hash())) } - pub fn prove(&self, query: Query) -> Result> { - self.prove_unchecked(query) - } - - pub fn prove_unchecked(&self, query: I) -> Result> + /// Proves the given query against the tree at the time the snapshot was + /// created. + pub fn prove(&self, query: I) -> Result> where Q: Into, I: IntoIterator, { - self.use_tree_mut(move |maybe_tree| { - super::prove_unchecked(maybe_tree, self.source(), query) - }) + self.use_tree_mut(move |maybe_tree| super::prove(maybe_tree, self.source(), query)) } + /// Walks the tree at the time the snapshot was created, fetching the child + /// node from the backing store if necessary. pub fn walk(&self, f: impl FnOnce(Option>) -> T) -> T { let mut tree = self.tree.take(); let maybe_walker = tree @@ -66,14 +89,19 @@ impl<'a> Snapshot<'a> { res } + /// Returns an iterator over the keys and values in the backing store from + /// the time the snapshot was created. pub fn raw_iter(&self) -> rocksdb::DBRawIterator { self.ss.as_ref().unwrap().raw_iterator() } + /// A data source which can be used to fetch values from the backing store, + /// from the time the snapshot was created. fn source(&self) -> SnapshotSource { SnapshotSource(self.ss.as_ref().unwrap()) } + /// Uses the tree, and then puts it back. fn use_tree(&self, f: impl FnOnce(Option<&Tree>) -> T) -> T { let tree = self.tree.take(); let res = f(tree.as_ref()); @@ -81,6 +109,7 @@ impl<'a> Snapshot<'a> { res } + /// Uses the tree mutably, and then puts it back. fn use_tree_mut(&self, f: impl FnOnce(Option<&mut Tree>) -> T) -> T { let mut tree = self.tree.take(); let res = f(tree.as_mut()); @@ -97,6 +126,10 @@ impl<'a> Drop for Snapshot<'a> { } } +/// A data source which can be used to fetch values from the backing store, from +/// the time the snapshot was created. +/// +/// This implements [Fetch] and should be used with a type such as [RefWalker]. #[derive(Clone)] pub struct SnapshotSource<'a>(&'a rocksdb::Snapshot<'a>); @@ -109,14 +142,40 @@ impl<'a> Fetch for SnapshotSource<'a> { } } +/// A read-only view of the database state at a particular point in time, but +/// with an internal raw pointer to allow for manual lifetime management. +/// +/// This is useful when you would otherwise want a [Snapshot], but you want to +/// use the database while the snapshot is still alive. This is unsafe because +/// it is the caller's responsibility to ensure that the underlying RocksDB +/// snapshot outlives the [StaticSnapshot]. +/// +/// By default, the RocksDB snapshot will not be dropped when the +/// [StaticSnapshot] is dropped, resulting in a memory leak. For correct usage, +/// you must call [StaticSnapshot::drop] to ensure the RocksDB snapshot gets +/// dropped when the [StaticSnapshot] is dropped. pub struct StaticSnapshot { + /// A Merk tree based on the database state at the time the snapshot was + /// created. tree: Cell>, + /// A raw pointer to the RocksDB snapshot. inner: *const (), + /// Used to detect whether the `StaticSnapshot` was set to manually drop + /// before its [Drop::drop] implementation was called. pub should_drop: bool, } +/// An equivalent struct to the [rocksdb::Snapshot] struct within the `rocksdb` +/// crate. This is used to access the private fields of the foreign crate's +/// struct by first transmuting. +/// +/// To guarantee that breaking changes in the `rocksdb` crate do not affect the +/// transmutation into this struct, see the +/// [tests::rocksdb_snapshot_struct_format] test. struct RocksDBSnapshot<'a> { + /// A reference to the associated RocksDB database. _db: &'a rocksdb::DB, + /// A raw pointer to the snapshot handle. inner: *const (), } @@ -127,12 +186,17 @@ unsafe impl Send for StaticSnapshot {} unsafe impl Sync for StaticSnapshot {} impl StaticSnapshot { - /// Creates a new static snapshot from a RocksDB snapshot. + /// Converts the [StaticSnapshot] to a [Snapshot] by re-associating with the + /// database it was originally created from. /// /// # Safety - /// This function is unsafe because the `StaticSnapshot` will hold a raw - /// pointer to the RocksDB snapshot, and it is the caller's responsibility - /// to ensure that the snapshot outlives the `StaticSnapshot`. + /// This will cause undefined behavior if a database other than the one + /// originally used to create the snapshot is passed as an argument. + /// + /// This will also cause a memory leak if the underlying RocksDB snapshot is + /// not dropped by calling [StaticSnapshot::drop]. Unlike most uses of + /// [Snapshot], the RocksDB snapshot will not be dropped when the + /// [Snapshot] returned by this method is dropped. pub unsafe fn with_db<'a>(&self, db: &'a rocksdb::DB) -> Snapshot<'a> { let db_ss = RocksDBSnapshot { _db: db, @@ -147,18 +211,23 @@ impl StaticSnapshot { } } - /// Drops the snapshot without cleaning up the underlying RocksDB snapshot. + /// Drops the [StaticSnapshot] and the underlying RocksDB snapshot. /// /// # Safety - /// This function is unsafe because it allows to drop the snapshot without - /// cleaning up the underlying RocksDB snapshot. Ensure that the snapshot - /// is manually freed when this function is called. + /// This function is unsafe because it results in the RocksDB snapshot being + /// dropped, which could lead to use-after-free bugs if there are still + /// references to the snapshot in other [Snapshot] or [StaticSnapshot] + /// instances. The caller must be sure this is the last remaining reference + /// before calling this method. pub unsafe fn drop(mut self, db: &rocksdb::DB) { let mut ss = self.with_db(db); ss.should_drop_ss = true; self.should_drop = true; + // the snapshot drop implementation is now called, which includes + // dropping the RocksDB snapshot } + /// Clones the root node of the Merk tree into a new [Tree]. fn clone_tree(&self) -> Cell> { let tree = self.tree.take().unwrap(); let tree_clone = Cell::new(Some(Tree::decode( diff --git a/src/proofs/query/map.rs b/src/proofs/query/map.rs index c81ea09..774ca66 100644 --- a/src/proofs/query/map.rs +++ b/src/proofs/query/map.rs @@ -110,10 +110,16 @@ impl Map { } } + /// Joins two `Map`s together, combining the data in both. + /// + /// If the maps contain contiguous iteration ranges, the contiguous ranges + /// will be joined. If the maps have differing values for the same key, this + /// will panic (this should never happen if the queries came from the same + /// root and the proofs were verified). pub fn join(self, other: Map) -> Map { // TODO: join at the partial tree level, joining with only Map data means - // data from different joins which happen to be contiguous will be marked - // as non-contiguous + // data from different joins which happen to be contiguous (without explicitly + // querying based on next/prev) will be marked as non-contiguous let mut entries = self.entries.clone(); entries.extend(other.entries); for (key, (contiguous, val)) in entries.iter_mut() { @@ -129,6 +135,13 @@ impl Map { } } + /// Returns `true` if the [Map] can verify that there is no unproven data + /// between `key` and the node to its right (or the global tree edge). + /// + /// For example, if the underlying tree contains the key `[a, b, c, d]` and + /// the map contains the keys `[a, b, d]`, then `contiguous_right(a)` will + /// return `true`, `contiguous_right(b)` and `contiguous_right(c)` will + /// return `false`, and `contiguous_right(d)` will return `true`. fn contiguous_right(&self, key: &[u8]) -> bool { self.entries .range((Bound::Excluded(key.to_vec()), Bound::Unbounded)) @@ -146,6 +159,7 @@ fn bound_to_inner(bound: Bound) -> Option { } } +/// Converts the inner key value of a `Bound` from a byte slice to a `Vec`. fn bound_to_vec(bound: Bound<&&[u8]>) -> Bound> { match bound { Bound::Unbounded => Bound::Unbounded, @@ -154,6 +168,8 @@ fn bound_to_vec(bound: Bound<&&[u8]>) -> Bound> { } } +/// Converts the inner key values of a [RangeBounds] from byte slices to +/// `Vec`. fn bounds_to_vec<'a, R: RangeBounds<&'a [u8]>>(bounds: R) -> (Bound>, Bound>) { ( bound_to_vec(bounds.start_bound()), diff --git a/src/proofs/tree.rs b/src/proofs/tree.rs index 791bfe7..68abe74 100644 --- a/src/proofs/tree.rs +++ b/src/proofs/tree.rs @@ -6,7 +6,9 @@ use crate::tree::{kv_hash, node_hash, Hash, Hasher, NULL_HASH}; /// be up-to-date. #[derive(Debug)] pub struct Child { + /// The child node. pub tree: Box, + /// The hash of the child node. pub hash: Hash, } @@ -14,9 +16,13 @@ pub struct Child { /// when verifying Merkle proofs. #[derive(Debug)] pub struct Tree { + /// The node at the root of this tree. pub node: Node, + /// The left child of this tree. pub left: Option, + /// The right child of this tree. pub right: Option, + /// The height of this tree. pub height: usize, } diff --git a/src/test_utils/mod.rs b/src/test_utils/mod.rs index e2e5983..c7302a1 100644 --- a/src/test_utils/mod.rs +++ b/src/test_utils/mod.rs @@ -1,3 +1,5 @@ +#![allow(missing_docs)] + mod crash_merk; mod temp_merk; diff --git a/src/tree/ops.rs b/src/tree/ops.rs index e230cb8..1a7fa42 100644 --- a/src/tree/ops.rs +++ b/src/tree/ops.rs @@ -6,7 +6,9 @@ use Op::*; /// An operation to be applied to a key in the store. pub enum Op { + /// Inserts or updates the key/value entry to the given value. Put(Vec), + /// Deletes the key/value entry. Delete, } From dae72c441045a0d500d4123377997b8a2ed59391 Mon Sep 17 00:00:00 2001 From: Matt Bell Date: Fri, 6 Sep 2024 10:42:40 -0400 Subject: [PATCH 2/3] Fix bench --- benches/merk.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benches/merk.rs b/benches/merk.rs index 419131b..cf68988 100644 --- a/benches/merk.rs +++ b/benches/merk.rs @@ -168,7 +168,7 @@ fn prove_1m_1_rand_rocksdb_noprune(b: &mut Bencher) { for (key, _) in batch { keys.push(merk::proofs::query::QueryItem::Key(key)); } - merk.prove_unchecked(keys).expect("prove failed"); + merk.prove(keys).expect("prove failed"); i = (i + 1) % (initial_size / batch_size); merk.commit(std::collections::LinkedList::new(), &[]) From 37cbd0ad6d3b82af5120e6052a8c1509c51038e9 Mon Sep 17 00:00:00 2001 From: Matt Bell Date: Fri, 6 Sep 2024 10:43:56 -0400 Subject: [PATCH 3/3] Disable doc comment warnings until we have full coverage --- src/lib.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9f7cd46..43d1e17 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,8 +7,6 @@ //! be the state database for blockchains, it can also be used anywhere an //! auditable key/value store is needed. -#![warn(missing_docs)] -#![warn(clippy::missing_docs_in_private_items)] #![feature(trivial_bounds)] #[global_allocator]