From fcef4079988b3ffd2a1435f39d02a7111c8ca76a Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Sat, 21 Oct 2023 22:15:01 +0200 Subject: [PATCH 1/2] wip --- Cargo.toml | 1 + common/sealable-trie/Cargo.toml | 1 + common/sealable-trie/src/trie.rs | 30 ++- common/sealable-trie/src/trie/del.rs | 296 +++++++++++++++++++++++++ common/sealable-trie/src/trie/seal.rs | 9 +- common/sealable-trie/src/trie/set.rs | 4 +- common/sealable-trie/src/trie/tests.rs | 185 ++++++++++++++-- 7 files changed, 487 insertions(+), 39 deletions(-) create mode 100644 common/sealable-trie/src/trie/del.rs diff --git a/Cargo.toml b/Cargo.toml index 169c753a..6085b84d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ anchor-lang = {version = "0.28.0", features = ["init-if-needed"]} base64 = { version = "0.21", default-features = false, features = ["alloc"] } borsh = { version = "0.10.3", default-features = false } derive_more = "0.99.17" +hex-literal = "0.4.1" ibc = { version = "0.45.0", default-features = false, features = ["serde", "borsh"] } ibc-proto = { version = "0.35.0", default-features = false, features = ["serde"] } pretty_assertions = "1.4.0" diff --git a/common/sealable-trie/Cargo.toml b/common/sealable-trie/Cargo.toml index b43bc79f..43c32cc7 100644 --- a/common/sealable-trie/Cargo.toml +++ b/common/sealable-trie/Cargo.toml @@ -16,6 +16,7 @@ memory.workspace = true stdx.workspace = true [dev-dependencies] +hex-literal.workspace = true pretty_assertions.workspace = true rand.workspace = true diff --git a/common/sealable-trie/src/trie.rs b/common/sealable-trie/src/trie.rs index f1960647..a010848f 100644 --- a/common/sealable-trie/src/trie.rs +++ b/common/sealable-trie/src/trie.rs @@ -6,6 +6,7 @@ use memory::Ptr; use crate::nodes::{Node, NodeRef, RawNode, Reference}; use crate::{bits, proof}; +mod del; mod seal; mod set; #[cfg(test)] @@ -244,9 +245,8 @@ impl> Trie { pub fn set(&mut self, key: &[u8], value_hash: &CryptoHash) -> Result<()> { let (ptr, hash) = (self.root_ptr, self.root_hash.clone()); let key = bits::Slice::from_bytes(key).ok_or(Error::KeyTooLong)?; - let (ptr, hash) = - set::SetContext::new(&mut self.alloc, key, value_hash) - .set(ptr, &hash)?; + let (ptr, hash) = set::Context::new(&mut self.alloc, key, value_hash) + .set(ptr, &hash)?; self.root_ptr = Some(ptr); self.root_hash = hash; Ok(()) @@ -264,19 +264,35 @@ impl> Trie { /// an error. // TODO(mina86): Add seal_with_proof. pub fn seal(&mut self, key: &[u8]) -> Result<()> { - let key = bits::Slice::from_bytes(key).ok_or(Error::KeyTooLong)?; if self.root_hash == EMPTY_TRIE_ROOT { return Err(Error::NotFound); } - - let seal = seal::SealContext::new(&mut self.alloc, key) + let key = bits::Slice::from_bytes(key).ok_or(Error::KeyTooLong)?; + let removed = seal::Context::new(&mut self.alloc, key) .seal(NodeRef::new(self.root_ptr, &self.root_hash))?; - if seal { + if removed { self.root_ptr = None; } Ok(()) } + /// Deletes value at given key. Returns `false` if key was not found. + pub fn del(&mut self, key: &[u8]) -> Result { + let key = bits::Slice::from_bytes(key).ok_or(Error::KeyTooLong)?; + let res = del::Context::new(&mut self.alloc, key) + .del(self.root_ptr, &self.root_hash); + match res { + Ok(res) => { + let (ptr, hash) = res.unwrap_or((None, EMPTY_TRIE_ROOT)); + self.root_ptr = ptr; + self.root_hash = hash; + Ok(true) + } + Err(Error::NotFound) => Ok(false), + Err(err) => Err(err), + } + } + /// Prints the trie. Used for testing and debugging only. #[cfg(test)] pub(crate) fn print(&self) { diff --git a/common/sealable-trie/src/trie/del.rs b/common/sealable-trie/src/trie/del.rs new file mode 100644 index 00000000..6b9a1051 --- /dev/null +++ b/common/sealable-trie/src/trie/del.rs @@ -0,0 +1,296 @@ +use lib::hash::CryptoHash; +use memory::Ptr; + +use super::{Error, Result}; +use crate::bits; +use crate::nodes::{Node, NodeRef, RawNode, Reference, ValueRef}; + +/// Context for [`Trie::del`] operation. +pub(super) struct Context<'a, A: memory::Allocator> { + /// Part of the key yet to be traversed. + /// + /// It starts as the key user provided and as trie is traversed bits are + /// removed from its front. + key: bits::Slice<'a>, + + /// Allocator used to allocate new nodes. + wlog: memory::WriteLog<'a, A>, +} + +impl<'a, A: memory::Allocator> Context<'a, A> { + pub(super) fn new(alloc: &'a mut A, key: bits::Slice<'a>) -> Self { + let wlog = memory::WriteLog::new(alloc); + Self { key, wlog } + } + + /// Inserts value hash into the trie. + pub(super) fn del( + mut self, + root_ptr: Option, + root_hash: &CryptoHash, + ) -> Result, CryptoHash)>> { + if *root_hash == super::EMPTY_TRIE_ROOT { + return Err(Error::NotFound); + }; + let action = + self.handle(NodeRef { ptr: root_ptr, hash: root_hash }, false)?; + let res = self.ref_from_action(action)?.map(|child| match child { + OwnedRef::Node(ptr, hash) => (ptr, hash), + _ => unreachable!(), + }); + self.wlog.commit(); + Ok(res) + } + + /// Processes a reference which may be either node or value reference. + fn handle_reference( + &mut self, + child: Reference, + from_ext: bool, + ) -> Result { + match child { + Reference::Value(vref) => { + if vref.is_sealed { + Err(Error::Sealed) + } else if self.key.is_empty() { + Ok(Action::Drop) + } else { + Err(Error::NotFound) + } + } + Reference::Node(nref) => self.handle(nref, from_ext), + } + } + + /// Processes a node. + fn handle(&mut self, nref: NodeRef, from_ext: bool) -> Result { + let ptr = nref.ptr.ok_or(Error::Sealed)?; + let node = RawNode(*self.wlog.allocator().get(ptr)); + let node = node.decode(); + debug_assert_eq!(*nref.hash, node.hash()); + + match node { + Node::Branch { children } => self.handle_branch(ptr, children), + Node::Extension { key, child } => { + self.handle_extension(ptr, key, child) + } + Node::Value { value, child } => { + self.handle_value(ptr, value, child, from_ext) + } + } + } + + /// Processes a Branch node. + fn handle_branch( + &mut self, + ptr: Ptr, + children: [Reference; 2], + ) -> Result { + let key_offset = self.key.offset; + + let side = usize::from(self.key.pop_front().ok_or(Error::NotFound)?); + let action = self.handle_reference(children[side], false)?; + + // If the branch changed but wasn’t deleted, we just need to replace the + // reference. Otherwise, we’ll need to convert the Branch into an + // Extension. + if let Some(child) = self.ref_from_action(action)? { + let child = child.to_ref(); + let (left, right) = if side == 0 { + (child, children[1]) + } else { + (children[0], child) + }; + let node = RawNode::branch(left, right); + return Ok(Action::Ref(self.set_node(ptr, node))); + } + + // The child has been deleted. We need to convert this Branch into an + // Extension with a single-bit key and the other child. However, if the + // other child already is also an Extension, we need to merge them. + + self.del_node(ptr); + let child = children[1 - side]; + Ok(self + .maybe_pop_extension(child, &|key| { + bits::Owned::unshift(side == 0, key).unwrap() + }) + .unwrap_or_else(|| { + Action::Ext( + bits::Owned::bit(side == 0, key_offset), + OwnedRef::from(child), + ) + })) + } + + /// Processes an Extension node. + fn handle_extension( + &mut self, + ptr: Ptr, + key: bits::Slice, + child: Reference, + ) -> Result { + if !self.key.strip_prefix(key) { + return Err(Error::NotFound); + } + self.del_node(ptr); + Ok(match self.handle_reference(child, true)? { + Action::Drop => Action::Drop, + Action::Ref(child) => Action::Ext(key.into(), child.into()), + Action::Ext(suffix, child) => { + let key = bits::Owned::concat(key, suffix).unwrap(); + Action::Ext(key, child.into()) + } + }) + } + + /// Processes a Branch node. + fn handle_value( + &mut self, + ptr: Ptr, + value: ValueRef<'_, ()>, + child: NodeRef, + from_ext: bool, + ) -> Result { + // We’ve reached the value we want to delete. Drop the Value node and + // replace parent’s reference with child we’re pointing at. The one + // complication is that if our parent is an Extension, we need to fetch + // the child to check if it’s an Extension as well. + if self.key.is_empty() { + self.del_node(ptr); + if from_ext { + let action = self + .maybe_pop_extension(Reference::Node(child), &|key| { + key.into() + }); + if let Some(action) = action { + return Ok(action); + } + } + return Ok(Action::Ref(child.into())); + } + + // Traverse into the child and handle that. + let action = self.handle(child, false)?; + Ok(match self.ref_from_action(action)? { + None => { + // We’re deleting the child which means we need to delete the + // Value node and replace parent’s reference to ValueRef. + self.del_node(ptr); + let value = ValueRef::new(false, value.hash); + Action::Ref(value.into()) + } + Some(OwnedRef::Node(child_ptr, hash)) => { + let child = NodeRef::new(child_ptr, &hash); + let node = RawNode::value(value, child); + Action::Ref(self.set_node(ptr, node)) + } + Some(OwnedRef::Value(..)) => unreachable!(), + }) + } + + /// If `child` is a node reference pointing at an Extension node, pops that + /// node and returns corresponding `Action::Ext` action. + fn maybe_pop_extension( + &mut self, + child: Reference, + make_key: &dyn Fn(bits::Slice) -> bits::Owned, + ) -> Option { + if let Reference::Node(NodeRef { ptr: Some(ptr), hash }) = child { + let node = RawNode(*self.wlog.allocator().get(ptr)); + let node = node.decode(); + debug_assert_eq!(*hash, node.hash()); + + if let Node::Extension { key, child } = node { + // Drop the child Extension and merge keys. + self.del_node(ptr); + return Some(Action::Ext(make_key(key), OwnedRef::from(child))); + } + } + None + } + + /// Sets value of a node cell at given address and returns an [`OwnedRef`] + /// pointing at the node. + fn set_node(&mut self, ptr: Ptr, node: RawNode) -> OwnedRef { + let hash = node.decode().hash(); + self.wlog.set(ptr, *node); + OwnedRef::Node(Some(ptr), hash) + } + + /// Frees a node. + fn del_node(&mut self, ptr: Ptr) { self.wlog.free(ptr); } + + /// Converts an [`Action`] into an [`OwnedRef`] if it’s not a `Drop` action. + /// + /// If action is [`Action::Ext`] allocates a new node or sequence of nodes + /// and adds them eventually converting the action to an `OwnedRef`. If + /// action is [`Action::Ref`] already, simply returns the reference. + fn ref_from_action(&mut self, action: Action) -> Result> { + let (key, mut child) = match action { + Action::Ext(key, child) => (key, child), + Action::Ref(owned) => return Ok(Some(owned)), + Action::Drop => return Ok(None), + }; + + for chunk in key.as_slice().chunks().rev() { + let node = RawNode::extension(chunk, child.to_ref()).unwrap(); + let ptr = self.wlog.alloc(node.0)?; + child = OwnedRef::Node(Some(ptr), node.decode().hash()); + } + + Ok(Some(child)) + } +} + +/// An internal representation of results of handling of a node. +enum Action { + /// The node has been deleted. + /// + /// Deletion should propagate upstream. + Drop, + + /// The node needs to be replaced with given Extension node. + /// + /// This may propagate upstream through Extension nodes that may need to be + /// merged or split. + Ext(bits::Owned, OwnedRef), + + /// The reference has been replaced by the given owned reference. + Ref(OwnedRef), +} + +enum OwnedRef { + Node(Option, CryptoHash), + Value(bool, CryptoHash), +} + +impl OwnedRef { + fn to_ref(&self) -> Reference { + match self { + Self::Node(ptr, hash) => Reference::node(*ptr, hash), + Self::Value(is_sealed, hash) => Reference::value(*is_sealed, hash), + } + } +} + +impl From> for OwnedRef { + fn from(nref: NodeRef) -> OwnedRef { + Self::Node(nref.ptr, nref.hash.clone()) + } +} + +impl From> for OwnedRef { + fn from(vref: ValueRef) -> OwnedRef { + Self::Value(vref.is_sealed, vref.hash.clone()) + } +} + +impl From> for OwnedRef { + fn from(rf: Reference<'_>) -> Self { + match rf { + Reference::Node(nref) => nref.into(), + Reference::Value(vref) => vref.into(), + } + } +} diff --git a/common/sealable-trie/src/trie/seal.rs b/common/sealable-trie/src/trie/seal.rs index 122a9c58..b8c29b77 100644 --- a/common/sealable-trie/src/trie/seal.rs +++ b/common/sealable-trie/src/trie/seal.rs @@ -7,7 +7,7 @@ use crate::bits; use crate::nodes::{Node, NodeRef, RawNode, Reference, ValueRef}; /// Context for [`Trie::seal`] operation. -pub(super) struct SealContext<'a, A> { +pub(super) struct Context<'a, A> { /// Part of the key yet to be traversed. /// /// It starts as the key user provided and as trie is traversed bits are @@ -18,7 +18,7 @@ pub(super) struct SealContext<'a, A> { alloc: &'a mut A, } -impl<'a, A: memory::Allocator> SealContext<'a, A> { +impl<'a, A: memory::Allocator> Context<'a, A> { pub(super) fn new(alloc: &'a mut A, key: bits::Slice<'a>) -> Self { Self { key, alloc } } @@ -57,10 +57,7 @@ impl<'a, A: memory::Allocator> SealContext<'a, A> { &mut self, mut children: [Reference; 2], ) -> Result { - let side = match self.key.pop_front() { - None => return Err(Error::NotFound), - Some(bit) => usize::from(bit), - }; + let side = usize::from(self.key.pop_front().ok_or(Error::NotFound)?); match self.seal_child(children[side])? { None => Ok(SealResult::Done), Some(_) if children[1 - side].is_sealed() => Ok(SealResult::Free), diff --git a/common/sealable-trie/src/trie/set.rs b/common/sealable-trie/src/trie/set.rs index 7723305e..5fbc13e6 100644 --- a/common/sealable-trie/src/trie/set.rs +++ b/common/sealable-trie/src/trie/set.rs @@ -6,7 +6,7 @@ use crate::bits; use crate::nodes::{self, Node, NodeRef, RawNode, Reference, ValueRef}; /// Context for [`Trie::set`] operation. -pub(super) struct SetContext<'a, A: memory::Allocator> { +pub(super) struct Context<'a, A: memory::Allocator> { /// Part of the key yet to be traversed. /// /// It starts as the key user provided and as trie is traversed bits are @@ -20,7 +20,7 @@ pub(super) struct SetContext<'a, A: memory::Allocator> { wlog: memory::WriteLog<'a, A>, } -impl<'a, A: memory::Allocator> SetContext<'a, A> { +impl<'a, A: memory::Allocator> Context<'a, A> { pub(super) fn new( alloc: &'a mut A, key: bits::Slice<'a>, diff --git a/common/sealable-trie/src/trie/tests.rs b/common/sealable-trie/src/trie/tests.rs index 06299cb0..c5f59f4a 100644 --- a/common/sealable-trie/src/trie/tests.rs +++ b/common/sealable-trie/src/trie/tests.rs @@ -1,12 +1,15 @@ use std::collections::HashMap; use std::println; +use hex_literal::hex; use lib::hash::CryptoHash; use memory::test_utils::TestAllocator; use rand::Rng; +#[track_caller] fn do_test_inserts<'a>( keys: impl IntoIterator, + want_nodes: usize, verbose: bool, ) -> TestTrie { let keys = keys.into_iter(); @@ -15,42 +18,98 @@ fn do_test_inserts<'a>( for key in keys { trie.set(key, verbose) } + if want_nodes != usize::MAX { + assert_eq!(want_nodes, trie.nodes_count()); + } trie } #[test] -fn test_msb_difference() { do_test_inserts([&[0][..], &[0x80][..]], true); } +fn test_msb_difference() { do_test_inserts([&[0][..], &[0x80][..]], 3, true); } #[test] fn test_sequence() { do_test_inserts( b"0123456789:;<=>?".iter().map(core::slice::from_ref), + 16, true, ); } #[test] fn test_2byte_extension() { - do_test_inserts([&[123, 40][..], &[134, 233][..]], true); + do_test_inserts([&[123, 40][..], &[134, 233][..]], 3, true); } #[test] fn test_prefix() { let key = b"xy"; - do_test_inserts([&key[..], &key[..1]], true); - do_test_inserts([&key[..1], &key[..]], true); + do_test_inserts([&key[..], &key[..1]], 3, true); + do_test_inserts([&key[..1], &key[..]], 3, true); } #[test] fn test_seal() { let mut trie = do_test_inserts( b"0123456789:;<=>?".iter().map(core::slice::from_ref), + 16, true, ); for b in b'0'..=b'?' { trie.seal(&[b], true); } + assert_eq!(1, trie.nodes_count()); +} + +#[test] +fn test_del_simple() { + let mut trie = do_test_inserts( + b"0123456789:;<=>?".iter().map(core::slice::from_ref), + 16, + true, + ); + + for b in b'0'..=b';' { + trie.del(&[b], true); + } + assert_eq!(4, trie.nodes_count()); + for b in b'<'..=b'?' { + trie.del(&[b], true); + } + assert!(trie.is_empty()); +} + +#[test] +fn test_del_extension_0() { + // Construct a trie with following nodes: + // Extension → Branch → Extension + // → Extension + // And then remove one of the keys. Furthermore, because the extensions are + // long, this should result in two Extension nodes at the end. + let keys = [ + &hex!( + "00 00000000 00000000 00000000 00000000 00000000 00000000 \ + 00000000 00000000 0000" + )[..], + &hex!( + "01 00000000 00000000 00000000 00000000 00000000 00000000 \ + 00000000 00000000 0000" + )[..], + ]; + let mut trie = do_test_inserts(keys, 5, true); + trie.del(keys[1], true); + assert_eq!(2, trie.nodes_count()); +} + +#[test] +fn test_del_extension_1() { + // Construct a trie with `Extension → Value → Extension` chain and delete + // the Value. The Extensions should be merged into one. + let keys = [&hex!("00")[..], &hex!("00 FF")[..]]; + let mut trie = do_test_inserts(keys, 3, true); + trie.del(keys[0], true); + assert_eq!(1, trie.nodes_count()); } #[test] @@ -75,20 +134,72 @@ fn stress_test() { } let count = lib::test_utils::get_iteration_count(500); - let keys = RandKeys { buf: &mut [0; 35], rng: rand::thread_rng() }; - do_test_inserts(keys.take(count), false); + + // Insert count/2 random keys. + let mut rand_keys = RandKeys { buf: &mut [0; 35], rng: rand::thread_rng() }; + let mut trie = do_test_inserts( + (&mut rand_keys).take((count / 2).max(1)), + usize::MAX, + false, + ); + + // Now insert and delete keys randomly total of count times. On average + // that means count/2 deletions and count/2 new insertions. + let mut keys = trie + .mapping + .keys() + .map(|key| key.clone()) + .collect::>(); + for _ in 0..count { + let idx = if keys.is_empty() { + 1 + } else { + rand_keys.rng.gen_range(0..keys.len() * 2) + }; + if idx < keys.len() { + let key = keys.remove(idx); + trie.del(&key, false); + } else { + let key = rand_keys.next().unwrap(); + trie.set(&key, false); + } + } + + // Lastly, delete all remaining keys. + while !trie.mapping.is_empty() { + let key = trie.mapping.keys().next().unwrap().clone(); + trie.del(&key, false); + } + assert!(trie.is_empty()) } -#[derive(Eq, Ord)] +#[derive(Clone, Eq, Ord)] struct Key { len: u8, buf: [u8; 35], } impl Key { + fn new(key: &[u8]) -> Self { + assert!(key.len() <= 35); + Self { + len: key.len() as u8, + buf: { + let mut buf = [0; 35]; + buf[..key.len()].copy_from_slice(key); + buf + }, + } + } + fn as_bytes(&self) -> &[u8] { &self.buf[..usize::from(self.len)] } } +impl core::ops::Deref for Key { + type Target = [u8]; + fn deref(&self) -> &[u8] { &self.buf[..usize::from(self.len)] } +} + impl core::cmp::PartialEq for Key { fn eq(&self, other: &Self) -> bool { self.as_bytes() == other.as_bytes() } } @@ -126,33 +237,30 @@ impl TestTrie { } } + pub fn is_empty(&self) -> bool { + if self.trie.is_empty() { + assert_eq!(0, self.nodes_count()); + true + } else { + false + } + } + + pub fn nodes_count(&self) -> usize { self.trie.alloc.count() } + pub fn set(&mut self, key: &[u8], verbose: bool) { - assert!(key.len() <= 35); - let key = Key { - len: key.len() as u8, - buf: { - let mut buf = [0; 35]; - buf[..key.len()].copy_from_slice(key); - buf - }, - }; + let key = Key::new(key); let value = self.next_value(); println!("{}Inserting {key:?}", if verbose { "\n" } else { "" }); self.trie - .set(key.as_bytes(), &value) + .set(&key, &value) .unwrap_or_else(|err| panic!("Failed setting ‘{key:?}’: {err}")); self.mapping.insert(key, value); if verbose { self.trie.print(); } - for (key, value) in self.mapping.iter() { - let key = key.as_bytes(); - let got = self.trie.get(key).unwrap_or_else(|err| { - panic!("Failed getting ‘{key:?}’: {err}") - }); - assert_eq!(Some(value), got.as_ref(), "Invalid value at ‘{key:?}’"); - } + self.check_all_reads(); } pub fn seal(&mut self, key: &[u8], verbose: bool) { @@ -170,6 +278,35 @@ impl TestTrie { ) } + pub fn del(&mut self, key: &[u8], verbose: bool) { + let key = Key::new(key); + + println!("{}Deleting {key:?}", if verbose { "\n" } else { "" }); + let deleted = self + .trie + .del(&key) + .unwrap_or_else(|err| panic!("Failed sealing ‘{key:?}’: {err}")); + if verbose { + self.trie.print(); + } + assert_eq!(deleted, self.mapping.remove(&key).is_some()); + let got = self + .trie + .get(&key) + .unwrap_or_else(|err| panic!("Failed getting ‘{key:?}’: {err}")); + assert_eq!(None, got.as_ref(), "Invalid value at ‘{key:?}’"); + self.check_all_reads(); + } + + fn check_all_reads(&self) { + for (key, value) in self.mapping.iter() { + let got = self.trie.get(&key).unwrap_or_else(|err| { + panic!("Failed getting ‘{key:?}’: {err}") + }); + assert_eq!(Some(value), got.as_ref(), "Invalid value at ‘{key:?}’"); + } + } + fn next_value(&mut self) -> CryptoHash { self.count += 1; CryptoHash::test(self.count) From 3e996db3c0ae072f0dc1e226cb39cb831e207e89 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Mon, 23 Oct 2023 16:42:39 +0200 Subject: [PATCH 2/2] clippy --- common/sealable-trie/src/trie/del.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/sealable-trie/src/trie/del.rs b/common/sealable-trie/src/trie/del.rs index 6b9a1051..bf098ded 100644 --- a/common/sealable-trie/src/trie/del.rs +++ b/common/sealable-trie/src/trie/del.rs @@ -136,10 +136,10 @@ impl<'a, A: memory::Allocator> Context<'a, A> { self.del_node(ptr); Ok(match self.handle_reference(child, true)? { Action::Drop => Action::Drop, - Action::Ref(child) => Action::Ext(key.into(), child.into()), + Action::Ref(child) => Action::Ext(key.into(), child), Action::Ext(suffix, child) => { let key = bits::Owned::concat(key, suffix).unwrap(); - Action::Ext(key, child.into()) + Action::Ext(key, child) } }) }