Skip to content

Commit

Permalink
[Trie] Optimize runtime trie access by avoiding wasted lookups of inl…
Browse files Browse the repository at this point in the history
…ined values.
  • Loading branch information
robin-near committed Nov 4, 2023
1 parent ea8cb09 commit 6f0fc5b
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 78 deletions.
5 changes: 3 additions & 2 deletions core/store/src/trie/mem/loading.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ mod tests {
};
use crate::trie::mem::loading::load_trie_from_flat_state;
use crate::trie::mem::lookup::memtrie_lookup;
use crate::trie::OptimizedValueRef;
use crate::{KeyLookupMode, NibbleSlice, Trie, TrieUpdate};
use near_primitives::hash::CryptoHash;
use near_primitives::shard_layout::ShardUId;
Expand Down Expand Up @@ -128,8 +129,8 @@ mod tests {
let mut nodes_count = TrieNodesCount { db_reads: 0, mem_reads: 0 };
for key in keys.iter() {
let actual_value_ref = memtrie_lookup(root, key, Some(&mut cache), &mut nodes_count)
.map(|v| v.to_value_ref());
let expected_value_ref = trie.get_ref(key, KeyLookupMode::Trie).unwrap();
.map(OptimizedValueRef::from_flat_value);
let expected_value_ref = trie.get_optimized_ref(key, KeyLookupMode::Trie).unwrap();
assert_eq!(actual_value_ref, expected_value_ref, "{:?}", NibbleSlice::new(key));
assert_eq!(&nodes_count, &trie.get_trie_nodes_count());
}
Expand Down
12 changes: 7 additions & 5 deletions core/store/src/trie/mem/updating.rs
Original file line number Diff line number Diff line change
Expand Up @@ -996,15 +996,17 @@ mod tests {
&mut TrieNodesCount { db_reads: 0, mem_reads: 0 },
)
});
let disk_result = disk_trie.get_ref(key, KeyLookupMode::Trie).unwrap();
let disk_result = disk_trie.get_optimized_ref(key, KeyLookupMode::Trie).unwrap();
if let Some(value_ref) = value_ref {
let memtrie_value_ref = memtrie_result
.expect(&format!("Key {} is in truth but not in memtrie", hex::encode(key)))
.to_value_ref();
let disk_value_ref = disk_result.expect(&format!(
"Key {} is in truth but not in disk trie",
hex::encode(key)
));
let disk_value_ref = disk_result
.expect(&format!(
"Key {} is in truth but not in disk trie",
hex::encode(key)
))
.into_value_ref();
assert_eq!(
memtrie_value_ref,
*value_ref,
Expand Down
144 changes: 92 additions & 52 deletions core/store/src/trie/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,57 @@ enum NodeOrValue {
Value(std::sync::Arc<[u8]>),
}

/// Like a ValueRef, but allows for optimized retrieval of the value if the
/// value were already readily available when the ValueRef was retrieved.
///
/// This can be the case if the value came from flat storage, for example,
/// when some values are inlined into the storage.
///
/// Information-wise, this struct contains the same information as a
/// FlatStateValue; however, we make this a separate struct because
/// dereferencing a ValueRef (and likewise, OptimizedValueRef) requires proper
/// gas accounting; it is not a free operation. Therefore, while
/// OptimizedValueRef can be directly converted to a ValueRef, dereferencing
/// the value, even if the value is already available, can only be done via
/// `Trie::deref_optimized`.
#[derive(Debug, PartialEq, Eq)]
pub enum OptimizedValueRef {
    /// Only a reference (hash and length) to the value is known;
    /// dereferencing requires an actual value lookup.
    Ref(ValueRef),
    /// The full value is already in memory (e.g. it was inlined in flat
    /// storage). It is wrapped in an opaque token so that it can only be
    /// read via `Trie::deref_optimized`, which performs gas accounting.
    AvailableValue(ValueAccessToken),
}

/// Opaque wrapper around Vec<u8> so that the value cannot be used directly and
/// must instead be dereferenced via `Trie::deref_optimized`, so that gas
/// accounting is never skipped.
#[derive(Debug, PartialEq, Eq)]
pub struct ValueAccessToken {
    // Must stay private: exposing these bytes would allow reading the value
    // without going through `Trie::deref_optimized`, i.e. without gas
    // accounting.
    value: Vec<u8>,
}

impl OptimizedValueRef {
    /// Converts a value obtained from flat storage: an inlined value becomes
    /// directly available (behind an access token), while a reference stays
    /// a reference.
    fn from_flat_value(value: FlatStateValue) -> Self {
        match value {
            FlatStateValue::Ref(value_ref) => Self::Ref(value_ref),
            FlatStateValue::Inlined(value) => Self::AvailableValue(ValueAccessToken { value }),
        }
    }

    /// Length of the referenced value in bytes. Does not dereference the
    /// value and therefore incurs no gas accounting.
    pub fn len(&self) -> usize {
        match self {
            Self::Ref(value_ref) => value_ref.len(),
            Self::AvailableValue(token) => token.value.len(),
        }
    }

    /// Whether the referenced value is empty. Provided alongside `len` per
    /// Rust API convention (clippy::len_without_is_empty).
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Discards the optimization, converting into a plain `ValueRef`
    /// (hash and length only). Note that this does NOT dereference the
    /// value; use `Trie::deref_optimized` for that.
    pub fn into_value_ref(self) -> ValueRef {
        match self {
            Self::Ref(value_ref) => value_ref,
            Self::AvailableValue(token) => ValueRef::new(&token.value),
        }
    }
}

impl Trie {
pub const EMPTY_ROOT: StateRoot = StateRoot::new();

Expand Down Expand Up @@ -1096,42 +1147,22 @@ impl Trie {
fn lookup_from_flat_storage(
&self,
key: &[u8],
ref_only: bool,
) -> Result<Option<FlatStateValue>, StorageError> {
) -> Result<Option<OptimizedValueRef>, StorageError> {
let flat_storage_chunk_view = self.flat_storage_chunk_view.as_ref().unwrap();
let mut value = flat_storage_chunk_view.get_value(key)?;
if ref_only {
value = value.map(|value| FlatStateValue::Ref(value.to_value_ref()));
}
let value = flat_storage_chunk_view.get_value(key)?;
if self.recorder.is_some() {
// If recording, we need to look up in the trie as well to record the trie nodes,
// as they are needed to prove the value. Also, it's important that this lookup
// is done even if the key was not found, because intermediate trie nodes may be
// needed to prove the non-existence of the key.
let value_ref_from_trie =
self.lookup_from_state_column(NibbleSlice::new(key), false)?;
match &value {
Some(FlatStateValue::Inlined(value)) => {
assert!(value_ref_from_trie.is_some());
let value_from_trie =
self.retrieve_value(&value_ref_from_trie.unwrap().hash)?;
assert_eq!(&value_from_trie, value);
}
Some(FlatStateValue::Ref(value_ref)) => {
assert_eq!(value_ref_from_trie.as_ref(), Some(value_ref));
}
None => {
assert!(value_ref_from_trie.is_none());
}
}
} else {
if let Some(FlatStateValue::Inlined(value)) = &value {
self.accounting_cache
.borrow_mut()
.retroactively_account(hash(value), value.clone().into());
}
debug_assert_eq!(
&value_ref_from_trie,
&value.as_ref().map(|value| value.to_value_ref())
);
}
Ok(value)
Ok(value.map(OptimizedValueRef::from_flat_value))
}

/// Looks up the given key by walking the trie nodes stored in the
Expand Down Expand Up @@ -1266,7 +1297,8 @@ impl Trie {
Ok(bytes.to_vec())
}

/// Return the value reference to the `key`
/// Retrieves an `OptimizedValueRef` for the given key. See `OptimizedValueRef`.
///
/// `mode`: whether we will try to perform the lookup through flat storage or trie.
/// Note that even if `mode == KeyLookupMode::FlatStorage`, we still may not use
/// flat storage if the trie is not created with a flat storage object in it.
Expand All @@ -1276,43 +1308,51 @@ impl Trie {
/// storage for key lookup performed in `storage_write`, so we need
/// the `use_flat_storage` to differentiate whether the lookup is performed for
/// storage_write or not.
pub fn get_ref(
pub fn get_optimized_ref(
&self,
key: &[u8],
mode: KeyLookupMode,
) -> Result<Option<ValueRef>, StorageError> {
let use_flat_storage =
mode == KeyLookupMode::FlatStorage && self.flat_storage_chunk_view.is_some();
let charge_gas_for_trie_node_access =
mode == KeyLookupMode::Trie || self.charge_gas_for_trie_node_access;
if use_flat_storage {
Ok(self.lookup_from_flat_storage(key, true)?.map(|value| value.to_value_ref()))
} else {
self.lookup_from_state_column(NibbleSlice::new(key), charge_gas_for_trie_node_access)
}
}

/// Retrieves a value, which may or may not be the complete value, for the given key.
/// If the full value could be obtained cheaply it is returned; otherwise only the reference
/// is returned.
fn get_flat_value(&self, key: &[u8]) -> Result<Option<FlatStateValue>, StorageError> {
if self.flat_storage_chunk_view.is_some() {
self.lookup_from_flat_storage(key, false)
) -> Result<Option<OptimizedValueRef>, StorageError> {
if mode == KeyLookupMode::FlatStorage && self.flat_storage_chunk_view.is_some() {
self.lookup_from_flat_storage(key)
} else {
Ok(self
.lookup_from_state_column(
NibbleSlice::new(key),
self.charge_gas_for_trie_node_access,
mode == KeyLookupMode::Trie || self.charge_gas_for_trie_node_access,
)?
.map(|value| FlatStateValue::Ref(value)))
.map(OptimizedValueRef::Ref))
}
}

/// Dereferences an `OptimizedValueRef` into the full value, and properly
/// accounts for the gas, caching, and recording (if enabled). This may or
/// may not incur an on-disk lookup, depending on whether the
/// `OptimizedValueRef` contains an already available value.
pub fn deref_optimized(
    &self,
    optimized_value_ref: &OptimizedValueRef,
) -> Result<Vec<u8>, StorageError> {
    match optimized_value_ref {
        // Only the hash is known; fetch the value through the normal
        // retrieval path (which handles its own accounting).
        OptimizedValueRef::Ref(value_ref) => self.retrieve_value(&value_ref.hash),
        OptimizedValueRef::AvailableValue(ValueAccessToken { value }) => {
            let value_hash = hash(value);
            let arc_value: Arc<[u8]> = value.clone().into();
            // Register this access in the accounting cache even though no
            // actual retrieval happened, so that accounting matches the
            // non-inlined path.
            self.accounting_cache
                .borrow_mut()
                .retroactively_account(value_hash, arc_value.clone());
            // If recording is enabled, the value must be recorded exactly
            // as it would be on a real trie lookup.
            if let Some(recorder) = &self.recorder {
                recorder.borrow_mut().record(&value_hash, arc_value);
            }
            Ok(value.clone())
        }
    }
}

/// Retrieves the full value for the given key.
pub fn get(&self, key: &[u8]) -> Result<Option<Vec<u8>>, StorageError> {
match self.get_flat_value(key)? {
Some(FlatStateValue::Ref(value_ref)) => self.retrieve_value(&value_ref.hash).map(Some),
Some(FlatStateValue::Inlined(value)) => Ok(Some(value)),
match self.get_optimized_ref(key, KeyLookupMode::FlatStorage)? {
Some(optimized_ref) => Ok(Some(self.deref_optimized(&optimized_ref)?)),
None => Ok(None),
}
}
Expand Down
4 changes: 2 additions & 2 deletions core/store/src/trie/trie_recording.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ mod trie_recording_tests {
.collect::<Vec<_>>();

// First, check that the trie is using flat storage, so that counters are all zero.
// Only use get_ref(), because get() will actually dereference values which can
// Only use get_optimized_ref(), because get() will actually dereference values which can
// cause trie reads.
let trie = tries.get_trie_with_block_hash_for_shard(
shard_uid,
Expand All @@ -179,7 +179,7 @@ mod trie_recording_tests {
false,
);
for key in &keys_to_test_with {
trie.get_ref(&key, crate::KeyLookupMode::FlatStorage).unwrap();
trie.get_optimized_ref(&key, crate::KeyLookupMode::FlatStorage).unwrap();
}
assert_eq!(trie.get_trie_nodes_count(), TrieNodesCount { db_reads: 0, mem_reads: 0 });

Expand Down
28 changes: 13 additions & 15 deletions core/store/src/trie/update.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
pub use self::iterator::TrieUpdateIterator;
use super::Trie;
use super::{OptimizedValueRef, Trie};
use crate::trie::{KeyLookupMode, TrieChanges};
use crate::StorageError;
use near_primitives::hash::CryptoHash;
use near_primitives::state::ValueRef;
use near_primitives::trie_key::TrieKey;
use near_primitives::types::{
RawStateChange, RawStateChanges, RawStateChangesWithTrieKey, StateChangeCause, StateRoot,
TrieCacheMode,
};
use std::collections::BTreeMap;

mod iterator;

/// Key-value update. Contains a TrieKey and a value.
Expand All @@ -30,24 +29,22 @@ pub struct TrieUpdate {
}

pub enum TrieUpdateValuePtr<'a> {
HashAndSize(&'a Trie, u32, CryptoHash),
Ref(&'a Trie, OptimizedValueRef),
MemoryRef(&'a [u8]),
}

impl<'a> TrieUpdateValuePtr<'a> {
pub fn len(&self) -> u32 {
match self {
TrieUpdateValuePtr::MemoryRef(value) => value.len() as u32,
TrieUpdateValuePtr::HashAndSize(_, length, _) => *length,
TrieUpdateValuePtr::Ref(_, value_ref) => value_ref.len() as u32,
}
}

pub fn deref_value(&self) -> Result<Vec<u8>, StorageError> {
match self {
TrieUpdateValuePtr::MemoryRef(value) => Ok(value.to_vec()),
TrieUpdateValuePtr::HashAndSize(trie, _, hash) => {
trie.internal_retrieve_trie_node(hash, true).map(|bytes| bytes.to_vec())
}
TrieUpdateValuePtr::Ref(trie, value_ref) => Ok(trie.deref_optimized(value_ref)?),
}
}
}
Expand Down Expand Up @@ -75,11 +72,12 @@ impl TrieUpdate {
}
}

self.trie.get_ref(&key, mode).map(|option| {
option.map(|ValueRef { length, hash }| {
TrieUpdateValuePtr::HashAndSize(&self.trie, length, hash)
})
})
let result =
self.trie
.get_optimized_ref(&key, mode)?
.map(|optimized_value_ref| TrieUpdateValuePtr::Ref(&self.trie, optimized_value_ref));

Ok(result)
}

pub fn get(&self, key: &TrieKey) -> Result<Option<Vec<u8>>, StorageError> {
Expand Down Expand Up @@ -169,10 +167,10 @@ impl crate::TrieAccess for TrieUpdate {

#[cfg(test)]
mod tests {
use crate::test_utils::TestTriesBuilder;

use super::*;
use crate::test_utils::TestTriesBuilder;
use crate::ShardUId;
use near_primitives::hash::CryptoHash;
const SHARD_VERSION: u32 = 1;
const COMPLEX_SHARD_UID: ShardUId = ShardUId { version: SHARD_VERSION, shard_id: 0 };

Expand Down
7 changes: 5 additions & 2 deletions integration-tests/src/tests/client/flat_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,10 @@ fn test_flat_storage_creation_start_from_state_part() {
.unwrap();
for part_trie_keys in trie_keys.iter() {
for trie_key in part_trie_keys.iter() {
assert_matches!(trie.get_ref(&trie_key, KeyLookupMode::FlatStorage), Ok(Some(_)));
assert_matches!(
trie.get_optimized_ref(&trie_key, KeyLookupMode::FlatStorage),
Ok(Some(_))
);
}
}
assert_eq!(trie.get_trie_nodes_count(), TrieNodesCount { db_reads: 0, mem_reads: 0 });
Expand Down Expand Up @@ -542,7 +545,7 @@ fn test_not_supported_block() {
if height == flat_head_height {
env.produce_block(0, START_HEIGHT);
}
get_ref_results.push(trie.get_ref(&trie_key_bytes, KeyLookupMode::FlatStorage));
get_ref_results.push(trie.get_optimized_ref(&trie_key_bytes, KeyLookupMode::FlatStorage));
}

// The first result should be FlatStorageError, because we can't read from first chunk view anymore.
Expand Down

0 comments on commit 6f0fc5b

Please sign in to comment.