From cc248b106faaa8a271e5ee590be50d066a7fce96 Mon Sep 17 00:00:00 2001 From: PSeitz Date: Fri, 27 Oct 2023 05:01:32 +0200 Subject: [PATCH] term hashmap remove copy in is_empty, unused unordered_id (#2229) --- columnar/src/columnar/writer/mod.rs | 12 +++++------ src/indexer/segment_writer.rs | 6 +++--- src/postings/postings_writer.rs | 2 +- stacker/src/arena_hashmap.rs | 33 ++++++++++------------------- 4 files changed, 21 insertions(+), 32 deletions(-) diff --git a/columnar/src/columnar/writer/mod.rs b/columnar/src/columnar/writer/mod.rs index 56e557b157..94e7c8473a 100644 --- a/columnar/src/columnar/writer/mod.rs +++ b/columnar/src/columnar/writer/mod.rs @@ -338,7 +338,7 @@ impl ColumnarWriter { let mut columns: Vec<(&[u8], ColumnType, Addr)> = self .numerical_field_hash_map .iter() - .map(|(column_name, addr, _)| { + .map(|(column_name, addr)| { let numerical_column_writer: NumericalColumnWriter = self.numerical_field_hash_map.read(addr); let column_type = numerical_column_writer.numerical_type().into(); @@ -348,27 +348,27 @@ impl ColumnarWriter { columns.extend( self.bytes_field_hash_map .iter() - .map(|(term, addr, _)| (term, ColumnType::Bytes, addr)), + .map(|(term, addr)| (term, ColumnType::Bytes, addr)), ); columns.extend( self.str_field_hash_map .iter() - .map(|(column_name, addr, _)| (column_name, ColumnType::Str, addr)), + .map(|(column_name, addr)| (column_name, ColumnType::Str, addr)), ); columns.extend( self.bool_field_hash_map .iter() - .map(|(column_name, addr, _)| (column_name, ColumnType::Bool, addr)), + .map(|(column_name, addr)| (column_name, ColumnType::Bool, addr)), ); columns.extend( self.ip_addr_field_hash_map .iter() - .map(|(column_name, addr, _)| (column_name, ColumnType::IpAddr, addr)), + .map(|(column_name, addr)| (column_name, ColumnType::IpAddr, addr)), ); columns.extend( self.datetime_field_hash_map .iter() - .map(|(column_name, addr, _)| (column_name, ColumnType::DateTime, addr)), + .map(|(column_name, addr)| (column_name, 
ColumnType::DateTime, addr)), ); columns.sort_unstable_by_key(|(column_name, col_type, _)| (*column_name, *col_type)); diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index bb7d66077a..1369f1ad32 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -510,9 +510,9 @@ mod tests { #[test] fn test_hashmap_size() { - assert_eq!(compute_initial_table_size(100_000).unwrap(), 1 << 11); - assert_eq!(compute_initial_table_size(1_000_000).unwrap(), 1 << 14); - assert_eq!(compute_initial_table_size(15_000_000).unwrap(), 1 << 18); + assert_eq!(compute_initial_table_size(100_000).unwrap(), 1 << 12); + assert_eq!(compute_initial_table_size(1_000_000).unwrap(), 1 << 15); + assert_eq!(compute_initial_table_size(15_000_000).unwrap(), 1 << 19); assert_eq!(compute_initial_table_size(1_000_000_000).unwrap(), 1 << 19); assert_eq!(compute_initial_table_size(4_000_000_000).unwrap(), 1 << 19); } diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs index d3c26be135..96952e2a79 100644 --- a/src/postings/postings_writer.rs +++ b/src/postings/postings_writer.rs @@ -53,7 +53,7 @@ pub(crate) fn serialize_postings( term_offsets.extend( ctx.term_index .iter() - .map(|(bytes, addr, _unordered_id)| (Term::wrap(bytes), addr)), + .map(|(bytes, addr)| (Term::wrap(bytes), addr)), ); term_offsets.sort_unstable_by_key(|(k, _)| k.clone()); diff --git a/stacker/src/arena_hashmap.rs b/stacker/src/arena_hashmap.rs index 931ae613b7..d855e70431 100644 --- a/stacker/src/arena_hashmap.rs +++ b/stacker/src/arena_hashmap.rs @@ -4,7 +4,6 @@ use std::mem; use super::{Addr, MemoryArena}; use crate::fastcpy::fast_short_slice_copy; use crate::memory_arena::store; -use crate::UnorderedId; /// Returns the actual memory size in bytes /// required to create a table with a given capacity. 
@@ -26,7 +25,6 @@ type HashType = u64; struct KeyValue { key_value_addr: Addr, hash: HashType, - unordered_id: UnorderedId, } impl Default for KeyValue { @@ -34,14 +32,13 @@ impl Default for KeyValue { KeyValue { key_value_addr: Addr::null_pointer(), hash: 0, - unordered_id: UnorderedId::default(), } } } impl KeyValue { #[inline] - fn is_empty(self) -> bool { + fn is_empty(&self) -> bool { self.key_value_addr.is_null() } #[inline] @@ -96,12 +93,12 @@ pub struct Iter<'a> { } impl<'a> Iterator for Iter<'a> { - type Item = (&'a [u8], Addr, UnorderedId); + type Item = (&'a [u8], Addr); fn next(&mut self) -> Option<Self::Item> { self.inner.next().map(move |kv| { let (key, offset): (&'a [u8], Addr) = self.hashmap.get_key_value(kv.key_value_addr); - (key, offset, kv.unordered_id) + (key, offset) }) } } @@ -207,16 +204,13 @@ impl ArenaHashMap { } #[inline] - fn set_bucket(&mut self, hash: HashType, key_value_addr: Addr, bucket: usize) -> UnorderedId { - let unordered_id = self.len as UnorderedId; + fn set_bucket(&mut self, hash: HashType, key_value_addr: Addr, bucket: usize) { self.len += 1; self.table[bucket] = KeyValue { key_value_addr, hash, - unordered_id, }; - unordered_id } #[inline] @@ -290,14 +284,8 @@ impl ArenaHashMap { /// If the key already as an associated value, then it will be passed /// `Some(previous_value)`.
#[inline] - pub fn mutate_or_create<V>( - &mut self, - key: &[u8], - mut updater: impl FnMut(Option<V>) -> V, - ) -> UnorderedId - where - V: Copy + 'static, - { + pub fn mutate_or_create<V>(&mut self, key: &[u8], mut updater: impl FnMut(Option<V>) -> V) + where V: Copy + 'static { if self.is_saturated() { self.resize(); } @@ -320,14 +308,15 @@ impl ArenaHashMap { store(&mut data[stop..], val); } - return self.set_bucket(hash, key_addr, bucket); + self.set_bucket(hash, key_addr, bucket); + return; } if kv.hash == hash { if let Some(val_addr) = self.get_value_addr_if_key_match(key, kv.key_value_addr) { let v = self.memory_arena.read(val_addr); let new_v = updater(Some(v)); self.memory_arena.write_at(val_addr, new_v); - return kv.unordered_id; + return; } } // This allows fetching the next bucket before the loop jmp @@ -361,7 +350,7 @@ mod tests { }); let mut vanilla_hash_map = HashMap::new(); let iter_values = hash_map.iter(); - for (key, addr, _) in iter_values { + for (key, addr) in iter_values { let val: u32 = hash_map.memory_arena.read(addr); vanilla_hash_map.insert(key.to_owned(), val); } @@ -390,7 +379,7 @@ mod tests { } let mut terms_back: Vec<String> = hash_map .iter() - .map(|(bytes, _, _)| String::from_utf8(bytes.to_vec()).unwrap()) + .map(|(bytes, _)| String::from_utf8(bytes.to_vec()).unwrap()) .collect(); terms_back.sort(); terms.sort();