From e82b4ac4e3dd68a7f5ed91508028809549a9d5d5 Mon Sep 17 00:00:00 2001 From: Popog Date: Tue, 15 Nov 2016 05:57:51 -0800 Subject: [PATCH 1/7] Added Retain Iterator .retain() now returns an iterator with the removed items closes bluss/ordermap#11 Also cleaned up some match statements. --- src/lib.rs | 121 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 98 insertions(+), 23 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 5428f111..71e4c819 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -226,7 +226,7 @@ impl ShortHashProxy /// for ch in "a short treatise on fungi".chars() { /// *letters.entry(ch).or_insert(0) += 1; /// } -/// +/// /// assert_eq!(letters[&'s'], 2); /// assert_eq!(letters[&'t'], 3); /// assert_eq!(letters[&'u'], 1); @@ -930,22 +930,15 @@ impl OrderMap /// The order the elements are visited is not specified. /// /// Computes in **O(n)** time (average). - pub fn retain(&mut self, mut keep: F) - where F: FnMut(&mut K, &mut V) -> bool, + pub fn retain(&mut self, keep: F) -> Retain + where F: FnMut(&mut K, &mut V) -> bool { // We can use either forward or reverse scan, but forward was // faster in a microbenchmark - let mut i = 0; - while i < self.len() { - { - let entry = &mut self.entries[i]; - if keep(&mut entry.key, &mut entry.value) { - i += 1; - continue; - } - } - self.swap_remove_index(i); - // skip increment on remove + Retain { + i: 0, + map: self, + keep: keep, } } } @@ -975,11 +968,9 @@ impl OrderMap { /// /// Computes in **O(1)** time (average). pub fn swap_remove_index(&mut self, index: usize) -> Option<(K, V)> { - let (probe, found) = match self.entries.get(index) - .map(|e| self.find_existing_entry(e)) - { + let (probe, found) = match self.entries.get(index) { None => return None, - Some(t) => t, + Some(e) => self.find_existing_entry(e), }; Some(self.remove_found(probe, found)) } @@ -993,11 +984,9 @@ impl OrderMap { // However, we should probably not let this show in the public API or docs. 
impl OrderMap { fn pop_impl(&mut self) -> Option<(K, V)> { - let (probe, found) = match self.entries.last() - .map(|e| self.find_existing_entry(e)) - { + let (probe, found) = match self.entries.last() { None => return None, - Some(t) => t, + Some(e) => self.find_existing_entry(e), }; debug_assert_eq!(found, self.entries.len() - 1); Some(self.remove_found(probe, found)) @@ -1166,6 +1155,47 @@ impl<'a, K, V> DoubleEndedIterator for Keys<'a, K, V> { } } +pub struct Retain<'a, K: 'a, V: 'a, S: 'a, F: FnMut(&mut K, &mut V) -> bool> { + map: &'a mut OrderMap, + keep: F, + i: usize, +} + +impl<'a, K, V, S, F> Drop for Retain<'a, K, V, S, F> + where F: FnMut(&mut K, &mut V) -> bool +{ + fn drop(&mut self) { + for _ in self {} + } +} + + +impl<'a, K, V, S, F> Iterator for Retain<'a, K, V, S, F> + where F: FnMut(&mut K, &mut V) -> bool +{ + type Item = (K, V); + + fn next(&mut self) -> Option { + while self.i < self.map.len() { + let keep = { + let entry = &mut self.map.entries[self.i]; + (self.keep)(&mut entry.key, &mut entry.value) + }; + if keep { + self.i += 1; + } else { + // skip increment on remove + return self.map.swap_remove_index(self.i); + } + } + None + } + + fn size_hint(&self) -> (usize, Option) { + (0, Some(self.map.len() - self.i)) + } +} + pub struct Iter<'a, K: 'a, V: 'a> { iter: SliceIter<'a, Bucket>, } @@ -1214,7 +1244,7 @@ impl<'a, K, V> Iterator for IterMut<'a, K, V> { fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } - + fn count(self) -> usize { self.iter.len() } @@ -1492,6 +1522,51 @@ mod tests { } } + #[test] + fn retain() { + let mut insert = vec![0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; + let mut map = OrderMap::new(); + + for &elt in &insert { + map.insert(elt, elt); + } + + assert_eq!(map.keys().count(), map.len()); + assert_eq!(map.keys().count(), insert.len()); + for (a, b) in insert.iter().zip(map.keys()) { + assert_eq!(a, b); + } + + let mut removed_ex = Vec::new(); + for i in 0.. 
{ + while insert.get(i).iter().filter(|&v| **v >= 10).next().is_some() { + removed_ex.push(insert.swap_remove(i)); + } + if i > insert.len() { + break; + } + } + println!("{:?}", removed_ex); + + { + let removed: Vec<_> = map.retain(|k, _| *k < 10).collect(); + assert_eq!(removed.len(), removed_ex.len()); + for (&a, (b, _)) in removed_ex.iter().zip(removed) { + assert_eq!(a, b); + } + } + + println!("{:?}", insert); + println!("{:?}", map); + + assert_eq!(map.keys().count(), insert.len()); + assert_eq!(map.keys().count(), insert.len()); + + for (&a, &b) in insert.iter().zip(map.keys()) { + assert_eq!(a, b); + } + } + #[test] fn remove() { let insert = [0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; From 6dfde3446db8d430084a685225a798037c7cc994 Mon Sep 17 00:00:00 2001 From: Popog Date: Fri, 25 Nov 2016 03:19:50 -0800 Subject: [PATCH 2/7] Tombstones! Functioning tombstones and some associated work. Also some other things like reserve and removing S from Entry. Ironically removes Retain iterators. Attempts to minimize unwrap usage. Also pointlessly moves a bunch of methods around. Sorry about that. 
--- .gitignore | 13 + src/lib.rs | 1745 ++++++++++++++++++++++++++++++++++++------------ src/util.rs | 8 - tests/quick.rs | 7 +- 4 files changed, 1329 insertions(+), 444 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..22bc1a68 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +# Compiled files +*.o +*.so +*.rlib +*.dll + +# Executables +*.exe + +# Generated by Cargo +/target/ +Cargo.lock + diff --git a/src/lib.rs b/src/lib.rs index 71e4c819..7aff50ce 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,10 +12,10 @@ use std::borrow::Borrow; use std::cmp::max; use std::fmt; -use std::mem::{replace}; +use std::mem::replace; use std::marker::PhantomData; -use util::{second, ptrdistance, enumerate}; +use util::{enumerate}; fn hash_elem_using(build: &B, k: &K) -> HashValue { let mut h = build.build_hasher(); @@ -112,57 +112,125 @@ impl Clone for Pos { impl fmt::Debug for Pos { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self.pos() { - Some(i) => write!(f, "Pos({} / {:x})", i, self.index), - None => write!(f, "Pos(None)"), + match self.debug_pos() { + PosState::Value(i) => write!(f, "Pos({} / {:x})", i, self.index), + PosState::IsNone => write!(f, "Pos(None)"), + PosState::IsTombstone => write!(f, "Pos(Tombstone)"), } } } +#[derive(PartialEq, Eq)] +enum PosState { + IsNone, + IsTombstone, + Value(T), +} + +const POS_NONE: u64 = !0; +const POS_TOMBSTONE: u64 = !1; + impl Pos { #[inline] - fn none() -> Self { Pos { index: !0 } } + fn none() -> Self { Pos { index: POS_NONE } } + + #[inline] + fn tombstone() -> Self { Pos { index: POS_TOMBSTONE } } #[inline] - fn is_none(&self) -> bool { self.index == !0 } + fn with_hash(i: usize, hash: HashValue) -> Self + where Sz: Size + { + let index = if Sz::is_64_bit() { + i as u64 + } else { + (i | (lo32(hash.0 as u64) << 32)) as u64 + }; + debug_assert!(index as u64 != POS_NONE); + debug_assert!(index as u64 != POS_TOMBSTONE); + Pos { index: index as u64 
} + } #[inline] - fn pos(&self) -> Option { - if self.index == !0 { None } else { Some(lo32(self.index as u64)) } + fn link(i: usize) -> Self { + debug_assert!(i as u64 != POS_NONE); + debug_assert!(i as u64 != POS_TOMBSTONE); + Pos { index: i as u64 } } + #[inline] - fn with_hash(i: usize, hash: HashValue) -> Self + fn state(&self) -> PosState<()> { + match self.index { + POS_NONE => PosState::IsNone, + POS_TOMBSTONE => PosState::IsTombstone, + _ => PosState::Value(()), + } + } + + #[inline] + fn link_pos(&self) -> Option { + debug_assert!(self.index != POS_TOMBSTONE); + if self.index == POS_NONE { + Some(self.index as usize) + } else { + None + } + } + + #[inline] + fn debug_pos(&self) -> PosState + { + match self.index { + POS_NONE => PosState::IsNone, + POS_TOMBSTONE => PosState::IsTombstone, + v => PosState::Value(lo32(v as u64)), + } + } + + #[inline] + fn sub_eq(&mut self, delta: usize) where Sz: Size { + debug_assert!(self.index != POS_NONE); + debug_assert!(self.index != POS_TOMBSTONE); if Sz::is_64_bit() { - Pos { - index: i as u64, - } + self.index -= delta as u64 } else { - Pos { - index: (i | (lo32(hash.0 as u64) << 32)) as u64 - } + let (i, hash) = split_lo_hi(self.index); + let i = i as usize - delta; + self.index = (i | (lo32(hash as u64) << 32)) as u64; } } #[inline] - fn resolve(&self) -> Option<(usize, ShortHashProxy)> + fn pos(&self) -> PosState where Sz: Size { - if Sz::is_64_bit() { - if !self.is_none() { - Some((self.index as usize, ShortHashProxy::new(0))) + match self.index { + POS_NONE => PosState::IsNone, + POS_TOMBSTONE => PosState::IsTombstone, + v => if Sz::is_64_bit() { + PosState::Value(v as usize) } else { - None - } - } else { - if !self.is_none() { - let (i, hash) = split_lo_hi(self.index); - Some((i as usize, ShortHashProxy::new(hash as usize))) + PosState::Value(lo32(v as u64)) + }, + } + } + + #[inline] + fn resolve(&self) -> PosState<(usize, ShortHashProxy)> + where Sz: Size + { + match self.index { + POS_NONE => 
PosState::IsNone, + POS_TOMBSTONE => PosState::IsTombstone, + v => if Sz::is_64_bit() { + PosState::Value((v as usize, ShortHashProxy::new(0))) } else { - None - } + let (i, hash) = split_lo_hi(v); + PosState::Value((i as usize, ShortHashProxy::new(hash as usize))) + }, } } } @@ -187,9 +255,9 @@ impl ShortHashProxy /// Get the hash from either `self` or from a lookup into `entries`, /// depending on `Sz`. - fn get_short_hash(&self, entries: &[Bucket], index: usize) -> ShortHash { + fn get_short_hash(&self, entries: &[Option>], index: usize) -> ShortHash { if Sz::is_64_bit() { - ShortHash(entries[index].hash.0, PhantomData) + ShortHash(entries[index].unwrap_hash().0, PhantomData) } else { ShortHash(self.0, PhantomData) } @@ -238,7 +306,11 @@ pub struct OrderMap { /// indices are the buckets. indices.len() == raw capacity indices: Vec, /// entries is a dense vec of entries in their order. entries.len() == len - entries: Vec>, + entries: Vec>>, + /// the number of tombstones in `indices` waiting to be cleaned up + index_tombstones: usize, + /// the number of tombstones in `entries` waiting to be cleaned up + entry_tombstones: usize, hash_builder: S, } @@ -249,17 +321,36 @@ struct Bucket { value: V, } +trait BucketHelper { + fn unwrap_hash(&self) -> HashValue; +} + +impl BucketHelper for Option> { + // if the bucket is none, gives a hash of 0 in release and panics in debug + fn unwrap_hash(&self) -> HashValue { + debug_assert!(self.is_some()); + self.as_ref().map_or(HashValue(0), |e| e.hash) + } +} + #[inline(always)] fn desired_pos(mask: usize, hash: HashValue) -> usize { hash.0 & mask } +/// The number of steps that `current` is forward of the prev +#[inline(always)] +fn probe_delta(mask: usize, prev: usize, current: usize) -> usize { + current.wrapping_sub(prev) & mask +} + /// The number of steps that `current` is forward of the desired position for hash #[inline(always)] fn probe_distance(mask: usize, hash: HashValue, current: usize) -> usize { - 
current.wrapping_sub(desired_pos(mask, hash)) & mask + probe_delta(mask, desired_pos(mask, hash), current) } + enum Inserted { Done, Swapped { prev_value: V }, @@ -275,28 +366,29 @@ impl fmt::Debug for OrderMap S: BuildHasher, { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - try!(f.debug_map().entries(self.iter()).finish()); + f.debug_map().entries(self.iter()).finish()?; if cfg!(not(feature = "test_debug")) { return Ok(()); } - try!(writeln!(f, "")); + writeln!(f, "")?; for (i, index) in enumerate(&self.indices) { - try!(write!(f, "{}: {:?}", i, index)); - if let Some(pos) = index.pos() { - let hash = self.entries[pos].hash; - let key = &self.entries[pos].key; - let desire = desired_pos(self.mask, hash); - try!(writeln!(f, ", desired={}, probe_distance={}, key={:?}", - desire, - probe_distance(self.mask, hash, i), - key)); + write!(f, "{}: {:?}", i, index)?; + if let PosState::Value(pos) = index.debug_pos() { + if let &Some(ref entry) = &self.entries[pos] { + writeln!(f, ", desired={}, probe_distance={}, key={:?}", + desired_pos(self.mask, entry.hash), + probe_distance(self.mask, entry.hash, i), + entry.key)?; + } else { + writeln!(f, ", tombstone")?; + } } - try!(writeln!(f, "")); + writeln!(f, "")?; } - try!(writeln!(f, "cap={}, raw_cap={}, entries.cap={}", + writeln!(f, "cap={}, raw_cap={}, entries.cap={}", self.capacity(), self.raw_capacity(), - self.entries.capacity())); + self.entries.capacity())?; Ok(()) } } @@ -354,6 +446,8 @@ impl OrderMap mask: 0, indices: Vec::new(), entries: Vec::new(), + entry_tombstones: 0, + index_tombstones: 0, hash_builder: hash_builder, } } else { @@ -363,6 +457,8 @@ impl OrderMap mask: raw_cap.wrapping_sub(1), indices: vec![Pos::none(); raw_cap], entries: Vec::with_capacity(usable_capacity(raw_cap)), + entry_tombstones: 0, + index_tombstones: 0, hash_builder: hash_builder, } } @@ -371,7 +467,12 @@ impl OrderMap /// Return the number of key-value pairs in the map. /// /// Computes in **O(1)** time. 
- pub fn len(&self) -> usize { self.entries.len() } + pub fn len(&self) -> usize { self.entries.len() - self.entry_tombstones } + + /// Return the number of tombstoned key-value pairs in the map. + /// + /// Computes in **O(1)** time. + pub fn tombstones(&self) -> usize { self.entry_tombstones } // Return whether we need 32 or 64 bits to specify a bucket or entry index #[cfg(not(feature = "test_low_transition_point"))] @@ -421,6 +522,13 @@ impl Size for u64 { /// /// The u32 or u64 is *prepended* to the type parameter list! macro_rules! dispatch_32_vs_64 { + ($self_:ident . $method:ident::<$($t:ty),*>($($arg:expr),*)) => { + if $self_.size_class_is_64bit() { + $self_.$method::($($arg),*) + } else { + $self_.$method::($($arg),*) + } + }; ($self_:ident . $method:ident::<$($t:ty),*>($($arg:expr),*)) => { if $self_.size_class_is_64bit() { $self_.$method::($($arg),*) @@ -437,14 +545,12 @@ macro_rules! dispatch_32_vs_64 { }; } -/// FIXME: Remove dependence on the `S` parameter -/// (to match HashMap). -pub enum Entry<'a, K: 'a, V: 'a, S: 'a = RandomState> { - Occupied(OccupiedEntry<'a, K, V, S>), - Vacant(VacantEntry<'a, K, V, S>), +pub enum Entry<'a, K: 'a, V: 'a,> { + Occupied(OccupiedEntry<'a, K, V>), + Vacant(VacantEntry<'a, K, V>), } -impl<'a, K, V, S> Entry<'a, K, V, S> { +impl<'a, K, V> Entry<'a, K, V> { /// Computes in **O(1)** time (amortized average). 
pub fn or_insert(self, default: V) -> &'a mut V { match self { @@ -471,23 +577,25 @@ impl<'a, K, V, S> Entry<'a, K, V, S> { } } -pub struct OccupiedEntry<'a, K: 'a, V: 'a, S: 'a = RandomState> { - map: &'a mut OrderMap, - key: K, - probe: usize, - index: usize, +pub struct OccupiedEntry<'a, K: 'a, V: 'a> { + index_tombstones: &'a mut usize, + entry_tombstones: &'a mut usize, + pos: &'a mut Pos, + kv: &'a mut Option>, } -impl<'a, K, V, S> OccupiedEntry<'a, K, V, S> { - pub fn key(&self) -> &K { &self.key } +impl<'a, K, V> OccupiedEntry<'a, K, V> { + pub fn key(&self) -> &K { + &self.kv.as_ref().unwrap().key + } pub fn get(&self) -> &V { - &self.map.entries[self.index].value + &self.kv.as_ref().unwrap().value } pub fn get_mut(&mut self) -> &mut V { - &mut self.map.entries[self.index].value + &mut self.kv.as_mut().unwrap().value } pub fn into_mut(self) -> &'a mut V { - &mut self.map.entries[self.index].value + &mut self.kv.as_mut().unwrap().value } pub fn insert(self, value: V) -> V { @@ -499,24 +607,34 @@ impl<'a, K, V, S> OccupiedEntry<'a, K, V, S> { } /// Remove and return the key, value pair stored in the map for this entry + /// + /// This leaves a tombstone in the place of the removed element, which preserves + /// indices. 
pub fn remove_entry(self) -> (K, V) { - self.map.remove_found(self.probe, self.index) + *self.index_tombstones +=1; + *self.entry_tombstones += 1; + *self.pos = Pos::tombstone(); + let entry = self.kv.take().unwrap(); + (entry.key, entry.value) } } -pub struct VacantEntry<'a, K: 'a, V: 'a, S: 'a = RandomState> { - map: &'a mut OrderMap, +pub struct VacantEntry<'a, K: 'a, V: 'a> { + indices: &'a mut Vec, + entries: &'a mut Vec>>, + index_tombstones: &'a mut usize, + size_class_is_64bit: bool, key: K, hash: HashValue, probe: usize, } -impl<'a, K, V, S> VacantEntry<'a, K, V, S> { +impl<'a, K, V> VacantEntry<'a, K, V> { pub fn key(&self) -> &K { &self.key } pub fn into_key(self) -> K { self.key } pub fn insert(self, value: V) -> &'a mut V { - if self.map.size_class_is_64bit() { + if self.size_class_is_64bit { self.insert_impl::(value) } else { self.insert_impl::(value) @@ -526,11 +644,11 @@ impl<'a, K, V, S> VacantEntry<'a, K, V, S> { fn insert_impl(self, value: V) -> &'a mut V where Sz: Size { - let index = self.map.entries.len(); - self.map.entries.push(Bucket { hash: self.hash, key: self.key, value: value }); + let index = self.entries.len(); + self.entries.push(Some(Bucket { hash: self.hash, key: self.key, value: value })); let old_pos = Pos::with_hash::(index, self.hash); - self.map.insert_phase_2::(self.probe, old_pos); - &mut {self.map}.entries[index].value + insert_phase_2::(self.indices, self.index_tombstones, self.probe, old_pos); + &mut self.entries[index].as_mut().unwrap().value } } @@ -541,67 +659,118 @@ impl OrderMap /// Get the given key’s corresponding entry in the map for in-place manipulation. /// /// Computes in **O(1)** time (amortized average). 
- pub fn entry(&mut self, key: K) -> Entry { + pub fn entry(&mut self, key: K) -> Entry { self.reserve_one(); dispatch_32_vs_64!(self.entry_phase_1(key)) } // Warning, this is a code duplication zone Entry is not yet finished - fn entry_phase_1(&mut self, key: K) -> Entry + fn entry_phase_1(&mut self, key: K) -> Entry where Sz: Size { let hash = hash_elem_using(&self.hash_builder, &key); let mut probe = desired_pos(self.mask, hash); let mut dist = 0; + let mut last_tombstone = None; + let indices_len = self.indices.len(); debug_assert!(self.len() < self.raw_capacity()); probe_loop!(probe < self.indices.len(), { - if let Some((i, hash_proxy)) = self.indices[probe].resolve::() { - let entry_hash = hash_proxy.get_short_hash(&self.entries, i); - // if existing element probed less than us, swap - let their_dist = probe_distance(self.mask, entry_hash.into_hash(), probe); - if their_dist < dist { - // robin hood: steal the spot if it's better for us + match self.indices[probe].resolve::() { + PosState::Value((i, hash_proxy)) => { + let entry_hash = hash_proxy.get_short_hash(&self.entries, i); + // if existing element probed less than us, swap + let their_dist = probe_distance(self.mask, entry_hash.into_hash(), probe); + if their_dist < dist { + // don't steal an entry if we can steal a tombstone + if let Some(probe) = last_tombstone { + return Entry::Vacant(VacantEntry { + size_class_is_64bit: self.size_class_is_64bit(), + indices: &mut self.indices, + entries: &mut self.entries, + index_tombstones: &mut self.index_tombstones, + hash: hash, + key: key, + probe: probe, + }); + } + + // robin hood: steal the spot if it's better for us + return Entry::Vacant(VacantEntry { + size_class_is_64bit: self.size_class_is_64bit(), + indices: &mut self.indices, + entries: &mut self.entries, + index_tombstones: &mut self.index_tombstones, + hash: hash, + key: key, + probe: probe, + }); + } else if entry_hash == hash && self.entries[i].as_ref().map_or(false, |e| e.key == key) { + return 
Entry::Occupied(OccupiedEntry { + entry_tombstones: &mut self.entry_tombstones, + index_tombstones: &mut self.index_tombstones, + kv: &mut self.entries[i], + pos: &mut self.indices[probe], + }); + } else if let Some(tprobe) = last_tombstone { + let tdist = probe_delta(self.mask, tprobe, probe); + + // We're already in the neighborhood, + // If a bucket wants to steal from a tombstone. make it happen + if tdist < their_dist { + self.indices.swap(tprobe, probe); + last_tombstone = Some(probe); + } + } + }, + PosState::IsNone => { + // empty bucket, insert here (or any previous tombstones if we had those) + let probe = last_tombstone.unwrap_or(probe); return Entry::Vacant(VacantEntry { - map: self, + size_class_is_64bit: self.size_class_is_64bit(), + indices: &mut self.indices, + entries: &mut self.entries, + index_tombstones: &mut self.index_tombstones, hash: hash, key: key, probe: probe, }); - } else if entry_hash == hash && self.entries[i].key == key { - return Entry::Occupied(OccupiedEntry { - map: self, - key: key, - probe: probe, - index: i, - }); - } - } else { - // empty bucket, insert here - return Entry::Vacant(VacantEntry { - map: self, - hash: hash, - key: key, - probe: probe, - }); + }, + PosState::IsTombstone => { + // tombstone bucket, insert here if we've checked everywhere else + if dist >= indices_len { + return Entry::Vacant(VacantEntry { + size_class_is_64bit: self.size_class_is_64bit(), + indices: &mut self.indices, + entries: &mut self.entries, + index_tombstones: &mut self.index_tombstones, + hash: hash, + key: key, + probe: last_tombstone.unwrap(), + }); + } + last_tombstone = Some(probe); + }, } dist += 1; }); } - /// Remove all key-value pairs in the map, while preserving its capacity. + /// Reserves capacity for at least `additional` more elements to be + /// inserted. + /// + /// The collection may reserve more space to avoid frequent reallocations. /// /// Computes in **O(n)** time. 
- pub fn clear(&mut self) { - self.entries.clear(); - for pos in &mut self.indices { - *pos = Pos::none(); - } - } - - /// FIXME Not implemented fully yet pub fn reserve(&mut self, additional: usize) { - if additional > 0 { - self.reserve_one(); + if additional == 0 { return } + self.entries.reserve(additional); + let raw_cap = to_raw_capacity(self.len() + self.index_tombstones + additional); + if raw_cap < self.raw_capacity() { return } + + if self.index_tombstones >= additional { + self.remove_index_tombstones(); + } else { + dispatch_32_vs_64!(self.change_capacity(raw_cap.next_power_of_two())); } } @@ -616,123 +785,85 @@ impl OrderMap let hash = hash_elem_using(&self.hash_builder, &key); let mut probe = desired_pos(self.mask, hash); let mut dist = 0; + let mut last_tombstone = None; + let indices_len = self.indices.len(); let insert_kind; debug_assert!(self.len() < self.raw_capacity()); probe_loop!(probe < self.indices.len(), { - let pos = &mut self.indices[probe]; - if let Some((i, hash_proxy)) = pos.resolve::() { - let entry_hash = hash_proxy.get_short_hash(&self.entries, i); - // if existing element probed less than us, swap - let their_dist = probe_distance(self.mask, entry_hash.into_hash(), probe); - if their_dist < dist { - // robin hood: steal the spot if it's better for us + match self.indices[probe].resolve::() { + PosState::Value((i, hash_proxy)) => { + let entry_hash = hash_proxy.get_short_hash(&self.entries, i); + // if existing element probed less than us, swap + let their_dist = probe_distance(self.mask, entry_hash.into_hash(), probe); + if their_dist < dist { + // don't steal an entry if we can steal a tombstone + if let Some(probe) = last_tombstone { + self.index_tombstones -= 1; + let index = self.entries.len(); + self.indices[probe] = Pos::with_hash::(index, hash); + insert_kind = Inserted::Done; + break; + } + + // robin hood: steal the spot if it's better for us + let index = self.entries.len(); + insert_kind = Inserted::RobinHood { + probe: 
probe, + old_pos: Pos::with_hash::(index, hash), + }; + break; + } else if entry_hash == hash { + if let Some(entry) = self.entries[i].as_mut() { + if entry.key == key { + return Inserted::Swapped { + prev_value: replace(&mut entry.value, value), + }; + } + } + } else if let Some(tprobe) = last_tombstone { + let tdist = probe_delta(self.mask, tprobe, probe); + // We're already in the neighborhood, + // If a bucket wants to steal from a tombstone. make it happen + if tdist < their_dist { + self.indices.swap(tprobe, probe); + last_tombstone = Some(probe); + } + } + }, + PosState::IsNone => { + // use a the first tombstone if we found one of those + if let Some(probe) = last_tombstone { + self.index_tombstones -= 1; + let index = self.entries.len(); + self.indices[probe] = Pos::with_hash::(index, hash); + insert_kind = Inserted::Done; + break; + } + // empty bucket, insert here let index = self.entries.len(); - insert_kind = Inserted::RobinHood { - probe: probe, - old_pos: Pos::with_hash::(index, hash), - }; + self.indices[probe] = Pos::with_hash::(index, hash); + insert_kind = Inserted::Done; break; - } else if entry_hash == hash && self.entries[i].key == key { - return Inserted::Swapped { - prev_value: replace(&mut self.entries[i].value, value), - }; - } - } else { - // empty bucket, insert here - let index = self.entries.len(); - *pos = Pos::with_hash::(index, hash); - insert_kind = Inserted::Done; - break; + }, + PosState::IsTombstone => { + // tombstone bucket, insert here if we've checked everywhere else + if dist >= indices_len { + let probe = last_tombstone.unwrap(); + self.index_tombstones -= 1; + let index = self.entries.len(); + self.indices[probe] = Pos::with_hash::(index, hash); + insert_kind = Inserted::Done; + break; + } + last_tombstone = Some(probe); + }, } dist += 1; }); - self.entries.push(Bucket { hash: hash, key: key, value: value }); + self.entries.push(Some(Bucket { hash: hash, key: key, value: value })); insert_kind } - fn first_allocation(&mut 
self) { - debug_assert_eq!(self.len(), 0); - let raw_cap = 8usize; - self.mask = raw_cap.wrapping_sub(1); - self.indices = vec![Pos::none(); raw_cap]; - self.entries = Vec::with_capacity(usable_capacity(raw_cap)); - } - - #[inline(never)] - // `Sz` is *current* Size class, before grow - fn double_capacity(&mut self) - where Sz: Size - { - debug_assert!(self.raw_capacity() == 0 || self.len() > 0); - if self.raw_capacity() == 0 { - return self.first_allocation(); - } - - // find first ideally placed element -- start of cluster - let mut first_ideal = 0; - for (i, index) in enumerate(&self.indices) { - if let Some(pos) = index.pos() { - if 0 == probe_distance(self.mask, self.entries[pos].hash, i) { - first_ideal = i; - break; - } - } - } - - // visit the entries in an order where we can simply reinsert them - // into self.indices without any bucket stealing. - let new_raw_cap = self.indices.len() * 2; - let old_indices = replace(&mut self.indices, vec![Pos::none(); new_raw_cap]); - self.mask = new_raw_cap.wrapping_sub(1); - - // `Sz` is the old size class, and either u32 or u64 is the new - for &pos in &old_indices[first_ideal..] { - dispatch_32_vs_64!(self.reinsert_entry_in_order::(pos)); - } - - for &pos in &old_indices[..first_ideal] { - dispatch_32_vs_64!(self.reinsert_entry_in_order::(pos)); - } - let more = self.capacity() - self.len(); - self.entries.reserve_exact(more); - } - - // write to self.indices - // read from self.entries at `pos` - // - // reinserting rewrites all `Pos` entries anyway. This handles transitioning - // from u32 to u64 size class if needed by using the two type parameters. 
- fn reinsert_entry_in_order(&mut self, pos: Pos) - where SzNew: Size, - SzOld: Size, - { - if let Some((i, hash_proxy)) = pos.resolve::() { - // only if the size class is conserved can we use the short hash - let entry_hash = if SzOld::is_same_size::() { - hash_proxy.get_short_hash(&self.entries, i).into_hash() - } else { - self.entries[i].hash - }; - // find first empty bucket and insert there - let mut probe = desired_pos(self.mask, entry_hash); - probe_loop!(probe < self.indices.len(), { - if let Some(_) = self.indices[probe].resolve::() { - /* nothing */ - } else { - // empty bucket, insert here - self.indices[probe] = Pos::with_hash::(i, entry_hash); - return; - } - }); - } - } - - fn reserve_one(&mut self) { - if self.len() == self.capacity() { - dispatch_32_vs_64!(self.double_capacity()); - } - } - /// Insert they key-value pair into the map. /// /// If a value already existed for `key`, that old value is returned @@ -746,7 +877,7 @@ impl OrderMap Inserted::Swapped { prev_value } => Some(prev_value), Inserted::Done => None, Inserted::RobinHood { probe, old_pos } => { - self.insert_phase_2::(probe, old_pos); + insert_phase_2::(&mut self.indices, &mut self.index_tombstones, probe, old_pos); None } } @@ -755,7 +886,7 @@ impl OrderMap Inserted::Swapped { prev_value } => Some(prev_value), Inserted::Done => None, Inserted::RobinHood { probe, old_pos } => { - self.insert_phase_2::(probe, old_pos); + insert_phase_2::(&mut self.indices, &mut self.index_tombstones, probe, old_pos); None } } @@ -765,21 +896,24 @@ impl OrderMap /// Return an iterator over the keys of the map, in their order pub fn keys(&self) -> Keys { Keys { - iter: self.entries.iter() + iter: self.entries.iter(), + tombstones: self.entry_tombstones, } } /// Return an iterator over the key-value pairs of the map, in their order pub fn iter(&self) -> Iter { Iter { - iter: self.entries.iter() + iter: self.entries.iter(), + tombstones: self.entry_tombstones, } } /// Return an iterator over the key-value 
pairs of the map, in their order pub fn iter_mut(&mut self) -> IterMut { IterMut { - iter: self.entries.iter_mut() + iter: self.entries.iter_mut(), + tombstones: self.entry_tombstones, } } @@ -801,19 +935,14 @@ impl OrderMap where K: Borrow, Q: Eq + Hash, { - self.get_pair(key).map(second) + self.get_pair(key).map(|(_, v)| v) } pub fn get_pair(&self, key: &Q) -> Option<(&K, &V)> where K: Borrow, Q: Eq + Hash, { - if let Some((_, found)) = self.find(key) { - let entry = &self.entries[found]; - Some((&entry.key, &entry.value)) - } else { - None - } + self.get_pair_index(key).map(|(_, k, v)| (k, v)) } /// Return item index, key and value @@ -821,19 +950,14 @@ impl OrderMap where K: Borrow, Q: Eq + Hash, { - if let Some((_, found)) = self.find(key) { - let entry = &self.entries[found]; - Some((found, &entry.key, &entry.value)) - } else { - None - } + self.find(key).map(|(_, found, k, v)| (found, k, v)) } pub fn get_mut(&mut self, key: &Q) -> Option<&mut V> where K: Borrow, Q: Eq + Hash, { - self.get_pair_mut(key).map(second) + self.get_pair_mut(key).map(|(_, v)| v) } pub fn get_pair_mut(&mut self, key: &Q) @@ -841,12 +965,7 @@ impl OrderMap where K: Borrow, Q: Eq + Hash, { - if let Some((_, found)) = self.find(key) { - let entry = &mut self.entries[found]; - Some((&mut entry.key, &mut entry.value)) - } else { - None - } + self.get_pair_index_mut(key).map(|(_, k, v)| (k, v)) } pub fn get_pair_index_mut(&mut self, key: &Q) @@ -854,49 +973,104 @@ impl OrderMap where K: Borrow, Q: Eq + Hash, { - if let Some((_, found)) = self.find(key) { - let entry = &mut self.entries[found]; - Some((found, &mut entry.key, &mut entry.value)) - } else { - None - } + self.find_mut(key).map(|(_, found, k, v)| (found, k, v)) } - /// Return probe (indices) and position (entries) - fn find(&self, key: &Q) -> Option<(usize, usize)> + /// Return probe (indices), position (entries), and key-value pairs by `&`. 
+ fn find(&self, key: &Q) -> Option<(usize, usize, &K, &V)> + where K: Borrow, + Q: Eq + Hash, + { + if self.len() <= 0 { return None; } + + let h = hash_elem_using(&self.hash_builder, key); + self.find_using(h, move |e| { *e.key.borrow() == *key }) + } + + /// Return probe (indices), position (entries), and key-value pairs by + /// `&mut`. + fn find_mut(&mut self, key: &Q) -> Option<(usize, usize, &mut K, &mut V)> + where K: Borrow, + Q: Eq + Hash, + { + if self.len() <= 0 { return None; } + + let h = hash_elem_using(&self.hash_builder, key); + self.find_mut_using(h, move |e| { *e.key.borrow() == *key }) + } + + /// Return probe (indices), position (entries), and key-value pairs by + /// value. + fn find_remove(&mut self, key: &Q) -> Option<(usize, usize, K, V)> where K: Borrow, Q: Eq + Hash, { - if self.len() == 0 { return None; } + if self.len() <= 0 { return None; } + let h = hash_elem_using(&self.hash_builder, key); - self.find_using(h, move |entry| { *entry.key.borrow() == *key }) + self.find_remove_using(h, move |e| { *e.key.borrow() == *key }) } - /// Remove the key-value pair equivalent to `key` and return - /// its value. + /// Remove the key-value pair equivalent to `key` and return the `value`. /// - /// Like `Vec::swap_remove`, the pair is removed by swapping it with the - /// last element of the map and popping it off. **This perturbs - /// the postion of what used to be the last element!** + /// This leaves a tombstone in the place of the removed element, which preserves + /// indices. /// /// Return `None` if `key` is not in map. /// /// Computes in **O(1)** time (average). - pub fn swap_remove(&mut self, key: &Q) -> Option + pub fn remove(&mut self, key: &Q) -> Option where K: Borrow, Q: Eq + Hash, { - self.swap_remove_pair(key).map(second) + self.remove_pair(key).map(|(_, v)| v) } - /// FIXME Same as .swap_remove + /// Remove the key-value pair equivalent to `key` and return it. 
+ /// + /// This leaves a tombstone in the place of the removed element, which preserves + /// indices. + /// + /// Return `None` if `key` is not in map. /// /// Computes in **O(1)** time (average). - pub fn remove(&mut self, key: &Q) -> Option + fn remove_pair(&mut self, key: &Q) -> Option<(K, V)> + where K: Borrow, + Q: Eq + Hash, + { + self.remove_pair_index(key).map(|(_, k, v)| (k, v)) + } + + /// Remove the key-value pair equivalent to `key` and return it along with + /// the formerly occupied index. + /// + /// This leaves a tombstone in the place of the removed element, which preserves + /// indices. + /// + /// Return `None` if `key` is not in map. + /// + /// Computes in **O(1)** time (average). + fn remove_pair_index(&mut self, key: &Q) -> Option<(usize, K, V)> + where K: Borrow, + Q: Eq + Hash, + { + self.find_remove(key).map(|(_, found, k, v)| (found, k, v)) + } + + /// Remove the key-value pair equivalent to `key` and return the `value`. + /// + /// Like `Vec::swap_remove`, the pair is removed by swapping it with the + /// last element of the map and popping it off. **This perturbs + /// the postion of what used to be the last element!** + /// + /// Return `None` if `key` is not in map. + /// + /// Computes in **O(1)** time (average). + pub fn swap_remove(&mut self, key: &Q) -> Option where K: Borrow, Q: Eq + Hash, { - self.swap_remove(key) + self.swap_remove_pair(key).map(|(_, v)| v) } /// Remove the key-value pair equivalent to `key` and return it. @@ -910,11 +1084,31 @@ impl OrderMap where K: Borrow, Q: Eq + Hash, { - let (probe, found) = match self.find(key) { + let (probe, found, _, _) = match self.find(key) { None => return None, Some(t) => t, }; - Some(self.remove_found(probe, found)) + self.swap_remove_found(probe, found) + } +} + +// Methods that don't use any properties (Hash / Eq) of K. +// +// It's cleaner to separate them out, then the compiler checks that we are not +// using Hash + Eq at all in these methods. 
+// +// However, we should probably not let this show in the public API or docs. +impl OrderMap { + /// Remove all key-value pairs in the map, while preserving its capacity. + /// + /// Computes in **O(n)** time. + pub fn clear(&mut self) { + self.entry_tombstones = 0; + self.index_tombstones = 0; + self.entries.clear(); + for pos in &mut self.indices { + *pos = Pos::none(); + } } /// Remove the last key-value pair @@ -930,92 +1124,452 @@ impl OrderMap /// The order the elements are visited is not specified. /// /// Computes in **O(n)** time (average). - pub fn retain(&mut self, keep: F) -> Retain + pub fn retain(&mut self, keep: F) where F: FnMut(&mut K, &mut V) -> bool { - // We can use either forward or reverse scan, but forward was - // faster in a microbenchmark - Retain { - i: 0, - map: self, - keep: keep, - } + dispatch_32_vs_64!(self.retain_impl::<_>(keep)) } -} -impl OrderMap { /// Get a key-value pair by index /// - /// Valid indices are *0 <= index < self.len()* + /// Valid indices are *0 <= index < self.len() + self.tombstones()* /// /// Computes in **O(1)** time. pub fn get_index(&self, index: usize) -> Option<(&K, &V)> { - self.entries.get(index).map(|ent| (&ent.key, &ent.value)) + self.entries.get(index).and_then(|e| e.as_ref()) + .map(|e| (&e.key, &e.value)) } /// Get a key-value pair by index /// - /// Valid indices are *0 <= index < self.len()* + /// Valid indices are *0 <= index < self.len() + self.tombstones()* /// /// Computes in **O(1)** time. pub fn get_index_mut(&mut self, index: usize) -> Option<(&mut K, &mut V)> { - self.entries.get_mut(index).map(|ent| (&mut ent.key, &mut ent.value)) + self.entries.get_mut(index).and_then(|e| e.as_mut()) + .map(|e| (&mut e.key, &mut e.value)) } /// Remove the key-value pair by index /// - /// Valid indices are *0 <= index < self.len()* + /// Valid indices are *0 <= index < self.len() + self.tombstones()* /// /// Computes in **O(1)** time (average). 
pub fn swap_remove_index(&mut self, index: usize) -> Option<(K, V)> { - let (probe, found) = match self.entries.get(index) { - None => return None, - Some(e) => self.find_existing_entry(e), - }; - Some(self.remove_found(probe, found)) + self.entries.get(index).and_then(|e| e.as_ref()) + .map(|e| self.find_existing_entry(index, e.hash)) + .and_then(|probe| self.swap_remove_found(probe, index)) } -} -// Methods that don't use any properties (Hash / Eq) of K. -// + /// Remove the key-value pair by index + /// + /// Valid indices are *0 <= index < self.len() + self.tombstones()* + /// + /// This leaves a tombstone in the place of the removed element, which preserves + /// indices. + /// + /// Computes in **O(1)** time (average). + pub fn remove_index(&mut self, index: usize) -> Option<(K, V)> { + dispatch_32_vs_64!(self.remove_index_impl(index)) + } + + /// Swaps the index of two key-value pairs by index + /// + /// Valid indices are *0 <= index < self.len() + self.tombstones()* + /// + /// Computes in **O(1)** time (average). + pub fn swap_index(&mut self, a: usize, b: usize) { + dispatch_32_vs_64!(self.swap_index_impl(a, b)) + } + + /// Removes all the entry tombstones. + /// + /// Note that this means indices (e.g. those returned by `get_index`) may + /// no longer be valid. + /// + /// Computes in **O(n)** time (average). + pub fn remove_tombstones(&mut self) { + dispatch_32_vs_64!(self.remove_tombstones_impl()) + } +} + +// Methods that don't use any properties (Hash / Eq) of K. +// // It's cleaner to separate them out, then the compiler checks that we are not // using Hash + Eq at all in these methods. // // However, we should probably not let this show in the public API or docs. 
impl OrderMap { + fn first_allocation(&mut self) { + debug_assert_eq!(self.len(), 0); + let raw_cap = 8usize; + self.mask = raw_cap.wrapping_sub(1); + self.indices = vec![Pos::none(); raw_cap]; + self.entries = Vec::with_capacity(usable_capacity(raw_cap)); + } + + // `Sz` is *current* Size class, before grow + /// Computes in **O(n)** time. + fn double_capacity(&mut self) { + if self.raw_capacity() == 0 { + return self.first_allocation(); + } + let raw_cap = self.indices.len() * 2; + + dispatch_32_vs_64!(self.change_capacity(raw_cap)); + } + + #[inline(never)] + // `Sz` is *current* Size class, before grow + // it is the caller's responsibility to only pass powers of 2 to new_raw_cap + /// Computes in **O(n)** time. + fn change_capacity(&mut self, raw_cap: usize) + where Sz: Size + { + debug_assert!(self.raw_capacity() == 0 || self.len() > 0); + debug_assert!(raw_cap.is_power_of_two()); + if self.raw_capacity() == 0 { + self.mask = raw_cap.wrapping_sub(1); + self.indices = vec![Pos::none(); raw_cap]; + self.entries = Vec::with_capacity(usable_capacity(raw_cap)); + return; + } + + // find first ideally placed element -- start of cluster + let first_ideal = enumerate(&self.indices).find(|&(i, pos)| match pos.pos::() { + PosState::Value(pos) => 0 == probe_distance(self.mask, self.entries[pos].unwrap_hash(), i), + PosState::IsTombstone => false, + PosState::IsNone => false, + }).map_or(0, |(i, _)| i); + + // visit the entries in an order where we can simply reinsert them + // into self.indices without any bucket stealing. + let old_indices = replace(&mut self.indices, vec![Pos::none(); raw_cap]); + self.mask = raw_cap.wrapping_sub(1); + + // `Sz` is the old size class, and either u32 or u64 is the new + for &pos in &old_indices[first_ideal..] 
{ + dispatch_32_vs_64!(self.reinsert_entry_in_order::(pos)); + } + + for &pos in &old_indices[..first_ideal] { + dispatch_32_vs_64!(self.reinsert_entry_in_order::(pos)); + } + + // We've removed all the index tombstones + self.index_tombstones = 0; + + let more = self.capacity() - self.len(); + self.entries.reserve_exact(more); + } + + /// Removes all the index tombstones + /// + /// Computes in **O(n)** time. + fn remove_index_tombstones(&mut self) { + dispatch_32_vs_64!(self.remove_index_tombstones_impl()) + } + + /// Remove the last key-value pair + /// + /// Computes in **O(1)** time (average). fn pop_impl(&mut self) -> Option<(K, V)> { - let (probe, found) = match self.entries.last() { - None => return None, - Some(e) => self.find_existing_entry(e), - }; - debug_assert_eq!(found, self.entries.len() - 1); - Some(self.remove_found(probe, found)) + // if there entries are empty, just return + // otherwise, try to get the hash (last entry might be a tombstone) + let hash = if let Some(e) = self.entries.last() { + e.as_ref().map(|e| e.hash) + } else { return None }; + + let index = self.entries.len()-1; + if let Some(hash) = hash { + let probe = self.find_existing_entry(index, hash); + self.swap_remove_found(probe, index) + } else { + self.entries.pop(); + None + } } - /// phase 2 is post-insert where we forward-shift `Pos` in the indices. - fn insert_phase_2(&mut self, mut probe: usize, mut old_pos: Pos) + /// Removes all the index tombstones + /// + /// Computes in **O(n)** time. 
+ fn remove_index_tombstones_impl(&mut self) where Sz: Size { - probe_loop!(probe < self.indices.len(), { - let pos = &mut self.indices[probe]; - if pos.is_none() { - *pos = old_pos; + if self.index_tombstones == 0 { return } + + // Find the first tombstone after an ideal + let mut tombstone_head = enumerate(&self.indices).find(|&(i, index)| match index.pos::() { + PosState::Value(pos) => 0 == probe_distance(self.mask, self.entries[pos].unwrap_hash(), i), + PosState::IsTombstone => false, + PosState::IsNone => false, + }).map_or(0, |(i, _)| i); + probe_loop!(tombstone_head < self.indices.len(), { + if self.indices[tombstone_head].state() != PosState::Value(()) { break; - } else { - old_pos = replace(pos, old_pos); } + tombstone_head += 1; }); + + if self.indices[tombstone_head].state() == PosState::IsTombstone { + self.index_tombstones -= 1; + } + self.indices[tombstone_head] = Pos::none(); + + let mut tombstone_tail = tombstone_head; + let mut probe = tombstone_head + 1; + probe_loop!(probe < self.indices.len(), { + match self.indices[probe].resolve::() { + PosState::Value((i, hash_proxy)) => { + let entry_hash = hash_proxy.get_short_hash(&self.entries, i); + let dist = probe_distance(self.mask, entry_hash.into_hash(), probe); + if dist == 0 { + // clear the linked list + let mut iter = tombstone_head; + while let Some(next) = self.indices[iter].link_pos() { + self.indices[iter] = Pos::none(); + iter = next; + } + + // if we're out of tombstones, return + if self.index_tombstones == 0 { return } + + // find a new tombstone_tail + tombstone_head = probe + 1; + probe_loop!(tombstone_head < self.indices.len(), { + if self.indices[tombstone_head].state() != PosState::Value(()) { + break; + } + }); + tombstone_tail = tombstone_head; + } else { + loop { + let empty_dist = probe_delta(self.mask, tombstone_head, probe); + if dist >= empty_dist { + // move the value up + self.indices[tombstone_head] = self.indices[probe]; + self.indices[probe] = Pos::none(); + + if let 
Some(next) = self.indices[tombstone_head].link_pos() { + // add the value's old space to our linked list + self.indices[tombstone_tail] = Pos::link(probe); + // move the head of the linked list forward + tombstone_head = next; + } else { + // the list is empty, so just use the recently empied + // probe as our new list + tombstone_head = probe; + tombstone_tail = probe; + } + } else { + if let Some(next) = self.indices[tombstone_head].link_pos() { + // pop off the head and clear it + self.indices[tombstone_head] = Pos::none(); + tombstone_head = next; + } else { + // if we're out of tombstones, return + if self.index_tombstones == 0 { return } + + // find a new tombstone for the list + tombstone_head = probe + 1; + probe_loop!(tombstone_head < self.indices.len(), { + if self.indices[tombstone_head].state() != PosState::Value(()) { + break; + } + }) + } + } + } + } + }, + PosState::IsTombstone => { + self.index_tombstones -= 1; + // push it onto the back of the linked list + self.indices[probe] = Pos::none(); + self.indices[tombstone_tail] = Pos::link(probe); + tombstone_tail = probe; + }, + PosState::IsNone => { + // clear the tombstone list + let mut iter = tombstone_head; + while let Some(next) = self.indices[iter].link_pos() { + self.indices[iter] = Pos::none(); + iter = next; + } + + // if we're out of tombstones, return + if self.index_tombstones == 0 { return } + + // find a new tombstone for the list + tombstone_head = probe + 1; + probe_loop!(tombstone_head < self.indices.len(), { + if self.indices[tombstone_head].state() != PosState::Value(()) { + break; + } + }) + }, + } + }); + } + + // write to self.indices + // read from self.entries at `pos` + // + // reinserting rewrites all `Pos` entries anyway. This handles transitioning + // from u32 to u64 size class if needed by using the two type parameters. 
+ fn reinsert_entry_in_order(&mut self, pos: Pos) + where SzNew: Size, + SzOld: Size, + { + if let PosState::Value((i, hash_proxy)) = pos.resolve::() { + // only if the size class is conserved can we use the short hash + let entry_hash = if SzOld::is_same_size::() { + hash_proxy.get_short_hash(&self.entries, i).into_hash() + } else { + debug_assert!(self.entries[i].is_some()); + self.entries[i].unwrap_hash() + }; + // find first empty bucket or tombstone and insert there + let mut probe = desired_pos(self.mask, entry_hash); + probe_loop!(probe < self.indices.len(), { + match self.indices[probe].state() { + // skip values + PosState::Value(()) => {}, + PosState::IsNone => { + self.indices[probe] = Pos::with_hash::(i, entry_hash); + return + }, + PosState::IsTombstone => debug_assert!(false, "reinserting into tombstones"), + } + }); + } } + /// Reserves at least one capacity + /// Computes in **O(n)** time (average). + fn reserve_one(&mut self) { + if self.len() == self.capacity() { + self.double_capacity(); + } + } - /// Return probe (indices) and position (entries) - fn find_using(&self, hash: HashValue, key_eq: F) -> Option<(usize, usize)> + /// Scan through each key-value pair in the map and keep those where the + /// closure `keep` returns `true`. + /// + /// The order the elements are visited is not specified. + /// + /// Computes in **O(n)** time (average). 
+ fn retain_impl(&mut self, mut keep: F) + where F: FnMut(&mut K, &mut V) -> bool, + Sz: Size, + { + for (i, e) in self.entries.iter_mut().enumerate() { + let hash = if let Some(e) = e.as_mut() { + if keep(&mut e.key, &mut e.value) { continue } else { e.hash } + } else { continue }; + + let probe = find_existing_entry_impl::(self.mask, &self.indices, i, hash); + self.indices[probe] = Pos::tombstone(); + self.index_tombstones += 1; + self.entry_tombstones += 1; + e.take(); + } + } + + /// Remove the key-value pair by index + /// + /// Valid indices are *0 <= index < self.len() + self.tombstones()* + /// + /// This leaves a tombstone in the place of the removed element, which preserves + /// indices. + /// + /// Computes in **O(1)** time (average). + fn remove_index_impl(&mut self, index: usize) -> Option<(K, V)> + where Sz: Size + { + let mask = self.mask; + let indices = &mut self.indices; + let entry_tombstones = &mut self.entry_tombstones; + let index_tombstones = &mut self.index_tombstones; + self.entries.get_mut(index).and_then(|e| { + if let Some(e) = e.as_ref() { + let probe = find_existing_entry_impl::(mask, indices, index, e.hash); + indices[probe] = Pos::tombstone(); + *entry_tombstones += 1; + *index_tombstones += 1; + } + e.take().map(|e| (e.key, e.value)) + }) + } + + /// Swaps the index of two key-value pairs by index + /// + /// Valid indices are *0 <= index < self.len() + self.tombstones()* + /// + /// Computes in **O(1)** time (average). 
+ fn swap_index_impl(&mut self, a: usize, b: usize) + where Sz: Size + { + if a == b { return } + + match (self.entries[a].as_ref(), self.entries[b].as_ref()) { + (None, None) => {}, + (None, Some(b_ref)) => { + let probe_b = self.find_existing_entry(b, b_ref.hash); + self.indices[probe_b] = Pos::with_hash::(a, b_ref.hash) + }, + (Some(a_ref), None) => { + let probe_a = self.find_existing_entry(a, a_ref.hash); + self.indices[probe_a] = Pos::with_hash::(b, a_ref.hash) + }, + (Some(a_ref), Some(b_ref)) => { + let probe_a = self.find_existing_entry(a, a_ref.hash); + let probe_b = self.find_existing_entry(b, b_ref.hash); + self.indices.swap(probe_a, probe_b); + } + } + self.entries.swap(a, b); + } + + /// Removes all the entry tombstones. + /// + /// Note that this means indices (e.g. those returned by `get_index`) may + /// no longer be valid. + /// + /// Computes in **O(n)** time (average). + fn remove_tombstones_impl(&mut self) + where Sz: Size + { + let mask = self.mask; + let indices = &mut self.indices; + let mut removed = 0; + let mut index = 0; + + self.entries.retain(|e| { + index += 1; + if let Some(e) = e.as_ref() { + if removed != 0 { + let probe = find_existing_entry_impl::(mask, indices, index-1, e.hash); + indices[probe].sub_eq::(removed); + } + true + } else { + removed += 1; + false + } + }); + + self.entry_tombstones = 0; + } + + /// Return probe (indices) and position (entries), and kv reference + fn find_using(&self, hash: HashValue, key_eq: F) -> Option<(usize, usize, &K, &V)> where F: Fn(&Bucket) -> bool, { dispatch_32_vs_64!(self.find_using_impl::<_>(hash, key_eq)) } - fn find_using_impl(&self, hash: HashValue, key_eq: F) -> Option<(usize, usize)> + fn find_using_impl(&self, hash: HashValue, key_eq: F) -> Option<(usize, usize, &K, &V)> where F: Fn(&Bucket) -> bool, Sz: Size, { @@ -1023,81 +1577,155 @@ impl OrderMap { let mut probe = desired_pos(self.mask, hash); let mut dist = 0; probe_loop!(probe < self.indices.len(), { - if let Some((i, 
hash_proxy)) = self.indices[probe].resolve::() { - let entry_hash = hash_proxy.get_short_hash(&self.entries, i); - if dist > probe_distance(self.mask, entry_hash.into_hash(), probe) { - // give up when probe distance is too long - return None; - } else if entry_hash == hash && key_eq(&self.entries[i]) { - return Some((probe, i)); - } - } else { - return None; + match self.indices[probe].resolve::() { + PosState::Value((i, hash_proxy)) => { + let entry_hash = hash_proxy.get_short_hash(&self.entries, i); + if dist > probe_distance(self.mask, entry_hash.into_hash(), probe) { + // give up when probe distance is too long + return None; + } else if entry_hash == hash { + if let Some(e) = self.entries[i].as_ref() { + if key_eq(e) { return Some((probe, i, &e.key, &e.value)); } + } + } + }, + PosState::IsTombstone => {}, + PosState::IsNone => return None, } dist += 1; }); } - /// Find `entry` which is already placed inside self.entries; - /// return its probe and entry index. - fn find_existing_entry(&self, entry: &Bucket) -> (usize, usize) + /// Return probe (indices), position (entries), and kv reference + fn find_mut_using(&mut self, hash: HashValue, key_eq: F) -> Option<(usize, usize, &mut K, &mut V)> + where F: Fn(&Bucket) -> bool, { - dispatch_32_vs_64!(self.find_existing_entry_impl(entry)) + dispatch_32_vs_64!(self.find_mut_using_impl::<_>(hash, key_eq)) } - fn find_existing_entry_impl(&self, entry: &Bucket) -> (usize, usize) - where Sz: Size, + fn find_mut_using_impl(&mut self, hash: HashValue, key_eq: F) -> Option<(usize, usize, &mut K, &mut V)> + where F: Fn(&Bucket) -> bool, + Sz: Size, { debug_assert!(self.len() > 0); - let hash = entry.hash; - let actual_pos = ptrdistance(&self.entries[0], entry); let mut probe = desired_pos(self.mask, hash); + let mut dist = 0; probe_loop!(probe < self.indices.len(), { - if let Some((i, _)) = self.indices[probe].resolve::() { - if i == actual_pos { - return (probe, actual_pos); - } - } else { - debug_assert!(false, "the entry 
does not exist"); + match self.indices[probe].resolve::() { + PosState::Value((i, hash_proxy)) => { + let entry_hash = hash_proxy.get_short_hash(&self.entries, i); + if dist > probe_distance(self.mask, entry_hash.into_hash(), probe) { + // give up when probe distance is too long + return None; + } else if entry_hash == hash && self.entries[i].as_ref().map_or(false, &key_eq) { + // TODO: We shouldn't need unwrap here, blocked on rust-lang/rfcs#811 + let e = self.entries[i].as_mut().unwrap(); + return Some((probe, i, &mut e.key, &mut e.value)); + } + }, + PosState::IsTombstone => {}, + PosState::IsNone => return None, + } + dist += 1; + }); + } + + /// Return probe (indices) and position (entries) + fn find_remove_using(&mut self, hash: HashValue, key_eq: F) -> Option<(usize, usize, K, V)> + where F: Fn(&Bucket) -> bool, + { + dispatch_32_vs_64!(self.find_remove_using_impl::<_>(hash, key_eq)) + } + + fn find_remove_using_impl(&mut self, hash: HashValue, key_eq: F) -> Option<(usize, usize, K, V)> + where F: Fn(&Bucket) -> bool, + Sz: Size, + { + debug_assert!(self.len() > 0); + let mut probe = desired_pos(self.mask, hash); + let mut dist = 0; + probe_loop!(probe < self.indices.len(), { + match self.indices[probe].resolve::() { + PosState::Value((i, hash_proxy)) => { + let entry_hash = hash_proxy.get_short_hash(&self.entries, i); + if dist > probe_distance(self.mask, entry_hash.into_hash(), probe) { + // give up when probe distance is too long + return None; + } else if entry_hash == hash && self.entries[i].as_ref().map_or(false, &key_eq) { + self.index_tombstones += 1; + self.entry_tombstones += 1; + self.indices[probe] = Pos::tombstone(); + let e = self.entries[i].take().unwrap(); + return Some((probe, i, e.key, e.value)); + } + }, + PosState::IsTombstone => {}, + PosState::IsNone => return None, } + dist += 1; }); } - fn remove_found(&mut self, probe: usize, found: usize) -> (K, V) { - dispatch_32_vs_64!(self.remove_found_impl(probe, found)) + /// Find an entry 
already placed inside `self.entries` given its position and hash. + /// Return the probe for the entry. + /// + /// Computes in **O(1)** time (average). + fn find_existing_entry(&self, actual_pos: usize, hash: HashValue) -> usize + { + dispatch_32_vs_64!(self.find_existing_entry_impl(actual_pos, hash)) } - fn remove_found_impl(&mut self, probe: usize, found: usize) -> (K, V) + /// Find an entry already placed inside `self.entries` given its position and hash. + /// Return the probe for the entry. + /// + /// Computes in **O(1)** time (average). + fn find_existing_entry_impl(&self, actual_pos: usize, hash: HashValue) -> usize + where Sz: Size, + { + debug_assert!(self.len() > actual_pos); + find_existing_entry_impl::(self.mask, &self.indices, actual_pos, hash) + } + + fn swap_remove_found(&mut self, probe: usize, found: usize) -> Option<(K, V)> { + dispatch_32_vs_64!(self.swap_remove_found_impl(probe, found)) + } + + fn swap_remove_found_impl(&mut self, probe: usize, found: usize) -> Option<(K, V)> where Sz: Size { // index `probe` and entry `found` is to be removed // use swap_remove, but then we need to update the index that points // to the other entry that has to move self.indices[probe] = Pos::none(); - let entry = self.entries.swap_remove(found); + let kv = self.entries.swap_remove(found).map(|e| (e.key, e.value)); + // correct index that points to the entry that had to swap places - if let Some(entry) = self.entries.get(found) { - // was not last element + // check if it was the last element or a tombstone + if let Some(e) = self.entries.get(found).and_then(|e| e.as_ref()) { // examine new element in `found` and find it in indices - let mut probe = desired_pos(self.mask, entry.hash); + let mut probe = desired_pos(self.mask, e.hash); probe_loop!(probe < self.indices.len(), { - if let Some((i, _)) = self.indices[probe].resolve::() { + if let PosState::Value(i) = self.indices[probe].pos::() { if i >= self.entries.len() { // found it - self.indices[probe] = 
Pos::with_hash::(found, entry.hash); + self.indices[probe] = Pos::with_hash::(found, e.hash); break; } } }); } + + // if we're empty there is there's no work to do + if self.len() == 0 { return kv } + // backward shift deletion in self.indices // after probe, shift all non-ideally placed indices backward - if self.len() > 0 { - let mut last_probe = probe; - let mut probe = probe + 1; - probe_loop!(probe < self.indices.len(), { - if let Some((i, hash_proxy)) = self.indices[probe].resolve::() { + let mut last_probe = probe; + let mut probe = probe + 1; + probe_loop!(probe < self.indices.len(), { + match self.indices[probe].resolve::() { + PosState::Value((i, hash_proxy)) => { let entry_hash = hash_proxy.get_short_hash(&self.entries, i); if probe_distance(self.mask, entry_hash.into_hash(), probe) > 0 { self.indices[last_probe] = self.indices[probe]; @@ -1105,16 +1733,57 @@ impl OrderMap { } else { break; } - } else { - break; - } - last_probe = probe; - }); - } + }, + // Always move tombstones + PosState::IsTombstone => { + self.indices[last_probe] = Pos::tombstone(); + self.indices[probe] = Pos::none(); + }, + PosState::IsNone => break, + } + last_probe = probe; + }); - (entry.key, entry.value) + kv } +} +/// phase 2 is post-insert where we forward-shift `Pos` in the indices. +fn insert_phase_2(indices: &mut Vec, index_tombstones: &mut usize, mut probe: usize, mut old_pos: Pos) + where Sz: Size +{ + probe_loop!(probe < indices.len(), { + let pos = &mut indices[probe]; + match pos.state() { + PosState::Value(()) => old_pos = replace(pos, old_pos), + PosState::IsTombstone => { + *index_tombstones -= 1; + *pos = old_pos; + break; + }, + PosState::IsNone => { + *pos = old_pos; + break; + }, + } + }); +} + +/// Find an entry already placed inside `self.entries` given its position and hash. +/// Return the probe for the entry. +/// +/// Computes in **O(1)** time (average). 
+fn find_existing_entry_impl(mask: usize, indices: &Vec, actual_pos: usize, hash: HashValue) -> usize + where Sz: Size, +{ + let mut probe = desired_pos(mask, hash); + probe_loop!(probe < indices.len(), { + match indices[probe].pos::() { + PosState::Value(i) => if i == actual_pos { return probe }, + PosState::IsTombstone => {}, + PosState::IsNone => debug_assert!(false, "the entry does not exist"), + } + }); } use std::slice::Iter as SliceIter; @@ -1122,26 +1791,51 @@ use std::slice::IterMut as SliceIterMut; use std::vec::IntoIter as VecIntoIter; pub struct Keys<'a, K: 'a, V: 'a> { - iter: SliceIter<'a, Bucket>, + iter: SliceIter<'a, Option>>, + tombstones: usize, } impl<'a, K, V> Iterator for Keys<'a, K, V> { type Item = &'a K; fn next(&mut self) -> Option<&'a K> { - self.iter.next().map(|ent| &ent.key) + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some(e) = e.as_ref() { + Some(&e.key) + } else { + *tombstones -= 1; + None + } + }).next() } fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() + let len = self.len(); + (len, Some(len)) } fn count(self) -> usize { - self.iter.len() + self.len() } fn nth(&mut self, n: usize) -> Option { - self.iter.nth(n).map(|ent| &ent.key) + if self.tombstones == 0 { + self.iter.nth(n).map(|e| { + let e = e.as_ref().unwrap(); + &e.key + }) + } else { + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some(e) = e.as_ref() { + Some(&e.key) + } else { + *tombstones -= 1; + None + } + }).nth(n) + } } fn last(mut self) -> Option { @@ -1151,72 +1845,71 @@ impl<'a, K, V> Iterator for Keys<'a, K, V> { impl<'a, K, V> DoubleEndedIterator for Keys<'a, K, V> { fn next_back(&mut self) -> Option<&'a K> { - self.iter.next_back().map(|ent| &ent.key) - } -} - -pub struct Retain<'a, K: 'a, V: 'a, S: 'a, F: FnMut(&mut K, &mut V) -> bool> { - map: &'a mut OrderMap, - keep: F, - i: usize, -} - -impl<'a, K, V, S, F> Drop for Retain<'a, K, V, S, F> - 
where F: FnMut(&mut K, &mut V) -> bool -{ - fn drop(&mut self) { - for _ in self {} - } -} - - -impl<'a, K, V, S, F> Iterator for Retain<'a, K, V, S, F> - where F: FnMut(&mut K, &mut V) -> bool -{ - type Item = (K, V); - - fn next(&mut self) -> Option { - while self.i < self.map.len() { - let keep = { - let entry = &mut self.map.entries[self.i]; - (self.keep)(&mut entry.key, &mut entry.value) - }; - if keep { - self.i += 1; + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some(e) = e.as_ref() { + Some(&e.key) } else { - // skip increment on remove - return self.map.swap_remove_index(self.i); + *tombstones -= 1; + None } - } - None + }).next_back() } +} - fn size_hint(&self) -> (usize, Option) { - (0, Some(self.map.len() - self.i)) +impl<'a, K, V> ExactSizeIterator for Keys<'a, K, V> { + fn len(&self) -> usize { + self.iter.len() - self.tombstones } } + pub struct Iter<'a, K: 'a, V: 'a> { - iter: SliceIter<'a, Bucket>, + iter: SliceIter<'a, Option>>, + tombstones: usize, } impl<'a, K, V> Iterator for Iter<'a, K, V> { type Item = (&'a K, &'a V); fn next(&mut self) -> Option { - self.iter.next().map(|e| (&e.key, &e.value)) + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some(e) = e.as_ref() { + Some((&e.key, &e.value)) + } else { + *tombstones -= 1; + None + } + }).next() } fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() + let len = self.len(); + (len, Some(len)) } fn count(self) -> usize { - self.iter.len() + self.len() } fn nth(&mut self, n: usize) -> Option { - self.iter.nth(n).map(|e| (&e.key, &e.value)) + if self.tombstones == 0 { + self.iter.nth(n).map(|e| { + let e = e.as_ref().unwrap(); + (&e.key, &e.value) + }) + } else { + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some(e) = e.as_ref() { + Some((&e.key, &e.value)) + } else { + *tombstones -= 1; + None + } + }).nth(n) + } } fn last(mut self) -> Option { @@ 
-1226,31 +1919,70 @@ impl<'a, K, V> Iterator for Iter<'a, K, V> { impl<'a, K, V> DoubleEndedIterator for Iter<'a, K, V> { fn next_back(&mut self) -> Option { - self.iter.next_back().map(|e| (&e.key, &e.value)) + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some(e) = e.as_ref() { + Some((&e.key, &e.value)) + } else { + *tombstones -= 1; + None + } + }).next_back() + } +} + +impl<'a, K, V> ExactSizeIterator for Iter<'a, K, V> { + fn len(&self) -> usize { + self.iter.len() - self.tombstones } } pub struct IterMut<'a, K: 'a, V: 'a> { - iter: SliceIterMut<'a, Bucket>, + iter: SliceIterMut<'a, Option>>, + tombstones: usize, } impl<'a, K, V> Iterator for IterMut<'a, K, V> { type Item = (&'a K, &'a mut V); fn next(&mut self) -> Option { - self.iter.next().map(|e| (&e.key, &mut e.value)) + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some(e) = e.as_mut() { + Some((&e.key, &mut e.value)) + } else { + *tombstones -= 1; + None + } + }).next() } fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() + let len = self.len(); + (len, Some(len)) } fn count(self) -> usize { - self.iter.len() + self.len() } fn nth(&mut self, n: usize) -> Option { - self.iter.nth(n).map(|e| (&e.key, &mut e.value)) + if self.tombstones == 0 { + self.iter.nth(n).map(|e| { + let e = e.as_mut().unwrap(); + (&e.key, &mut e.value) + }) + } else { + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some(e) = e.as_mut() { + Some((&e.key, &mut e.value)) + } else { + *tombstones -= 1; + None + } + }).nth(n) + } } fn last(mut self) -> Option { @@ -1260,31 +1992,70 @@ impl<'a, K, V> Iterator for IterMut<'a, K, V> { impl<'a, K, V> DoubleEndedIterator for IterMut<'a, K, V> { fn next_back(&mut self) -> Option { - self.iter.next_back().map(|e| (&e.key, &mut e.value)) + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some(e) = 
e.as_mut() { + Some((&e.key, &mut e.value)) + } else { + *tombstones -= 1; + None + } + }).next_back() + } +} + +impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> { + fn len(&self) -> usize { + self.iter.len() - self.tombstones } } pub struct IntoIter { - iter: VecIntoIter>, + iter: VecIntoIter>>, + tombstones: usize, } impl Iterator for IntoIter { type Item = (K, V); fn next(&mut self) -> Option { - self.iter.next().map(|e| (e.key, e.value)) + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some(e) = e { + Some((e.key, e.value)) + } else { + *tombstones -= 1; + None + } + }).next() } fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() + let len = self.len(); + (len, Some(len)) } fn count(self) -> usize { - self.iter.len() + self.len() } fn nth(&mut self, n: usize) -> Option { - self.iter.nth(n).map(|e| (e.key, e.value)) + if self.tombstones == 0 { + self.iter.nth(n).map(|e| { + let e = e.unwrap(); + (e.key, e.value) + }) + } else { + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some(e) = e { + Some((e.key, e.value)) + } else { + *tombstones -= 1; + None + } + }).nth(n) + } } fn last(mut self) -> Option { @@ -1292,9 +2063,23 @@ impl Iterator for IntoIter { } } -impl<'a, K, V> DoubleEndedIterator for IntoIter { +impl ExactSizeIterator for IntoIter { + fn len(&self) -> usize { + self.iter.len() - self.tombstones + } +} + +impl DoubleEndedIterator for IntoIter { fn next_back(&mut self) -> Option { - self.iter.next_back().map(|e| (e.key, e.value)) + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(|e| { + if let Some(e) = e { + Some((e.key, e.value)) + } else { + *tombstones -= 1; + None + } + }).next_back() } } @@ -1329,6 +2114,7 @@ impl IntoIterator for OrderMap fn into_iter(self) -> Self::IntoIter { IntoIter { iter: self.entries.into_iter(), + tombstones: self.entry_tombstones, } } } @@ -1491,6 +2277,56 @@ mod tests { } } + #[test] + fn 
swap() { + let mut insert = [0, 4, 2, 12]; + let mut map = OrderMap::new(); + + for &elt in &insert { + map.insert(elt, ()); + } + + assert_eq!(map.keys().count(), map.len()); + assert_eq!(map.keys().count(), insert.len()); + for (a, b) in insert.iter().zip(map.keys()) { + assert_eq!(a, b); + } + for (i, k) in (0..insert.len()).zip(map.keys()) { + assert_eq!(map.get_index(i).unwrap().0, k); + } + + println!("{:?}", map); + println!("{:?}", insert); + + insert.swap(1, 2); + map.swap_index(1, 2); + + println!("{:?}", insert); + println!("{:?}", map); + + assert_eq!(map.keys().count(), map.len()); + assert_eq!(map.keys().count(), insert.len()); + for (a, b) in insert.iter().zip(map.keys()) { + assert_eq!(a, b); + } + for (i, k) in (0..insert.len()).zip(map.keys()) { + assert_eq!(map.get_index(i).unwrap().0, k); + } + + + insert.swap(0, 3); + map.swap_index(0, 3); + + assert_eq!(map.keys().count(), map.len()); + assert_eq!(map.keys().count(), insert.len()); + for (a, b) in insert.iter().zip(map.keys()) { + assert_eq!(a, b); + } + for (i, k) in (0..insert.len()).zip(map.keys()) { + assert_eq!(map.get_index(i).unwrap().0, k); + } + } + #[test] fn grow() { let insert = [0, 4, 2, 12, 8, 7, 11]; @@ -1524,7 +2360,7 @@ mod tests { #[test] fn retain() { - let mut insert = vec![0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; + let insert = vec![0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; let mut map = OrderMap::new(); for &elt in &insert { @@ -1536,39 +2372,22 @@ mod tests { for (a, b) in insert.iter().zip(map.keys()) { assert_eq!(a, b); } - - let mut removed_ex = Vec::new(); - for i in 0.. 
{ - while insert.get(i).iter().filter(|&v| **v >= 10).next().is_some() { - removed_ex.push(insert.swap_remove(i)); - } - if i > insert.len() { - break; - } - } - println!("{:?}", removed_ex); - - { - let removed: Vec<_> = map.retain(|k, _| *k < 10).collect(); - assert_eq!(removed.len(), removed_ex.len()); - for (&a, (b, _)) in removed_ex.iter().zip(removed) { - assert_eq!(a, b); - } - } + let removed: Vec<_> = insert.iter().filter(|&v| *v >= 10).collect(); + map.retain(|k, _| *k < 10); println!("{:?}", insert); println!("{:?}", map); - assert_eq!(map.keys().count(), insert.len()); - assert_eq!(map.keys().count(), insert.len()); + assert_eq!(map.len(), insert.len() - removed.len()); - for (&a, &b) in insert.iter().zip(map.keys()) { + for (&a, &b) in insert.iter().filter(|&v| *v < 10).zip(map.keys()) { assert_eq!(a, b); } } + #[test] - fn remove() { + fn swap_remove() { let insert = [0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; let mut map = OrderMap::new(); @@ -1602,6 +2421,68 @@ mod tests { assert_eq!(map.keys().count(), insert.len() - remove.len()); } + #[test] + fn remove() { + let insert = [0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; + let mut map = OrderMap::new(); + + for &elt in &insert { + map.insert(elt, elt); + } + + assert_eq!(map.keys().count(), map.len()); + assert_eq!(map.keys().count(), insert.len()); + for (a, b) in insert.iter().zip(map.keys()) { + assert_eq!(a, b); + } + + let remove_fail = [99, 77]; + let remove = [4, 12, 8, 7]; + + for &key in &remove_fail { + assert!(map.get_pair_index(&key).is_none()); + assert!(map.remove(&key).is_none()); + } + println!("{:?}", map); + for &key in &remove { + assert!(map.get_pair_index(&key).is_some()); + let (index, _, _) = map.get_pair_index(&key).unwrap(); + assert!(map.get_index(index).is_some()); + assert_eq!(map.remove(&key), Some(key)); + assert!(map.get_pair_index(&key).is_none()); + assert!(map.get_index(index).is_none()); + } + println!("{:?}", map); + + for key in &insert { + 
assert_eq!(map.get(key).is_some(), !remove.contains(key)); + } + + assert_eq!(map.tombstones(), remove.len()); + assert_eq!(map.len(), insert.len() - remove.len()); + assert_eq!(map.keys().count(), insert.len() - remove.len()); + + for (&a, (&b, _)) in insert.iter().filter(|key| !remove.contains(key)).zip(map.iter()) { + assert_eq!(a, b); + } + + map.remove_tombstones(); + + assert_eq!(map.tombstones(), 0); + assert_eq!(map.len(), insert.len() - remove.len()); + assert_eq!(map.keys().count(), insert.len() - remove.len()); + + for (&a, (&b, _)) in insert.iter().filter(|key| !remove.contains(key)).zip(map.iter()) { + assert_eq!(a, b); + } + + for (i, &a) in insert.iter().filter(|key| !remove.contains(key)).enumerate() { + assert!(map.get_index(i).is_some()); + assert_eq!(a, *map.get_index(i).unwrap().0); + } + + } + #[test] fn swap_remove_index() { let insert = [0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; diff --git a/src/util.rs b/src/util.rs index 95742631..6c841123 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,16 +1,8 @@ use std::iter::Enumerate; -use std::mem::size_of; -pub fn second(t: (A, B)) -> B { t.1 } pub fn enumerate(iterable: I) -> Enumerate where I: IntoIterator { iterable.into_iter().enumerate() } - -/// return the number of steps from a to b -pub fn ptrdistance(a: *const T, b: *const T) -> usize { - debug_assert!(a as usize <= b as usize); - (b as usize - a as usize) / size_of::() -} diff --git a/tests/quick.rs b/tests/quick.rs index bfc2963c..97e77b1c 100644 --- a/tests/quick.rs +++ b/tests/quick.rs @@ -123,8 +123,8 @@ impl Arbitrary for Op } fn do_ops(ops: &[Op], a: &mut OrderMap, b: &mut HashMap) - where K: Hash + Eq + Clone, - V: Clone, + where K: Hash + Eq + Clone + Debug, + V: Clone + Debug, { for op in ops { match *op { @@ -151,7 +151,7 @@ fn do_ops(ops: &[Op], a: &mut OrderMap, b: &mut HashMap) } } } - //println!("{:?}", a); + assert_eq!(a.len(), b.len(), "last operation was {:?}", op); } } @@ -200,7 +200,6 @@ quickcheck! 
{ assert!(map.contains_key(key)); } true - } // Check that retain visits each key exactly once From c1fab6bdf253d26f231c1a6e42e8d9cc5d44cb50 Mon Sep 17 00:00:00 2001 From: Popog Date: Fri, 25 Nov 2016 10:41:27 -0800 Subject: [PATCH 3/7] Early return from remove_tombstone Don't need to do anything if there a no tombstones --- src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 7aff50ce..a0a18cc8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1545,6 +1545,8 @@ impl OrderMap { let mut removed = 0; let mut index = 0; + if self.entry_tombstones == 0 { return } + self.entries.retain(|e| { index += 1; if let Some(e) = e.as_ref() { From 700eba3f28da051f61679f718de90d4df6287c57 Mon Sep 17 00:00:00 2001 From: Popog Date: Wed, 30 Nov 2016 22:07:10 -0800 Subject: [PATCH 4/7] Some tombstone fixes plus more variants for perf testing Contains unsafe code, but it should all be contained in unsafe.rs and guarded by feature flags. --- Cargo.toml | 5 + src/lib.rs | 752 ++++++++++++++++++++++++++++++++++---------------- src/unsafe.rs | 163 +++++++++++ 3 files changed, 682 insertions(+), 238 deletions(-) create mode 100644 src/unsafe.rs diff --git a/Cargo.toml b/Cargo.toml index e92fc795..29c9ee59 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ description = "A hash table with consistent order and fast iteration." 
bench = false [dependencies] +efficient_enum = { version = "^0.3.1", optional = true } [dev-dependencies] itertools = "0.5.1" @@ -23,6 +24,10 @@ lazy_static = "0.2" # for testing only, of course test_low_transition_point = [] test_debug = [] +test_efficient_enum = ["efficient_enum", "unsafe"] +test_unsafe_efficient_enum = ["test_efficient_enum"] +test_unsafe_index = ["unsafe"] +unsafe = [] [profile.bench] debug = true diff --git a/src/lib.rs b/src/lib.rs index a0a18cc8..65b52f64 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,9 @@ #![doc(html_root_url = "https://docs.rs/ordermap/0.2/")] +#[cfg(feature="test_efficient_enum")] +extern crate efficient_enum; + mod macros; mod util; @@ -15,18 +18,33 @@ use std::fmt; use std::mem::replace; use std::marker::PhantomData; -use util::{enumerate}; +use util::enumerate; fn hash_elem_using(build: &B, k: &K) -> HashValue { let mut h = build.build_hasher(); k.hash(&mut h); - HashValue(h.finish() as usize) + HashValue::new(h.finish() as usize) +} + +#[cfg(not(feature="test_unsafe_index"))] +macro_rules! i { + ($e:ident$(.$e2:ident)*[$i:expr]) => {$e$(.$e2)*[$i]}; + (&$e:ident$(.$e2:ident)*[$i:expr]) => {&$e$(.$e2)*[$i]}; } +#[cfg(not(feature="test_unsafe_index"))] +macro_rules! im { + ($e:ident$(.$e2:ident)*[$i:expr]) => {$e$(.$e2)*[$i]}; + (&mut $e:ident$(.$e2:ident)*[$i:expr]) => {&mut $e(.$e2)+[$i]}; +} + +#[cfg(feature="unsafe")] +include!("unsafe.rs"); + /// Hash value newtype. Not larger than usize, since anything larger /// isn't used for selecting position anyway. 
#[derive(Copy, Debug)] -struct HashValue(usize); +pub struct HashValue(usize); impl Clone for HashValue { #[inline] @@ -38,11 +56,145 @@ impl PartialEq for HashValue { self.0 == rhs.0 } } +impl HashValue { + #[cfg(not(feature="test_efficient_enum"))] + pub fn new(v: usize) -> HashValue { + HashValue(v) + } + + pub fn eq_lo32(&self, rhs: &HashValue) -> bool { + lo32(self.0 as u64) == lo32(rhs.0 as u64) + } + + pub fn desired_pos(self, mask: usize) -> usize { + self.0 & mask + } + + pub fn combine_pos(self, i: usize) -> u64 { + i as u64 | (lo32(self.0 as u64) << 32) as u64 + } +} + +#[cfg(not(feature="test_efficient_enum"))] +#[derive(Copy,Clone,Debug)] +pub struct Bucket { + hash: HashValue, + option: Option<(K, V)>, +} + +/// A type which can take values from a Bucket, leaving the bucket empty +#[cfg(not(feature="test_efficient_enum"))] +pub struct BucketTaker<'a, K: 'a, V: 'a>(&'a mut Bucket); + +#[cfg(not(feature="test_efficient_enum"))] +impl Bucket { + pub fn new(hash: HashValue, key: K, value: V) -> Self { + Bucket { hash: hash, option: Some((key, value)) } + } + + pub fn unwrap_hash_key(&self) -> (HashValue, &K) { + debug_assert!(self.option.is_some()); + (self.hash, &self.option.as_ref().unwrap().0) + } + + // if the bucket is none, gives a meaningless hash in release and panics in debug + pub fn hash(&self) -> Option { + if self.option.is_some() { + Some(self.hash) + } else { None } + } + + // if the bucket is none, gives a meaningless hash in release and panics in debug + pub fn unwrap_hash(&self) -> HashValue { + debug_assert!(self.option.is_some()); + self.hash + } + + pub fn kv(&self) -> Option<(&K, &V)> { + self.option.as_ref().map(|e| (&e.0, &e.1)) + } + + pub fn unwrap_kv(&self) -> (&K, &V) { + debug_assert!(self.option.is_some()); + let kv = self.option.as_ref().unwrap(); + (&kv.0, &kv.1) + } + + pub fn kv_mut(&mut self) -> Option<(&mut K, &mut V)> { + self.option.as_mut().map(|e| (&mut e.0, &mut e.1)) + } + + pub fn unwrap_kv_mut(&mut self) -> 
(&mut K, &mut V) { + debug_assert!(self.option.is_some()); + let kv = self.option.as_mut().unwrap(); + (&mut kv.0, &mut kv.1) + } + + pub fn taker<'a>(&'a mut self) -> Option> { + if self.option.is_some() { + Some(BucketTaker(self)) + } else { None } + } + + pub fn unwrap_taker<'a>(&'a mut self) -> BucketTaker<'a, K, V> { + debug_assert!(self.option.is_some()); + BucketTaker(self) + } + + pub fn take(&mut self) -> Option<(K, V)> { + self.option.take() + } + + pub fn into_kv(self) -> Option<(K, V)> { + debug_assert!(self.option.is_some()); + self.option + } + + pub fn unwrap_into_kv(self) -> (K, V) { + debug_assert!(self.option.is_some()); + self.option.unwrap() + } +} + +#[cfg(not(feature="test_efficient_enum"))] +impl<'a, K, V> BucketTaker<'a, K, V> { + pub fn hash(&self) -> HashValue { + debug_assert!(self.0.option.is_some()); + self.0.hash + } + pub fn key(&self) -> &K { + debug_assert!(self.0.option.is_some()); + &self.0.option.as_ref().unwrap().0 + } + pub fn value(&self) -> &V { + debug_assert!(self.0.option.is_some()); + &self.0.option.as_ref().unwrap().1 + } + pub fn value_mut(&mut self) -> &mut V { + debug_assert!(self.0.option.is_some()); + &mut self.0.option.as_mut().unwrap().1 + } + pub fn into_value_mut(self) -> &'a mut V { + debug_assert!(self.0.option.is_some()); + &mut self.0.option.as_mut().unwrap().1 + } + pub fn kv_mut(&mut self) -> (&mut K, &mut V) { + debug_assert!(self.0.option.is_some()); + let e = self.0.option.as_mut().unwrap(); + (&mut e.0, &mut e.1) + } + pub fn take(self) -> (K, V) { + debug_assert!(self.0.option.is_some()); + self.0.option.take().unwrap() + } +} + + /// A possibly truncated hash value. 
/// #[derive(Debug)] -struct ShortHash(usize, PhantomData); +struct ShortHash(HashValue, PhantomData); impl ShortHash { /// Pretend this is a full HashValue, which @@ -51,7 +203,7 @@ impl ShortHash { /// - Sz = u32: 32-bit hash is enough to select bucket index /// - Sz = u64: hash is not truncated fn into_hash(self) -> HashValue { - HashValue(self.0) + self.0 } } @@ -74,15 +226,12 @@ impl PartialEq for ShortHash where Sz: Size { #[inline] fn eq(&self, rhs: &HashValue) -> bool { if Sz::is_64_bit() { - self.0 == rhs.0 + self.0 == *rhs } else { - lo32(self.0 as u64) == lo32(rhs.0 as u64) + self.0.eq_lo32(rhs) } } } -impl From> for HashValue { - fn from(x: ShortHash) -> Self { HashValue(x.0) } -} /// `Pos` is stored in the `indices` array and it points to the index of a /// `Bucket` in self.entries. @@ -141,14 +290,14 @@ impl Pos { fn with_hash(i: usize, hash: HashValue) -> Self where Sz: Size { - let index = if Sz::is_64_bit() { + let i = if Sz::is_64_bit() { i as u64 } else { - (i | (lo32(hash.0 as u64) << 32)) as u64 + hash.combine_pos(i) }; - debug_assert!(index as u64 != POS_NONE); - debug_assert!(index as u64 != POS_TOMBSTONE); - Pos { index: index as u64 } + debug_assert!(i != POS_NONE); + debug_assert!(i != POS_TOMBSTONE); + Pos { index: i } } #[inline] @@ -158,6 +307,15 @@ impl Pos { Pos { index: i as u64 } } + #[inline] + fn is_none(&self) -> bool { + self.index == POS_NONE + } + + #[inline] + fn is_tombstone(&self) -> bool { + self.index == POS_TOMBSTONE + } #[inline] fn state(&self) -> PosState<()> { @@ -171,7 +329,7 @@ impl Pos { #[inline] fn link_pos(&self) -> Option { debug_assert!(self.index != POS_TOMBSTONE); - if self.index == POS_NONE { + if self.index != POS_NONE { Some(self.index as usize) } else { None @@ -255,11 +413,11 @@ impl ShortHashProxy /// Get the hash from either `self` or from a lookup into `entries`, /// depending on `Sz`. 
- fn get_short_hash(&self, entries: &[Option>], index: usize) -> ShortHash { + fn get_short_hash(&self, entries: &[Bucket], index: usize) -> ShortHash { if Sz::is_64_bit() { - ShortHash(entries[index].unwrap_hash().0, PhantomData) + ShortHash(entries[index].unwrap_hash(), PhantomData) } else { - ShortHash(self.0, PhantomData) + ShortHash(HashValue::new(self.0), PhantomData) } } } @@ -306,7 +464,7 @@ pub struct OrderMap { /// indices are the buckets. indices.len() == raw capacity indices: Vec, /// entries is a dense vec of entries in their order. entries.len() == len - entries: Vec>>, + entries: Vec>, /// the number of tombstones in `indices` waiting to be cleaned up index_tombstones: usize, /// the number of tombstones in `entries` waiting to be cleaned up @@ -314,30 +472,6 @@ pub struct OrderMap { hash_builder: S, } -#[derive(Copy, Clone, Debug)] -struct Bucket { - hash: HashValue, - key: K, - value: V, -} - -trait BucketHelper { - fn unwrap_hash(&self) -> HashValue; -} - -impl BucketHelper for Option> { - // if the bucket is none, gives a hash of 0 in release and panics in debug - fn unwrap_hash(&self) -> HashValue { - debug_assert!(self.is_some()); - self.as_ref().map_or(HashValue(0), |e| e.hash) - } -} - -#[inline(always)] -fn desired_pos(mask: usize, hash: HashValue) -> usize { - hash.0 & mask -} - /// The number of steps that `current` is forward of the prev #[inline(always)] fn probe_delta(mask: usize, prev: usize, current: usize) -> usize { @@ -347,7 +481,7 @@ fn probe_delta(mask: usize, prev: usize, current: usize) -> usize { /// The number of steps that `current` is forward of the desired position for hash #[inline(always)] fn probe_distance(mask: usize, hash: HashValue, current: usize) -> usize { - probe_delta(mask, desired_pos(mask, hash), current) + probe_delta(mask, hash.desired_pos(mask), current) } @@ -374,14 +508,13 @@ impl fmt::Debug for OrderMap for (i, index) in enumerate(&self.indices) { write!(f, "{}: {:?}", i, index)?; if let 
PosState::Value(pos) = index.debug_pos() { - if let &Some(ref entry) = &self.entries[pos] { - writeln!(f, ", desired={}, probe_distance={}, key={:?}", - desired_pos(self.mask, entry.hash), - probe_distance(self.mask, entry.hash, i), - entry.key)?; - } else { - writeln!(f, ", tombstone")?; - } + let (hash, key) = self.entries[pos].unwrap_hash_key(); + writeln!(f, ", desired={}, probe_distance={}, key={:?}", + hash.desired_pos(self.mask), + probe_distance(self.mask, hash, i), + key)?; + } else { + writeln!(f, ", tombstone")?; } writeln!(f, "")?; } @@ -417,6 +550,19 @@ macro_rules! probe_loop { } } +// this could not be captured in an efficient iterator +macro_rules! rev_probe_loop { + ($probe_var: ident < $len: expr, $body: expr) => { + loop { + $body + if $probe_var == 0 { + $probe_var = $len; + } + $probe_var -= 1; + } + } +} + impl OrderMap { /// Create a new map. (Does not allocate.) pub fn new() -> Self { @@ -581,25 +727,25 @@ pub struct OccupiedEntry<'a, K: 'a, V: 'a> { index_tombstones: &'a mut usize, entry_tombstones: &'a mut usize, pos: &'a mut Pos, - kv: &'a mut Option>, + bucket_taker: BucketTaker<'a, K, V>, } impl<'a, K, V> OccupiedEntry<'a, K, V> { pub fn key(&self) -> &K { - &self.kv.as_ref().unwrap().key + self.bucket_taker.key() } pub fn get(&self) -> &V { - &self.kv.as_ref().unwrap().value + self.bucket_taker.value() } pub fn get_mut(&mut self) -> &mut V { - &mut self.kv.as_mut().unwrap().value + self.bucket_taker.value_mut() } pub fn into_mut(self) -> &'a mut V { - &mut self.kv.as_mut().unwrap().value + self.bucket_taker.into_value_mut() } pub fn insert(self, value: V) -> V { - replace(&mut self.into_mut(), value) + replace(self.into_mut(), value) } pub fn remove(self) -> V { @@ -614,15 +760,14 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { *self.index_tombstones +=1; *self.entry_tombstones += 1; *self.pos = Pos::tombstone(); - let entry = self.kv.take().unwrap(); - (entry.key, entry.value) + self.bucket_taker.take() } } pub struct VacantEntry<'a, K: 
'a, V: 'a> { indices: &'a mut Vec, - entries: &'a mut Vec>>, + entries: &'a mut Vec>, index_tombstones: &'a mut usize, size_class_is_64bit: bool, key: K, @@ -645,10 +790,10 @@ impl<'a, K, V> VacantEntry<'a, K, V> { where Sz: Size { let index = self.entries.len(); - self.entries.push(Some(Bucket { hash: self.hash, key: self.key, value: value })); + self.entries.push(Bucket::new(self.hash, self.key, value)); let old_pos = Pos::with_hash::(index, self.hash); insert_phase_2::(self.indices, self.index_tombstones, self.probe, old_pos); - &mut self.entries[index].as_mut().unwrap().value + self.entries[index].kv_mut().unwrap().1 } } @@ -669,7 +814,7 @@ impl OrderMap where Sz: Size { let hash = hash_elem_using(&self.hash_builder, &key); - let mut probe = desired_pos(self.mask, hash); + let mut probe = hash.desired_pos(self.mask); let mut dist = 0; let mut last_tombstone = None; let indices_len = self.indices.len(); @@ -704,18 +849,22 @@ impl OrderMap key: key, probe: probe, }); - } else if entry_hash == hash && self.entries[i].as_ref().map_or(false, |e| e.key == key) { - return Entry::Occupied(OccupiedEntry { - entry_tombstones: &mut self.entry_tombstones, - index_tombstones: &mut self.index_tombstones, - kv: &mut self.entries[i], - pos: &mut self.indices[probe], - }); - } else if let Some(tprobe) = last_tombstone { - let tdist = probe_delta(self.mask, tprobe, probe); + } else if entry_hash == hash { + if *self.entries[i].unwrap_kv().0 == key { + let taker = self.entries[i].unwrap_taker(); + return Entry::Occupied(OccupiedEntry { + entry_tombstones: &mut self.entry_tombstones, + index_tombstones: &mut self.index_tombstones, + bucket_taker: taker, + pos: &mut self.indices[probe], + }); + } + } - // We're already in the neighborhood, - // If a bucket wants to steal from a tombstone. make it happen + // We're already in the neighborhood, + // If a bucket wants to steal from a tombstone. 
make it happen + if let Some(tprobe) = last_tombstone { + let tdist = probe_delta(self.mask, tprobe, probe); if tdist < their_dist { self.indices.swap(tprobe, probe); last_tombstone = Some(probe); @@ -783,7 +932,7 @@ impl OrderMap where Sz: Size { let hash = hash_elem_using(&self.hash_builder, &key); - let mut probe = desired_pos(self.mask, hash); + let mut probe = hash.desired_pos(self.mask); let mut dist = 0; let mut last_tombstone = None; let indices_len = self.indices.len(); @@ -813,17 +962,18 @@ impl OrderMap }; break; } else if entry_hash == hash { - if let Some(entry) = self.entries[i].as_mut() { - if entry.key == key { - return Inserted::Swapped { - prev_value: replace(&mut entry.value, value), - }; - } + let (entry_key, entry_value) = self.entries[i].unwrap_kv_mut(); + if *entry_key == key { + return Inserted::Swapped { + prev_value: replace(entry_value, value), + }; } - } else if let Some(tprobe) = last_tombstone { + } + + // We're already in the neighborhood, + // If a bucket wants to steal from a tombstone. make it happen + if let Some(tprobe) = last_tombstone { let tdist = probe_delta(self.mask, tprobe, probe); - // We're already in the neighborhood, - // If a bucket wants to steal from a tombstone. 
make it happen if tdist < their_dist { self.indices.swap(tprobe, probe); last_tombstone = Some(probe); @@ -860,7 +1010,7 @@ impl OrderMap } dist += 1; }); - self.entries.push(Some(Bucket { hash: hash, key: key, value: value })); + self.entries.push(Bucket::new(hash, key, value)); insert_kind } @@ -984,7 +1134,7 @@ impl OrderMap if self.len() <= 0 { return None; } let h = hash_elem_using(&self.hash_builder, key); - self.find_using(h, move |e| { *e.key.borrow() == *key }) + self.find_using(h, move |k, _| { *k.borrow() == *key }) } /// Return probe (indices), position (entries), and key-value pairs by @@ -996,7 +1146,7 @@ impl OrderMap if self.len() <= 0 { return None; } let h = hash_elem_using(&self.hash_builder, key); - self.find_mut_using(h, move |e| { *e.key.borrow() == *key }) + self.find_mut_using(h, move |k, _| { *k.borrow() == *key }) } /// Return probe (indices), position (entries), and key-value pairs by @@ -1008,7 +1158,7 @@ impl OrderMap if self.len() <= 0 { return None; } let h = hash_elem_using(&self.hash_builder, key); - self.find_remove_using(h, move |e| { *e.key.borrow() == *key }) + self.find_remove_using(h, move |k, _| { *k.borrow() == *key }) } /// Remove the key-value pair equivalent to `key` and return the `value`. @@ -1136,8 +1286,7 @@ impl OrderMap { /// /// Computes in **O(1)** time. pub fn get_index(&self, index: usize) -> Option<(&K, &V)> { - self.entries.get(index).and_then(|e| e.as_ref()) - .map(|e| (&e.key, &e.value)) + self.entries.get(index).and_then(|e| e.kv()) } /// Get a key-value pair by index @@ -1146,8 +1295,7 @@ impl OrderMap { /// /// Computes in **O(1)** time. pub fn get_index_mut(&mut self, index: usize) -> Option<(&mut K, &mut V)> { - self.entries.get_mut(index).and_then(|e| e.as_mut()) - .map(|e| (&mut e.key, &mut e.value)) + self.entries.get_mut(index).and_then(|e| e.kv_mut()) } /// Remove the key-value pair by index @@ -1156,8 +1304,8 @@ impl OrderMap { /// /// Computes in **O(1)** time (average). 
pub fn swap_remove_index(&mut self, index: usize) -> Option<(K, V)> { - self.entries.get(index).and_then(|e| e.as_ref()) - .map(|e| self.find_existing_entry(index, e.hash)) + self.entries.get(index).and_then(|e| e.hash()) + .map(|hash| self.find_existing_entry_mut(index, hash, false)) .and_then(|probe| self.swap_remove_found(probe, index)) } @@ -1263,6 +1411,13 @@ impl OrderMap { self.entries.reserve_exact(more); } + /// Removes all the index tombstones + /// + /// Computes in **O(n)** time. + fn remove_cheap_index_tombstones(&mut self) { + dispatch_32_vs_64!(self.remove_cheap_index_tombstones_impl()) + } + /// Removes all the index tombstones /// /// Computes in **O(n)** time. @@ -1277,19 +1432,65 @@ impl OrderMap { // if there entries are empty, just return // otherwise, try to get the hash (last entry might be a tombstone) let hash = if let Some(e) = self.entries.last() { - e.as_ref().map(|e| e.hash) + e.hash() } else { return None }; let index = self.entries.len()-1; if let Some(hash) = hash { - let probe = self.find_existing_entry(index, hash); - self.swap_remove_found(probe, index) + let probe = self.find_existing_entry_mut(index, hash, false); + im!(self.indices[probe]) = Pos::tombstone(); + self.index_tombstones += 1; + let v = self.entries.pop().unwrap().take(); + v + //self.swap_remove_found(probe, index) } else { self.entries.pop(); None } } + /// Removes cheap the index tombstones + /// + /// Computes in **O(n)** time. 
+ fn remove_cheap_index_tombstones_impl(&mut self) + where Sz: Size + { + if self.index_tombstones == 0 { return } + + // Find the first ideal or none (looking backwards) + let mut probe = enumerate(&self.indices).rev().find(|&(i, index)| match index.pos::() { + PosState::Value(pos) => 0 == probe_distance(self.mask, self.entries[pos].unwrap_hash(), i), + PosState::IsTombstone => false, + PosState::IsNone => true, + }).map_or(0, |(i, _)| i); + + let mut need_processing = self.len() + self.index_tombstones; + rev_probe_loop!(probe < self.indices.len(), { + match i!(self.indices[probe]).state() { + PosState::Value(()) => { + need_processing -= 1; + if need_processing == 0 { return } + + rev_probe_loop!(probe < self.indices.len(), { + if i!(self.indices[probe]).is_none() { break } + need_processing -= 1; + if need_processing == 0 { return } + }); + }, + PosState::IsTombstone => { + im!(self.indices[probe]) = Pos::none(); + + self.index_tombstones -= 1; + if self.index_tombstones == 0 { return } + + need_processing -= 1; + if need_processing == 0 { return } + }, + PosState::IsNone => {}, + } + }); + } + /// Removes all the index tombstones /// /// Computes in **O(n)** time. 
@@ -1298,22 +1499,16 @@ impl OrderMap { { if self.index_tombstones == 0 { return } - // Find the first tombstone after an ideal + // Find the first tombstone after an ideal or none let mut tombstone_head = enumerate(&self.indices).find(|&(i, index)| match index.pos::() { PosState::Value(pos) => 0 == probe_distance(self.mask, self.entries[pos].unwrap_hash(), i), PosState::IsTombstone => false, - PosState::IsNone => false, + PosState::IsNone => true, }).map_or(0, |(i, _)| i); probe_loop!(tombstone_head < self.indices.len(), { - if self.indices[tombstone_head].state() != PosState::Value(()) { - break; - } - tombstone_head += 1; + if i!(self.indices[tombstone_head]).is_tombstone() { break } }); - - if self.indices[tombstone_head].state() == PosState::IsTombstone { - self.index_tombstones -= 1; - } + self.index_tombstones -= 1; self.indices[tombstone_head] = Pos::none(); let mut tombstone_tail = tombstone_head; @@ -1321,13 +1516,14 @@ impl OrderMap { probe_loop!(probe < self.indices.len(), { match self.indices[probe].resolve::() { PosState::Value((i, hash_proxy)) => { + //println!("value"); let entry_hash = hash_proxy.get_short_hash(&self.entries, i); let dist = probe_distance(self.mask, entry_hash.into_hash(), probe); if dist == 0 { // clear the linked list let mut iter = tombstone_head; - while let Some(next) = self.indices[iter].link_pos() { - self.indices[iter] = Pos::none(); + while let Some(next) = im!(self.indices[iter]).link_pos() { + im!(self.indices[iter]) = Pos::none(); iter = next; } @@ -1337,22 +1533,27 @@ impl OrderMap { // find a new tombstone_tail tombstone_head = probe + 1; probe_loop!(tombstone_head < self.indices.len(), { - if self.indices[tombstone_head].state() != PosState::Value(()) { - break; + if i!(self.indices[tombstone_head]).is_tombstone() { + break } }); + self.index_tombstones -= 1; + self.indices[tombstone_head] = Pos::none(); tombstone_tail = tombstone_head; } else { loop { let empty_dist = probe_delta(self.mask, tombstone_head, probe); 
if dist >= empty_dist { + // record the next link + let next = i!(self.indices[tombstone_head]).link_pos(); + // move the value up - self.indices[tombstone_head] = self.indices[probe]; - self.indices[probe] = Pos::none(); + im!(self.indices[tombstone_head]) = self.indices[probe]; + im!(self.indices[probe]) = Pos::none(); - if let Some(next) = self.indices[tombstone_head].link_pos() { + if let Some(next) = next { // add the value's old space to our linked list - self.indices[tombstone_tail] = Pos::link(probe); + im!(self.indices[tombstone_tail]) = Pos::link(probe); // move the head of the linked list forward tombstone_head = next; } else { @@ -1361,23 +1562,26 @@ impl OrderMap { tombstone_head = probe; tombstone_tail = probe; } + break + } else if let Some(next) = self.indices[tombstone_head].link_pos() { + // pop off the head and clear it + im!(self.indices[tombstone_head]) = Pos::none(); + tombstone_head = next; } else { - if let Some(next) = self.indices[tombstone_head].link_pos() { - // pop off the head and clear it - self.indices[tombstone_head] = Pos::none(); - tombstone_head = next; - } else { - // if we're out of tombstones, return - if self.index_tombstones == 0 { return } - - // find a new tombstone for the list - tombstone_head = probe + 1; - probe_loop!(tombstone_head < self.indices.len(), { - if self.indices[tombstone_head].state() != PosState::Value(()) { - break; - } - }) - } + // if we're out of tombstones, return + if self.index_tombstones == 0 { return } + + // find a new tombstone for the list + tombstone_head = probe + 1; + probe_loop!(tombstone_head < self.indices.len(), { + if i!(self.indices[tombstone_head]).is_tombstone() { + break + } + }); + self.index_tombstones -= 1; + im!(self.indices[tombstone_head]) = Pos::none(); + tombstone_tail = tombstone_head; + break } } } @@ -1385,15 +1589,15 @@ impl OrderMap { PosState::IsTombstone => { self.index_tombstones -= 1; // push it onto the back of the linked list - self.indices[probe] = Pos::none(); 
- self.indices[tombstone_tail] = Pos::link(probe); + im!(self.indices[probe]) = Pos::none(); + im!(self.indices[tombstone_tail]) = Pos::link(probe); tombstone_tail = probe; }, PosState::IsNone => { // clear the tombstone list let mut iter = tombstone_head; - while let Some(next) = self.indices[iter].link_pos() { - self.indices[iter] = Pos::none(); + while let Some(next) = i!(self.indices[iter]).link_pos() { + im!(self.indices[iter]) = Pos::none(); iter = next; } @@ -1403,10 +1607,13 @@ impl OrderMap { // find a new tombstone for the list tombstone_head = probe + 1; probe_loop!(tombstone_head < self.indices.len(), { - if self.indices[tombstone_head].state() != PosState::Value(()) { - break; + if i!(self.indices[tombstone_head]).is_tombstone() { + break } - }) + }); + self.index_tombstones -= 1; + im!(self.indices[tombstone_head]) = Pos::none(); + tombstone_tail = tombstone_head; }, } }); @@ -1426,17 +1633,16 @@ impl OrderMap { let entry_hash = if SzOld::is_same_size::() { hash_proxy.get_short_hash(&self.entries, i).into_hash() } else { - debug_assert!(self.entries[i].is_some()); - self.entries[i].unwrap_hash() + i!(self.entries[i]).unwrap_hash() }; // find first empty bucket or tombstone and insert there - let mut probe = desired_pos(self.mask, entry_hash); + let mut probe = entry_hash.desired_pos(self.mask); probe_loop!(probe < self.indices.len(), { - match self.indices[probe].state() { + match i!(self.indices[probe]).state() { // skip values PosState::Value(()) => {}, PosState::IsNone => { - self.indices[probe] = Pos::with_hash::(i, entry_hash); + im!(self.indices[probe]) = Pos::with_hash::(i, entry_hash); return }, PosState::IsTombstone => debug_assert!(false, "reinserting into tombstones"), @@ -1464,12 +1670,13 @@ impl OrderMap { Sz: Size, { for (i, e) in self.entries.iter_mut().enumerate() { - let hash = if let Some(e) = e.as_mut() { - if keep(&mut e.key, &mut e.value) { continue } else { e.hash } + if let Some((key, value)) = e.kv_mut() { + if keep(key, value) 
{ continue } } else { continue }; + let hash = e.unwrap_hash(); let probe = find_existing_entry_impl::(self.mask, &self.indices, i, hash); - self.indices[probe] = Pos::tombstone(); + im!(self.indices[probe]) = Pos::tombstone(); self.index_tombstones += 1; self.entry_tombstones += 1; e.take(); @@ -1492,13 +1699,15 @@ impl OrderMap { let entry_tombstones = &mut self.entry_tombstones; let index_tombstones = &mut self.index_tombstones; self.entries.get_mut(index).and_then(|e| { - if let Some(e) = e.as_ref() { - let probe = find_existing_entry_impl::(mask, indices, index, e.hash); - indices[probe] = Pos::tombstone(); + if let Some(e) = e.taker() { + let probe = find_existing_entry_impl::(mask, indices, index, e.hash()); + im!(indices[probe]) = Pos::tombstone(); *entry_tombstones += 1; *index_tombstones += 1; + Some(e.take()) + } else { + None } - e.take().map(|e| (e.key, e.value)) }) } @@ -1512,19 +1721,19 @@ impl OrderMap { { if a == b { return } - match (self.entries[a].as_ref(), self.entries[b].as_ref()) { + match (self.entries[a].hash(), self.entries[b].hash()) { (None, None) => {}, - (None, Some(b_ref)) => { - let probe_b = self.find_existing_entry(b, b_ref.hash); - self.indices[probe_b] = Pos::with_hash::(a, b_ref.hash) + (None, Some(b_hash)) => { + let probe_b = self.find_existing_entry_mut(b, b_hash, true); + self.indices[probe_b] = Pos::with_hash::(a, b_hash) }, - (Some(a_ref), None) => { - let probe_a = self.find_existing_entry(a, a_ref.hash); - self.indices[probe_a] = Pos::with_hash::(b, a_ref.hash) + (Some(a_hash), None) => { + let probe_a = self.find_existing_entry_mut(a, a_hash, true); + self.indices[probe_a] = Pos::with_hash::(b, a_hash) }, - (Some(a_ref), Some(b_ref)) => { - let probe_a = self.find_existing_entry(a, a_ref.hash); - let probe_b = self.find_existing_entry(b, b_ref.hash); + (Some(a_hash), Some(b_hash)) => { + let probe_a = self.find_existing_entry_mut(a, a_hash, true); + let probe_b = self.find_existing_entry_mut(b, b_hash, true); 
self.indices.swap(probe_a, probe_b); } } @@ -1540,19 +1749,19 @@ impl OrderMap { fn remove_tombstones_impl(&mut self) where Sz: Size { + if self.entry_tombstones == 0 { return } + let mask = self.mask; let indices = &mut self.indices; let mut removed = 0; let mut index = 0; - if self.entry_tombstones == 0 { return } - self.entries.retain(|e| { index += 1; - if let Some(e) = e.as_ref() { + if let Some(hash) = e.hash() { if removed != 0 { - let probe = find_existing_entry_impl::(mask, indices, index-1, e.hash); - indices[probe].sub_eq::(removed); + let probe = find_existing_entry_impl::(mask, indices, index-1, hash); + im!(indices[probe]).sub_eq::(removed); } true } else { @@ -1566,28 +1775,29 @@ impl OrderMap { /// Return probe (indices) and position (entries), and kv reference fn find_using(&self, hash: HashValue, key_eq: F) -> Option<(usize, usize, &K, &V)> - where F: Fn(&Bucket) -> bool, + where F: Fn(&K, &V) -> bool, { dispatch_32_vs_64!(self.find_using_impl::<_>(hash, key_eq)) } fn find_using_impl(&self, hash: HashValue, key_eq: F) -> Option<(usize, usize, &K, &V)> - where F: Fn(&Bucket) -> bool, + where F: Fn(&K, &V) -> bool, Sz: Size, { debug_assert!(self.len() > 0); - let mut probe = desired_pos(self.mask, hash); + let mut probe = hash.desired_pos(self.mask); let mut dist = 0; probe_loop!(probe < self.indices.len(), { - match self.indices[probe].resolve::() { + match i!(self.indices[probe]).resolve::() { PosState::Value((i, hash_proxy)) => { let entry_hash = hash_proxy.get_short_hash(&self.entries, i); if dist > probe_distance(self.mask, entry_hash.into_hash(), probe) { // give up when probe distance is too long return None; } else if entry_hash == hash { - if let Some(e) = self.entries[i].as_ref() { - if key_eq(e) { return Some((probe, i, &e.key, &e.value)); } + let (key, value) = i!(self.entries[i]).unwrap_kv(); + if key_eq(key, value) { + return Some((probe, i, key, value)); } } }, @@ -1600,29 +1810,30 @@ impl OrderMap { /// Return probe (indices), 
position (entries), and kv reference fn find_mut_using(&mut self, hash: HashValue, key_eq: F) -> Option<(usize, usize, &mut K, &mut V)> - where F: Fn(&Bucket) -> bool, + where F: Fn(&K, &V) -> bool, { dispatch_32_vs_64!(self.find_mut_using_impl::<_>(hash, key_eq)) } fn find_mut_using_impl(&mut self, hash: HashValue, key_eq: F) -> Option<(usize, usize, &mut K, &mut V)> - where F: Fn(&Bucket) -> bool, + where F: Fn(&K, &V) -> bool, Sz: Size, { debug_assert!(self.len() > 0); - let mut probe = desired_pos(self.mask, hash); + let mut probe = hash.desired_pos(self.mask); let mut dist = 0; probe_loop!(probe < self.indices.len(), { - match self.indices[probe].resolve::() { + match i!(self.indices[probe]).resolve::() { PosState::Value((i, hash_proxy)) => { let entry_hash = hash_proxy.get_short_hash(&self.entries, i); if dist > probe_distance(self.mask, entry_hash.into_hash(), probe) { // give up when probe distance is too long return None; - } else if entry_hash == hash && self.entries[i].as_ref().map_or(false, &key_eq) { - // TODO: We shouldn't need unwrap here, blocked on rust-lang/rfcs#811 - let e = self.entries[i].as_mut().unwrap(); - return Some((probe, i, &mut e.key, &mut e.value)); + } else if entry_hash == hash { + if { let (key, value) = im!(self.entries[i]).unwrap_kv_mut(); key_eq(key, value) } { + let (key, value) = im!(self.entries[i]).unwrap_kv_mut(); + return Some((probe, i, key, value)); + } } }, PosState::IsTombstone => {}, @@ -1634,31 +1845,34 @@ impl OrderMap { /// Return probe (indices) and position (entries) fn find_remove_using(&mut self, hash: HashValue, key_eq: F) -> Option<(usize, usize, K, V)> - where F: Fn(&Bucket) -> bool, + where F: Fn(&K, &V) -> bool, { dispatch_32_vs_64!(self.find_remove_using_impl::<_>(hash, key_eq)) } fn find_remove_using_impl(&mut self, hash: HashValue, key_eq: F) -> Option<(usize, usize, K, V)> - where F: Fn(&Bucket) -> bool, + where F: Fn(&K, &V) -> bool, Sz: Size, { debug_assert!(self.len() > 0); - let mut probe = 
desired_pos(self.mask, hash); + let mut probe = hash.desired_pos(self.mask); let mut dist = 0; probe_loop!(probe < self.indices.len(), { - match self.indices[probe].resolve::() { + match i!(self.indices[probe]).resolve::() { PosState::Value((i, hash_proxy)) => { let entry_hash = hash_proxy.get_short_hash(&self.entries, i); if dist > probe_distance(self.mask, entry_hash.into_hash(), probe) { // give up when probe distance is too long return None; - } else if entry_hash == hash && self.entries[i].as_ref().map_or(false, &key_eq) { - self.index_tombstones += 1; - self.entry_tombstones += 1; - self.indices[probe] = Pos::tombstone(); - let e = self.entries[i].take().unwrap(); - return Some((probe, i, e.key, e.value)); + } else if entry_hash == hash { + let mut taker = im!(self.entries[i]).unwrap_taker(); + if { let (key, value) = taker.kv_mut(); key_eq(key, value) } { + self.index_tombstones += 1; + self.entry_tombstones += 1; + self.indices[probe] = Pos::tombstone(); + let (key, value) = taker.take(); + return Some((probe, i, key, value)); + } } }, PosState::IsTombstone => {}, @@ -1688,6 +1902,26 @@ impl OrderMap { find_existing_entry_impl::(self.mask, &self.indices, actual_pos, hash) } + /// Find an entry already placed inside `self.entries` given its position and hash. + /// Return the probe for the entry. + /// + /// Computes in **O(1)** time (average). + fn find_existing_entry_mut(&mut self, actual_pos: usize, hash: HashValue, move_found: bool) -> usize + { + dispatch_32_vs_64!(self.find_existing_entry_mut_impl(actual_pos, hash, move_found)) + } + + /// Find an entry already placed inside `self.entries` given its position and hash. + /// Return the probe for the entry. + /// + /// Computes in **O(1)** time (average). 
+ fn find_existing_entry_mut_impl(&mut self, actual_pos: usize, hash: HashValue, move_found: bool) -> usize + where Sz: Size, + { + debug_assert!(self.len() > actual_pos); + find_existing_entry_mut_impl::(self.mask, &self.entries, &mut self.indices, actual_pos, hash, move_found) + } + fn swap_remove_found(&mut self, probe: usize, found: usize) -> Option<(K, V)> { dispatch_32_vs_64!(self.swap_remove_found_impl(probe, found)) } @@ -1699,19 +1933,19 @@ impl OrderMap { // use swap_remove, but then we need to update the index that points // to the other entry that has to move self.indices[probe] = Pos::none(); - let kv = self.entries.swap_remove(found).map(|e| (e.key, e.value)); + let kv = self.entries.swap_remove(found).take(); // correct index that points to the entry that had to swap places // check if it was the last element or a tombstone - if let Some(e) = self.entries.get(found).and_then(|e| e.as_ref()) { + if let Some(hash) = self.entries.get(found).and_then(Bucket::hash) { // examine new element in `found` and find it in indices - let mut probe = desired_pos(self.mask, e.hash); + let mut probe = hash.desired_pos(self.mask); probe_loop!(probe < self.indices.len(), { - if let PosState::Value(i) = self.indices[probe].pos::() { + if let PosState::Value(i) = i!(self.indices[probe]).pos::() { if i >= self.entries.len() { // found it - self.indices[probe] = Pos::with_hash::(found, e.hash); + im!(self.indices[probe]) = Pos::with_hash::(found, hash); break; } } @@ -1730,16 +1964,16 @@ impl OrderMap { PosState::Value((i, hash_proxy)) => { let entry_hash = hash_proxy.get_short_hash(&self.entries, i); if probe_distance(self.mask, entry_hash.into_hash(), probe) > 0 { - self.indices[last_probe] = self.indices[probe]; - self.indices[probe] = Pos::none(); + im!(self.indices[last_probe]) = self.indices[probe]; + im!(self.indices[probe]) = Pos::none(); } else { break; } }, // Always move tombstones PosState::IsTombstone => { - self.indices[last_probe] = Pos::tombstone(); - 
self.indices[probe] = Pos::none(); + im!(self.indices[last_probe]) = Pos::tombstone(); + im!(self.indices[probe]) = Pos::none(); }, PosState::IsNone => break, } @@ -1778,7 +2012,7 @@ fn insert_phase_2(indices: &mut Vec, index_tombstones: &mut usize, mut fn find_existing_entry_impl(mask: usize, indices: &Vec, actual_pos: usize, hash: HashValue) -> usize where Sz: Size, { - let mut probe = desired_pos(mask, hash); + let mut probe = hash.desired_pos(mask); probe_loop!(probe < indices.len(), { match indices[probe].pos::() { PosState::Value(i) => if i == actual_pos { return probe }, @@ -1788,12 +2022,49 @@ fn find_existing_entry_impl(mask: usize, indices: &Vec, actual_pos: usi }); } +/// Find an entry already placed inside `self.entries` given its position and hash. +/// Return the probe for the entry. +/// +/// This method does will steal from tombstones where possible +/// +/// Computes in **O(1)** time (average). +fn find_existing_entry_mut_impl(mask: usize, entries: &Vec>, indices: &mut Vec, actual_pos: usize, hash: HashValue, move_found: bool) -> usize + where Sz: Size, +{ + let mut probe = hash.desired_pos(mask); + let mut last_tombstone = None; + probe_loop!(probe < indices.len(), { + match i!(indices[probe]).resolve::() { + PosState::Value((i, hash_proxy)) => { + if !move_found && i == actual_pos { return probe } + + // We're already in the neighborhood, + // If a bucket wants to steal from a tombstone. 
make it happen + if let Some(tprobe) = last_tombstone { + let entry_hash = hash_proxy.get_short_hash(entries, i); + let dist = probe_distance(mask, entry_hash.into_hash(), probe); + let tdist = probe_delta(mask, tprobe, probe); + if tdist < dist { + indices.swap(tprobe, probe); + if i == actual_pos { return tprobe } + last_tombstone = Some(probe); + } + } + + if i == actual_pos { return probe } + }, + PosState::IsTombstone => last_tombstone = Some(probe), + PosState::IsNone => debug_assert!(false, "the entry does not exist"), + } + }); +} + use std::slice::Iter as SliceIter; use std::slice::IterMut as SliceIterMut; use std::vec::IntoIter as VecIntoIter; pub struct Keys<'a, K: 'a, V: 'a> { - iter: SliceIter<'a, Option>>, + iter: SliceIter<'a, Bucket>, tombstones: usize, } @@ -1803,8 +2074,8 @@ impl<'a, K, V> Iterator for Keys<'a, K, V> { fn next(&mut self) -> Option<&'a K> { let tombstones = &mut self.tombstones; self.iter.by_ref().filter_map(move |e| { - if let Some(e) = e.as_ref() { - Some(&e.key) + if let Some((key, _)) = e.kv() { + Some(key) } else { *tombstones -= 1; None @@ -1824,14 +2095,13 @@ impl<'a, K, V> Iterator for Keys<'a, K, V> { fn nth(&mut self, n: usize) -> Option { if self.tombstones == 0 { self.iter.nth(n).map(|e| { - let e = e.as_ref().unwrap(); - &e.key + e.unwrap_kv().0 }) } else { let tombstones = &mut self.tombstones; self.iter.by_ref().filter_map(move |e| { - if let Some(e) = e.as_ref() { - Some(&e.key) + if let Some((key, _)) = e.kv() { + Some(key) } else { *tombstones -= 1; None @@ -1849,8 +2119,8 @@ impl<'a, K, V> DoubleEndedIterator for Keys<'a, K, V> { fn next_back(&mut self) -> Option<&'a K> { let tombstones = &mut self.tombstones; self.iter.by_ref().filter_map(move |e| { - if let Some(e) = e.as_ref() { - Some(&e.key) + if let Some((key, _)) = e.kv() { + Some(key) } else { *tombstones -= 1; None @@ -1867,7 +2137,7 @@ impl<'a, K, V> ExactSizeIterator for Keys<'a, K, V> { pub struct Iter<'a, K: 'a, V: 'a> { - iter: SliceIter<'a, 
Option>>, + iter: SliceIter<'a, Bucket>, tombstones: usize, } @@ -1877,8 +2147,8 @@ impl<'a, K, V> Iterator for Iter<'a, K, V> { fn next(&mut self) -> Option { let tombstones = &mut self.tombstones; self.iter.by_ref().filter_map(move |e| { - if let Some(e) = e.as_ref() { - Some((&e.key, &e.value)) + if let Some((key, value)) = e.kv() { + Some((key, value)) } else { *tombstones -= 1; None @@ -1898,14 +2168,13 @@ impl<'a, K, V> Iterator for Iter<'a, K, V> { fn nth(&mut self, n: usize) -> Option { if self.tombstones == 0 { self.iter.nth(n).map(|e| { - let e = e.as_ref().unwrap(); - (&e.key, &e.value) + e.unwrap_kv() }) } else { let tombstones = &mut self.tombstones; self.iter.by_ref().filter_map(move |e| { - if let Some(e) = e.as_ref() { - Some((&e.key, &e.value)) + if let Some((key, value)) = e.kv() { + Some((key, value)) } else { *tombstones -= 1; None @@ -1923,8 +2192,8 @@ impl<'a, K, V> DoubleEndedIterator for Iter<'a, K, V> { fn next_back(&mut self) -> Option { let tombstones = &mut self.tombstones; self.iter.by_ref().filter_map(move |e| { - if let Some(e) = e.as_ref() { - Some((&e.key, &e.value)) + if let Some((key, value)) = e.kv() { + Some((key, value)) } else { *tombstones -= 1; None @@ -1940,7 +2209,7 @@ impl<'a, K, V> ExactSizeIterator for Iter<'a, K, V> { } pub struct IterMut<'a, K: 'a, V: 'a> { - iter: SliceIterMut<'a, Option>>, + iter: SliceIterMut<'a, Bucket>, tombstones: usize, } @@ -1950,8 +2219,8 @@ impl<'a, K, V> Iterator for IterMut<'a, K, V> { fn next(&mut self) -> Option { let tombstones = &mut self.tombstones; self.iter.by_ref().filter_map(move |e| { - if let Some(e) = e.as_mut() { - Some((&e.key, &mut e.value)) + if let Some((key, value)) = e.kv_mut() { + Some((&*key, value)) } else { *tombstones -= 1; None @@ -1971,14 +2240,14 @@ impl<'a, K, V> Iterator for IterMut<'a, K, V> { fn nth(&mut self, n: usize) -> Option { if self.tombstones == 0 { self.iter.nth(n).map(|e| { - let e = e.as_mut().unwrap(); - (&e.key, &mut e.value) + let (key, value) = 
e.unwrap_kv_mut(); + (&*key, value) }) } else { let tombstones = &mut self.tombstones; self.iter.by_ref().filter_map(move |e| { - if let Some(e) = e.as_mut() { - Some((&e.key, &mut e.value)) + if let Some((key, value)) = e.kv_mut() { + Some((&*key, value)) } else { *tombstones -= 1; None @@ -1996,8 +2265,8 @@ impl<'a, K, V> DoubleEndedIterator for IterMut<'a, K, V> { fn next_back(&mut self) -> Option { let tombstones = &mut self.tombstones; self.iter.by_ref().filter_map(move |e| { - if let Some(e) = e.as_mut() { - Some((&e.key, &mut e.value)) + if let Some((key, value)) = e.kv_mut() { + Some((&*key, value)) } else { *tombstones -= 1; None @@ -2013,7 +2282,7 @@ impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> { } pub struct IntoIter { - iter: VecIntoIter>>, + iter: VecIntoIter>, tombstones: usize, } @@ -2023,8 +2292,8 @@ impl Iterator for IntoIter { fn next(&mut self) -> Option { let tombstones = &mut self.tombstones; self.iter.by_ref().filter_map(move |e| { - if let Some(e) = e { - Some((e.key, e.value)) + if let Some((key, value)) = e.into_kv() { + Some((key, value)) } else { *tombstones -= 1; None @@ -2044,14 +2313,13 @@ impl Iterator for IntoIter { fn nth(&mut self, n: usize) -> Option { if self.tombstones == 0 { self.iter.nth(n).map(|e| { - let e = e.unwrap(); - (e.key, e.value) + e.unwrap_into_kv() }) } else { let tombstones = &mut self.tombstones; self.iter.by_ref().filter_map(move |e| { - if let Some(e) = e { - Some((e.key, e.value)) + if let Some((key, value)) = e.into_kv() { + Some((key, value)) } else { *tombstones -= 1; None @@ -2075,8 +2343,8 @@ impl DoubleEndedIterator for IntoIter { fn next_back(&mut self) -> Option { let tombstones = &mut self.tombstones; self.iter.by_ref().filter_map(|e| { - if let Some(e) = e { - Some((e.key, e.value)) + if let Some((key, value)) = e.into_kv() { + Some((key, value)) } else { *tombstones -= 1; None @@ -2181,7 +2449,15 @@ impl Extend<(K, V)> for OrderMap S: BuildHasher, { fn extend>(&mut self, iterable: I) { - 
for (k, v) in iterable { self.insert(k, v); } + let mut iterator = iterable.into_iter(); + while let Some((k, v)) = iterator.next() { + let len = self.len(); + if len == self.capacity() { + let (lower, _) = iterator.size_hint(); + self.reserve(lower.saturating_add(1)); + } + self.insert(k, v); + } } } diff --git a/src/unsafe.rs b/src/unsafe.rs new file mode 100644 index 00000000..b5a0cbe4 --- /dev/null +++ b/src/unsafe.rs @@ -0,0 +1,163 @@ + +#[cfg(feature="test_unsafe_index")] +macro_rules! i { + ($e:ident$(.$e2:ident)*[$i:expr]) => {*unsafe { $e$(.$e2)*.get_unchecked($i) }}; + (&$e:ident$(.$e2:ident)*[$i:expr]) => {unsafe { $e$(.$e2)*.get_unchecked($i) }}; +} + +#[cfg(feature="test_unsafe_index")] +macro_rules! im { + ($e:ident$(.$e2:ident)*[$i:expr]) => {*unsafe { $e$(.$e2)*.get_unchecked_mut($i) }}; + (&mut $e:ident$(.$e2:ident)*[$i:expr]) => {unsafe { $e$(.$e2)*.get_unchecked_mut($i) }}; +} + + +#[cfg(feature="test_efficient_enum")] +use efficient_enum::tag::TagMSB; +#[cfg(feature="test_efficient_enum")] +use efficient_enum::option::{EfficientOption, EfficientOptionInnerSome}; + + +impl HashValue { + #[cfg(feature="test_efficient_enum")] + pub fn new(v: usize) -> HashValue { + HashValue(v & (usize::max_value() >> 1)) + } +} + + +#[cfg(feature="test_efficient_enum")] +#[derive(Copy,Clone,Debug)] +pub struct Bucket { + option: EfficientOption, +} + +/// A type which can take values from a Bucket, leaving the bucket empty +#[cfg(feature="test_efficient_enum")] +pub struct BucketTaker<'a, K: 'a, V: 'a>(EfficientOptionInnerSome<'a, usize, (K, V), TagMSB>); + + +#[cfg(feature="test_efficient_enum")] +impl Bucket { + pub fn new(hash: HashValue, key: K, value: V) -> Self { + Bucket { option: EfficientOption::some((hash.0, (key, value))) } + } + + pub fn unwrap_hash_key(&self) -> (HashValue, &K) { + debug_assert!(self.option.is_some()); + let hash = self.option.clone_0().unwrap(); + (HashValue(hash), &self.option.ref_1().unwrap().0) + } + + pub fn hash(&self) -> Option 
{ + self.option.clone_0().map(HashValue) + } + + pub fn kv(&self) -> Option<(&K, &V)> { + self.option.ref_1().map(|e| (&e.0, &e.1)) + } + + pub fn kv_mut(&mut self) -> Option<(&mut K, &mut V)> { + self.option.mut_1().map(|e| (&mut e.0, &mut e.1)) + } + + pub fn taker<'a>(&'a mut self) -> Option> { + use efficient_enum::option::EfficientOptionInner::IsSome; + if let IsSome(e) = self.option.inner() { + Some(BucketTaker(e)) + } else { None } + } + + pub fn take(&mut self) -> Option<(K, V)> { + self.option.take_1() + } + + pub fn into_kv(self) -> Option<(K, V)> { + debug_assert!(self.option.is_some()); + self.option.destructure_1() + } +} + +#[cfg(all(feature="test_efficient_enum", not(feature="test_unsafe_efficient_enum")))] +impl Bucket { + pub fn unwrap_hash(&self) -> HashValue { + debug_assert!(self.option.is_some()); + HashValue(self.option.clone_0().unwrap()) + } + + pub fn unwrap_kv(&self) -> (&K, &V) { + debug_assert!(self.option.is_some()); + self.kv().unwrap() + } + + pub fn unwrap_kv_mut(&mut self) -> (&mut K, &mut V) { + debug_assert!(self.option.is_some()); + self.kv_mut().unwrap() + } + + pub fn unwrap_taker<'a>(&'a mut self) -> BucketTaker<'a, K, V> { + debug_assert!(self.option.is_some()); + self.taker().unwrap() + } + + pub fn unwrap_into_kv(self) -> (K, V) { + debug_assert!(self.option.is_some()); + self.into_kv().unwrap() + } +} + +#[cfg(feature="test_efficient_enum")] +impl<'a, K, V> BucketTaker<'a, K, V> { + pub fn hash(&self) -> HashValue { + HashValue(self.0.clone_0()) + } + pub fn key(&self) -> &K { + &self.0.ref_1().0 + } + pub fn value(&self) -> &V { + &self.0.ref_1().1 + } + pub fn value_mut(&mut self) -> &mut V { + &mut self.0.mut_1().1 + } + pub fn into_value_mut(self) -> &'a mut V { + &mut self.0.destructure_1().1 + } + pub fn kv_mut(&mut self) -> (&mut K, &mut V) { + let e = self.0.mut_1(); + (&mut e.0, &mut e.1) + } + pub fn take(self) -> (K, V) { + self.0.take_1().1 + } +} + +#[cfg(feature="test_unsafe_efficient_enum")] +impl Bucket { + 
pub fn unwrap_hash(&self) -> HashValue { + debug_assert!(self.option.is_some()); + unsafe { HashValue(*self.option.unwrap_ref_0()) } + } + + pub fn unwrap_kv(&self) -> (&K, &V) { + debug_assert!(self.option.is_some()); + let kv = unsafe { self.option.unwrap_ref_1() }; + (&kv.0, &kv.1) + } + + pub fn unwrap_kv_mut(&mut self) -> (&mut K, &mut V) { + debug_assert!(self.option.is_some()); + let kv = unsafe { self.option.unwrap_mut_1() }; + (&mut kv.0, &mut kv.1) + } + + pub fn unwrap_taker<'a>(&'a mut self) -> BucketTaker<'a, K, V> { + debug_assert!(self.option.is_some()); + unsafe { BucketTaker(self.option.inner_some()) } + } + + pub fn unwrap_into_kv(self) -> (K, V) { + debug_assert!(self.option.is_some()); + unsafe { self.option.unwrap_destructure_1() } + } +} From 03ebf0a9090cfbd9d24c2da12cf3314fb4b5cd24 Mon Sep 17 00:00:00 2001 From: Popog Date: Sat, 3 Dec 2016 02:17:40 -0800 Subject: [PATCH 5/7] Merged .insert and .entry, tombstone fixes Improved find_existing_entry_mut: No longer requires entries (everything ahead of the entry can just be shifted). Switched to first_tombstone instead of last. Maximized usage of this function. Fixed remove_index_tombstones: fixed backtracking bug which tanked perf. Reworked .entry: merged with insert to reduce code duplication. Reworked probe loop to separate tombstone code (makes it easier to reason about) Switched to first_tombstone instead of last.
--- benches/bench.rs | 33 ++++ src/lib.rs | 419 ++++++++++++++++++++--------------------------- 2 files changed, 208 insertions(+), 244 deletions(-) diff --git a/benches/bench.rs b/benches/bench.rs index 69eca1b0..dca4a1d7 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -610,3 +610,36 @@ fn retain_many_ordermap_100_000(b: &mut Bencher) { map }); } + +#[bench] +fn remove_index_tombstones_many_ordermap_100_000(b: &mut Bencher) { + let mut map = OMAP_100K.clone(); + map.retain(|k, _| *k % 7 == 0); + + b.iter(|| { + let mut map = map.clone(); + map.remove_index_tombstones() + }); +} + +#[bench] +fn remove_index_tombstones_half_ordermap_100_000(b: &mut Bencher) { + let mut map = OMAP_100K.clone(); + map.retain(|k, _| *k % 2 == 0); + + b.iter(|| { + let mut map = map.clone(); + map.remove_index_tombstones() + }); +} + +#[bench] +fn remove_index_tombstones_some_ordermap_100_000(b: &mut Bencher) { + let mut map = OMAP_100K.clone(); + map.retain(|k, _| *k % 100 == 0); + + b.iter(|| { + let mut map = map.clone(); + map.remove_index_tombstones() + }); +} diff --git a/src/lib.rs b/src/lib.rs index 65b52f64..91abbc3c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -484,16 +484,6 @@ fn probe_distance(mask: usize, hash: HashValue, current: usize) -> usize { probe_delta(mask, hash.desired_pos(mask), current) } - -enum Inserted { - Done, - Swapped { prev_value: V }, - RobinHood { - probe: usize, - old_pos: Pos, - } -} - impl fmt::Debug for OrderMap where K: fmt::Debug + Hash + Eq, V: fmt::Debug, @@ -697,6 +687,17 @@ pub enum Entry<'a, K: 'a, V: 'a,> { } impl<'a, K, V> Entry<'a, K, V> { + /// Computes in **O(1)** time (amortized average). + pub fn insert(self, value: V) -> Option { + match self { + Entry::Occupied(entry) => Some(entry.insert(value)), + Entry::Vacant(entry) => { + entry.insert(value); + None + }, + } + } + /// Computes in **O(1)** time (amortized average). 
pub fn or_insert(self, default: V) -> &'a mut V { match self { @@ -792,7 +793,7 @@ impl<'a, K, V> VacantEntry<'a, K, V> { let index = self.entries.len(); self.entries.push(Bucket::new(self.hash, self.key, value)); let old_pos = Pos::with_hash::(index, self.hash); - insert_phase_2::(self.indices, self.index_tombstones, self.probe, old_pos); + entry_phase_2::(self.indices, self.index_tombstones, self.probe, old_pos); self.entries[index].kv_mut().unwrap().1 } } @@ -809,36 +810,28 @@ impl OrderMap dispatch_32_vs_64!(self.entry_phase_1(key)) } - // Warning, this is a code duplication zone Entry is not yet finished + // First phase: Look for the preferred location for key. + // + // We will know if `key` is already in the map, before we need to insert it. + // + // If we don't find the key, or we find a location from which we should steal + // (robin hood hashing), `VacantEntry` is returned. Any displacing that needs + // to be done occurs in `entry_phase_2` fn entry_phase_1(&mut self, key: K) -> Entry where Sz: Size { let hash = hash_elem_using(&self.hash_builder, &key); let mut probe = hash.desired_pos(self.mask); let mut dist = 0; - let mut last_tombstone = None; - let indices_len = self.indices.len(); + let first_tombstone; debug_assert!(self.len() < self.raw_capacity()); probe_loop!(probe < self.indices.len(), { - match self.indices[probe].resolve::() { + match i!(self.indices[probe]).resolve::() { PosState::Value((i, hash_proxy)) => { let entry_hash = hash_proxy.get_short_hash(&self.entries, i); // if existing element probed less than us, swap let their_dist = probe_distance(self.mask, entry_hash.into_hash(), probe); if their_dist < dist { - // don't steal an entry if we can steal a tombstone - if let Some(probe) = last_tombstone { - return Entry::Vacant(VacantEntry { - size_class_is_64bit: self.size_class_is_64bit(), - indices: &mut self.indices, - entries: &mut self.entries, - index_tombstones: &mut self.index_tombstones, - hash: hash, - key: key, - probe: probe, 
- }); - } - // robin hood: steal the spot if it's better for us return Entry::Vacant(VacantEntry { size_class_is_64bit: self.size_class_is_64bit(), @@ -849,31 +842,73 @@ impl OrderMap key: key, probe: probe, }); - } else if entry_hash == hash { - if *self.entries[i].unwrap_kv().0 == key { - let taker = self.entries[i].unwrap_taker(); - return Entry::Occupied(OccupiedEntry { - entry_tombstones: &mut self.entry_tombstones, - index_tombstones: &mut self.index_tombstones, - bucket_taker: taker, - pos: &mut self.indices[probe], - }); - } + } else if entry_hash == hash && *self.entries[i].unwrap_kv().0 == key { + let taker = self.entries[i].unwrap_taker(); + return Entry::Occupied(OccupiedEntry { + entry_tombstones: &mut self.entry_tombstones, + index_tombstones: &mut self.index_tombstones, + bucket_taker: taker, + pos: &mut im!(self.indices[probe]), + }); } + }, + PosState::IsNone => { + // empty bucket, insert here + return Entry::Vacant(VacantEntry { + size_class_is_64bit: self.size_class_is_64bit(), + indices: &mut self.indices, + entries: &mut self.entries, + index_tombstones: &mut self.index_tombstones, + hash: hash, + key: key, + probe: probe, + }); + }, + PosState::IsTombstone => { + first_tombstone = probe; + break; + }, + } + dist += 1; + }); - // We're already in the neighborhood, - // If a bucket wants to steal from a tombstone. 
make it happen - if let Some(tprobe) = last_tombstone { - let tdist = probe_delta(self.mask, tprobe, probe); - if tdist < their_dist { - self.indices.swap(tprobe, probe); - last_tombstone = Some(probe); - } + let mut first_tombstone = first_tombstone; + dist += 1; + probe += 1; + probe_loop!(probe < self.indices.len(), { + match i!(self.indices[probe]).resolve::() { + PosState::Value((i, hash_proxy)) => { + let entry_hash = hash_proxy.get_short_hash(&self.entries, i); + // if existing element probed less than us, swap + let their_dist = probe_distance(self.mask, entry_hash.into_hash(), probe); + if their_dist < dist { + // take the tombstone + return Entry::Vacant(VacantEntry { + size_class_is_64bit: self.size_class_is_64bit(), + indices: &mut self.indices, + entries: &mut self.entries, + index_tombstones: &mut self.index_tombstones, + hash: hash, + key: key, + probe: first_tombstone, + }); + } else if entry_hash == hash && *self.entries[i].unwrap_kv().0 == key { + let taker = self.entries[i].unwrap_taker(); + return Entry::Occupied(OccupiedEntry { + entry_tombstones: &mut self.entry_tombstones, + index_tombstones: &mut self.index_tombstones, + bucket_taker: taker, + pos: &mut im!(self.indices[probe]), + }); + } else { + // since their_dist >= dist and dist > tombstone_dist + // any bucket ahead of our target should be moved up + self.indices.swap(first_tombstone, probe); + first_tombstone = probe; } }, PosState::IsNone => { - // empty bucket, insert here (or any previous tombstones if we had those) - let probe = last_tombstone.unwrap_or(probe); + // empty bucket, insert at last tombstone return Entry::Vacant(VacantEntry { size_class_is_64bit: self.size_class_is_64bit(), indices: &mut self.indices, @@ -881,12 +916,12 @@ impl OrderMap index_tombstones: &mut self.index_tombstones, hash: hash, key: key, - probe: probe, + probe: first_tombstone, }); }, PosState::IsTombstone => { // tombstone bucket, insert here if we've checked everywhere else - if dist >= indices_len 
{ + if dist >= self.indices.len() { return Entry::Vacant(VacantEntry { size_class_is_64bit: self.size_class_is_64bit(), indices: &mut self.indices, @@ -894,10 +929,9 @@ impl OrderMap index_tombstones: &mut self.index_tombstones, hash: hash, key: key, - probe: last_tombstone.unwrap(), + probe: first_tombstone, }); } - last_tombstone = Some(probe); }, } dist += 1; @@ -923,97 +957,6 @@ impl OrderMap } } - // First phase: Look for the preferred location for key. - // - // We will know if `key` is already in the map, before we need to insert it. - // When we insert they key, it might be that we need to continue displacing - // entries (robin hood hashing), in which case Inserted::RobinHood is returned - fn insert_phase_1(&mut self, key: K, value: V) -> Inserted - where Sz: Size - { - let hash = hash_elem_using(&self.hash_builder, &key); - let mut probe = hash.desired_pos(self.mask); - let mut dist = 0; - let mut last_tombstone = None; - let indices_len = self.indices.len(); - let insert_kind; - debug_assert!(self.len() < self.raw_capacity()); - probe_loop!(probe < self.indices.len(), { - match self.indices[probe].resolve::() { - PosState::Value((i, hash_proxy)) => { - let entry_hash = hash_proxy.get_short_hash(&self.entries, i); - // if existing element probed less than us, swap - let their_dist = probe_distance(self.mask, entry_hash.into_hash(), probe); - if their_dist < dist { - // don't steal an entry if we can steal a tombstone - if let Some(probe) = last_tombstone { - self.index_tombstones -= 1; - let index = self.entries.len(); - self.indices[probe] = Pos::with_hash::(index, hash); - insert_kind = Inserted::Done; - break; - } - - // robin hood: steal the spot if it's better for us - let index = self.entries.len(); - insert_kind = Inserted::RobinHood { - probe: probe, - old_pos: Pos::with_hash::(index, hash), - }; - break; - } else if entry_hash == hash { - let (entry_key, entry_value) = self.entries[i].unwrap_kv_mut(); - if *entry_key == key { - return 
Inserted::Swapped { - prev_value: replace(entry_value, value), - }; - } - } - - // We're already in the neighborhood, - // If a bucket wants to steal from a tombstone. make it happen - if let Some(tprobe) = last_tombstone { - let tdist = probe_delta(self.mask, tprobe, probe); - if tdist < their_dist { - self.indices.swap(tprobe, probe); - last_tombstone = Some(probe); - } - } - }, - PosState::IsNone => { - // use a the first tombstone if we found one of those - if let Some(probe) = last_tombstone { - self.index_tombstones -= 1; - let index = self.entries.len(); - self.indices[probe] = Pos::with_hash::(index, hash); - insert_kind = Inserted::Done; - break; - } - // empty bucket, insert here - let index = self.entries.len(); - self.indices[probe] = Pos::with_hash::(index, hash); - insert_kind = Inserted::Done; - break; - }, - PosState::IsTombstone => { - // tombstone bucket, insert here if we've checked everywhere else - if dist >= indices_len { - let probe = last_tombstone.unwrap(); - self.index_tombstones -= 1; - let index = self.entries.len(); - self.indices[probe] = Pos::with_hash::(index, hash); - insert_kind = Inserted::Done; - break; - } - last_tombstone = Some(probe); - }, - } - dist += 1; - }); - self.entries.push(Bucket::new(hash, key, value)); - insert_kind - } - /// Insert they key-value pair into the map. /// /// If a value already existed for `key`, that old value is returned @@ -1021,26 +964,7 @@ impl OrderMap /// /// Computes in **O(1)** time (amortized average). 
pub fn insert(&mut self, key: K, value: V) -> Option { - self.reserve_one(); - if self.size_class_is_64bit() { - match self.insert_phase_1::(key, value) { - Inserted::Swapped { prev_value } => Some(prev_value), - Inserted::Done => None, - Inserted::RobinHood { probe, old_pos } => { - insert_phase_2::(&mut self.indices, &mut self.index_tombstones, probe, old_pos); - None - } - } - } else { - match self.insert_phase_1::(key, value) { - Inserted::Swapped { prev_value } => Some(prev_value), - Inserted::Done => None, - Inserted::RobinHood { probe, old_pos } => { - insert_phase_2::(&mut self.indices, &mut self.index_tombstones, probe, old_pos); - None - } - } - } + self.entry(key).insert(value) } /// Return an iterator over the keys of the map, in their order @@ -1305,7 +1229,7 @@ impl OrderMap { /// Computes in **O(1)** time (average). pub fn swap_remove_index(&mut self, index: usize) -> Option<(K, V)> { self.entries.get(index).and_then(|e| e.hash()) - .map(|hash| self.find_existing_entry_mut(index, hash, false)) + .map(|hash| self.find_existing_entry_mut(index, hash)) .and_then(|probe| self.swap_remove_found(probe, index)) } @@ -1421,32 +1345,35 @@ impl OrderMap { /// Removes all the index tombstones /// /// Computes in **O(n)** time. + #[cfg(test)] fn remove_index_tombstones(&mut self) { dispatch_32_vs_64!(self.remove_index_tombstones_impl()) } + /// Removes all the index tombstones + /// + /// Computes in **O(n)** time. + #[cfg(not(test))] + pub fn remove_index_tombstones(&mut self) { + dispatch_32_vs_64!(self.remove_index_tombstones_impl()) + } + /// Remove the last key-value pair /// /// Computes in **O(1)** time (average). 
fn pop_impl(&mut self) -> Option<(K, V)> { // if there entries are empty, just return // otherwise, try to get the hash (last entry might be a tombstone) - let hash = if let Some(e) = self.entries.last() { - e.hash() - } else { return None }; - - let index = self.entries.len()-1; - if let Some(hash) = hash { - let probe = self.find_existing_entry_mut(index, hash, false); - im!(self.indices[probe]) = Pos::tombstone(); - self.index_tombstones += 1; - let v = self.entries.pop().unwrap().take(); - v - //self.swap_remove_found(probe, index) - } else { - self.entries.pop(); - None - } + self.entries.last_mut() + .and_then(|mut e| e.taker().map(|e| (e.hash(), e.take()))) + .map(|(hash, value)| { + let index = self.entries.len() - 1; + let probe = self.find_existing_entry_mut(index, hash); + self.entries.pop(); + im!(self.indices[probe]) = Pos::tombstone(); + self.index_tombstones += 1; + value + }) } /// Removes cheap the index tombstones @@ -1499,6 +1426,13 @@ impl OrderMap { { if self.index_tombstones == 0 { return } + if self.len() == 0 { + for p in self.indices.iter_mut() { + *p = Pos::none(); + } + return; + } + // Find the first tombstone after an ideal or none let mut tombstone_head = enumerate(&self.indices).find(|&(i, index)| match index.pos::() { PosState::Value(pos) => 0 == probe_distance(self.mask, self.entries[pos].unwrap_hash(), i), @@ -1514,7 +1448,7 @@ impl OrderMap { let mut tombstone_tail = tombstone_head; let mut probe = tombstone_head + 1; probe_loop!(probe < self.indices.len(), { - match self.indices[probe].resolve::() { + match i!(self.indices[probe]).resolve::() { PosState::Value((i, hash_proxy)) => { //println!("value"); let entry_hash = hash_proxy.get_short_hash(&self.entries, i); @@ -1531,12 +1465,12 @@ impl OrderMap { if self.index_tombstones == 0 { return } // find a new tombstone_tail - tombstone_head = probe + 1; - probe_loop!(tombstone_head < self.indices.len(), { - if i!(self.indices[tombstone_head]).is_tombstone() { + probe_loop!(probe < 
self.indices.len(), { + if i!(self.indices[probe]).is_tombstone() { break } }); + tombstone_head = probe; self.index_tombstones -= 1; self.indices[tombstone_head] = Pos::none(); tombstone_tail = tombstone_head; @@ -1548,7 +1482,7 @@ impl OrderMap { let next = i!(self.indices[tombstone_head]).link_pos(); // move the value up - im!(self.indices[tombstone_head]) = self.indices[probe]; + im!(self.indices[tombstone_head]) = i!(self.indices[probe]); im!(self.indices[probe]) = Pos::none(); if let Some(next) = next { @@ -1563,7 +1497,7 @@ impl OrderMap { tombstone_tail = probe; } break - } else if let Some(next) = self.indices[tombstone_head].link_pos() { + } else if let Some(next) = i!(self.indices[tombstone_head]).link_pos() { // pop off the head and clear it im!(self.indices[tombstone_head]) = Pos::none(); tombstone_head = next; @@ -1572,12 +1506,12 @@ impl OrderMap { if self.index_tombstones == 0 { return } // find a new tombstone for the list - tombstone_head = probe + 1; - probe_loop!(tombstone_head < self.indices.len(), { - if i!(self.indices[tombstone_head]).is_tombstone() { + probe_loop!(probe < self.indices.len(), { + if i!(self.indices[probe]).is_tombstone() { break } }); + tombstone_head = probe; self.index_tombstones -= 1; im!(self.indices[tombstone_head]) = Pos::none(); tombstone_tail = tombstone_head; @@ -1605,12 +1539,12 @@ impl OrderMap { if self.index_tombstones == 0 { return } // find a new tombstone for the list - tombstone_head = probe + 1; - probe_loop!(tombstone_head < self.indices.len(), { - if i!(self.indices[tombstone_head]).is_tombstone() { + probe_loop!(probe < self.indices.len(), { + if i!(self.indices[probe]).is_tombstone() { break } }); + tombstone_head = probe; self.index_tombstones -= 1; im!(self.indices[tombstone_head]) = Pos::none(); tombstone_tail = tombstone_head; @@ -1675,7 +1609,7 @@ impl OrderMap { } else { continue }; let hash = e.unwrap_hash(); - let probe = find_existing_entry_impl::(self.mask, &self.indices, i, hash); + let probe 
= find_existing_entry_mut_impl::(self.mask, &mut self.indices, i, hash); im!(self.indices[probe]) = Pos::tombstone(); self.index_tombstones += 1; self.entry_tombstones += 1; @@ -1694,21 +1628,15 @@ impl OrderMap { fn remove_index_impl(&mut self, index: usize) -> Option<(K, V)> where Sz: Size { - let mask = self.mask; - let indices = &mut self.indices; - let entry_tombstones = &mut self.entry_tombstones; - let index_tombstones = &mut self.index_tombstones; - self.entries.get_mut(index).and_then(|e| { - if let Some(e) = e.taker() { - let probe = find_existing_entry_impl::(mask, indices, index, e.hash()); - im!(indices[probe]) = Pos::tombstone(); - *entry_tombstones += 1; - *index_tombstones += 1; - Some(e.take()) - } else { - None - } - }) + self.entries.get_mut(index) + .and_then(|e| e.taker().map(|e| (e.hash(), e.take()))) + .map(|(hash, value)| { + let probe = self.find_existing_entry_mut(index, hash); + im!(self.indices[probe]) = Pos::tombstone(); + self.entry_tombstones += 1; + self.index_tombstones += 1; + value + }) } /// Swaps the index of two key-value pairs by index @@ -1724,16 +1652,16 @@ impl OrderMap { match (self.entries[a].hash(), self.entries[b].hash()) { (None, None) => {}, (None, Some(b_hash)) => { - let probe_b = self.find_existing_entry_mut(b, b_hash, true); + let probe_b = self.find_existing_entry_mut(b, b_hash); self.indices[probe_b] = Pos::with_hash::(a, b_hash) }, (Some(a_hash), None) => { - let probe_a = self.find_existing_entry_mut(a, a_hash, true); + let probe_a = self.find_existing_entry_mut(a, a_hash); self.indices[probe_a] = Pos::with_hash::(b, a_hash) }, (Some(a_hash), Some(b_hash)) => { - let probe_a = self.find_existing_entry_mut(a, a_hash, true); - let probe_b = self.find_existing_entry_mut(b, b_hash, true); + let probe_a = self.find_existing_entry_mut(a, a_hash); + let probe_b = self.find_existing_entry_mut(b, b_hash); self.indices.swap(probe_a, probe_b); } } @@ -1760,7 +1688,7 @@ impl OrderMap { index += 1; if let Some(hash) = 
e.hash() { if removed != 0 { - let probe = find_existing_entry_impl::(mask, indices, index-1, hash); + let probe = find_existing_entry_mut_impl::(mask, indices, index-1, hash); im!(indices[probe]).sub_eq::(removed); } true @@ -1869,7 +1797,7 @@ impl OrderMap { if { let (key, value) = taker.kv_mut(); key_eq(key, value) } { self.index_tombstones += 1; self.entry_tombstones += 1; - self.indices[probe] = Pos::tombstone(); + im!(self.indices[probe]) = Pos::tombstone(); let (key, value) = taker.take(); return Some((probe, i, key, value)); } @@ -1906,20 +1834,20 @@ impl OrderMap { /// Return the probe for the entry. /// /// Computes in **O(1)** time (average). - fn find_existing_entry_mut(&mut self, actual_pos: usize, hash: HashValue, move_found: bool) -> usize + fn find_existing_entry_mut(&mut self, actual_pos: usize, hash: HashValue) -> usize { - dispatch_32_vs_64!(self.find_existing_entry_mut_impl(actual_pos, hash, move_found)) + dispatch_32_vs_64!(self.find_existing_entry_mut_impl(actual_pos, hash)) } /// Find an entry already placed inside `self.entries` given its position and hash. /// Return the probe for the entry. /// /// Computes in **O(1)** time (average). 
- fn find_existing_entry_mut_impl(&mut self, actual_pos: usize, hash: HashValue, move_found: bool) -> usize + fn find_existing_entry_mut_impl(&mut self, actual_pos: usize, hash: HashValue) -> usize where Sz: Size, { debug_assert!(self.len() > actual_pos); - find_existing_entry_mut_impl::(self.mask, &self.entries, &mut self.indices, actual_pos, hash, move_found) + find_existing_entry_mut_impl::(self.mask, &mut self.indices, actual_pos, hash) } fn swap_remove_found(&mut self, probe: usize, found: usize) -> Option<(K, V)> { @@ -1932,7 +1860,7 @@ impl OrderMap { // index `probe` and entry `found` is to be removed // use swap_remove, but then we need to update the index that points // to the other entry that has to move - self.indices[probe] = Pos::none(); + im!(self.indices[probe]) = Pos::none(); let kv = self.entries.swap_remove(found).take(); @@ -1960,7 +1888,7 @@ impl OrderMap { let mut last_probe = probe; let mut probe = probe + 1; probe_loop!(probe < self.indices.len(), { - match self.indices[probe].resolve::() { + match i!(self.indices[probe]).resolve::() { PosState::Value((i, hash_proxy)) => { let entry_hash = hash_proxy.get_short_hash(&self.entries, i); if probe_distance(self.mask, entry_hash.into_hash(), probe) > 0 { @@ -1984,12 +1912,12 @@ impl OrderMap { } } -/// phase 2 is post-insert where we forward-shift `Pos` in the indices. -fn insert_phase_2(indices: &mut Vec, index_tombstones: &mut usize, mut probe: usize, mut old_pos: Pos) +/// Second Phase: forward-shift `Pos` in the indices if need be. 
+fn entry_phase_2(indices: &mut Vec, index_tombstones: &mut usize, mut probe: usize, mut old_pos: Pos) where Sz: Size { probe_loop!(probe < indices.len(), { - let pos = &mut indices[probe]; + let pos = &mut im!(indices[probe]); match pos.state() { PosState::Value(()) => old_pos = replace(pos, old_pos), PosState::IsTombstone => { @@ -2014,7 +1942,7 @@ fn find_existing_entry_impl(mask: usize, indices: &Vec, actual_pos: usi { let mut probe = hash.desired_pos(mask); probe_loop!(probe < indices.len(), { - match indices[probe].pos::() { + match i!(indices[probe]).pos::() { PosState::Value(i) => if i == actual_pos { return probe }, PosState::IsTombstone => {}, PosState::IsNone => debug_assert!(false, "the entry does not exist"), @@ -2025,35 +1953,38 @@ fn find_existing_entry_impl(mask: usize, indices: &Vec, actual_pos: usi /// Find an entry already placed inside `self.entries` given its position and hash. /// Return the probe for the entry. /// -/// This method does will steal from tombstones where possible +/// This method will swap items with tombstones as it searches, moving items +/// closer to their ideal position. /// /// Computes in **O(1)** time (average). 
-fn find_existing_entry_mut_impl(mask: usize, entries: &Vec>, indices: &mut Vec, actual_pos: usize, hash: HashValue, move_found: bool) -> usize +fn find_existing_entry_mut_impl(mask: usize, indices: &mut Vec, actual_pos: usize, hash: HashValue) -> usize where Sz: Size, { let mut probe = hash.desired_pos(mask); - let mut last_tombstone = None; + let first_tombstone; probe_loop!(probe < indices.len(), { - match i!(indices[probe]).resolve::() { - PosState::Value((i, hash_proxy)) => { - if !move_found && i == actual_pos { return probe } + match i!(indices[probe]).pos::() { + PosState::Value(i) => if i == actual_pos { return probe }, + PosState::IsTombstone => { + first_tombstone = probe; + break + }, + PosState::IsNone => debug_assert!(false, "the entry does not exist"), + } + }); + let mut first_tombstone = first_tombstone; + probe += 1; + probe_loop!(probe < indices.len(), { + match i!(indices[probe]).pos::() { + PosState::Value(i) => { // We're already in the neighborhood, - // If a bucket wants to steal from a tombstone. make it happen - if let Some(tprobe) = last_tombstone { - let entry_hash = hash_proxy.get_short_hash(entries, i); - let dist = probe_distance(mask, entry_hash.into_hash(), probe); - let tdist = probe_delta(mask, tprobe, probe); - if tdist < dist { - indices.swap(tprobe, probe); - if i == actual_pos { return tprobe } - last_tombstone = Some(probe); - } - } - - if i == actual_pos { return probe } + // any bucket ahead of our target should be moved up + indices.swap(first_tombstone, probe); + if i == actual_pos { return first_tombstone } + first_tombstone = probe; }, - PosState::IsTombstone => last_tombstone = Some(probe), + PosState::IsTombstone => {}, PosState::IsNone => debug_assert!(false, "the entry does not exist"), } }); From 7f49e0b52f64b4ed44a5913bf6db48e7907b911e Mon Sep 17 00:00:00 2001 From: Popog Date: Sat, 3 Dec 2016 09:26:16 -0800 Subject: [PATCH 6/7] Take advantage of the robin-hood invariant during tombstone compression Derp. 
We don't need a linked list during tombstone removal, tombstones will always be consecutive, thanks to the robin-hood invariant. This also means we can improve tombstone compression in entry_phase_1 and find_exisint_entry_mut. This should provide a minor perf improvement overall, and a more substantial improvement for remove_index_tombstones --- src/lib.rs | 133 +++++++++++++++++++++-------------------------------- 1 file changed, 53 insertions(+), 80 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 91abbc3c..f5b6f43d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -300,13 +300,6 @@ impl Pos { Pos { index: i } } - #[inline] - fn link(i: usize) -> Self { - debug_assert!(i as u64 != POS_NONE); - debug_assert!(i as u64 != POS_TOMBSTONE); - Pos { index: i as u64 } - } - #[inline] fn is_none(&self) -> bool { self.index == POS_NONE @@ -326,16 +319,6 @@ impl Pos { } } - #[inline] - fn link_pos(&self) -> Option { - debug_assert!(self.index != POS_TOMBSTONE); - if self.index != POS_NONE { - Some(self.index as usize) - } else { - None - } - } - #[inline] fn debug_pos(&self) -> PosState { @@ -865,6 +848,18 @@ impl OrderMap }); }, PosState::IsTombstone => { + if self.index_tombstones == self.indices.len() { + return Entry::Vacant(VacantEntry { + size_class_is_64bit: self.size_class_is_64bit(), + indices: &mut self.indices, + entries: &mut self.entries, + index_tombstones: &mut self.index_tombstones, + hash: hash, + key: key, + probe: probe, + }); + } + first_tombstone = probe; break; }, @@ -904,7 +899,11 @@ impl OrderMap // since their_dist >= dist and dist > tombstone_dist // any bucket ahead of our target should be moved up self.indices.swap(first_tombstone, probe); - first_tombstone = probe; + + // Since we're compacting everything, the next + // tombstone will always be directly after + first_tombstone += 1; + first_tombstone &= self.mask } }, PosState::IsNone => { @@ -1442,112 +1441,85 @@ impl OrderMap { probe_loop!(tombstone_head < self.indices.len(), { if 
i!(self.indices[tombstone_head]).is_tombstone() { break } }); - self.index_tombstones -= 1; - self.indices[tombstone_head] = Pos::none(); let mut tombstone_tail = tombstone_head; let mut probe = tombstone_head + 1; + let mut tombstones = 1; probe_loop!(probe < self.indices.len(), { match i!(self.indices[probe]).resolve::() { PosState::Value((i, hash_proxy)) => { - //println!("value"); let entry_hash = hash_proxy.get_short_hash(&self.entries, i); let dist = probe_distance(self.mask, entry_hash.into_hash(), probe); if dist == 0 { - // clear the linked list - let mut iter = tombstone_head; - while let Some(next) = im!(self.indices[iter]).link_pos() { - im!(self.indices[iter]) = Pos::none(); - iter = next; + // clear the tombstone list + while tombstone_head != tombstone_tail { + im!(self.indices[tombstone_head]) = Pos::none(); + tombstone_head += 1; + tombstone_head &= self.mask; } + im!(self.indices[tombstone_head]) = Pos::none(); + self.index_tombstones -= tombstones; // if we're out of tombstones, return if self.index_tombstones == 0 { return } - // find a new tombstone_tail + // find a new tombstone_head probe_loop!(probe < self.indices.len(), { if i!(self.indices[probe]).is_tombstone() { break } }); tombstone_head = probe; - self.index_tombstones -= 1; - self.indices[tombstone_head] = Pos::none(); tombstone_tail = tombstone_head; + tombstones = 1; } else { - loop { - let empty_dist = probe_delta(self.mask, tombstone_head, probe); - if dist >= empty_dist { - // record the next link - let next = i!(self.indices[tombstone_head]).link_pos(); - - // move the value up - im!(self.indices[tombstone_head]) = i!(self.indices[probe]); - im!(self.indices[probe]) = Pos::none(); - - if let Some(next) = next { - // add the value's old space to our linked list - im!(self.indices[tombstone_tail]) = Pos::link(probe); - // move the head of the linked list forward - tombstone_head = next; - } else { - // the list is empty, so just use the recently empied - // probe as our new list - 
tombstone_head = probe; - tombstone_tail = probe; - } - break - } else if let Some(next) = i!(self.indices[tombstone_head]).link_pos() { + if dist < tombstones { + let delta = tombstones - dist; + for _ in 0..delta { // pop off the head and clear it im!(self.indices[tombstone_head]) = Pos::none(); - tombstone_head = next; - } else { - // if we're out of tombstones, return - if self.index_tombstones == 0 { return } - - // find a new tombstone for the list - probe_loop!(probe < self.indices.len(), { - if i!(self.indices[probe]).is_tombstone() { - break - } - }); - tombstone_head = probe; - self.index_tombstones -= 1; - im!(self.indices[tombstone_head]) = Pos::none(); - tombstone_tail = tombstone_head; - break + tombstone_head += 1; + tombstone_head &= self.mask; } + self.index_tombstones -= delta; + tombstones = dist; } + + // move the value up + self.indices.swap(tombstone_head, probe); + + tombstone_head += 1; + tombstone_head &= self.mask; + + tombstone_tail = probe; } }, PosState::IsTombstone => { - self.index_tombstones -= 1; - // push it onto the back of the linked list - im!(self.indices[probe]) = Pos::none(); - im!(self.indices[tombstone_tail]) = Pos::link(probe); tombstone_tail = probe; + tombstones += 1; }, PosState::IsNone => { // clear the tombstone list - let mut iter = tombstone_head; - while let Some(next) = i!(self.indices[iter]).link_pos() { - im!(self.indices[iter]) = Pos::none(); - iter = next; + while tombstone_head != tombstone_tail { + im!(self.indices[tombstone_head]) = Pos::none(); + tombstone_head += 1; + tombstone_head &= self.mask; } + im!(self.indices[tombstone_head]) = Pos::none(); + self.index_tombstones -= tombstones; // if we're out of tombstones, return if self.index_tombstones == 0 { return } - // find a new tombstone for the list + // find a new tombstone_head probe_loop!(probe < self.indices.len(), { if i!(self.indices[probe]).is_tombstone() { break } }); tombstone_head = probe; - self.index_tombstones -= 1; - 
im!(self.indices[tombstone_head]) = Pos::none(); tombstone_tail = tombstone_head; + tombstones = 1; }, } }); @@ -1982,7 +1954,8 @@ fn find_existing_entry_mut_impl(mask: usize, indices: &mut Vec, actual_ // any bucket ahead of our target should be moved up indices.swap(first_tombstone, probe); if i == actual_pos { return first_tombstone } - first_tombstone = probe; + first_tombstone += 1; + first_tombstone &= mask; }, PosState::IsTombstone => {}, PosState::IsNone => debug_assert!(false, "the entry does not exist"), From c225681b3b12252a95b8350c2a0ade62719cb08f Mon Sep 17 00:00:00 2001 From: Popog Date: Tue, 6 Dec 2016 09:42:15 -0800 Subject: [PATCH 7/7] General cleanup for PR Remove all the unsafe testing, and unneeded functions. A couple bug fixes. Change swap_remove_found to use tombstones. --- Cargo.toml | 7 - src/lib.rs | 426 +++++++++++++++++--------------------------------- src/unsafe.rs | 163 ------------------- 3 files changed, 142 insertions(+), 454 deletions(-) delete mode 100644 src/unsafe.rs diff --git a/Cargo.toml b/Cargo.toml index 29c9ee59..fd5f54c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,9 +10,6 @@ description = "A hash table with consistent order and fast iteration." 
[lib] bench = false -[dependencies] -efficient_enum = { version = "^0.3.1", optional = true } - [dev-dependencies] itertools = "0.5.1" rand = "0.3" @@ -24,10 +21,6 @@ lazy_static = "0.2" # for testing only, of course test_low_transition_point = [] test_debug = [] -test_efficient_enum = ["efficient_enum", "unsafe"] -test_unsafe_efficient_enum = ["test_efficient_enum"] -test_unsafe_index = ["unsafe"] -unsafe = [] [profile.bench] debug = true diff --git a/src/lib.rs b/src/lib.rs index f5b6f43d..cc6ac32f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,6 @@ #![doc(html_root_url = "https://docs.rs/ordermap/0.2/")] -#[cfg(feature="test_efficient_enum")] -extern crate efficient_enum; - mod macros; mod util; @@ -26,25 +23,10 @@ fn hash_elem_using(build: &B, k: &K) -> HashVa HashValue::new(h.finish() as usize) } -#[cfg(not(feature="test_unsafe_index"))] -macro_rules! i { - ($e:ident$(.$e2:ident)*[$i:expr]) => {$e$(.$e2)*[$i]}; - (&$e:ident$(.$e2:ident)*[$i:expr]) => {&$e$(.$e2)*[$i]}; -} - -#[cfg(not(feature="test_unsafe_index"))] -macro_rules! im { - ($e:ident$(.$e2:ident)*[$i:expr]) => {$e$(.$e2)*[$i]}; - (&mut $e:ident$(.$e2:ident)*[$i:expr]) => {&mut $e(.$e2)+[$i]}; -} - -#[cfg(feature="unsafe")] -include!("unsafe.rs"); - /// Hash value newtype. Not larger than usize, since anything larger /// isn't used for selecting position anyway. 
#[derive(Copy, Debug)] -pub struct HashValue(usize); +struct HashValue(usize); impl Clone for HashValue { #[inline] @@ -57,140 +39,134 @@ impl PartialEq for HashValue { } } impl HashValue { - #[cfg(not(feature="test_efficient_enum"))] - pub fn new(v: usize) -> HashValue { + fn new(v: usize) -> HashValue { HashValue(v) } - pub fn eq_lo32(&self, rhs: &HashValue) -> bool { + fn eq_lo32(&self, rhs: &HashValue) -> bool { lo32(self.0 as u64) == lo32(rhs.0 as u64) } - pub fn desired_pos(self, mask: usize) -> usize { + fn desired_pos(self, mask: usize) -> usize { self.0 & mask } - pub fn combine_pos(self, i: usize) -> u64 { + fn combine_pos(self, i: usize) -> u64 { i as u64 | (lo32(self.0 as u64) << 32) as u64 } } -#[cfg(not(feature="test_efficient_enum"))] #[derive(Copy,Clone,Debug)] -pub struct Bucket { +struct Bucket { hash: HashValue, option: Option<(K, V)>, } /// A type which can take values from a Bucket, leaving the bucket empty -#[cfg(not(feature="test_efficient_enum"))] -pub struct BucketTaker<'a, K: 'a, V: 'a>(&'a mut Bucket); +struct BucketTaker<'a, K: 'a, V: 'a>(&'a mut Bucket); -#[cfg(not(feature="test_efficient_enum"))] impl Bucket { - pub fn new(hash: HashValue, key: K, value: V) -> Self { + fn new(hash: HashValue, key: K, value: V) -> Self { Bucket { hash: hash, option: Some((key, value)) } } - pub fn unwrap_hash_key(&self) -> (HashValue, &K) { + fn unwrap_hash_key(&self) -> (HashValue, &K) { debug_assert!(self.option.is_some()); (self.hash, &self.option.as_ref().unwrap().0) } // if the bucket is none, gives a meaningless hash in release and panics in debug - pub fn hash(&self) -> Option { + fn hash(&self) -> Option { if self.option.is_some() { Some(self.hash) } else { None } } // if the bucket is none, gives a meaningless hash in release and panics in debug - pub fn unwrap_hash(&self) -> HashValue { + fn unwrap_hash(&self) -> HashValue { debug_assert!(self.option.is_some()); self.hash } - pub fn kv(&self) -> Option<(&K, &V)> { + fn kv(&self) -> Option<(&K, 
&V)> { self.option.as_ref().map(|e| (&e.0, &e.1)) } - pub fn unwrap_kv(&self) -> (&K, &V) { + fn unwrap_kv(&self) -> (&K, &V) { debug_assert!(self.option.is_some()); let kv = self.option.as_ref().unwrap(); (&kv.0, &kv.1) } - pub fn kv_mut(&mut self) -> Option<(&mut K, &mut V)> { + fn kv_mut(&mut self) -> Option<(&mut K, &mut V)> { self.option.as_mut().map(|e| (&mut e.0, &mut e.1)) } - pub fn unwrap_kv_mut(&mut self) -> (&mut K, &mut V) { + fn unwrap_kv_mut(&mut self) -> (&mut K, &mut V) { debug_assert!(self.option.is_some()); let kv = self.option.as_mut().unwrap(); (&mut kv.0, &mut kv.1) } - pub fn taker<'a>(&'a mut self) -> Option> { + fn taker<'a>(&'a mut self) -> Option> { if self.option.is_some() { Some(BucketTaker(self)) } else { None } } - pub fn unwrap_taker<'a>(&'a mut self) -> BucketTaker<'a, K, V> { + fn unwrap_taker<'a>(&'a mut self) -> BucketTaker<'a, K, V> { debug_assert!(self.option.is_some()); BucketTaker(self) } - pub fn take(&mut self) -> Option<(K, V)> { + fn take(&mut self) -> Option<(K, V)> { self.option.take() } - pub fn into_kv(self) -> Option<(K, V)> { + fn into_kv(self) -> Option<(K, V)> { debug_assert!(self.option.is_some()); self.option } - pub fn unwrap_into_kv(self) -> (K, V) { + fn unwrap_into_kv(self) -> (K, V) { debug_assert!(self.option.is_some()); self.option.unwrap() } } -#[cfg(not(feature="test_efficient_enum"))] impl<'a, K, V> BucketTaker<'a, K, V> { - pub fn hash(&self) -> HashValue { + fn hash(&self) -> HashValue { debug_assert!(self.0.option.is_some()); self.0.hash } - pub fn key(&self) -> &K { + fn key(&self) -> &K { debug_assert!(self.0.option.is_some()); &self.0.option.as_ref().unwrap().0 } - pub fn value(&self) -> &V { + fn value(&self) -> &V { debug_assert!(self.0.option.is_some()); &self.0.option.as_ref().unwrap().1 } - pub fn value_mut(&mut self) -> &mut V { + fn value_mut(&mut self) -> &mut V { debug_assert!(self.0.option.is_some()); &mut self.0.option.as_mut().unwrap().1 } - pub fn into_value_mut(self) -> &'a mut V { 
+ fn into_value_mut(self) -> &'a mut V { debug_assert!(self.0.option.is_some()); &mut self.0.option.as_mut().unwrap().1 } - pub fn kv_mut(&mut self) -> (&mut K, &mut V) { + #[allow(dead_code)] + fn kv_mut(&mut self) -> (&mut K, &mut V) { debug_assert!(self.0.option.is_some()); let e = self.0.option.as_mut().unwrap(); (&mut e.0, &mut e.1) } - pub fn take(self) -> (K, V) { + fn take(self) -> (K, V) { debug_assert!(self.0.option.is_some()); self.0.option.take().unwrap() } } - - /// A possibly truncated hash value. /// #[derive(Debug)] @@ -300,11 +276,6 @@ impl Pos { Pos { index: i } } - #[inline] - fn is_none(&self) -> bool { - self.index == POS_NONE - } - #[inline] fn is_tombstone(&self) -> bool { self.index == POS_TOMBSTONE @@ -455,16 +426,10 @@ pub struct OrderMap { hash_builder: S, } -/// The number of steps that `current` is forward of the prev -#[inline(always)] -fn probe_delta(mask: usize, prev: usize, current: usize) -> usize { - current.wrapping_sub(prev) & mask -} - /// The number of steps that `current` is forward of the desired position for hash #[inline(always)] fn probe_distance(mask: usize, hash: HashValue, current: usize) -> usize { - probe_delta(mask, hash.desired_pos(mask), current) + current.wrapping_sub(hash.desired_pos(mask)) & mask } impl fmt::Debug for OrderMap @@ -478,18 +443,20 @@ impl fmt::Debug for OrderMap return Ok(()); } writeln!(f, "")?; - for (i, index) in enumerate(&self.indices) { - write!(f, "{}: {:?}", i, index)?; - if let PosState::Value(pos) = index.debug_pos() { - let (hash, key) = self.entries[pos].unwrap_hash_key(); - writeln!(f, ", desired={}, probe_distance={}, key={:?}", - hash.desired_pos(self.mask), - probe_distance(self.mask, hash, i), - key)?; - } else { - writeln!(f, ", tombstone")?; + if self.indices.len() > 0 { + for (i, index) in enumerate(&self.indices) { + write!(f, "{}: {:?}", i, index)?; + if let PosState::Value(pos) = index.debug_pos() { + let (hash, key) = self.entries[pos].unwrap_hash_key(); + writeln!(f, ", 
desired={}, probe_distance={}, key={:?}", + hash.desired_pos(self.mask), + probe_distance(self.mask, hash, i), + key)?; + } else { + writeln!(f, ", tombstone")?; + } + writeln!(f, "")?; } - writeln!(f, "")?; } writeln!(f, "cap={}, raw_cap={}, entries.cap={}", self.capacity(), @@ -523,19 +490,6 @@ macro_rules! probe_loop { } } -// this could not be captured in an efficient iterator -macro_rules! rev_probe_loop { - ($probe_var: ident < $len: expr, $body: expr) => { - loop { - $body - if $probe_var == 0 { - $probe_var = $len; - } - $probe_var -= 1; - } - } -} - impl OrderMap { /// Create a new map. (Does not allocate.) pub fn new() -> Self { @@ -641,13 +595,6 @@ impl Size for u64 { /// /// The u32 or u64 is *prepended* to the type parameter list! macro_rules! dispatch_32_vs_64 { - ($self_:ident . $method:ident::<$($t:ty),*>($($arg:expr),*)) => { - if $self_.size_class_is_64bit() { - $self_.$method::($($arg),*) - } else { - $self_.$method::($($arg),*) - } - }; ($self_:ident . $method:ident::<$($t:ty),*>($($arg:expr),*)) => { if $self_.size_class_is_64bit() { $self_.$method::($($arg),*) @@ -809,7 +756,7 @@ impl OrderMap let first_tombstone; debug_assert!(self.len() < self.raw_capacity()); probe_loop!(probe < self.indices.len(), { - match i!(self.indices[probe]).resolve::() { + match self.indices[probe].resolve::() { PosState::Value((i, hash_proxy)) => { let entry_hash = hash_proxy.get_short_hash(&self.entries, i); // if existing element probed less than us, swap @@ -831,7 +778,7 @@ impl OrderMap entry_tombstones: &mut self.entry_tombstones, index_tombstones: &mut self.index_tombstones, bucket_taker: taker, - pos: &mut im!(self.indices[probe]), + pos: &mut self.indices[probe], }); } }, @@ -868,10 +815,10 @@ impl OrderMap }); let mut first_tombstone = first_tombstone; - dist += 1; probe += 1; probe_loop!(probe < self.indices.len(), { - match i!(self.indices[probe]).resolve::() { + dist += 1; + match self.indices[probe].resolve::() { PosState::Value((i, hash_proxy)) => { 
let entry_hash = hash_proxy.get_short_hash(&self.entries, i); // if existing element probed less than us, swap @@ -893,7 +840,7 @@ impl OrderMap entry_tombstones: &mut self.entry_tombstones, index_tombstones: &mut self.index_tombstones, bucket_taker: taker, - pos: &mut im!(self.indices[probe]), + pos: &mut self.indices[probe], }); } else { // since their_dist >= dist and dist > tombstone_dist @@ -933,7 +880,6 @@ impl OrderMap } }, } - dist += 1; }); } @@ -1057,7 +1003,10 @@ impl OrderMap if self.len() <= 0 { return None; } let h = hash_elem_using(&self.hash_builder, key); - self.find_using(h, move |k, _| { *k.borrow() == *key }) + self.find_using(h, move |k, _| { *k.borrow() == *key }).map(|(probe, i)| { + let (key, value) = self.entries[i].unwrap_kv(); + (probe, i, key, value) + }) } /// Return probe (indices), position (entries), and key-value pairs by @@ -1069,7 +1018,10 @@ impl OrderMap if self.len() <= 0 { return None; } let h = hash_elem_using(&self.hash_builder, key); - self.find_mut_using(h, move |k, _| { *k.borrow() == *key }) + self.find_using_mut(h, move |k, _| { *k.borrow() == *key }).map(move |(probe, i)| { + let (key, value) = self.entries[i].unwrap_kv_mut(); + (probe, i, key, value) + }) } /// Return probe (indices), position (entries), and key-value pairs by @@ -1081,7 +1033,13 @@ impl OrderMap if self.len() <= 0 { return None; } let h = hash_elem_using(&self.hash_builder, key); - self.find_remove_using(h, move |k, _| { *k.borrow() == *key }) + self.find_using_mut(h, move |k, _| { *k.borrow() == *key }).map(|(probe, i)| { + let (key, value) = self.entries[i].unwrap_taker().take(); + self.indices[probe] = Pos::tombstone(); + self.entry_tombstones += 1; + self.index_tombstones += 1; + (probe, i, key, value) + }) } /// Remove the key-value pair equivalent to `key` and return the `value`. @@ -1334,13 +1292,6 @@ impl OrderMap { self.entries.reserve_exact(more); } - /// Removes all the index tombstones - /// - /// Computes in **O(n)** time. 
- fn remove_cheap_index_tombstones(&mut self) { - dispatch_32_vs_64!(self.remove_cheap_index_tombstones_impl()) - } - /// Removes all the index tombstones /// /// Computes in **O(n)** time. @@ -1369,54 +1320,12 @@ impl OrderMap { let index = self.entries.len() - 1; let probe = self.find_existing_entry_mut(index, hash); self.entries.pop(); - im!(self.indices[probe]) = Pos::tombstone(); + self.indices[probe] = Pos::tombstone(); self.index_tombstones += 1; value }) } - /// Removes cheap the index tombstones - /// - /// Computes in **O(n)** time. - fn remove_cheap_index_tombstones_impl(&mut self) - where Sz: Size - { - if self.index_tombstones == 0 { return } - - // Find the first ideal or none (looking backwards) - let mut probe = enumerate(&self.indices).rev().find(|&(i, index)| match index.pos::() { - PosState::Value(pos) => 0 == probe_distance(self.mask, self.entries[pos].unwrap_hash(), i), - PosState::IsTombstone => false, - PosState::IsNone => true, - }).map_or(0, |(i, _)| i); - - let mut need_processing = self.len() + self.index_tombstones; - rev_probe_loop!(probe < self.indices.len(), { - match i!(self.indices[probe]).state() { - PosState::Value(()) => { - need_processing -= 1; - if need_processing == 0 { return } - - rev_probe_loop!(probe < self.indices.len(), { - if i!(self.indices[probe]).is_none() { break } - need_processing -= 1; - if need_processing == 0 { return } - }); - }, - PosState::IsTombstone => { - im!(self.indices[probe]) = Pos::none(); - - self.index_tombstones -= 1; - if self.index_tombstones == 0 { return } - - need_processing -= 1; - if need_processing == 0 { return } - }, - PosState::IsNone => {}, - } - }); - } - /// Removes all the index tombstones /// /// Computes in **O(n)** time. 
@@ -1438,48 +1347,46 @@ impl OrderMap { PosState::IsTombstone => false, PosState::IsNone => true, }).map_or(0, |(i, _)| i); - probe_loop!(tombstone_head < self.indices.len(), { - if i!(self.indices[tombstone_head]).is_tombstone() { break } - }); + loop { + tombstone_head = (tombstone_head+1) & self.mask; + if self.indices[tombstone_head].is_tombstone() { break } + } - let mut tombstone_tail = tombstone_head; - let mut probe = tombstone_head + 1; + let mut probe = tombstone_head; let mut tombstones = 1; - probe_loop!(probe < self.indices.len(), { - match i!(self.indices[probe]).resolve::() { + loop { + probe = (probe+1) & self.mask; + match self.indices[probe].resolve::() { PosState::Value((i, hash_proxy)) => { let entry_hash = hash_proxy.get_short_hash(&self.entries, i); let dist = probe_distance(self.mask, entry_hash.into_hash(), probe); if dist == 0 { // clear the tombstone list - while tombstone_head != tombstone_tail { - im!(self.indices[tombstone_head]) = Pos::none(); - tombstone_head += 1; - tombstone_head &= self.mask; + while tombstone_head != probe { + self.indices[tombstone_head] = Pos::none(); + tombstone_head = (tombstone_head + 1) & self.mask; } - im!(self.indices[tombstone_head]) = Pos::none(); self.index_tombstones -= tombstones; // if we're out of tombstones, return if self.index_tombstones == 0 { return } // find a new tombstone_head - probe_loop!(probe < self.indices.len(), { - if i!(self.indices[probe]).is_tombstone() { + loop { + probe = (probe+1) & self.mask; + if self.indices[probe].is_tombstone() { break } - }); + } tombstone_head = probe; - tombstone_tail = tombstone_head; tombstones = 1; } else { if dist < tombstones { let delta = tombstones - dist; for _ in 0..delta { // pop off the head and clear it - im!(self.indices[tombstone_head]) = Pos::none(); - tombstone_head += 1; - tombstone_head &= self.mask; + self.indices[tombstone_head] = Pos::none(); + tombstone_head = (tombstone_head + 1) & self.mask; } self.index_tombstones -= delta; 
tombstones = dist; @@ -1487,42 +1394,33 @@ impl OrderMap { // move the value up self.indices.swap(tombstone_head, probe); - - tombstone_head += 1; - tombstone_head &= self.mask; - - tombstone_tail = probe; + tombstone_head = (tombstone_head + 1) & self.mask; } }, - PosState::IsTombstone => { - tombstone_tail = probe; - tombstones += 1; - }, + PosState::IsTombstone => tombstones += 1, PosState::IsNone => { // clear the tombstone list - while tombstone_head != tombstone_tail { - im!(self.indices[tombstone_head]) = Pos::none(); - tombstone_head += 1; - tombstone_head &= self.mask; + while tombstone_head != probe { + self.indices[tombstone_head] = Pos::none(); + tombstone_head = (tombstone_head + 1) & self.mask; } - im!(self.indices[tombstone_head]) = Pos::none(); self.index_tombstones -= tombstones; // if we're out of tombstones, return if self.index_tombstones == 0 { return } // find a new tombstone_head - probe_loop!(probe < self.indices.len(), { - if i!(self.indices[probe]).is_tombstone() { + loop { + probe = (probe+1) & self.mask; + if self.indices[probe].is_tombstone() { break } - }); + } tombstone_head = probe; - tombstone_tail = tombstone_head; tombstones = 1; }, } - }); + } } // write to self.indices @@ -1539,16 +1437,16 @@ impl OrderMap { let entry_hash = if SzOld::is_same_size::() { hash_proxy.get_short_hash(&self.entries, i).into_hash() } else { - i!(self.entries[i]).unwrap_hash() + self.entries[i].unwrap_hash() }; // find first empty bucket or tombstone and insert there let mut probe = entry_hash.desired_pos(self.mask); probe_loop!(probe < self.indices.len(), { - match i!(self.indices[probe]).state() { + match self.indices[probe].state() { // skip values PosState::Value(()) => {}, PosState::IsNone => { - im!(self.indices[probe]) = Pos::with_hash::(i, entry_hash); + self.indices[probe] = Pos::with_hash::(i, entry_hash); return }, PosState::IsTombstone => debug_assert!(false, "reinserting into tombstones"), @@ -1582,7 +1480,7 @@ impl OrderMap { let hash = 
e.unwrap_hash(); let probe = find_existing_entry_mut_impl::(self.mask, &mut self.indices, i, hash); - im!(self.indices[probe]) = Pos::tombstone(); + self.indices[probe] = Pos::tombstone(); self.index_tombstones += 1; self.entry_tombstones += 1; e.take(); @@ -1604,7 +1502,7 @@ impl OrderMap { .and_then(|e| e.taker().map(|e| (e.hash(), e.take()))) .map(|(hash, value)| { let probe = self.find_existing_entry_mut(index, hash); - im!(self.indices[probe]) = Pos::tombstone(); + self.indices[probe] = Pos::tombstone(); self.entry_tombstones += 1; self.index_tombstones += 1; value @@ -1661,7 +1559,7 @@ impl OrderMap { if let Some(hash) = e.hash() { if removed != 0 { let probe = find_existing_entry_mut_impl::(mask, indices, index-1, hash); - im!(indices[probe]).sub_eq::(removed); + indices[probe].sub_eq::(removed); } true } else { @@ -1674,13 +1572,13 @@ impl OrderMap { } /// Return probe (indices) and position (entries), and kv reference - fn find_using(&self, hash: HashValue, key_eq: F) -> Option<(usize, usize, &K, &V)> + fn find_using(&self, hash: HashValue, key_eq: F) -> Option<(usize, usize)> where F: Fn(&K, &V) -> bool, { dispatch_32_vs_64!(self.find_using_impl::<_>(hash, key_eq)) } - fn find_using_impl(&self, hash: HashValue, key_eq: F) -> Option<(usize, usize, &K, &V)> + fn find_using_impl(&self, hash: HashValue, key_eq: F) -> Option<(usize, usize)> where F: Fn(&K, &V) -> bool, Sz: Size, { @@ -1688,16 +1586,16 @@ impl OrderMap { let mut probe = hash.desired_pos(self.mask); let mut dist = 0; probe_loop!(probe < self.indices.len(), { - match i!(self.indices[probe]).resolve::() { + match self.indices[probe].resolve::() { PosState::Value((i, hash_proxy)) => { let entry_hash = hash_proxy.get_short_hash(&self.entries, i); if dist > probe_distance(self.mask, entry_hash.into_hash(), probe) { // give up when probe distance is too long return None; } else if entry_hash == hash { - let (key, value) = i!(self.entries[i]).unwrap_kv(); + let (key, value) = 
self.entries[i].unwrap_kv(); if key_eq(key, value) { - return Some((probe, i, key, value)); + return Some((probe, i)); } } }, @@ -1708,77 +1606,72 @@ impl OrderMap { }); } - /// Return probe (indices), position (entries), and kv reference - fn find_mut_using(&mut self, hash: HashValue, key_eq: F) -> Option<(usize, usize, &mut K, &mut V)> + /// Return probe (indices), position (entries) + fn find_using_mut(&mut self, hash: HashValue, key_eq: F) -> Option<(usize, usize)> where F: Fn(&K, &V) -> bool, { - dispatch_32_vs_64!(self.find_mut_using_impl::<_>(hash, key_eq)) + dispatch_32_vs_64!(self.find_using_mut_impl::<_>(hash, key_eq)) } - fn find_mut_using_impl(&mut self, hash: HashValue, key_eq: F) -> Option<(usize, usize, &mut K, &mut V)> + fn find_using_mut_impl(&mut self, hash: HashValue, key_eq: F) -> Option<(usize, usize)> where F: Fn(&K, &V) -> bool, Sz: Size, { debug_assert!(self.len() > 0); let mut probe = hash.desired_pos(self.mask); let mut dist = 0; + let first_tombstone; probe_loop!(probe < self.indices.len(), { - match i!(self.indices[probe]).resolve::() { + match self.indices[probe].resolve::() { PosState::Value((i, hash_proxy)) => { let entry_hash = hash_proxy.get_short_hash(&self.entries, i); if dist > probe_distance(self.mask, entry_hash.into_hash(), probe) { // give up when probe distance is too long return None; } else if entry_hash == hash { - if { let (key, value) = im!(self.entries[i]).unwrap_kv_mut(); key_eq(key, value) } { - let (key, value) = im!(self.entries[i]).unwrap_kv_mut(); - return Some((probe, i, key, value)); + let (key, value) = self.entries[i].unwrap_kv_mut(); + if key_eq(key, value) { + return Some((probe, i)); } } }, - PosState::IsTombstone => {}, + PosState::IsTombstone => { + first_tombstone = probe; + break + }, PosState::IsNone => return None, } dist += 1; }); - } - /// Return probe (indices) and position (entries) - fn find_remove_using(&mut self, hash: HashValue, key_eq: F) -> Option<(usize, usize, K, V)> - where F: Fn(&K, &V) 
-> bool, - { - dispatch_32_vs_64!(self.find_remove_using_impl::<_>(hash, key_eq)) - } - - fn find_remove_using_impl(&mut self, hash: HashValue, key_eq: F) -> Option<(usize, usize, K, V)> - where F: Fn(&K, &V) -> bool, - Sz: Size, - { - debug_assert!(self.len() > 0); - let mut probe = hash.desired_pos(self.mask); - let mut dist = 0; + let mut first_tombstone = first_tombstone; + probe += 1; probe_loop!(probe < self.indices.len(), { - match i!(self.indices[probe]).resolve::() { + dist += 1; + match self.indices[probe].resolve::() { PosState::Value((i, hash_proxy)) => { let entry_hash = hash_proxy.get_short_hash(&self.entries, i); if dist > probe_distance(self.mask, entry_hash.into_hash(), probe) { // give up when probe distance is too long return None; - } else if entry_hash == hash { - let mut taker = im!(self.entries[i]).unwrap_taker(); - if { let (key, value) = taker.kv_mut(); key_eq(key, value) } { - self.index_tombstones += 1; - self.entry_tombstones += 1; - im!(self.indices[probe]) = Pos::tombstone(); - let (key, value) = taker.take(); - return Some((probe, i, key, value)); + } + + // We're already in the neighborhood, + // any bucket ahead of our target should be moved up + self.indices.swap(first_tombstone, probe); + + if entry_hash == hash { + let (key, value) = self.entries[i].unwrap_kv_mut(); + if key_eq(key, value) { + return Some((first_tombstone, i)); } } + + first_tombstone = (first_tombstone + 1) & self.mask; }, PosState::IsTombstone => {}, PosState::IsNone => return None, } - dist += 1; }); } @@ -1786,6 +1679,7 @@ impl OrderMap { /// Return the probe for the entry. /// /// Computes in **O(1)** time (average). 
+ #[allow(dead_code)] fn find_existing_entry(&self, actual_pos: usize, hash: HashValue) -> usize { dispatch_32_vs_64!(self.find_existing_entry_impl(actual_pos, hash)) @@ -1829,57 +1723,22 @@ impl OrderMap { fn swap_remove_found_impl(&mut self, probe: usize, found: usize) -> Option<(K, V)> where Sz: Size { + // tombstone the original entry + self.indices[probe] = Pos::tombstone(); + self.index_tombstones += 1; + // index `probe` and entry `found` is to be removed // use swap_remove, but then we need to update the index that points // to the other entry that has to move - im!(self.indices[probe]) = Pos::none(); let kv = self.entries.swap_remove(found).take(); - // correct index that points to the entry that had to swap places // check if it was the last element or a tombstone if let Some(hash) = self.entries.get(found).and_then(Bucket::hash) { - // examine new element in `found` and find it in indices - let mut probe = hash.desired_pos(self.mask); - probe_loop!(probe < self.indices.len(), { - if let PosState::Value(i) = i!(self.indices[probe]).pos::() { - if i >= self.entries.len() { - // found it - im!(self.indices[probe]) = Pos::with_hash::(found, hash); - break; - } - } - }); + let probe = find_existing_entry_mut_impl::(self.mask, &mut self.indices, self.entries.len(), hash); + self.indices[probe] = Pos::with_hash::(found, hash); } - // if we're empty there is there's no work to do - if self.len() == 0 { return kv } - - // backward shift deletion in self.indices - // after probe, shift all non-ideally placed indices backward - let mut last_probe = probe; - let mut probe = probe + 1; - probe_loop!(probe < self.indices.len(), { - match i!(self.indices[probe]).resolve::() { - PosState::Value((i, hash_proxy)) => { - let entry_hash = hash_proxy.get_short_hash(&self.entries, i); - if probe_distance(self.mask, entry_hash.into_hash(), probe) > 0 { - im!(self.indices[last_probe]) = self.indices[probe]; - im!(self.indices[probe]) = Pos::none(); - } else { - break; - } - }, 
- // Always move tombstones - PosState::IsTombstone => { - im!(self.indices[last_probe]) = Pos::tombstone(); - im!(self.indices[probe]) = Pos::none(); - }, - PosState::IsNone => break, - } - last_probe = probe; - }); - kv } } @@ -1889,7 +1748,7 @@ fn entry_phase_2(indices: &mut Vec, index_tombstones: &mut usize, mut p where Sz: Size { probe_loop!(probe < indices.len(), { - let pos = &mut im!(indices[probe]); + let pos = &mut indices[probe]; match pos.state() { PosState::Value(()) => old_pos = replace(pos, old_pos), PosState::IsTombstone => { @@ -1914,7 +1773,7 @@ fn find_existing_entry_impl(mask: usize, indices: &Vec, actual_pos: usi { let mut probe = hash.desired_pos(mask); probe_loop!(probe < indices.len(), { - match i!(indices[probe]).pos::() { + match indices[probe].pos::() { PosState::Value(i) => if i == actual_pos { return probe }, PosState::IsTombstone => {}, PosState::IsNone => debug_assert!(false, "the entry does not exist"), @@ -1935,7 +1794,7 @@ fn find_existing_entry_mut_impl(mask: usize, indices: &mut Vec, actual_ let mut probe = hash.desired_pos(mask); let first_tombstone; probe_loop!(probe < indices.len(), { - match i!(indices[probe]).pos::() { + match indices[probe].pos::() { PosState::Value(i) => if i == actual_pos { return probe }, PosState::IsTombstone => { first_tombstone = probe; @@ -1948,14 +1807,13 @@ fn find_existing_entry_mut_impl(mask: usize, indices: &mut Vec, actual_ let mut first_tombstone = first_tombstone; probe += 1; probe_loop!(probe < indices.len(), { - match i!(indices[probe]).pos::() { + match indices[probe].pos::() { PosState::Value(i) => { // We're already in the neighborhood, // any bucket ahead of our target should be moved up indices.swap(first_tombstone, probe); if i == actual_pos { return first_tombstone } - first_tombstone += 1; - first_tombstone &= mask; + first_tombstone = (first_tombstone + 1) & mask; }, PosState::IsTombstone => {}, PosState::IsNone => debug_assert!(false, "the entry does not exist"), diff --git 
a/src/unsafe.rs b/src/unsafe.rs deleted file mode 100644 index b5a0cbe4..00000000 --- a/src/unsafe.rs +++ /dev/null @@ -1,163 +0,0 @@ - -#[cfg(feature="test_unsafe_index")] -macro_rules! i { - ($e:ident$(.$e2:ident)*[$i:expr]) => {*unsafe { $e$(.$e2)*.get_unchecked($i) }}; - (&$e:ident$(.$e2:ident)*[$i:expr]) => {unsafe { $e$(.$e2)*.get_unchecked($i) }}; -} - -#[cfg(feature="test_unsafe_index")] -macro_rules! im { - ($e:ident$(.$e2:ident)*[$i:expr]) => {*unsafe { $e$(.$e2)*.get_unchecked_mut($i) }}; - (&mut $e:ident$(.$e2:ident)*[$i:expr]) => {unsafe { $e$(.$e2)*.get_unchecked_mut($i) }}; -} - - -#[cfg(feature="test_efficient_enum")] -use efficient_enum::tag::TagMSB; -#[cfg(feature="test_efficient_enum")] -use efficient_enum::option::{EfficientOption, EfficientOptionInnerSome}; - - -impl HashValue { - #[cfg(feature="test_efficient_enum")] - pub fn new(v: usize) -> HashValue { - HashValue(v & (usize::max_value() >> 1)) - } -} - - -#[cfg(feature="test_efficient_enum")] -#[derive(Copy,Clone,Debug)] -pub struct Bucket { - option: EfficientOption, -} - -/// A type which can take values from a Bucket, leaving the bucket empty -#[cfg(feature="test_efficient_enum")] -pub struct BucketTaker<'a, K: 'a, V: 'a>(EfficientOptionInnerSome<'a, usize, (K, V), TagMSB>); - - -#[cfg(feature="test_efficient_enum")] -impl Bucket { - pub fn new(hash: HashValue, key: K, value: V) -> Self { - Bucket { option: EfficientOption::some((hash.0, (key, value))) } - } - - pub fn unwrap_hash_key(&self) -> (HashValue, &K) { - debug_assert!(self.option.is_some()); - let hash = self.option.clone_0().unwrap(); - (HashValue(hash), &self.option.ref_1().unwrap().0) - } - - pub fn hash(&self) -> Option { - self.option.clone_0().map(HashValue) - } - - pub fn kv(&self) -> Option<(&K, &V)> { - self.option.ref_1().map(|e| (&e.0, &e.1)) - } - - pub fn kv_mut(&mut self) -> Option<(&mut K, &mut V)> { - self.option.mut_1().map(|e| (&mut e.0, &mut e.1)) - } - - pub fn taker<'a>(&'a mut self) -> Option> { - use 
efficient_enum::option::EfficientOptionInner::IsSome; - if let IsSome(e) = self.option.inner() { - Some(BucketTaker(e)) - } else { None } - } - - pub fn take(&mut self) -> Option<(K, V)> { - self.option.take_1() - } - - pub fn into_kv(self) -> Option<(K, V)> { - debug_assert!(self.option.is_some()); - self.option.destructure_1() - } -} - -#[cfg(all(feature="test_efficient_enum", not(feature="test_unsafe_efficient_enum")))] -impl Bucket { - pub fn unwrap_hash(&self) -> HashValue { - debug_assert!(self.option.is_some()); - HashValue(self.option.clone_0().unwrap()) - } - - pub fn unwrap_kv(&self) -> (&K, &V) { - debug_assert!(self.option.is_some()); - self.kv().unwrap() - } - - pub fn unwrap_kv_mut(&mut self) -> (&mut K, &mut V) { - debug_assert!(self.option.is_some()); - self.kv_mut().unwrap() - } - - pub fn unwrap_taker<'a>(&'a mut self) -> BucketTaker<'a, K, V> { - debug_assert!(self.option.is_some()); - self.taker().unwrap() - } - - pub fn unwrap_into_kv(self) -> (K, V) { - debug_assert!(self.option.is_some()); - self.into_kv().unwrap() - } -} - -#[cfg(feature="test_efficient_enum")] -impl<'a, K, V> BucketTaker<'a, K, V> { - pub fn hash(&self) -> HashValue { - HashValue(self.0.clone_0()) - } - pub fn key(&self) -> &K { - &self.0.ref_1().0 - } - pub fn value(&self) -> &V { - &self.0.ref_1().1 - } - pub fn value_mut(&mut self) -> &mut V { - &mut self.0.mut_1().1 - } - pub fn into_value_mut(self) -> &'a mut V { - &mut self.0.destructure_1().1 - } - pub fn kv_mut(&mut self) -> (&mut K, &mut V) { - let e = self.0.mut_1(); - (&mut e.0, &mut e.1) - } - pub fn take(self) -> (K, V) { - self.0.take_1().1 - } -} - -#[cfg(feature="test_unsafe_efficient_enum")] -impl Bucket { - pub fn unwrap_hash(&self) -> HashValue { - debug_assert!(self.option.is_some()); - unsafe { HashValue(*self.option.unwrap_ref_0()) } - } - - pub fn unwrap_kv(&self) -> (&K, &V) { - debug_assert!(self.option.is_some()); - let kv = unsafe { self.option.unwrap_ref_1() }; - (&kv.0, &kv.1) - } - - pub fn 
unwrap_kv_mut(&mut self) -> (&mut K, &mut V) { - debug_assert!(self.option.is_some()); - let kv = unsafe { self.option.unwrap_mut_1() }; - (&mut kv.0, &mut kv.1) - } - - pub fn unwrap_taker<'a>(&'a mut self) -> BucketTaker<'a, K, V> { - debug_assert!(self.option.is_some()); - unsafe { BucketTaker(self.option.inner_some()) } - } - - pub fn unwrap_into_kv(self) -> (K, V) { - debug_assert!(self.option.is_some()); - unsafe { self.option.unwrap_destructure_1() } - } -}