diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..22bc1a68 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +# Compiled files +*.o +*.so +*.rlib +*.dll + +# Executables +*.exe + +# Generated by Cargo +/target/ +Cargo.lock + diff --git a/Cargo.toml b/Cargo.toml index e92fc795..fd5f54c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,8 +10,6 @@ description = "A hash table with consistent order and fast iteration." [lib] bench = false -[dependencies] - [dev-dependencies] itertools = "0.5.1" rand = "0.3" diff --git a/benches/bench.rs b/benches/bench.rs index 69eca1b0..dca4a1d7 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -610,3 +610,36 @@ fn retain_many_ordermap_100_000(b: &mut Bencher) { map }); } + +#[bench] +fn remove_index_tombstones_many_ordermap_100_000(b: &mut Bencher) { + let mut map = OMAP_100K.clone(); + map.retain(|k, _| *k % 7 == 0); + + b.iter(|| { + let mut map = map.clone(); + map.remove_index_tombstones() + }); +} + +#[bench] +fn remove_index_tombstones_half_ordermap_100_000(b: &mut Bencher) { + let mut map = OMAP_100K.clone(); + map.retain(|k, _| *k % 2 == 0); + + b.iter(|| { + let mut map = map.clone(); + map.remove_index_tombstones() + }); +} + +#[bench] +fn remove_index_tombstones_some_ordermap_100_000(b: &mut Bencher) { + let mut map = OMAP_100K.clone(); + map.retain(|k, _| *k % 100 == 0); + + b.iter(|| { + let mut map = map.clone(); + map.remove_index_tombstones() + }); +} diff --git a/src/lib.rs b/src/lib.rs index 5428f111..cc6ac32f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,15 +12,15 @@ use std::borrow::Borrow; use std::cmp::max; use std::fmt; -use std::mem::{replace}; +use std::mem::replace; use std::marker::PhantomData; -use util::{second, ptrdistance, enumerate}; +use util::enumerate; fn hash_elem_using(build: &B, k: &K) -> HashValue { let mut h = build.build_hasher(); k.hash(&mut h); - HashValue(h.finish() as usize) + HashValue::new(h.finish() as usize) } /// Hash value newtype. Not larger than usize, since anything larger @@ -38,11 +38,139 @@ impl PartialEq for HashValue { self.0 == rhs.0 } } +impl HashValue { + fn new(v: usize) -> HashValue { + HashValue(v) + } + + fn eq_lo32(&self, rhs: &HashValue) -> bool { + lo32(self.0 as u64) == lo32(rhs.0 as u64) + } + + fn desired_pos(self, mask: usize) -> usize { + self.0 & mask + } + + fn combine_pos(self, i: usize) -> u64 { + i as u64 | (lo32(self.0 as u64) << 32) as u64 + } +} + +#[derive(Copy,Clone,Debug)] +struct Bucket { + hash: HashValue, + option: Option<(K, V)>, +} + +/// A type which can take values from a Bucket, leaving the bucket empty +struct BucketTaker<'a, K: 'a, V: 'a>(&'a mut Bucket); + +impl Bucket { + fn new(hash: HashValue, key: K, value: V) -> Self { + Bucket { hash: hash, option: Some((key, value)) } + } + + fn unwrap_hash_key(&self) -> (HashValue, &K) { + debug_assert!(self.option.is_some()); + (self.hash, &self.option.as_ref().unwrap().0) + } + + // if the bucket is none, gives a meaningless hash in release and panics in debug + fn hash(&self) -> Option { + if self.option.is_some() { + Some(self.hash) + } else { None } + } + + // if the bucket is none, gives a meaningless hash in release and panics in debug + fn unwrap_hash(&self) -> HashValue { + debug_assert!(self.option.is_some()); + self.hash + } + + fn kv(&self) -> Option<(&K, &V)> { + self.option.as_ref().map(|e| (&e.0, &e.1)) + } + + fn unwrap_kv(&self) -> (&K, &V) { + debug_assert!(self.option.is_some()); + let kv = self.option.as_ref().unwrap(); + (&kv.0, &kv.1) + } + + fn kv_mut(&mut self) -> Option<(&mut K, &mut V)> { + self.option.as_mut().map(|e| (&mut e.0, &mut e.1)) + } + + fn unwrap_kv_mut(&mut self) -> (&mut K, &mut V) { + debug_assert!(self.option.is_some()); + let kv = self.option.as_mut().unwrap(); + (&mut kv.0, &mut kv.1) + } + + fn taker<'a>(&'a mut self) -> Option> { + if self.option.is_some() { + Some(BucketTaker(self)) + } else { None } + } + + fn unwrap_taker<'a>(&'a mut self) -> BucketTaker<'a, K, V> { + debug_assert!(self.option.is_some()); + BucketTaker(self) + } + + fn take(&mut self) -> Option<(K, V)> { + self.option.take() + } + + fn into_kv(self) -> Option<(K, V)> { + debug_assert!(self.option.is_some()); + self.option + } + + fn unwrap_into_kv(self) -> (K, V) { + debug_assert!(self.option.is_some()); + self.option.unwrap() + } +} + +impl<'a, K, V> BucketTaker<'a, K, V> { + fn hash(&self) -> HashValue { + debug_assert!(self.0.option.is_some()); + self.0.hash + } + fn key(&self) -> &K { + debug_assert!(self.0.option.is_some()); + &self.0.option.as_ref().unwrap().0 + } + fn value(&self) -> &V { + debug_assert!(self.0.option.is_some()); + &self.0.option.as_ref().unwrap().1 + } + fn value_mut(&mut self) -> &mut V { + debug_assert!(self.0.option.is_some()); + &mut self.0.option.as_mut().unwrap().1 + } + fn into_value_mut(self) -> &'a mut V { + debug_assert!(self.0.option.is_some()); + &mut self.0.option.as_mut().unwrap().1 + } + #[allow(dead_code)] + fn kv_mut(&mut self) -> (&mut K, &mut V) { + debug_assert!(self.0.option.is_some()); + let e = self.0.option.as_mut().unwrap(); + (&mut e.0, &mut e.1) + } + fn take(self) -> (K, V) { + debug_assert!(self.0.option.is_some()); + self.0.option.take().unwrap() + } +} /// A possibly truncated hash value. /// #[derive(Debug)] -struct ShortHash(usize, PhantomData); +struct ShortHash(HashValue, PhantomData); impl ShortHash { /// Pretend this is a full HashValue, which @@ -51,7 +179,7 @@ impl ShortHash { /// - Sz = u32: 32-bit hash is enough to select bucket index /// - Sz = u64: hash is not truncated fn into_hash(self) -> HashValue { - HashValue(self.0) + self.0 } } @@ -74,15 +202,12 @@ impl PartialEq for ShortHash where Sz: Size { #[inline] fn eq(&self, rhs: &HashValue) -> bool { if Sz::is_64_bit() { - self.0 == rhs.0 + self.0 == *rhs } else { - lo32(self.0 as u64) == lo32(rhs.0 as u64) + self.0.eq_lo32(rhs) } } } -impl From> for HashValue { - fn from(x: ShortHash) -> Self { HashValue(x.0) } -} /// `Pos` is stored in the `indices` array and it points to the index of a /// `Bucket` in self.entries. @@ -112,57 +237,112 @@ impl Clone for Pos { impl fmt::Debug for Pos { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self.pos() { - Some(i) => write!(f, "Pos({} / {:x})", i, self.index), - None => write!(f, "Pos(None)"), + match self.debug_pos() { + PosState::Value(i) => write!(f, "Pos({} / {:x})", i, self.index), + PosState::IsNone => write!(f, "Pos(None)"), + PosState::IsTombstone => write!(f, "Pos(Tombstone)"), } } } +#[derive(PartialEq, Eq)] +enum PosState { + IsNone, + IsTombstone, + Value(T), +} + +const POS_NONE: u64 = !0; +const POS_TOMBSTONE: u64 = !1; + impl Pos { #[inline] - fn none() -> Self { Pos { index: !0 } } + fn none() -> Self { Pos { index: POS_NONE } } + + #[inline] + fn tombstone() -> Self { Pos { index: POS_TOMBSTONE } } #[inline] - fn is_none(&self) -> bool { self.index == !0 } + fn with_hash(i: usize, hash: HashValue) -> Self + where Sz: Size + { + let i = if Sz::is_64_bit() { + i as u64 + } else { + hash.combine_pos(i) + }; + debug_assert!(i != POS_NONE); + debug_assert!(i != POS_TOMBSTONE); + Pos { index: i } + } #[inline] - fn pos(&self) -> Option { - if self.index == !0 { None } else { Some(lo32(self.index as u64)) } + fn is_tombstone(&self) -> bool { + self.index == POS_TOMBSTONE } #[inline] - fn with_hash(i: usize, hash: HashValue) -> Self + fn state(&self) -> PosState<()> { + match self.index { + POS_NONE => PosState::IsNone, + POS_TOMBSTONE => PosState::IsTombstone, + _ => PosState::Value(()), + } + } + + #[inline] + fn debug_pos(&self) -> PosState + { + match self.index { + POS_NONE => PosState::IsNone, + POS_TOMBSTONE => PosState::IsTombstone, + v => PosState::Value(lo32(v as u64)), + } + } + + #[inline] + fn sub_eq(&mut self, delta: usize) where Sz: Size { + debug_assert!(self.index != POS_NONE); + debug_assert!(self.index != POS_TOMBSTONE); if Sz::is_64_bit() { - Pos { - index: i as u64, - } + self.index -= delta as u64 } else { - Pos { - index: (i | (lo32(hash.0 as u64) << 32)) as u64 - } + let (i, hash) = split_lo_hi(self.index); + let i = i as usize - delta; + self.index = (i | (lo32(hash as u64) << 32)) as u64; } } #[inline] - fn resolve(&self) -> Option<(usize, ShortHashProxy)> + fn pos(&self) -> PosState where Sz: Size { - if Sz::is_64_bit() { - if !self.is_none() { - Some((self.index as usize, ShortHashProxy::new(0))) + match self.index { + POS_NONE => PosState::IsNone, + POS_TOMBSTONE => PosState::IsTombstone, + v => if Sz::is_64_bit() { + PosState::Value(v as usize) } else { - None - } - } else { - if !self.is_none() { - let (i, hash) = split_lo_hi(self.index); - Some((i as usize, ShortHashProxy::new(hash as usize))) + PosState::Value(lo32(v as u64)) + }, + } + } + + #[inline] + fn resolve(&self) -> PosState<(usize, ShortHashProxy)> + where Sz: Size + { + match self.index { + POS_NONE => PosState::IsNone, + POS_TOMBSTONE => PosState::IsTombstone, + v => if Sz::is_64_bit() { + PosState::Value((v as usize, ShortHashProxy::new(0))) } else { - None - } + let (i, hash) = split_lo_hi(v); + PosState::Value((i as usize, ShortHashProxy::new(hash as usize))) + }, } } } @@ -189,9 +369,9 @@ impl ShortHashProxy /// depending on `Sz`. fn get_short_hash(&self, entries: &[Bucket], index: usize) -> ShortHash { if Sz::is_64_bit() { - ShortHash(entries[index].hash.0, PhantomData) + ShortHash(entries[index].unwrap_hash(), PhantomData) } else { - ShortHash(self.0, PhantomData) + ShortHash(HashValue::new(self.0), PhantomData) } } } @@ -226,7 +406,7 @@ impl ShortHashProxy /// for ch in "a short treatise on fungi".chars() { /// *letters.entry(ch).or_insert(0) += 1; /// } -/// +/// /// assert_eq!(letters[&'s'], 2); /// assert_eq!(letters[&'t'], 3); /// assert_eq!(letters[&'u'], 1); @@ -239,34 +419,17 @@ pub struct OrderMap { indices: Vec, /// entries is a dense vec of entries in their order. entries.len() == len entries: Vec>, + /// the number of tombstones in `indices` waiting to be cleaned up + index_tombstones: usize, + /// the number of tombstones in `entries` waiting to be cleaned up + entry_tombstones: usize, hash_builder: S, } -#[derive(Copy, Clone, Debug)] -struct Bucket { - hash: HashValue, - key: K, - value: V, -} - -#[inline(always)] -fn desired_pos(mask: usize, hash: HashValue) -> usize { - hash.0 & mask -} - /// The number of steps that `current` is forward of the desired position for hash #[inline(always)] fn probe_distance(mask: usize, hash: HashValue, current: usize) -> usize { - current.wrapping_sub(desired_pos(mask, hash)) & mask -} - -enum Inserted { - Done, - Swapped { prev_value: V }, - RobinHood { - probe: usize, - old_pos: Pos, - } + current.wrapping_sub(hash.desired_pos(mask)) & mask } impl fmt::Debug for OrderMap @@ -275,28 +438,30 @@ impl fmt::Debug for OrderMap S: BuildHasher, { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - try!(f.debug_map().entries(self.iter()).finish()); + f.debug_map().entries(self.iter()).finish()?; if cfg!(not(feature = "test_debug")) { return Ok(()); } - try!(writeln!(f, "")); - for (i, index) in enumerate(&self.indices) { - try!(write!(f, "{}: {:?}", i, index)); - if let Some(pos) = index.pos() { - let hash = self.entries[pos].hash; - let key = &self.entries[pos].key; - let desire = desired_pos(self.mask, hash); - try!(writeln!(f, ", desired={}, probe_distance={}, key={:?}", - desire, - probe_distance(self.mask, hash, i), - key)); + writeln!(f, "")?; + if self.indices.len() > 0 { + for (i, index) in enumerate(&self.indices) { + write!(f, "{}: {:?}", i, index)?; + if let PosState::Value(pos) = index.debug_pos() { + let (hash, key) = self.entries[pos].unwrap_hash_key(); + writeln!(f, ", desired={}, probe_distance={}, key={:?}", + hash.desired_pos(self.mask), + probe_distance(self.mask, hash, i), + key)?; + } else { + writeln!(f, ", tombstone")?; + } + writeln!(f, "")?; } - try!(writeln!(f, "")); } - try!(writeln!(f, "cap={}, raw_cap={}, entries.cap={}", + writeln!(f, "cap={}, raw_cap={}, entries.cap={}", self.capacity(), self.raw_capacity(), - self.entries.capacity())); + self.entries.capacity())?; Ok(()) } } @@ -354,6 +519,8 @@ impl OrderMap mask: 0, indices: Vec::new(), entries: Vec::new(), + entry_tombstones: 0, + index_tombstones: 0, hash_builder: hash_builder, } } else { @@ -363,6 +530,8 @@ impl OrderMap mask: raw_cap.wrapping_sub(1), indices: vec![Pos::none(); raw_cap], entries: Vec::with_capacity(usable_capacity(raw_cap)), + entry_tombstones: 0, + index_tombstones: 0, hash_builder: hash_builder, } } @@ -371,7 +540,12 @@ impl OrderMap /// Return the number of key-value pairs in the map. /// /// Computes in **O(1)** time. - pub fn len(&self) -> usize { self.entries.len() } + pub fn len(&self) -> usize { self.entries.len() - self.entry_tombstones } + + /// Return the number of tombstoned key-value pairs in the map. + /// + /// Computes in **O(1)** time. + pub fn tombstones(&self) -> usize { self.entry_tombstones } // Return whether we need 32 or 64 bits to specify a bucket or entry index #[cfg(not(feature = "test_low_transition_point"))] @@ -437,14 +611,23 @@ macro_rules! dispatch_32_vs_64 { }; } -/// FIXME: Remove dependence on the `S` parameter -/// (to match HashMap). -pub enum Entry<'a, K: 'a, V: 'a, S: 'a = RandomState> { - Occupied(OccupiedEntry<'a, K, V, S>), - Vacant(VacantEntry<'a, K, V, S>), +pub enum Entry<'a, K: 'a, V: 'a,> { + Occupied(OccupiedEntry<'a, K, V>), + Vacant(VacantEntry<'a, K, V>), } -impl<'a, K, V, S> Entry<'a, K, V, S> { +impl<'a, K, V> Entry<'a, K, V> { + /// Computes in **O(1)** time (amortized average). + pub fn insert(self, value: V) -> Option { + match self { + Entry::Occupied(entry) => Some(entry.insert(value)), + Entry::Vacant(entry) => { + entry.insert(value); + None + }, + } + } + /// Computes in **O(1)** time (amortized average). pub fn or_insert(self, default: V) -> &'a mut V { match self { @@ -471,27 +654,29 @@ impl<'a, K, V, S> Entry<'a, K, V, S> { } } -pub struct OccupiedEntry<'a, K: 'a, V: 'a, S: 'a = RandomState> { - map: &'a mut OrderMap, - key: K, - probe: usize, - index: usize, +pub struct OccupiedEntry<'a, K: 'a, V: 'a> { + index_tombstones: &'a mut usize, + entry_tombstones: &'a mut usize, + pos: &'a mut Pos, + bucket_taker: BucketTaker<'a, K, V>, } -impl<'a, K, V, S> OccupiedEntry<'a, K, V, S> { - pub fn key(&self) -> &K { &self.key } +impl<'a, K, V> OccupiedEntry<'a, K, V> { + pub fn key(&self) -> &K { + self.bucket_taker.key() + } pub fn get(&self) -> &V { - &self.map.entries[self.index].value + self.bucket_taker.value() } pub fn get_mut(&mut self) -> &mut V { - &mut self.map.entries[self.index].value + self.bucket_taker.value_mut() } pub fn into_mut(self) -> &'a mut V { - &mut self.map.entries[self.index].value + self.bucket_taker.into_value_mut() } pub fn insert(self, value: V) -> V { - replace(&mut self.into_mut(), value) + replace(self.into_mut(), value) } pub fn remove(self) -> V { @@ -499,24 +684,33 @@ impl<'a, K, V, S> OccupiedEntry<'a, K, V, S> { } /// Remove and return the key, value pair stored in the map for this entry + /// + /// This leaves a tombstone in the place of the removed element, which preserves + /// indices. pub fn remove_entry(self) -> (K, V) { - self.map.remove_found(self.probe, self.index) + *self.index_tombstones +=1; + *self.entry_tombstones += 1; + *self.pos = Pos::tombstone(); + self.bucket_taker.take() } } -pub struct VacantEntry<'a, K: 'a, V: 'a, S: 'a = RandomState> { - map: &'a mut OrderMap, +pub struct VacantEntry<'a, K: 'a, V: 'a> { + indices: &'a mut Vec, + entries: &'a mut Vec>, + index_tombstones: &'a mut usize, + size_class_is_64bit: bool, key: K, hash: HashValue, probe: usize, } -impl<'a, K, V, S> VacantEntry<'a, K, V, S> { +impl<'a, K, V> VacantEntry<'a, K, V> { pub fn key(&self) -> &K { &self.key } pub fn into_key(self) -> K { self.key } pub fn insert(self, value: V) -> &'a mut V { - if self.map.size_class_is_64bit() { + if self.size_class_is_64bit { self.insert_impl::(value) } else { self.insert_impl::(value) @@ -526,11 +720,11 @@ impl<'a, K, V, S> VacantEntry<'a, K, V, S> { fn insert_impl(self, value: V) -> &'a mut V where Sz: Size { - let index = self.map.entries.len(); - self.map.entries.push(Bucket { hash: self.hash, key: self.key, value: value }); + let index = self.entries.len(); + self.entries.push(Bucket::new(self.hash, self.key, value)); let old_pos = Pos::with_hash::(index, self.hash); - self.map.insert_phase_2::(self.probe, old_pos); - &mut {self.map}.entries[index].value + entry_phase_2::(self.indices, self.index_tombstones, self.probe, old_pos); + self.entries[index].kv_mut().unwrap().1 } } @@ -541,195 +735,170 @@ impl OrderMap /// Get the given key’s corresponding entry in the map for in-place manipulation. /// /// Computes in **O(1)** time (amortized average). - pub fn entry(&mut self, key: K) -> Entry { + pub fn entry(&mut self, key: K) -> Entry { self.reserve_one(); dispatch_32_vs_64!(self.entry_phase_1(key)) } - // Warning, this is a code duplication zone Entry is not yet finished - fn entry_phase_1(&mut self, key: K) -> Entry + // First phase: Look for the preferred location for key. + // + // We will know if `key` is already in the map, before we need to insert it. + // + // If we don't find the key, or we find a location from which we should steal + // (robin hood hashing), `VacantEntry` is returned. Any displacing that needs + // to be done occurs in `entry_phase_2` + fn entry_phase_1(&mut self, key: K) -> Entry where Sz: Size { let hash = hash_elem_using(&self.hash_builder, &key); - let mut probe = desired_pos(self.mask, hash); + let mut probe = hash.desired_pos(self.mask); let mut dist = 0; + let first_tombstone; debug_assert!(self.len() < self.raw_capacity()); probe_loop!(probe < self.indices.len(), { - if let Some((i, hash_proxy)) = self.indices[probe].resolve::() { - let entry_hash = hash_proxy.get_short_hash(&self.entries, i); - // if existing element probed less than us, swap - let their_dist = probe_distance(self.mask, entry_hash.into_hash(), probe); - if their_dist < dist { - // robin hood: steal the spot if it's better for us + match self.indices[probe].resolve::() { + PosState::Value((i, hash_proxy)) => { + let entry_hash = hash_proxy.get_short_hash(&self.entries, i); + // if existing element probed less than us, swap + let their_dist = probe_distance(self.mask, entry_hash.into_hash(), probe); + if their_dist < dist { + // robin hood: steal the spot if it's better for us + return Entry::Vacant(VacantEntry { + size_class_is_64bit: self.size_class_is_64bit(), + indices: &mut self.indices, + entries: &mut self.entries, + index_tombstones: &mut self.index_tombstones, + hash: hash, + key: key, + probe: probe, + }); + } else if entry_hash == hash && *self.entries[i].unwrap_kv().0 == key { + let taker = self.entries[i].unwrap_taker(); + return Entry::Occupied(OccupiedEntry { + entry_tombstones: &mut self.entry_tombstones, + index_tombstones: &mut self.index_tombstones, + bucket_taker: taker, + pos: &mut self.indices[probe], + }); + } + }, + PosState::IsNone => { + // empty bucket, insert here return Entry::Vacant(VacantEntry { - map: self, + size_class_is_64bit: self.size_class_is_64bit(), + indices: &mut self.indices, + entries: &mut self.entries, + index_tombstones: &mut self.index_tombstones, hash: hash, key: key, probe: probe, }); - } else if entry_hash == hash && self.entries[i].key == key { - return Entry::Occupied(OccupiedEntry { - map: self, - key: key, - probe: probe, - index: i, - }); - } - } else { - // empty bucket, insert here - return Entry::Vacant(VacantEntry { - map: self, - hash: hash, - key: key, - probe: probe, - }); - } - dist += 1; - }); - } - - /// Remove all key-value pairs in the map, while preserving its capacity. - /// - /// Computes in **O(n)** time. - pub fn clear(&mut self) { - self.entries.clear(); - for pos in &mut self.indices { - *pos = Pos::none(); - } - } - - /// FIXME Not implemented fully yet - pub fn reserve(&mut self, additional: usize) { - if additional > 0 { - self.reserve_one(); - } - } + }, + PosState::IsTombstone => { + if self.index_tombstones == self.indices.len() { + return Entry::Vacant(VacantEntry { + size_class_is_64bit: self.size_class_is_64bit(), + indices: &mut self.indices, + entries: &mut self.entries, + index_tombstones: &mut self.index_tombstones, + hash: hash, + key: key, + probe: probe, + }); + } - // First phase: Look for the preferred location for key. - // - // We will know if `key` is already in the map, before we need to insert it. - // When we insert they key, it might be that we need to continue displacing - // entries (robin hood hashing), in which case Inserted::RobinHood is returned - fn insert_phase_1(&mut self, key: K, value: V) -> Inserted - where Sz: Size - { - let hash = hash_elem_using(&self.hash_builder, &key); - let mut probe = desired_pos(self.mask, hash); - let mut dist = 0; - let insert_kind; - debug_assert!(self.len() < self.raw_capacity()); - probe_loop!(probe < self.indices.len(), { - let pos = &mut self.indices[probe]; - if let Some((i, hash_proxy)) = pos.resolve::() { - let entry_hash = hash_proxy.get_short_hash(&self.entries, i); - // if existing element probed less than us, swap - let their_dist = probe_distance(self.mask, entry_hash.into_hash(), probe); - if their_dist < dist { - // robin hood: steal the spot if it's better for us - let index = self.entries.len(); - insert_kind = Inserted::RobinHood { - probe: probe, - old_pos: Pos::with_hash::(index, hash), - }; + first_tombstone = probe; break; - } else if entry_hash == hash && self.entries[i].key == key { - return Inserted::Swapped { - prev_value: replace(&mut self.entries[i].value, value), - }; - } - } else { - // empty bucket, insert here - let index = self.entries.len(); - *pos = Pos::with_hash::(index, hash); - insert_kind = Inserted::Done; - break; + }, } dist += 1; }); - self.entries.push(Bucket { hash: hash, key: key, value: value }); - insert_kind - } - - fn first_allocation(&mut self) { - debug_assert_eq!(self.len(), 0); - let raw_cap = 8usize; - self.mask = raw_cap.wrapping_sub(1); - self.indices = vec![Pos::none(); raw_cap]; - self.entries = Vec::with_capacity(usable_capacity(raw_cap)); - } - - #[inline(never)] - // `Sz` is *current* Size class, before grow - fn double_capacity(&mut self) - where Sz: Size - { - debug_assert!(self.raw_capacity() == 0 || self.len() > 0); - if self.raw_capacity() == 0 { - return self.first_allocation(); - } - // find first ideally placed element -- start of cluster - let mut first_ideal = 0; - for (i, index) in enumerate(&self.indices) { - if let Some(pos) = index.pos() { - if 0 == probe_distance(self.mask, self.entries[pos].hash, i) { - first_ideal = i; - break; - } + let mut first_tombstone = first_tombstone; + probe += 1; + probe_loop!(probe < self.indices.len(), { + dist += 1; + match self.indices[probe].resolve::() { + PosState::Value((i, hash_proxy)) => { + let entry_hash = hash_proxy.get_short_hash(&self.entries, i); + // if existing element probed less than us, swap + let their_dist = probe_distance(self.mask, entry_hash.into_hash(), probe); + if their_dist < dist { + // take the tombstone + return Entry::Vacant(VacantEntry { + size_class_is_64bit: self.size_class_is_64bit(), + indices: &mut self.indices, + entries: &mut self.entries, + index_tombstones: &mut self.index_tombstones, + hash: hash, + key: key, + probe: first_tombstone, + }); + } else if entry_hash == hash && *self.entries[i].unwrap_kv().0 == key { + let taker = self.entries[i].unwrap_taker(); + return Entry::Occupied(OccupiedEntry { + entry_tombstones: &mut self.entry_tombstones, + index_tombstones: &mut self.index_tombstones, + bucket_taker: taker, + pos: &mut self.indices[probe], + }); + } else { + // since their_dist >= dist and dist > tombstone_dist + // any bucket ahead of our target should be moved up + self.indices.swap(first_tombstone, probe); + + // Since we're compacting everything, the next + // tombstone will always be directly after + first_tombstone += 1; + first_tombstone &= self.mask + } + }, + PosState::IsNone => { + // empty bucket, insert at last tombstone + return Entry::Vacant(VacantEntry { + size_class_is_64bit: self.size_class_is_64bit(), + indices: &mut self.indices, + entries: &mut self.entries, + index_tombstones: &mut self.index_tombstones, + hash: hash, + key: key, + probe: first_tombstone, + }); + }, + PosState::IsTombstone => { + // tombstone bucket, insert here if we've checked everywhere else + if dist >= self.indices.len() { + return Entry::Vacant(VacantEntry { + size_class_is_64bit: self.size_class_is_64bit(), + indices: &mut self.indices, + entries: &mut self.entries, + index_tombstones: &mut self.index_tombstones, + hash: hash, + key: key, + probe: first_tombstone, + }); + } + }, } - } - - // visit the entries in an order where we can simply reinsert them - // into self.indices without any bucket stealing. - let new_raw_cap = self.indices.len() * 2; - let old_indices = replace(&mut self.indices, vec![Pos::none(); new_raw_cap]); - self.mask = new_raw_cap.wrapping_sub(1); - - // `Sz` is the old size class, and either u32 or u64 is the new - for &pos in &old_indices[first_ideal..] { - dispatch_32_vs_64!(self.reinsert_entry_in_order::(pos)); - } - - for &pos in &old_indices[..first_ideal] { - dispatch_32_vs_64!(self.reinsert_entry_in_order::(pos)); - } - let more = self.capacity() - self.len(); - self.entries.reserve_exact(more); + }); } - // write to self.indices - // read from self.entries at `pos` - // - // reinserting rewrites all `Pos` entries anyway. This handles transitioning - // from u32 to u64 size class if needed by using the two type parameters. - fn reinsert_entry_in_order(&mut self, pos: Pos) - where SzNew: Size, - SzOld: Size, - { - if let Some((i, hash_proxy)) = pos.resolve::() { - // only if the size class is conserved can we use the short hash - let entry_hash = if SzOld::is_same_size::() { - hash_proxy.get_short_hash(&self.entries, i).into_hash() - } else { - self.entries[i].hash - }; - // find first empty bucket and insert there - let mut probe = desired_pos(self.mask, entry_hash); - probe_loop!(probe < self.indices.len(), { - if let Some(_) = self.indices[probe].resolve::() { - /* nothing */ - } else { - // empty bucket, insert here - self.indices[probe] = Pos::with_hash::(i, entry_hash); - return; - } - }); - } - } + /// Reserves capacity for at least `additional` more elements to be + /// inserted. + /// + /// The collection may reserve more space to avoid frequent reallocations. + /// + /// Computes in **O(n)** time. + pub fn reserve(&mut self, additional: usize) { + if additional == 0 { return } + self.entries.reserve(additional); + let raw_cap = to_raw_capacity(self.len() + self.index_tombstones + additional); + if raw_cap < self.raw_capacity() { return } - fn reserve_one(&mut self) { - if self.len() == self.capacity() { - dispatch_32_vs_64!(self.double_capacity()); + if self.index_tombstones >= additional { + self.remove_index_tombstones(); + } else { + dispatch_32_vs_64!(self.change_capacity(raw_cap.next_power_of_two())); } } @@ -740,46 +909,30 @@ impl OrderMap /// /// Computes in **O(1)** time (amortized average). pub fn insert(&mut self, key: K, value: V) -> Option { - self.reserve_one(); - if self.size_class_is_64bit() { - match self.insert_phase_1::(key, value) { - Inserted::Swapped { prev_value } => Some(prev_value), - Inserted::Done => None, - Inserted::RobinHood { probe, old_pos } => { - self.insert_phase_2::(probe, old_pos); - None - } - } - } else { - match self.insert_phase_1::(key, value) { - Inserted::Swapped { prev_value } => Some(prev_value), - Inserted::Done => None, - Inserted::RobinHood { probe, old_pos } => { - self.insert_phase_2::(probe, old_pos); - None - } - } - } + self.entry(key).insert(value) } /// Return an iterator over the keys of the map, in their order pub fn keys(&self) -> Keys { Keys { - iter: self.entries.iter() + iter: self.entries.iter(), + tombstones: self.entry_tombstones, } } /// Return an iterator over the key-value pairs of the map, in their order pub fn iter(&self) -> Iter { Iter { - iter: self.entries.iter() + iter: self.entries.iter(), + tombstones: self.entry_tombstones, } } /// Return an iterator over the key-value pairs of the map, in their order pub fn iter_mut(&mut self) -> IterMut { IterMut { - iter: self.entries.iter_mut() + iter: self.entries.iter_mut(), + tombstones: self.entry_tombstones, } } @@ -801,19 +954,14 @@ impl OrderMap where K: Borrow, Q: Eq + Hash, { - self.get_pair(key).map(second) + self.get_pair(key).map(|(_, v)| v) } pub fn get_pair(&self, key: &Q) -> Option<(&K, &V)> where K: Borrow, Q: Eq + Hash, { - if let Some((_, found)) = self.find(key) { - let entry = &self.entries[found]; - Some((&entry.key, &entry.value)) - } else { - None - } + self.get_pair_index(key).map(|(_, k, v)| (k, v)) } /// Return item index, key and value @@ -821,19 +969,14 @@ impl OrderMap where K: Borrow, Q: Eq + Hash, { - if let Some((_, found)) = self.find(key) { - let entry = &self.entries[found]; - Some((found, &entry.key, &entry.value)) - } else { - None - } + self.find(key).map(|(_, found, k, v)| (found, k, v)) } pub fn get_mut(&mut self, key: &Q) -> Option<&mut V> where K: Borrow, Q: Eq + Hash, { - self.get_pair_mut(key).map(second) + self.get_pair_mut(key).map(|(_, v)| v) } pub fn get_pair_mut(&mut self, key: &Q) @@ -841,12 +984,7 @@ impl OrderMap where K: Borrow, Q: Eq + Hash, { - if let Some((_, found)) = self.find(key) { - let entry = &mut self.entries[found]; - Some((&mut entry.key, &mut entry.value)) - } else { - None - } + self.get_pair_index_mut(key).map(|(_, k, v)| (k, v)) } pub fn get_pair_index_mut(&mut self, key: &Q) @@ -854,49 +992,116 @@ impl OrderMap where K: Borrow, Q: Eq + Hash, { - if let Some((_, found)) = self.find(key) { - let entry = &mut self.entries[found]; - Some((found, &mut entry.key, &mut entry.value)) - } else { - None - } + self.find_mut(key).map(|(_, found, k, v)| (found, k, v)) } - /// Return probe (indices) and position (entries) - fn find(&self, key: &Q) -> Option<(usize, usize)> + /// Return probe (indices), position (entries), and key-value pairs by `&`. + fn find(&self, key: &Q) -> Option<(usize, usize, &K, &V)> where K: Borrow, Q: Eq + Hash, { - if self.len() == 0 { return None; } + if self.len() <= 0 { return None; } + let h = hash_elem_using(&self.hash_builder, key); - self.find_using(h, move |entry| { *entry.key.borrow() == *key }) + self.find_using(h, move |k, _| { *k.borrow() == *key }).map(|(probe, i)| { + let (key, value) = self.entries[i].unwrap_kv(); + (probe, i, key, value) + }) } - /// Remove the key-value pair equivalent to `key` and return - /// its value. - /// - /// Like `Vec::swap_remove`, the pair is removed by swapping it with the - /// last element of the map and popping it off. **This perturbs - /// the postion of what used to be the last element!** - /// + /// Return probe (indices), position (entries), and key-value pairs by + /// `&mut`. + fn find_mut(&mut self, key: &Q) -> Option<(usize, usize, &mut K, &mut V)> + where K: Borrow, + Q: Eq + Hash, + { + if self.len() <= 0 { return None; } + + let h = hash_elem_using(&self.hash_builder, key); + self.find_using_mut(h, move |k, _| { *k.borrow() == *key }).map(move |(probe, i)| { + let (key, value) = self.entries[i].unwrap_kv_mut(); + (probe, i, key, value) + }) + } + + /// Return probe (indices), position (entries), and key-value pairs by + /// value. + fn find_remove(&mut self, key: &Q) -> Option<(usize, usize, K, V)> + where K: Borrow, + Q: Eq + Hash, + { + if self.len() <= 0 { return None; } + + let h = hash_elem_using(&self.hash_builder, key); + self.find_using_mut(h, move |k, _| { *k.borrow() == *key }).map(|(probe, i)| { + let (key, value) = self.entries[i].unwrap_taker().take(); + self.indices[probe] = Pos::tombstone(); + self.entry_tombstones += 1; + self.index_tombstones += 1; + (probe, i, key, value) + }) + } + + /// Remove the key-value pair equivalent to `key` and return the `value`. + /// + /// This leaves a tombstone in the place of the removed element, which preserves + /// indices. + /// /// Return `None` if `key` is not in map. /// /// Computes in **O(1)** time (average). - pub fn swap_remove(&mut self, key: &Q) -> Option + pub fn remove(&mut self, key: &Q) -> Option where K: Borrow, Q: Eq + Hash, { - self.swap_remove_pair(key).map(second) + self.remove_pair(key).map(|(_, v)| v) } - /// FIXME Same as .swap_remove + /// Remove the key-value pair equivalent to `key` and return it. + /// + /// This leaves a tombstone in the place of the removed element, which preserves + /// indices. + /// + /// Return `None` if `key` is not in map. /// /// Computes in **O(1)** time (average). - pub fn remove(&mut self, key: &Q) -> Option + fn remove_pair(&mut self, key: &Q) -> Option<(K, V)> where K: Borrow, Q: Eq + Hash, { - self.swap_remove(key) + self.remove_pair_index(key).map(|(_, k, v)| (k, v)) + } + + /// Remove the key-value pair equivalent to `key` and return it along with + /// the formerly occupied index. + /// + /// This leaves a tombstone in the place of the removed element, which preserves + /// indices. + /// + /// Return `None` if `key` is not in map. + /// + /// Computes in **O(1)** time (average). + fn remove_pair_index(&mut self, key: &Q) -> Option<(usize, K, V)> + where K: Borrow, + Q: Eq + Hash, + { + self.find_remove(key).map(|(_, found, k, v)| (found, k, v)) + } + + /// Remove the key-value pair equivalent to `key` and return the `value`. + /// + /// Like `Vec::swap_remove`, the pair is removed by swapping it with the + /// last element of the map and popping it off. **This perturbs + /// the postion of what used to be the last element!** + /// + /// Return `None` if `key` is not in map. + /// + /// Computes in **O(1)** time (average). + pub fn swap_remove(&mut self, key: &Q) -> Option + where K: Borrow, + Q: Eq + Hash, + { + self.swap_remove_pair(key).map(|(_, v)| v) } /// Remove the key-value pair equivalent to `key` and return it. @@ -910,11 +1115,31 @@ impl OrderMap where K: Borrow, Q: Eq + Hash, { - let (probe, found) = match self.find(key) { + let (probe, found, _, _) = match self.find(key) { None => return None, Some(t) => t, }; - Some(self.remove_found(probe, found)) + self.swap_remove_found(probe, found) + } +} + +// Methods that don't use any properties (Hash / Eq) of K. +// +// It's cleaner to separate them out, then the compiler checks that we are not +// using Hash + Eq at all in these methods. +// +// However, we should probably not let this show in the public API or docs. +impl OrderMap { + /// Remove all key-value pairs in the map, while preserving its capacity. + /// + /// Computes in **O(n)** time. + pub fn clear(&mut self) { + self.entry_tombstones = 0; + self.index_tombstones = 0; + self.entries.clear(); + for pos in &mut self.indices { + *pos = Pos::none(); + } } /// Remove the last key-value pair @@ -930,58 +1155,70 @@ impl OrderMap /// The order the elements are visited is not specified. /// /// Computes in **O(n)** time (average). - pub fn retain(&mut self, mut keep: F) - where F: FnMut(&mut K, &mut V) -> bool, + pub fn retain(&mut self, keep: F) + where F: FnMut(&mut K, &mut V) -> bool { - // We can use either forward or reverse scan, but forward was - // faster in a microbenchmark - let mut i = 0; - while i < self.len() { - { - let entry = &mut self.entries[i]; - if keep(&mut entry.key, &mut entry.value) { - i += 1; - continue; - } - } - self.swap_remove_index(i); - // skip increment on remove - } + dispatch_32_vs_64!(self.retain_impl::<_>(keep)) } -} -impl OrderMap { /// Get a key-value pair by index /// - /// Valid indices are *0 <= index < self.len()* + /// Valid indices are *0 <= index < self.len() + self.tombstones()* /// /// Computes in **O(1)** time. pub fn get_index(&self, index: usize) -> Option<(&K, &V)> { - self.entries.get(index).map(|ent| (&ent.key, &ent.value)) + self.entries.get(index).and_then(|e| e.kv()) } /// Get a key-value pair by index /// - /// Valid indices are *0 <= index < self.len()* + /// Valid indices are *0 <= index < self.len() + self.tombstones()* /// /// Computes in **O(1)** time. pub fn get_index_mut(&mut self, index: usize) -> Option<(&mut K, &mut V)> { - self.entries.get_mut(index).map(|ent| (&mut ent.key, &mut ent.value)) + self.entries.get_mut(index).and_then(|e| e.kv_mut()) } /// Remove the key-value pair by index /// - /// Valid indices are *0 <= index < self.len()* + /// Valid indices are *0 <= index < self.len() + self.tombstones()* /// /// Computes in **O(1)** time (average). pub fn swap_remove_index(&mut self, index: usize) -> Option<(K, V)> { - let (probe, found) = match self.entries.get(index) - .map(|e| self.find_existing_entry(e)) - { - None => return None, - Some(t) => t, - }; - Some(self.remove_found(probe, found)) + self.entries.get(index).and_then(|e| e.hash()) + .map(|hash| self.find_existing_entry_mut(index, hash)) + .and_then(|probe| self.swap_remove_found(probe, index)) + } + + /// Remove the key-value pair by index + /// + /// Valid indices are *0 <= index < self.len() + self.tombstones()* + /// + /// This leaves a tombstone in the place of the removed element, which preserves + /// indices. + /// + /// Computes in **O(1)** time (average). + pub fn remove_index(&mut self, index: usize) -> Option<(K, V)> { + dispatch_32_vs_64!(self.remove_index_impl(index)) + } + + /// Swaps the index of two key-value pairs by index + /// + /// Valid indices are *0 <= index < self.len() + self.tombstones()* + /// + /// Computes in **O(1)** time (average). + pub fn swap_index(&mut self, a: usize, b: usize) { + dispatch_32_vs_64!(self.swap_index_impl(a, b)) + } + + /// Removes all the entry tombstones. + /// + /// Note that this means indices (e.g. those returned by `get_index`) may + /// no longer be valid. + /// + /// Computes in **O(n)** time (average). + pub fn remove_tombstones(&mut self) { + dispatch_32_vs_64!(self.remove_tombstones_impl()) } } @@ -992,140 +1229,596 @@ impl OrderMap { // // However, we should probably not let this show in the public API or docs. impl OrderMap { + fn first_allocation(&mut self) { + debug_assert_eq!(self.len(), 0); + let raw_cap = 8usize; + self.mask = raw_cap.wrapping_sub(1); + self.indices = vec![Pos::none(); raw_cap]; + self.entries = Vec::with_capacity(usable_capacity(raw_cap)); + } + + // `Sz` is *current* Size class, before grow + /// Computes in **O(n)** time. + fn double_capacity(&mut self) { + if self.raw_capacity() == 0 { + return self.first_allocation(); + } + let raw_cap = self.indices.len() * 2; + + dispatch_32_vs_64!(self.change_capacity(raw_cap)); + } + + #[inline(never)] + // `Sz` is *current* Size class, before grow + // it is the caller's responsibility to only pass powers of 2 to new_raw_cap + /// Computes in **O(n)** time. + fn change_capacity(&mut self, raw_cap: usize) + where Sz: Size + { + debug_assert!(self.raw_capacity() == 0 || self.len() > 0); + debug_assert!(raw_cap.is_power_of_two()); + if self.raw_capacity() == 0 { + self.mask = raw_cap.wrapping_sub(1); + self.indices = vec![Pos::none(); raw_cap]; + self.entries = Vec::with_capacity(usable_capacity(raw_cap)); + return; + } + + // find first ideally placed element -- start of cluster + let first_ideal = enumerate(&self.indices).find(|&(i, pos)| match pos.pos::() { + PosState::Value(pos) => 0 == probe_distance(self.mask, self.entries[pos].unwrap_hash(), i), + PosState::IsTombstone => false, + PosState::IsNone => false, + }).map_or(0, |(i, _)| i); + + // visit the entries in an order where we can simply reinsert them + // into self.indices without any bucket stealing. + let old_indices = replace(&mut self.indices, vec![Pos::none(); raw_cap]); + self.mask = raw_cap.wrapping_sub(1); + + // `Sz` is the old size class, and either u32 or u64 is the new + for &pos in &old_indices[first_ideal..] { + dispatch_32_vs_64!(self.reinsert_entry_in_order::(pos)); + } + + for &pos in &old_indices[..first_ideal] { + dispatch_32_vs_64!(self.reinsert_entry_in_order::(pos)); + } + + // We've removed all the index tombstones + self.index_tombstones = 0; + + let more = self.capacity() - self.len(); + self.entries.reserve_exact(more); + } + + /// Removes all the index tombstones + /// + /// Computes in **O(n)** time. + #[cfg(test)] + fn remove_index_tombstones(&mut self) { + dispatch_32_vs_64!(self.remove_index_tombstones_impl()) + } + + /// Removes all the index tombstones + /// + /// Computes in **O(n)** time. + #[cfg(not(test))] + pub fn remove_index_tombstones(&mut self) { + dispatch_32_vs_64!(self.remove_index_tombstones_impl()) + } + + /// Remove the last key-value pair + /// + /// Computes in **O(1)** time (average). fn pop_impl(&mut self) -> Option<(K, V)> { - let (probe, found) = match self.entries.last() - .map(|e| self.find_existing_entry(e)) - { - None => return None, - Some(t) => t, - }; - debug_assert_eq!(found, self.entries.len() - 1); - Some(self.remove_found(probe, found)) + // if there entries are empty, just return + // otherwise, try to get the hash (last entry might be a tombstone) + self.entries.last_mut() + .and_then(|mut e| e.taker().map(|e| (e.hash(), e.take()))) + .map(|(hash, value)| { + let index = self.entries.len() - 1; + let probe = self.find_existing_entry_mut(index, hash); + self.entries.pop(); + self.indices[probe] = Pos::tombstone(); + self.index_tombstones += 1; + value + }) + } + + /// Removes all the index tombstones + /// + /// Computes in **O(n)** time. + fn remove_index_tombstones_impl(&mut self) + where Sz: Size + { + if self.index_tombstones == 0 { return } + + if self.len() == 0 { + for p in self.indices.iter_mut() { + *p = Pos::none(); + } + return; + } + + // Find the first tombstone after an ideal or none + let mut tombstone_head = enumerate(&self.indices).find(|&(i, index)| match index.pos::() { + PosState::Value(pos) => 0 == probe_distance(self.mask, self.entries[pos].unwrap_hash(), i), + PosState::IsTombstone => false, + PosState::IsNone => true, + }).map_or(0, |(i, _)| i); + loop { + tombstone_head = (tombstone_head+1) & self.mask; + if self.indices[tombstone_head].is_tombstone() { break } + } + + let mut probe = tombstone_head; + let mut tombstones = 1; + loop { + probe = (probe+1) & self.mask; + match self.indices[probe].resolve::() { + PosState::Value((i, hash_proxy)) => { + let entry_hash = hash_proxy.get_short_hash(&self.entries, i); + let dist = probe_distance(self.mask, entry_hash.into_hash(), probe); + if dist == 0 { + // clear the tombstone list + while tombstone_head != probe { + self.indices[tombstone_head] = Pos::none(); + tombstone_head = (tombstone_head + 1) & self.mask; + } + self.index_tombstones -= tombstones; + + // if we're out of tombstones, return + if self.index_tombstones == 0 { return } + + // find a new tombstone_head + loop { + probe = (probe+1) & self.mask; + if self.indices[probe].is_tombstone() { + break + } + } + tombstone_head = probe; + tombstones = 1; + } else { + if dist < tombstones { + let delta = tombstones - dist; + for _ in 0..delta { + // pop off the head and clear it + self.indices[tombstone_head] = Pos::none(); + tombstone_head = (tombstone_head + 1) & self.mask; + } + self.index_tombstones -= delta; + tombstones = dist; + } + + // move the value up + self.indices.swap(tombstone_head, probe); + tombstone_head = (tombstone_head + 1) & self.mask; + } + }, + PosState::IsTombstone => tombstones += 1, + PosState::IsNone => { + // clear the tombstone list + while tombstone_head != probe { + self.indices[tombstone_head] = Pos::none(); + tombstone_head = (tombstone_head + 1) & self.mask; + } + self.index_tombstones -= tombstones; + + // if we're out of tombstones, return + if self.index_tombstones == 0 { return } + + // find a new tombstone_head + loop { + probe = (probe+1) & self.mask; + if self.indices[probe].is_tombstone() { + break + } + } + tombstone_head = probe; + tombstones = 1; + }, + } + } + } + + // write to self.indices + // read from self.entries at `pos` + // + // reinserting rewrites all `Pos` entries anyway. This handles transitioning + // from u32 to u64 size class if needed by using the two type parameters. + fn reinsert_entry_in_order(&mut self, pos: Pos) + where SzNew: Size, + SzOld: Size, + { + if let PosState::Value((i, hash_proxy)) = pos.resolve::() { + // only if the size class is conserved can we use the short hash + let entry_hash = if SzOld::is_same_size::() { + hash_proxy.get_short_hash(&self.entries, i).into_hash() + } else { + self.entries[i].unwrap_hash() + }; + // find first empty bucket or tombstone and insert there + let mut probe = entry_hash.desired_pos(self.mask); + probe_loop!(probe < self.indices.len(), { + match self.indices[probe].state() { + // skip values + PosState::Value(()) => {}, + PosState::IsNone => { + self.indices[probe] = Pos::with_hash::(i, entry_hash); + return + }, + PosState::IsTombstone => debug_assert!(false, "reinserting into tombstones"), + } + }); + } + } + + /// Reserves at least one capacity + /// Computes in **O(n)** time (average). + fn reserve_one(&mut self) { + if self.len() == self.capacity() { + self.double_capacity(); + } + } + + /// Scan through each key-value pair in the map and keep those where the + /// closure `keep` returns `true`. + /// + /// The order the elements are visited is not specified. + /// + /// Computes in **O(n)** time (average). + fn retain_impl(&mut self, mut keep: F) + where F: FnMut(&mut K, &mut V) -> bool, + Sz: Size, + { + for (i, e) in self.entries.iter_mut().enumerate() { + if let Some((key, value)) = e.kv_mut() { + if keep(key, value) { continue } + } else { continue }; + + let hash = e.unwrap_hash(); + let probe = find_existing_entry_mut_impl::(self.mask, &mut self.indices, i, hash); + self.indices[probe] = Pos::tombstone(); + self.index_tombstones += 1; + self.entry_tombstones += 1; + e.take(); + } + } + + /// Remove the key-value pair by index + /// + /// Valid indices are *0 <= index < self.len() + self.tombstones()* + /// + /// This leaves a tombstone in the place of the removed element, which preserves + /// indices. + /// + /// Computes in **O(1)** time (average). + fn remove_index_impl(&mut self, index: usize) -> Option<(K, V)> + where Sz: Size + { + self.entries.get_mut(index) + .and_then(|e| e.taker().map(|e| (e.hash(), e.take()))) + .map(|(hash, value)| { + let probe = self.find_existing_entry_mut(index, hash); + self.indices[probe] = Pos::tombstone(); + self.entry_tombstones += 1; + self.index_tombstones += 1; + value + }) + } + + /// Swaps the index of two key-value pairs by index + /// + /// Valid indices are *0 <= index < self.len() + self.tombstones()* + /// + /// Computes in **O(1)** time (average). + fn swap_index_impl(&mut self, a: usize, b: usize) + where Sz: Size + { + if a == b { return } + + match (self.entries[a].hash(), self.entries[b].hash()) { + (None, None) => {}, + (None, Some(b_hash)) => { + let probe_b = self.find_existing_entry_mut(b, b_hash); + self.indices[probe_b] = Pos::with_hash::(a, b_hash) + }, + (Some(a_hash), None) => { + let probe_a = self.find_existing_entry_mut(a, a_hash); + self.indices[probe_a] = Pos::with_hash::(b, a_hash) + }, + (Some(a_hash), Some(b_hash)) => { + let probe_a = self.find_existing_entry_mut(a, a_hash); + let probe_b = self.find_existing_entry_mut(b, b_hash); + self.indices.swap(probe_a, probe_b); + } + } + self.entries.swap(a, b); } - /// phase 2 is post-insert where we forward-shift `Pos` in the indices. - fn insert_phase_2(&mut self, mut probe: usize, mut old_pos: Pos) + /// Removes all the entry tombstones. + /// + /// Note that this means indices (e.g. those returned by `get_index`) may + /// no longer be valid. + /// + /// Computes in **O(n)** time (average). + fn remove_tombstones_impl(&mut self) where Sz: Size { - probe_loop!(probe < self.indices.len(), { - let pos = &mut self.indices[probe]; - if pos.is_none() { - *pos = old_pos; - break; + if self.entry_tombstones == 0 { return } + + let mask = self.mask; + let indices = &mut self.indices; + let mut removed = 0; + let mut index = 0; + + self.entries.retain(|e| { + index += 1; + if let Some(hash) = e.hash() { + if removed != 0 { + let probe = find_existing_entry_mut_impl::(mask, indices, index-1, hash); + indices[probe].sub_eq::(removed); + } + true } else { - old_pos = replace(pos, old_pos); + removed += 1; + false } }); - } + self.entry_tombstones = 0; + } - /// Return probe (indices) and position (entries) + /// Return probe (indices) and position (entries), and kv reference fn find_using(&self, hash: HashValue, key_eq: F) -> Option<(usize, usize)> - where F: Fn(&Bucket) -> bool, + where F: Fn(&K, &V) -> bool, { dispatch_32_vs_64!(self.find_using_impl::<_>(hash, key_eq)) } fn find_using_impl(&self, hash: HashValue, key_eq: F) -> Option<(usize, usize)> - where F: Fn(&Bucket) -> bool, + where F: Fn(&K, &V) -> bool, Sz: Size, { debug_assert!(self.len() > 0); - let mut probe = desired_pos(self.mask, hash); + let mut probe = hash.desired_pos(self.mask); let mut dist = 0; probe_loop!(probe < self.indices.len(), { - if let Some((i, hash_proxy)) = self.indices[probe].resolve::() { - let entry_hash = hash_proxy.get_short_hash(&self.entries, i); - if dist > probe_distance(self.mask, entry_hash.into_hash(), probe) { - // give up when probe distance is too long - return None; - } else if entry_hash == hash && key_eq(&self.entries[i]) { - return Some((probe, i)); - } - } else { - return None; + match self.indices[probe].resolve::() { + PosState::Value((i, hash_proxy)) => { + let entry_hash = hash_proxy.get_short_hash(&self.entries, i); + if dist > probe_distance(self.mask, entry_hash.into_hash(), probe) { + // give up when probe distance is too long + return None; + } else if entry_hash == hash { + let (key, value) = self.entries[i].unwrap_kv(); + if key_eq(key, value) { + return Some((probe, i)); + } + } + }, + PosState::IsTombstone => {}, + PosState::IsNone => return None, } dist += 1; }); } - /// Find `entry` which is already placed inside self.entries; - /// return its probe and entry index. - fn find_existing_entry(&self, entry: &Bucket) -> (usize, usize) + /// Return probe (indices), position (entries) + fn find_using_mut(&mut self, hash: HashValue, key_eq: F) -> Option<(usize, usize)> + where F: Fn(&K, &V) -> bool, { - dispatch_32_vs_64!(self.find_existing_entry_impl(entry)) + dispatch_32_vs_64!(self.find_using_mut_impl::<_>(hash, key_eq)) } - fn find_existing_entry_impl(&self, entry: &Bucket) -> (usize, usize) - where Sz: Size, + fn find_using_mut_impl(&mut self, hash: HashValue, key_eq: F) -> Option<(usize, usize)> + where F: Fn(&K, &V) -> bool, + Sz: Size, { debug_assert!(self.len() > 0); - let hash = entry.hash; - let actual_pos = ptrdistance(&self.entries[0], entry); - let mut probe = desired_pos(self.mask, hash); + let mut probe = hash.desired_pos(self.mask); + let mut dist = 0; + let first_tombstone; probe_loop!(probe < self.indices.len(), { - if let Some((i, _)) = self.indices[probe].resolve::() { - if i == actual_pos { - return (probe, actual_pos); - } - } else { - debug_assert!(false, "the entry does not exist"); + match self.indices[probe].resolve::() { + PosState::Value((i, hash_proxy)) => { + let entry_hash = hash_proxy.get_short_hash(&self.entries, i); + if dist > probe_distance(self.mask, entry_hash.into_hash(), probe) { + // give up when probe distance is too long + return None; + } else if entry_hash == hash { + let (key, value) = self.entries[i].unwrap_kv_mut(); + if key_eq(key, value) { + return Some((probe, i)); + } + } + }, + PosState::IsTombstone => { + first_tombstone = probe; + break + }, + PosState::IsNone => return None, + } + dist += 1; + }); + + let mut first_tombstone = first_tombstone; + probe += 1; + probe_loop!(probe < self.indices.len(), { + dist += 1; + match self.indices[probe].resolve::() { + PosState::Value((i, hash_proxy)) => { + let entry_hash = hash_proxy.get_short_hash(&self.entries, i); + if dist > probe_distance(self.mask, entry_hash.into_hash(), probe) { + // give up when probe distance is too long + return None; + } + + // We're already in the neighborhood, + // any bucket ahead of our target should be moved up + self.indices.swap(first_tombstone, probe); + + if entry_hash == hash { + let (key, value) = self.entries[i].unwrap_kv_mut(); + if key_eq(key, value) { + return Some((first_tombstone, i)); + } + } + + first_tombstone = (first_tombstone + 1) & self.mask; + }, + PosState::IsTombstone => {}, + PosState::IsNone => return None, } }); } - fn remove_found(&mut self, probe: usize, found: usize) -> (K, V) { - dispatch_32_vs_64!(self.remove_found_impl(probe, found)) + /// Find an entry already placed inside `self.entries` given its position and hash. + /// Return the probe for the entry. + /// + /// Computes in **O(1)** time (average). + #[allow(dead_code)] + fn find_existing_entry(&self, actual_pos: usize, hash: HashValue) -> usize + { + dispatch_32_vs_64!(self.find_existing_entry_impl(actual_pos, hash)) + } + + /// Find an entry already placed inside `self.entries` given its position and hash. + /// Return the probe for the entry. + /// + /// Computes in **O(1)** time (average). + fn find_existing_entry_impl(&self, actual_pos: usize, hash: HashValue) -> usize + where Sz: Size, + { + debug_assert!(self.len() > actual_pos); + find_existing_entry_impl::(self.mask, &self.indices, actual_pos, hash) } - fn remove_found_impl(&mut self, probe: usize, found: usize) -> (K, V) + /// Find an entry already placed inside `self.entries` given its position and hash. + /// Return the probe for the entry. + /// + /// Computes in **O(1)** time (average). + fn find_existing_entry_mut(&mut self, actual_pos: usize, hash: HashValue) -> usize + { + dispatch_32_vs_64!(self.find_existing_entry_mut_impl(actual_pos, hash)) + } + + /// Find an entry already placed inside `self.entries` given its position and hash. + /// Return the probe for the entry. + /// + /// Computes in **O(1)** time (average). + fn find_existing_entry_mut_impl(&mut self, actual_pos: usize, hash: HashValue) -> usize + where Sz: Size, + { + debug_assert!(self.len() > actual_pos); + find_existing_entry_mut_impl::(self.mask, &mut self.indices, actual_pos, hash) + } + + fn swap_remove_found(&mut self, probe: usize, found: usize) -> Option<(K, V)> { + dispatch_32_vs_64!(self.swap_remove_found_impl(probe, found)) + } + + fn swap_remove_found_impl(&mut self, probe: usize, found: usize) -> Option<(K, V)> where Sz: Size { + // tombstone the original entry + self.indices[probe] = Pos::tombstone(); + self.index_tombstones += 1; + // index `probe` and entry `found` is to be removed // use swap_remove, but then we need to update the index that points // to the other entry that has to move - self.indices[probe] = Pos::none(); - let entry = self.entries.swap_remove(found); + let kv = self.entries.swap_remove(found).take(); // correct index that points to the entry that had to swap places - if let Some(entry) = self.entries.get(found) { - // was not last element - // examine new element in `found` and find it in indices - let mut probe = desired_pos(self.mask, entry.hash); - probe_loop!(probe < self.indices.len(), { - if let Some((i, _)) = self.indices[probe].resolve::() { - if i >= self.entries.len() { - // found it - self.indices[probe] = Pos::with_hash::(found, entry.hash); - break; - } - } - }); - } - // backward shift deletion in self.indices - // after probe, shift all non-ideally placed indices backward - if self.len() > 0 { - let mut last_probe = probe; - let mut probe = probe + 1; - probe_loop!(probe < self.indices.len(), { - if let Some((i, hash_proxy)) = self.indices[probe].resolve::() { - let entry_hash = hash_proxy.get_short_hash(&self.entries, i); - if probe_distance(self.mask, entry_hash.into_hash(), probe) > 0 { - self.indices[last_probe] = self.indices[probe]; - self.indices[probe] = Pos::none(); - } else { - break; - } - } else { - break; - } - last_probe = probe; - }); + // check if it was the last element or a tombstone + if let Some(hash) = self.entries.get(found).and_then(Bucket::hash) { + let probe = find_existing_entry_mut_impl::(self.mask, &mut self.indices, self.entries.len(), hash); + self.indices[probe] = Pos::with_hash::(found, hash); } - (entry.key, entry.value) + kv } +} +/// Second Phase: forward-shift `Pos` in the indices if need be. +fn entry_phase_2(indices: &mut Vec, index_tombstones: &mut usize, mut probe: usize, mut old_pos: Pos) + where Sz: Size +{ + probe_loop!(probe < indices.len(), { + let pos = &mut indices[probe]; + match pos.state() { + PosState::Value(()) => old_pos = replace(pos, old_pos), + PosState::IsTombstone => { + *index_tombstones -= 1; + *pos = old_pos; + break; + }, + PosState::IsNone => { + *pos = old_pos; + break; + }, + } + }); +} + +/// Find an entry already placed inside `self.entries` given its position and hash. +/// Return the probe for the entry. +/// +/// Computes in **O(1)** time (average). +fn find_existing_entry_impl(mask: usize, indices: &Vec, actual_pos: usize, hash: HashValue) -> usize + where Sz: Size, +{ + let mut probe = hash.desired_pos(mask); + probe_loop!(probe < indices.len(), { + match indices[probe].pos::() { + PosState::Value(i) => if i == actual_pos { return probe }, + PosState::IsTombstone => {}, + PosState::IsNone => debug_assert!(false, "the entry does not exist"), + } + }); +} + +/// Find an entry already placed inside `self.entries` given its position and hash. +/// Return the probe for the entry. +/// +/// This method will swap items with tombstones as it searches, moving items +/// closer to their ideal position. +/// +/// Computes in **O(1)** time (average). +fn find_existing_entry_mut_impl(mask: usize, indices: &mut Vec, actual_pos: usize, hash: HashValue) -> usize + where Sz: Size, +{ + let mut probe = hash.desired_pos(mask); + let first_tombstone; + probe_loop!(probe < indices.len(), { + match indices[probe].pos::() { + PosState::Value(i) => if i == actual_pos { return probe }, + PosState::IsTombstone => { + first_tombstone = probe; + break + }, + PosState::IsNone => debug_assert!(false, "the entry does not exist"), + } + }); + + let mut first_tombstone = first_tombstone; + probe += 1; + probe_loop!(probe < indices.len(), { + match indices[probe].pos::() { + PosState::Value(i) => { + // We're already in the neighborhood, + // any bucket ahead of our target should be moved up + indices.swap(first_tombstone, probe); + if i == actual_pos { return first_tombstone } + first_tombstone = (first_tombstone + 1) & mask; + }, + PosState::IsTombstone => {}, + PosState::IsNone => debug_assert!(false, "the entry does not exist"), + } + }); } use std::slice::Iter as SliceIter; @@ -1134,25 +1827,49 @@ use std::vec::IntoIter as VecIntoIter; pub struct Keys<'a, K: 'a, V: 'a> { iter: SliceIter<'a, Bucket>, + tombstones: usize, } impl<'a, K, V> Iterator for Keys<'a, K, V> { type Item = &'a K; fn next(&mut self) -> Option<&'a K> { - self.iter.next().map(|ent| &ent.key) + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some((key, _)) = e.kv() { + Some(key) + } else { + *tombstones -= 1; + None + } + }).next() } fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() + let len = self.len(); + (len, Some(len)) } fn count(self) -> usize { - self.iter.len() + self.len() } fn nth(&mut self, n: usize) -> Option { - self.iter.nth(n).map(|ent| &ent.key) + if self.tombstones == 0 { + self.iter.nth(n).map(|e| { + e.unwrap_kv().0 + }) + } else { + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some((key, _)) = e.kv() { + Some(key) + } else { + *tombstones -= 1; + None + } + }).nth(n) + } } fn last(mut self) -> Option { @@ -1162,31 +1879,70 @@ impl<'a, K, V> Iterator for Keys<'a, K, V> { impl<'a, K, V> DoubleEndedIterator for Keys<'a, K, V> { fn next_back(&mut self) -> Option<&'a K> { - self.iter.next_back().map(|ent| &ent.key) + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some((key, _)) = e.kv() { + Some(key) + } else { + *tombstones -= 1; + None + } + }).next_back() } } +impl<'a, K, V> ExactSizeIterator for Keys<'a, K, V> { + fn len(&self) -> usize { + self.iter.len() - self.tombstones + } +} + + pub struct Iter<'a, K: 'a, V: 'a> { iter: SliceIter<'a, Bucket>, + tombstones: usize, } impl<'a, K, V> Iterator for Iter<'a, K, V> { type Item = (&'a K, &'a V); fn next(&mut self) -> Option { - self.iter.next().map(|e| (&e.key, &e.value)) + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some((key, value)) = e.kv() { + Some((key, value)) + } else { + *tombstones -= 1; + None + } + }).next() } fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() + let len = self.len(); + (len, Some(len)) } fn count(self) -> usize { - self.iter.len() + self.len() } fn nth(&mut self, n: usize) -> Option { - self.iter.nth(n).map(|e| (&e.key, &e.value)) + if self.tombstones == 0 { + self.iter.nth(n).map(|e| { + e.unwrap_kv() + }) + } else { + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some((key, value)) = e.kv() { + Some((key, value)) + } else { + *tombstones -= 1; + None + } + }).nth(n) + } } fn last(mut self) -> Option { @@ -1196,31 +1952,70 @@ impl<'a, K, V> Iterator for Iter<'a, K, V> { impl<'a, K, V> DoubleEndedIterator for Iter<'a, K, V> { fn next_back(&mut self) -> Option { - self.iter.next_back().map(|e| (&e.key, &e.value)) + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some((key, value)) = e.kv() { + Some((key, value)) + } else { + *tombstones -= 1; + None + } + }).next_back() + } +} + +impl<'a, K, V> ExactSizeIterator for Iter<'a, K, V> { + fn len(&self) -> usize { + self.iter.len() - self.tombstones } } pub struct IterMut<'a, K: 'a, V: 'a> { iter: SliceIterMut<'a, Bucket>, + tombstones: usize, } impl<'a, K, V> Iterator for IterMut<'a, K, V> { type Item = (&'a K, &'a mut V); fn next(&mut self) -> Option { - self.iter.next().map(|e| (&e.key, &mut e.value)) + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some((key, value)) = e.kv_mut() { + Some((&*key, value)) + } else { + *tombstones -= 1; + None + } + }).next() } fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() + let len = self.len(); + (len, Some(len)) } - + fn count(self) -> usize { - self.iter.len() + self.len() } fn nth(&mut self, n: usize) -> Option { - self.iter.nth(n).map(|e| (&e.key, &mut e.value)) + if self.tombstones == 0 { + self.iter.nth(n).map(|e| { + let (key, value) = e.unwrap_kv_mut(); + (&*key, value) + }) + } else { + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some((key, value)) = e.kv_mut() { + Some((&*key, value)) + } else { + *tombstones -= 1; + None + } + }).nth(n) + } } fn last(mut self) -> Option { @@ -1230,31 +2025,69 @@ impl<'a, K, V> Iterator for IterMut<'a, K, V> { impl<'a, K, V> DoubleEndedIterator for IterMut<'a, K, V> { fn next_back(&mut self) -> Option { - self.iter.next_back().map(|e| (&e.key, &mut e.value)) + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some((key, value)) = e.kv_mut() { + Some((&*key, value)) + } else { + *tombstones -= 1; + None + } + }).next_back() + } +} + +impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> { + fn len(&self) -> usize { + self.iter.len() - self.tombstones } } pub struct IntoIter { iter: VecIntoIter>, + tombstones: usize, } impl Iterator for IntoIter { type Item = (K, V); fn next(&mut self) -> Option { - self.iter.next().map(|e| (e.key, e.value)) + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some((key, value)) = e.into_kv() { + Some((key, value)) + } else { + *tombstones -= 1; + None + } + }).next() } fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() + let len = self.len(); + (len, Some(len)) } fn count(self) -> usize { - self.iter.len() + self.len() } fn nth(&mut self, n: usize) -> Option { - self.iter.nth(n).map(|e| (e.key, e.value)) + if self.tombstones == 0 { + self.iter.nth(n).map(|e| { + e.unwrap_into_kv() + }) + } else { + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(move |e| { + if let Some((key, value)) = e.into_kv() { + Some((key, value)) + } else { + *tombstones -= 1; + None + } + }).nth(n) + } } fn last(mut self) -> Option { @@ -1262,9 +2095,23 @@ impl Iterator for IntoIter { } } -impl<'a, K, V> DoubleEndedIterator for IntoIter { +impl ExactSizeIterator for IntoIter { + fn len(&self) -> usize { + self.iter.len() - self.tombstones + } +} + +impl DoubleEndedIterator for IntoIter { fn next_back(&mut self) -> Option { - self.iter.next_back().map(|e| (e.key, e.value)) + let tombstones = &mut self.tombstones; + self.iter.by_ref().filter_map(|e| { + if let Some((key, value)) = e.into_kv() { + Some((key, value)) + } else { + *tombstones -= 1; + None + } + }).next_back() } } @@ -1299,6 +2146,7 @@ impl IntoIterator for OrderMap fn into_iter(self) -> Self::IntoIter { IntoIter { iter: self.entries.into_iter(), + tombstones: self.entry_tombstones, } } } @@ -1363,7 +2211,15 @@ impl Extend<(K, V)> for OrderMap S: BuildHasher, { fn extend>(&mut self, iterable: I) { - for (k, v) in iterable { self.insert(k, v); } + let mut iterator = iterable.into_iter(); + while let Some((k, v)) = iterator.next() { + let len = self.len(); + if len == self.capacity() { + let (lower, _) = iterator.size_hint(); + self.reserve(lower.saturating_add(1)); + } + self.insert(k, v); + } } } @@ -1461,6 +2317,56 @@ mod tests { } } + #[test] + fn swap() { + let mut insert = [0, 4, 2, 12]; + let mut map = OrderMap::new(); + + for &elt in &insert { + map.insert(elt, ()); + } + + assert_eq!(map.keys().count(), map.len()); + assert_eq!(map.keys().count(), insert.len()); + for (a, b) in insert.iter().zip(map.keys()) { + assert_eq!(a, b); + } + for (i, k) in (0..insert.len()).zip(map.keys()) { + assert_eq!(map.get_index(i).unwrap().0, k); + } + + println!("{:?}", map); + println!("{:?}", insert); + + insert.swap(1, 2); + map.swap_index(1, 2); + + println!("{:?}", insert); + println!("{:?}", map); + + assert_eq!(map.keys().count(), map.len()); + assert_eq!(map.keys().count(), insert.len()); + for (a, b) in insert.iter().zip(map.keys()) { + assert_eq!(a, b); + } + for (i, k) in (0..insert.len()).zip(map.keys()) { + assert_eq!(map.get_index(i).unwrap().0, k); + } + + + insert.swap(0, 3); + map.swap_index(0, 3); + + assert_eq!(map.keys().count(), map.len()); + assert_eq!(map.keys().count(), insert.len()); + for (a, b) in insert.iter().zip(map.keys()) { + assert_eq!(a, b); + } + for (i, k) in (0..insert.len()).zip(map.keys()) { + assert_eq!(map.get_index(i).unwrap().0, k); + } + } + #[test] fn grow() { let insert = [0, 4, 2, 12, 8, 7, 11]; @@ -1493,7 +2399,35 @@ mod tests { } #[test] - fn remove() { + fn retain() { + let insert = vec![0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; + let mut map = OrderMap::new(); + + for &elt in &insert { + map.insert(elt, elt); + } + + assert_eq!(map.keys().count(), map.len()); + assert_eq!(map.keys().count(), insert.len()); + for (a, b) in insert.iter().zip(map.keys()) { + assert_eq!(a, b); + } + let removed: Vec<_> = insert.iter().filter(|&v| *v >= 10).collect(); + map.retain(|k, _| *k < 10); + + println!("{:?}", insert); + println!("{:?}", map); + + assert_eq!(map.len(), insert.len() - removed.len()); + + for (&a, &b) in insert.iter().filter(|&v| *v < 10).zip(map.keys()) { + assert_eq!(a, b); + } + } + + + #[test] + fn swap_remove() { let insert = [0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; let mut map = OrderMap::new(); @@ -1527,6 +2461,68 @@ mod tests { assert_eq!(map.keys().count(), insert.len() - remove.len()); } + #[test] + fn remove() { + let insert = [0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; + let mut map = OrderMap::new(); + + for &elt in &insert { + map.insert(elt, elt); + } + + assert_eq!(map.keys().count(), map.len()); + assert_eq!(map.keys().count(), insert.len()); + for (a, b) in insert.iter().zip(map.keys()) { + assert_eq!(a, b); + } + + let remove_fail = [99, 77]; + let remove = [4, 12, 8, 7]; + + for &key in &remove_fail { + assert!(map.get_pair_index(&key).is_none()); + assert!(map.remove(&key).is_none()); + } + println!("{:?}", map); + for &key in &remove { + assert!(map.get_pair_index(&key).is_some()); + let (index, _, _) = map.get_pair_index(&key).unwrap(); + assert!(map.get_index(index).is_some()); + assert_eq!(map.remove(&key), Some(key)); + assert!(map.get_pair_index(&key).is_none()); + assert!(map.get_index(index).is_none()); + } + println!("{:?}", map); + + for key in &insert { + assert_eq!(map.get(key).is_some(), !remove.contains(key)); + } + + assert_eq!(map.tombstones(), remove.len()); + assert_eq!(map.len(), insert.len() - remove.len()); + assert_eq!(map.keys().count(), insert.len() - remove.len()); + + for (&a, (&b, _)) in insert.iter().filter(|key| !remove.contains(key)).zip(map.iter()) { + assert_eq!(a, b); + } + + map.remove_tombstones(); + + assert_eq!(map.tombstones(), 0); + assert_eq!(map.len(), insert.len() - remove.len()); + assert_eq!(map.keys().count(), insert.len() - remove.len()); + + for (&a, (&b, _)) in insert.iter().filter(|key| !remove.contains(key)).zip(map.iter()) { + assert_eq!(a, b); + } + + for (i, &a) in insert.iter().filter(|key| !remove.contains(key)).enumerate() { + assert!(map.get_index(i).is_some()); + assert_eq!(a, *map.get_index(i).unwrap().0); + } + + } + #[test] fn swap_remove_index() { let insert = [0, 4, 2, 12, 8, 7, 11, 5, 3, 17, 19, 22, 23]; diff --git a/src/util.rs b/src/util.rs index 95742631..6c841123 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,16 +1,8 @@ use std::iter::Enumerate; -use std::mem::size_of; -pub fn second(t: (A, B)) -> B { t.1 } pub fn enumerate(iterable: I) -> Enumerate where I: IntoIterator { iterable.into_iter().enumerate() } - -/// return the number of steps from a to b -pub fn ptrdistance(a: *const T, b: *const T) -> usize { - debug_assert!(a as usize <= b as usize); - (b as usize - a as usize) / size_of::() -} diff --git a/tests/quick.rs b/tests/quick.rs index bfc2963c..97e77b1c 100644 --- a/tests/quick.rs +++ b/tests/quick.rs @@ -123,8 +123,8 @@ impl Arbitrary for Op } fn do_ops(ops: &[Op], a: &mut OrderMap, b: &mut HashMap) - where K: Hash + Eq + Clone, - V: Clone, + where K: Hash + Eq + Clone + Debug, + V: Clone + Debug, { for op in ops { match *op { @@ -151,7 +151,7 @@ fn do_ops(ops: &[Op], a: &mut OrderMap, b: &mut HashMap) } } } - //println!("{:?}", a); + assert_eq!(a.len(), b.len(), "last operation was {:?}", op); } } @@ -200,7 +200,6 @@ quickcheck! { assert!(map.contains_key(key)); } true - } // Check that retain visits each key exactly once