From 17d138f62573b33397cfaaa89fc5cfcf30186503 Mon Sep 17 00:00:00 2001 From: Pablo Escobar Gaviria Date: Wed, 29 Jan 2020 20:26:32 -0200 Subject: [PATCH 1/4] Avoid restricting impls by RandomState Now the `S` type parameter is only required to implement `Default`. --- src/map.rs | 20 +++++++++++++------- tests/jsr166.rs | 2 +- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/map.rs b/src/map.rs index 6dd0283b..572992d0 100644 --- a/src/map.rs +++ b/src/map.rs @@ -85,30 +85,32 @@ pub struct HashMap { build_hasher: S, } -impl Default for HashMap +impl Default for HashMap where K: Sync + Send + Clone + Hash + Eq, V: Sync + Send, + S: BuildHasher + Default, { fn default() -> Self { Self::new() } } -impl HashMap +impl HashMap where K: Sync + Send + Clone + Hash + Eq, V: Sync + Send, + S: BuildHasher + Default, { /// Creates a new, empty map with the default initial table size (16). pub fn new() -> Self { - Self::with_hasher(RandomState::new()) + Self::with_hasher(S::default()) } /// Creates a new, empty map with an initial table size accommodating the specified number of /// elements without the need to dynamically resize. 
pub fn with_capacity(n: usize) -> Self { - Self::with_capacity_and_hasher(RandomState::new(), n) + Self::with_capacity_and_hasher(S::default(), n) } } @@ -1448,10 +1450,11 @@ where } } -impl FromIterator<(K, V)> for HashMap +impl FromIterator<(K, V)> for HashMap where K: Sync + Send + Clone + Hash + Eq, V: Sync + Send, + S: BuildHasher + Default, { fn from_iter>(iter: T) -> Self { let mut iter = iter.into_iter(); @@ -1473,10 +1476,11 @@ where } } -impl<'a, K, V> FromIterator<(&'a K, &'a V)> for HashMap +impl<'a, K, V, S> FromIterator<(&'a K, &'a V)> for HashMap where K: Sync + Send + Copy + Hash + Eq, V: Sync + Send + Copy, + S: BuildHasher + Default, { #[inline] fn from_iter>(iter: T) -> Self { @@ -1484,10 +1488,11 @@ where } } -impl<'a, K, V> FromIterator<&'a (K, V)> for HashMap +impl<'a, K, V, S> FromIterator<&'a (K, V)> for HashMap where K: Sync + Send + Copy + Hash + Eq, V: Sync + Send + Copy, + S: BuildHasher + Default, { #[inline] fn from_iter>(iter: T) -> Self { @@ -1549,6 +1554,7 @@ fn capacity() { // The table has been resized once (and it's capacity doubled), // since we inserted more elements than it can hold } + #[cfg(test)] mod tests { use super::*; diff --git a/tests/jsr166.rs b/tests/jsr166.rs index a915558d..fe47e734 100644 --- a/tests/jsr166.rs +++ b/tests/jsr166.rs @@ -7,7 +7,7 @@ const ITER: [(usize, &'static str); 5] = [(1, "A"), (2, "B"), (3, "C"), (4, "D") fn test_from_iter() { let guard = unsafe { crossbeam_epoch::unprotected() }; let map1 = from_iter_contron(); - let map2 = HashMap::from_iter(ITER.iter()); + let map2: HashMap<_, _> = HashMap::from_iter(ITER.iter()); // TODO: improve when `Map: Eq` let mut fst: Vec<_> = map1.iter(&guard).collect(); From d2568dc15b2158a8dffc9f7b793745687696a3df Mon Sep 17 00:00:00 2001 From: Pablo Escobar Gaviria Date: Thu, 30 Jan 2020 10:23:43 -0200 Subject: [PATCH 2/4] Always accept `&Guard` rather than constructing This makes the methods more consistent with one another, allows for greater efficiency, and
is necessary for potential eventual `no_std` support. --- src/map.rs | 39 +++++++++++++++++---------------------- tests/basic.rs | 20 +++++++++++++------- tests/jdk/map_check.rs | 8 +++++--- tests/jsr166.rs | 2 +- 4 files changed, 36 insertions(+), 33 deletions(-) diff --git a/src/map.rs b/src/map.rs index 572992d0..3c379339 100644 --- a/src/map.rs +++ b/src/map.rs @@ -174,16 +174,16 @@ where h.finish() } + #[inline] /// Tests if `key` is a key in this table. /// /// The key may be any borrowed form of the map's key type, but `Hash` and `Eq` on the borrowed /// form must match those for the key type. - pub fn contains_key(&self, key: &Q) -> bool + pub fn contains_key(&self, key: &Q, guard: &Guard) -> bool where K: Borrow, Q: ?Sized + Hash + Eq, { - let guard = crossbeam_epoch::pin(); self.get(key, &guard).is_some() } @@ -263,19 +263,19 @@ where unsafe { v.as_ref() } } + #[inline] /// Obtains the value to which `key` is mapped and passes it through the closure `then`. /// /// Returns `None` if this map contains no mapping for `key`. /// /// The key may be any borrowed form of the map's key type, but `Hash` and `Eq` on the borrowed /// form must match those for the key type. - pub fn get_and(&self, key: &Q, then: F) -> Option + pub fn get_and(&self, key: &Q, then: F, guard: &Guard) -> Option where K: Borrow, Q: ?Sized + Hash + Eq, F: FnOnce(&V) -> R, { - let guard = &crossbeam_epoch::pin(); self.get(key, guard).map(then) } @@ -538,7 +538,7 @@ where } } - fn put_all<'g, I: Iterator>(&self, iter: I, guard: &'g Guard) { + fn put_all>(&self, iter: I, guard: &Guard) { for (key, value) in iter { self.put(key, value, false, guard); } @@ -951,7 +951,7 @@ where } /// Tries to presize table to accommodate the given number of elements.
- fn try_presize<'g>(&self, size: usize, guard: &'g Guard) { + fn try_presize(&self, size: usize, guard: &Guard) { let requested_capacity = if size >= MAXIMUM_CAPACITY / 2 { MAXIMUM_CAPACITY } else { @@ -1062,11 +1062,9 @@ where #[inline] /// Tries to reserve capacity for at least additional more elements. /// The collection may reserve more space to avoid frequent reallocations. - pub fn reserve(&self, additional: usize) { + pub fn reserve(&self, additional: usize, guard: &Guard) { let absolute = self.len() + additional; - - let guard = epoch::pin(); - self.try_presize(absolute, &guard); + self.try_presize(absolute, guard); } /// Removes the key (and its corresponding value) from this map. @@ -1259,16 +1257,15 @@ where /// If `f` returns `false` for a given key/value pair, but the value for that pair is concurrently /// modified before the removal takes place, the entry will not be removed. /// If you want the removal to happen even in the case of concurrent modification, use [`HashMap::retain_force`]. - pub fn retain(&self, mut f: F) + pub fn retain(&self, mut f: F, guard: &Guard) where F: FnMut(&K, &V) -> bool, { - let guard = epoch::pin(); // removed selected keys for (k, v) in self.iter(&guard) { if !f(k, v) { let old_value: Shared<'_, V> = Shared::from(v as *const V); - self.replace_node(k, None, Some(old_value), &guard); + self.replace_node(k, None, Some(old_value), guard); } } } @@ -1279,15 +1276,14 @@ where /// /// This method always deletes any key/value pair that `f` returns `false` for, /// even if if the value is updated concurrently. If you do not want that behavior, use [`HashMap::retain`]. 
- pub fn retain_force(&self, mut f: F) + pub fn retain_force(&self, mut f: F, guard: &Guard) where F: FnMut(&K, &V) -> bool, { - let guard = epoch::pin(); // removed selected keys for (k, v) in self.iter(&guard) { if !f(k, v) { - self.replace_node(k, None, None, &guard); + self.replace_node(k, None, None, guard); } } } @@ -1331,7 +1327,7 @@ where #[inline] #[cfg(test)] /// Returns the capacity of the map. - fn capacity<'g>(&self, guard: &'g Guard) -> usize { + fn capacity(&self, guard: &Guard) -> usize { let table = self.table.load(Ordering::Relaxed, &guard); if table.is_null() { @@ -1431,9 +1427,9 @@ where (iter.size_hint().0 + 1) / 2 }; - self.reserve(reserve); + let guard = epoch::pin(); - let guard = crossbeam_epoch::pin(); + self.reserve(reserve, &guard); (*self).put_all(iter.into_iter(), &guard); } } @@ -1523,7 +1519,6 @@ where /// Returns the number of physical CPUs in the machine (_O(1)_). fn num_cpus() -> usize { NCPU_INITIALIZER.call_once(|| NCPU.store(num_cpus::get_physical(), Ordering::Relaxed)); - NCPU.load(Ordering::Relaxed) } @@ -1565,7 +1560,7 @@ mod tests { map.insert(42, 0, &guard); - map.reserve(32); + map.reserve(32, &guard); let capacity = map.capacity(&guard); assert!(capacity >= 16 + 32); @@ -1576,7 +1571,7 @@ mod tests { let map = HashMap::::new(); let guard = epoch::pin(); - map.reserve(32); + map.reserve(32, &guard); let capacity = map.capacity(&guard); assert!(capacity >= 32); diff --git a/tests/basic.rs b/tests/basic.rs index b61d6df6..c00ab92f 100644 --- a/tests/basic.rs +++ b/tests/basic.rs @@ -368,7 +368,7 @@ fn get_and() { let guard = epoch::pin(); map.insert(42, 32, &guard); - assert_eq!(map.get_and(&42, |value| *value + 10), Some(42)); + assert_eq!(map.get_and(&42, |value| *value + 10, &guard), Some(42)); } #[test] @@ -448,47 +448,53 @@ fn from_iter_empty() { #[test] fn retain_empty() { + let guard = epoch::pin(); let map = HashMap::<&'static str, u32>::new(); - map.retain(|_, _| false); + map.retain(|_, _| false, &guard); 
assert_eq!(map.len(), 0); } #[test] fn retain_all_false() { + let guard = epoch::pin(); let map: HashMap = (0..10 as u32).map(|x| (x, x)).collect(); - map.retain(|_, _| false); + map.retain(|_, _| false, &guard); assert_eq!(map.len(), 0); } #[test] fn retain_all_true() { let size = 10usize; + let guard = epoch::pin(); let map: HashMap = (0..size).map(|x| (x, x)).collect(); - map.retain(|_, _| true); + map.retain(|_, _| true, &guard); assert_eq!(map.len(), size); } #[test] fn retain_some() { + let guard = epoch::pin(); let map: HashMap = (0..10).map(|x| (x, x)).collect(); let expected_map: HashMap = (5..10).map(|x| (x, x)).collect(); - map.retain(|_, v| *v >= 5); + map.retain(|_, v| *v >= 5, &guard); assert_eq!(map.len(), 5); assert_eq!(map, expected_map); } #[test] fn retain_force_empty() { + let guard = epoch::pin(); let map = HashMap::<&'static str, u32>::new(); - map.retain_force(|_, _| false); + map.retain_force(|_, _| false, &guard); assert_eq!(map.len(), 0); } #[test] fn retain_force_some() { + let guard = epoch::pin(); let map: HashMap = (0..10).map(|x| (x, x)).collect(); let expected_map: HashMap = (5..10).map(|x| (x, x)).collect(); - map.retain_force(|_, v| *v >= 5); + map.retain_force(|_, v| *v >= 5, &guard); assert_eq!(map.len(), 5); assert_eq!(map, expected_map); } diff --git a/tests/jdk/map_check.rs b/tests/jdk/map_check.rs index 28548baf..42efaaaf 100644 --- a/tests/jdk/map_check.rs +++ b/tests/jdk/map_check.rs @@ -57,8 +57,9 @@ where K: Sync + Send + Copy + Hash + Eq, { let mut sum = 0; + let guard = epoch::pin(); for i in 0..keys.len() { - if map.contains_key(&keys[i]) { + if map.contains_key(&keys[i], &guard) { sum += 1; } } @@ -86,11 +87,12 @@ where K: Sync + Send + Copy + Hash + Eq, { let mut sum = 0; + let guard = epoch::pin(); for i in 0..k1.len() { - if map.contains_key(&k1[i]) { + if map.contains_key(&k1[i], &guard) { sum += 1; } - if map.contains_key(&k2[i]) { + if map.contains_key(&k2[i], &guard) { sum += 1; } } diff --git a/tests/jsr166.rs 
b/tests/jsr166.rs index fe47e734..a108639f 100644 --- a/tests/jsr166.rs +++ b/tests/jsr166.rs @@ -56,5 +56,5 @@ fn test_remove() { map.remove(&5, &guard); // TODO: add len check once method exists // assert_eq!(map.len(), 4); - assert!(!map.contains_key(&5)); + assert!(!map.contains_key(&5, &guard)); } From aaeb2adc188f62be3e65762326a882f61e3010e2 Mon Sep 17 00:00:00 2001 From: Pablo Escobar Gaviria Date: Thu, 30 Jan 2020 12:29:32 -0200 Subject: [PATCH 3/4] Use `ahash::RandomState` as the default hasher We choose this over `std`'s `RandomState` for two reasons: 1. It is faster, and the DoS protection will be provided by the (as of yet unimplemented) `TreeBin` optimization. 2. It is more amenable to `no_std` environments. --- Cargo.toml | 4 ++++ src/lib.rs | 4 ++++ src/map.rs | 3 +-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f70f184f..b64c06ec 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,10 @@ crossbeam-epoch = "0.9" parking_lot = "0.10" num_cpus = "1.12.0" +[dependencies.ahash] +version = "0.3.2" +default-features = false + [dev-dependencies] rand = "0.7" diff --git a/src/lib.rs b/src/lib.rs index 8aa7365f..58f2000c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -196,6 +196,7 @@ #![deny( missing_docs, missing_debug_implementations, + unreachable_pub, intra_doc_link_resolution_failure )] #![warn(rust_2018_idioms)] @@ -209,6 +210,9 @@ pub mod iter; pub use map::HashMap; +/// Default hasher for [`HashMap`]. +pub type DefaultHashBuilder = ahash::RandomState; + /// Types needed to safely access shared data concurrently. 
pub mod epoch { pub use crossbeam_epoch::{pin, Guard}; diff --git a/src/map.rs b/src/map.rs index 3c379339..58a566fb 100644 --- a/src/map.rs +++ b/src/map.rs @@ -3,7 +3,6 @@ use crate::node::*; use crate::raw::*; use crossbeam_epoch::{self as epoch, Atomic, Guard, Owned, Shared}; use std::borrow::Borrow; -use std::collections::hash_map::RandomState; use std::fmt::{self, Debug, Formatter}; use std::hash::{BuildHasher, Hash, Hasher}; use std::iter::FromIterator; @@ -61,7 +60,7 @@ macro_rules! load_factor { /// A concurrent hash table. /// /// See the [crate-level documentation](index.html) for details. -pub struct HashMap { +pub struct HashMap { /// The array of bins. Lazily initialized upon first insertion. /// Size is always a power of two. Accessed directly by iterators. table: Atomic>, From 3bf1f8b1cd39233b9d6199865baa837e6896f4a7 Mon Sep 17 00:00:00 2001 From: Pablo Escobar Gaviria Date: Wed, 29 Jan 2020 20:49:11 -0200 Subject: [PATCH 4/4] Switch the bits macro to a const fn --- src/map.rs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/map.rs b/src/map.rs index 58a566fb..9ea4bf77 100644 --- a/src/map.rs +++ b/src/map.rs @@ -11,18 +11,14 @@ use std::sync::{ Once, }; -macro_rules! isize_bits { - () => { - std::mem::size_of::() * 8 - }; -} +const ISIZE_BITS: usize = core::mem::size_of::() * 8; /// The largest possible table capacity. This value must be /// exactly 1<<30 to stay within Java array allocation and indexing /// bounds for power of two table sizes, and is further required /// because the top two bits of 32bit hash fields are used for /// control purposes. -const MAXIMUM_CAPACITY: usize = 1 << 30; // TODO: use isize_bits!() +const MAXIMUM_CAPACITY: usize = 1 << 30; // TODO: use ISIZE_BITS /// The default initial table capacity. Must be a power of 2 /// (i.e., at least 1) and at most `MAXIMUM_CAPACITY`. 
@@ -37,15 +33,15 @@ const MIN_TRANSFER_STRIDE: isize = 16; /// The number of bits used for generation stamp in `size_ctl`. /// Must be at least 6 for 32bit arrays. -const RESIZE_STAMP_BITS: usize = isize_bits!() / 2; +const RESIZE_STAMP_BITS: usize = ISIZE_BITS / 2; /// The maximum number of threads that can help resize. /// Must fit in `32 - RESIZE_STAMP_BITS` bits for 32 bit architectures /// and `64 - RESIZE_STAMP_BITS` bits for 64 bit architectures -const MAX_RESIZERS: isize = (1 << (isize_bits!() - RESIZE_STAMP_BITS)) - 1; +const MAX_RESIZERS: isize = (1 << (ISIZE_BITS - RESIZE_STAMP_BITS)) - 1; /// The bit shift for recording size stamp in `size_ctl`. -const RESIZE_STAMP_SHIFT: usize = isize_bits!() - RESIZE_STAMP_BITS; +const RESIZE_STAMP_SHIFT: usize = ISIZE_BITS - RESIZE_STAMP_BITS; static NCPU_INITIALIZER: Once = Once::new(); static NCPU: AtomicUsize = AtomicUsize::new(0);