From 8d6c27836639a8538f6e4ff8a80fa9af6ef24a79 Mon Sep 17 00:00:00 2001 From: Stiopa Koltsov Date: Sun, 12 Sep 2021 19:40:29 +0100 Subject: [PATCH] insert_unique_unchecked operation Sometimes a map is constructed when it is known that all keys are unique (e.g. if keys are coming from another map or from a sorted/deduplicated iterator). In this case we can make insertion faster by skipping a check that a key already exists in the map. `insert_unique_unchecked` is guaranteed to be memory-safe, but does not guarantee anything beyond that: if the inserted key is not unique, `HashMap` can panic, loop forever, return an incorrect entry, etc. Added a simple benchmark. `insert_unique_unchecked` is about 30% faster than `insert`. Your mileage may vary, of course. A similar PR was [added to the `indexmap` crate](https://github.com/bluss/indexmap/pull/200) and they asked to discuss the name of the operation with `hashbrown` crate owners to come to the same naming convention (if `hashbrown` is willing to have the same operation). --- benches/insert_unique_unchecked.rs | 32 +++++++++++++++++++++++++++ src/map.rs | 35 ++++++++++++++++++++++++++++++ src/set.rs | 22 +++++++++++++++++++ 3 files changed, 89 insertions(+) create mode 100644 benches/insert_unique_unchecked.rs diff --git a/benches/insert_unique_unchecked.rs b/benches/insert_unique_unchecked.rs new file mode 100644 index 0000000000..857ad18e57 --- /dev/null +++ b/benches/insert_unique_unchecked.rs @@ -0,0 +1,32 @@ +//! Compare `insert` and `insert_unique_unchecked` operations performance. 
+ +#![feature(test)] + +extern crate test; + +use hashbrown::HashMap; +use test::Bencher; + +#[bench] +fn insert(b: &mut Bencher) { + let keys: Vec<String> = (0..1000).map(|i| format!("xxxx{}yyyy", i)).collect(); + b.iter(|| { + let mut m = HashMap::with_capacity(1000); + for k in &keys { + m.insert(k, k); + } + m + }); +} + +#[bench] +fn insert_unique_unchecked(b: &mut Bencher) { + let keys: Vec<String> = (0..1000).map(|i| format!("xxxx{}yyyy", i)).collect(); + b.iter(|| { + let mut m = HashMap::with_capacity(1000); + for k in &keys { + m.insert_unique_unchecked(k, k); + } + m + }); +} diff --git a/src/map.rs b/src/map.rs index 032096439f..a5b802179c 100644 --- a/src/map.rs +++ b/src/map.rs @@ -1278,6 +1278,31 @@ where } } + /// Insert a key-value pair into the map without checking + /// if the key already exists in the map. + /// + /// This operation is safe if a key does not exist in the map. + /// + /// However, if a key exists in the map already, the behavior is unspecified: + /// this operation may panic, loop forever, or any following operation with the map + /// may panic, loop forever or return arbitrary result. + /// + /// That said, this operation (and following operations) are guaranteed to + /// not violate memory safety. + /// + /// This operation is faster than regular insert, because it does not perform + /// lookup before insertion. + /// + /// This operation is useful during initial population of the map. + /// For example, when constructing a map from another map, we know + /// that keys are unique. + #[cfg_attr(feature = "inline-more", inline)] + pub fn insert_unique_unchecked(&mut self, k: K, v: V) { + let hash = make_insert_hash::<K, S>(&self.hash_builder, &k); + self.table + .insert(hash, (k, v), make_hasher::<K, _, V, S>(&self.hash_builder)); + } + /// Tries to insert a key-value pair into the map, and returns /// a mutable reference to the value in the entry. 
/// @@ -3898,6 +3923,16 @@ mod test_map { assert_eq!(*m.get(&5).unwrap(), 3); } + #[test] + fn test_insert_unique_unchecked() { + let mut map = HashMap::new(); + map.insert_unique_unchecked(10, 11); + map.insert_unique_unchecked(20, 21); + assert_eq!(Some(&11), map.get(&10)); + assert_eq!(Some(&21), map.get(&20)); + assert_eq!(None, map.get(&30)); + } + #[test] fn test_is_empty() { let mut m = HashMap::with_capacity(4); diff --git a/src/set.rs b/src/set.rs index 70d5d7d643..a6bc4c271c 100644 --- a/src/set.rs +++ b/src/set.rs @@ -991,6 +991,28 @@ where self.map.insert(value, ()).is_none() } + /// Insert a value into the set without checking if the value already exists in the set. + /// + /// This operation is safe if a value does not exist in the set. + /// + /// However, if a value exists in the set already, the behavior is unspecified: + /// this operation may panic, loop forever, or any following operation with the set + /// may panic, loop forever or return arbitrary result. + /// + /// That said, this operation (and following operations) are guaranteed to + /// not violate memory safety. + /// + /// This operation is faster than regular insert, because it does not perform + /// lookup before insertion. + /// + /// This operation is useful during initial population of the set. + /// For example, when constructing a set from another set, we know + /// that values are unique. + #[cfg_attr(feature = "inline-more", inline)] + pub fn insert_unique_unchecked(&mut self, value: T) { + self.map.insert_unique_unchecked(value, ()); + } + /// Adds a value to the set, replacing the existing value, if any, that is equal to the given /// one. Returns the replaced value. ///