diff --git a/src/map.rs b/src/map.rs index edd197771..ed1a6d507 100644 --- a/src/map.rs +++ b/src/map.rs @@ -944,6 +944,8 @@ impl HashMap { /// /// Keeps the allocated memory for reuse. /// + /// [`retain()`]: HashMap::retain + /// /// # Examples /// /// ``` diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 789c3a545..49c90239f 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -905,10 +905,35 @@ impl RawTable { &self.alloc } - /// Returns pointer to one past last element of data table. + /// Returns pointer to one past last `data` element in the the table as viewed from + /// the start point of the allocation. + /// + /// The caller must ensure that the `RawTable` outlives the returned [`NonNull`], + /// otherwise using it may result in [`undefined behavior`]. + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[inline] - pub unsafe fn data_end(&self) -> NonNull { - NonNull::new_unchecked(self.table.ctrl.as_ptr().cast()) + pub fn data_end(&self) -> NonNull { + // SAFETY: `self.table.ctrl` is `NonNull`, so casting it is safe + // + // `self.table.ctrl.as_ptr().cast()` returns pointer that + // points here (to the end of `T0`) + // ∨ + // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m + // \________ ________/ + // \/ + // `n = buckets - 1`, i.e. `RawTable::buckets() - 1` + // + // where: T0...T_n - our stored data; + // CT0...CT_n - control bytes or metadata for `data`. + // CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search + // with loading `Group` bytes from the heap works properly, even if the result + // of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also + // `RawTableInner::set_ctrl` function. + // + // P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number + // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`. + unsafe { NonNull::new_unchecked(self.table.ctrl.as_ptr().cast()) } } /// Returns pointer to start of data table. @@ -940,8 +965,55 @@ impl RawTable { } /// Returns a pointer to an element in the table. + /// + /// The caller must ensure that the `RawTable` outlives the returned [`Bucket`], + /// otherwise using it may result in [`undefined behavior`]. + /// + /// # Safety + /// + /// If `mem::size_of::() != 0`, then the caller of this function must observe the + /// following safety rules: + /// + /// * The table must already be allocated; + /// + /// * The `index` must not be greater than the number returned by the [`RawTable::buckets`] + /// function, i.e. `(index + 1) <= self.buckets()`. + /// + /// It is safe to call this function with index of zero (`index == 0`) on a table that has + /// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`]. + /// + /// If `mem::size_of::() == 0`, then the only requirement is that the `index` must + /// not be greater than the number returned by the [`RawTable::buckets`] function, i.e. + /// `(index + 1) <= self.buckets()`. + /// + /// [`RawTable::buckets`]: RawTable::buckets + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[inline] pub unsafe fn bucket(&self, index: usize) -> Bucket { + // If mem::size_of::() != 0 then return a pointer to the `element` in the `data part` of the table + // (we start counting from "0", so that in the expression T[n], the "n" index actually one less than + // the "buckets" number of our `RawTable`, i.e. "n = RawTable::buckets() - 1"): + // + // `table.bucket(3).as_ptr()` returns a pointer that points here in the `data` + // part of the `RawTable`, i.e. to the start of T3 (see `Bucket::as_ptr`) + // | + // | `base = self.data_end()` points here + // | (to the start of CT0 or to the end of T0) + // v v + // [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m + // ^ \__________ __________/ + // `table.bucket(3)` returns a pointer that points \/ + // here in the `data` part of the `RawTable` (to additional control bytes + // the end of T3) `m = Group::WIDTH - 1` + // + // where: T0...T_n - our stored data; + // CT0...CT_n - control bytes or metadata for `data`; + // CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from + // the heap works properly, even if the result of `h1(hash) & self.table.bucket_mask` + // is equal to `self.table.bucket_mask`). See also `RawTableInner::set_ctrl` function. + // + // P.S. `h1(hash) & self.table.bucket_mask` is the same as `hash as usize % self.buckets()` because the number + // of buckets is a power of two, and `self.table.bucket_mask = self.buckets() - 1`. debug_assert_ne!(self.table.bucket_mask, 0); debug_assert!(index < self.buckets()); Bucket::from_base_index(self.data_end(), index) @@ -2212,6 +2284,9 @@ impl RawTableInner { /// /// * The [`RawTableInner`] must have properly initialized control bytes. /// + /// The type `T` must be the actual type of the elements stored in the table, + /// otherwise using the returned [`RawIter`] results in [`undefined behavior`]. + /// /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[inline] unsafe fn iter(&self) -> RawIter { @@ -2228,13 +2303,20 @@ impl RawTableInner { // `ctrl` points here (to the start // of the first control byte `CT0`) // ∨ - // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, Group::WIDTH + // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m // \________ ________/ // \/ - // `n = buckets - 1`, i.e. `RawIndexTableInner::buckets() - 1` + // `n = buckets - 1`, i.e. `RawTableInner::buckets() - 1` // // where: T0...T_n - our stored data; // CT0...CT_n - control bytes or metadata for `data`. + // CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search + // with loading `Group` bytes from the heap works properly, even if the result + // of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also + // `RawTableInner::set_ctrl` function. + // + // P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number + // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`. let data = Bucket::from_base_index(self.data_end(), 0); RawIter { // SAFETY: See explanation above @@ -2258,6 +2340,9 @@ impl RawTableInner { /// /// # Safety /// + /// The type `T` must be the actual type of the elements stored in the table, + /// otherwise calling this function may result in [`undefined behavior`]. + /// /// If `T` is a type that should be dropped and **the table is not empty**, /// calling this function more than once results in [`undefined behavior`]. /// @@ -2309,6 +2394,8 @@ impl RawTableInner { /// /// * Calling this function more than once; /// + /// * The type `T` must be the actual type of the elements stored in the table. + /// /// * The `alloc` must be the same [`Allocator`] as the `Allocator` that was used /// to allocate this table. /// @@ -2348,6 +2435,63 @@ impl RawTableInner { } } + /// Returns a pointer to an element in the table (convenience for + /// `Bucket::from_base_index(self.data_end::(), index)`). + /// + /// The caller must ensure that the `RawTableInner` outlives the returned [`Bucket`], + /// otherwise using it may result in [`undefined behavior`]. + /// + /// # Safety + /// + /// If `mem::size_of::() != 0`, then the safety rules are directly derived from the + /// safety rules of the [`Bucket::from_base_index`] function. Therefore, when calling + /// this function, the following safety rules must be observed: + /// + /// * The table must already be allocated; + /// + /// * The `index` must not be greater than the number returned by the [`RawTableInner::buckets`] + /// function, i.e. `(index + 1) <= self.buckets()`. + /// + /// * The type `T` must be the actual type of the elements stored in the table, otherwise + /// using the returned [`Bucket`] may result in [`undefined behavior`]. + /// + /// It is safe to call this function with index of zero (`index == 0`) on a table that has + /// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`]. + /// + /// If `mem::size_of::() == 0`, then the only requirement is that the `index` must + /// not be greater than the number returned by the [`RawTable::buckets`] function, i.e. + /// `(index + 1) <= self.buckets()`. + /// + /// ```none + /// If mem::size_of::() != 0 then return a pointer to the `element` in the `data part` of the table + /// (we start counting from "0", so that in the expression T[n], the "n" index actually one less than + /// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::buckets() - 1"): + /// + /// `table.bucket(3).as_ptr()` returns a pointer that points here in the `data` + /// part of the `RawTableInner`, i.e. to the start of T3 (see [`Bucket::as_ptr`]) + /// | + /// | `base = table.data_end::()` points here + /// | (to the start of CT0 or to the end of T0) + /// v v + /// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m + /// ^ \__________ __________/ + /// `table.bucket(3)` returns a pointer that points \/ + /// here in the `data` part of the `RawTableInner` additional control bytes + /// (to the end of T3) `m = Group::WIDTH - 1` + /// + /// where: T0...T_n - our stored data; + /// CT0...CT_n - control bytes or metadata for `data`; + /// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from + /// the heap works properly, even if the result of `h1(hash) & self.bucket_mask` + /// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function. + /// + /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number + /// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`. + /// ``` + /// + /// [`Bucket::from_base_index`]: Bucket::from_base_index + /// [`RawTableInner::buckets`]: RawTableInner::buckets + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[inline] unsafe fn bucket(&self, index: usize) -> Bucket { debug_assert_ne!(self.bucket_mask, 0); @@ -2355,6 +2499,52 @@ impl RawTableInner { Bucket::from_base_index(self.data_end(), index) } + /// Returns a raw `*mut u8` pointer to the start of the `data` element in the table + /// (convenience for `self.data_end::().as_ptr().sub((index + 1) * size_of)`). + /// + /// The caller must ensure that the `RawTableInner` outlives the returned `*mut u8`, + /// otherwise using it may result in [`undefined behavior`]. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is [`undefined behavior`]: + /// + /// * The table must already be allocated; + /// + /// * The `index` must not be greater than the number returned by the [`RawTableInner::buckets`] + /// function, i.e. `(index + 1) <= self.buckets()`; + /// + /// * The `size_of` must be equal to the size of the elements stored in the table; + /// + /// ```none + /// If mem::size_of::() != 0 then return a pointer to the `element` in the `data part` of the table + /// (we start counting from "0", so that in the expression T[n], the "n" index actually one less than + /// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::buckets() - 1"): + /// + /// `table.bucket_ptr(3, mem::size_of::())` returns a pointer that points here in the + /// `data` part of the `RawTableInner`, i.e. to the start of T3 + /// | + /// | `base = table.data_end::()` points here + /// | (to the start of CT0 or to the end of T0) + /// v v + /// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m + /// \__________ __________/ + /// \/ + /// additional control bytes + /// `m = Group::WIDTH - 1` + /// + /// where: T0...T_n - our stored data; + /// CT0...CT_n - control bytes or metadata for `data`; + /// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from + /// the heap works properly, even if the result of `h1(hash) & self.bucket_mask` + /// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function. + /// + /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number + /// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`. + /// ``` + /// + /// [`RawTableInner::buckets`]: RawTableInner::buckets + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[inline] unsafe fn bucket_ptr(&self, index: usize, size_of: usize) -> *mut u8 { debug_assert_ne!(self.bucket_mask, 0); @@ -2363,9 +2553,47 @@ impl RawTableInner { base.sub((index + 1) * size_of) } + /// Returns pointer to one past last `data` element in the the table as viewed from + /// the start point of the allocation (convenience for `self.ctrl.cast()`). + /// + /// This function actually returns a pointer to the end of the `data element` at + /// index "0" (zero). + /// + /// The caller must ensure that the `RawTableInner` outlives the returned [`NonNull`], + /// otherwise using it may result in [`undefined behavior`]. + /// + /// # Note + /// + /// The type `T` must be the actual type of the elements stored in the table, otherwise + /// using the returned [`NonNull`] may result in [`undefined behavior`]. + /// + /// ```none + /// `table.data_end::()` returns pointer that points here + /// (to the end of `T0`) + /// ∨ + /// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m + /// \________ ________/ + /// \/ + /// `n = buckets - 1`, i.e. `RawTableInner::buckets() - 1` + /// + /// where: T0...T_n - our stored data; + /// CT0...CT_n - control bytes or metadata for `data`. + /// CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search + /// with loading `Group` bytes from the heap works properly, even if the result + /// of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also + /// `RawTableInner::set_ctrl` function. + /// + /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number + /// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`. + /// ``` + /// + /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[inline] - unsafe fn data_end(&self) -> NonNull { - NonNull::new_unchecked(self.ctrl.as_ptr().cast()) + fn data_end(&self) -> NonNull { + unsafe { + // SAFETY: `self.ctrl` is `NonNull`, so casting it is safe + NonNull::new_unchecked(self.ctrl.as_ptr().cast()) + } } /// Returns an iterator-like object for a probe sequence on the table. @@ -2758,7 +2986,7 @@ impl RawTableInner { // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, Group::WIDTH // \________ ________/ // \/ - // `n = buckets - 1`, i.e. `RawIndexTableInner::buckets() - 1` + // `n = buckets - 1`, i.e. `RawTableInner::buckets() - 1` // // where: T0...T_n - our stored data; // CT0...CT_n - control bytes or metadata for `data`. @@ -3000,7 +3228,7 @@ impl RawTableInner { /// /// # Note /// - /// This function must be called only after [`drop_elements`](RawTable::drop_elements), + /// This function must be called only after [`drop_elements`](RawTableInner::drop_elements), /// else it can lead to leaking of memory. Also calling this function automatically /// makes invalid (dangling) all instances of buckets ([`Bucket`]) and makes invalid /// (dangling) the `ctrl` field of the table. @@ -3521,6 +3749,7 @@ impl RawIterRange { /// /// * The `len` must be a power of two. /// + /// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[cfg_attr(feature = "inline-more", inline)] unsafe fn new(ctrl: *const u8, data: Bucket, len: usize) -> Self { diff --git a/src/set.rs b/src/set.rs index 0b8d9849b..38a96fbbc 100644 --- a/src/set.rs +++ b/src/set.rs @@ -383,6 +383,8 @@ impl HashSet { /// or the iteration short-circuits, then the remaining elements will be retained. /// Use [`retain()`] with a negated predicate if you do not need the returned iterator. /// + /// [`retain()`]: HashSet::retain + /// /// # Examples /// /// ```