Skip to content

Commit

Permalink
Auto merge of #470 - JustForFun88:doc_bucket_fn, r=Amanieu
Browse files Browse the repository at this point in the history
Doc all bucket functions

Further improvements to documentation. Now it’s time for all ***bucket*** functions. Additionally, the `RawTableInner::data_end<T>` and `RawTable::data_end` functions are now **safe** because they simply cast an existing pointer to a pointer of another type, which is safe.
  • Loading branch information
bors committed Oct 2, 2023
2 parents 844b337 + 708b6e7 commit 3d2d163
Show file tree
Hide file tree
Showing 3 changed files with 242 additions and 9 deletions.
2 changes: 2 additions & 0 deletions src/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -944,6 +944,8 @@ impl<K, V, S, A: Allocator> HashMap<K, V, S, A> {
///
/// Keeps the allocated memory for reuse.
///
/// [`retain()`]: HashMap::retain
///
/// # Examples
///
/// ```
Expand Down
247 changes: 238 additions & 9 deletions src/raw/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -905,10 +905,35 @@ impl<T, A: Allocator> RawTable<T, A> {
&self.alloc
}

/// Returns pointer to one past last element of data table.
/// Returns pointer to one past last `data` element in the the table as viewed from
/// the start point of the allocation.
///
/// The caller must ensure that the `RawTable` outlives the returned [`NonNull<T>`],
/// otherwise using it may result in [`undefined behavior`].
///
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[inline]
pub unsafe fn data_end(&self) -> NonNull<T> {
NonNull::new_unchecked(self.table.ctrl.as_ptr().cast())
pub fn data_end(&self) -> NonNull<T> {
// SAFETY: `self.table.ctrl` is `NonNull`, so casting it is safe
//
// `self.table.ctrl.as_ptr().cast()` returns pointer that
// points here (to the end of `T0`)
// ∨
// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
// \________ ________/
// \/
// `n = buckets - 1`, i.e. `RawTable::buckets() - 1`
//
// where: T0...T_n - our stored data;
// CT0...CT_n - control bytes or metadata for `data`.
// CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
// with loading `Group` bytes from the heap works properly, even if the result
// of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
// `RawTableInner::set_ctrl` function.
//
// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
unsafe { NonNull::new_unchecked(self.table.ctrl.as_ptr().cast()) }
}

/// Returns pointer to start of data table.
Expand Down Expand Up @@ -940,8 +965,55 @@ impl<T, A: Allocator> RawTable<T, A> {
}

/// Returns a pointer to an element in the table.
///
/// The caller must ensure that the `RawTable` outlives the returned [`Bucket<T>`],
/// otherwise using it may result in [`undefined behavior`].
///
/// # Safety
///
/// If `mem::size_of::<T>() != 0`, then the caller of this function must observe the
/// following safety rules:
///
/// * The table must already be allocated;
///
/// * The `index` must not be greater than the number returned by the [`RawTable::buckets`]
/// function, i.e. `(index + 1) <= self.buckets()`.
///
/// It is safe to call this function with index of zero (`index == 0`) on a table that has
/// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`].
///
/// If `mem::size_of::<T>() == 0`, then the only requirement is that the `index` must
/// not be greater than the number returned by the [`RawTable::buckets`] function, i.e.
/// `(index + 1) <= self.buckets()`.
///
/// [`RawTable::buckets`]: RawTable::buckets
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[inline]
pub unsafe fn bucket(&self, index: usize) -> Bucket<T> {
// If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
// (we start counting from "0", so that in the expression T[n], the "n" index actually one less than
// the "buckets" number of our `RawTable`, i.e. "n = RawTable::buckets() - 1"):
//
// `table.bucket(3).as_ptr()` returns a pointer that points here in the `data`
// part of the `RawTable`, i.e. to the start of T3 (see `Bucket::as_ptr`)
// |
// | `base = self.data_end()` points here
// | (to the start of CT0 or to the end of T0)
// v v
// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
// ^ \__________ __________/
// `table.bucket(3)` returns a pointer that points \/
// here in the `data` part of the `RawTable` (to additional control bytes
// the end of T3) `m = Group::WIDTH - 1`
//
// where: T0...T_n - our stored data;
// CT0...CT_n - control bytes or metadata for `data`;
// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
// the heap works properly, even if the result of `h1(hash) & self.table.bucket_mask`
// is equal to `self.table.bucket_mask`). See also `RawTableInner::set_ctrl` function.
//
// P.S. `h1(hash) & self.table.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
// of buckets is a power of two, and `self.table.bucket_mask = self.buckets() - 1`.
debug_assert_ne!(self.table.bucket_mask, 0);
debug_assert!(index < self.buckets());
Bucket::from_base_index(self.data_end(), index)
Expand Down Expand Up @@ -2212,6 +2284,9 @@ impl RawTableInner {
///
/// * The [`RawTableInner`] must have properly initialized control bytes.
///
/// The type `T` must be the actual type of the elements stored in the table,
/// otherwise using the returned [`RawIter`] results in [`undefined behavior`].
///
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[inline]
unsafe fn iter<T>(&self) -> RawIter<T> {
Expand All @@ -2228,13 +2303,20 @@ impl RawTableInner {
// `ctrl` points here (to the start
// of the first control byte `CT0`)
// ∨
// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, Group::WIDTH
// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
// \________ ________/
// \/
// `n = buckets - 1`, i.e. `RawIndexTableInner::buckets() - 1`
// `n = buckets - 1`, i.e. `RawTableInner::buckets() - 1`
//
// where: T0...T_n - our stored data;
// CT0...CT_n - control bytes or metadata for `data`.
// CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
// with loading `Group` bytes from the heap works properly, even if the result
// of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
// `RawTableInner::set_ctrl` function.
//
// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
let data = Bucket::from_base_index(self.data_end(), 0);
RawIter {
// SAFETY: See explanation above
Expand All @@ -2258,6 +2340,9 @@ impl RawTableInner {
///
/// # Safety
///
/// The type `T` must be the actual type of the elements stored in the table,
/// otherwise calling this function may result in [`undefined behavior`].
///
/// If `T` is a type that should be dropped and **the table is not empty**,
/// calling this function more than once results in [`undefined behavior`].
///
Expand Down Expand Up @@ -2309,6 +2394,8 @@ impl RawTableInner {
///
/// * Calling this function more than once;
///
/// * The type `T` must be the actual type of the elements stored in the table.
///
/// * The `alloc` must be the same [`Allocator`] as the `Allocator` that was used
/// to allocate this table.
///
Expand Down Expand Up @@ -2348,13 +2435,116 @@ impl RawTableInner {
}
}

/// Returns a pointer to an element in the table (convenience for
/// `Bucket::from_base_index(self.data_end::<T>(), index)`).
///
/// The caller must ensure that the `RawTableInner` outlives the returned [`Bucket<T>`],
/// otherwise using it may result in [`undefined behavior`].
///
/// # Safety
///
/// If `mem::size_of::<T>() != 0`, then the safety rules are directly derived from the
/// safety rules of the [`Bucket::from_base_index`] function. Therefore, when calling
/// this function, the following safety rules must be observed:
///
/// * The table must already be allocated;
///
/// * The `index` must not be greater than the number returned by the [`RawTableInner::buckets`]
/// function, i.e. `(index + 1) <= self.buckets()`.
///
/// * The type `T` must be the actual type of the elements stored in the table, otherwise
/// using the returned [`Bucket`] may result in [`undefined behavior`].
///
/// It is safe to call this function with index of zero (`index == 0`) on a table that has
/// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`].
///
/// If `mem::size_of::<T>() == 0`, then the only requirement is that the `index` must
/// not be greater than the number returned by the [`RawTable::buckets`] function, i.e.
/// `(index + 1) <= self.buckets()`.
///
/// ```none
/// If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
/// (we start counting from "0", so that in the expression T[n], the "n" index actually one less than
/// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::buckets() - 1"):
///
/// `table.bucket(3).as_ptr()` returns a pointer that points here in the `data`
/// part of the `RawTableInner`, i.e. to the start of T3 (see [`Bucket::as_ptr`])
/// |
/// | `base = table.data_end::<T>()` points here
/// | (to the start of CT0 or to the end of T0)
/// v v
/// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
/// ^ \__________ __________/
/// `table.bucket(3)` returns a pointer that points \/
/// here in the `data` part of the `RawTableInner` additional control bytes
/// (to the end of T3) `m = Group::WIDTH - 1`
///
/// where: T0...T_n - our stored data;
/// CT0...CT_n - control bytes or metadata for `data`;
/// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
/// the heap works properly, even if the result of `h1(hash) & self.bucket_mask`
/// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function.
///
/// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
/// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
/// ```
///
/// [`Bucket::from_base_index`]: Bucket::from_base_index
/// [`RawTableInner::buckets`]: RawTableInner::buckets
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[inline]
unsafe fn bucket<T>(&self, index: usize) -> Bucket<T> {
debug_assert_ne!(self.bucket_mask, 0);
debug_assert!(index < self.buckets());
Bucket::from_base_index(self.data_end(), index)
}

/// Returns a raw `*mut u8` pointer to the start of the `data` element in the table
/// (convenience for `self.data_end::<u8>().as_ptr().sub((index + 1) * size_of)`).
///
/// The caller must ensure that the `RawTableInner` outlives the returned `*mut u8`,
/// otherwise using it may result in [`undefined behavior`].
///
/// # Safety
///
/// If any of the following conditions are violated, the result is [`undefined behavior`]:
///
/// * The table must already be allocated;
///
/// * The `index` must not be greater than the number returned by the [`RawTableInner::buckets`]
/// function, i.e. `(index + 1) <= self.buckets()`;
///
/// * The `size_of` must be equal to the size of the elements stored in the table;
///
/// ```none
/// If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
/// (we start counting from "0", so that in the expression T[n], the "n" index actually one less than
/// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::buckets() - 1"):
///
/// `table.bucket_ptr(3, mem::size_of::<T>())` returns a pointer that points here in the
/// `data` part of the `RawTableInner`, i.e. to the start of T3
/// |
/// | `base = table.data_end::<u8>()` points here
/// | (to the start of CT0 or to the end of T0)
/// v v
/// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
/// \__________ __________/
/// \/
/// additional control bytes
/// `m = Group::WIDTH - 1`
///
/// where: T0...T_n - our stored data;
/// CT0...CT_n - control bytes or metadata for `data`;
/// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
/// the heap works properly, even if the result of `h1(hash) & self.bucket_mask`
/// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function.
///
/// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
/// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
/// ```
///
/// [`RawTableInner::buckets`]: RawTableInner::buckets
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[inline]
unsafe fn bucket_ptr(&self, index: usize, size_of: usize) -> *mut u8 {
debug_assert_ne!(self.bucket_mask, 0);
Expand All @@ -2363,9 +2553,47 @@ impl RawTableInner {
base.sub((index + 1) * size_of)
}

/// Returns pointer to one past last `data` element in the the table as viewed from
/// the start point of the allocation (convenience for `self.ctrl.cast()`).
///
/// This function actually returns a pointer to the end of the `data element` at
/// index "0" (zero).
///
/// The caller must ensure that the `RawTableInner` outlives the returned [`NonNull<T>`],
/// otherwise using it may result in [`undefined behavior`].
///
/// # Note
///
/// The type `T` must be the actual type of the elements stored in the table, otherwise
/// using the returned [`NonNull<T>`] may result in [`undefined behavior`].
///
/// ```none
/// `table.data_end::<T>()` returns pointer that points here
/// (to the end of `T0`)
/// ∨
/// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
/// \________ ________/
/// \/
/// `n = buckets - 1`, i.e. `RawTableInner::buckets() - 1`
///
/// where: T0...T_n - our stored data;
/// CT0...CT_n - control bytes or metadata for `data`.
/// CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
/// with loading `Group` bytes from the heap works properly, even if the result
/// of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
/// `RawTableInner::set_ctrl` function.
///
/// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
/// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
/// ```
///
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[inline]
unsafe fn data_end<T>(&self) -> NonNull<T> {
NonNull::new_unchecked(self.ctrl.as_ptr().cast())
fn data_end<T>(&self) -> NonNull<T> {
unsafe {
// SAFETY: `self.ctrl` is `NonNull`, so casting it is safe
NonNull::new_unchecked(self.ctrl.as_ptr().cast())
}
}

/// Returns an iterator-like object for a probe sequence on the table.
Expand Down Expand Up @@ -2758,7 +2986,7 @@ impl RawTableInner {
// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, Group::WIDTH
// \________ ________/
// \/
// `n = buckets - 1`, i.e. `RawIndexTableInner::buckets() - 1`
// `n = buckets - 1`, i.e. `RawTableInner::buckets() - 1`
//
// where: T0...T_n - our stored data;
// CT0...CT_n - control bytes or metadata for `data`.
Expand Down Expand Up @@ -3000,7 +3228,7 @@ impl RawTableInner {
///
/// # Note
///
/// This function must be called only after [`drop_elements`](RawTable::drop_elements),
/// This function must be called only after [`drop_elements`](RawTableInner::drop_elements),
/// else it can lead to leaking of memory. Also calling this function automatically
/// makes invalid (dangling) all instances of buckets ([`Bucket`]) and makes invalid
/// (dangling) the `ctrl` field of the table.
Expand Down Expand Up @@ -3521,6 +3749,7 @@ impl<T> RawIterRange<T> {
///
/// * The `len` must be a power of two.
///
/// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety
/// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
#[cfg_attr(feature = "inline-more", inline)]
unsafe fn new(ctrl: *const u8, data: Bucket<T>, len: usize) -> Self {
Expand Down
2 changes: 2 additions & 0 deletions src/set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,8 @@ impl<T, S, A: Allocator> HashSet<T, S, A> {
/// or the iteration short-circuits, then the remaining elements will be retained.
/// Use [`retain()`] with a negated predicate if you do not need the returned iterator.
///
/// [`retain()`]: HashSet::retain
///
/// # Examples
///
/// ```
Expand Down

0 comments on commit 3d2d163

Please sign in to comment.