Skip to content

Commit 97c953f

Browse files
Add Iterator::map_windows
This is inherited from the old PR. This method returns an iterator over mapped windows of the starting iterator. Adding the more straight-forward `Iterator::windows` is not easily possible right now as the items are stored in the iterator type, meaning the `next` call would return references to `self`. This is not allowed by the current `Iterator` trait design. This might change once GATs have landed. The idea has been brought up by @m-ou-se here: https://rust-lang.zulipchat.com/#narrow/stream/219381-t-libs/topic/Iterator.3A.3A.7Bpairwise.2C.20windows.7D/near/224587771 Co-authored-by: Lukas Kalbertodt <[email protected]>
1 parent faee636 commit 97c953f

File tree

7 files changed

+743
-1
lines changed

7 files changed

+743
-1
lines changed

Diff for: library/core/src/iter/adapters/map_windows.rs

+293
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,293 @@
1+
use crate::{
2+
fmt,
3+
iter::{ExactSizeIterator, FusedIterator},
4+
mem::{self, MaybeUninit},
5+
ptr,
6+
};
7+
8+
/// An iterator over the mapped windows of another iterator.
9+
///
10+
/// This `struct` is created by the [`Iterator::map_windows`]. See its
11+
/// documentation for more information.
12+
#[must_use = "iterators are lazy and do nothing unless consumed"]
13+
#[unstable(feature = "iter_map_windows", reason = "recently added", issue = "87155")]
14+
pub struct MapWindows<I: Iterator, F, const N: usize> {
15+
f: F,
16+
inner: MapWindowsInner<I, N>,
17+
}
18+
19+
struct MapWindowsInner<I: Iterator, const N: usize> {
20+
// We fuse the inner iterator because there shouldn't be "holes" in
21+
// the sliding window. Once the iterator returns a `None`, we make
22+
// our `MapWindows` iterator return `None` forever.
23+
iter: Option<I>,
24+
// Since iterators are assumed lazy, i.e. it only yields an item when
25+
// `Iterator::next()` is called, and `MapWindows` is not an exception.
26+
//
27+
// Before the first iteration, we keep the buffer `None`. When the user
28+
// first call `next` or other methods that makes the iterator advance,
29+
// we collect the first `N` items yielded from the inner iterator and
30+
// put it into the buffer.
31+
//
32+
// When the inner iterator has returned a `None` (i.e. fused), we take
33+
// away this `buffer` and leave it `None` to reclaim its resources.
34+
//
35+
// FIXME: should we shrink the size of `buffer` using niche optimization?
36+
buffer: Option<Buffer<I::Item, N>>,
37+
}
38+
39+
// `Buffer` uses two times of space to reduce moves among the iterations.
40+
// `Buffer<T, N>` is semantically `[MaybeUninit<T>; 2 * N]`. However, due
41+
// to limitations of const generics, we use this different type. Note that
42+
// it has the same underlying memory layout.
43+
struct Buffer<T, const N: usize> {
44+
// Invariant: `self.buffer[self.start..self.start + N]` is initialized,
45+
// with all other elements being uninitialized. This also
46+
// implies that `self.start <= N`.
47+
buffer: [[MaybeUninit<T>; N]; 2],
48+
start: usize,
49+
}
50+
51+
impl<I: Iterator, F, const N: usize> MapWindows<I, F, N> {
52+
pub(in crate::iter) fn new(iter: I, f: F) -> Self {
53+
assert!(N != 0, "array in `Iterator::map_windows` must contain more than 0 elements");
54+
55+
// Only ZST arrays' length can be so large.
56+
if mem::size_of::<I::Item>() == 0 {
57+
assert!(
58+
N.checked_mul(2).is_some(),
59+
"array size of `Iterator::map_windows` is too large"
60+
);
61+
}
62+
63+
Self { inner: MapWindowsInner::new(iter), f }
64+
}
65+
}
66+
67+
impl<I: Iterator, const N: usize> MapWindowsInner<I, N> {
68+
#[inline]
69+
fn new(iter: I) -> Self {
70+
Self { iter: Some(iter), buffer: None }
71+
}
72+
73+
fn next_window(&mut self) -> Option<&[I::Item; N]> {
74+
let iter = self.iter.as_mut()?;
75+
match self.buffer {
76+
// It is the first time to advance. We collect
77+
// the first `N` items from `self.iter` to initialize `self.buffer`.
78+
None => self.buffer = Buffer::try_from_iter(iter),
79+
Some(ref mut buffer) => match iter.next() {
80+
None => {
81+
// Fuse the inner iterator since it yields a `None`.
82+
self.iter.take();
83+
self.buffer.take();
84+
}
85+
// Advance the iterator. We first call `next` before changing our buffer
86+
// at all. This means that if `next` panics, our invariant is upheld and
87+
// our `Drop` impl drops the correct elements.
88+
Some(item) => buffer.push(item),
89+
},
90+
}
91+
self.buffer.as_ref().map(Buffer::as_array_ref)
92+
}
93+
94+
fn size_hint(&self) -> (usize, Option<usize>) {
95+
let Some(ref iter) = self.iter else { return (0, Some(0)) };
96+
let (lo, hi) = iter.size_hint();
97+
if self.buffer.is_some() {
98+
// If the first `N` items are already yielded by the inner iterator,
99+
// the size hint is then equal to the that of the inner iterator's.
100+
(lo, hi)
101+
} else {
102+
// If the first `N` items are not yet yielded by the inner iterator,
103+
// the first `N` elements should be counted as one window, so both bounds
104+
// should subtract `N - 1`.
105+
(lo.saturating_sub(N - 1), hi.map(|hi| hi.saturating_sub(N - 1)))
106+
}
107+
}
108+
}
109+
110+
impl<T, const N: usize> Buffer<T, N> {
111+
fn try_from_iter(iter: &mut impl Iterator<Item = T>) -> Option<Self> {
112+
let first_half = crate::array::iter_next_chunk(iter).ok()?;
113+
let buffer = [MaybeUninit::new(first_half).transpose(), MaybeUninit::uninit_array()];
114+
Some(Self { buffer, start: 0 })
115+
}
116+
117+
#[inline]
118+
fn buffer_ptr(&self) -> *const MaybeUninit<T> {
119+
self.buffer.as_ptr().cast()
120+
}
121+
122+
#[inline]
123+
fn buffer_mut_ptr(&mut self) -> *mut MaybeUninit<T> {
124+
self.buffer.as_mut_ptr().cast()
125+
}
126+
127+
#[inline]
128+
fn as_array_ref(&self) -> &[T; N] {
129+
debug_assert!(self.start + N <= 2 * N);
130+
131+
// SAFETY: our invariant guarantees these elements are initialized.
132+
unsafe { &*self.buffer_ptr().add(self.start).cast() }
133+
}
134+
135+
#[inline]
136+
fn as_uninit_array_mut(&mut self) -> &mut MaybeUninit<[T; N]> {
137+
debug_assert!(self.start + N <= 2 * N);
138+
139+
// SAFETY: our invariant guarantees these elements are in bounds.
140+
unsafe { &mut *self.buffer_mut_ptr().add(self.start).cast() }
141+
}
142+
143+
/// Pushes a new item `next` to the back, and pops the front-most one.
144+
///
145+
/// All the elements will be shifted to the front end when pushing reaches
146+
/// the back end.
147+
fn push(&mut self, next: T) {
148+
let buffer_mut_ptr = self.buffer_mut_ptr();
149+
debug_assert!(self.start + N <= 2 * N);
150+
151+
let to_drop = if self.start == N {
152+
// We have reached the end of our buffer and have to copy
153+
// everything to the start. Example layout for N = 3.
154+
//
155+
// 0 1 2 3 4 5 0 1 2 3 4 5
156+
// ┌───┬───┬───┬───┬───┬───┐ ┌───┬───┬───┬───┬───┬───┐
157+
// │ - │ - │ - │ a │ b │ c │ -> │ b │ c │ n │ - │ - │ - │
158+
// └───┴───┴───┴───┴───┴───┘ └───┴───┴───┴───┴───┴───┘
159+
// ↑ ↑
160+
// start start
161+
162+
// SAFETY: the two pointers are valid for reads/writes of N -1
163+
// elements because our array's size is semantically 2 * N. The
164+
// regions also don't overlap for the same reason.
165+
//
166+
// We leave the old elements in place. As soon as `start` is set
167+
// to 0, we treat them as uninitialized and treat their copies
168+
// as initialized.
169+
let to_drop = unsafe {
170+
ptr::copy_nonoverlapping(buffer_mut_ptr.add(self.start + 1), buffer_mut_ptr, N - 1);
171+
(*buffer_mut_ptr.add(N - 1)).write(next);
172+
buffer_mut_ptr.add(self.start)
173+
};
174+
self.start = 0;
175+
to_drop
176+
} else {
177+
// SAFETY: `self.start` is < N as guaranteed by the invariant
178+
// plus the check above. Even if the drop at the end panics,
179+
// the invariant is upheld.
180+
//
181+
// Example layout for N = 3:
182+
//
183+
// 0 1 2 3 4 5 0 1 2 3 4 5
184+
// ┌───┬───┬───┬───┬───┬───┐ ┌───┬───┬───┬───┬───┬───┐
185+
// │ - │ a │ b │ c │ - │ - │ -> │ - │ - │ b │ c │ n │ - │
186+
// └───┴───┴───┴───┴───┴───┘ └───┴───┴───┴───┴───┴───┘
187+
// ↑ ↑
188+
// start start
189+
//
190+
let to_drop = unsafe {
191+
(*buffer_mut_ptr.add(self.start + N)).write(next);
192+
buffer_mut_ptr.add(self.start)
193+
};
194+
self.start += 1;
195+
to_drop
196+
};
197+
198+
// SAFETY: the index is valid and this is element `a` in the
199+
// diagram above and has not been dropped yet.
200+
unsafe { ptr::drop_in_place(to_drop.cast::<T>()) };
201+
}
202+
}
203+
204+
impl<T: Clone, const N: usize> Clone for Buffer<T, N> {
205+
fn clone(&self) -> Self {
206+
let mut buffer = Buffer {
207+
buffer: [MaybeUninit::uninit_array(), MaybeUninit::uninit_array()],
208+
start: self.start,
209+
};
210+
buffer.as_uninit_array_mut().write(self.as_array_ref().clone());
211+
buffer
212+
}
213+
}
214+
215+
impl<I, const N: usize> Clone for MapWindowsInner<I, N>
216+
where
217+
I: Iterator + Clone,
218+
I::Item: Clone,
219+
{
220+
fn clone(&self) -> Self {
221+
Self { iter: self.iter.clone(), buffer: self.buffer.clone() }
222+
}
223+
}
224+
225+
impl<T, const N: usize> Drop for Buffer<T, N> {
226+
fn drop(&mut self) {
227+
// SAFETY: our invariant guarantees that N elements starting from
228+
// `self.start` are initialized. We drop them here.
229+
unsafe {
230+
let initialized_part: *mut [T] = crate::ptr::slice_from_raw_parts_mut(
231+
self.buffer_mut_ptr().add(self.start).cast(),
232+
N,
233+
);
234+
ptr::drop_in_place(initialized_part);
235+
}
236+
}
237+
}
238+
239+
#[unstable(feature = "iter_map_windows", reason = "recently added", issue = "87155")]
240+
impl<I, F, R, const N: usize> Iterator for MapWindows<I, F, N>
241+
where
242+
I: Iterator,
243+
F: FnMut(&[I::Item; N]) -> R,
244+
{
245+
type Item = R;
246+
247+
fn next(&mut self) -> Option<Self::Item> {
248+
let window = self.inner.next_window()?;
249+
let out = (self.f)(window);
250+
Some(out)
251+
}
252+
253+
fn size_hint(&self) -> (usize, Option<usize>) {
254+
self.inner.size_hint()
255+
}
256+
}
257+
258+
// Note that even if the inner iterator not fused, the `MapWindows` is still fused,
259+
// because we don't allow "holes" in the mapping window.
260+
#[unstable(feature = "iter_map_windows", reason = "recently added", issue = "87155")]
261+
impl<I, F, R, const N: usize> FusedIterator for MapWindows<I, F, N>
262+
where
263+
I: Iterator,
264+
F: FnMut(&[I::Item; N]) -> R,
265+
{
266+
}
267+
268+
#[unstable(feature = "iter_map_windows", reason = "recently added", issue = "87155")]
269+
impl<I, F, R, const N: usize> ExactSizeIterator for MapWindows<I, F, N>
270+
where
271+
I: ExactSizeIterator,
272+
F: FnMut(&[I::Item; N]) -> R,
273+
{
274+
}
275+
276+
#[unstable(feature = "iter_map_windows", reason = "recently added", issue = "87155")]
277+
impl<I: Iterator + fmt::Debug, F, const N: usize> fmt::Debug for MapWindows<I, F, N> {
278+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
279+
f.debug_struct("MapWindows").field("iter", &self.inner.iter).finish()
280+
}
281+
}
282+
283+
#[unstable(feature = "iter_map_windows", reason = "recently added", issue = "87155")]
284+
impl<I, F, const N: usize> Clone for MapWindows<I, F, N>
285+
where
286+
I: Iterator + Clone,
287+
F: Clone,
288+
I::Item: Clone,
289+
{
290+
fn clone(&self) -> Self {
291+
Self { f: self.f.clone(), inner: self.inner.clone() }
292+
}
293+
}

Diff for: library/core/src/iter/adapters/mod.rs

+4
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ mod inspect;
1616
mod intersperse;
1717
mod map;
1818
mod map_while;
19+
mod map_windows;
1920
mod peekable;
2021
mod rev;
2122
mod scan;
@@ -57,6 +58,9 @@ pub use self::intersperse::{Intersperse, IntersperseWith};
5758
#[stable(feature = "iter_map_while", since = "1.57.0")]
5859
pub use self::map_while::MapWhile;
5960

61+
#[unstable(feature = "iter_map_windows", reason = "recently added", issue = "87155")]
62+
pub use self::map_windows::MapWindows;
63+
6064
#[unstable(feature = "trusted_random_access", issue = "none")]
6165
pub use self::zip::TrustedRandomAccess;
6266

Diff for: library/core/src/iter/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,8 @@ pub use self::adapters::Copied;
440440
pub use self::adapters::Flatten;
441441
#[stable(feature = "iter_map_while", since = "1.57.0")]
442442
pub use self::adapters::MapWhile;
443+
#[unstable(feature = "iter_map_windows", reason = "recently added", issue = "87155")]
444+
pub use self::adapters::MapWindows;
443445
#[unstable(feature = "inplace_iteration", issue = "none")]
444446
pub use self::adapters::SourceIter;
445447
#[stable(feature = "iterator_step_by", since = "1.28.0")]

0 commit comments

Comments
 (0)