From 920d6c098428d8783e3e6d33c4b64b56eb9e76c2 Mon Sep 17 00:00:00 2001 From: feathercyc Date: Tue, 20 Aug 2024 12:54:36 +0000 Subject: [PATCH] feat: add some iter types Signed-off-by: feathercyc --- benches/bench.rs | 96 +++++++++++------ src/intervalmap.rs | 123 +++++++++++++++++++++- src/iter.rs | 252 +++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 2 + 4 files changed, 440 insertions(+), 33 deletions(-) create mode 100755 src/iter.rs diff --git a/benches/bench.rs b/benches/bench.rs index 904e254..feca181 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -1,44 +1,24 @@ use criterion::{criterion_group, criterion_main, Bencher, Criterion}; use interval_map::{Interval, IntervalMap}; +use rand::{rngs::StdRng, Rng, SeedableRng}; use std::hint::black_box; -struct Rng { - state: u32, -} -impl Rng { - fn new() -> Self { - Self { state: 0x87654321 } - } - - fn gen_u32(&mut self) -> u32 { - self.state ^= self.state << 13; - self.state ^= self.state >> 17; - self.state ^= self.state << 5; - self.state - } - - fn gen_range_i32(&mut self, low: i32, high: i32) -> i32 { - let d = (high - low) as u32; - low + (self.gen_u32() % d) as i32 - } -} - struct IntervalGenerator { - rng: Rng, - limit: i32, + rng: StdRng, + limit: u32, } impl IntervalGenerator { fn new() -> Self { - const LIMIT: i32 = 100000; + const LIMIT: u32 = 1000; Self { - rng: Rng::new(), + rng: StdRng::from_seed([0; 32]), limit: LIMIT, } } - fn next(&mut self) -> Interval { - let low = self.rng.gen_range_i32(0, self.limit - 1); - let high = self.rng.gen_range_i32(low + 1, self.limit); + fn next(&mut self) -> Interval { + let low = self.rng.gen_range(0..=self.limit - 1); + let high = self.rng.gen_range(low + 1..=self.limit); Interval::new(low, high) } } @@ -100,14 +80,68 @@ fn bench_interval_map_insert_remove(c: &mut Criterion) { }); } +// FilterIter helper fn +fn interval_map_filter_iter(count: usize, bench: &mut Bencher) { + let mut gen = IntervalGenerator::new(); + let intervals: Vec<_> = 
std::iter::repeat_with(|| gen.next()).take(count).collect(); + let mut map = IntervalMap::new(); + for i in intervals.clone() { + map.insert(i, ()); + } + bench.iter(|| { + for i in intervals.clone() { + black_box(map.filter_iter(&i).collect::>()); + } + }); +} + +// iter().filter() helper fn +fn interval_map_iter_filter(count: usize, bench: &mut Bencher) { + let mut gen = IntervalGenerator::new(); + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next()).take(count).collect(); + let mut map = IntervalMap::new(); + for i in intervals.clone() { + map.insert(i, ()); + } + bench.iter(|| { + for i in intervals.clone() { + black_box(map.iter().filter(|v| v.0.overlaps(&i)).collect::>()); + } + }); +} + +fn bench_interval_map_filter_iter(c: &mut Criterion) { + c.bench_function("bench_interval_map_filter_iter_100", |b| { + interval_map_filter_iter(100, b) + }); + c.bench_function("bench_interval_map_filter_iter_1000", |b| { + interval_map_filter_iter(1000, b) + }); +} + +fn bench_interval_map_iter_filter(c: &mut Criterion) { + c.bench_function("bench_interval_map_iter_filter_100", |b| { + interval_map_iter_filter(100, b) + }); + c.bench_function("bench_interval_map_iter_filter_1000", |b| { + interval_map_iter_filter(1000, b) + }); +} + fn criterion_config() -> Criterion { Criterion::default().configure_from_args().without_plots() } criterion_group! { - name = benches; + name = benches_basic_op; + config = criterion_config(); + targets = bench_interval_map_insert, bench_interval_map_insert_remove, +} + +criterion_group! 
{ + name = benches_iter; config = criterion_config(); - targets = bench_interval_map_insert, bench_interval_map_insert_remove + targets = bench_interval_map_filter_iter, bench_interval_map_iter_filter } -criterion_main!(benches); +criterion_main!(benches_basic_op, benches_iter); diff --git a/src/intervalmap.rs b/src/intervalmap.rs index 1d7fd20..def4049 100644 --- a/src/intervalmap.rs +++ b/src/intervalmap.rs @@ -1,6 +1,7 @@ use crate::entry::{Entry, OccupiedEntry, VacantEntry}; use crate::index::{DefaultIx, IndexType, NodeIndex}; use crate::interval::{Interval, IntervalRef}; +use crate::iter::{FilterIter, IntoIter, Iter, UnsortedIter}; use crate::node::{Color, Node}; use std::collections::VecDeque; @@ -69,7 +70,7 @@ where // check for max capacity, except if we use usize assert!( ::max().index() == !0 - || as IndexType>::max() != node_idx, + || as IndexType>::max() != node_idx, "Reached maximum number of nodes" ); self.nodes.push(node); @@ -126,6 +127,10 @@ where !self.node_ref(node_idx, Node::is_sentinel) } + /// When `interval_map.len() < Self::BFS_MIN_THRESHOLD`, directly traversing the inner vec of `interval_map` + /// is faster than BFS. + const BFS_MIN_THRESHOLD: usize = 20; + /// Find all intervals in the map that overlaps with the given interval. /// /// # Note @@ -146,10 +151,45 @@ where /// ``` #[inline] pub fn find_all_overlap(&self, interval: &Interval) -> Vec<(&Interval, &V)> { + if self.node_ref(self.root, Node::is_sentinel) { + return Vec::new(); + } + if self.len() > Self::BFS_MIN_THRESHOLD { + self.find_all_overlap_inner(self.root, interval) + } else { + self.unsorted_iter() + .filter(|v| v.0.overlaps(interval)) + .collect() + } + } + + /// Find all intervals in the map that overlaps with the given interval. + /// + /// # Note + /// This method's returned data is ordered. Generally, it's much slower than `find_all_overlap`. 
+ /// + /// # Example + /// ```rust + /// use interval_map::{Interval, IntervalMap}; + /// + /// let mut map = IntervalMap::new(); + /// map.insert(Interval::new(1, 3), ()); + /// map.insert(Interval::new(2, 4), ()); + /// map.insert(Interval::new(6, 7), ()); + /// map.insert(Interval::new(7, 11), ()); + /// assert_eq!(map.find_all_overlap(&Interval::new(2, 7)).len(), 3); + /// map.remove(&Interval::new(1, 3)); + /// assert_eq!(map.find_all_overlap(&Interval::new(2, 7)).len(), 2); + /// ``` + #[inline] + pub fn find_all_overlap_ordered<'a>( + &'a self, + interval: &'a Interval, + ) -> Vec<(&Interval, &V)> { if self.node_ref(self.root, Node::is_sentinel) { Vec::new() } else { - self.find_all_overlap_inner(self.root, interval) + self.filter_iter(interval).collect() } } @@ -189,6 +229,70 @@ where .map(|idx| self.node_mut(idx, Node::value_mut)) } + /// Get an iterator over the entries of the map, sorted by key. + #[inline] + #[must_use] + pub fn iter(&self) -> Iter<'_, T, V, Ix> { + Iter::new(self) + } + + /// Get an iterator over the entries of the map, unsorted. + #[inline] + pub fn unsorted_iter(&self) -> UnsortedIter { + UnsortedIter::new(self) + } + + /// Get an iterator over the entries that overlap the `query`, sorted by key. + /// + /// # Panics + /// + /// The method panics when `query` contains a value that cannot be compared. + #[inline] + pub fn filter_iter<'a, 'b: 'a>(&'a self, query: &'b Interval) -> FilterIter { + FilterIter::new(self, query) + } + + /// Return true if the interval tree's key cover the entire given interval. 
+ /// + /// # Example + /// ```rust + /// use interval_map::{Interval, IntervalMap}; + /// + /// let mut map = IntervalMap::new(); + /// map.insert(Interval::new(3, 5), 0); + /// map.insert(Interval::new(5, 8), 1); + /// map.insert(Interval::new(9, 12), 1); + /// assert!(map.contains(&Interval::new(4, 6))); + /// assert!(!map.contains(&Interval::new(7, 10))); + /// ``` + #[inline] + pub fn contains(&self, interval: &Interval) -> bool { + let mut max_end: Option<&T> = None; + let mut min_begin: Option<&T> = None; + + let mut continuous = true; + self.filter_iter(interval).find(|v| { + if min_begin.is_none() { + min_begin = Some(&v.0.low); + max_end = Some(&v.0.high); + return false; + } + if max_end.map(|mv| mv < &v.0.low).unwrap() { + continuous = false; + return true; + } + if max_end.map(|mv| mv < &v.0.high).unwrap() { + max_end = Some(&v.0.high); + } + false + }); + + continuous + && min_begin.is_some() + && max_end.map(|mv| mv >= &interval.high).unwrap() + && min_begin.map(|mv| mv <= &interval.low).unwrap() + } + /// Get the given key's corresponding entry in the map for in-place manipulation. /// /// # Example @@ -241,6 +345,21 @@ where } } +impl IntoIterator for IntervalMap +where + T: Ord, + Ix: IndexType, +{ + type Item = (Interval, V); + + type IntoIter = IntoIter; + + /// Get an into iterator over the entries of the map, sorted by key. + fn into_iter(self) -> Self::IntoIter { + IntoIter::new(self) + } +} + impl IntervalMap where T: Ord, diff --git a/src/iter.rs b/src/iter.rs new file mode 100755 index 0000000..5bc304c --- /dev/null +++ b/src/iter.rs @@ -0,0 +1,252 @@ +use std::fmt::Debug; + +use crate::index::{IndexType, NodeIndex}; +use crate::interval::Interval; +use crate::intervalmap::IntervalMap; +use crate::node::Node; + +/// Pushes a link of nodes on the left to stack. 
+fn left_link<T, V, Ix>(map_ref: &IntervalMap<T, V, Ix>, mut x: NodeIndex<Ix>) -> Vec<NodeIndex<Ix>>
+where
+    T: Ord,
+    Ix: IndexType,
+{
+    let mut nodes = vec![];
+    while !map_ref.node_ref(x, Node::is_sentinel) {
+        nodes.push(x);
+        x = map_ref.node_ref(x, Node::left);
+    }
+    nodes
+}
+
+/// An in-order iterator over the entries of an `IntervalMap`.
+#[derive(Debug)]
+pub struct Iter<'a, T, V, Ix>
+where
+    T: Ord,
+{
+    /// Reference to the map
+    pub(crate) map_ref: &'a IntervalMap<T, V, Ix>,
+    /// Stack for iteration
+    pub(crate) stack: Vec<NodeIndex<Ix>>,
+}
+
+impl<'a, T, V, Ix> Iter<'a, T, V, Ix>
+where
+    T: Ord,
+    Ix: IndexType,
+{
+    /// Creates an iterator whose stack holds the root and its chain of left children.
+    pub fn new(map_ref: &'a IntervalMap<T, V, Ix>) -> Self {
+        Iter {
+            map_ref,
+            stack: left_link(map_ref, map_ref.root),
+        }
+    }
+}
+
+impl<'a, T, V, Ix> Iterator for Iter<'a, T, V, Ix>
+where
+    T: Ord,
+    Ix: IndexType,
+{
+    type Item = (&'a Interval<T>, &'a V);
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        // `pop` already returns `None` on an empty stack, which ends the iteration.
+        let x = self.stack.pop()?;
+        // Queue the left chain of the right child; those nodes are visited after `x`.
+        self.stack.extend(left_link(
+            self.map_ref,
+            self.map_ref.node_ref(x, Node::right),
+        ));
+        Some(self.map_ref.node_ref(x, |xn| (xn.interval(), xn.value())))
+    }
+}
+
+/// An owning iterator over the entries of an `IntervalMap`.
+#[derive(Debug)]
+pub struct IntoIter<T, V, Ix>
+where
+    T: Ord,
+{
+    /// The consumed map; intervals and values are moved out of its nodes
+    interval_map: IntervalMap<T, V, Ix>,
+    /// Stack for iteration
+    pub(crate) stack: Vec<NodeIndex<Ix>>,
+}
+
+impl<T, V, Ix> IntoIter<T, V, Ix>
+where
+    T: Ord,
+    Ix: IndexType,
+{
+    /// Takes ownership of `interval_map` and seeds the stack with the root's left chain.
+    pub fn new(interval_map: IntervalMap<T, V, Ix>) -> Self {
+        // Build the stack first so the struct needs no placeholder value.
+        let stack = left_link(&interval_map, interval_map.root);
+        IntoIter { interval_map, stack }
+    }
+}
+
+impl<T, V, Ix> Iterator for IntoIter<T, V, Ix>
+where
+    T: Ord,
+    Ix: IndexType,
+{
+    type Item = (Interval<T>, V);
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        // `pop` already returns `None` on an empty stack, which ends the iteration.
+        let x = self.stack.pop()?;
+        // Read the right link before the node's payload is taken below.
+        self.stack.extend(left_link(
+            &self.interval_map,
+            self.interval_map.node_ref(x, Node::right),
+        ));
+        let res = &mut self.interval_map.nodes[x.index()];
+        Some((res.interval.take().unwrap(), res.value.take().unwrap()))
+    }
+}
+
+/// An unsorted iterator over the entries of an `IntervalMap`.
+#[derive(Debug)]
+pub struct UnsortedIter<'a, T, V, Ix>
+where
+    T: Ord,
+{
+    /// Reference to the map
+    map_ref: &'a IntervalMap<T, V, Ix>,
+    /// Index of the node yielded most recently; starts at `NodeIndex::SENTINEL`
+    pub(crate) cur: NodeIndex<Ix>,
+}
+
+impl<'a, T, V, Ix> UnsortedIter<'a, T, V, Ix>
+where
+    T: Ord,
+    Ix: IndexType,
+{
+    /// Creates an iterator whose cursor starts at the sentinel; `next` advances it first.
+    pub fn new(map_ref: &'a IntervalMap<T, V, Ix>) -> Self {
+        UnsortedIter {
+            map_ref,
+            cur: NodeIndex::SENTINEL,
+        }
+    }
+}
+
+impl<'a, T, V, Ix> Iterator for UnsortedIter<'a, T, V, Ix>
+where
+    T: Ord,
+    Ix: IndexType,
+{
+    type Item = (&'a Interval<T>, &'a V);
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.map_ref.is_empty()
+            || self.cur.index() >= self.map_ref.len()
+            || self.cur.index() == <Ix as IndexType>::max().index()
+        {
+            return None;
+        }
+        self.cur = self.cur.inc();
+        Some(
+            self.map_ref
+                .node_ref(self.cur, |xn| (xn.interval(), xn.value())),
+        )
+    }
+}
+
+/// A filter iterator over the entries of an `IntervalMap`. It's equal to `iter().filter()`
+/// but faster than the latter.
+#[derive(Debug)]
+pub struct FilterIter<'a, T, V, Ix>
+where
+    T: Ord,
+{
+    /// Reference to the map
+    pub(crate) map_ref: &'a IntervalMap<T, V, Ix>,
+    /// Stack for iteration
+    pub(crate) stack: Vec<NodeIndex<Ix>>,
+    /// Filter criteria
+    pub(crate) query: &'a Interval<T>,
+}
+
+/// Collects nodes from `x` down its chain of left children for a filtered traversal.
+///
+/// A node is pushed only when its `low` is below `query.high`. The descent stops once `max`
+/// of the left child is `None` or not above `query.low`.
+/// NOTE(review): `max` presumably yields the largest `high` endpoint of a subtree; confirm.
+fn left_link_with_query<T, V, Ix>(
+    map_ref: &IntervalMap<T, V, Ix>,
+    mut x: NodeIndex<Ix>,
+    query: &Interval<T>,
+) -> Vec<NodeIndex<Ix>>
+where
+    T: Ord,
+    Ix: IndexType,
+{
+    let mut stack: Vec<NodeIndex<Ix>> = vec![];
+    // `max(x)` at or below `query.low`: return without descending into this subtree.
+    if map_ref.max(x).is_some_and(|v| v <= &query.low) {
+        return stack;
+    }
+    while map_ref.node_ref(x, Node::sentinel).is_some() {
+        // Nodes failing this check are not queued; `FilterIter::next` tests the rest.
+        if map_ref.node_ref(x, Node::interval).low < query.high {
+            stack.push(x);
+        }
+        // `None <= Some(_)` is true for `Option`, so a missing `max` also ends the descent.
+        if map_ref.max(map_ref.node_ref(x, Node::left)) <= Some(&query.low) {
+            break;
+        }
+        x = map_ref.node_ref(x, Node::left);
+    }
+    stack
+}
+
+impl<'a, T, V, Ix> FilterIter<'a, T, V, Ix>
+where
+    T: Ord,
+    Ix: IndexType,
+{
+    /// Creates an iterator over the entries of `map_ref` that overlap `query`.
+    pub fn new(map_ref: &'a IntervalMap<T, V, Ix>, query: &'a Interval<T>) -> Self {
+        FilterIter {
+            map_ref,
+            stack: left_link_with_query(map_ref, map_ref.root, query),
+            query,
+        }
+    }
+}
+
+impl<'a, T, V, Ix> Iterator for FilterIter<'a, T, V, Ix>
+where
+    T: Ord,
+    Ix: IndexType,
+{
+    type Item = (&'a Interval<T>, &'a V);
+
+    /// Returns the next entry overlapping the query, or `None` once the stack is empty.
+    ///
+    /// Popped nodes that fail the `overlaps` test are skipped without being yielded.
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        while let Some(x) = self.stack.pop() {
+            // Queue the right subtree first: it is explored whether or not `x` itself
+            // overlaps the query.
+            self.stack.extend(left_link_with_query(
+                self.map_ref,
+                self.map_ref.node_ref(x, Node::right),
+                self.query,
+            ));
+            if self.map_ref.node_ref(x, Node::interval).overlaps(self.query) {
+                return Some(self.map_ref.node_ref(x, |xn| (xn.interval(), xn.value())));
+            }
+        }
+        None
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 446178d..2be0e8a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -25,8 +25,10 @@ mod entry;
 mod index;
 mod interval;
 mod intervalmap;
+mod iter;
 mod node;
 
 pub use entry::{Entry, OccupiedEntry, VacantEntry};
 pub use interval::Interval;
 pub use intervalmap::IntervalMap;
+pub use iter::{FilterIter, IntoIter, Iter, UnsortedIter};