diff --git a/Cargo.lock b/Cargo.lock index d3a602c27..e12269210 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3396,6 +3396,8 @@ dependencies = [ "opentelemetry-jaeger", "opentelemetry_sdk", "parking_lot", + "petgraph", + "rand", "serde", "test-macros", "thiserror", @@ -3761,6 +3763,7 @@ dependencies = [ "num-traits", "opentelemetry-jaeger", "opentelemetry_sdk", + "petgraph", "rand", "serde", "serde_json", diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml index 9a1ccbfc9..8ac9e5c36 100644 --- a/crates/utils/Cargo.toml +++ b/crates/utils/Cargo.toml @@ -26,6 +26,8 @@ getset = "0.1" opentelemetry = { version = "0.21.0", features = ["trace"] } opentelemetry_sdk = { version = "0.21.0", features = ["trace"] } parking_lot = { version = "0.12.1", optional = true } +petgraph = "0.6.4" +rand = "0.8.5" serde = { version = "1.0.137", features = ["derive"] } thiserror = "1.0.31" tokio = { version = "0.2.23", package = "madsim-tokio", features = [ diff --git a/crates/utils/benches/interval_map.rs b/crates/utils/benches/interval_map.rs new file mode 100644 index 000000000..02953eb97 --- /dev/null +++ b/crates/utils/benches/interval_map.rs @@ -0,0 +1,119 @@ +#![cfg(bench)] +#![feature(test)] + +extern crate test; +extern crate utils; + +use std::hint::black_box; + +use test::Bencher; + +use utils::interval_map::{Interval, IntervalMap}; + +struct Rng { + state: u32, +} + +impl Rng { + fn new() -> Self { + Self { state: 0x87654321 } + } + + fn gen_u32(&mut self) -> u32 { + self.state ^= self.state << 13; + self.state ^= self.state >> 17; + self.state ^= self.state << 5; + self.state + } + + fn gen_range_i32(&mut self, low: i32, high: i32) -> i32 { + let d = (high - low) as u32; + low + (self.gen_u32() % d) as i32 + } +} + +struct IntervalGenerator { + rng: Rng, + limit: i32, +} + +impl IntervalGenerator { + fn new() -> Self { + const LIMIT: i32 = 1000; + Self { + rng: Rng::new(), + limit: LIMIT, + } + } + + fn next(&mut self) -> Interval { + let low = 
self.rng.gen_range_i32(0, self.limit - 1); + let high = self.rng.gen_range_i32(low + 1, self.limit); + Interval::new(low, high) + } +} + +fn bench_interval_map_insert(count: usize, bench: &mut Bencher) { + let mut gen = IntervalGenerator::new(); + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next()).take(count).collect(); + bench.iter(|| { + let mut map = IntervalMap::new(); + for i in intervals.clone() { + black_box(map.insert(i, ())); + } + }); +} + +fn bench_interval_map_insert_remove(count: usize, bench: &mut Bencher) { + let mut gen = IntervalGenerator::new(); + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next()).take(count).collect(); + bench.iter(|| { + let mut map = IntervalMap::new(); + for i in intervals.clone() { + black_box(map.insert(i, ())); + } + for i in &intervals { + black_box(map.remove(&i)); + } + }); +} + +#[bench] +fn bench_interval_map_insert_100(bench: &mut Bencher) { + bench_interval_map_insert(100, bench); +} + +#[bench] +fn bench_interval_map_insert_1000(bench: &mut Bencher) { + bench_interval_map_insert(1000, bench); +} + +#[bench] +fn bench_interval_map_insert_10000(bench: &mut Bencher) { + bench_interval_map_insert(10_000, bench); +} + +#[bench] +fn bench_interval_map_insert_100000(bench: &mut Bencher) { + bench_interval_map_insert(100_000, bench); +} + +#[bench] +fn bench_interval_map_insert_remove_100(bench: &mut Bencher) { + bench_interval_map_insert_remove(100, bench); +} + +#[bench] +fn bench_interval_map_insert_remove_1000(bench: &mut Bencher) { + bench_interval_map_insert_remove(1000, bench); +} + +#[bench] +fn bench_interval_map_insert_remove_10000(bench: &mut Bencher) { + bench_interval_map_insert_remove(10_000, bench); +} + +#[bench] +fn bench_interval_map_insert_remove_100000(bench: &mut Bencher) { + bench_interval_map_insert_remove(100_000, bench); +} diff --git a/crates/utils/src/config.rs b/crates/utils/src/config.rs index a51e1f13e..e6c484672 100644 --- a/crates/utils/src/config.rs +++ 
b/crates/utils/src/config.rs @@ -46,7 +46,7 @@ pub type LevelConfig = tracing::metadata::LevelFilter; pub mod duration_format { use std::time::Duration; - use serde::{self, Deserialize, Deserializer}; + use serde::{Deserialize, Deserializer}; use crate::parse_duration; @@ -63,7 +63,7 @@ pub mod duration_format { /// batch size deserialization formatter pub mod bytes_format { - use serde::{self, Deserialize, Deserializer}; + use serde::{Deserialize, Deserializer}; use crate::parse_batch_bytes; @@ -156,7 +156,7 @@ pub enum InitialClusterState { /// `InitialClusterState` deserialization formatter pub mod state_format { - use serde::{self, Deserialize, Deserializer}; + use serde::{Deserialize, Deserializer}; use super::InitialClusterState; use crate::parse_state; @@ -774,7 +774,7 @@ impl Default for LogConfig { /// `LevelConfig` deserialization formatter pub mod level_format { - use serde::{self, Deserialize, Deserializer}; + use serde::{Deserialize, Deserializer}; use super::LevelConfig; use crate::parse_log_level; @@ -837,7 +837,7 @@ impl std::fmt::Display for RotationConfig { /// `RotationConfig` deserialization formatter pub mod rotation_format { - use serde::{self, Deserialize, Deserializer}; + use serde::{Deserialize, Deserializer}; use super::RotationConfig; use crate::parse_rotation; @@ -1037,7 +1037,7 @@ impl std::fmt::Display for MetricsPushProtocol { /// Metrics push protocol format pub mod protocol_format { - use serde::{self, Deserialize, Deserializer}; + use serde::{Deserialize, Deserializer}; use super::MetricsPushProtocol; use crate::parse_metrics_push_protocol; diff --git a/crates/utils/src/interval_map/mod.rs b/crates/utils/src/interval_map/mod.rs new file mode 100644 index 000000000..be66f3337 --- /dev/null +++ b/crates/utils/src/interval_map/mod.rs @@ -0,0 +1,1043 @@ +use std::collections::VecDeque; + +use petgraph::graph::{DefaultIx, IndexType, NodeIndex}; + +#[cfg(test)] +mod tests; + +/// An interval-value map, which support operations on dynamic 
sets of intervals. +#[derive(Debug)] +pub struct IntervalMap { + /// Vector that stores nodes + nodes: Vec>, + /// Root of the interval tree + root: NodeIndex, + /// Number of elements in the map + len: usize, +} + +impl IntervalMap +where + T: Ord, + Ix: IndexType, +{ + /// Creates a new `IntervalMap` with estimated capacity. + #[inline] + #[must_use] + pub fn with_capacity(capacity: usize) -> Self { + let mut nodes = vec![Self::new_sentinel()]; + nodes.reserve(capacity); + IntervalMap { + nodes, + root: Self::sentinel(), + len: 0, + } + } + + /// Inserts a interval-value pair into the map. + /// + /// # Panics + /// + /// This method panics when the tree is at the maximum number of nodes for its index + #[inline] + pub fn insert(&mut self, interval: Interval, value: V) -> Option { + let node_idx = NodeIndex::new(self.nodes.len()); + let node = Self::new_node(interval, value, node_idx); + // check for max capacity, except if we use usize + assert!( + ::max().index() == !0 || NodeIndex::end() != node_idx, + "Reached maximum number of nodes" + ); + self.nodes.push(node); + self.insert_inner(node_idx) + } + + /// Removes a interval from the map, returning the value at the interval if the interval + /// was previously in the map. + #[inline] + pub fn remove(&mut self, interval: &Interval) -> Option { + if let Some(node_idx) = self.search_exact(interval) { + self.remove_inner(node_idx); + // Swap the node with the last node stored in the vector and update indices + let mut node = self.nodes.swap_remove(node_idx.index()); + let old = NodeIndex::::new(self.nodes.len()); + self.update_idx(old, node_idx); + + return node.value.take(); + } + None + } + + /// Checks if an interval in the map overlaps with the given interval. + #[inline] + pub fn overlap(&self, interval: &Interval) -> bool { + let node_idx = self.search(interval); + !self.node_ref(node_idx, Node::is_sentinel) + } + + /// Finds all intervals in the map that overlaps with the given interval. 
+ #[inline] + pub fn find_all_overlap(&self, interval: &Interval) -> Vec<(&Interval, &V)> { + if self.node_ref(self.root, Node::is_sentinel) { + Vec::new() + } else { + self.find_all_overlap_inner_unordered(self.root, interval) + } + } + + /// Returns a reference to the value corresponding to the key. + #[inline] + pub fn get(&self, interval: &Interval) -> Option<&V> { + self.search_exact(interval) + .map(|idx| self.node_ref(idx, Node::value)) + } + + /// Returns a reference to the value corresponding to the key. + #[inline] + pub fn get_mut(&mut self, interval: &Interval) -> Option<&mut V> { + self.search_exact(interval) + .map(|idx| self.node_mut(idx, Node::value_mut)) + } + + /// Gets an iterator over the entries of the map, sorted by key. + #[inline] + #[must_use] + pub fn iter(&self) -> Iter<'_, T, V, Ix> { + Iter { + map_ref: self, + stack: None, + } + } + + /// Gets the given key's corresponding entry in the map for in-place manipulation. + #[inline] + pub fn entry(&mut self, interval: Interval) -> Entry<'_, T, V, Ix> { + match self.search_exact(&interval) { + Some(node) => Entry::Occupied(OccupiedEntry { + map_ref: self, + node, + }), + None => Entry::Vacant(VacantEntry { + map_ref: self, + interval, + }), + } + } + + /// Removes all elements from the map + #[inline] + pub fn clear(&mut self) { + self.nodes.clear(); + self.nodes.push(Self::new_sentinel()); + self.root = Self::sentinel(); + self.len = 0; + } + + /// Returns the number of elements in the map. + #[inline] + #[must_use] + pub fn len(&self) -> usize { + self.len + } + + /// Returns `true` if the map contains no elements. 
+ #[inline] + #[must_use] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +impl IntervalMap +where + T: Ord, +{ + /// Creates an empty `IntervalMap` + #[must_use] + #[inline] + pub fn new() -> Self { + Self { + nodes: vec![Self::new_sentinel()], + root: Self::sentinel(), + len: 0, + } + } +} + +impl Default for IntervalMap +where + T: Ord, +{ + #[inline] + fn default() -> Self { + Self::with_capacity(0) + } +} + +impl IntervalMap +where + T: Ord, + Ix: IndexType, +{ + /// Creates a new sentinel node + fn new_sentinel() -> Node { + Node { + interval: None, + value: None, + max_index: None, + left: None, + right: None, + parent: None, + color: Color::Black, + } + } + + /// Creates a new tree node + fn new_node(interval: Interval, value: V, index: NodeIndex) -> Node { + Node { + max_index: Some(index), + interval: Some(interval), + value: Some(value), + left: Some(Self::sentinel()), + right: Some(Self::sentinel()), + parent: Some(Self::sentinel()), + color: Color::Red, + } + } + + /// Gets the sentinel node index + fn sentinel() -> NodeIndex { + NodeIndex::new(0) + } +} + +impl IntervalMap +where + T: Ord, + Ix: IndexType, +{ + /// Inserts a node into the tree. 
+ fn insert_inner(&mut self, z: NodeIndex) -> Option { + let mut y = Self::sentinel(); + let mut x = self.root; + + while !self.node_ref(x, Node::is_sentinel) { + y = x; + if self.node_ref(z, Node::interval) == self.node_ref(y, Node::interval) { + let zval = self.node_mut(z, Node::take_value); + let old_value = self.node_mut(y, Node::set_value(zval)); + return Some(old_value); + } + if self.node_ref(z, Node::interval) < self.node_ref(x, Node::interval) { + x = self.node_ref(x, Node::left); + } else { + x = self.node_ref(x, Node::right); + } + } + self.node_mut(z, Node::set_parent(y)); + if self.node_ref(y, Node::is_sentinel) { + self.root = z; + } else { + if self.node_ref(z, Node::interval) < self.node_ref(y, Node::interval) { + self.node_mut(y, Node::set_left(z)); + } else { + self.node_mut(y, Node::set_right(z)); + } + self.update_max_bottom_up(y); + } + self.node_mut(z, Node::set_color(Color::Red)); + + self.insert_fixup(z); + + self.len = self.len.wrapping_add(1); + None + } + + /// Removes a node from the tree. 
+ fn remove_inner(&mut self, z: NodeIndex) { + let mut y = z; + let mut y_orig_color = self.node_ref(y, Node::color); + let x; + if self.left_ref(z, Node::is_sentinel) { + x = self.node_ref(z, Node::right); + self.transplant(z, x); + self.update_max_bottom_up(self.node_ref(z, Node::parent)); + } else if self.right_ref(z, Node::is_sentinel) { + x = self.node_ref(z, Node::left); + self.transplant(z, x); + self.update_max_bottom_up(self.node_ref(z, Node::parent)); + } else { + y = self.tree_minimum(self.node_ref(z, Node::right)); + let mut p = y; + y_orig_color = self.node_ref(y, Node::color); + x = self.node_ref(y, Node::right); + if self.node_ref(y, Node::parent) == z { + self.node_mut(x, Node::set_parent(y)); + } else { + self.transplant(y, x); + p = self.node_ref(y, Node::parent); + self.node_mut(y, Node::set_right(self.node_ref(z, Node::right))); + self.right_mut(y, Node::set_parent(y)); + } + self.transplant(z, y); + self.node_mut(y, Node::set_left(self.node_ref(z, Node::left))); + self.left_mut(y, Node::set_parent(y)); + self.node_mut(y, Node::set_color(self.node_ref(z, Node::color))); + + self.update_max_bottom_up(p); + } + + if matches!(y_orig_color, Color::Black) { + self.remove_fixup(x); + } + + self.len = self.len.wrapping_sub(1); + } + + /// Finds all intervals in the map that overlaps with the given interval. 
+ #[cfg(interval_tree_find_overlap_ordered)] + fn find_all_overlap_inner( + &self, + x: NodeIndex, + interval: &Interval, + ) -> Vec<(&Interval, &V)> { + let mut list = vec![]; + if self.node_ref(x, Node::interval).overlap(interval) { + list.push(self.node_ref(x, |nx| (nx.interval(), nx.value()))); + } + if self.max(self.node_ref(x, Node::left)) >= Some(&interval.low) { + list.extend(self.find_all_overlap_inner(self.node_ref(x, Node::left), interval)); + } + if self + .max(self.node_ref(x, Node::right)) + .map(|rmax| IntervalRef::new(&self.node_ref(x, Node::interval).low, rmax)) + .is_some_and(|i| i.overlap(interval)) + { + list.extend(self.find_all_overlap_inner(self.node_ref(x, Node::right), interval)); + } + list + } + + /// Finds all intervals in the map that overlaps with the given interval. + /// + /// The result is unordered because of breadth-first search to save stack size + fn find_all_overlap_inner_unordered( + &self, + x: NodeIndex, + interval: &Interval, + ) -> Vec<(&Interval, &V)> { + let mut list = Vec::new(); + let mut queue = VecDeque::new(); + queue.push_back(x); + while let Some(p) = queue.pop_front() { + if self.node_ref(p, Node::interval).overlap(interval) { + list.push(self.node_ref(p, |np| (np.interval(), np.value()))); + } + let p_left = self.node_ref(p, Node::left); + let p_right = self.node_ref(p, Node::right); + if self.max(p_left) >= Some(&interval.low) { + queue.push_back(p_left); + } + if self + .max(self.node_ref(p, Node::right)) + .map(|rmax| IntervalRef::new(&self.node_ref(p, Node::interval).low, rmax)) + .is_some_and(|i| i.overlap(interval)) + { + queue.push_back(p_right); + } + } + + list + } + + /// Search for an interval that overlaps with the given interval. 
+ fn search(&self, interval: &Interval) -> NodeIndex { + let mut x = self.root; + while self + .node_ref(x, Node::sentinel) + .map(Node::interval) + .is_some_and(|xi| !xi.overlap(interval)) + { + if self.max(self.node_ref(x, Node::left)) > Some(&interval.low) { + x = self.node_ref(x, Node::left); + } else { + x = self.node_ref(x, Node::right); + } + } + x + } + + /// Search for the node with exact the given interval + fn search_exact(&self, interval: &Interval) -> Option> { + let mut x = self.root; + while !self.node_ref(x, Node::is_sentinel) { + if self.node_ref(x, Node::interval) == interval { + return Some(x); + } + if self.max(x) < Some(&interval.high) { + return None; + } + if self.node_ref(x, Node::interval) > interval { + x = self.node_ref(x, Node::left); + } else { + x = self.node_ref(x, Node::right); + } + } + None + } + + /// Restores red-black tree properties after an insert. + fn insert_fixup(&mut self, mut z: NodeIndex) { + while self.parent_ref(z, Node::is_red) { + if self.grand_parent_ref(z, Node::is_sentinel) { + break; + } + if self.is_left_child(self.node_ref(z, Node::parent)) { + let y = self.grand_parent_ref(z, Node::right); + if self.node_ref(y, Node::is_red) { + self.parent_mut(z, Node::set_color(Color::Black)); + self.node_mut(y, Node::set_color(Color::Black)); + self.grand_parent_mut(z, Node::set_color(Color::Red)); + z = self.parent_ref(z, Node::parent); + } else { + if self.is_right_child(z) { + z = self.node_ref(z, Node::parent); + self.left_rotate(z); + } + self.parent_mut(z, Node::set_color(Color::Black)); + self.grand_parent_mut(z, Node::set_color(Color::Red)); + self.right_rotate(self.parent_ref(z, Node::parent)); + } + } else { + let y = self.grand_parent_ref(z, Node::left); + if self.node_ref(y, Node::is_red) { + self.parent_mut(z, Node::set_color(Color::Black)); + self.node_mut(y, Node::set_color(Color::Black)); + self.grand_parent_mut(z, Node::set_color(Color::Red)); + z = self.parent_ref(z, Node::parent); + } else { + if 
self.is_left_child(z) { + z = self.node_ref(z, Node::parent); + self.right_rotate(z); + } + self.parent_mut(z, Node::set_color(Color::Black)); + self.grand_parent_mut(z, Node::set_color(Color::Red)); + self.left_rotate(self.parent_ref(z, Node::parent)); + } + } + } + self.node_mut(self.root, Node::set_color(Color::Black)); + } + + /// Restores red-black tree properties after a remove. + fn remove_fixup(&mut self, mut x: NodeIndex) { + while x != self.root && self.node_ref(x, Node::is_black) { + let mut w; + if self.is_left_child(x) { + w = self.parent_ref(x, Node::right); + if self.node_ref(w, Node::is_red) { + self.node_mut(w, Node::set_color(Color::Black)); + self.parent_mut(x, Node::set_color(Color::Red)); + self.left_rotate(self.node_ref(x, Node::parent)); + w = self.parent_ref(x, Node::right); + } + if self.node_ref(w, Node::is_sentinel) { + break; + } + if self.left_ref(w, Node::is_black) && self.right_ref(w, Node::is_black) { + self.node_mut(w, Node::set_color(Color::Red)); + x = self.node_ref(x, Node::parent); + } else { + if self.right_ref(w, Node::is_black) { + self.left_mut(w, Node::set_color(Color::Black)); + self.node_mut(w, Node::set_color(Color::Red)); + self.right_rotate(w); + w = self.parent_ref(x, Node::right); + } + self.node_mut(w, Node::set_color(self.parent_ref(x, Node::color))); + self.parent_mut(x, Node::set_color(Color::Black)); + self.right_mut(w, Node::set_color(Color::Black)); + self.left_rotate(self.node_ref(x, Node::parent)); + x = self.root; + } + } else { + w = self.parent_ref(x, Node::left); + if self.node_ref(w, Node::is_red) { + self.node_mut(w, Node::set_color(Color::Black)); + self.parent_mut(x, Node::set_color(Color::Red)); + self.right_rotate(self.node_ref(x, Node::parent)); + w = self.parent_ref(x, Node::left); + } + if self.node_ref(w, Node::is_sentinel) { + break; + } + if self.right_ref(w, Node::is_black) && self.left_ref(w, Node::is_black) { + self.node_mut(w, Node::set_color(Color::Red)); + x = self.node_ref(x, 
Node::parent); + } else { + if self.left_ref(w, Node::is_black) { + self.right_mut(w, Node::set_color(Color::Black)); + self.node_mut(w, Node::set_color(Color::Red)); + self.left_rotate(w); + w = self.parent_ref(x, Node::left); + } + self.node_mut(w, Node::set_color(self.parent_ref(x, Node::color))); + self.parent_mut(x, Node::set_color(Color::Black)); + self.left_mut(w, Node::set_color(Color::Black)); + self.right_rotate(self.node_ref(x, Node::parent)); + x = self.root; + } + } + } + self.node_mut(x, Node::set_color(Color::Black)); + } + + /// Binary tree left rotate. + fn left_rotate(&mut self, x: NodeIndex) { + if self.right_ref(x, Node::is_sentinel) { + return; + } + let y = self.node_ref(x, Node::right); + self.node_mut(x, Node::set_right(self.node_ref(y, Node::left))); + if !self.left_ref(y, Node::is_sentinel) { + self.left_mut(y, Node::set_parent(x)); + } + + self.replace_parent(x, y); + self.node_mut(y, Node::set_left(x)); + + self.rotate_update_max(x, y); + } + + /// Binary tree right rotate. + fn right_rotate(&mut self, x: NodeIndex) { + if self.left_ref(x, Node::is_sentinel) { + return; + } + let y = self.node_ref(x, Node::left); + self.node_mut(x, Node::set_left(self.node_ref(y, Node::right))); + if !self.right_ref(y, Node::is_sentinel) { + self.right_mut(y, Node::set_parent(x)); + } + + self.replace_parent(x, y); + self.node_mut(y, Node::set_right(x)); + + self.rotate_update_max(x, y); + } + + /// Replaces parent during a rotation. + fn replace_parent(&mut self, x: NodeIndex, y: NodeIndex) { + self.node_mut(y, Node::set_parent(self.node_ref(x, Node::parent))); + if self.parent_ref(x, Node::is_sentinel) { + self.root = y; + } else if self.is_left_child(x) { + self.parent_mut(x, Node::set_left(y)); + } else { + self.parent_mut(x, Node::set_right(y)); + } + self.node_mut(x, Node::set_parent(y)); + } + + /// Updates the max value after a rotation. 
+ fn rotate_update_max(&mut self, x: NodeIndex, y: NodeIndex) { + self.node_mut(y, Node::set_max_index(self.node_ref(x, Node::max_index))); + self.recaculate_max(x); + } + + /// Updates the max value towards the root + fn update_max_bottom_up(&mut self, x: NodeIndex) { + let mut p = x; + while !self.node_ref(p, Node::is_sentinel) { + self.recaculate_max(p); + p = self.node_ref(p, Node::parent); + } + } + + /// Recaculate max value from left and right childrens + fn recaculate_max(&mut self, x: NodeIndex) { + self.node_mut(x, Node::set_max_index(x)); + let x_left = self.node_ref(x, Node::left); + let x_right = self.node_ref(x, Node::right); + if self.max(x_left) > self.max(x) { + self.node_mut( + x, + Node::set_max_index(self.node_ref(x_left, Node::max_index)), + ); + } + if self.max(x_right) > self.max(x) { + self.node_mut( + x, + Node::set_max_index(self.node_ref(x_right, Node::max_index)), + ); + } + } + + /// Finds the node with the minimum interval. + fn tree_minimum(&self, mut x: NodeIndex) -> NodeIndex { + while !self.left_ref(x, Node::is_sentinel) { + x = self.node_ref(x, Node::left); + } + x + } + + /// Replaces one subtree as a child of its parent with another subtree. + fn transplant(&mut self, u: NodeIndex, v: NodeIndex) { + if self.parent_ref(u, Node::is_sentinel) { + self.root = v; + } else if self.is_left_child(u) { + self.parent_mut(u, Node::set_left(v)); + } else { + self.parent_mut(u, Node::set_right(v)); + } + self.node_mut(v, Node::set_parent(self.node_ref(u, Node::parent))); + } + + /// Checks if a node is a left child of its parent. + fn is_left_child(&self, node: NodeIndex) -> bool { + self.parent_ref(node, Node::left) == node + } + + /// Checks if a node is a right child of its parent. 
+ fn is_right_child(&self, node: NodeIndex) -> bool { + self.parent_ref(node, Node::right) == node + } + + /// Updates nodes indices after remove + /// + /// This method has a time complexity of `O(logn)`, as we need to + /// update the max index from bottom to top. + fn update_idx(&mut self, old: NodeIndex, new: NodeIndex) { + if self.root == old { + self.root = new; + } + if self.nodes.get(new.index()).is_some() { + if !self.parent_ref(new, Node::is_sentinel) { + if self.parent_ref(new, Node::left) == old { + self.parent_mut(new, Node::set_left(new)); + } else { + self.parent_mut(new, Node::set_right(new)); + } + } + self.left_mut(new, Node::set_parent(new)); + self.right_mut(new, Node::set_parent(new)); + + let mut p = new; + while !self.node_ref(p, Node::is_sentinel) { + if self.node_ref(p, Node::max_index) == old { + self.node_mut(p, Node::set_max_index(new)); + } + p = self.node_ref(p, Node::parent); + } + } + } +} + +// Convenient methods for reference or mutate current/parent/left/right node +#[allow(clippy::missing_docs_in_private_items)] // Trivial convenient methods +#[allow(clippy::indexing_slicing)] // Won't panic since all the indices we used are inbound +impl<'a, T, V, Ix> IntervalMap +where + Ix: IndexType, +{ + fn node_ref(&'a self, node: NodeIndex, op: F) -> R + where + R: 'a, + F: FnOnce(&'a Node) -> R, + { + op(&self.nodes[node.index()]) + } + + fn node_mut(&'a mut self, node: NodeIndex, op: F) -> R + where + R: 'a, + F: FnOnce(&'a mut Node) -> R, + { + op(&mut self.nodes[node.index()]) + } + + fn left_ref(&'a self, node: NodeIndex, op: F) -> R + where + R: 'a, + F: FnOnce(&'a Node) -> R, + { + let idx = self.nodes[node.index()].left().index(); + op(&self.nodes[idx]) + } + + fn right_ref(&'a self, node: NodeIndex, op: F) -> R + where + R: 'a, + F: FnOnce(&'a Node) -> R, + { + let idx = self.nodes[node.index()].right().index(); + op(&self.nodes[idx]) + } + + fn parent_ref(&'a self, node: NodeIndex, op: F) -> R + where + R: 'a, + F: FnOnce(&'a 
Node) -> R, + { + let idx = self.nodes[node.index()].parent().index(); + op(&self.nodes[idx]) + } + + fn grand_parent_ref(&'a self, node: NodeIndex, op: F) -> R + where + R: 'a, + F: FnOnce(&'a Node) -> R, + { + let parent_idx = self.nodes[node.index()].parent().index(); + let grand_parent_idx = self.nodes[parent_idx].parent().index(); + op(&self.nodes[grand_parent_idx]) + } + + fn left_mut(&'a mut self, node: NodeIndex, op: F) -> R + where + R: 'a, + F: FnOnce(&'a mut Node) -> R, + { + let idx = self.nodes[node.index()].left().index(); + op(&mut self.nodes[idx]) + } + + fn right_mut(&'a mut self, node: NodeIndex, op: F) -> R + where + R: 'a, + F: FnOnce(&'a mut Node) -> R, + { + let idx = self.nodes[node.index()].right().index(); + op(&mut self.nodes[idx]) + } + + fn parent_mut(&'a mut self, node: NodeIndex, op: F) -> R + where + R: 'a, + F: FnOnce(&'a mut Node) -> R, + { + let idx = self.nodes[node.index()].parent().index(); + op(&mut self.nodes[idx]) + } + + fn grand_parent_mut(&'a mut self, node: NodeIndex, op: F) -> R + where + R: 'a, + F: FnOnce(&'a mut Node) -> R, + { + let parent_idx = self.nodes[node.index()].parent().index(); + let grand_parent_idx = self.nodes[parent_idx].parent().index(); + op(&mut self.nodes[grand_parent_idx]) + } + + fn max(&self, node: NodeIndex) -> Option<&T> { + let max_index = self.nodes[node.index()].max_index?.index(); + self.nodes[max_index].interval.as_ref().map(|i| &i.high) + } +} + +/// An iterator over the entries of a `IntervalMap`. +#[derive(Debug)] +pub struct Iter<'a, T, V, Ix> { + /// Reference to the map + map_ref: &'a IntervalMap, + /// Stack for iteration + stack: Option>>, +} + +impl Iter<'_, T, V, Ix> +where + Ix: IndexType, +{ + /// Initializes the stack + fn init_stack(&mut self) { + self.stack = Some(Self::left_link(self.map_ref, self.map_ref.root)); + } + + /// Pushes a link of nodes on the left to stack. 
+ fn left_link(map_ref: &IntervalMap, mut x: NodeIndex) -> Vec> { + let mut nodes = vec![]; + while !map_ref.node_ref(x, Node::is_sentinel) { + nodes.push(x); + x = map_ref.node_ref(x, Node::left); + } + nodes + } +} + +impl<'a, T, V, Ix> Iterator for Iter<'a, T, V, Ix> +where + Ix: IndexType, +{ + type Item = (&'a Interval, &'a V); + + #[allow(clippy::unwrap_used, clippy::unwrap_in_result)] + #[inline] + fn next(&mut self) -> Option { + if self.stack.is_none() { + self.init_stack(); + } + let stack = self.stack.as_mut().unwrap(); + if stack.is_empty() { + return None; + } + let x = stack.pop().unwrap(); + stack.extend(Self::left_link( + self.map_ref, + self.map_ref.node_ref(x, Node::right), + )); + Some(self.map_ref.node_ref(x, |xn| (xn.interval(), xn.value()))) + } +} + +/// A view into a single entry in a map, which may either be vacant or occupied. +#[allow(clippy::exhaustive_enums)] // It is final +#[derive(Debug)] +pub enum Entry<'a, T, V, Ix> { + /// An occupied entry. + Occupied(OccupiedEntry<'a, T, V, Ix>), + /// A vacant entry. + Vacant(VacantEntry<'a, T, V, Ix>), +} + +/// A view into an occupied entry in a `IntervalMap`. +/// It is part of the [`Entry`] enum. +#[derive(Debug)] +pub struct OccupiedEntry<'a, T, V, Ix> { + /// Reference to the map + map_ref: &'a mut IntervalMap, + /// The entry node + node: NodeIndex, +} + +/// A view into a vacant entry in a `IntervalMap`. +/// It is part of the [`Entry`] enum. +#[derive(Debug)] +pub struct VacantEntry<'a, T, V, Ix> { + /// Mutable reference to the map + map_ref: &'a mut IntervalMap, + /// The interval of this entry + interval: Interval, +} + +impl<'a, T, V, Ix> Entry<'a, T, V, Ix> +where + T: Ord, + Ix: IndexType, +{ + /// Ensures a value is in the entry by inserting the default if empty, and returns + /// a mutable reference to the value in the entry. 
+ #[inline] + pub fn or_insert(self, default: V) -> &'a mut V { + match self { + Entry::Occupied(entry) => entry.map_ref.node_mut(entry.node, Node::value_mut), + Entry::Vacant(entry) => { + let entry_idx = NodeIndex::new(entry.map_ref.nodes.len()); + let _ignore = entry.map_ref.insert(entry.interval, default); + entry.map_ref.node_mut(entry_idx, Node::value_mut) + } + } + } + + /// Provides in-place mutable access to an occupied entry before any + /// potential inserts into the map. + /// + /// # Panics + /// + /// This method panics when the node is a sentinel node + #[inline] + #[must_use] + pub fn and_modify(self, f: F) -> Self + where + F: FnOnce(&mut V), + { + match self { + Entry::Occupied(entry) => { + f(entry.map_ref.node_mut(entry.node, Node::value_mut)); + Self::Occupied(entry) + } + Entry::Vacant(entry) => Self::Vacant(entry), + } + } +} + +// TODO: better typed `Node` +/// Node of the interval tree +#[derive(Debug)] +pub struct Node { + /// Left children + left: Option>, + /// Right children + right: Option>, + /// Parent + parent: Option>, + /// Color of the node + color: Color, + + /// Interval of the node + interval: Option>, + /// The index that point to the node with the max value + max_index: Option>, + /// Value of the node + value: Option, +} + +// Convenient getter/setter methods +#[allow(clippy::missing_docs_in_private_items)] +#[allow(clippy::missing_docs_in_private_items)] // Trivial convenient methods +#[allow(clippy::unwrap_used)] // Won't panic since the conditions are checked in the implementation +impl Node +where + Ix: IndexType, +{ + fn color(&self) -> Color { + self.color + } + + fn interval(&self) -> &Interval { + self.interval.as_ref().unwrap() + } + + fn max_index(&self) -> NodeIndex { + self.max_index.unwrap() + } + + fn left(&self) -> NodeIndex { + self.left.unwrap() + } + + fn right(&self) -> NodeIndex { + self.right.unwrap() + } + + fn parent(&self) -> NodeIndex { + self.parent.unwrap() + } + + fn is_sentinel(&self) -> bool { 
+ self.interval.is_none() + } + + fn sentinel(&self) -> Option<&Self> { + self.interval.is_some().then_some(self) + } + + fn is_black(&self) -> bool { + matches!(self.color, Color::Black) + } + + fn is_red(&self) -> bool { + matches!(self.color, Color::Red) + } + + fn value(&self) -> &V { + self.value.as_ref().unwrap() + } + + fn value_mut(&mut self) -> &mut V { + self.value.as_mut().unwrap() + } + + fn take_value(&mut self) -> V { + self.value.take().unwrap() + } + + fn set_value(value: V) -> impl FnOnce(&mut Node) -> V { + move |node: &mut Node| node.value.replace(value).unwrap() + } + + fn set_color(color: Color) -> impl FnOnce(&mut Node) { + move |node: &mut Node| { + node.color = color; + } + } + + fn set_max_index(max_index: NodeIndex) -> impl FnOnce(&mut Node) { + move |node: &mut Node| { + let _ignore = node.max_index.replace(max_index); + } + } + + fn set_left(left: NodeIndex) -> impl FnOnce(&mut Node) { + move |node: &mut Node| { + let _ignore = node.left.replace(left); + } + } + + fn set_right(right: NodeIndex) -> impl FnOnce(&mut Node) { + move |node: &mut Node| { + let _ignore = node.right.replace(right); + } + } + + fn set_parent(parent: NodeIndex) -> impl FnOnce(&mut Node) { + move |node: &mut Node| { + let _ignore = node.parent.replace(parent); + } + } +} + +/// The Interval stored in `IntervalMap` +/// Represents the interval [low, high) +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Interval { + /// Low value + low: T, + /// high value + high: T, +} + +impl Interval { + /// Creates a new `Interval` + /// + /// # Panics + /// + /// This method panics when low is greater than high + #[inline] + pub fn new(low: T, high: T) -> Self { + assert!(low < high, "invalid range"); + Self { low, high } + } + + /// Checks if self overlaps with other interval + #[inline] + pub fn overlap(&self, other: &Self) -> bool { + self.high > other.low && other.high > self.low + } +} + +/// Reference type of `Interval` +#[derive(Clone, Debug, 
PartialEq, Eq, PartialOrd, Ord, Hash)] +struct IntervalRef<'a, T> { + /// Low value + low: &'a T, + /// High value + high: &'a T, +} + +impl<'a, T: Ord> IntervalRef<'a, T> { + /// Creates a new `IntervalRef` + /// + /// # Panics + /// + /// This method panics when low is not less than high + #[inline] + fn new(low: &'a T, high: &'a T) -> Self { + assert!(low < high, "invalid range"); + Self { low, high } + } + + /// Checks if self overlaps with an `Interval` + fn overlap(&self, other: &Interval) -> bool { + self.high > &other.low && &other.high > self.low + } +} + +/// The color of the node +#[derive(Debug, Clone, Copy)] +enum Color { + /// Red node + Red, + /// Black node + Black, +} diff --git a/crates/utils/src/interval_map/tests.rs b/crates/utils/src/interval_map/tests.rs new file mode 100644 index 000000000..0e864076b --- /dev/null +++ b/crates/utils/src/interval_map/tests.rs @@ -0,0 +1,322 @@ +use std::collections::HashSet; + +use rand::{rngs::StdRng, Rng, SeedableRng}; + +use super::*; + +struct IntervalGenerator { + rng: StdRng, + unique: HashSet>, + limit: i32, +} + +impl IntervalGenerator { + fn new(seed: [u8; 32]) -> Self { + const LIMIT: i32 = 1000; + Self { + rng: SeedableRng::from_seed(seed), + unique: HashSet::new(), + limit: LIMIT, + } + } + + fn next(&mut self) -> Interval { + let low = self.rng.gen_range(0..self.limit - 1); + let high = self.rng.gen_range((low + 1)..self.limit); + Interval::new(low, high) + } + + fn next_unique(&mut self) -> Interval { + let mut interval = self.next(); + while self.unique.contains(&interval) { + interval = self.next(); + } + self.unique.insert(interval.clone()); + interval + } + + fn next_with_range(&mut self, range: i32) -> Interval { + let low = self.rng.gen_range(0..self.limit - 1); + let high = self + .rng + .gen_range((low + 1)..self.limit.min(low + 1 + range)); + Interval::new(low, high) + } +} + +impl IntervalMap { + fn check_max(&self) { + let _ignore = self.check_max_inner(self.root); + } + + fn
check_max_inner(&self, x: NodeIndex) -> i32 { + if self.node_ref(x, Node::is_sentinel) { + return 0; + } + let l_max = self.check_max_inner(self.node_ref(x, Node::left)); + let r_max = self.check_max_inner(self.node_ref(x, Node::right)); + let max = self.node_ref(x, |x| x.interval().high.max(l_max).max(r_max)); + assert_eq!(self.max(x), Some(&max)); + max + } + + /// 1. Every node is either red or black. + /// 2. The root is black. + /// 3. Every leaf (NIL) is black. + /// 4. If a node is red, then both its children are black. + /// 5. For each node, all simple paths from the node to descendant leaves contain the + /// same number of black nodes. + fn check_rb_properties(&self) { + assert!(matches!( + self.node_ref(self.root, Node::color), + Color::Black + )); + self.check_children_color(self.root); + self.check_black_height(self.root); + } + + fn check_children_color(&self, x: NodeIndex) { + if self.node_ref(x, Node::is_sentinel) { + return; + } + self.check_children_color(self.node_ref(x, Node::left)); + self.check_children_color(self.node_ref(x, Node::right)); + if self.node_ref(x, Node::is_red) { + assert!(matches!(self.left_ref(x, Node::color), Color::Black)); + assert!(matches!(self.right_ref(x, Node::color), Color::Black)); + } + } + + fn check_black_height(&self, x: NodeIndex) -> usize { + if self.node_ref(x, Node::is_sentinel) { + return 0; + } + let lefth = self.check_black_height(self.node_ref(x, Node::left)); + let righth = self.check_black_height(self.node_ref(x, Node::right)); + assert_eq!(lefth, righth); + if self.node_ref(x, Node::is_black) { + return lefth + 1; + } + lefth + } +} + +fn with_map_and_generator(test_fn: impl Fn(IntervalMap, IntervalGenerator)) { + let seeds = vec![[0; 32], [1; 32], [2; 32]]; + for seed in seeds { + let gen = IntervalGenerator::new(seed); + let map = IntervalMap::new(); + test_fn(map, gen); + } +} + +#[test] +fn red_black_tree_properties_is_satisfied() { + with_map_and_generator(|mut map, mut gen| { + let intervals: 
Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .take(1000) + .collect(); + for i in intervals.clone() { + let _ignore = map.insert(i, ()); + } + map.check_rb_properties(); + }); +} + +#[test] +#[should_panic] +fn invalid_range_should_panic() { + let _interval = Interval::new(3, 1); +} + +#[test] +fn insert_equal_interval_returns_previous_value() { + let mut map = IntervalMap::new(); + map.insert(Interval::new(1, 3), 1); + assert_eq!(map.insert(Interval::new(1, 3), 2), Some(1)); + assert_eq!(map.insert(Interval::new(1, 3), 3), Some(2)); +} + +#[test] +fn map_len_will_update() { + with_map_and_generator(|mut map, mut gen| { + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .take(100) + .collect(); + for i in intervals.clone() { + let _ignore = map.insert(i, ()); + } + assert_eq!(map.len(), 100); + for i in intervals { + let _ignore = map.remove(&i); + } + assert_eq!(map.len(), 0); + }); +} + +#[test] +fn check_overlap_is_ok_simple() { + let mut map = IntervalMap::new(); + map.insert(Interval::new(1, 3), ()); + map.insert(Interval::new(6, 7), ()); + map.insert(Interval::new(9, 11), ()); + assert!(map.overlap(&Interval::new(2, 5))); + assert!(map.overlap(&Interval::new(1, 17))); + assert!(!map.overlap(&Interval::new(4, 5))); + assert!(!map.overlap(&Interval::new(20, 23))); +} + +#[test] +fn check_overlap_is_ok() { + with_map_and_generator(|mut map, mut gen| { + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_with_range(10)) + .take(100) + .collect(); + for i in intervals.clone() { + let _ignore = map.insert(i, ()); + } + let to_check: Vec<_> = std::iter::repeat_with(|| gen.next_with_range(10)) + .take(1000) + .collect(); + let expects: Vec<_> = to_check + .iter() + .map(|ci| intervals.iter().any(|i| ci.overlap(i))) + .collect(); + + for (ci, expect) in to_check.into_iter().zip(expects.into_iter()) { + assert_eq!(map.overlap(&ci), expect); + } + }); +} + +#[test] +fn check_max_is_ok() { + with_map_and_generator(|mut map, mut 
gen| { + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .take(1000) + .collect(); + for i in intervals.clone() { + let _ignore = map.insert(i, ()); + map.check_max(); + } + assert_eq!(map.len(), 1000); + for i in intervals { + let _ignore = map.remove(&i); + map.check_max(); + } + }); +} + +#[test] +fn remove_non_exist_interval_will_do_nothing() { + with_map_and_generator(|mut map, mut gen| { + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .take(1000) + .collect(); + for i in intervals { + let _ignore = map.insert(i, ()); + } + assert_eq!(map.len(), 1000); + let to_remove: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .take(1000) + .collect(); + for i in to_remove { + let _ignore = map.remove(&i); + } + assert_eq!(map.len(), 1000); + }); +} + +#[test] +fn find_all_overlap_is_ok_simple() { + let mut map = IntervalMap::new(); + map.insert(Interval::new(1, 3), ()); + map.insert(Interval::new(2, 4), ()); + map.insert(Interval::new(6, 7), ()); + map.insert(Interval::new(7, 11), ()); + assert_eq!(map.find_all_overlap(&Interval::new(2, 7)).len(), 3); + map.remove(&Interval::new(1, 3)); + assert_eq!(map.find_all_overlap(&Interval::new(2, 7)).len(), 2); +} + +#[test] +fn find_all_overlap_is_ok() { + with_map_and_generator(|mut map, mut gen| { + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .take(1000) + .collect(); + for i in intervals.clone() { + let _ignore = map.insert(i, ()); + } + let to_find: Vec<_> = std::iter::repeat_with(|| gen.next()).take(1000).collect(); + + let expects: Vec> = to_find + .iter() + .map(|ti| intervals.iter().filter(|i| ti.overlap(i)).collect()) + .collect(); + + for (ti, mut expect) in to_find.into_iter().zip(expects.into_iter()) { + let mut result = map.find_all_overlap(&ti); + expect.sort_unstable(); + result.sort_unstable(); + assert_eq!(expect.len(), result.len()); + for (e, r) in expect.into_iter().zip(result.into_iter()) { + assert_eq!(e, r.0); + } + } + }); 
+} + +#[test] +fn entry_modify_is_ok() { + let mut map = IntervalMap::new(); + map.insert(Interval::new(1, 3), 1); + map.insert(Interval::new(2, 4), 2); + map.insert(Interval::new(6, 7), 3); + map.insert(Interval::new(7, 11), 4); + let _ignore = map.entry(Interval::new(6, 7)).and_modify(|v| *v += 1); + assert_eq!(map.get(&Interval::new(1, 3)), Some(&1)); + assert_eq!(map.get(&Interval::new(2, 4)), Some(&2)); + assert_eq!(map.get(&Interval::new(6, 7)), Some(&4)); + assert_eq!(map.get(&Interval::new(7, 11)), Some(&4)); + assert_eq!(map.get(&Interval::new(5, 17)), None); + map.entry(Interval::new(3, 5)) + .and_modify(|v| *v += 1) + .or_insert(0); + let _ignore = map.get_mut(&Interval::new(3, 5)).map(|v| *v += 1); + assert_eq!(map.get(&Interval::new(3, 5)), Some(&1)); +} + +#[test] +fn iterate_through_map_is_sorted() { + with_map_and_generator(|mut map, mut gen| { + let mut intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .enumerate() + .take(1000) + .collect(); + for (v, i) in intervals.clone() { + let _ignore = map.insert(i, v); + } + intervals.sort_unstable_by(|a, b| a.1.cmp(&b.1)); + + #[allow(clippy::pattern_type_mismatch)] + for ((ei, ev), (v, i)) in map.iter().zip(intervals.iter()) { + assert_eq!(ei, i); + assert_eq!(ev, v); + } + }); +} + +#[test] +fn interval_map_clear_is_ok() { + let mut map = IntervalMap::new(); + map.insert(Interval::new(1, 3), 1); + map.insert(Interval::new(2, 4), 2); + map.insert(Interval::new(6, 7), 3); + assert_eq!(map.len(), 3); + map.clear(); + assert_eq!(map.len(), 0); + assert!(map.is_empty()); + assert_eq!(map.nodes.len(), 1); + assert!(map.nodes[0].is_sentinel()); +} diff --git a/crates/utils/src/lib.rs b/crates/utils/src/lib.rs index 46b1ebab0..aef5c75fa 100644 --- a/crates/utils/src/lib.rs +++ b/crates/utils/src/lib.rs @@ -178,6 +178,8 @@ pub struct ServerTlsConfig; /// configuration pub mod config; +/// Interval tree implementation +pub mod interval_map; /// utils for metrics pub mod metrics; /// utils of 
`parking_lot` lock diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index e30062fb1..e50ac63e1 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -31,6 +31,7 @@ memchr = { version = "2" } num-traits = { version = "0.2", default-features = false, features = ["i128", "std"] } opentelemetry-jaeger = { version = "0.20", features = ["rt-tokio"] } opentelemetry_sdk = { version = "0.21", features = ["metrics", "rt-tokio"] } +petgraph = { version = "0.6" } rand = { version = "0.8", features = ["small_rng"] } serde = { version = "1", features = ["derive", "rc"] } serde_json = { version = "1", features = ["raw_value"] } @@ -52,6 +53,7 @@ itertools = { version = "0.11" } libc = { version = "0.2", features = ["extra_traits"] } log = { version = "0.4", default-features = false, features = ["std"] } memchr = { version = "2" } +petgraph = { version = "0.6" } syn = { version = "2", features = ["extra-traits", "full", "visit", "visit-mut"] } tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "net", "rt-multi-thread", "signal", "sync", "time"] }