From 2c346dbc5270d7807f99fc9f35f36bbd0a404a54 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Thu, 12 Dec 2024 18:04:52 -0500 Subject: [PATCH] simplify StatsSet Changed Option -> Scalar. If there's a reason why it was Option, I couldn't find it. Reorganized a bit and added a few docstrings. Add a non-consuming `iter` for iterating over the StatsSet values, so that you can use it without `.clone()` first. --- vortex-array/src/stats/statsset.rs | 134 ++++++++++++++++------------- 1 file changed, 74 insertions(+), 60 deletions(-) diff --git a/vortex-array/src/stats/statsset.rs b/vortex-array/src/stats/statsset.rs index 8ff75a5f9..48a65e552 100644 --- a/vortex-array/src/stats/statsset.rs +++ b/vortex-array/src/stats/statsset.rs @@ -8,25 +8,21 @@ use crate::stats::Stat; #[derive(Debug, Clone, Default, PartialEq)] pub struct StatsSet { - values: Vec<(Stat, Option)>, + values: Vec<(Stat, Scalar)>, } impl StatsSet { /// Create new StatSet without validating uniqueness of all the entries + /// + /// # Safety + /// + /// This method will not panic or trigger UB, but may lead to duplicate stats being stored. pub fn new_unchecked(values: Vec<(Stat, Scalar)>) -> Self { Self { - values: values.into_iter().map(|(st, s)| (st, Some(s))).collect(), + values: values.into_iter().map(|(st, s)| (st, s)).collect(), } } - pub fn len(&self) -> usize { - self.values.iter().filter(|(_, v)| v.is_some()).count() - } - - pub fn is_empty(&self) -> bool { - self.values.is_empty() - } - /// Specialized constructor for the case where the StatsSet represents /// an array consisting entirely of [null](vortex_dtype::DType::Null) values. pub fn nulls(len: usize, dtype: &DType) -> Self { @@ -110,12 +106,22 @@ impl StatsSet { pub fn of>(stat: Stat, value: S) -> Self { Self::new_unchecked(vec![(stat, value.into())]) } +} + +// Getters and setters for individual stats. +impl StatsSet { + /// Count of stored stats with known values. + pub fn len(&self) -> usize { + self.values.len() + } + + /// Predicate equivalent to a [len][Self:len] of zero. + pub fn is_empty(&self) -> bool { + self.values.is_empty() + } pub fn get(&self, stat: Stat) -> Option<&Scalar> { - self.values - .iter() - .find(|(s, _)| *s == stat) - .and_then(|(_, v)| v.as_ref()) + self.values.iter().find(|(s, _)| *s == stat).map(|(_, v)| v) } pub fn get_as TryFrom<&'a Scalar, Error = VortexError>>( @@ -136,11 +142,10 @@ impl StatsSet { /// Set the stat `stat` to `value`. pub fn set>(&mut self, stat: Stat, value: S) { - let new_value = Some(value.into()); if let Some(existing) = self.values.iter_mut().find(|(s, _)| *s == stat) { - *existing = (stat, new_value); + *existing = (stat, value.into()); } else { - self.values.push((stat, new_value)); + self.values.push((stat, value.into())); } } @@ -153,6 +158,49 @@ impl StatsSet { self.values.retain(|(s, _)| stats.contains(s)) } + /// Iterate over the statistic names and values in-place. + /// + /// See [Iterator]. + pub fn iter<'a>(&'a self) -> impl Iterator { + self.values.iter() + } +} + +// StatSetIntoIter just exists to protect current implementation from exposure on the public API. + +/// Owned iterator over the stats. +/// +/// See [IntoIterator]. +pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Scalar)>); + +impl Iterator for StatsSetIntoIter { + type Item = (Stat, Scalar); + + fn next(&mut self) -> Option { + self.0.next() + } +} + +impl IntoIterator for StatsSet { + type Item = (Stat, Scalar); + type IntoIter = StatsSetIntoIter; + + fn into_iter(self) -> Self::IntoIter { + StatsSetIntoIter(self.values.into_iter()) + } +} + +impl Extend<(Stat, Scalar)> for StatsSet { + #[inline] + fn extend>(&mut self, iter: T) { + iter.into_iter().for_each(|(stat, scalar)| { + self.set(stat, scalar); + }); + } +} + +// Merge helpers +impl StatsSet { /// Merge stats set `other` into `self`, with the semantic assumption that `other` /// contains stats from an array that is *appended* to the array represented by `self`. pub fn merge_ordered(mut self, other: &Self) -> Self { @@ -329,49 +377,6 @@ impl StatsSet { } } -impl Extend<(Stat, Scalar)> for StatsSet { - #[inline] - fn extend>(&mut self, iter: T) { - iter.into_iter().for_each(|(stat, scalar)| { - self.set(stat, scalar); - }); - } -} - -pub struct StatsSetIntoIter { - inner: std::vec::IntoIter<(Stat, Option)>, -} - -impl Iterator for StatsSetIntoIter { - type Item = (Stat, Scalar); - - fn next(&mut self) -> Option { - loop { - match self.inner.next() { - Some((stat, Some(value))) => return Some((stat, value)), - Some(_) => continue, - None => return None, - } - } - } - - fn size_hint(&self) -> (usize, Option) { - // Our lower-bound is zero since we may filter all remaining values. - (0, self.inner.size_hint().1) - } -} - -impl IntoIterator for StatsSet { - type Item = (Stat, Scalar); - type IntoIter = StatsSetIntoIter; - - fn into_iter(self) -> Self::IntoIter { - StatsSetIntoIter { - inner: self.values.into_iter(), - } - } -} - #[cfg(test)] mod test { use enum_iterator::all; @@ -381,6 +386,15 @@ mod test { use crate::stats::{ArrayStatistics as _, Stat, StatsSet}; use crate::IntoArrayData as _; + #[test] + fn test_iter() { + let set = StatsSet::new_unchecked(vec![(Stat::Max, 100.into()), (Stat::Min, 42.into())]); + assert_eq!( + set.iter().cloned().collect_vec(), + vec![(Stat::Max, 100.into()), (Stat::Min, 42.into())] + ); + } + #[test] fn into_iter() { let set = StatsSet::new_unchecked(vec![(Stat::Max, 100.into()), (Stat::Min, 42.into())]);