Skip to content

Commit

Permalink
more fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
lwwmanning committed Oct 9, 2024
1 parent 1c656e8 commit 2c9e284
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 21 deletions.
60 changes: 52 additions & 8 deletions encodings/roaring/src/integer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@ pub use compress::*;
use croaring::{Bitmap, Portable};
use serde::{Deserialize, Serialize};
use vortex::array::PrimitiveArray;
use vortex::compute::unary::try_cast;
use vortex::encoding::ids;
use vortex::stats::{ArrayStatisticsCompute, StatsSet};
use vortex::validity::{ArrayValidity, LogicalValidity};
use vortex::stats::{ArrayStatistics, ArrayStatisticsCompute, Stat, StatsSet};
use vortex::validity::{ArrayValidity, LogicalValidity, Validity};
use vortex::variants::{ArrayVariants, PrimitiveArrayTrait};
use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor};
use vortex::{impl_encoding, Array, ArrayTrait, Canonical, IntoArray, IntoCanonical, TypedArray};
use vortex::{
impl_encoding, Array, ArrayDType as _, ArrayTrait, Canonical, IntoArray, IntoArrayVariant,
IntoCanonical, TypedArray,
};
use vortex_buffer::Buffer;
use vortex_dtype::Nullability::NonNullable;
use vortex_dtype::{DType, PType};
Expand All @@ -34,9 +38,25 @@ impl Display for RoaringIntMetadata {
impl RoaringIntArray {
pub fn try_new(bitmap: Bitmap, ptype: PType) -> VortexResult<Self> {
if !ptype.is_unsigned_int() {
vortex_bail!("RoaringInt expected unsigned int");
vortex_bail!(MismatchedTypes: "unsigned int", ptype);
}

let length = bitmap.statistics().cardinality as usize;
let max = bitmap.maximum();
if max.map(|mv| mv as u64 > ptype.max_value()).unwrap_or(false) {
vortex_bail!(
"RoaringInt maximum value is greater than the maximum value for the primitive type"
);
}

let mut stats = StatsSet::new();
stats.set(Stat::NullCount, 0.into());
stats.set(Stat::Max, max.into());
stats.set(Stat::Min, bitmap.minimum().into());
stats.set(Stat::IsConstant, (length <= 1).into());
stats.set(Stat::IsSorted, true.into());
stats.set(Stat::IsStrictSorted, true.into());

Ok(Self {
typed: TypedArray::try_from_parts(
DType::Primitive(ptype, NonNullable),
Expand Down Expand Up @@ -94,17 +114,41 @@ impl ArrayValidity for RoaringIntArray {

impl IntoCanonical for RoaringIntArray {
fn into_canonical(self) -> VortexResult<Canonical> {
todo!()
try_cast(
PrimitiveArray::from_vec(self.bitmap().to_vec(), Validity::NonNullable),
self.dtype(),
)
.and_then(|a| a.into_primitive())
.map(Canonical::Primitive)
}
}

impl AcceptArrayVisitor for RoaringIntArray {
fn accept(&self, _visitor: &mut dyn ArrayVisitor) -> VortexResult<()> {
todo!()
fn accept(&self, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> {
visitor.visit_buffer(
self.as_ref()
.buffer()
.vortex_expect("Missing buffer in RoaringIntArray"),
)
}
}

impl ArrayStatisticsCompute for RoaringIntArray {}
impl ArrayStatisticsCompute for RoaringIntArray {
    /// Returns the statistics recorded for this array, computing `stat` when it is missing.
    ///
    /// If `stat` is already present in this array's statistics, the recorded set is returned
    /// unchanged. Otherwise, for `TrailingZeroFreq`, `BitWidthFreq` and `RunCount`, the bitmap
    /// is expanded into a temporary non-nullable `PrimitiveArray`, the statistic is computed
    /// on that array, and the result is merged into the returned set. Any other missing
    /// statistic is not computed here; the recorded set is returned as is.
    ///
    /// # Errors
    ///
    /// Propagates any error from computing the statistic on the temporary primitive array.
    fn compute_statistics(&self, stat: Stat) -> VortexResult<StatsSet> {
        let mut stats = self.statistics().to_set();
        if stats.get(stat).is_some() {
            return Ok(stats);
        }

        if matches!(stat, Stat::TrailingZeroFreq | Stat::BitWidthFreq | Stat::RunCount) {
            let primitive = PrimitiveArray::from_vec(self.bitmap().to_vec(), Validity::NonNullable);
            // NOTE(review): `statistics().to_set()` appears to return only what is already
            // recorded on an array, and nothing has been computed for this temporary one yet,
            // so merging that set would add nothing. Compute the requested statistic
            // explicitly instead.
            let prim_stats = primitive.compute_statistics(stat)?;
            stats.merge(&prim_stats);
        }

        Ok(stats)
    }
}

#[cfg(test)]
mod test {
Expand Down
23 changes: 12 additions & 11 deletions vortex-array/src/array/chunked/compute/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,18 @@ fn take_strict_sorted(chunked: &ChunkedArray, indices: &Array) -> VortexResult<A
// Adjust the indices so they're relative to the chunk
// Note: the indices' dtype might not be wide enough to hold chunk_begin. If it is,
// cast the scalar to that dtype; otherwise upcast the indices.
let chunk_indices = if chunk_begin < PType::try_from(chunk_indices.dtype())?.max_value() {
subtract_scalar(
&chunk_indices,
&Scalar::from(chunk_begin).cast(chunk_indices.dtype())?,
)?
} else {
// Note: this try_cast (memory copy) is unnecessary; we could instead upcast in the subtract fn
// and avoid an extra copy.
let u64_chunk_indices = try_cast(&chunk_indices, PType::U64.into())?;
subtract_scalar(&u64_chunk_indices, &chunk_begin.into())?
};
let chunk_indices =
if chunk_begin < PType::try_from(chunk_indices.dtype())?.max_value() as usize {
subtract_scalar(
&chunk_indices,
&Scalar::from(chunk_begin).cast(chunk_indices.dtype())?,
)?
} else {
// Note: this try_cast (memory copy) is unnecessary; we could instead upcast in the subtract fn
// and avoid an extra copy.
let u64_chunk_indices = try_cast(&chunk_indices, PType::U64.into())?;
subtract_scalar(&u64_chunk_indices, &chunk_begin.into())?
};

indices_by_chunk[chunk_idx] = Some(chunk_indices);

Expand Down
4 changes: 2 additions & 2 deletions vortex-dtype/src/ptype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,8 @@ impl PType {
self.byte_width() * 8
}

pub const fn max_value(&self) -> usize {
match_each_integer_ptype!(self, |$T| $T::MAX as usize)
pub const fn max_value(&self) -> u64 {
match_each_integer_ptype!(self, |$T| $T::MAX as u64)
}

pub fn to_signed(self) -> Self {
Expand Down

0 comments on commit 2c9e284

Please sign in to comment.