Commit

Fix bitpacked take for 0 bit width arrays
robert3005 committed Apr 8, 2024
1 parent 1bec4c2 commit 75c273f
Showing 1 changed file with 21 additions and 7 deletions.
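The change special-cases `take` on arrays whose packed bit width is 0. As a minimal, self-contained sketch of the idea (hypothetical types, not the vortex API): with a bit width of 0 the packed buffer encodes no bits at all, so every taken value is either the fill value or comes from the patches (assuming 0 is the fill for unpatched positions).

```rust
// Hypothetical standalone model of the fixed behaviour -- not the vortex API.
// A bit-packed array with bit_width == 0 stores no packed bits: every slot
// decodes to the fill value (assumed 0 here), and any other values live
// exclusively in the patches.
use std::collections::HashMap;

struct ZeroWidthArray {
    len: usize,
    // patch position -> exception value (assumed sparse representation)
    patches: HashMap<usize, u64>,
}

impl ZeroWidthArray {
    /// `take` for the 0-bit-width case: there is nothing to unpack, so the
    /// result is entirely determined by the patches.
    fn take(&self, indices: &[usize]) -> Vec<u64> {
        indices
            .iter()
            .map(|i| {
                assert!(*i < self.len, "index out of bounds");
                self.patches.get(i).copied().unwrap_or(0)
            })
            .collect()
    }
}

fn main() {
    let arr = ZeroWidthArray {
        len: 8,
        patches: HashMap::from([(3, 42), (6, 7)]),
    };
    // Before the fix, this path fell through to the generic unpacking code;
    // here the taken values come straight from the patches.
    assert_eq!(arr.take(&[0, 3, 6, 7]), vec![0, 42, 0, 7]);
}
```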
28 changes: 21 additions & 7 deletions vortex-fastlanes/src/bitpacking/compute.rs
@@ -56,9 +56,23 @@ impl ScalarAtFn for BitPackedArray {
 
 impl TakeFn for BitPackedArray {
     fn take(&self, indices: &dyn Array) -> VortexResult<ArrayRef> {
-        let indices = flatten_primitive(indices)?;
         let ptype = self.dtype().try_into()?;
-        let taken_validity = self.validity().map(|v| v.take(&indices)).transpose()?;
+        let taken_validity = self.validity().map(|v| v.take(indices)).transpose()?;
+        if self.bit_width() == 0 {
+            if let Some(patches) = self.patches() {
+                let primitive_patches = flatten_primitive(&take(patches, indices)?)?;
+                return Ok(PrimitiveArray::new(
+                    ptype,
+                    primitive_patches.buffer().clone(),
+                    taken_validity,
+                )
+                .into_array());
+            } else {
+                unreachable!("array can't be packed to 0 width and have no patches")
+            }
+        }
+
+        let indices = flatten_primitive(indices)?;
         let taken = match_integers_by_width!(ptype, |$T| {
             PrimitiveArray::from_nullable(take_primitive::<$T>(self, &indices)?, taken_validity)
         });
@@ -89,14 +103,14 @@ fn take_primitive<T: NativePType + TryBitPack>(
         .patches()
         .map(|p| {
             p.maybe_sparse()
-                .ok_or(vortex_err!("Only sparse patches are currently supported!"))
+                .ok_or_else(|| vortex_err!("Only sparse patches are currently supported!"))
         })
         .transpose()?;
 
     // if we have a small number of relatively large batches, we gain by slicing and then patching inside the loop
     // if we have a large number of relatively small batches, the overhead isn't worth it, and we're better off with a bulk patch
     // roughly, if we have an average of less than 64 elements per batch, we prefer bulk patching
-    let prefer_bulk_patch = relative_indices.len() * 64 > indices.len();
+    let prefer_bulk_patch = relative_indices.len() * 64 > indices.len() || array.bit_width() == 0;
 
     // assuming the buffer is already allocated (which will happen at most once)
     // then unpacking all 1024 elements takes ~8.8x as long as unpacking a single element
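On the heuristic in the hunk above: `relative_indices` has one entry per touched 1024-element chunk, so `relative_indices.len() * 64 > indices.len()` says the average number of taken indices per touched chunk is below 64. A standalone sketch of the arithmetic (not vortex code), including the new 0-bit-width short-circuit:

```rust
// Standalone illustration of the bulk-patch heuristic -- not the vortex code.
// With a 0 bit width there is nothing to unpack per chunk, so bulk patching
// is always preferred.
fn prefer_bulk_patch(chunks_touched: usize, num_indices: usize, bit_width: usize) -> bool {
    chunks_touched * 64 > num_indices || bit_width == 0
}

fn main() {
    // 200 chunks touched by 10_000 indices -> 50 per chunk on average: bulk patch.
    assert!(prefer_bulk_patch(200, 10_000, 3));
    // 100 chunks touched by 10_000 indices -> 100 per chunk: per-chunk patching wins.
    assert!(!prefer_bulk_patch(100, 10_000, 3));
    // 0 bit width: always bulk patch, regardless of batch sizes.
    assert!(prefer_bulk_patch(100, 10_000, 0));
}
```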
@@ -130,7 +144,7 @@ fn take_primitive<T: NativePType + TryBitPack>(
                 patches.slice(chunk * 1024, min((chunk + 1) * 1024, patches.len()))?;
             let patches_slice = patches_slice
                 .maybe_sparse()
-                .ok_or(vortex_err!("Only sparse patches are currently supported!"))?;
+                .ok_or_else(|| vortex_err!("Only sparse patches are currently supported!"))?;
             let offsets = PrimitiveArray::from(offsets);
             do_patch_for_take_primitive(patches_slice, &offsets, &mut output)?;
         }
@@ -154,15 +168,15 @@ fn do_patch_for_take_primitive<T: NativePType + TryBitPack>(
     let taken_patches = take(patches, indices)?;
     let taken_patches = taken_patches
         .maybe_sparse()
-        .ok_or(vortex_err!("Only sparse patches are currently supported!"))?;
+        .ok_or_else(|| vortex_err!("Only sparse patches are currently supported!"))?;
 
     let base_index = output.len() - indices.len();
     let output_patches = flatten_primitive(taken_patches.values())?;
     taken_patches
         .resolved_indices()
         .iter()
        .map(|idx| base_index + *idx)
-        .zip_eq(output_patches.typed_data::<T>())
+        .zip_eq(output_patches.buffer().typed_data::<T>())
         .for_each(|(idx, val)| {
             output[idx] = *val;
         });
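The hunk above writes each taken patch value into the tail of `output` at `base_index + idx`. A simplified, hypothetical sketch of that remapping (plain types standing in for the sparse patch machinery):

```rust
// Minimal sketch of the bulk-patch write in do_patch_for_take_primitive
// (hypothetical, simplified types -- not the vortex API).
// `resolved` holds batch-relative positions of patched values within the
// current batch of `num_indices` taken elements, which occupies the tail
// of `output`.
fn patch_output(output: &mut [u32], num_indices: usize, resolved: &[(usize, u32)]) {
    // The batch was appended last, so its first element sits at this offset.
    let base_index = output.len() - num_indices;
    for &(idx, val) in resolved {
        output[base_index + idx] = val;
    }
}

fn main() {
    // Two earlier elements, then a batch of four taken values (unpacked as 0s).
    let mut output = vec![9, 9, 0, 0, 0, 0];
    // Patches resolved to batch-relative positions 1 and 3.
    patch_output(&mut output, 4, &[(1, 42), (3, 7)]);
    assert_eq!(output, vec![9, 9, 0, 42, 0, 7]);
}
```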