Commit

Fix bitpacked take for 0 bit width arrays
robert3005 committed Apr 8, 2024
1 parent 1bec4c2 commit 75c273f
Showing 1 changed file with 21 additions and 7 deletions.
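The change special-cases `take` on arrays whose packed bit width is 0. As a minimal, self-contained sketch of the idea (hypothetical types, not the vortex API): with a bit width of 0 the packed buffer encodes no bits at all, so every taken value is either the fill value or comes from the patches (assuming 0 is the fill for unpatched positions).

```rust
// Hypothetical standalone model of the fixed behaviour -- not the vortex API.
// A bit-packed array with bit_width == 0 stores no packed bits: every slot
// decodes to the fill value (assumed 0 here), and any other values live
// exclusively in the patches.
use std::collections::HashMap;

struct ZeroWidthArray {
    len: usize,
    // patch position -> exception value (assumed sparse representation)
    patches: HashMap<usize, u64>,
}

impl ZeroWidthArray {
    /// `take` for the 0-bit-width case: there is nothing to unpack, so the
    /// result is entirely determined by the patches.
    fn take(&self, indices: &[usize]) -> Vec<u64> {
        indices
            .iter()
            .map(|i| {
                assert!(*i < self.len, "index out of bounds");
                self.patches.get(i).copied().unwrap_or(0)
            })
            .collect()
    }
}

fn main() {
    let arr = ZeroWidthArray {
        len: 8,
        patches: HashMap::from([(3, 42), (6, 7)]),
    };
    // Before the fix, this path fell through to the generic unpacking code;
    // here the taken values come straight from the patches.
    assert_eq!(arr.take(&[0, 3, 6, 7]), vec![0, 42, 0, 7]);
}
```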
28 changes: 21 additions & 7 deletions vortex-fastlanes/src/bitpacking/compute.rs
@@ -56,9 +56,23 @@ impl ScalarAtFn for BitPackedArray {
 
 impl TakeFn for BitPackedArray {
     fn take(&self, indices: &dyn Array) -> VortexResult<ArrayRef> {
-        let indices = flatten_primitive(indices)?;
         let ptype = self.dtype().try_into()?;
-        let taken_validity = self.validity().map(|v| v.take(&indices)).transpose()?;
+        let taken_validity = self.validity().map(|v| v.take(indices)).transpose()?;
+        if self.bit_width() == 0 {
+            if let Some(patches) = self.patches() {
+                let primitive_patches = flatten_primitive(&take(patches, indices)?)?;
+                return Ok(PrimitiveArray::new(
+                    ptype,
+                    primitive_patches.buffer().clone(),
+                    taken_validity,
+                )
+                .into_array());
+            } else {
+                unreachable!("array can't be packed to 0 width and have no patches")
+            }
+        }
+
+        let indices = flatten_primitive(indices)?;
         let taken = match_integers_by_width!(ptype, |$T| {
             PrimitiveArray::from_nullable(take_primitive::<$T>(self, &indices)?, taken_validity)
         });
@@ -89,14 +103,14 @@ fn take_primitive<T: NativePType + TryBitPack>(
         .patches()
         .map(|p| {
             p.maybe_sparse()
-                .ok_or(vortex_err!("Only sparse patches are currently supported!"))
+                .ok_or_else(|| vortex_err!("Only sparse patches are currently supported!"))
         })
         .transpose()?;
 
     // if we have a small number of relatively large batches, we gain by slicing and then patching inside the loop
     // if we have a large number of relatively small batches, the overhead isn't worth it, and we're better off with a bulk patch
     // roughly, if we have an average of less than 64 elements per batch, we prefer bulk patching
-    let prefer_bulk_patch = relative_indices.len() * 64 > indices.len();
+    let prefer_bulk_patch = relative_indices.len() * 64 > indices.len() || array.bit_width() == 0;
 
     // assuming the buffer is already allocated (which will happen at most once)
     // then unpacking all 1024 elements takes ~8.8x as long as unpacking a single element
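On the heuristic in the hunk above: `relative_indices` has one entry per touched 1024-element chunk, so `relative_indices.len() * 64 > indices.len()` says the average number of taken indices per touched chunk is below 64. A standalone sketch of the arithmetic (not vortex code), including the new 0-bit-width short-circuit:

```rust
// Standalone illustration of the bulk-patch heuristic -- not the vortex code.
// With a 0 bit width there is nothing to unpack per chunk, so bulk patching
// is always preferred.
fn prefer_bulk_patch(chunks_touched: usize, num_indices: usize, bit_width: usize) -> bool {
    chunks_touched * 64 > num_indices || bit_width == 0
}

fn main() {
    // 200 chunks touched by 10_000 indices -> 50 per chunk on average: bulk patch.
    assert!(prefer_bulk_patch(200, 10_000, 3));
    // 100 chunks touched by 10_000 indices -> 100 per chunk: per-chunk patching wins.
    assert!(!prefer_bulk_patch(100, 10_000, 3));
    // 0 bit width: always bulk patch, regardless of batch sizes.
    assert!(prefer_bulk_patch(100, 10_000, 0));
}
```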
@@ -130,7 +144,7 @@ fn take_primitive<T: NativePType + TryBitPack>(
                 patches.slice(chunk * 1024, min((chunk + 1) * 1024, patches.len()))?;
             let patches_slice = patches_slice
                 .maybe_sparse()
-                .ok_or(vortex_err!("Only sparse patches are currently supported!"))?;
+                .ok_or_else(|| vortex_err!("Only sparse patches are currently supported!"))?;
             let offsets = PrimitiveArray::from(offsets);
             do_patch_for_take_primitive(patches_slice, &offsets, &mut output)?;
         }
@@ -154,15 +168,15 @@ fn do_patch_for_take_primitive<T: NativePType + TryBitPack>(
     let taken_patches = take(patches, indices)?;
     let taken_patches = taken_patches
         .maybe_sparse()
-        .ok_or(vortex_err!("Only sparse patches are currently supported!"))?;
+        .ok_or_else(|| vortex_err!("Only sparse patches are currently supported!"))?;
 
     let base_index = output.len() - indices.len();
     let output_patches = flatten_primitive(taken_patches.values())?;
     taken_patches
         .resolved_indices()
         .iter()
        .map(|idx| base_index + *idx)
-        .zip_eq(output_patches.typed_data::<T>())
+        .zip_eq(output_patches.buffer().typed_data::<T>())
         .for_each(|(idx, val)| {
             output[idx] = *val;
         });
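The hunk above writes each taken patch value into the tail of `output` at `base_index + idx`. A simplified, hypothetical sketch of that remapping (plain types standing in for the sparse patch machinery):

```rust
// Minimal sketch of the bulk-patch write in do_patch_for_take_primitive
// (hypothetical, simplified types -- not the vortex API).
// `resolved` holds batch-relative positions of patched values within the
// current batch of `num_indices` taken elements, which occupies the tail
// of `output`.
fn patch_output(output: &mut [u32], num_indices: usize, resolved: &[(usize, u32)]) {
    // The batch was appended last, so its first element sits at this offset.
    let base_index = output.len() - num_indices;
    for &(idx, val) in resolved {
        output[base_index + idx] = val;
    }
}

fn main() {
    // Two earlier elements, then a batch of four taken values (unpacked as 0s).
    let mut output = vec![9, 9, 0, 0, 0, 0];
    // Patches resolved to batch-relative positions 1 and 3.
    patch_output(&mut output, 4, &[(1, 42), (3, 7)]);
    assert_eq!(output, vec![9, 9, 0, 42, 0, 7]);
}
```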