Skip to content

Commit

Permalink
feat: kleene_or, kleene_and, and restore SQL semantics (#1284)
Browse files Browse the repository at this point in the history
I noticed that our row filters disagreed with DuckDB's semantics which
is SQL semantics, in particular `where true or null` _keeps_ the row (we
filtered it).

cc: @robert3005 we'll probably conflict on #1272
  • Loading branch information
danking authored Nov 13, 2024
1 parent 47e7742 commit 1249a7d
Show file tree
Hide file tree
Showing 9 changed files with 264 additions and 69 deletions.
10 changes: 6 additions & 4 deletions pyvortex/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ use crate::dtype::PyDType;
/// ]
///
/// Read rows whose name is `Angela` or whose age is between 20 and 30, inclusive. Notice that the
/// Angela row is excluded because its age is null. The entire row filtering expression therefore
/// evaluates to null which is interpreted as false:
/// Angela row is included even though its age is null. Under SQL / Kleene semantics, `true or
/// null` is `true`.
///
/// >>> name = vortex.expr.column("name")
/// >>> e = vortex.io.read_path("a.vortex", row_filter = (name == "Angela") | ((age >= 20) & (age <= 30)))
Expand All @@ -109,11 +109,13 @@ use crate::dtype::PyDType;
/// -- is_valid: all not null
/// -- child 0 type: int64
/// [
/// 25
/// 25,
/// null
/// ]
/// -- child 1 type: string_view
/// [
/// "Joseph"
/// "Joseph",
/// "Angela"
/// ]
#[pyclass(name = "Expr", module = "vortex")]
pub struct PyExpr {
Expand Down
38 changes: 25 additions & 13 deletions vortex-array/src/array/bool/compute/boolean.rs
Original file line number Diff line number Diff line change
@@ -1,36 +1,48 @@
use arrow_arith::boolean;
use arrow_array::cast::AsArray as _;
use arrow_array::{Array as _, BooleanArray};
use arrow_schema::ArrowError;
use vortex_error::VortexResult;

use crate::array::BoolArray;
use crate::arrow::FromArrowArray as _;
use crate::compute::{AndFn, OrFn};
use crate::{Array, IntoCanonical};

impl OrFn for BoolArray {
fn or(&self, array: &Array) -> VortexResult<Array> {
impl BoolArray {
/// Lift an Arrow binary boolean kernel function to Vortex arrays.
fn lift_arrow<F>(&self, arrow_fun: F, other: &Array) -> VortexResult<Array>
where
F: FnOnce(&BooleanArray, &BooleanArray) -> Result<BooleanArray, ArrowError>,
{
let lhs = self.clone().into_canonical()?.into_arrow()?;
let lhs = lhs.as_boolean();

let rhs = array.clone().into_canonical()?.into_arrow()?;
let rhs = other.clone().into_canonical()?.into_arrow()?;
let rhs = rhs.as_boolean();

let array = boolean::or(lhs, rhs)?;
let array = arrow_fun(lhs, rhs)?;

Ok(Array::from_arrow(&array, true))
Ok(Array::from_arrow(&array, array.is_nullable()))
}
}

impl AndFn for BoolArray {
fn and(&self, array: &Array) -> VortexResult<Array> {
let lhs = self.clone().into_canonical()?.into_arrow()?;
let lhs = lhs.as_boolean();
impl OrFn for BoolArray {
fn or(&self, array: &Array) -> VortexResult<Array> {
self.lift_arrow(boolean::or, array)
}

let rhs = array.clone().into_canonical()?.into_arrow()?;
let rhs = rhs.as_boolean();
fn or_kleene(&self, array: &Array) -> VortexResult<Array> {
self.lift_arrow(boolean::or_kleene, array)
}
}

let array = boolean::and(lhs, rhs)?;
impl AndFn for BoolArray {
fn and(&self, array: &Array) -> VortexResult<Array> {
self.lift_arrow(boolean::and, array)
}

Ok(Array::from_arrow(&array, true))
fn and_kleene(&self, array: &Array) -> VortexResult<Array> {
self.lift_arrow(boolean::and_kleene, array)
}
}
62 changes: 48 additions & 14 deletions vortex-array/src/array/constant/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,40 +119,74 @@ impl MaybeCompareFn for ConstantArray {
}
}

fn and(left: Option<bool>, right: Option<bool>) -> Option<bool> {
left.zip(right).map(|(l, r)| l & r)
}

fn kleene_and(left: Option<bool>, right: Option<bool>) -> Option<bool> {
match (left, right) {
(Some(false), _) => Some(false),
(_, Some(false)) => Some(false),
(None, _) => None,
(_, None) => None,
(Some(l), Some(r)) => Some(l & r),
}
}

fn or(left: Option<bool>, right: Option<bool>) -> Option<bool> {
left.zip(right).map(|(l, r)| l | r)
}

fn kleene_or(left: Option<bool>, right: Option<bool>) -> Option<bool> {
match (left, right) {
(Some(true), _) => Some(true),
(_, Some(true)) => Some(true),
(None, _) => None,
(_, None) => None,
(Some(l), Some(r)) => Some(l | r),
}
}

impl AndFn for ConstantArray {
fn and(&self, array: &Array) -> VortexResult<Array> {
constant_array_bool_impl(
self,
array,
|(l, r)| l & r,
|other, this| other.with_dyn(|other| other.and().map(|other| other.and(this))),
)
constant_array_bool_impl(self, array, and, |other, this| {
other.with_dyn(|other| other.and().map(|other| other.and(this)))
})
}

fn and_kleene(&self, array: &Array) -> VortexResult<Array> {
constant_array_bool_impl(self, array, kleene_and, |other, this| {
other.with_dyn(|other| other.and_kleene().map(|other| other.and_kleene(this)))
})
}
}

impl OrFn for ConstantArray {
fn or(&self, array: &Array) -> VortexResult<Array> {
constant_array_bool_impl(
self,
array,
|(l, r)| l | r,
|other, this| other.with_dyn(|other| other.or().map(|other| other.or(this))),
)
constant_array_bool_impl(self, array, or, |other, this| {
other.with_dyn(|other| other.or().map(|other| other.or(this)))
})
}

fn or_kleene(&self, array: &Array) -> VortexResult<Array> {
constant_array_bool_impl(self, array, kleene_or, |other, this| {
other.with_dyn(|other| other.or_kleene().map(|other| other.or_kleene(this)))
})
}
}

fn constant_array_bool_impl(
constant_array: &ConstantArray,
other: &Array,
bool_op: impl Fn((bool, bool)) -> bool,
bool_op: impl Fn(Option<bool>, Option<bool>) -> Option<bool>,
fallback_fn: impl Fn(&Array, &Array) -> Option<VortexResult<Array>>,
) -> VortexResult<Array> {
// If the right side is constant
if other.statistics().get_as::<bool>(Stat::IsConstant) == Some(true) {
let lhs = constant_array.scalar_value().as_bool()?;
let rhs = scalar_at(other, 0)?.value().as_bool()?;

let scalar = match lhs.zip(rhs).map(bool_op) {
let scalar = match bool_op(lhs, rhs) {
Some(b) => Scalar::bool(b, Nullability::Nullable),
None => Scalar::null(constant_array.dtype().as_nullable()),
};
Expand Down
150 changes: 123 additions & 27 deletions vortex-array/src/compute/boolean.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,101 @@
use vortex_error::{vortex_bail, VortexResult};

use crate::array::BoolArray;
use crate::{Array, ArrayDType, IntoArrayVariant};

pub trait AndFn {
/// Point-wise logical _and_ between two Boolean arrays.
///
/// This method uses Arrow-style null propagation rather than the Kleene logic semantics.
///
/// # Examples
///
/// ```
/// use vortex_array::Array;
/// use vortex_array::compute::and;
/// use vortex_array::IntoCanonical;
/// use vortex_array::accessor::ArrayAccessor;
/// let a = Array::from(vec![Some(true), Some(true), Some(true), None, None, None, Some(false), Some(false), Some(false)]);
/// let b = Array::from(vec![Some(true), None, Some(false), Some(true), None, Some(false), Some(true), None, Some(false)]);
/// let result = and(a, b)?.into_canonical()?.into_bool()?;
/// let result_vec = result.with_iterator(|it| it.map(|x| x.cloned()).collect::<Vec<_>>())?;
/// assert_eq!(result_vec, vec![Some(true), None, Some(false), None, None, None, Some(false), None, Some(false)]);
/// # use vortex_error::VortexError;
/// # Ok::<(), VortexError>(())
/// ```
fn and(&self, array: &Array) -> VortexResult<Array>;

/// Point-wise Kleene logical _and_ between two Boolean arrays.
///
/// # Examples
///
/// ```
/// use vortex_array::Array;
/// use vortex_array::compute::and_kleene;
/// use vortex_array::IntoCanonical;
/// use vortex_array::accessor::ArrayAccessor;
/// let a = Array::from(vec![Some(true), Some(true), Some(true), None, None, None, Some(false), Some(false), Some(false)]);
/// let b = Array::from(vec![Some(true), None, Some(false), Some(true), None, Some(false), Some(true), None, Some(false)]);
/// let result = and_kleene(a, b)?.into_canonical()?.into_bool()?;
/// let result_vec = result.with_iterator(|it| it.map(|x| x.cloned()).collect::<Vec<_>>())?;
/// assert_eq!(result_vec, vec![Some(true), None, Some(false), None, None, Some(false), Some(false), Some(false), Some(false)]);
/// # use vortex_error::VortexError;
/// # Ok::<(), VortexError>(())
/// ```
fn and_kleene(&self, array: &Array) -> VortexResult<Array>;
}

pub trait OrFn {
/// Point-wise logical _or_ between two Boolean arrays.
///
/// This method uses Arrow-style null propagation rather than the Kleene logic semantics.
///
/// # Examples
///
/// ```
/// use vortex_array::Array;
/// use vortex_array::compute::or;
/// use vortex_array::IntoCanonical;
/// use vortex_array::accessor::ArrayAccessor;
/// let a = Array::from(vec![Some(true), Some(true), Some(true), None, None, None, Some(false), Some(false), Some(false)]);
/// let b = Array::from(vec![Some(true), None, Some(false), Some(true), None, Some(false), Some(true), None, Some(false)]);
/// let result = or(a, b)?.into_canonical()?.into_bool()?;
/// let result_vec = result.with_iterator(|it| it.map(|x| x.cloned()).collect::<Vec<_>>())?;
/// assert_eq!(result_vec, vec![Some(true), None, Some(true), None, None, None, Some(true), None, Some(false)]);
/// # use vortex_error::VortexError;
/// # Ok::<(), VortexError>(())
/// ```
fn or(&self, array: &Array) -> VortexResult<Array>;

/// Point-wise Kleene logical _or_ between two Boolean arrays.
///
/// # Examples
///
/// ```
/// use vortex_array::Array;
/// use vortex_array::compute::or_kleene;
/// use vortex_array::IntoCanonical;
/// use vortex_array::accessor::ArrayAccessor;
/// let a = Array::from(vec![Some(true), Some(true), Some(true), None, None, None, Some(false), Some(false), Some(false)]);
/// let b = Array::from(vec![Some(true), None, Some(false), Some(true), None, Some(false), Some(true), None, Some(false)]);
/// let result = or_kleene(a, b)?.into_canonical()?.into_bool()?;
/// let result_vec = result.with_iterator(|it| it.map(|x| x.cloned()).collect::<Vec<_>>())?;
/// assert_eq!(result_vec, vec![Some(true), Some(true), Some(true), Some(true), None, None, Some(true), None, Some(false)]);
/// # use vortex_error::VortexError;
/// # Ok::<(), VortexError>(())
/// ```
fn or_kleene(&self, array: &Array) -> VortexResult<Array>;
}

pub fn and(lhs: impl AsRef<Array>, rhs: impl AsRef<Array>) -> VortexResult<Array> {
fn lift_boolean_operator<F, G>(
lhs: impl AsRef<Array>,
rhs: impl AsRef<Array>,
trait_fun: F,
bool_array_fun: G,
) -> VortexResult<Array>
where
F: Fn(&Array, &Array) -> Option<VortexResult<Array>>,
G: FnOnce(BoolArray, &Array) -> VortexResult<Array>,
{
let lhs = lhs.as_ref();
let rhs = rhs.as_ref();

Expand All @@ -22,44 +107,55 @@ pub fn and(lhs: impl AsRef<Array>, rhs: impl AsRef<Array>) -> VortexResult<Array
vortex_bail!("Boolean operations are only supported on boolean arrays")
}

if let Some(selection) = lhs.with_dyn(|lhs| lhs.and().map(|lhs| lhs.and(rhs))) {
if let Some(selection) = trait_fun(lhs, rhs) {
return selection;
}

if let Some(selection) = rhs.with_dyn(|rhs| rhs.and().map(|rhs| rhs.and(lhs))) {
if let Some(selection) = trait_fun(rhs, lhs) {
return selection;
}

// If neither side implements `AndFn`, we try to expand the left-hand side into a `BoolArray`, which we know does implement it, and call into that implementation.
// If neither side implements the trait, we try to expand the left-hand side into a `BoolArray`,
// which we know does implement it, and call into that implementation.
let lhs = lhs.clone().into_bool()?;

lhs.and(rhs)
bool_array_fun(lhs, rhs)
}

pub fn or(lhs: impl AsRef<Array>, rhs: impl AsRef<Array>) -> VortexResult<Array> {
let lhs = lhs.as_ref();
let rhs = rhs.as_ref();

if lhs.len() != rhs.len() {
vortex_bail!("Boolean operations aren't supported on arrays of different lengths")
}

if !lhs.dtype().is_boolean() || !rhs.dtype().is_boolean() {
vortex_bail!("Boolean operations are only supported on boolean arrays")
}

if let Some(selection) = lhs.with_dyn(|lhs| lhs.or().map(|lhs| lhs.or(rhs))) {
return selection;
}
pub fn and(lhs: impl AsRef<Array>, rhs: impl AsRef<Array>) -> VortexResult<Array> {
lift_boolean_operator(
lhs,
rhs,
|lhs, rhs| lhs.with_dyn(|lhs| lhs.and().map(|lhs| lhs.and(rhs))),
|lhs, rhs| lhs.and(rhs),
)
}

if let Some(selection) = rhs.with_dyn(|rhs| rhs.or().map(|rhs| rhs.or(lhs))) {
return selection;
}
pub fn and_kleene(lhs: impl AsRef<Array>, rhs: impl AsRef<Array>) -> VortexResult<Array> {
lift_boolean_operator(
lhs,
rhs,
|lhs, rhs| lhs.with_dyn(|lhs| lhs.and_kleene().map(|lhs| lhs.and_kleene(rhs))),
|lhs, rhs| lhs.and_kleene(rhs),
)
}

// If neither side implements `OrFn`, we try to expand the left-hand side into a `BoolArray`, which we know does implement it, and call into that implementation.
let lhs = lhs.clone().into_bool()?;
pub fn or(lhs: impl AsRef<Array>, rhs: impl AsRef<Array>) -> VortexResult<Array> {
lift_boolean_operator(
lhs,
rhs,
|lhs, rhs| lhs.with_dyn(|lhs| lhs.or().map(|lhs| lhs.or(rhs))),
|lhs, rhs| lhs.or(rhs),
)
}

lhs.or(rhs)
pub fn or_kleene(lhs: impl AsRef<Array>, rhs: impl AsRef<Array>) -> VortexResult<Array> {
lift_boolean_operator(
lhs,
rhs,
|lhs, rhs| lhs.with_dyn(|lhs| lhs.or_kleene().map(|lhs| lhs.or_kleene(rhs))),
|lhs, rhs| lhs.or_kleene(rhs),
)
}

#[cfg(test)]
Expand Down
Loading

0 comments on commit 1249a7d

Please sign in to comment.