From 2c5672ac428400db4fe0dc1af125afbeaa364715 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Fri, 18 Oct 2024 16:39:13 -0400 Subject: [PATCH 1/4] feat: teach PyArray to compare --- pyvortex/src/array.rs | 70 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 2 deletions(-) diff --git a/pyvortex/src/array.rs b/pyvortex/src/array.rs index 7cfe628074..80d46374cd 100644 --- a/pyvortex/src/array.rs +++ b/pyvortex/src/array.rs @@ -2,9 +2,9 @@ use arrow::array::{Array as ArrowArray, ArrayRef}; use arrow::pyarrow::ToPyArrow; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; -use pyo3::types::{IntoPyDict, PyList}; +use pyo3::types::{IntoPyDict, PyList, PyString}; use vortex::array::ChunkedArray; -use vortex::compute::{slice, take}; +use vortex::compute::{compare, slice, take, Operator}; use vortex::{Array, ArrayDType, IntoCanonical}; use crate::dtype::PyDType; @@ -138,6 +138,72 @@ impl PyArray { PyDType::wrap(self_.py(), self_.inner.dtype().clone()) } + /// Point-wise compare the elements of this array to another array. + /// + /// Parameters + /// ---------- + /// other : :class:`vortex.encoding.Array` + /// An array with whom to compare elements. + /// + /// operator : :class:`str` + /// + /// One of `eq`, `ne`, `gt`, `ge`, `lt`, or `le` indicating which binary comparison operator + /// to apply. + /// + /// Returns + /// ------- + /// :class:`vortex.encoding.Array` + /// + /// Examples + /// -------- + /// + /// Compare an array of strings to itself: + /// + /// >>> a = vortex.encoding.array(['a', 'b', 'c', 'd']) + /// >>> a.compare(a, "eq").to_arrow_array() + /// + /// [ + /// true, + /// true, + /// true, + /// true + /// ] + /// + /// Compare two arrays containing nulls: + /// + /// >>> a = vortex.encoding.array(['dog', None, 'cat', 'mouse', 'fish']) + /// >>> b = vortex.encoding.array(['doug', 'jennifer', 'casper', 'mouse', 'faust']) + /// >>> a.compare(b, 'lt').to_arrow_array() + /// + /// [ + /// true, + /// null, + /// false, + /// false, + /// false + /// ] + fn compare(&self, other: &Bound, operator: &Bound) -> PyResult { + let other = other.borrow(); + let operator = match operator.extract()? { + "eq" => Operator::Eq, + "ne" => Operator::NotEq, + "gt" => Operator::Gt, + "ge" => Operator::Gte, + "lt" => Operator::Lt, + "le" => Operator::Lte, + op => { + return Err(PyValueError::new_err(format!( + "expected eq, ne, gt, ge, lt, or le: {}", + op + ))) + } + }; + + compare(&self.inner, &other.inner, operator) + .map(|arr| PyArray { inner: arr }) + .map_err(PyVortexError::map_err) + } + /// Filter, permute, and/or repeat elements by their index. /// /// Parameters From ca3efd4d8057329cae518c4979bb4b0c3e8e7606 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Fri, 18 Oct 2024 16:40:13 -0400 Subject: [PATCH 2/4] fix docs --- pyvortex/src/array.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/pyvortex/src/array.rs b/pyvortex/src/array.rs index 80d46374cd..764c0f7d6e 100644 --- a/pyvortex/src/array.rs +++ b/pyvortex/src/array.rs @@ -146,7 +146,6 @@ impl PyArray { /// An array with whom to compare elements. /// /// operator : :class:`str` - /// /// One of `eq`, `ne`, `gt`, `ge`, `lt`, or `le` indicating which binary comparison operator /// to apply. /// From b8704bb25cd8293214f03286fe94265dfee01e22 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Mon, 21 Oct 2024 11:51:43 -0400 Subject: [PATCH 3/4] use the operators, move docs into the class docs --- docs/encoding.rst | 2 +- pyvortex/src/array.rs | 165 +++++++++++++++++++++++++++--------------- 2 files changed, 106 insertions(+), 61 deletions(-) diff --git a/docs/encoding.rst b/docs/encoding.rst index 8448777fba..ceeb92c406 100644 --- a/docs/encoding.rst +++ b/docs/encoding.rst @@ -4,4 +4,4 @@ Arrays .. automodule:: vortex.encoding :members: :imported-members: - :special-members: __len__ + :special-members: __len__, __lt__, __le__, __eq__, __ne__, __ge__, __gt__ diff --git a/pyvortex/src/array.rs b/pyvortex/src/array.rs index 764c0f7d6e..3c265b5c11 100644 --- a/pyvortex/src/array.rs +++ b/pyvortex/src/array.rs @@ -2,7 +2,7 @@ use arrow::array::{Array as ArrowArray, ArrayRef}; use arrow::pyarrow::ToPyArrow; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; -use pyo3::types::{IntoPyDict, PyList, PyString}; +use pyo3::types::{IntoPyDict, PyList}; use vortex::array::ChunkedArray; use vortex::compute::{compare, slice, take, Operator}; use vortex::{Array, ArrayDType, IntoCanonical}; @@ -13,6 +13,68 @@ use crate::python_repr::PythonRepr; #[pyclass(name = "Array", module = "vortex", sequence, subclass)] /// An array of zero or more *rows* each with the same set of *columns*. +/// +/// Examples +/// -------- +/// +/// Arrays support all the standard comparison operations: +/// +/// >>> a = vortex.encoding.array(['dog', None, 'cat', 'mouse', 'fish']) +/// >>> b = vortex.encoding.array(['doug', 'jennifer', 'casper', 'mouse', 'faust']) +/// >>> (a < b).to_arrow_array() +/// +/// [ +/// true, +/// null, +/// false, +/// false, +/// false +/// ] +/// >>> (a <= b).to_arrow_array() +/// +/// [ +/// true, +/// null, +/// false, +/// true, +/// false +/// ] +/// >>> (a == b).to_arrow_array() +/// +/// [ +/// false, +/// null, +/// false, +/// true, +/// false +/// ] +/// >>> (a != b).to_arrow_array() +/// +/// [ +/// true, +/// null, +/// true, +/// false, +/// true +/// ] +/// >>> (a >= b).to_arrow_array() +/// +/// [ +/// false, +/// null, +/// true, +/// true, +/// true +/// ] +/// >>> (a > b).to_arrow_array() +/// +/// [ +/// false, +/// null, +/// true, +/// false, +/// true +/// ] pub struct PyArray { inner: Array, } @@ -138,67 +200,50 @@ impl PyArray { PyDType::wrap(self_.py(), self_.inner.dtype().clone()) } - /// Point-wise compare the elements of this array to another array. - /// - /// Parameters - /// ---------- - /// other : :class:`vortex.encoding.Array` - /// An array with whom to compare elements. - /// - /// operator : :class:`str` - /// One of `eq`, `ne`, `gt`, `ge`, `lt`, or `le` indicating which binary comparison operator - /// to apply. - /// - /// Returns - /// ------- - /// :class:`vortex.encoding.Array` - /// - /// Examples - /// -------- - /// - /// Compare an array of strings to itself: - /// - /// >>> a = vortex.encoding.array(['a', 'b', 'c', 'd']) - /// >>> a.compare(a, "eq").to_arrow_array() - /// - /// [ - /// true, - /// true, - /// true, - /// true - /// ] - /// - /// Compare two arrays containing nulls: - /// - /// >>> a = vortex.encoding.array(['dog', None, 'cat', 'mouse', 'fish']) - /// >>> b = vortex.encoding.array(['doug', 'jennifer', 'casper', 'mouse', 'faust']) - /// >>> a.compare(b, 'lt').to_arrow_array() - /// - /// [ - /// true, - /// null, - /// false, - /// false, - /// false - /// ] - fn compare(&self, other: &Bound, operator: &Bound) -> PyResult { + // Rust docs are *not* copied into Python for __lt__: https://github.com/PyO3/pyo3/issues/4326 + fn __lt__(&self, other: &Bound) -> PyResult { let other = other.borrow(); - let operator = match operator.extract()? { - "eq" => Operator::Eq, - "ne" => Operator::NotEq, - "gt" => Operator::Gt, - "ge" => Operator::Gte, - "lt" => Operator::Lt, - "le" => Operator::Lte, - op => { - return Err(PyValueError::new_err(format!( - "expected eq, ne, gt, ge, lt, or le: {}", - op - ))) - } - }; + compare(&self.inner, &other.inner, Operator::Lt) + .map(|arr| PyArray { inner: arr }) + .map_err(PyVortexError::map_err) + } - compare(&self.inner, &other.inner, operator) + // Rust docs are *not* copied into Python for __le__: https://github.com/PyO3/pyo3/issues/4326 + fn __le__(&self, other: &Bound) -> PyResult { + let other = other.borrow(); + compare(&self.inner, &other.inner, Operator::Lte) + .map(|arr| PyArray { inner: arr }) + .map_err(PyVortexError::map_err) + } + + // Rust docs are *not* copied into Python for __eq__: https://github.com/PyO3/pyo3/issues/4326 + fn __eq__(&self, other: &Bound) -> PyResult { + let other = other.borrow(); + compare(&self.inner, &other.inner, Operator::Eq) + .map(|arr| PyArray { inner: arr }) + .map_err(PyVortexError::map_err) + } + + // Rust docs are *not* copied into Python for __ne__: https://github.com/PyO3/pyo3/issues/4326 + fn __ne__(&self, other: &Bound) -> PyResult { + let other = other.borrow(); + compare(&self.inner, &other.inner, Operator::NotEq) + .map(|arr| PyArray { inner: arr }) + .map_err(PyVortexError::map_err) + } + + // Rust docs are *not* copied into Python for __ge__: https://github.com/PyO3/pyo3/issues/4326 + fn __ge__(&self, other: &Bound) -> PyResult { + let other = other.borrow(); + compare(&self.inner, &other.inner, Operator::Gte) + .map(|arr| PyArray { inner: arr }) + .map_err(PyVortexError::map_err) + } + + // Rust docs are *not* copied into Python for __gt__: https://github.com/PyO3/pyo3/issues/4326 + fn __gt__(&self, other: &Bound) -> PyResult { + let other = other.borrow(); + compare(&self.inner, &other.inner, Operator::Gt) .map(|arr| PyArray { inner: arr }) .map_err(PyVortexError::map_err) } From 94bffc790829a97f7a102c004f76813f404e263c Mon Sep 17 00:00:00 2001 From: Daniel King Date: Mon, 21 Oct 2024 14:53:55 -0400 Subject: [PATCH 4/4] remove duplicate inputs from merge cruft --- pyvortex/src/array.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyvortex/src/array.rs b/pyvortex/src/array.rs index 2f04a79263..7dcb9dd751 100644 --- a/pyvortex/src/array.rs +++ b/pyvortex/src/array.rs @@ -5,7 +5,7 @@ use pyo3::prelude::*; use pyo3::types::{IntoPyDict, PyList}; use vortex::array::ChunkedArray; use vortex::compute::unary::fill_forward; -use vortex::compute::{compare, slice, slice, take, take, Operator}; +use vortex::compute::{compare, slice, take, Operator}; use vortex::{Array, ArrayDType, IntoCanonical}; use crate::dtype::PyDType;