Skip to content

Commit

Permalink
Datum based comparison kernels (#4596) (#4701)
Browse files Browse the repository at this point in the history
* Datum based comparison kernels (#4596)

* Clippy

* More clippy

* Even more clippy

* Further clippy

* Format

* Use take kernel for scalar evaluation

* Clippy

* Review feedback

* Use AnyDictionaryArray
  • Loading branch information
tustvold authored Aug 18, 2023
1 parent b810e8f commit 8bbb5c1
Show file tree
Hide file tree
Showing 17 changed files with 1,095 additions and 2,130 deletions.
10 changes: 4 additions & 6 deletions .github/workflows/arrow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ jobs:
run: cargo test -p arrow-json --all-features
- name: Test arrow-string with all features
run: cargo test -p arrow-string --all-features
- name: Test arrow-ord with all features except SIMD
run: cargo test -p arrow-ord --features dyn_cmp_dict
- name: Test arrow-ord with all features
run: cargo test -p arrow-ord --all-features
- name: Test arrow-arith with all features except SIMD
run: cargo test -p arrow-arith
- name: Test arrow-row with all features
Expand Down Expand Up @@ -145,8 +145,6 @@ jobs:
rust-version: nightly
- name: Test arrow-array with SIMD
run: cargo test -p arrow-array --features simd
- name: Test arrow-ord with SIMD
run: cargo test -p arrow-ord --features simd
- name: Test arrow-arith with SIMD
run: cargo test -p arrow-arith --features simd
- name: Test arrow with SIMD
Expand Down Expand Up @@ -206,8 +204,8 @@ jobs:
run: cargo clippy -p arrow-json --all-targets --all-features -- -D warnings
- name: Clippy arrow-string with all features
run: cargo clippy -p arrow-string --all-targets --all-features -- -D warnings
- name: Clippy arrow-ord with all features except SIMD
run: cargo clippy -p arrow-ord --all-targets --features dyn_cmp_dict -- -D warnings
- name: Clippy arrow-ord with all features
run: cargo clippy -p arrow-ord --all-targets --all-features -- -D warnings
- name: Clippy arrow-arith with all features except SIMD
run: cargo clippy -p arrow-arith --all-targets -- -D warnings
- name: Clippy arrow-row with all features
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/miri.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ cargo miri test -p arrow-data --features ffi
cargo miri test -p arrow-schema --features ffi
cargo miri test -p arrow-array
cargo miri test -p arrow-arith --features simd
cargo miri test -p arrow-ord --features simd
cargo miri test -p arrow-ord
7 changes: 4 additions & 3 deletions arrow-flight/src/sql/metadata/db_schemas.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
use std::sync::Arc;

use arrow_arith::boolean::and;
use arrow_array::{builder::StringBuilder, ArrayRef, RecordBatch};
use arrow_ord::comparison::eq_utf8_scalar;
use arrow_array::{builder::StringBuilder, ArrayRef, RecordBatch, Scalar, StringArray};
use arrow_ord::cmp::eq;
use arrow_schema::{DataType, Field, Schema, SchemaRef};
use arrow_select::{filter::filter_record_batch, take::take};
use arrow_string::like::like_utf8_scalar;
Expand Down Expand Up @@ -129,7 +129,8 @@ impl GetDbSchemasBuilder {
}

if let Some(catalog_filter_name) = catalog_filter {
filters.push(eq_utf8_scalar(&catalog_name, &catalog_filter_name)?);
let scalar = StringArray::from_iter_values([catalog_filter_name]);
filters.push(eq(&catalog_name, &Scalar::new(&scalar))?);
}

// `AND` any filters together
Expand Down
14 changes: 8 additions & 6 deletions arrow-flight/src/sql/metadata/sql_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,9 @@ use arrow_array::builder::{
ArrayBuilder, BooleanBuilder, Int32Builder, Int64Builder, Int8Builder, ListBuilder,
MapBuilder, StringBuilder, UInt32Builder,
};
use arrow_array::cast::downcast_array;
use arrow_array::RecordBatch;
use arrow_array::{RecordBatch, Scalar};
use arrow_data::ArrayData;
use arrow_ord::comparison::eq_scalar;
use arrow_ord::cmp::eq;
use arrow_schema::{DataType, Field, Fields, Schema, SchemaRef, UnionFields, UnionMode};
use arrow_select::filter::filter_record_batch;
use once_cell::sync::Lazy;
Expand Down Expand Up @@ -425,13 +424,16 @@ impl SqlInfoData {
&self,
info: impl IntoIterator<Item = u32>,
) -> Result<RecordBatch> {
let arr: UInt32Array = downcast_array(self.batch.column(0).as_ref());
let arr = self.batch.column(0);
let type_filter = info
.into_iter()
.map(|tt| eq_scalar(&arr, tt))
.map(|tt| {
let s = UInt32Array::from(vec![tt]);
eq(arr, &Scalar::new(&s))
})
.collect::<std::result::Result<Vec<_>, _>>()?
.into_iter()
// We know the arrays are of same length as they are produced fromn the same root array
// We know the arrays are of same length as they are produced from the same root array
.reduce(|filter, arr| or(&filter, &arr).unwrap());
if let Some(filter) = type_filter {
Ok(filter_record_batch(&self.batch, &filter)?)
Expand Down
12 changes: 8 additions & 4 deletions arrow-flight/src/sql/metadata/tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ use std::sync::Arc;

use arrow_arith::boolean::{and, or};
use arrow_array::builder::{BinaryBuilder, StringBuilder};
use arrow_array::{ArrayRef, RecordBatch};
use arrow_ord::comparison::eq_utf8_scalar;
use arrow_array::{ArrayRef, RecordBatch, Scalar, StringArray};
use arrow_ord::cmp::eq;
use arrow_schema::{DataType, Field, Schema, SchemaRef};
use arrow_select::{filter::filter_record_batch, take::take};
use arrow_string::like::like_utf8_scalar;
Expand Down Expand Up @@ -184,12 +184,16 @@ impl GetTablesBuilder {
let mut filters = vec![];

if let Some(catalog_filter_name) = catalog_filter {
filters.push(eq_utf8_scalar(&catalog_name, &catalog_filter_name)?);
let scalar = StringArray::from_iter_values([catalog_filter_name]);
filters.push(eq(&catalog_name, &Scalar::new(&scalar))?);
}

let tt_filter = table_types_filter
.into_iter()
.map(|tt| eq_utf8_scalar(&table_type, &tt))
.map(|tt| {
let scalar = StringArray::from_iter_values([tt]);
eq(&table_type, &Scalar::new(&scalar))
})
.collect::<std::result::Result<Vec<_>, _>>()?
.into_iter()
// We know the arrays are of same length as they are produced from the same root array
Expand Down
9 changes: 4 additions & 5 deletions arrow-flight/src/sql/metadata/xdbc_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,8 @@
use std::sync::Arc;

use arrow_array::builder::{BooleanBuilder, Int32Builder, ListBuilder, StringBuilder};
use arrow_array::cast::downcast_array;
use arrow_array::{ArrayRef, Int32Array, ListArray, RecordBatch};
use arrow_ord::comparison::eq_scalar;
use arrow_array::{ArrayRef, Int32Array, ListArray, RecordBatch, Scalar};
use arrow_ord::cmp::eq;
use arrow_schema::{DataType, Field, Schema, SchemaRef};
use arrow_select::filter::filter_record_batch;
use arrow_select::take::take;
Expand Down Expand Up @@ -81,8 +80,8 @@ impl XdbcTypeInfoData {
/// from [`CommandGetXdbcTypeInfo`]
pub fn record_batch(&self, data_type: impl Into<Option<i32>>) -> Result<RecordBatch> {
if let Some(dt) = data_type.into() {
let arr: Int32Array = downcast_array(self.batch.column(1).as_ref());
let filter = eq_scalar(&arr, dt)?;
let scalar = Int32Array::from(vec![dt]);
let filter = eq(self.batch.column(1), &Scalar::new(&scalar))?;
Ok(filter_record_batch(&self.batch, &filter)?)
} else {
Ok(self.batch.clone())
Expand Down
7 changes: 0 additions & 7 deletions arrow-ord/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,3 @@ half = { version = "2.1", default-features = false, features = ["num-traits"] }

[dev-dependencies]
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }

[package.metadata.docs.rs]
features = ["dyn_cmp_dict"]

[features]
dyn_cmp_dict = []
simd = ["arrow-array/simd"]
Loading

0 comments on commit 8bbb5c1

Please sign in to comment.