diff --git a/type_test/Cargo.toml b/type_test/Cargo.toml
new file mode 100644
index 0000000..92c55a6
--- /dev/null
+++ b/type_test/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "type_test"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+anyhow = "1"
+arrow = "50"
+datafusion = "35"
+substrait = "0.24"
+tokio = { version = "1", features = ["full"] }
+tokio-stream = "0.1"
+rayon = "1"
diff --git a/type_test/src/main.rs b/type_test/src/main.rs
new file mode 100644
--- /dev/null
+++ b/type_test/src/main.rs
@@ -0,0 +1,60 @@
+//! Small Arrow experiment: build a two-column `RecordBatch` and filter its rows.
+
+use arrow::{
+    array::{Array, BooleanArray, Int32Array},
+    datatypes::{DataType, Field, Schema},
+    record_batch::RecordBatch,
+};
+use std::sync::Arc;
+
+/// Builds the sample batch, prints it, keeps the even rows, and prints the result.
+fn main() {
+    let schema = Arc::new(make_schema());
+    let columns = make_test_columns();
+    // The columns are built to match the schema, so construction cannot fail.
+    let rb = RecordBatch::try_new(schema, columns).expect("test columns match the test schema");
+    println!("Record Batch Before Filter:\n{:?}", rb);
+    let rb = filter_record_batch(rb);
+    println!("Record Batch After Filter:\n{:?}", rb);
+}
+
+/// Returns the two-field test schema: non-nullable `a: Int32` and `b: Boolean`.
+fn make_schema() -> Schema {
+    let field_a = Field::new("a", DataType::Int32, false);
+    let field_b = Field::new("b", DataType::Boolean, false);
+
+    Schema::new(vec![field_a, field_b])
+}
+
+/// Returns five rows of sample data, one array per schema field in schema order.
+fn make_test_columns() -> Vec<Arc<dyn Array>> {
+    let column_a = Int32Array::from(vec![1, 2, 3, 4, 5]);
+    let column_b = BooleanArray::from(vec![true, false, true, true, false]);
+    vec![Arc::new(column_a), Arc::new(column_b)]
+}
+
+/// Keeps only the rows of `rb` whose value in column 0 is even.
+///
+/// Every column is filtered with the same row mask, so the result has the same
+/// schema as the input and works for a batch of any length.
+///
+/// # Panics
+///
+/// Panics if `rb` has no columns or if column 0 is not an `Int32` array.
+fn filter_record_batch(rb: RecordBatch) -> RecordBatch {
+    let column_a = rb
+        .column(0)
+        .as_any()
+        .downcast_ref::<Int32Array>()
+        .expect("column 0 must be an Int32 array");
+
+    // One mask entry per row; a null value is never counted as even.
+    let keep: Vec<bool> = column_a
+        .iter()
+        .map(|value| value.map_or(false, |v| v % 2 == 0))
+        .collect();
+    let keep = BooleanArray::from(keep);
+
+    arrow::compute::filter_record_batch(&rb, &keep)
+        .expect("mask has exactly one entry per row of the batch")
+}