Skip to content

Commit

Permalink
Cleanup isnan using BooleanArray::from_unary
Browse files Browse the repository at this point in the history
  • Loading branch information
simonvandel committed Oct 12, 2024
1 parent e7ac843 commit 3978dc4
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 43 deletions.
5 changes: 5 additions & 0 deletions datafusion/functions/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,11 @@ harness = false
name = "to_char"
required-features = ["datetime_expressions"]

[[bench]]
harness = false
name = "isnan"
required-features = ["math_expressions"]

[[bench]]
harness = false
name = "substr_index"
Expand Down
46 changes: 46 additions & 0 deletions datafusion/functions/benches/isnan.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

extern crate criterion;

use arrow::{
datatypes::{Float32Type, Float64Type},
util::bench_util::create_primitive_array,
};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use datafusion_expr::ColumnarValue;
use datafusion_functions::math::isnan;
use std::sync::Arc;

/// Registers Criterion benchmarks that invoke the `isnan` function on
/// `Float32` and `Float64` array arguments for the sizes 1024, 4096 and 8192.
fn criterion_benchmark(c: &mut Criterion) {
    let udf = isnan();
    for len in [1024, 4096, 8192] {
        // NOTE(review): the `0.2` argument is presumably the null density of
        // the generated array; confirm against `create_primitive_array`.
        let f32_input = Arc::new(create_primitive_array::<Float32Type>(len, 0.2));
        let f32_args = vec![ColumnarValue::Array(f32_input)];
        let f32_label = format!("isnan f32 array: {}", len);
        c.bench_function(&f32_label, |bencher| {
            bencher.iter(|| black_box(udf.invoke(&f32_args).unwrap()))
        });

        let f64_input = Arc::new(create_primitive_array::<Float64Type>(len, 0.2));
        let f64_args = vec![ColumnarValue::Array(f64_input)];
        let f64_label = format!("isnan f64 array: {}", len);
        c.bench_function(&f64_label, |bencher| {
            bencher.iter(|| black_box(udf.invoke(&f64_args).unwrap()))
        });
    }
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
54 changes: 11 additions & 43 deletions datafusion/physical-expr/src/math_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,60 +17,27 @@

//! Math expressions
use std::any::type_name;
use std::sync::Arc;

use arrow::array::ArrayRef;
use arrow::array::{BooleanArray, Float32Array, Float64Array};
use arrow::datatypes::DataType;
use arrow::array::BooleanArray;
use arrow::array::{ArrayRef, AsArray};
use arrow::datatypes::{DataType, Float32Type, Float64Type};
use arrow_array::Array;

use datafusion_common::exec_err;
use datafusion_common::{DataFusionError, Result};

/// Downcasts `$ARG` (through `as_any()`) to a reference of the concrete type
/// `$ARRAY_TYPE`.
///
/// When the downcast fails, the `?` at the end makes the enclosing function
/// return early with a `DataFusionError::Internal` whose message names
/// `$NAME`, the argument's `data_type()` and the requested target type.
macro_rules! downcast_arg {
    ($ARG:expr, $NAME:expr, $ARRAY_TYPE:ident) => {{
        $ARG.as_any()
            .downcast_ref::<$ARRAY_TYPE>()
            .ok_or_else(|| {
                // Only built on the failure path.
                let target = type_name::<$ARRAY_TYPE>();
                let message = format!(
                    "could not cast {} from {} to {}",
                    $NAME,
                    $ARG.data_type(),
                    target
                );
                DataFusionError::Internal(message)
            })?
    }};
}

/// Applies `$FUNC` to every non-null element of `$ARG` and collects the
/// results into `$RETURN_TYPE`.
///
/// `$ARG` is first downcast to `$ARGS_TYPE` with `downcast_arg!` (`$NAME` is
/// forwarded to it). Null slots (`None`) stay `None` in the output.
macro_rules! make_function_scalar_inputs_return_type {
    ($ARG: expr, $NAME:expr, $ARGS_TYPE:ident, $RETURN_TYPE:ident, $FUNC: block) => {{
        let typed_input = downcast_arg!($ARG, $NAME, $ARGS_TYPE);

        typed_input
            .iter()
            // `$FUNC` is evaluated only for slots that hold a value.
            .map(|slot| slot.map(|value| $FUNC(value)))
            .collect::<$RETURN_TYPE>()
    }};
}
use datafusion_common::Result;

/// Isnan SQL function
pub fn isnan(args: &[ArrayRef]) -> Result<ArrayRef> {
match args[0].data_type() {
DataType::Float64 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
&args[0],
"x",
Float64Array,
BooleanArray,
{ f64::is_nan }
DataType::Float64 => Ok(Arc::new(BooleanArray::from_unary(
args[0].as_primitive::<Float64Type>(),
f64::is_nan,
)) as ArrayRef),

DataType::Float32 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
&args[0],
"x",
Float32Array,
BooleanArray,
{ f32::is_nan }
DataType::Float32 => Ok(Arc::new(BooleanArray::from_unary(
args[0].as_primitive::<Float32Type>(),
f32::is_nan,
)) as ArrayRef),

other => exec_err!("Unsupported data type {other:?} for function isnan"),
Expand All @@ -80,6 +47,7 @@ pub fn isnan(args: &[ArrayRef]) -> Result<ArrayRef> {
#[cfg(test)]
mod tests {

use arrow_array::{Float32Array, Float64Array};
use datafusion_common::cast::as_boolean_array;

use super::*;
Expand Down

0 comments on commit 3978dc4

Please sign in to comment.