Skip to content

Commit

Permalink
Optimize isnan (2-5x faster) (#12889)
Browse files Browse the repository at this point in the history
* add bench

* optimize isnan

---------

Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
simonvandel and alamb authored Oct 15, 2024
1 parent d9450da commit 1936774
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 17 deletions.
5 changes: 5 additions & 0 deletions datafusion/functions/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,11 @@ harness = false
name = "to_char"
required-features = ["datetime_expressions"]

[[bench]]
harness = false
name = "isnan"
required-features = ["math_expressions"]

[[bench]]
harness = false
name = "signum"
Expand Down
46 changes: 46 additions & 0 deletions datafusion/functions/benches/isnan.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

extern crate criterion;

use arrow::{
datatypes::{Float32Type, Float64Type},
util::bench_util::create_primitive_array,
};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use datafusion_expr::ColumnarValue;
use datafusion_functions::math::isnan;
use std::sync::Arc;

/// Benchmarks the `isnan` scalar function on freshly generated `Float32` and
/// `Float64` arrays of 1024, 4096 and 8192 elements.
///
/// Each (type, length) pair is registered as its own Criterion benchmark,
/// named `isnan f32 array: <len>` / `isnan f64 array: <len>`.
fn criterion_benchmark(c: &mut Criterion) {
    // Build the UDF once; every benchmark below reuses the same instance.
    let udf = isnan();

    for num_rows in [1024, 4096, 8192] {
        // NOTE(review): the second argument to `create_primitive_array` (0.2)
        // is presumably the null density of the generated data -- confirm
        // against arrow's `bench_util` documentation.

        // --- Float32 input ---
        {
            let label = format!("isnan f32 array: {}", num_rows);
            let values = Arc::new(create_primitive_array::<Float32Type>(num_rows, 0.2));
            let args = vec![ColumnarValue::Array(values)];
            // `black_box` keeps the result alive so the call is not optimized away.
            c.bench_function(&label, |bencher| {
                bencher.iter(|| black_box(udf.invoke(&args).unwrap()))
            });
        }

        // --- Float64 input ---
        {
            let label = format!("isnan f64 array: {}", num_rows);
            let values = Arc::new(create_primitive_array::<Float64Type>(num_rows, 0.2));
            let args = vec![ColumnarValue::Array(values)];
            c.bench_function(&label, |bencher| {
                bencher.iter(|| black_box(udf.invoke(&args).unwrap()))
            });
        }
    }
}

// Register `criterion_benchmark` under the benchmark group named `benches`.
criterion_group!(benches, criterion_benchmark);
// Generate the benchmark binary's entry point, which runs the `benches` group.
// (The matching `[[bench]]` entry in Cargo.toml sets `harness = false`.)
criterion_main!(benches);
29 changes: 12 additions & 17 deletions datafusion/functions/src/math/nans.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@

//! Math function: `isnan()`.
use arrow::datatypes::DataType;
use datafusion_common::{exec_err, DataFusionError, Result};
use arrow::datatypes::{DataType, Float32Type, Float64Type};
use datafusion_common::{exec_err, Result};
use datafusion_expr::{ColumnarValue, TypeSignature};

use arrow::array::{ArrayRef, BooleanArray, Float32Array, Float64Array};
use arrow::array::{ArrayRef, AsArray, BooleanArray};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use std::any::Any;
use std::sync::Arc;
Expand Down Expand Up @@ -72,20 +72,15 @@ impl ScalarUDFImpl for IsNanFunc {
let args = ColumnarValue::values_to_arrays(args)?;

let arr: ArrayRef = match args[0].data_type() {
DataType::Float64 => Arc::new(make_function_scalar_inputs_return_type!(
&args[0],
self.name(),
Float64Array,
BooleanArray,
{ f64::is_nan }
)),
DataType::Float32 => Arc::new(make_function_scalar_inputs_return_type!(
&args[0],
self.name(),
Float32Array,
BooleanArray,
{ f32::is_nan }
)),
DataType::Float64 => Arc::new(BooleanArray::from_unary(
args[0].as_primitive::<Float64Type>(),
f64::is_nan,
)) as ArrayRef,

DataType::Float32 => Arc::new(BooleanArray::from_unary(
args[0].as_primitive::<Float32Type>(),
f32::is_nan,
)) as ArrayRef,
other => {
return exec_err!(
"Unsupported data type {other:?} for function {}",
Expand Down

0 comments on commit 1936774

Please sign in to comment.