From 1582e8d9cc0d307bdc3311cd22566c66fb6b840f Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Sun, 13 Oct 2024 06:02:36 +0200 Subject: [PATCH] Optimize `iszero` function (3-5x faster) (#12881) * add bench * Optimize iszero function (3-5x) faster --- datafusion/functions/Cargo.toml | 5 +++ datafusion/functions/benches/iszero.rs | 46 +++++++++++++++++++++++++ datafusion/functions/src/math/iszero.rs | 24 +++++-------- 3 files changed, 60 insertions(+), 15 deletions(-) create mode 100644 datafusion/functions/benches/iszero.rs diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index a3d114221d3f..2ffe93a0e567 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -117,6 +117,11 @@ harness = false name = "make_date" required-features = ["datetime_expressions"] +[[bench]] +harness = false +name = "iszero" +required-features = ["math_expressions"] + [[bench]] harness = false name = "nullif" diff --git a/datafusion/functions/benches/iszero.rs b/datafusion/functions/benches/iszero.rs new file mode 100644 index 000000000000..3348d172e1f2 --- /dev/null +++ b/datafusion/functions/benches/iszero.rs @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +extern crate criterion; + +use arrow::{ + datatypes::{Float32Type, Float64Type}, + util::bench_util::create_primitive_array, +}; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_expr::ColumnarValue; +use datafusion_functions::math::iszero; +use std::sync::Arc; + +fn criterion_benchmark(c: &mut Criterion) { + let iszero = iszero(); + for size in [1024, 4096, 8192] { + let f32_array = Arc::new(create_primitive_array::(size, 0.2)); + let f32_args = vec![ColumnarValue::Array(f32_array)]; + c.bench_function(&format!("iszero f32 array: {}", size), |b| { + b.iter(|| black_box(iszero.invoke(&f32_args).unwrap())) + }); + let f64_array = Arc::new(create_primitive_array::(size, 0.2)); + let f64_args = vec![ColumnarValue::Array(f64_array)]; + c.bench_function(&format!("iszero f64 array: {}", size), |b| { + b.iter(|| black_box(iszero.invoke(&f64_args).unwrap())) + }); + } +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/functions/src/math/iszero.rs b/datafusion/functions/src/math/iszero.rs index e6a728053359..74611b65aaba 100644 --- a/datafusion/functions/src/math/iszero.rs +++ b/datafusion/functions/src/math/iszero.rs @@ -18,11 +18,11 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, BooleanArray, Float32Array, Float64Array}; -use arrow::datatypes::DataType; +use arrow::array::{ArrayRef, AsArray, BooleanArray}; use arrow::datatypes::DataType::{Boolean, Float32, Float64}; +use arrow::datatypes::{DataType, Float32Type, Float64Type}; -use datafusion_common::{exec_err, DataFusionError, Result}; +use datafusion_common::{exec_err, Result}; use datafusion_expr::ColumnarValue; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; @@ -77,20 +77,14 @@ impl ScalarUDFImpl for IsZeroFunc { /// Iszero SQL function pub fn iszero(args: &[ArrayRef]) -> Result { match args[0].data_type() { - Float64 => Ok(Arc::new(make_function_scalar_inputs_return_type!( - &args[0], - "x", - Float64Array, - BooleanArray, - { |x: f64| { x == 0_f64 } } + Float64 => Ok(Arc::new(BooleanArray::from_unary( + args[0].as_primitive::(), + |x| x == 0.0, )) as ArrayRef), - Float32 => Ok(Arc::new(make_function_scalar_inputs_return_type!( - &args[0], - "x", - Float32Array, - BooleanArray, - { |x: f32| { x == 0_f32 } } + Float32 => Ok(Arc::new(BooleanArray::from_unary( + args[0].as_primitive::(), + |x| x == 0.0, )) as ArrayRef), other => exec_err!("Unsupported data type {other:?} for function iszero"),