From 7e6f8202d7d108a93576093b123166c6f591e516 Mon Sep 17 00:00:00 2001 From: Tai Le Manh Date: Sun, 13 Oct 2024 20:04:45 +0700 Subject: [PATCH] Optimize performance of math::trunc Signed-off-by: Tai Le Manh --- datafusion/functions/Cargo.toml | 5 ++ datafusion/functions/benches/trunc.rs | 47 +++++++++++++++ datafusion/functions/src/math/trunc.rs | 82 ++++++++++++++++---------- 3 files changed, 104 insertions(+), 30 deletions(-) create mode 100644 datafusion/functions/benches/trunc.rs diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 2ffe93a0e567..13d14ca94b93 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -186,3 +186,8 @@ required-features = ["unicode_expressions"] harness = false name = "strpos" required-features = ["unicode_expressions"] + +[[bench]] +harness = false +name = "trunc" +required-features = ["math_expressions"] diff --git a/datafusion/functions/benches/trunc.rs b/datafusion/functions/benches/trunc.rs new file mode 100644 index 000000000000..92a08abf3d32 --- /dev/null +++ b/datafusion/functions/benches/trunc.rs @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +extern crate criterion; + +use arrow::{ + datatypes::{Float32Type, Float64Type}, + util::bench_util::create_primitive_array, +}; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_expr::ColumnarValue; +use datafusion_functions::math::trunc; + +use std::sync::Arc; + +fn criterion_benchmark(c: &mut Criterion) { + let trunc = trunc(); + for size in [1024, 4096, 8192] { + let f32_array = Arc::new(create_primitive_array::<Float32Type>(size, 0.2)); + let f32_args = vec![ColumnarValue::Array(f32_array)]; + c.bench_function(&format!("trunc f32 array: {}", size), |b| { + b.iter(|| black_box(trunc.invoke(&f32_args).unwrap())) + }); + let f64_array = Arc::new(create_primitive_array::<Float64Type>(size, 0.2)); + let f64_args = vec![ColumnarValue::Array(f64_array)]; + c.bench_function(&format!("trunc f64 array: {}", size), |b| { + b.iter(|| black_box(trunc.invoke(&f64_args).unwrap())) + }); + } +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/functions/src/math/trunc.rs b/datafusion/functions/src/math/trunc.rs index 3344438454c4..a09a5624ca8a 100644 --- a/datafusion/functions/src/math/trunc.rs +++ b/datafusion/functions/src/math/trunc.rs @@ -20,11 +20,11 @@ use std::sync::Arc; use crate::utils::make_scalar_function; -use arrow::array::{ArrayRef, Float32Array, Float64Array, Int64Array}; -use arrow::datatypes::DataType; +use arrow::array::{ArrayRef, AsArray, PrimitiveArray}; use arrow::datatypes::DataType::{Float32, Float64}; +use arrow::datatypes::{DataType, Float32Type, Float64Type, Int64Type}; use datafusion_common::ScalarValue::Int64; -use datafusion_common::{exec_err, DataFusionError, Result}; +use datafusion_common::{exec_err, Result}; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; @@ -111,8 +111,8 @@ fn trunc(args: &[ArrayRef]) -> Result<ArrayRef> { ); } - //if only one
arg then invoke toolchain trunc(num) and precision = 0 by default - //or then invoke the compute_truncate method to process precision + // If only one arg then invoke toolchain trunc(num) and precision = 0 by default + // or then invoke the compute_truncate method to process precision let num = &args[0]; let precision = if args.len() == 1 { ColumnarValue::Scalar(Int64(Some(0))) @@ -120,35 +120,57 @@ fn trunc(args: &[ArrayRef]) -> Result<ArrayRef> { ColumnarValue::Array(Arc::clone(&args[1])) }; - match args[0].data_type() { + match num.data_type() { Float64 => match precision { - ColumnarValue::Scalar(Int64(Some(0))) => Ok(Arc::new( - make_function_scalar_inputs!(num, "num", Float64Array, { f64::trunc }), - ) as ArrayRef), - ColumnarValue::Array(precision) => Ok(Arc::new(make_function_inputs2!( - num, - precision, - "x", - "y", - Float64Array, - Int64Array, - { compute_truncate64 } - )) as ArrayRef), + ColumnarValue::Scalar(Int64(Some(0))) => { + Ok(Arc::new( + args[0] + .as_primitive::<Float64Type>() + .unary::<_, Float64Type>(|x: f64| { + if x == 0_f64 { + 0_f64 + } else { + x.trunc() + } + }), + ) as ArrayRef) + } + ColumnarValue::Array(precision) => { + let num_array = num.as_primitive::<Float64Type>(); + let precision_array = precision.as_primitive::<Int64Type>(); + let result: PrimitiveArray<Float64Type> = + arrow::compute::binary(num_array, precision_array, |x, y| { + compute_truncate64(x, y) + })?; + + Ok(Arc::new(result) as ArrayRef) + } _ => exec_err!("trunc function requires a scalar or array for precision"), }, Float32 => match precision { - ColumnarValue::Scalar(Int64(Some(0))) => Ok(Arc::new( - make_function_scalar_inputs!(num, "num", Float32Array, { f32::trunc }), - ) as ArrayRef), - ColumnarValue::Array(precision) => Ok(Arc::new(make_function_inputs2!( - num, - precision, - "x", - "y", - Float32Array, - Int64Array, - { compute_truncate32 } - )) as ArrayRef), + ColumnarValue::Scalar(Int64(Some(0))) => { + Ok(Arc::new( + args[0] + .as_primitive::<Float32Type>() + .unary::<_, Float32Type>(|x: f32| { + if x == 0_f32 { + 0_f32
+ } else { + x.trunc() + } + }), + ) as ArrayRef) + } + ColumnarValue::Array(precision) => { + let num_array = num.as_primitive::<Float32Type>(); + let precision_array = precision.as_primitive::<Int64Type>(); + let result: PrimitiveArray<Float32Type> = + arrow::compute::binary(num_array, precision_array, |x, y| { + compute_truncate32(x, y) + })?; + + Ok(Arc::new(result) as ArrayRef) + } _ => exec_err!("trunc function requires a scalar or array for precision"), }, other => exec_err!("Unsupported data type {other:?} for function trunc"),