Skip to content

Commit

Permalink
change dependency structure
Browse files Browse the repository at this point in the history
  • Loading branch information
Lordworms committed Mar 2, 2024
1 parent 259b8d3 commit 245fb44
Show file tree
Hide file tree
Showing 7 changed files with 113 additions and 19 deletions.
1 change: 0 additions & 1 deletion datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions datafusion/functions/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,7 @@ tokio = { workspace = true, features = ["macros", "rt", "sync"] }
[[bench]]
harness = false
name = "to_timestamp"

[[bench]]
harness = false
name = "regx"
107 changes: 107 additions & 0 deletions datafusion/functions/benches/regx.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

extern crate criterion;

use std::sync::Arc;

use arrow_array::builder::StringBuilder;
use arrow_array::{ArrayRef, StringArray};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use datafusion_functions::regex::regexplike::regexp_like;
use datafusion_functions::regex::regexpmatch::regexp_match;
use rand::distributions::Alphanumeric;
use rand::rngs::ThreadRng;
use rand::seq::SliceRandom;
use rand::Rng;
/// Builds the input column for the benchmarks: 1000 rows, each a random
/// string of 7 alphanumeric characters drawn from `rng`.
fn data(rng: &mut ThreadRng) -> StringArray {
    let rows: Vec<String> = (0..1000)
        .map(|_| {
            rng.sample_iter(&Alphanumeric)
                .take(7)
                .map(char::from)
                .collect::<String>()
        })
        .collect();

    StringArray::from(rows)
}

fn regex(rng: &mut ThreadRng) -> StringArray {
let samples = vec![
".*([A-Z]{1}).*".to_string(),
"^(A).*".to_string(),
r#"[\p{Letter}-]+"#.to_string(),
r#"[\p{L}-]+"#.to_string(),
"[a-zA-Z]_[a-zA-Z]{2}".to_string(),
];
let mut data: Vec<String> = vec![];
for _ in 0..1000 {
data.push(samples.choose(rng).unwrap().to_string());
}

StringArray::from(data)
}

fn flags(rng: &mut ThreadRng) -> StringArray {
let samples = vec![Some("i".to_string()), Some("im".to_string()), None];
let mut sb = StringBuilder::new();
for _ in 0..1000 {
let sample = samples.choose(rng).unwrap();
if sample.is_some() {
sb.append_value(sample.clone().unwrap());
} else {
sb.append_null();
}
}

sb.finish()
}

/// Registers the regular-expression benchmarks with Criterion.
///
/// Each benchmark generates its own random 1000-row `data`, `regex` and
/// `flags` columns once, outside the measured closure, and then repeatedly
/// invokes the function under test on those three columns.
fn criterion_benchmark(c: &mut Criterion) {
    c.bench_function("regexp_like_1000", |b| {
        let mut rng = rand::thread_rng();
        let values: ArrayRef = Arc::new(data(&mut rng));
        let patterns: ArrayRef = Arc::new(regex(&mut rng));
        let flag_col: ArrayRef = Arc::new(flags(&mut rng));

        b.iter(|| {
            // Cloning an `Arc` only bumps a reference count; the arrays
            // themselves are shared between iterations.
            let args = [
                Arc::clone(&values),
                Arc::clone(&patterns),
                Arc::clone(&flag_col),
            ];
            let outcome = regexp_like::<i32>(&args)
                .expect("regexp_like should work on valid values");
            black_box(outcome)
        })
    });

    c.bench_function("regexp_match_1000", |b| {
        let mut rng = rand::thread_rng();
        let values: ArrayRef = Arc::new(data(&mut rng));
        let patterns: ArrayRef = Arc::new(regex(&mut rng));
        let flag_col: ArrayRef = Arc::new(flags(&mut rng));

        b.iter(|| {
            let args = [
                Arc::clone(&values),
                Arc::clone(&patterns),
                Arc::clone(&flag_col),
            ];
            let outcome = regexp_match::<i32>(&args)
                .expect("regexp_match should work on valid values");
            black_box(outcome)
        })
    });
}

// Collect `criterion_benchmark` into the `benches` group and hand that group
// to Criterion's generated entry point.
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
2 changes: 1 addition & 1 deletion datafusion/functions/src/regex/regexplike.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

//! Encoding expressions
//! Regex expressions
use arrow::array::{Array, ArrayRef, OffsetSizeTrait};
use arrow::compute::kernels::regexp;
use arrow::datatypes::DataType;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions/src/regex/regexpmatch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

//! Encoding expressions
//! Regex expressions
use arrow::array::{Array, ArrayRef, OffsetSizeTrait};
use arrow::compute::kernels::regexp;
use arrow::datatypes::DataType;
Expand Down
1 change: 0 additions & 1 deletion datafusion/physical-expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ chrono = { workspace = true }
datafusion-common = { workspace = true, default-features = true }
datafusion-execution = { workspace = true }
datafusion-expr = { workspace = true }
datafusion-functions = { workspace = true }
half = { workspace = true }
hashbrown = { version = "0.14", features = ["raw"] }
hex = { version = "0.4", optional = true }
Expand Down
15 changes: 0 additions & 15 deletions datafusion/physical-expr/benches/regexp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ use std::sync::Arc;
use arrow_array::builder::StringBuilder;
use arrow_array::{ArrayRef, StringArray};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use datafusion_functions::regex::regexplike::regexp_like;
use datafusion_physical_expr::regex_expressions::{regexp_match, regexp_replace};
use rand::distributions::Alphanumeric;
use rand::rngs::ThreadRng;
Expand Down Expand Up @@ -75,20 +74,6 @@ fn flags(rng: &mut ThreadRng) -> StringArray {
}

fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("regexp_like_1000", |b| {
let mut rng = rand::thread_rng();
let data = Arc::new(data(&mut rng)) as ArrayRef;
let regex = Arc::new(regex(&mut rng)) as ArrayRef;
let flags = Arc::new(flags(&mut rng)) as ArrayRef;

b.iter(|| {
black_box(
regexp_like::<i32>(&[data.clone(), regex.clone(), flags.clone()])
.expect("regexp_like should work on valid values"),
)
})
});

c.bench_function("regexp_match_1000", |b| {
let mut rng = rand::thread_rng();
let data = Arc::new(data(&mut rng)) as ArrayRef;
Expand Down

0 comments on commit 245fb44

Please sign in to comment.