feat: Implement OR for PromQL (#3024)
* with anti-join

Signed-off-by: Ruihang Xia <[email protected]>

* impl UnionDistinctOn

Signed-off-by: Ruihang Xia <[email protected]>

* unify schema

Signed-off-by: Ruihang Xia <[email protected]>

* fix clippy

Signed-off-by: Ruihang Xia <[email protected]>

* add sqlness case

Signed-off-by: Ruihang Xia <[email protected]>

* add UTs

Signed-off-by: Ruihang Xia <[email protected]>

* Update src/promql/src/planner.rs

Co-authored-by: dennis zhuang <[email protected]>

---------

Signed-off-by: Ruihang Xia <[email protected]>
Co-authored-by: dennis zhuang <[email protected]>
waynexia and killme2008 authored Dec 28, 2023
1 parent 1d80a0f commit b58296d
Showing 11 changed files with 951 additions and 64 deletions.
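
In PromQL, `a or b` returns every series from `a`, plus those series from `b` whose label sets have no match in `a` at the same timestamp. This commit plans that operator as the new `UnionDistinctOn` logical node (executed by `UnionDistinctOnExec` / `UnionDistinctOnStream`, exported in the diff below). The following plain-Rust sketch illustrates only the intended union-distinct semantics; the `Sample` type and `union_distinct_on` function are hypothetical stand-ins, while the real operator works on Arrow record batches inside DataFusion and its source is not shown in this excerpt.

use std::collections::HashSet;

/// One sample keyed by its label set and timestamp (hypothetical row type;
/// the real operator processes Arrow record batch streams).
#[derive(Debug, Clone)]
struct Sample {
    labels: Vec<(String, String)>, // sorted label pairs, e.g. [("job", "api")]
    timestamp: i64,                // millisecond time index
    value: f64,
}

/// Key used for deduplication: the label set plus the time index.
fn dedup_key(sample: &Sample) -> (Vec<(String, String)>, i64) {
    (sample.labels.clone(), sample.timestamp)
}

/// `lhs or rhs`: keep every left-hand sample, then append right-hand samples
/// whose (labels, timestamp) key was not already produced by the left side.
fn union_distinct_on(lhs: Vec<Sample>, rhs: Vec<Sample>) -> Vec<Sample> {
    let mut seen: HashSet<_> = lhs.iter().map(dedup_key).collect();
    let mut merged = lhs;
    for sample in rhs {
        if seen.insert(dedup_key(&sample)) {
            merged.push(sample);
        }
    }
    merged
}

fn main() {
    let api = |value| Sample {
        labels: vec![("job".to_string(), "api".to_string())],
        timestamp: 0,
        value,
    };
    let db = Sample {
        labels: vec![("job".to_string(), "db".to_string())],
        timestamp: 0,
        value: 2.0,
    };
    // The right-hand `api` sample collides with the left one and is dropped;
    // the `db` sample has no match on the left and is kept.
    let merged = union_distinct_on(vec![api(1.0)], vec![api(9.0), db]);
    assert_eq!(merged.len(), 2);
    assert_eq!(merged[0].value, 1.0); // the left side wins ties
}

In other words, the operator is a bag union made distinct on the matching columns, with ties resolved in favor of the left input.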
1 change: 1 addition & 0 deletions Cargo.lock

Generated file; diff not rendered.

1 change: 1 addition & 0 deletions src/promql/Cargo.toml
@@ -5,6 +5,7 @@ edition.workspace = true
 license.workspace = true
 
 [dependencies]
+ahash.workspace = true
 async-recursion = "1.0"
 async-trait.workspace = true
 bytemuck.workspace = true
4 changes: 4 additions & 0 deletions src/promql/src/extension_plan.rs
@@ -19,6 +19,9 @@ mod normalize;
 mod planner;
 mod range_manipulate;
 mod series_divide;
+#[cfg(test)]
+mod test_util;
+mod union_distinct_on;
 
 use datafusion::arrow::datatypes::{ArrowPrimitiveType, TimestampMillisecondType};
 pub use empty_metric::{build_special_time_expr, EmptyMetric, EmptyMetricExec, EmptyMetricStream};
@@ -28,5 +28,6 @@ pub use normalize::{SeriesNormalize, SeriesNormalizeExec, SeriesNormalizeStream};
 pub use planner::PromExtensionPlanner;
 pub use range_manipulate::{RangeManipulate, RangeManipulateExec, RangeManipulateStream};
 pub use series_divide::{SeriesDivide, SeriesDivideExec, SeriesDivideStream};
+pub use union_distinct_on::{UnionDistinctOn, UnionDistinctOnExec, UnionDistinctOnStream};
 
 pub(crate) type Millisecond = <TimestampMillisecondType as ArrowPrimitiveType>::Native;
50 changes: 3 additions & 47 deletions src/promql/src/extension_plan/instant_manipulate.rs
@@ -445,40 +445,12 @@ impl InstantManipulateStream {
 
 #[cfg(test)]
 mod test {
-    use datafusion::arrow::array::Float64Array;
-    use datafusion::arrow::datatypes::{
-        ArrowPrimitiveType, DataType, Field, Schema, TimestampMillisecondType,
-    };
-    use datafusion::physical_plan::memory::MemoryExec;
-    use datafusion::prelude::SessionContext;
-    use datatypes::arrow::array::TimestampMillisecondArray;
-    use datatypes::arrow_array::StringArray;
 
     use super::*;
 
-    const TIME_INDEX_COLUMN: &str = "timestamp";
-
-    fn prepare_test_data() -> MemoryExec {
-        let schema = Arc::new(Schema::new(vec![
-            Field::new(TIME_INDEX_COLUMN, TimestampMillisecondType::DATA_TYPE, true),
-            Field::new("value", DataType::Float64, true),
-            Field::new("path", DataType::Utf8, true),
-        ]));
-        let timestamp_column = Arc::new(TimestampMillisecondArray::from(vec![
-            0, 30_000, 60_000, 90_000, 120_000, // every 30s
-            180_000, 240_000, // every 60s
-            241_000, 271_000, 291_000, // others
-        ])) as _;
-        let field_column = Arc::new(Float64Array::from(vec![1.0; 10])) as _;
-        let path_column = Arc::new(StringArray::from(vec!["foo"; 10])) as _;
-        let data = RecordBatch::try_new(
-            schema.clone(),
-            vec![timestamp_column, field_column, path_column],
-        )
-        .unwrap();
-
-        MemoryExec::try_new(&[vec![data]], schema, None).unwrap()
-    }
+    use crate::extension_plan::test_util::{
+        prepare_test_data, prepare_test_data_with_nan, TIME_INDEX_COLUMN,
+    };
 
     async fn do_normalize_test(
         start: Millisecond,
@@ -749,22 +721,6 @@ mod test {
         do_normalize_test(190_000, 300_000, 30_000, 10_000, expected, false).await;
     }
 
-    fn prepare_test_data_with_nan() -> MemoryExec {
-        let schema = Arc::new(Schema::new(vec![
-            Field::new(TIME_INDEX_COLUMN, TimestampMillisecondType::DATA_TYPE, true),
-            Field::new("value", DataType::Float64, true),
-        ]));
-        let timestamp_column = Arc::new(TimestampMillisecondArray::from(vec![
-            0, 30_000, 60_000, 90_000, 120_000, // every 30s
-        ])) as _;
-        let field_column =
-            Arc::new(Float64Array::from(vec![0.0, f64::NAN, 6.0, f64::NAN, 12.0])) as _;
-        let data =
-            RecordBatch::try_new(schema.clone(), vec![timestamp_column, field_column]).unwrap();
-
-        MemoryExec::try_new(&[vec![data]], schema, None).unwrap()
-    }
-
     #[tokio::test]
     async fn lookback_10s_interval_10s_with_nan() {
         let expected = String::from(
7 changes: 6 additions & 1 deletion src/promql/src/extension_plan/planner.rs
@@ -21,7 +21,7 @@ use datafusion::logical_expr::{LogicalPlan, UserDefinedLogicalNode};
 use datafusion::physical_plan::ExecutionPlan;
 use datafusion::physical_planner::{ExtensionPlanner, PhysicalPlanner};
 
-use super::HistogramFold;
+use super::{HistogramFold, UnionDistinctOn};
 use crate::extension_plan::{
     EmptyMetric, InstantManipulate, RangeManipulate, SeriesDivide, SeriesNormalize,
 };
@@ -50,6 +50,11 @@ impl ExtensionPlanner for PromExtensionPlanner {
             Ok(Some(node.to_execution_plan(session_state, planner)?))
         } else if let Some(node) = node.as_any().downcast_ref::<HistogramFold>() {
             Ok(Some(node.to_execution_plan(physical_inputs[0].clone())))
+        } else if let Some(node) = node.as_any().downcast_ref::<UnionDistinctOn>() {
+            Ok(Some(node.to_execution_plan(
+                physical_inputs[0].clone(),
+                physical_inputs[1].clone(),
+            )))
         } else {
             Ok(None)
         }
64 changes: 64 additions & 0 deletions src/promql/src/extension_plan/test_util.rs
@@ -0,0 +1,64 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Utilities for testing extension plans.
use std::sync::Arc;

use common_recordbatch::DfRecordBatch as RecordBatch;
use datafusion::arrow::array::Float64Array;
use datafusion::arrow::datatypes::{
ArrowPrimitiveType, DataType, Field, Schema, TimestampMillisecondType,
};
use datafusion::physical_plan::memory::MemoryExec;
use datatypes::arrow::array::TimestampMillisecondArray;
use datatypes::arrow_array::StringArray;

pub(crate) const TIME_INDEX_COLUMN: &str = "timestamp";

pub(crate) fn prepare_test_data() -> MemoryExec {
let schema = Arc::new(Schema::new(vec![
Field::new(TIME_INDEX_COLUMN, TimestampMillisecondType::DATA_TYPE, true),
Field::new("value", DataType::Float64, true),
Field::new("path", DataType::Utf8, true),
]));
let timestamp_column = Arc::new(TimestampMillisecondArray::from(vec![
0, 30_000, 60_000, 90_000, 120_000, // every 30s
180_000, 240_000, // every 60s
241_000, 271_000, 291_000, // others
])) as _;
let field_column = Arc::new(Float64Array::from(vec![1.0; 10])) as _;
let path_column = Arc::new(StringArray::from(vec!["foo"; 10])) as _;
let data = RecordBatch::try_new(
schema.clone(),
vec![timestamp_column, field_column, path_column],
)
.unwrap();

MemoryExec::try_new(&[vec![data]], schema, None).unwrap()
}

pub(crate) fn prepare_test_data_with_nan() -> MemoryExec {
let schema = Arc::new(Schema::new(vec![
Field::new(TIME_INDEX_COLUMN, TimestampMillisecondType::DATA_TYPE, true),
Field::new("value", DataType::Float64, true),
]));
let timestamp_column = Arc::new(TimestampMillisecondArray::from(vec![
0, 30_000, 60_000, 90_000, 120_000, // every 30s
])) as _;
let field_column = Arc::new(Float64Array::from(vec![0.0, f64::NAN, 6.0, f64::NAN, 12.0])) as _;
let data = RecordBatch::try_new(schema.clone(), vec![timestamp_column, field_column]).unwrap();

MemoryExec::try_new(&[vec![data]], schema, None).unwrap()
}
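
Both helpers return a `MemoryExec` seeded with a small in-memory `RecordBatch`, which the tests wrap in the operator under test. As a hedged sketch of how such a fixture might be consumed, assuming DataFusion's `collect` helper and `SessionContext::task_ctx` (the test name and row-count assertion are illustrative, not from this commit):

#[tokio::test]
async fn collect_test_data_smoke() {
    use std::sync::Arc;

    use datafusion::physical_plan::collect;
    use datafusion::prelude::SessionContext;

    // A real test would wrap the fixture in the operator under test; here we
    // execute the MemoryExec directly and count the rows it produces.
    let memory_exec = Arc::new(prepare_test_data());
    let session_context = SessionContext::default();
    let batches = collect(memory_exec, session_context.task_ctx())
        .await
        .unwrap();
    let total_rows: usize = batches.iter().map(|batch| batch.num_rows()).sum();
    assert_eq!(total_rows, 10); // the fixture holds ten samples
}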
(Diffs for the remaining 5 changed files were not loaded in this view.)
