Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(core): disable pushdown filter to keep the casting for timestamp column #907

Merged
merged 3 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions wren-core/core/src/mdl/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ use datafusion::optimizer::eliminate_outer_join::EliminateOuterJoin;
use datafusion::optimizer::extract_equijoin_predicate::ExtractEquijoinPredicate;
use datafusion::optimizer::filter_null_join_keys::FilterNullJoinKeys;
use datafusion::optimizer::propagate_empty_relation::PropagateEmptyRelation;
use datafusion::optimizer::push_down_filter::PushDownFilter;
use datafusion::optimizer::replace_distinct_aggregate::ReplaceDistinctWithAggregate;
use datafusion::optimizer::scalar_subquery_to_join::ScalarSubqueryToJoin;
use datafusion::optimizer::single_distinct_to_groupby::SingleDistinctToGroupBy;
Expand Down Expand Up @@ -176,7 +175,8 @@ fn optimize_rule_for_unparsing() -> Vec<Arc<dyn OptimizerRule + Send + Sync>> {
// Filters can't be pushed down past Limits, we should do PushDownFilter after PushDownLimit
// TODO: Sort with pushdown-limit doesn't support to be unparse
// Arc::new(PushDownLimit::new()),
Arc::new(PushDownFilter::new()),
// Disable PushDownFilter to avoid the casting for bigquery (datetime/timestamp) column be removed
// Arc::new(PushDownFilter::new()),
Arc::new(SingleDistinctToGroupBy::new()),
// Disable SimplifyExpressions to avoid apply some function locally
// Arc::new(SimplifyExpressions::new()),
Expand Down
39 changes: 39 additions & 0 deletions wren-core/core/src/mdl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -948,6 +948,45 @@ mod test {
Ok(())
}

#[tokio::test]
async fn test_disable_pushdown_filter() -> Result<()> {
let ctx = SessionContext::new();
ctx.register_batch("artist", artist())?;
let manifest = ManifestBuilder::new()
.catalog("wren")
.schema("test")
.model(
ModelBuilder::new("artist")
.table_reference("artist")
.column(
ColumnBuilder::new("出道時間", "timestamp")
.hidden(true)
.build(),
)
.column(
ColumnBuilder::new("cast_timestamp", "timestamp")
.expression(r#"cast("出道時間" as timestamp with time zone)"#)
.build(),
)
.build(),
)
.build();

let analyzed_mdl = Arc::new(AnalyzedWrenMDL::analyze(manifest)?);
let sql = r#"select count(*) from wren.test.artist where cast(cast_timestamp as timestamp) > timestamp '2011-01-01 21:00:00'"#;
let actual = transform_sql_with_ctx(
&SessionContext::new(),
Arc::clone(&analyzed_mdl),
&[],
sql,
)
.await?;
assert_eq!(actual,
"SELECT count(*) FROM (SELECT artist.cast_timestamp FROM (SELECT CAST(artist.\"出道時間\" AS TIMESTAMP WITH TIME ZONE) AS cast_timestamp \
FROM artist) AS artist) AS artist WHERE artist.cast_timestamp > CAST('2011-01-01 21:00:00' AS TIMESTAMP)");
Ok(())
}

/// Return a RecordBatch with made up data about customer
fn customer() -> RecordBatch {
let custkey: ArrayRef = Arc::new(Int64Array::from(vec![1, 2, 3]));
Expand Down
Loading