Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support IGNORE|RESPECT NULLs clause in window functions #998

Merged
merged 14 commits into from
Oct 24, 2023
26 changes: 26 additions & 0 deletions src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1161,6 +1161,26 @@ impl fmt::Display for WindowFrameUnits {
}
}

/// Specifies Ignore / Respect NULL within window functions.
/// For example
/// `FIRST_VALUE(column2) IGNORE NULLS OVER (PARTITION BY column1)`
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
yuval-illumex marked this conversation as resolved.
Show resolved Hide resolved
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum NullTreatment {
/// The `IGNORE NULLS` clause; this is the exact text its `Display` impl writes.
IgnoreNulls,
/// The `RESPECT NULLS` clause; this is the exact text its `Display` impl writes.
RespectNulls,
}

impl fmt::Display for NullTreatment {
    /// Writes the SQL spelling of the clause (`IGNORE NULLS` or `RESPECT NULLS`).
    ///
    /// The text is emitted verbatim through `write_str`, so any width or
    /// alignment flags on the formatter are not applied.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let clause = match *self {
            NullTreatment::RespectNulls => "RESPECT NULLS",
            NullTreatment::IgnoreNulls => "IGNORE NULLS",
        };
        formatter.write_str(clause)
    }
}

/// Specifies [WindowFrame]'s `start_bound` and `end_bound`
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
Expand Down Expand Up @@ -3757,6 +3777,8 @@ pub struct Function {
pub args: Vec<FunctionArg>,
/// e.g. `x > 5` in `COUNT(x) FILTER (WHERE x > 5)`
pub filter: Option<Box<Expr>>,
// Snowflake/MSSQL support different options for null treatment in rank functions
pub null_treatment: Option<NullTreatment>,
yuval-illumex marked this conversation as resolved.
Show resolved Hide resolved
pub over: Option<WindowType>,
// aggregate functions may specify eg `COUNT(DISTINCT x)`
pub distinct: bool,
Expand Down Expand Up @@ -3809,6 +3831,10 @@ impl fmt::Display for Function {
write!(f, " FILTER (WHERE {filter_cond})")?;
}

if let Some(o) = &self.null_treatment {
write!(f, " {o}")?;
}

if let Some(o) = &self.over {
write!(f, " OVER {o}")?;
}
Expand Down
1 change: 1 addition & 0 deletions src/ast/visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,7 @@ where
/// *expr = Expr::Function(Function {
/// name: ObjectName(vec![Ident::new("f")]),
/// args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(old_expr))],
/// null_treatment: None,
/// filter: None, over: None, distinct: false, special: false, order_by: vec![],
/// });
/// }
Expand Down
1 change: 1 addition & 0 deletions src/keywords.rs
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,7 @@ define_keywords!(
REPLACE,
REPLICATION,
RESET,
RESPECT,
RESTRICT,
RESULT,
RETAIN,
Expand Down
16 changes: 16 additions & 0 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -785,6 +785,7 @@ impl<'a> Parser<'a> {
Ok(Expr::Function(Function {
name: ObjectName(vec![w.to_ident()]),
args: vec![],
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand Down Expand Up @@ -987,6 +988,19 @@ impl<'a> Parser<'a> {
} else {
None
};
let null_treatment = match self.parse_one_of_keywords(&[Keyword::RESPECT, Keyword::IGNORE])
{
Some(keyword) => {
self.expect_keyword(Keyword::NULLS)?;

match keyword {
Keyword::RESPECT => Some(NullTreatment::RespectNulls),
Keyword::IGNORE => Some(NullTreatment::IgnoreNulls),
_ => None,
}
}
None => None,
};
let over = if self.parse_keyword(Keyword::OVER) {
if self.consume_token(&Token::LParen) {
let window_spec = self.parse_window_spec()?;
Expand All @@ -1000,6 +1014,7 @@ impl<'a> Parser<'a> {
Ok(Expr::Function(Function {
name,
args,
null_treatment,
filter,
over,
distinct,
Expand All @@ -1018,6 +1033,7 @@ impl<'a> Parser<'a> {
Ok(Expr::Function(Function {
name,
args,
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand Down
1 change: 1 addition & 0 deletions tests/sqlparser_bigquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,7 @@ fn parse_map_access_offset() {
args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
number("0")
))),],
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand Down
4 changes: 4 additions & 0 deletions tests/sqlparser_clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ fn parse_map_access_expr() {
Value::SingleQuotedString("endpoint".to_string())
))),
],
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand Down Expand Up @@ -90,6 +91,7 @@ fn parse_map_access_expr() {
Value::SingleQuotedString("app".to_string())
))),
],
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand Down Expand Up @@ -140,6 +142,7 @@ fn parse_array_fn() {
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(Ident::new("x1")))),
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(Ident::new("x2")))),
],
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand Down Expand Up @@ -199,6 +202,7 @@ fn parse_delimited_identifiers() {
&Expr::Function(Function {
name: ObjectName(vec![Ident::with_quote('"', "myfun")]),
args: vec![],
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand Down
58 changes: 58 additions & 0 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -875,6 +875,7 @@ fn parse_select_count_wildcard() {
&Expr::Function(Function {
name: ObjectName(vec![Ident::new("COUNT")]),
args: vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard)],
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand All @@ -896,6 +897,7 @@ fn parse_select_count_distinct() {
op: UnaryOperator::Plus,
expr: Box::new(Expr::Identifier(Ident::new("x"))),
}))],
null_treatment: None,
filter: None,
over: None,
distinct: true,
Expand Down Expand Up @@ -1864,6 +1866,7 @@ fn parse_select_having() {
left: Box::new(Expr::Function(Function {
name: ObjectName(vec![Ident::new("COUNT")]),
args: vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard)],
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand All @@ -1890,6 +1893,7 @@ fn parse_select_qualify() {
left: Box::new(Expr::Function(Function {
name: ObjectName(vec![Ident::new("ROW_NUMBER")]),
args: vec![],
null_treatment: None,
filter: None,
over: Some(WindowType::WindowSpec(WindowSpec {
partition_by: vec![Expr::Identifier(Ident::new("p"))],
Expand Down Expand Up @@ -2287,6 +2291,45 @@ fn parse_agg_with_order_by() {
}
}

#[test]
fn parse_window_rank_function() {
let supported_dialects = TestedDialects {
dialects: vec![
Box::new(GenericDialect {}),
Box::new(PostgreSqlDialect {}),
Box::new(MsSqlDialect {}),
Box::new(AnsiDialect {}),
Box::new(HiveDialect {}),
Box::new(SnowflakeDialect {}),
],
options: None,
};

for sql in [
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why were these tests changed?

This PR adds support for window functions (aka those that have an OVER clause) which is different than the ORDER BY clause in the aggregate function argument.

I think you should keep the existing tests and add a new one for the IGNORE NULLS / RESPECT NULLs syntax

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the feedback @alamb .
Please look at the syntax of FIRST_VALUE (for example) in some of the dialects:
https://docs.snowflake.com/en/sql-reference/functions/first_value
https://learn.microsoft.com/en-us/sql/t-sql/functions/first-value-transact-sql?view=sql-server-ver16
https://www.postgresqltutorial.com/postgresql-window-function/postgresql-first_value-function/

The Over Keyword is a must. so the syntax FIRST_VALUE(x ORDER BY x) is not valid.
Even if the tests pass, those tests are not aligned with the syntax.
That's why I added two arrays of dialects - one per supported syntax, each listing the dialects that accept it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Over Keyword is a must. so the syntax FIRST_VALUE(x ORDER BY x) is not valid.

I think that is true when the FIRST_VALUE function is being used as a window function

Some systems allow FIRST_VALUE to be used as a normal aggregate function -- for example:

SELECT 
  FIRST_VALUE(amount ORDER BY time), 
  LAST_VALUE(amount ORDER BY time)
FROM 
  t
GROUP BY
  currency

Perhaps @mustafasrepo can offer some perspective as the author of #882

But basically I think we should retain these tests

Copy link
Contributor

@mustafasrepo mustafasrepo Oct 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually FIRST_VALUE and LAST_VALUE can be used as window or aggregate function (similar to SUM, MIN, etc.). In these tests, they are used as aggregate function. Hence these tests should remain as is.
But having window tests for FIRST_VALUE and LAST_VALUE is also helpful. I think you shouldn't modify the existing tests, and should add yours as a new test.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @alamb and @mustafasrepo :) Learned something about these functions :)
I maintained the old tests

"SELECT column1, column2, FIRST_VALUE(column2) OVER (PARTITION BY column1 ORDER BY column2 NULLS LAST) AS column2_first FROM t1",
"SELECT column1, column2, FIRST_VALUE(column2) OVER (ORDER BY column2 NULLS LAST) AS column2_first FROM t1",
"SELECT col_1, col_2, LAG(col_2) OVER (ORDER BY col_1) FROM t1",
"SELECT LAG(col_2, 1, 0) OVER (ORDER BY col_1) FROM t1",
"SELECT LAG(col_2, 1, 0) OVER (PARTITION BY col_3 ORDER BY col_1)",
] {
supported_dialects.verified_stmt(sql);
}

let supported_dialects_nulls = TestedDialects {
dialects: vec![Box::new(MsSqlDialect {}), Box::new(SnowflakeDialect {})],
options: None,
};

for sql in [
"SELECT column1, column2, FIRST_VALUE(column2) IGNORE NULLS OVER (PARTITION BY column1 ORDER BY column2 NULLS LAST) AS column2_first FROM t1",
"SELECT column1, column2, FIRST_VALUE(column2) RESPECT NULLS OVER (PARTITION BY column1 ORDER BY column2 NULLS LAST) AS column2_first FROM t1",
"SELECT LAG(col_2, 1, 0) IGNORE NULLS OVER (ORDER BY col_1) FROM t1",
"SELECT LAG(col_2, 1, 0) RESPECT NULLS OVER (ORDER BY col_1) FROM t1",
] {
supported_dialects_nulls.verified_stmt(sql);
}
}

#[test]
fn parse_create_table() {
let sql = "CREATE TABLE uk_cities (\
Expand Down Expand Up @@ -3346,6 +3389,7 @@ fn parse_scalar_function_in_projection() {
args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(
Expr::Identifier(Ident::new("id"))
))],
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand Down Expand Up @@ -3466,6 +3510,7 @@ fn parse_named_argument_function() {
))),
},
],
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand Down Expand Up @@ -3498,6 +3543,7 @@ fn parse_window_functions() {
&Expr::Function(Function {
name: ObjectName(vec![Ident::new("row_number")]),
args: vec![],
null_treatment: None,
filter: None,
over: Some(WindowType::WindowSpec(WindowSpec {
partition_by: vec![],
Expand Down Expand Up @@ -3542,6 +3588,7 @@ fn test_parse_named_window() {
quote_style: None,
}),
))],
null_treatment: None,
filter: None,
over: Some(WindowType::NamedWindow(Ident {
value: "window1".to_string(),
Expand All @@ -3568,6 +3615,7 @@ fn test_parse_named_window() {
quote_style: None,
}),
))],
null_treatment: None,
filter: None,
over: Some(WindowType::NamedWindow(Ident {
value: "window2".to_string(),
Expand Down Expand Up @@ -4038,6 +4086,7 @@ fn parse_at_timezone() {
quote_style: None,
}]),
args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(zero.clone()))],
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand Down Expand Up @@ -4066,6 +4115,7 @@ fn parse_at_timezone() {
quote_style: None,
},],),
args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(zero))],
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand All @@ -4078,6 +4128,7 @@ fn parse_at_timezone() {
Value::SingleQuotedString("%Y-%m-%dT%H".to_string()),
),),),
],
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand Down Expand Up @@ -4237,6 +4288,7 @@ fn parse_table_function() {
args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
Value::SingleQuotedString("1".to_owned()),
)))],
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand Down Expand Up @@ -4389,6 +4441,7 @@ fn parse_unnest_in_from_clause() {
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("2")))),
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("3")))),
],
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand Down Expand Up @@ -4419,6 +4472,7 @@ fn parse_unnest_in_from_clause() {
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("2")))),
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("3")))),
],
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand All @@ -4431,6 +4485,7 @@ fn parse_unnest_in_from_clause() {
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("5")))),
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("6")))),
],
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand Down Expand Up @@ -6904,6 +6959,7 @@ fn parse_time_functions() {
let select_localtime_func_call_ast = Function {
name: ObjectName(vec![Ident::new(func_name)]),
args: vec![],
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand Down Expand Up @@ -7391,6 +7447,7 @@ fn parse_pivot_table() {
args: (vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(
Expr::CompoundIdentifier(vec![Ident::new("a"), Ident::new("amount"),])
))]),
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand Down Expand Up @@ -7541,6 +7598,7 @@ fn parse_pivot_unpivot_table() {
args: (vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(
Expr::Identifier(Ident::new("population"))
))]),
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand Down
1 change: 1 addition & 0 deletions tests/sqlparser_hive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,7 @@ fn parse_delimited_identifiers() {
&Expr::Function(Function {
name: ObjectName(vec![Ident::with_quote('"', "myfun")]),
args: vec![],
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand Down
1 change: 1 addition & 0 deletions tests/sqlparser_mssql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,7 @@ fn parse_delimited_identifiers() {
&Expr::Function(Function {
name: ObjectName(vec![Ident::with_quote('"', "myfun")]),
args: vec![],
null_treatment: None,
filter: None,
over: None,
distinct: false,
Expand Down
Loading
Loading