From 048aba496e839a176c995ee3755642645f074d47 Mon Sep 17 00:00:00 2001 From: taozhi8833998 Date: Tue, 3 Dec 2024 20:21:27 +0800 Subject: [PATCH 1/2] feat: support range expr as window frame in snowflake --- pegjs/snowflake.pegjs | 7 +++++++ src/window.js | 10 +++++++++- test/snowflake.spec.js | 17 +++++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/pegjs/snowflake.pegjs b/pegjs/snowflake.pegjs index d61253d4..033183ec 100644 --- a/pegjs/snowflake.pegjs +++ b/pegjs/snowflake.pegjs @@ -2687,6 +2687,13 @@ window_frame_clause // => string return `rows between ${p.value} and ${f.value}` } + / 'RANGE'i __ KW_BETWEEN __ i:interval_expr __ 'PRECEDING'i __ KW_AND __ f:interval_expr __ 'PRECEDING'i { + return { + type: 'range', + between: i, + and: f + } + } window_frame_following = s:window_frame_value __ 'FOLLOWING'i { diff --git a/src/window.js b/src/window.js index 3becfcf8..669c0fa7 100644 --- a/src/window.js +++ b/src/window.js @@ -1,7 +1,15 @@ import { hasVal, toUpper } from './util' import { exprToSQL, orderOrPartitionByToSQL } from './expr' +import { intervalToSQL } from './interval' import { overToSQL } from './over' +function rangeExprToSQL(rangeExpr) { + if (!rangeExpr) return + if (typeof rangeExpr === 'string') return toUpper(rangeExpr) + const { type, between, and } = rangeExpr + const result = [toUpper(type), 'BETWEEN', intervalToSQL(between), 'PRECEDING', 'AND', intervalToSQL(and), 'PRECEDING'] + return result.filter(hasVal).join(' ') +} function windowSpecificationToSQL(windowSpec) { const { name, @@ -13,7 +21,7 @@ function windowSpecificationToSQL(windowSpec) { name, orderOrPartitionByToSQL(partitionby, 'partition by'), orderOrPartitionByToSQL(orderby, 'order by'), - toUpper(windowFrame), + rangeExprToSQL(windowFrame), ] return result.filter(hasVal).join(' ') } diff --git a/test/snowflake.spec.js b/test/snowflake.spec.js index 0a11aa89..939e9806 100644 --- a/test/snowflake.spec.js +++ b/test/snowflake.spec.js @@ -467,6 +467,23 
@@ describe('snowflake', () => { 'SELECT "my_column"::FLOAT AS "my_number", "my_column"::FLOAT4 AS "my_number2", "my_column"::FLOAT8 AS "my_number3" FROM "my_table"' ] }, + { + title: 'over window frame', + sql: [ + `SELECT + user_id, + date(derived_tstamp) AS event_date, + price_point, + MAX(price_point) OVER ( + PARTITION BY user_id + ORDER BY date(derived_tstamp) + RANGE BETWEEN INTERVAL '29 DAYS' PRECEDING AND INTERVAL '1 DAY' PRECEDING + ) AS max_price_point_last_30_days + FROM + some_table;`, + `SELECT "user_id", date("derived_tstamp") AS "event_date", "price_point", MAX("price_point") OVER (PARTITION BY "user_id" ORDER BY date("derived_tstamp") ASC RANGE BETWEEN INTERVAL '29 DAYS' PRECEDING AND INTERVAL '1 DAY' PRECEDING) AS "max_price_point_last_30_days" FROM "some_table"` + ] + }, ] SQL_LIST.forEach(sqlInfo => { const { title, sql } = sqlInfo From 7912fd5e10dfd3cfbcac001b46504729cde7bb11 Mon Sep 17 00:00:00 2001 From: taozhi8833998 Date: Wed, 4 Dec 2024 09:19:16 +0800 Subject: [PATCH 2/2] feat: support range expr as window frame in snowflake --- pegjs/athena.pegjs | 22 ++++++++++++++------ pegjs/bigquery.pegjs | 30 +++++++++++++++------------ pegjs/hive.pegjs | 23 ++++++++++++++------- pegjs/mariadb.pegjs | 25 ++++++++++++++--------- pegjs/mysql.pegjs | 23 ++++++++++++++------- pegjs/noql.pegjs | 24 ++++++++++++++-------- pegjs/postgresql.pegjs | 29 +++++++++++++++++--------- pegjs/redshift.pegjs | 25 ++++++++++++++--------- pegjs/snowflake.pegjs | 45 ++++++++++++++++++++++++++++------------- pegjs/transactsql.pegjs | 26 +++++++++++++++--------- pegjs/trino.pegjs | 25 ++++++++++++++--------- src/interval.js | 4 ++-- src/window.js | 16 +++++++-------- 13 files changed, 207 insertions(+), 110 deletions(-) diff --git a/pegjs/athena.pegjs b/pegjs/athena.pegjs index a5a3f68e..33704998 100644 --- a/pegjs/athena.pegjs +++ b/pegjs/athena.pegjs @@ -1320,10 +1320,21 @@ window_specification_frameless window_frame_clause = kw:KW_ROWS __ s:(window_frame_following / 
window_frame_preceding) { - return `rows ${s.value}` + return { + type: 'rows', + expr: s + } } - / KW_ROWS __ KW_BETWEEN __ p:window_frame_preceding __ KW_AND __ f:window_frame_following { - return `rows between ${p.value} and ${f.value}` + / KW_ROWS __ op:KW_BETWEEN __ p:window_frame_preceding __ KW_AND __ f:window_frame_following { + const left = { + type: 'origin', + value: 'rows', + } + const right = { + type: 'expr_list', + value: [p, f] + } + return createBinaryExpr(op, left, right) } window_frame_following @@ -1344,14 +1355,13 @@ window_frame_preceding window_frame_current_row = 'CURRENT'i __ 'ROW'i { - // => { type: 'single_quote_string'; value: string } - return { type: 'single_quote_string', value: 'current row' } + return { type: 'origin', value: 'current row' } } window_frame_value = s:'UNBOUNDED'i { // => literal_string - return { type: 'single_quote_string', value: s.toUpperCase() } + return { type: 'origin', value: s.toUpperCase() } } / literal_numeric diff --git a/pegjs/bigquery.pegjs b/pegjs/bigquery.pegjs index 2a803bfb..c8d72d0e 100644 --- a/pegjs/bigquery.pegjs +++ b/pegjs/bigquery.pegjs @@ -2022,16 +2022,22 @@ window_specification } window_frame_clause - = 'RANGE'i __ KW_BETWEEN 'UNBOUNDED'i __ 'PRECEDING'i __ KW_AND __ 'CURRENT'i __ 'ROW' { - return 'range between unbounded preceding and current row' - } - / kw:KW_ROWS __ s:(window_frame_following / window_frame_preceding) { - // => string - return `rows ${s.value}` + = kw:KW_ROWS __ s:(window_frame_following / window_frame_preceding) { + return { + type: 'rows', + expr: s + } } - / KW_ROWS __ KW_BETWEEN __ p:window_frame_preceding __ KW_AND __ f:window_frame_following { - // => string - return `rows between ${p.value} and ${f.value}` + / k:(KW_ROWS / 'RANGE'i) __ op:KW_BETWEEN __ p:window_frame_preceding __ KW_AND __ f:window_frame_following { + const left = { + type: 'origin', + value: k.toLowerCase(), + } + const right = { + type: 'expr_list', + value: [p, f] + } + return 
createBinaryExpr(op, left, right) } window_frame_following @@ -2052,14 +2058,12 @@ window_frame_preceding window_frame_current_row = 'CURRENT'i __ 'ROW'i { - // => { type: 'single_quote_string'; value: string } - return { type: 'single_quote_string', value: 'current row', ...getLocationObject() } + return { type: 'origin', value: 'current row', ...getLocationObject() } } window_frame_value = s:'UNBOUNDED'i { - // => literal_string - return { type: 'single_quote_string', value: s.toUpperCase(), ...getLocationObject() } + return { type: 'origin', value: s.toUpperCase(), ...getLocationObject() } } / literal_numeric diff --git a/pegjs/hive.pegjs b/pegjs/hive.pegjs index bb3a5202..002b03c2 100644 --- a/pegjs/hive.pegjs +++ b/pegjs/hive.pegjs @@ -1297,10 +1297,21 @@ window_specification_frameless window_frame_clause = kw:KW_ROWS __ s:(window_frame_following / window_frame_preceding) { - return `rows ${s.value}` + return { + type: 'rows', + expr: s + } } - / KW_ROWS __ KW_BETWEEN __ p:window_frame_preceding __ KW_AND __ f:window_frame_following { - return `rows between ${p.value} and ${f.value}` + / KW_ROWS __ op:KW_BETWEEN __ p:window_frame_preceding __ KW_AND __ f:window_frame_following { + const left = { + type: 'origin', + value: 'rows', + } + const right = { + type: 'expr_list', + value: [p, f] + } + return createBinaryExpr(op, left, right) } window_frame_following @@ -1321,14 +1332,12 @@ window_frame_preceding window_frame_current_row = 'CURRENT'i __ 'ROW'i { - // => { type: 'single_quote_string'; value: string } - return { type: 'single_quote_string', value: 'current row' } + return { type: 'origin', value: 'current row' } } window_frame_value = s:'UNBOUNDED'i { - // => literal_string - return { type: 'single_quote_string', value: s.toUpperCase() } + return { type: 'origin', value: s.toUpperCase() } } / literal_numeric diff --git a/pegjs/mariadb.pegjs b/pegjs/mariadb.pegjs index 335dc39c..30f76422 100644 --- a/pegjs/mariadb.pegjs +++ b/pegjs/mariadb.pegjs @@ 
-3098,12 +3098,21 @@ window_specification_frameless window_frame_clause = kw:KW_ROWS __ s:(window_frame_following / window_frame_preceding) { - // => string - return `rows ${s.value}` + return { + type: 'rows', + expr: s + } } - / KW_ROWS __ KW_BETWEEN __ p:window_frame_preceding __ KW_AND __ f:window_frame_following { - // => string - return `rows between ${p.value} and ${f.value}` + / KW_ROWS __ op:KW_BETWEEN __ p:window_frame_preceding __ KW_AND __ f:window_frame_following { + const left = { + type: 'origin', + value: 'rows', + } + const right = { + type: 'expr_list', + value: [p, f] + } + return createBinaryExpr(op, left, right) } window_frame_following @@ -3124,14 +3133,12 @@ window_frame_preceding window_frame_current_row = 'CURRENT'i __ 'ROW'i { - // => { type: 'single_quote_string'; value: string } - return { type: 'single_quote_string', value: 'current row' } + return { type: 'origin', value: 'current row' } } window_frame_value = s:'UNBOUNDED'i { - // => literal_string - return { type: 'single_quote_string', value: s.toUpperCase() } + return { type: 'origin', value: s.toUpperCase() } } / literal_numeric diff --git a/pegjs/mysql.pegjs b/pegjs/mysql.pegjs index 13d4d938..43688ee4 100644 --- a/pegjs/mysql.pegjs +++ b/pegjs/mysql.pegjs @@ -3388,12 +3388,21 @@ window_specification_frameless window_frame_clause = kw:KW_ROWS __ s:(window_frame_following / window_frame_preceding) { - // => string - return `rows ${s.value}` + return { + type: 'rows', + expr: s + } } - / KW_ROWS __ KW_BETWEEN __ p:window_frame_preceding __ KW_AND __ f:window_frame_following { - // => string - return `rows between ${p.value} and ${f.value}` + / KW_ROWS __ op:KW_BETWEEN __ p:window_frame_preceding __ KW_AND __ f:window_frame_following { + const left = { + type: 'origin', + value: 'rows', + } + const right = { + type: 'expr_list', + value: [p, f] + } + return createBinaryExpr(op, left, right) } window_frame_following @@ -3414,12 +3423,12 @@ window_frame_preceding 
window_frame_current_row = 'CURRENT'i __ 'ROW'i { - return { type: 'single_quote_string', value: 'current row', ...getLocationObject() } + return { type: 'origin', value: 'current row', ...getLocationObject() } } window_frame_value = s:'UNBOUNDED'i { - return { type: 'single_quote_string', value: s.toUpperCase(), ...getLocationObject() } + return { type: 'origin', value: s.toUpperCase(), ...getLocationObject() } } / literal_numeric diff --git a/pegjs/noql.pegjs b/pegjs/noql.pegjs index b6dfc1e0..321bfb7a 100644 --- a/pegjs/noql.pegjs +++ b/pegjs/noql.pegjs @@ -3224,12 +3224,21 @@ window_specification_frameless window_frame_clause = kw:KW_ROWS __ s:(window_frame_following / window_frame_preceding) { - // => string - return `rows ${s.value}` + return { + type: 'rows', + expr: s + } } - / KW_ROWS __ KW_BETWEEN __ p:window_frame_preceding __ KW_AND __ f:window_frame_following { - // => string - return `rows between ${p.value} and ${f.value}` + / KW_ROWS __ op:KW_BETWEEN __ p:window_frame_preceding __ KW_AND __ f:window_frame_following { + const left = { + type: 'origin', + value: 'rows', + } + const right = { + type: 'expr_list', + value: [p, f] + } + return createBinaryExpr(op, left, right) } window_frame_following @@ -3250,14 +3259,13 @@ window_frame_preceding window_frame_current_row = 'CURRENT'i __ 'ROW'i { - // => { type: 'single_quote_string'; value: string } - return { type: 'single_quote_string', value: 'current row' } + return { type: 'origin', value: 'current row' } } window_frame_value = s:'UNBOUNDED'i { // => literal_string - return { type: 'single_quote_string', value: s.toUpperCase() } + return { type: 'origin', value: s.toUpperCase() } } / literal_numeric diff --git a/pegjs/postgresql.pegjs b/pegjs/postgresql.pegjs index e72f1948..5a32ec88 100644 --- a/pegjs/postgresql.pegjs +++ b/pegjs/postgresql.pegjs @@ -3758,12 +3758,23 @@ window_specification_frameless window_frame_clause = kw:KW_ROWS __ s:(window_frame_following / window_frame_preceding) { - // => 
string - return `rows ${s.value}` + // => { type: 'rows'; expr: window_frame_following / window_frame_preceding } + return { + type: 'rows', + expr: s + } } - / KW_ROWS __ KW_BETWEEN __ p:window_frame_preceding __ KW_AND __ f:window_frame_following { - // => string - return `rows between ${p.value} and ${f.value}` + / KW_ROWS __ op:KW_BETWEEN __ p:window_frame_preceding __ KW_AND __ f:window_frame_following { + // => binary_expr + const left = { + type: 'origin', + value: 'rows', + } + const right = { + type: 'expr_list', + value: [p, f] + } + return createBinaryExpr(op, left, right) } window_frame_following @@ -3784,14 +3795,14 @@ window_frame_preceding window_frame_current_row = 'CURRENT'i __ 'ROW'i { - // => { type: 'single_quote_string'; value: string } - return { type: 'single_quote_string', value: 'current row' } + // => { type: 'origin'; value: string } + return { type: 'origin', value: 'current row' } } window_frame_value = s:'UNBOUNDED'i { - // => literal_string - return { type: 'single_quote_string', value: s.toUpperCase() } + // => { type: 'origin'; value: string } + return { type: 'origin', value: s.toUpperCase() } } / literal_numeric diff --git a/pegjs/redshift.pegjs b/pegjs/redshift.pegjs index edb12e04..3f3c8201 100644 --- a/pegjs/redshift.pegjs +++ b/pegjs/redshift.pegjs @@ -3253,12 +3253,21 @@ window_specification_frameless window_frame_clause = kw:KW_ROWS __ s:(window_frame_following / window_frame_preceding) { - // => string - return `rows ${s.value}` + return { + type: 'rows', + expr: s + } } - / KW_ROWS __ KW_BETWEEN __ p:window_frame_preceding __ KW_AND __ f:window_frame_following { - // => string - return `rows between ${p.value} and ${f.value}` + / KW_ROWS __ op:KW_BETWEEN __ p:window_frame_preceding __ KW_AND __ f:window_frame_following { + const left = { + type: 'origin', + value: 'rows', + } + const right = { + type: 'expr_list', + value: [p, f] + } + return createBinaryExpr(op, left, right) } window_frame_following @@ -3279,14 +3288,12 @@ 
window_frame_preceding window_frame_current_row = 'CURRENT'i __ 'ROW'i { - // => { type: 'single_quote_string'; value: string } - return { type: 'single_quote_string', value: 'current row' } + return { type: 'origin', value: 'current row' } } window_frame_value = s:'UNBOUNDED'i { - // => literal_string - return { type: 'single_quote_string', value: s.toUpperCase() } + return { type: 'origin', value: s.toUpperCase() } } / literal_numeric diff --git a/pegjs/snowflake.pegjs b/pegjs/snowflake.pegjs index 033183ec..2d439197 100644 --- a/pegjs/snowflake.pegjs +++ b/pegjs/snowflake.pegjs @@ -2680,19 +2680,38 @@ window_specification_frameless window_frame_clause = kw:KW_ROWS __ s:(window_frame_following / window_frame_preceding) { - // => string - return `rows ${s.value}` + return { + type: 'rows', + expr: s + } } - / KW_ROWS __ KW_BETWEEN __ p:window_frame_preceding __ KW_AND __ f:window_frame_following { - // => string - return `rows between ${p.value} and ${f.value}` + / KW_ROWS __ op:KW_BETWEEN __ p:window_frame_preceding __ KW_AND __ f:window_frame_following { + const left = { + type: 'origin', + value: 'rows', + } + const right = { + type: 'expr_list', + value: [p, f] + } + return createBinaryExpr(op, left, right) } - / 'RANGE'i __ KW_BETWEEN __ i:interval_expr __ 'PRECEDING'i __ KW_AND __ f:interval_expr __ 'PRECEDING'i { - return { - type: 'range', - between: i, - and: f + / 'RANGE'i __ op:KW_BETWEEN __ p:interval_expr __ 'PRECEDING'i __ KW_AND __ f:interval_expr __ 'PRECEDING'i { + const left = { + type: 'origin', + value: 'range', + } + const suffix = { + type: 'origin', + value: 'preceding', + } + p.suffix = suffix + f.suffix = suffix + const right = { + type: 'expr_list', + value: [p, f] } + return createBinaryExpr(op, left, right) } window_frame_following @@ -2713,14 +2732,12 @@ window_frame_preceding window_frame_current_row = 'CURRENT'i __ 'ROW'i { - // => { type: 'single_quote_string'; value: string } - return { type: 'single_quote_string', value: 'current 
row', ...getLocationObject() } + return { type: 'origin', value: 'current row', ...getLocationObject() } } window_frame_value = s:'UNBOUNDED'i { - // => literal_string - return { type: 'single_quote_string', value: s.toUpperCase(), ...getLocationObject() } + return { type: 'origin', value: s.toUpperCase(), ...getLocationObject() } } / literal_numeric diff --git a/pegjs/transactsql.pegjs b/pegjs/transactsql.pegjs index a34461e1..273059ec 100644 --- a/pegjs/transactsql.pegjs +++ b/pegjs/transactsql.pegjs @@ -2642,13 +2642,23 @@ window_specification_frameless window_frame_clause = kw:KW_ROWS __ s:(window_frame_following / window_frame_preceding) { - // => string - return `rows ${s.value}` + return { + type: 'rows', + expr: s + } } - / KW_ROWS __ KW_BETWEEN __ p:window_frame_bound __ KW_AND __ f:window_frame_bound { - // => string - return `rows between ${p.value} and ${f.value}` + / KW_ROWS __ op:KW_BETWEEN __ p:window_frame_bound __ KW_AND __ f:window_frame_bound { + const left = { + type: 'origin', + value: 'rows', + } + const right = { + type: 'expr_list', + value: [p, f] + } + return createBinaryExpr(op, left, right) } + window_frame_bound = window_frame_preceding / window_frame_following window_frame_following @@ -2669,14 +2679,12 @@ window_frame_preceding window_frame_current_row = 'CURRENT'i __ 'ROW'i { - // => { type: 'single_quote_string'; value: string } - return { type: 'single_quote_string', value: 'current row' } + return { type: 'origin', value: 'current row' } } window_frame_value = s:'UNBOUNDED'i { - // => literal_string - return { type: 'single_quote_string', value: s.toUpperCase() } + return { type: 'origin', value: s.toUpperCase() } } / literal_numeric diff --git a/pegjs/trino.pegjs b/pegjs/trino.pegjs index eb0897fc..fb92a00f 100644 --- a/pegjs/trino.pegjs +++ b/pegjs/trino.pegjs @@ -2637,12 +2637,21 @@ window_specification_frameless window_frame_clause = kw:KW_ROWS __ s:(window_frame_following / window_frame_preceding) { - // => string - return 
`rows ${s.value}` + return { + type: 'rows', + expr: s + } } - / KW_ROWS __ KW_BETWEEN __ p:window_frame_preceding __ KW_AND __ f:window_frame_following { - // => string - return `rows between ${p.value} and ${f.value}` + / KW_ROWS __ op:KW_BETWEEN __ p:window_frame_preceding __ KW_AND __ f:window_frame_following { + const left = { + type: 'origin', + value: 'rows', + } + const right = { + type: 'expr_list', + value: [p, f] + } + return createBinaryExpr(op, left, right) } window_frame_following @@ -2663,14 +2672,12 @@ window_frame_preceding window_frame_current_row = 'CURRENT'i __ 'ROW'i { - // => { type: 'single_quote_string'; value: string } - return { type: 'single_quote_string', value: 'current row', ...getLocationObject() } + return { type: 'origin', value: 'current row', ...getLocationObject() } } window_frame_value = s:'UNBOUNDED'i { - // => literal_string - return { type: 'single_quote_string', value: s.toUpperCase(), ...getLocationObject() } + return { type: 'origin', value: s.toUpperCase(), ...getLocationObject() } } / literal_numeric diff --git a/src/interval.js b/src/interval.js index cd3c7dde..82b2389a 100644 --- a/src/interval.js +++ b/src/interval.js @@ -2,8 +2,8 @@ import { toUpper, hasVal } from './util' import { exprToSQL } from './expr' function intervalToSQL(intervalExpr) { - const { expr, unit } = intervalExpr - const result = ['INTERVAL', exprToSQL(expr), toUpper(unit)] + const { expr, unit, suffix } = intervalExpr + const result = ['INTERVAL', exprToSQL(expr), toUpper(unit), exprToSQL(suffix)] return result.filter(hasVal).join(' ') } diff --git a/src/window.js b/src/window.js index 669c0fa7..d4a3892c 100644 --- a/src/window.js +++ b/src/window.js @@ -1,14 +1,14 @@ import { hasVal, toUpper } from './util' import { exprToSQL, orderOrPartitionByToSQL } from './expr' -import { intervalToSQL } from './interval' import { overToSQL } from './over' -function rangeExprToSQL(rangeExpr) { - if (!rangeExpr) return - if (typeof rangeExpr === 'string') 
return toUpper(rangeExpr) - const { type, between, and } = rangeExpr - const result = [toUpper(type), 'BETWEEN', intervalToSQL(between), 'PRECEDING', 'AND', intervalToSQL(and), 'PRECEDING'] - return result.filter(hasVal).join(' ') +function windowFrameExprToSQL(windowFrameExpr) { + if (!windowFrameExpr) return + const { type } = windowFrameExpr + if (type === 'rows') { + return [toUpper(type), exprToSQL(windowFrameExpr.expr)].filter(hasVal).join(' ') + } + return exprToSQL(windowFrameExpr) } function windowSpecificationToSQL(windowSpec) { const { @@ -21,7 +21,7 @@ function windowSpecificationToSQL(windowSpec) { name, orderOrPartitionByToSQL(partitionby, 'partition by'), orderOrPartitionByToSQL(orderby, 'order by'), - rangeExprToSQL(windowFrame), + windowFrameExprToSQL(windowFrame), ] return result.filter(hasVal).join(' ') }