Skip to content

Commit

Permalink
feat: support json visitor in snowflake
Browse files Browse the repository at this point in the history
  • Loading branch information
taozhi8833998 committed Apr 10, 2024
1 parent 7f22270 commit 332563c
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 3 deletions.
36 changes: 34 additions & 2 deletions pegjs/snowflake.pegjs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
'ORDER': true,
'OUTER': true,

'QUALIFY': true,
'RECURSIVE': true,
'RENAME': true,
// 'REPLACE': true,
Expand Down Expand Up @@ -2063,6 +2064,7 @@ select_stmt_nake
w:where_clause? __
g:group_by_clause? __
h:having_clause? __
q:qualify_clause? __
o:order_by_clause? __
l:limit_clause? __
win:window_clause? __
Expand Down Expand Up @@ -2100,6 +2102,7 @@ select_stmt_nake
where: w,
groupby: g,
having: h,
qualify: q,
orderby: o,
limit: l,
window: win,
Expand Down Expand Up @@ -2540,6 +2543,9 @@ column_ref_list
having_clause
= KW_HAVING __ e:or_and_where_expr { /* => expr */ return e; }

qualify_clause
= KW_QUALIFY __ e:or_and_where_expr { /* => or_and_where_expr */ return e; }

window_clause
= KW_WINDOW __ l:named_window_expr_list {
// => { keyword: 'window'; type: 'window', expr: named_window_expr_list; }
Expand Down Expand Up @@ -3802,6 +3808,21 @@ flattern_args
}
}

json_visit
= KW_SINGLE_COLON __ k:ident_without_kw_type {
return {
type: 'json_visitor',
symbol: ':',
expr: k
}
}
json_visit_list
= head:json_visit tail:(__ json_visit)* {
return {
type: 'expr_list',
value: createList(head, tail, 1)
}
}
func_call
= trim_func_clause
/ name:'now'i __ LPAREN __ l:expr_list? __ RPAREN __ 'at'i __ KW_TIME __ 'zone'i __ z:literal_string {
Expand Down Expand Up @@ -3839,6 +3860,16 @@ func_call
over: up
}
}
/ name:'parse_json'i __ LPAREN __ l:or_and_where_expr? __ RPAREN __ j:json_visit_list? {
// => { type: 'function'; name: string; args: expr_list; }
if (l && l.type !== 'expr_list') l = { type: 'expr_list', value: [l] }
return {
type: 'function',
name: { name: [{ type: 'default', value: name }] },
args: l ? l: { type: 'expr_list', value: [] },
suffix: j
};
}
/ name:proc_func_name __ LPAREN __ l:or_and_where_expr? __ RPAREN {
// => { type: 'function'; name: string; args: expr_list; }
if (l && l.type !== 'expr_list') l = { type: 'expr_list', value: [l] }
Expand Down Expand Up @@ -4247,6 +4278,7 @@ KW_GROUP = "GROUP"i !ident_start
KW_BY = "BY"i !ident_start
KW_ORDER = "ORDER"i !ident_start
KW_HAVING = "HAVING"i !ident_start
KW_QUALIFY = "QUALIFY"i !ident_start
KW_WINDOW = "WINDOW"i !ident_start

KW_LIMIT = "LIMIT"i !ident_start
Expand Down Expand Up @@ -4297,6 +4329,7 @@ KW_CHARACTER = "CHARACTER"i !ident_start { return 'CHARACTER'; }
KW_VARCHAR = "VARCHAR"i !ident_start { return 'VARCHAR';}
KW_NUMBER = "NUMBER"i !ident_start { return 'NUMBER'; }
KW_DECIMAL = "DECIMAL"i !ident_start { return 'DECIMAL'; }
KW_STRING = "STRING"i !ident_start { return 'STRING'; }
KW_SIGNED = "SIGNED"i !ident_start { return 'SIGNED'; }
KW_UNSIGNED = "UNSIGNED"i !ident_start { return 'UNSIGNED'; }
KW_INT = "INT"i !ident_start { return 'INT'; }
Expand Down Expand Up @@ -4697,8 +4730,7 @@ character_string_type
// => data_type
return { dataType: t, length: parseInt(l.join(''), 10), parentheses: true };
}
/ t:(KW_CHAR / KW_CHARACTER) { /* => data_type */ return { dataType: t }; }
/ t:KW_VARCHAR { /* => data_type */ return { dataType: t }; }
/ t:(KW_CHAR / KW_CHARACTER / KW_VARCHAR / KW_STRING) { /* => data_type */ return { dataType: t }; }

numeric_type_suffix
= un: KW_UNSIGNED? __ ze: KW_ZEROFILL? {
Expand Down
3 changes: 2 additions & 1 deletion src/expr.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import { caseToSQL } from './case'
import { columnDefinitionToSQL, columnRefToSQL, fullTextSearchToSQL } from './column'
import { anyValueFuncToSQL, castToSQL, extractFunToSQL, flattenFunToSQL, funcToSQL, lambdaToSQL, tablefuncFunToSQL } from './func'
import { intervalToSQL } from './interval'
import { jsonExprToSQL } from './json'
import { jsonExprToSQL, jsonVisitorExprToSQL } from './json'
import { selectToSQL } from './select'
import { showToSQL } from './show'
import { arrayStructExprToSQL } from './array-struct'
Expand Down Expand Up @@ -36,6 +36,7 @@ const exprToSQLConvertFn = {
insert : unionToSQL,
interval : intervalToSQL,
json : jsonExprToSQL,
json_visitor : jsonVisitorExprToSQL,
show : showToSQL,
struct : arrayStructExprToSQL,
tablefunc : tablefuncFunToSQL,
Expand Down
6 changes: 6 additions & 0 deletions src/json.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ function jsonExprToSQL(expr) {
return result
}

function jsonVisitorExprToSQL(stmt) {
const { symbol, expr } = stmt
return [symbol, exprToSQL(expr)].join('')
}

export {
jsonExprToSQL,
jsonVisitorExprToSQL,
}
49 changes: 49 additions & 0 deletions test/snowflake.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,55 @@ describe('snowflake', () => {
'SELECT "src"."customer"[0].name, "src"."vehicle"[0] FROM "car_sales" ORDER BY 1 ASC',
]
},
{
title: 'qualify expr',
sql: [
`SELECT i, p, o
FROM qt
QUALIFY ROW_NUMBER() OVER (PARTITION BY p ORDER BY o) = 1;`,
'SELECT "i", "p", "o" FROM "qt" QUALIFY ROW_NUMBER() OVER (PARTITION BY "p" ORDER BY "o" ASC) = 1'
]
},
{
title: 'json visitor',
sql: [
`SELECT
'1'||'_'||tj.ID AS JOB_KEY,
tj.ID AS PAY_JOB_ID,
tj.ID,
case when tj.transcription_job_split_id is not null then 1 else 0 end AS TRANSCRIPTION_IS_SPLIT, --chnaged logic for all splits
case when tjs.transcription_job_id is not null then tjs.transcription_job_id else tj.id end as PARENT_TRANSCRIPTION_JOB_ID, --New column--
case when tjss.transcription_job_id is not null then tjss.transcription_job_id
when tjss.transcription_job_id is null and tjs.transcription_job_id is not null then tjs.transcription_job_id
else tj.id end AS TRANSCRIPTION_ORIGINAL_JOB_ID,
case when tjss.transcription_job_id is not null then tjss.transcription_job_id
when tjss.transcription_job_id is null and tjs.transcription_job_id is not null then tjs.transcription_job_id
else tj.id end AS REV_JOB_ID,
tj.catalog_item_id,
PARSE_JSON(tj.METADATA) AS METADATA,
PARSE_JSON(tj.METADATA):entry_id::STRING AS ENTRY_ID, --relavent for Kaltura only--
PARSE_JSON(tj.METADATA):partner_id::NUMBER AS PARTNER_ID,--relavent for Kaltura only--
PARSE_JSON(tj.METADATA):original_profile_id::NUMBER AS ORIGINAL_PROFILE_ID,
PARSE_JSON(tj.config):external_provider_captioning_provider::STRING AS caption_provider,
PARSE_JSON(tj.config):external_provider_transcription_provider::STRING AS transcription_provider,
tj.FRAUD_ISSUES,
tj.OWNER_ID, --this will be the customer user that uploaded the file--
tj.SANDBOX,-- should be excluded in all cases--
coalesce(tj.ORDER_ID, tj.ORDER_UUID) ORDER_ID,
coalesce(tj.ORDER_ID, tj.ORDER_UUID) IS NOT NULL AS ONE_ORDERING,
tj.SOURCE_TABLE,
coalesce(tj.job_flow_id,p.job_flow_id) as JOB_FLOW_ID
FROM MRR.MRR_VERBIT_TRANSCRIPTION_JOBS tj -- SON --
left join MRR.MRR_VERBIT_TRANSCRIPTION_JOB_SPLITS tjs
on tj.transcription_job_split_id=tjs.id
inner join MRR.MRR_VERBIT_PROFILES p on tj.profile_id=p.id
left join MRR.MRR_VERBIT_TRANSCRIPTION_JOBS tjj -- FATHER --
on tjs.transcription_job_id=tjj.id
left join MRR.MRR_VERBIT_TRANSCRIPTION_JOB_SPLITS tjss -- GRANDPHA --
on tjj.transcription_job_split_id=tjss.id`,
`SELECT '1' || '_' || "tj"."ID" AS "JOB_KEY", "tj"."ID" AS "PAY_JOB_ID", "tj"."ID", CASE WHEN "tj"."transcription_job_split_id" IS NOT NULL THEN 1 ELSE 0 END AS "TRANSCRIPTION_IS_SPLIT", CASE WHEN "tjs"."transcription_job_id" IS NOT NULL THEN "tjs"."transcription_job_id" ELSE "tj"."id" END AS "PARENT_TRANSCRIPTION_JOB_ID", CASE WHEN "tjss"."transcription_job_id" IS NOT NULL THEN "tjss"."transcription_job_id" WHEN "tjss"."transcription_job_id" IS NULL AND "tjs"."transcription_job_id" IS NOT NULL THEN "tjs"."transcription_job_id" ELSE "tj"."id" END AS "TRANSCRIPTION_ORIGINAL_JOB_ID", CASE WHEN "tjss"."transcription_job_id" IS NOT NULL THEN "tjss"."transcription_job_id" WHEN "tjss"."transcription_job_id" IS NULL AND "tjs"."transcription_job_id" IS NOT NULL THEN "tjs"."transcription_job_id" ELSE "tj"."id" END AS "REV_JOB_ID", "tj"."catalog_item_id", PARSE_JSON("tj"."METADATA") AS "METADATA", PARSE_JSON("tj"."METADATA") :entry_id::STRING AS "ENTRY_ID", PARSE_JSON("tj"."METADATA") :partner_id::NUMBER AS "PARTNER_ID", PARSE_JSON("tj"."METADATA") :original_profile_id::NUMBER AS "ORIGINAL_PROFILE_ID", PARSE_JSON("tj"."config") :external_provider_captioning_provider::STRING AS "caption_provider", PARSE_JSON("tj"."config") :external_provider_transcription_provider::STRING AS "transcription_provider", "tj"."FRAUD_ISSUES", "tj"."OWNER_ID", "tj"."SANDBOX", coalesce("tj"."ORDER_ID", "tj"."ORDER_UUID") AS "ORDER_ID", coalesce("tj"."ORDER_ID", "tj"."ORDER_UUID") IS NOT NULL AS "ONE_ORDERING", "tj"."SOURCE_TABLE", coalesce("tj"."job_flow_id", "p"."job_flow_id") AS "JOB_FLOW_ID" FROM "MRR"."MRR_VERBIT_TRANSCRIPTION_JOBS" AS "tj" LEFT JOIN "MRR"."MRR_VERBIT_TRANSCRIPTION_JOB_SPLITS" AS "tjs" ON "tj"."transcription_job_split_id" = "tjs"."id" INNER JOIN "MRR"."MRR_VERBIT_PROFILES" AS "p" ON "tj"."profile_id" = "p"."id" LEFT JOIN "MRR"."MRR_VERBIT_TRANSCRIPTION_JOBS" AS "tjj" ON "tjs"."transcription_job_id" = "tjj"."id" LEFT JOIN "MRR"."MRR_VERBIT_TRANSCRIPTION_JOB_SPLITS" AS "tjss" ON "tjj"."transcription_job_split_id" = "tjss"."id"`
]
},
]
SQL_LIST.forEach(sqlInfo => {
const { title, sql } = sqlInfo
Expand Down

0 comments on commit 332563c

Please sign in to comment.