From 332563c36898756ae9820c53cf051960bd75096a Mon Sep 17 00:00:00 2001 From: taozhi8833998 Date: Wed, 10 Apr 2024 09:45:37 +0800 Subject: [PATCH] feat: support json visitor in snowflake --- pegjs/snowflake.pegjs | 36 +++++++++++++++++++++++++++++-- src/expr.js | 3 ++- src/json.js | 6 ++++++ test/snowflake.spec.js | 49 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 3 deletions(-) diff --git a/pegjs/snowflake.pegjs b/pegjs/snowflake.pegjs index 9c5c6e38..c931bee2 100644 --- a/pegjs/snowflake.pegjs +++ b/pegjs/snowflake.pegjs @@ -62,6 +62,7 @@ 'ORDER': true, 'OUTER': true, + 'QUALIFY': true, 'RECURSIVE': true, 'RENAME': true, // 'REPLACE': true, @@ -2063,6 +2064,7 @@ select_stmt_nake w:where_clause? __ g:group_by_clause? __ h:having_clause? __ + q:qualify_clause? __ o:order_by_clause? __ l:limit_clause? __ win:window_clause? __ @@ -2100,6 +2102,7 @@ select_stmt_nake where: w, groupby: g, having: h, + qualify: q, orderby: o, limit: l, window: win, @@ -2540,6 +2543,9 @@ column_ref_list having_clause = KW_HAVING __ e:or_and_where_expr { /* => expr */ return e; } +qualify_clause + = KW_QUALIFY __ e:or_and_where_expr { /* => or_and_where_expr */ return e; } + window_clause = KW_WINDOW __ l:named_window_expr_list { // => { keyword: 'window'; type: 'window', expr: named_window_expr_list; } @@ -3802,6 +3808,21 @@ flattern_args } } +json_visit + = KW_SINGLE_COLON __ k:ident_without_kw_type { + return { + type: 'json_visitor', + symbol: ':', + expr: k + } + } +json_visit_list + = head:json_visit tail:(__ json_visit)* { + return { + type: 'expr_list', + value: createList(head, tail, 1) + } + } func_call = trim_func_clause / name:'now'i __ LPAREN __ l:expr_list? __ RPAREN __ 'at'i __ KW_TIME __ 'zone'i __ z:literal_string { @@ -3839,6 +3860,16 @@ func_call over: up } } + / name:'parse_json'i __ LPAREN __ l:or_and_where_expr? __ RPAREN __ j:json_visit_list? { + // => { type: 'function'; name: string; args: expr_list; } + if (l && l.type !== 'expr_list') l = { type: 'expr_list', value: [l] } + return { + type: 'function', + name: { name: [{ type: 'default', value: name }] }, + args: l ? l: { type: 'expr_list', value: [] }, + suffix: j + }; + } / name:proc_func_name __ LPAREN __ l:or_and_where_expr? __ RPAREN { // => { type: 'function'; name: string; args: expr_list; } if (l && l.type !== 'expr_list') l = { type: 'expr_list', value: [l] } @@ -4247,6 +4278,7 @@ KW_GROUP = "GROUP"i !ident_start KW_BY = "BY"i !ident_start KW_ORDER = "ORDER"i !ident_start KW_HAVING = "HAVING"i !ident_start +KW_QUALIFY = "QUALIFY"i !ident_start KW_WINDOW = "WINDOW"i !ident_start KW_LIMIT = "LIMIT"i !ident_start @@ -4297,6 +4329,7 @@ KW_CHARACTER = "CHARACTER"i !ident_start { return 'CHARACTER'; } KW_VARCHAR = "VARCHAR"i !ident_start { return 'VARCHAR';} KW_NUMBER = "NUMBER"i !ident_start { return 'NUMBER'; } KW_DECIMAL = "DECIMAL"i !ident_start { return 'DECIMAL'; } +KW_STRING = "STRING"i !ident_start { return 'STRING'; } KW_SIGNED = "SIGNED"i !ident_start { return 'SIGNED'; } KW_UNSIGNED = "UNSIGNED"i !ident_start { return 'UNSIGNED'; } KW_INT = "INT"i !ident_start { return 'INT'; } @@ -4697,8 +4730,7 @@ character_string_type // => data_type return { dataType: t, length: parseInt(l.join(''), 10), parentheses: true }; } - / t:(KW_CHAR / KW_CHARACTER) { /* => data_type */ return { dataType: t }; } - / t:KW_VARCHAR { /* => data_type */ return { dataType: t }; } + / t:(KW_CHAR / KW_CHARACTER / KW_VARCHAR / KW_STRING) { /* => data_type */ return { dataType: t }; } numeric_type_suffix = un: KW_UNSIGNED? __ ze: KW_ZEROFILL? { diff --git a/src/expr.js b/src/expr.js index c6f76c8c..18c00368 100644 --- a/src/expr.js +++ b/src/expr.js @@ -7,7 +7,7 @@ import { caseToSQL } from './case' import { columnDefinitionToSQL, columnRefToSQL, fullTextSearchToSQL } from './column' import { anyValueFuncToSQL, castToSQL, extractFunToSQL, flattenFunToSQL, funcToSQL, lambdaToSQL, tablefuncFunToSQL } from './func' import { intervalToSQL } from './interval' -import { jsonExprToSQL } from './json' +import { jsonExprToSQL, jsonVisitorExprToSQL } from './json' import { selectToSQL } from './select' import { showToSQL } from './show' import { arrayStructExprToSQL } from './array-struct' @@ -36,6 +36,7 @@ const exprToSQLConvertFn = { insert : unionToSQL, interval : intervalToSQL, json : jsonExprToSQL, + json_visitor : jsonVisitorExprToSQL, show : showToSQL, struct : arrayStructExprToSQL, tablefunc : tablefuncFunToSQL, diff --git a/src/json.js b/src/json.js index 78e2eee5..29eb4cf3 100644 --- a/src/json.js +++ b/src/json.js @@ -7,6 +7,12 @@ function jsonExprToSQL(expr) { return result } +function jsonVisitorExprToSQL(stmt) { + const { symbol, expr } = stmt + return [symbol, exprToSQL(expr)].join('') +} + export { jsonExprToSQL, + jsonVisitorExprToSQL, } diff --git a/test/snowflake.spec.js b/test/snowflake.spec.js index 7ae74f4d..7386e853 100644 --- a/test/snowflake.spec.js +++ b/test/snowflake.spec.js @@ -155,6 +155,55 @@ describe('snowflake', () => { 'SELECT "src"."customer"[0].name, "src"."vehicle"[0] FROM "car_sales" ORDER BY 1 ASC', ] }, + { + title: 'qualify expr', + sql: [ + `SELECT i, p, o + FROM qt + QUALIFY ROW_NUMBER() OVER (PARTITION BY p ORDER BY o) = 1;`, + 'SELECT "i", "p", "o" FROM "qt" QUALIFY ROW_NUMBER() OVER (PARTITION BY "p" ORDER BY "o" ASC) = 1' + ] + }, + { + title: 'json visitor', + sql: [ + `SELECT + '1'||'_'||tj.ID AS JOB_KEY, + tj.ID AS PAY_JOB_ID, + tj.ID, + case when tj.transcription_job_split_id is not null then 1 else 0 end AS TRANSCRIPTION_IS_SPLIT, --chnaged logic for all splits + case when tjs.transcription_job_id is not null then tjs.transcription_job_id else tj.id end as PARENT_TRANSCRIPTION_JOB_ID, --New column-- + case when tjss.transcription_job_id is not null then tjss.transcription_job_id + when tjss.transcription_job_id is null and tjs.transcription_job_id is not null then tjs.transcription_job_id + else tj.id end AS TRANSCRIPTION_ORIGINAL_JOB_ID, + case when tjss.transcription_job_id is not null then tjss.transcription_job_id + when tjss.transcription_job_id is null and tjs.transcription_job_id is not null then tjs.transcription_job_id + else tj.id end AS REV_JOB_ID, + tj.catalog_item_id, + PARSE_JSON(tj.METADATA) AS METADATA, + PARSE_JSON(tj.METADATA):entry_id::STRING AS ENTRY_ID, --relavent for Kaltura only-- + PARSE_JSON(tj.METADATA):partner_id::NUMBER AS PARTNER_ID,--relavent for Kaltura only-- + PARSE_JSON(tj.METADATA):original_profile_id::NUMBER AS ORIGINAL_PROFILE_ID, + PARSE_JSON(tj.config):external_provider_captioning_provider::STRING AS caption_provider, + PARSE_JSON(tj.config):external_provider_transcription_provider::STRING AS transcription_provider, + tj.FRAUD_ISSUES, + tj.OWNER_ID, --this will be the customer user that uploaded the file-- + tj.SANDBOX,-- should be excluded in all cases-- + coalesce(tj.ORDER_ID, tj.ORDER_UUID) ORDER_ID, + coalesce(tj.ORDER_ID, tj.ORDER_UUID) IS NOT NULL AS ONE_ORDERING, + tj.SOURCE_TABLE, + coalesce(tj.job_flow_id,p.job_flow_id) as JOB_FLOW_ID + FROM MRR.MRR_VERBIT_TRANSCRIPTION_JOBS tj -- SON -- + left join MRR.MRR_VERBIT_TRANSCRIPTION_JOB_SPLITS tjs + on tj.transcription_job_split_id=tjs.id + inner join MRR.MRR_VERBIT_PROFILES p on tj.profile_id=p.id + left join MRR.MRR_VERBIT_TRANSCRIPTION_JOBS tjj -- FATHER -- + on tjs.transcription_job_id=tjj.id + left join MRR.MRR_VERBIT_TRANSCRIPTION_JOB_SPLITS tjss -- GRANDPHA -- + on tjj.transcription_job_split_id=tjss.id`, + `SELECT '1' || '_' || "tj"."ID" AS "JOB_KEY", "tj"."ID" AS "PAY_JOB_ID", "tj"."ID", CASE WHEN "tj"."transcription_job_split_id" IS NOT NULL THEN 1 ELSE 0 END AS "TRANSCRIPTION_IS_SPLIT", CASE WHEN "tjs"."transcription_job_id" IS NOT NULL THEN "tjs"."transcription_job_id" ELSE "tj"."id" END AS "PARENT_TRANSCRIPTION_JOB_ID", CASE WHEN "tjss"."transcription_job_id" IS NOT NULL THEN "tjss"."transcription_job_id" WHEN "tjss"."transcription_job_id" IS NULL AND "tjs"."transcription_job_id" IS NOT NULL THEN "tjs"."transcription_job_id" ELSE "tj"."id" END AS "TRANSCRIPTION_ORIGINAL_JOB_ID", CASE WHEN "tjss"."transcription_job_id" IS NOT NULL THEN "tjss"."transcription_job_id" WHEN "tjss"."transcription_job_id" IS NULL AND "tjs"."transcription_job_id" IS NOT NULL THEN "tjs"."transcription_job_id" ELSE "tj"."id" END AS "REV_JOB_ID", "tj"."catalog_item_id", PARSE_JSON("tj"."METADATA") AS "METADATA", PARSE_JSON("tj"."METADATA") :entry_id::STRING AS "ENTRY_ID", PARSE_JSON("tj"."METADATA") :partner_id::NUMBER AS "PARTNER_ID", PARSE_JSON("tj"."METADATA") :original_profile_id::NUMBER AS "ORIGINAL_PROFILE_ID", PARSE_JSON("tj"."config") :external_provider_captioning_provider::STRING AS "caption_provider", PARSE_JSON("tj"."config") :external_provider_transcription_provider::STRING AS "transcription_provider", "tj"."FRAUD_ISSUES", "tj"."OWNER_ID", "tj"."SANDBOX", coalesce("tj"."ORDER_ID", "tj"."ORDER_UUID") AS "ORDER_ID", coalesce("tj"."ORDER_ID", "tj"."ORDER_UUID") IS NOT NULL AS "ONE_ORDERING", "tj"."SOURCE_TABLE", coalesce("tj"."job_flow_id", "p"."job_flow_id") AS "JOB_FLOW_ID" FROM "MRR"."MRR_VERBIT_TRANSCRIPTION_JOBS" AS "tj" LEFT JOIN "MRR"."MRR_VERBIT_TRANSCRIPTION_JOB_SPLITS" AS "tjs" ON "tj"."transcription_job_split_id" = "tjs"."id" INNER JOIN "MRR"."MRR_VERBIT_PROFILES" AS "p" ON "tj"."profile_id" = "p"."id" LEFT JOIN "MRR"."MRR_VERBIT_TRANSCRIPTION_JOBS" AS "tjj" ON "tjs"."transcription_job_id" = "tjj"."id" LEFT JOIN "MRR"."MRR_VERBIT_TRANSCRIPTION_JOB_SPLITS" AS "tjss" ON "tjj"."transcription_job_split_id" = "tjss"."id"` + ] + }, ] SQL_LIST.forEach(sqlInfo => { const { title, sql } = sqlInfo