diff --git a/pegjs/athena.pegjs b/pegjs/athena.pegjs index bcc04019..2a50c136 100644 --- a/pegjs/athena.pegjs +++ b/pegjs/athena.pegjs @@ -82,6 +82,7 @@ 'UNION': true, 'UPDATE': true, 'USING': true, + 'UNNEST': true, 'VALUES': true, @@ -1088,9 +1089,28 @@ column_list_item } alias_clause - = KW_AS __ i:alias_ident { return i; } + = KW_AS __ i:(func_call / alias_ident) { return i; } / KW_AS? __ i:ident { return i; } +with_offset + = KW_WITH __ KW_OFFSET __ alias:alias_clause? { + return { + keyword: 'with offset as', + as: alias + } + } + +from_unnest_item + = 'UNNEST'i __ LPAREN __ a:expr? __ RPAREN __ alias:alias_clause? __ wf:with_offset? { + return { + type: 'unnest', + expr: a, + parentheses: true, + as: alias, + with_offset: wf, + } + } + from_clause = KW_FROM __ l:table_ref_list { return l; } @@ -1186,7 +1206,8 @@ table_join //NOTE that, the table assigned to `var` shouldn't write in `table_join` table_base - = KW_DUAL { + = from_unnest_item + / KW_DUAL { return { type: 'dual' }; @@ -2142,6 +2163,17 @@ literal / literal_bool / literal_null / literal_datetime + / literal_array + +literal_array + = s:KW_ARRAY __ LBRAKE __ c:expr_list? __ RBRAKE { + return { + expr_list: c || { type: 'origin', value: '' }, + type: 'array', + keyword: 'array', + brackets: true + } + } literal_list = head:literal tail:(__ COMMA __ literal)* { diff --git a/src/tables.js b/src/tables.js index e8a24597..062c2b6b 100644 --- a/src/tables.js +++ b/src/tables.js @@ -9,7 +9,7 @@ function unnestToSQL(unnestExpr) { const { type, as, expr, with_offset: withOffset } = unnestExpr const result = [ `${toUpper(type)}(${expr && exprToSQL(expr) || ''})`, - commonOptionConnector('AS', identifierToSql, as), + commonOptionConnector('AS', typeof as === 'string' ? identifierToSql : exprToSQL, as), commonOptionConnector( toUpper(withOffset && withOffset.keyword), identifierToSql, diff --git a/test/athena.spec.js b/test/athena.spec.js index 363505bd..786fcfe6 100644 --- a/test/athena.spec.js +++ b/test/athena.spec.js @@ -145,4 +145,70 @@ describe('athena', () => { order by first_note_week` expect(getParsedSql(sql)).to.be.equal("WITH `weekly_data` AS (SELECT LOWER(`m`.`distinct_id`) AS `therapist`, SPLIT_PART(SPLIT_PART(`distinct_id`, '@', 2), '.', 1) AS `eleos_organization`, DATE_TRUNC('week', `timestamp_event`) AS `week_start`, MIN(DATE_TRUNC('week', `timestamp_event`)) OVER (PARTITION BY LOWER(`m`.`distinct_id`)) AS `first_note_week` FROM `bronze_prod`.`outreach_mixpanel_events` AS `m` WHERE SPLIT_PART(SPLIT_PART(`distinct_id`, '@', 2), '.', 1) IN ('thresholds', 'trilogyinc', 'zepfcenter') AND `timestamp_event` IS NOT NULL AND `event` LIKE '%outreach - note saved%'), `weekly_totals` AS (SELECT `wd`.`eleos_organization`, `wd`.`week_start`, COUNT(DISTINCT `wd`.`therapist`) AS `active_therapists`, COUNT(DISTINCT CASE WHEN `wd`.`first_note_week` = `wd`.`week_start` THEN `wd`.`therapist` END) AS `activated_therapists`, SUM(COUNT(DISTINCT CASE WHEN `wd`.`first_note_week` = `wd`.`week_start` THEN `wd`.`therapist` END)) OVER (PARTITION BY `wd`.`eleos_organization` ORDER BY `wd`.`week_start` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `accumulated_therapists` FROM `weekly_data` AS `wd` GROUP BY `wd`.`week_start`, `wd`.`eleos_organization` ORDER BY `wd`.`week_start` ASC), `weekly_data_with_namber` AS (SELECT DISTINCT `eleos_organization`, `therapist`, `week_start`, `first_note_week`, EXTRACT(DAY FROM `week_start` - `first_note_week`) / 7 AS `week_number` FROM `weekly_data` ORDER BY `first_note_week` ASC) SELECT `wd`.`eleos_organization`, `wd`.`first_note_week`, `wt`.`active_therapists`, `wt`.`activated_therapists`, `wt`.`accumulated_therapists`, SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END) AS `week_0`, SUM(CASE WHEN `week_number` = 1 THEN 1 ELSE 0 END) AS `week_1`, SUM(CASE WHEN `week_number` = 2 THEN 1 ELSE 0 END) AS `week_2`, SUM(CASE WHEN `week_number` = 3 THEN 1 ELSE 0 END) AS `week_3`, SUM(CASE WHEN `week_number` = 4 THEN 1 ELSE 0 END) AS `week_4`, SUM(CASE WHEN `week_number` = 5 THEN 1 ELSE 0 END) AS `week_5`, SUM(CASE WHEN `week_number` = 6 THEN 1 ELSE 0 END) AS `week_6`, SUM(CASE WHEN `week_number` = 7 THEN 1 ELSE 0 END) AS `week_7`, SUM(CASE WHEN `week_number` = 8 THEN 1 ELSE 0 END) AS `week_8`, SUM(CASE WHEN `week_number` = 9 THEN 1 ELSE 0 END) AS `week_9`, SUM(CASE WHEN `week_number` = 10 THEN 1 ELSE 0 END) AS `week_10`, SUM(CASE WHEN `week_number` = 11 THEN 1 ELSE 0 END) AS `week_11`, SUM(CASE WHEN `week_number` = 12 THEN 1 ELSE 0 END) AS `week_12`, SUM(CASE WHEN `week_number` = 13 THEN 1 ELSE 0 END) AS `week_13`, SUM(CASE WHEN `week_number` = 14 THEN 1 ELSE 0 END) AS `week_14`, SUM(CASE WHEN `week_number` = 15 THEN 1 ELSE 0 END) AS `week_15`, SUM(CASE WHEN `week_number` = 16 THEN 1 ELSE 0 END) AS `week_16`, SUM(CASE WHEN `week_number` = 17 THEN 1 ELSE 0 END) AS `week_17`, SUM(CASE WHEN `week_number` = 18 THEN 1 ELSE 0 END) AS `week_18`, SUM(CASE WHEN `week_number` = 19 THEN 1 ELSE 0 END) AS `week_19`, SUM(CASE WHEN `week_number` = 20 THEN 1 ELSE 0 END) AS `week_20`, SUM(CASE WHEN `week_number` = 21 THEN 1 ELSE 0 END) AS `week_21`, SUM(CASE WHEN `week_number` = 22 THEN 1 ELSE 0 END) AS `week_22`, SUM(CASE WHEN `week_number` = 23 THEN 1 ELSE 0 END) AS `week_23`, SUM(CASE WHEN `week_number` = 24 THEN 1 ELSE 0 END) AS `week_24`, SUM(CASE WHEN `week_number` = 25 THEN 1 ELSE 0 END) AS `week_25`, SUM(CASE WHEN `week_number` = 26 THEN 1 ELSE 0 END) AS `week_26`, SUM(CASE WHEN `week_number` = 27 THEN 1 ELSE 0 END) AS `week_27`, SUM(CASE WHEN `week_number` = 28 THEN 1 ELSE 0 END) AS `week_28`, SUM(CASE WHEN `week_number` = 29 THEN 1 ELSE 0 END) AS `week_29`, ROUND(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_0_percentage`, ROUND(SUM(CASE WHEN `week_number` = 1 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_1_percentage`, ROUND(SUM(CASE WHEN `week_number` = 2 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_2_percentage`, ROUND(SUM(CASE WHEN `week_number` = 3 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_3_percentage`, ROUND(SUM(CASE WHEN `week_number` = 4 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_4_percentage`, ROUND(SUM(CASE WHEN `week_number` = 5 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_5_percentage`, ROUND(SUM(CASE WHEN `week_number` = 6 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_6_percentage`, ROUND(SUM(CASE WHEN `week_number` = 7 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_7_percentage`, ROUND(SUM(CASE WHEN `week_number` = 8 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_8_percentage`, ROUND(SUM(CASE WHEN `week_number` = 9 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_9_percentage`, ROUND(SUM(CASE WHEN `week_number` = 10 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_10_percentage`, ROUND(SUM(CASE WHEN `week_number` = 11 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_11_percentage`, ROUND(SUM(CASE WHEN `week_number` = 12 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_12_percentage`, ROUND(SUM(CASE WHEN `week_number` = 13 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_13_percentage`, ROUND(SUM(CASE WHEN `week_number` = 14 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_14_percentage`, ROUND(SUM(CASE WHEN `week_number` = 15 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_15_percentage`, ROUND(SUM(CASE WHEN `week_number` = 16 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_16_percentage`, ROUND(SUM(CASE WHEN `week_number` = 17 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_17_percentage`, ROUND(SUM(CASE WHEN `week_number` = 18 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_18_percentage`, ROUND(SUM(CASE WHEN `week_number` = 19 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_19_percentage`, ROUND(SUM(CASE WHEN `week_number` = 20 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_20_percentage`, ROUND(SUM(CASE WHEN `week_number` = 21 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_21_percentage`, ROUND(SUM(CASE WHEN `week_number` = 22 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_22_percentage`, ROUND(SUM(CASE WHEN `week_number` = 23 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_23_percentage`, ROUND(SUM(CASE WHEN `week_number` = 24 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_24_percentage`, ROUND(SUM(CASE WHEN `week_number` = 25 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_25_percentage`, ROUND(SUM(CASE WHEN `week_number` = 26 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_26_percentage`, ROUND(SUM(CASE WHEN `week_number` = 27 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_27_percentage`, ROUND(SUM(CASE WHEN `week_number` = 28 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_28_percentage`, ROUND(SUM(CASE WHEN `week_number` = 29 THEN 1 ELSE 0 END) * 1 / NULLIF(SUM(CASE WHEN `week_number` = 0 THEN 1 ELSE 0 END), 0) * 1 * 100, 2) AS `week_29_percentage` FROM `weekly_data_with_namber` AS `wd` INNER JOIN `weekly_totals` AS `wt` ON `wd`.`eleos_organization` = `wt`.`eleos_organization` AND `wd`.`first_note_week` = `wt`.`week_start` GROUP BY `wd`.`eleos_organization`, `wd`.`first_note_week`, `wt`.`active_therapists`, `wt`.`activated_therapists`, `wt`.`accumulated_therapists` ORDER BY `first_note_week` ASC") }) + it('should support from unnest', () => { + const sql = `with org_mapping AS ( + select * from ( + select trim(lower(name)) as name, organization_name, + ROW_NUMBER() OVER (PARTITION BY trim(lower(name))) as rn + from bronze_sales_prod.drive_organization_mapping) + where rn = 1 + ) + ,orgs as ( + SELECT trim(COALESCE(om.organization_name, custom_attributes.Account)) AS organization, + * + FROM "bronze_sales_prod"."intercom_all_conversations" + JOIN org_mapping om ON trim(lower(custom_attributes.Account)) = trim(lower(om.name)) + ) + , orgs_dates_metrics as ( + select organization, + cast(from_unixtime(created_at) as date) AS conversation_ctreated_date, + array_agg(custom_attributes) custom_attributes_array, + approx_percentile(case when state = 'closed' then date_diff('minute', from_unixtime(created_at), from_unixtime(updated_at)) else 0 end,0.5) as median_resolution_time_minutes, + sum(case when state IN ('open', 'snoozed') then 1 else 0 end) open_conversations_count, + count(*) overall_conversations_count + from orgs + where organization is not null + group by 1,2 + ) + ,last_year_org_dates as + ( + SELECT distinct om.organization_name as organization, + date_add('day', -sequence, current_date) AS date + FROM UNNEST(sequence(1, 365)) AS t + join org_mapping om on 1=1 + WHERE + date_add('day', -sequence, current_date) >= date_add('year', -1, current_date) + ), counted_tags as ( + SELECT organization + ,cast(from_unixtime(created_at) as date) AS conversation_ctreated_date + ,tag.name as tag + , json_extract_scalar(cast(source AS json), '$.author.email') AS author_email + ,COUNT(*) AS count + FROM orgs + CROSS JOIN UNNEST(tags.tags) AS t(tag) + group by 1,2,3,4 + ), counted_tags_map as ( + SELECT organization, conversation_ctreated_date, author_email, + MAP ( + ARRAY_AGG(tag), + ARRAY_AGG(count) + ) AS tags_counts + FROM + counted_tags + group by 1,2,3 + ) + select od.organization as organization_name + , od.date + , t.author_email + , coalesce(t.tags_counts, MAP()) as tags_counts + , coalesce(o.custom_attributes_array, ARRAY[]) as custom_attributes_array + , COALESCE(o.median_resolution_time_minutes,0) as median_resolution_time_minutes + , COALESCE(o.open_conversations_count,0) as open_conversations_count + , COALESCE(o.overall_conversations_count,0) as overall_conversations_count + , cast(current_timestamp as timestamp(6)) as dbt_insert_time + from last_year_org_dates od + left join orgs_dates_metrics o on od.organization = o.organization and od.date = o.conversation_ctreated_date + left join counted_tags_map t on od.organization = t.organization and od.date = t.conversation_ctreated_date` + expect(getParsedSql(sql)).to.be.equal("WITH `org_mapping` AS (SELECT * FROM (SELECT trim(lower(`name`)) AS `name`, `organization_name`, ROW_NUMBER() OVER (PARTITION BY trim(lower(`name`))) AS `rn` FROM `bronze_sales_prod`.`drive_organization_mapping`) WHERE `rn` = 1), `orgs` AS (SELECT trim(COALESCE(`om`.`organization_name`, `custom_attributes`.`Account`)) AS `organization`, * FROM `bronze_sales_prod`.`intercom_all_conversations` INNER JOIN `org_mapping` AS `om` ON trim(lower(`custom_attributes`.`Account`)) = trim(lower(`om`.`name`))), `orgs_dates_metrics` AS (SELECT `organization`, CAST(from_unixtime(`created_at`) AS DATE) AS `conversation_ctreated_date`, array_agg(`custom_attributes`) AS `custom_attributes_array`, approx_percentile(CASE WHEN `state` = 'closed' THEN date_diff('minute', from_unixtime(`created_at`), from_unixtime(`updated_at`)) ELSE 0 END, 0.5) AS `median_resolution_time_minutes`, SUM(CASE WHEN `state` IN ('open', 'snoozed') THEN 1 ELSE 0 END) AS `open_conversations_count`, COUNT(*) OVER all_conversations_count FROM `orgs` WHERE `organization` IS NOT NULL GROUP BY 1, 2), `last_year_org_dates` AS (SELECT DISTINCT `om`.`organization_name` AS `organization`, date_add('day', -`sequence`, CURRENT_DATE) AS DATE FROM UNNEST(sequence(1, 365)) AS `t` INNER JOIN `org_mapping` AS `om` ON 1 = 1 WHERE date_add('day', -`sequence`, CURRENT_DATE) >= date_add('year', -1, CURRENT_DATE)), `counted_tags` AS (SELECT `organization`, CAST(from_unixtime(`created_at`) AS DATE) AS `conversation_ctreated_date`, `tag`.`name` AS `tag`, json_extract_scalar(CAST(`source` AS JSON), '$.author.email') AS `author_email`, COUNT(*) AS `count` FROM `orgs` CROSS JOIN UNNEST(`tags`.`tags`) AS t(`tag`) GROUP BY 1, 2, 3, 4), `counted_tags_map` AS (SELECT `organization`, `conversation_ctreated_date`, `author_email`, MAP(ARRAY_AGG(`tag`), ARRAY_AGG(`count`)) AS `tags_counts` FROM `counted_tags` GROUP BY 1, 2, 3) SELECT `od`.`organization` AS `organization_name`, `od`.`date`, `t`.`author_email`, coalesce(`t`.`tags_counts`, MAP()) AS `tags_counts`, coalesce(`o`.`custom_attributes_array`, ARRAY[]) AS `custom_attributes_array`, COALESCE(`o`.`median_resolution_time_minutes`, 0) AS `median_resolution_time_minutes`, COALESCE(`o`.`open_conversations_count`, 0) AS `open_conversations_count`, COALESCE(`o`.`overall_conversations_count`, 0) AS `overall_conversations_count`, CAST(CURRENT_TIMESTAMP AS TIMESTAMP(6)) AS `dbt_insert_time` FROM `last_year_org_dates` AS `od` LEFT JOIN `orgs_dates_metrics` AS `o` ON `od`.`organization` = `o`.`organization` AND `od`.`date` = `o`.`conversation_ctreated_date` LEFT JOIN `counted_tags_map` AS `t` ON `od`.`organization` = `t`.`organization` AND `od`.`date` = `t`.`conversation_ctreated_date`") + }) }) \ No newline at end of file diff --git a/test/util.spec.js b/test/util.spec.js index 77a26608..a7b55c7a 100644 --- a/test/util.spec.js +++ b/test/util.spec.js @@ -52,7 +52,7 @@ describe('util function test', () => { expect(columnIdentifierToSql("id")).to.be.equal('"id"') }) - it.only('should support trim query option', () => { + it('should support trim query option', () => { const opt = { "database": "mysql", "parseOptions": {