diff --git a/data/transform/models/marts/telemetry/base/cli_executions_base.sql b/data/transform/models/marts/telemetry/base/cli_executions_base.sql index ef53dae4..3b7c4f76 100644 --- a/data/transform/models/marts/telemetry/base/cli_executions_base.sql +++ b/data/transform/models/marts/telemetry/base/cli_executions_base.sql @@ -80,7 +80,6 @@ combined AS ( NULL AS machine, NULL AS system_release, NULL AS is_dev_build, - NULL AS environment_name_hash, NULL AS python_implementation, NULL AS system_name, NULL AS system_version, @@ -158,7 +157,6 @@ combined AS ( unstructured_executions.machine, unstructured_executions.system_release, unstructured_executions.is_dev_build, - unstructured_executions.environment_name_hash, unstructured_executions.python_implementation, unstructured_executions.system_name, unstructured_executions.system_version, @@ -262,7 +260,6 @@ SELECT combined.machine, combined.system_release, combined.is_dev_build, - combined.environment_name_hash, combined.python_implementation, combined.system_name, combined.system_version, diff --git a/data/transform/models/marts/telemetry/base/environment_dim.sql b/data/transform/models/marts/telemetry/base/environment_dim.sql new file mode 100644 index 00000000..3944239d --- /dev/null +++ b/data/transform/models/marts/telemetry/base/environment_dim.sql @@ -0,0 +1,44 @@ +{{ + config(materialized='table') +}} + +SELECT + {{ dbt_utils.surrogate_key( + [ + 'structured_executions.project_id', + 'cmd_parsed_all.environment' + ] + ) }} AS environment_pk, + structured_executions.project_id, + cmd_parsed_all.environment AS env_hash, + COALESCE( + hash_lookup.unhashed_value, + cmd_parsed_all.environment + ) AS env_name +FROM {{ ref('structured_executions') }} +LEFT JOIN + {{ ref('cmd_parsed_all') }} ON + structured_executions.command = cmd_parsed_all.command +LEFT JOIN {{ ref('hash_lookup') }} + ON cmd_parsed_all.environment = hash_lookup.hash_value + AND hash_lookup.category = 'environment' + +UNION ALL + +SELECT + {{ dbt_utils.surrogate_key( + [ + 'unstructured_executions.project_id', + 'unstructured_executions.environment_name_hash' + ] + ) }} AS environment_pk, + unstructured_executions.project_id, + unstructured_executions.environment_name_hash AS env_hash, + COALESCE( + hash_lookup.unhashed_value, + unstructured_executions.environment_name_hash + ) AS env_name +FROM {{ ref('unstructured_executions') }} +LEFT JOIN {{ ref('hash_lookup') }} + ON unstructured_executions.environment_name_hash = hash_lookup.hash_value + AND hash_lookup.category = 'environment' diff --git a/data/transform/models/marts/telemetry/base/environments.sql b/data/transform/models/marts/telemetry/base/environments.sql deleted file mode 100644 index 5ac9d456..00000000 --- a/data/transform/models/marts/telemetry/base/environments.sql +++ /dev/null @@ -1,14 +0,0 @@ -SELECT - structured_executions.execution_id, - cmd_parsed_all.environment AS env_hash, - hash_lookup.unhashed_value AS env_name, - NULL AS is_ephemeral, - NULL AS is_cicd, - NULL AS is_cloud -FROM {{ ref('structured_executions') }} -LEFT JOIN - {{ ref('cmd_parsed_all') }} ON - structured_executions.command = cmd_parsed_all.command -LEFT JOIN {{ ref('hash_lookup') }} - ON cmd_parsed_all.environment = hash_lookup.hash_value - AND hash_lookup.category = 'environment' diff --git a/data/transform/models/marts/telemetry/base/execution_env_map.sql b/data/transform/models/marts/telemetry/base/execution_env_map.sql new file mode 100644 index 00000000..dc278c64 --- /dev/null +++ b/data/transform/models/marts/telemetry/base/execution_env_map.sql @@ -0,0 +1,28 @@ +{{ + config(materialized='table') +}} + +SELECT + {{ dbt_utils.surrogate_key( + [ + 'structured_executions.project_id', + 'cmd_parsed_all.environment' + ] + ) }} AS environment_fk, + structured_executions.execution_id +FROM {{ ref('structured_executions') }} +LEFT JOIN + {{ ref('cmd_parsed_all') }} ON + structured_executions.command = cmd_parsed_all.command + +UNION ALL + +SELECT + {{ dbt_utils.surrogate_key( + [ + 'unstructured_executions.project_id', + 'unstructured_executions.environment_name_hash' + ] + ) }} AS environment_fk, + unstructured_executions.execution_id +FROM {{ ref('unstructured_executions') }} diff --git a/data/transform/models/marts/telemetry/base/pipeline_executions.sql b/data/transform/models/marts/telemetry/base/pipeline_executions.sql index d4374b4b..a86b994a 100644 --- a/data/transform/models/marts/telemetry/base/pipeline_executions.sql +++ b/data/transform/models/marts/telemetry/base/pipeline_executions.sql @@ -2,7 +2,7 @@ WITH plugin_prep AS ( SELECT plugin_executions.execution_id, cli_executions_base.project_id, - cli_executions_base.environment_name_hash AS env_id, + environment_dim.env_hash AS env_id, ARRAY_AGG( COALESCE( plugin_executions.plugin_surrogate_key, @@ -12,6 +12,10 @@ WITH plugin_prep AS ( FROM {{ ref('plugin_executions') }} LEFT JOIN {{ ref('cli_executions_base') }} ON plugin_executions.execution_id = cli_executions_base.execution_id + LEFT JOIN {{ ref('execution_env_map') }} + ON plugin_executions.execution_id = execution_env_map.execution_id + LEFT JOIN {{ ref('environment_dim') }} + ON execution_env_map.environment_fk = environment_dim.environment_pk WHERE cli_executions_base.cli_command IN ('elt', 'run') GROUP BY 1, 2, 3 ) diff --git a/data/transform/models/marts/telemetry/base/schema.yml b/data/transform/models/marts/telemetry/base/schema.yml index 42db1094..574569f6 100644 --- a/data/transform/models/marts/telemetry/base/schema.yml +++ b/data/transform/models/marts/telemetry/base/schema.yml @@ -134,4 +134,19 @@ models: - name: event_date tests: - not_null - \ No newline at end of file + + - name: environment_dim + description: This table contains attributes about project environments. + columns: + - name: environment_pk + tests: + - not_null + - unique + + - name: execution_env_map + columns: + - name: environment_fk + tests: + - relationships: + to: ref('environment_dim') + field: environment_pk diff --git a/data/transform/models/marts/telemetry/fact_cli_executions.sql b/data/transform/models/marts/telemetry/fact_cli_executions.sql index 882b67d1..80c0c736 100644 --- a/data/transform/models/marts/telemetry/fact_cli_executions.sql +++ b/data/transform/models/marts/telemetry/fact_cli_executions.sql @@ -9,7 +9,8 @@ SELECT cli_executions_base.python_version, ip_address_dim.ip_address_hash, ip_address_dim.cloud_provider, - ip_address_dim.execution_location + ip_address_dim.execution_location, + environment_dim.env_name FROM {{ ref('cli_executions_base') }} LEFT JOIN {{ ref('pipeline_executions') }} ON cli_executions_base.execution_id = pipeline_executions.execution_id @@ -19,3 +20,7 @@ LEFT JOIN {{ ref('date_dim') }} ON cli_executions_base.event_date = date_dim.date_day LEFT JOIN {{ ref('ip_address_dim') }} ON cli_executions_base.ip_address_hash = ip_address_dim.ip_address_hash +LEFT JOIN {{ ref('execution_env_map') }} + ON cli_executions_base.execution_id = execution_env_map.execution_id +LEFT JOIN {{ ref('environment_dim') }} + ON execution_env_map.environment_fk = environment_dim.environment_pk diff --git a/data/transform/models/marts/telemetry/fact_plugin_usage.sql b/data/transform/models/marts/telemetry/fact_plugin_usage.sql index 32aa45ec..bd8429ca 100644 --- a/data/transform/models/marts/telemetry/fact_plugin_usage.sql +++ b/data/transform/models/marts/telemetry/fact_plugin_usage.sql @@ -20,8 +20,7 @@ SELECT plugin_executions.plugin_surrogate_key, -- CLI Attributes cli_executions_base.cli_command, - cli_executions_base.environment_name_hash AS env_id, - hash_lookup.unhashed_value AS env_name, + environment_dim.env_name, cli_executions_base.exit_code AS cli_exit_code, cli_executions_base.meltano_version, cli_executions_base.num_cpu_cores_available, @@ -58,7 +57,7 @@ LEFT JOIN {{ ref('project_dim') }} ON cli_executions_base.project_id = project_dim.project_id LEFT JOIN {{ ref('ip_address_dim') }} ON cli_executions_base.ip_address_hash = ip_address_dim.ip_address_hash --- TODO: move this parsing up stream -LEFT JOIN {{ ref('hash_lookup') }} - ON cli_executions_base.environment_name_hash = hash_lookup.hash_value - AND hash_lookup.category = 'environment' +LEFT JOIN {{ ref('execution_env_map') }} + ON cli_executions_base.execution_id = execution_env_map.execution_id +LEFT JOIN {{ ref('environment_dim') }} + ON execution_env_map.environment_fk = environment_dim.environment_pk diff --git a/data/transform/models/staging/snowplow/stg_snowplow__events.sql b/data/transform/models/staging/snowplow/stg_snowplow__events.sql index 0f2c1351..764f7558 100644 --- a/data/transform/models/staging/snowplow/stg_snowplow__events.sql +++ b/data/transform/models/staging/snowplow/stg_snowplow__events.sql @@ -16,10 +16,9 @@ WITH source AS ( {% if env_var("MELTANO_ENVIRONMENT") == "cicd" %} - FROM raw.snowplow.events - WHERE derived_tstamp::TIMESTAMP >= DATEADD('day', -7, CURRENT_DATE) - -- filter test events - AND app_id != 'test' + FROM raw.snowplow.events SAMPLE ROW (100000 ROWS) + WHERE COALESCE(app_id, '') != 'test' + {% else %} FROM {{ source('snowplow', 'events') }}