Skip to content

Commit

Permalink
Merge pull request #4 from tailsdotcom/build_speed
Browse files Browse the repository at this point in the history
Improve build speed and add dim_dbt__models
  • Loading branch information
NiallRees authored Mar 3, 2021
2 parents 834074a + 9de6729 commit 413068e
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 4 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@ This package builds a mart of tables from dbt artifacts loaded into a table. It

Models included:

- `fct_dbt_model_executions`
- `fct_dbt_run_results`
- `dim_dbt__models`
- `fct_dbt__model_executions`
- `fct_dbt__latest_full_model_executions`
- `fct_dbt__critical_path`
- `fct_dbt_run_results`

The critical path model determines the slowest route through your DAG, which provides you with the information needed to make a targeted effort to reduce `dbt run` times. For example:

Expand Down
39 changes: 39 additions & 0 deletions models/incremental/dim_dbt__models.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{{ config( materialized='incremental', unique_key='manifest_model_id' ) }}

-- Incremental dimension of dbt model metadata read from stg_dbt__models,
-- keyed on manifest_model_id. On incremental runs only rows from artifacts
-- newer than the newest artifact already loaded are selected.

with dbt_models as (

    select * from {{ ref('stg_dbt__models') }}

),

-- Restrict the staged rows to those not yet present in the target table.
dbt_models_incremental as (

    select *
    from dbt_models

    {% if is_incremental() %}
    -- this filter will only be applied on an incremental run
    where
        artifact_generated_at > (select max(artifact_generated_at) from {{ this }})
        -- max() over an empty target (or one holding only null timestamps) is
        -- null, so the comparison above would reject every row and the model
        -- would stay empty until a full refresh; load everything in that case.
        or (select max(artifact_generated_at) from {{ this }}) is null
    {% endif %}

),

-- Explicit column list so the target table's shape does not drift with staging.
fields as (

    select
        manifest_model_id,
        command_invocation_id,
        artifact_generated_at,
        node_id,
        name,
        model_schema,
        depends_on_nodes,
        package_name,
        model_path,
        checksum,
        model_materialization
    from dbt_models_incremental

)

select * from fields
4 changes: 2 additions & 2 deletions models/incremental/fct_dbt__model_executions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
with models as (

select *
from {{ ref('stg_dbt__models') }}
from {{ ref('dim_dbt__models') }}

),

model_executions as (

select *
from {{ ref('stg_dbt__model_executions') }}
from {{ ref('int_dbt__model_executions') }}

),

Expand Down
40 changes: 40 additions & 0 deletions models/incremental/int_dbt__model_executions.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{{ config( materialized='incremental', unique_key='model_execution_id' ) }}

-- Incremental copy of stg_dbt__model_executions, keyed on model_execution_id.
-- On incremental runs only rows from artifacts newer than the newest artifact
-- already loaded are selected.

with model_executions as (

    select *
    from {{ ref('stg_dbt__model_executions') }}

),

-- Restrict the staged rows to those not yet present in the target table.
model_executions_incremental as (

    select *
    from model_executions

    {% if is_incremental() %}
    -- this filter will only be applied on an incremental run
    where
        artifact_generated_at > (select max(artifact_generated_at) from {{ this }})
        -- max() over an empty target (or one holding only null timestamps) is
        -- null, so the comparison above would reject every row and the model
        -- would stay empty until a full refresh; load everything in that case.
        or (select max(artifact_generated_at) from {{ this }}) is null
    {% endif %}

),

-- Explicit column list so the target table's shape does not drift with staging.
fields as (

    select
        model_execution_id,
        command_invocation_id,
        artifact_generated_at,
        was_full_refresh,
        node_id,
        thread_id,
        status,
        compile_started_at,
        query_completed_at,
        total_node_runtime,
        rows_affected
    from model_executions_incremental

)

select * from fields
26 changes: 26 additions & 0 deletions models/schemas.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,29 @@ models:
description: Was the run executed with a --full-refresh flag?
- name: env_*
description: Columns for the environment variables set when the command was executed.

# Schema entry for the dim_dbt__models model (models/incremental/dim_dbt__models.sql).
- name: dim_dbt__models
description: All dbt model metadata from every manifest.json.
columns:
# Key column: tested as unique and not null; the model's config also uses it as the incremental unique_key.
- name: manifest_model_id
description: Primary key generated from the command_invocation_id and checksum.
tests:
- unique
- not_null
- name: command_invocation_id
description: The id of the command which resulted in the source artifact's generation.
# Used by the model's incremental filter to pick up only newer artifacts.
- name: artifact_generated_at
description: Timestamp of when the source artifact was generated.
- name: node_id
description: Unique id for the node, in the form of model.[package_name].[model_name]
- name: name
description: The model name.
# NOTE(review): no description here, unlike most sibling columns - add one once the meaning is confirmed.
- name: model_schema
- name: depends_on_nodes
description: List of node ids the model depends on.
# NOTE(review): no description here, unlike most sibling columns - add one once the meaning is confirmed.
- name: package_name
- name: model_path
description: Filepath of the model.
- name: checksum
description: Unique identifier for the model. If a model is unchanged between separate executions this will remain the same.
# NOTE(review): no description here, unlike most sibling columns - add one once the meaning is confirmed.
- name: model_materialization

0 comments on commit 413068e

Please sign in to comment.