Skip to content

Commit

Permalink
Merge pull request #21 from calogica/refactor/expect_column_values_to…
Browse files Browse the repository at this point in the history
…_be_within_n_moving_stdevs

Refactor expect_column_values_to_be_within_n_moving_stdevs
  • Loading branch information
clausherther authored Mar 3, 2021
2 parents 784708a + ccb3cae commit 2b0d8e6
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 16 deletions.
30 changes: 27 additions & 3 deletions integration_tests/models/schema_tests/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,15 @@ models:
- dbt_expectations.expect_column_values_to_be_within_n_stdevs:
sigma_threshold: 6
- dbt_expectations.expect_column_values_to_be_within_n_moving_stdevs:
group_by: date_day
date_column_name: date_day
sigma_threshold: 6
take_logs: true


- name: timeseries_data_extended
tests:
- dbt_expectations.expect_table_columns_to_match_ordered_list:
column_list: ["date_day", "row_value", "row_value_log"]

columns:
- name: date_day
tests:
Expand All @@ -77,10 +77,34 @@ models:
- name: row_value_log
tests:
- dbt_expectations.expect_column_values_to_be_within_n_moving_stdevs:
group_by: date_day
date_column_name: cast(date_day as datetime)
sigma_threshold: 6
take_logs: false

- name: timeseries_hourly_data_extended
columns:
- name: date_hour
tests:
- dbt_expectations.expect_row_values_to_have_recent_data:
datepart: hour
interval: 12
- dbt_expectations.expect_column_values_to_be_of_type:
column_type: datetime
- dbt_expectations.expect_column_values_to_be_in_type_list:
column_type_list: [date, datetime]


- name: row_value_log
tests:
- dbt_expectations.expect_column_values_to_be_within_n_moving_stdevs:
date_column_name: cast(date_hour as datetime)
period: hour
trend_periods: 48
test_periods: 12
sigma_threshold: 6
take_logs: true


- name: data_test
tests:
- dbt_expectations.expect_compound_columns_to_be_unique:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
-- Synthetic hourly time series for the schema tests: every hour of the date
-- spine is fanned out into ten rows, each carrying a deterministic value and
-- the natural log of that value.
with hourly_spine as (

    -- hour-grain date spine starting ten days ago and bounded by today
    {{ dbt_utils.date_spine('hour',
        start_date=dbt_date.n_days_ago(10),
        end_date=dbt_date.today()
    ) }}

),

hourly_values as (

    -- the cross join repeats each hour once per array element (1 through 10),
    -- so row_value takes the values 100, 200, ..., 1000 for every hour
    select
        spine.date_hour,
        cast(floor(100 * multiplier) as {{ dbt_utils.type_int() }}) as row_value
    from
        hourly_spine spine
        cross join
        unnest(generate_array(1, 10)) as multiplier

),

hourly_values_with_log as (

    -- nullif turns a zero into null so the log is never taken of 0
    select
        date_hour,
        row_value,
        {{ dbt_expectations.log_natural('nullif(row_value, 0)') }} as row_value_log
    from
        hourly_values

)

select
    date_hour,
    row_value,
    row_value_log
from
    hourly_values_with_log
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@ coalesce({{ metric_column }}, 0)

{% macro test_expect_column_values_to_be_within_n_moving_stdevs(model,
column_name,
group_by,
lookback_days=1,
trend_days=7,
test_days=14,
date_column_name,
period='day',
lookback_periods=1,
trend_periods=7,
test_periods=14,
sigma_threshold=3,
sigma_threshold_upper=None,
sigma_threshold_lower=None,
Expand All @@ -30,7 +31,7 @@ with metric_values as (
with grouped_metric_values as (

select
{{ group_by }} as metric_date,
{{ dbt_utils.date_trunc(period, date_column_name) }} as metric_period,
sum({{ column_name }}) as agg_metric_value
from
{{ model }}
Expand All @@ -43,7 +44,7 @@ with metric_values as (

select
*,
lag(agg_metric_value, {{ lookback_days }}) over(order by metric_date) as prior_agg_metric_value
lag(agg_metric_value, {{ lookback_periods }}) over(order by metric_period) as prior_agg_metric_value
from
grouped_metric_values d

Expand Down Expand Up @@ -73,11 +74,11 @@ metric_moving_calcs as (
select
*,
avg(metric_test_value)
over(order by metric_date rows
between {{ trend_days }} preceding and 1 preceding) as metric_test_rolling_average,
over(order by metric_period rows
between {{ trend_periods }} preceding and 1 preceding) as metric_test_rolling_average,
stddev(metric_test_value)
over(order by metric_date rows
between {{ trend_days }} preceding and 1 preceding) as metric_test_rolling_stddev
over(order by metric_period rows
between {{ trend_periods }} preceding and 1 preceding) as metric_test_rolling_stddev
from
metric_values

Expand All @@ -87,7 +88,7 @@ metric_sigma as (
select
*,
(metric_test_value - metric_test_rolling_average) as metric_test_delta,
(metric_test_value - metric_test_rolling_average)/metric_test_rolling_stddev as metric_test_sigma
(metric_test_value - metric_test_rolling_average)/nullif(metric_test_rolling_stddev, 0) as metric_test_sigma
from
metric_moving_calcs

Expand All @@ -97,8 +98,14 @@ select
from
metric_sigma
where
metric_date >= date({{ dbt_date.n_days_ago(test_days) }}) and
metric_date < {{ dbt_date.today() }} and

metric_period >= cast(
{{ dbt_utils.dateadd(period, -test_periods, dbt_utils.date_trunc(period, dbt_date.now())) }}
as {{ dbt_utils.type_timestamp() }})
and
metric_period < {{ dbt_utils.date_trunc(period, dbt_date.now()) }}
and

not (
metric_test_sigma >= {{ sigma_threshold_lower }} and
metric_test_sigma <= {{ sigma_threshold_upper }}
Expand Down

0 comments on commit 2b0d8e6

Please sign in to comment.