-
Notifications
You must be signed in to change notification settings - Fork 58
/
Copy pathtimeliness_delayed_ingestion.yaml
198 lines (181 loc) · 5.35 KB
/
timeliness_delayed_ingestion.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Tables that the rule_bindings section evaluates rules against. Each entity
# identifies one BigQuery table and declares the columns that a binding may
# reference through column_id.
# The <...> dataset/project values are placeholders to fill in before use.
entities:
  # Table whose ingestion time is recorded as a DATE column (date_of_day).
  INGESTION_TABLE_DAY_LEVEL:
    source_database: BIGQUERY
    resource_type: BIGQUERY
    table_name: ingestion_day_level
    dataset_name: <my_bigquery_dataset_id>
    project_name: <my-gcp-project-id>
    columns:
      INVOICE_ID:
        name: invoice_id
        data_type: FLOAT
        description: |-
          invoice id
      DATE_OF_DAY:
        name: date_of_day
        data_type: DATE
        description: |-
          ingestion date
  # Table whose ingestion period is recorded as an INTEGER month id.
  INGESTION_TABLE_MONTH_LEVEL:
    source_database: BIGQUERY
    resource_type: BIGQUERY
    table_name: ingestion_month_level
    dataset_name: <my_bigquery_dataset_id>
    project_name: <my-gcp-project-id>
    columns:
      CLIENT_CD:
        name: client_cd
        data_type: INTEGER
        description: |-
          client cd
      MONTH_ID:
        name: month_id
        data_type: INTEGER
        # NOTE(review): the description says %y%m, but the rule
        # NO_DELAYED_INGESTION_MONTH_LEVEL parses this column with '%Y%m'
        # (four-digit year) - confirm which format the data really uses.
        description: |-
          month id %y%m
  # Table whose ingestion time is recorded as a TIMESTAMP column.
  INGESTION_TABLE_TIMESTAMP_LEVEL:
    source_database: BIGQUERY
    resource_type: BIGQUERY
    table_name: ingestion_timestamp_level
    dataset_name: <my_bigquery_dataset_id>
    project_name: <my-gcp-project-id>
    columns:
      DANA_INGESTION_TIMESTAMP:
        name: dana_ingestion_timestamp
        data_type: TIMESTAMP
        description: |-
          ingestion timestamp
      SALES_MANAGER_ID:
        # The column name is camelCase, unlike the other columns in this file.
        name: salesManagerId
        data_type: STRING
        description: |-
          sales manager id
# Named row filters that bindings select through row_filter_id.
row_filters:
  # Pass-through filter: the expression is the constant True, so it excludes
  # no rows. Quoted so the value stays a string rather than a YAML boolean.
  NONE:
    filter_sql_expr: 'True'
# Dimension labels declared for this configuration. Every rule in this file
# sets `dimension: timeliness`; the remaining labels are declared but unused
# by the rules defined here.
rule_dimensions:
  - timeliness
  - correctness
  - integrity
  - conformity
  - completeness
  - uniqueness
  - accuracy
  - validity
# Timeliness rules, one per time granularity. Each query counts the rows of
# `data` that fall inside a recency window and emits a single row (n = 0)
# only when that count is zero; otherwise it emits nothing.
# NOTE(review): presumably `data` stands for the bound entity's table and an
# emitted row is reported as a failure - confirm against the engine's docs.
# The $-prefixed names are the custom_sql_arguments supplied by each binding.
rules:
  # Emits a row when no record has $ingestion_date_day on or after
  # (current date - $elapsed_time_days days).
  NO_DELAYED_INGESTION_DAY_LEVEL:
    rule_type: CUSTOM_SQL_STATEMENT
    dimension: timeliness
    params:
      custom_sql_arguments:
        - ingestion_date_day
        - elapsed_time_days
      custom_sql_statement: |-
        select * from
        (select count(*) as n
        from data a
        where $ingestion_date_day >= date_sub(current_date(), interval $elapsed_time_days day)
        )
        where n = 0
  # Emits a row when no record has a month id on or after (first day of the
  # current month - $elapsed_time_months months). The month id is cast to a
  # string and parsed with '%Y%m', i.e. four-digit year followed by month.
  NO_DELAYED_INGESTION_MONTH_LEVEL:
    rule_type: CUSTOM_SQL_STATEMENT
    dimension: timeliness
    params:
      custom_sql_arguments:
        - ingestion_date_month
        - elapsed_time_months
      custom_sql_statement: |-
        select * from
        (select count(*) as n
        from data a
        where parse_date('%Y%m', cast($ingestion_date_month as string)) >= date_sub(date_trunc(current_date(), month), interval $elapsed_time_months month)
        )
        where n = 0
  # Emits a row when no record has $ingestion_timestamp on or after
  # (current timestamp - $elapsed_time_hours hours).
  NO_DELAYED_INGESTION_TIMESTAMP_LEVEL:
    rule_type: CUSTOM_SQL_STATEMENT
    dimension: timeliness
    params:
      custom_sql_arguments:
        - ingestion_timestamp
        - elapsed_time_hours
      custom_sql_statement: |-
        select * from
        (select count(*) as n
        from data a
        where $ingestion_timestamp >= timestamp_sub(current_timestamp(), interval $elapsed_time_hours hour)
        )
        where n = 0
# Bindings attach a rule to one entity/column with a row filter. Each
# rule_ids item is a single-key mapping: the key is the rule id and the
# nested mapping supplies that rule's custom_sql_arguments (a column name
# from the entity plus the size of the recency window).
# NOTE(review): the SHOULD_FAIL / SHOULD_SUCCEED suffixes presumably state
# the expected outcome against the accompanying test data - confirm.
rule_bindings:
  # Day level, 1-day window.
  T1_NO_DELAYED_INGESTION_DAY_LEVEL_SHOULD_FAIL:
    entity_id: INGESTION_TABLE_DAY_LEVEL
    column_id: INVOICE_ID
    row_filter_id: NONE
    rule_ids:
      - NO_DELAYED_INGESTION_DAY_LEVEL:
          ingestion_date_day: date_of_day
          elapsed_time_days: 1
    metadata:
      brand: one
  # Day level, 11-day window.
  T1_NO_DELAYED_INGESTION_DAY_LEVEL_SHOULD_SUCCEED:
    entity_id: INGESTION_TABLE_DAY_LEVEL
    column_id: INVOICE_ID
    row_filter_id: NONE
    rule_ids:
      - NO_DELAYED_INGESTION_DAY_LEVEL:
          ingestion_date_day: date_of_day
          elapsed_time_days: 11
    metadata:
      brand: one
  # Month level, window reaching back one month before the current month.
  T1_NO_DELAYED_INGESTION_MONTH_LEVEL_SHOULD_SUCCEED:
    entity_id: INGESTION_TABLE_MONTH_LEVEL
    column_id: CLIENT_CD
    row_filter_id: NONE
    rule_ids:
      - NO_DELAYED_INGESTION_MONTH_LEVEL:
          ingestion_date_month: month_id
          elapsed_time_months: 1
    metadata:
      brand: one
  # Month level, window of 0 months: only the current month counts.
  T1_NO_DELAYED_INGESTION_MONTH_LEVEL_SHOULD_FAIL:
    entity_id: INGESTION_TABLE_MONTH_LEVEL
    column_id: CLIENT_CD
    row_filter_id: NONE
    rule_ids:
      - NO_DELAYED_INGESTION_MONTH_LEVEL:
          ingestion_date_month: month_id
          elapsed_time_months: 0
    metadata:
      brand: one
  # Timestamp level, 1200-hour (50-day) window.
  T1_NO_DELAYED_INGESTION_TIMESTAMP_LEVEL_SHOULD_SUCCEED:
    entity_id: INGESTION_TABLE_TIMESTAMP_LEVEL
    column_id: SALES_MANAGER_ID
    row_filter_id: NONE
    rule_ids:
      - NO_DELAYED_INGESTION_TIMESTAMP_LEVEL:
          ingestion_timestamp: dana_ingestion_timestamp
          elapsed_time_hours: 1200
    metadata:
      brand: one
  # Timestamp level, 10-hour window.
  T1_NO_DELAYED_INGESTION_TIMESTAMP_LEVEL_SHOULD_FAIL:
    entity_id: INGESTION_TABLE_TIMESTAMP_LEVEL
    column_id: SALES_MANAGER_ID
    row_filter_id: NONE
    rule_ids:
      - NO_DELAYED_INGESTION_TIMESTAMP_LEVEL:
          ingestion_timestamp: dana_ingestion_timestamp
          elapsed_time_hours: 10
    metadata:
      brand: one