-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcopier.yml
262 lines (221 loc) · 7.08 KB
/
copier.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
# Project identity: the human-readable name and the identifier derived from it
project_name:
  type: str
  help: "The name of your project or organization."
  default: "My dbt Project"

project_slug:
  type: str
  help: "The slugified name of your project."
  # Lowercases the name, turns spaces and hyphens into underscores, and drops
  # periods and apostrophes.
  default: >-
    {{ project_name | lower | replace(' ', '_') | replace('-', '_') | replace('.', '') | replace("'", '')}}
# Warehouse connection settings that apply to more than one warehouse type
profile_name:
  type: str
  help: |
    The name of your default dev profile, this is the set of configs for connecting
    to your warehouse for development.
  # Falls back to the slugified project name
  default: "{{ project_slug }}"

# The answer here is what the `when` conditions of the warehouse-specific
# questions in this file test against.
data_warehouse:
  type: str
  help: "The data warehouse, platform, or database you're going to build in."
  choices:
    - snowflake
    - bigquery
    - databricks
    - redshift
    - postgres
    - duckdb
  default: snowflake
# Asked only for snowflake, bigquery, and duckdb.
# NOTE(review): redshift and postgres dbt profiles also take a database name —
# confirm the generated profile does not need this answer for them.
database:
  type: str
  help: |
    The name or id of the database (or project in BigQuery) *within* your data platform that you want to build into.
  when: "{{ data_warehouse in [ 'snowflake', 'bigquery', 'duckdb' ] }}"

# No `when` condition: asked for every warehouse
schema:
  type: str
  help: |
    The name of the schema (or dataset in BigQuery) in the database you want to build into.

# Asked only for databricks, redshift, and postgres
host:
  type: str
  help: |
    The URL string of the database host — don't include 'https://'.
  when: "{{ data_warehouse in [ 'databricks', 'redshift', 'postgres' ] }}"

# Asked only for redshift and postgres; no default is offered
port:
  type: int
  help: |
    The port number on your database host to connect to.
  when: "{{ data_warehouse in [ 'redshift', 'postgres' ] }}"
# Snowflake connection settings
account_id:
  type: str
  help: |
    This is usually most easily found in the url of your data warehouse's web app.
    Highly recommended to set this as an env var and put that env var name here not the value itself.
  when: "{{ data_warehouse == 'snowflake'}}"

user_role:
  type: str
  help: "The default user role to use for development."
  default: transformer
  when: "{{ data_warehouse == 'snowflake' }}"

warehouse:
  type: str
  help: "The name of your default development compute warehouse."
  default: transforming
  when: "{{ data_warehouse == 'snowflake' }}"

# Login name; requested for redshift and postgres as well as snowflake
username:
  type: str
  help: "The username you log in to your database with. This may be an email address or a plain string."
  when: "{{ data_warehouse in [ 'snowflake', 'redshift', 'postgres' ] }}"
# BigQuery specific configs
# None for now!
# Databricks specific configs
using_unity_catalog:
  type: bool
  default: false
  help: Are you using Databricks Unity Catalog?
  when: "{{ data_warehouse == 'databricks' }}"

# Only asked when the Unity Catalog question above was answered yes
catalog:
  type: str
  help: Optional catalog name if you're using Unity Catalog.
  when: "{{ using_unity_catalog }}"

http_path:
  type: str
  help: The http path to your SQL Warehouse or all-purpose cluster.
  when: "{{ data_warehouse == 'databricks' }}"
# Redshift connection settings; each question is asked only for redshift
cluster_id:
  type: str
  help: "The id of your warehouse cluster."
  when: "{{ data_warehouse == 'redshift' }}"

iam_profile:
  type: str
  help: "The name of the profile to use for connection."
  default: "default"
  when: "{{ data_warehouse == 'redshift' }}"

region:
  type: str
  help: "The region to connect your IAM profile through."
  default: 'us-east-1'
  when: "{{ data_warehouse == 'redshift' }}"
# Postgres specific configs
# None for now!
# DuckDB settings; asked only when duckdb is the chosen warehouse
duckdb_file_path:
  type: str
  help: |
    Where do you want to build your duckdb database file?
    Strongly suggest the default, which will be automatically gitignored.
  # Default is a file named after the project slug in the current directory
  default: './{{ project_slug }}.db'
  when: "{{ data_warehouse == 'duckdb' }}"
# dbt development configs
# Asked for every warehouse; defaults to 8 threads
thread_count:
  type: int
  default: 8
  help: How many threads do you want dbt to run in parallel?
# Formatting configs
# Each question offers a fixed set of choices; the first listed choice is also
# the default, except for tab_space_size, which defaults to 4.
tab_space_size:
  type: int
  default: 4
  help: How many spaces should the default tab alignment be?
  choices:
    - 2
    - 4

comma_alignment:
  type: str
  default: trailing
  help: Leading or trailing commas?
  choices:
    - trailing
    - leading

capitalization:
  type: str
  default: lowercase
  help: Should keywords, functions and other special commands be upper or lowercase?
  choices:
    - lowercase
    - uppercase

group_by_and_order_by_style:
  type: str
  default: implicit
  help: |
    Should group_by and order_by list field names (explicit — group by customer_id)
    or numbers (implicit — group by 1)?
  choices:
    - implicit
    - explicit
# Opt-in switches for the post-copy tasks; every one defaults to off
virtual_environment:
  type: bool
  help: |
    Do you want copier to initialize and activate a virtual environment,
    and install dependencies?
  default: false

init_repo:
  type: bool
  help: |
    Do you want copier to initialize and make a first commit to a fresh git repo?
  default: false

move_profile:
  type: bool
  help: |
    Do you want copier to move the contents of your generated `profiles.yml` file
    to the appropriate place in your home directory (`~/.dbt/profiles.yml`)?
  default: false
# Python settings; only asked when a virtual environment was requested
virtual_environment_name:
  type: str
  help: "What do you want your virtual environment to be called?"
  default: ".venv"
  when: "{{ virtual_environment }}"
# Files to exclude from template
# Setting `_exclude` replaces Copier's built-in exclusion list, so the usual
# entries (copier.yml, .git, caches, ...) are listed here explicitly.
_exclude:
  - "template-integration-tests"
  - "copier.yml"
  - "~*"
  - "*.py[co]"
  - "__pycache__"
  - ".pytest_cache"
  - ".git"
  - ".github"
  # macOS Finder metadata; the pattern must use the file's real casing to match
  - ".DS_Store"
  - "README.md"
  - ".venv"
  - ".env"
  - "venv"
  - "env"
  - "dev-requirements.txt"
  - "dev-requirements.in"
# Pre copy message
# Rendered as a template, so the destination path is filled in when shown.
_message_before_copy: |
  Let's make a new dbt project in {{ _copier_conf.dst_path }}. ✨
  We'll ask you some questions, you will fill in the prompts,
  and we'll have you set up in no time.
  If at any time you change your mind just Ctrl+C to cancel,
  and nothing will get created. You can also cancel and pass
  a --pretend flag to do a run-through first before the real thing.
# Post cleanup message
# Rendered as a template: interpolates the project name, the destination path,
# and the virtual environment name into the next-step instructions.
_message_after_copy: |
  Your project "{{ project_name }}" has been created successfully! 🎉
  Next steps:
  1. Change directory to the project root:
  $ cd {{ _copier_conf.dst_path }}
  2. Ensure the virtual environment is active:
  $ source {{ virtual_environment_name }}/bin/activate
  3. Run initial dbt build:
  $ dbt deps
  $ dbt build
# Tasks
# Copier runs each task as its own shell command, so an environment activated
# in one task is gone by the next one. The virtual environment's executables
# are therefore invoked by path rather than via `source .../bin/activate`
# (`source` is also not available in every /bin/sh, e.g. dash).
# A task whose condition is false renders to whitespace only.
_tasks:
  # Create the virtual environment, then install pip, uv and the project
  # dependencies into it.
  - "{% if virtual_environment %} python3 -m venv {{ virtual_environment_name }} {% endif %}"
  - "{% if virtual_environment %} {{ virtual_environment_name }}/bin/python -m pip install --upgrade pip {% endif %}"
  - "{% if virtual_environment %} {{ virtual_environment_name }}/bin/python -m pip install uv {% endif %}"
  - "{% if virtual_environment %} {{ virtual_environment_name }}/bin/uv pip compile requirements.in -o requirements.txt {% endif %}"
  # --python targets the new environment explicitly, whatever it is named
  - "{% if virtual_environment %} {{ virtual_environment_name }}/bin/uv pip install --python {{ virtual_environment_name }}/bin/python -r requirements.txt {% endif %}"
  # Append the generated profile to the user's dbt profiles, then remove the
  # local copy so it is not committed below.
  - "{% if move_profile %} mkdir -p ~/.dbt && cat profiles.yml >> ~/.dbt/profiles.yml {% endif %}"
  - "{% if move_profile %} rm profiles.yml {% endif %}"
  # Initialize the repository and make the first commit
  - "{% if init_repo %} git init {% endif %}"
  - "{% if init_repo %} git add --all {% endif %}"
  - "{% if init_repo %} git commit -m 'Initial commit.' {% endif %}"
  # Hooks are installed after the first commit.
  # NOTE(review): assumes pre-commit is installed into the virtual environment
  # by requirements.txt — confirm.
  - "{% if virtual_environment and init_repo %} {{ virtual_environment_name }}/bin/pre-commit install {% endif %}"