diff --git a/.buildkite/hooks/pre-command b/.buildkite/hooks/pre-command index 04c85c0..81b7b00 100644 --- a/.buildkite/hooks/pre-command +++ b/.buildkite/hooks/pre-command @@ -21,4 +21,5 @@ export CI_SNOWFLAKE_DBT_USER=$(gcloud secrets versions access latest --secret="C export CI_SNOWFLAKE_DBT_WAREHOUSE=$(gcloud secrets versions access latest --secret="CI_SNOWFLAKE_DBT_WAREHOUSE" --project="dbt-package-testing-363917") export CI_DATABRICKS_DBT_HOST=$(gcloud secrets versions access latest --secret="CI_DATABRICKS_DBT_HOST" --project="dbt-package-testing-363917") export CI_DATABRICKS_DBT_HTTP_PATH=$(gcloud secrets versions access latest --secret="CI_DATABRICKS_DBT_HTTP_PATH" --project="dbt-package-testing-363917") -export CI_DATABRICKS_DBT_TOKEN=$(gcloud secrets versions access latest --secret="CI_DATABRICKS_DBT_TOKEN" --project="dbt-package-testing-363917") \ No newline at end of file +export CI_DATABRICKS_DBT_TOKEN=$(gcloud secrets versions access latest --secret="CI_DATABRICKS_DBT_TOKEN" --project="dbt-package-testing-363917") +export CI_DATABRICKS_DBT_CATALOG=$(gcloud secrets versions access latest --secret="CI_DATABRICKS_DBT_CATALOG" --project="dbt-package-testing-363917") \ No newline at end of file diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 6e76ea0..281f1d4 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -69,5 +69,6 @@ steps: - "CI_DATABRICKS_DBT_HOST" - "CI_DATABRICKS_DBT_HTTP_PATH" - "CI_DATABRICKS_DBT_TOKEN" + - "CI_DATABRICKS_DBT_CATALOG" commands: | bash .buildkite/scripts/run_models.sh databricks \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 73dce21..d748279 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,14 @@ +# dbt_shopify_source v0.8.2 + +## Bug Fixes +[PR #59](https://github.com/fivetran/dbt_shopify_source/pull/59) introduces the following changes: +- The `fivetran_utils.union_data` [macro](https://github.com/fivetran/dbt_fivetran_utils/pull/100) has been expanded to 
handle checking if a source table exists. Previously in the Shopify source package, this check happened outside of the macro and depended on the user having a defined shopify `source`. If the package anticipates a table that you do not have in any schema or database, it will return a **completely** empty table (ie `limit 0`) that will work seamlessly with downstream transformations. + - A compilation message will be raised when a staging model is completely empty. This compiler warning can be turned off by the end user by setting the `fivetran__remove_empty_table_warnings` variable to `True` (see https://github.com/fivetran/dbt_fivetran_utils/tree/releases/v0.4.latest#union_data-source for details). +- A uniqueness test has been placed on the `order_line_id`, `index`, and `source_relation` columns in `stg_shopify__tax_line`, as it was previously missing a uniqueness test. + +## Contributors +- [@dfagnan](https://github.com/dfagnan) (Issue https://github.com/fivetran/dbt_shopify_source/issues/57) + # dbt_shopify_source v0.8.1 ## Bug Fixes diff --git a/DECISIONLOG.md b/DECISIONLOG.md index d4653ab..355194f 100644 --- a/DECISIONLOG.md +++ b/DECISIONLOG.md @@ -2,13 +2,15 @@ In creating this package, which is meant for a wide range of use cases, we had to take opinionated stances on a few different questions we came across during development. We've consolidated significant choices we made here, and will continue to update as the package evolves. -## Creating Empty Tables for Refunds, Order Line Refunds, and Order Adjustments +## Creating Empty Staging Models for Non-Existent Source Tables -Source tables related to `refunds`, `order_line_refunds`, and `order_adjustments` are created in the Shopify schema dyanmically. For example, if your shop has not incurred any refunds, you will not have a `refund` table yet until you do refund an order. +Empty staging models are created in the Shopify schema dynamically if the respective source tables do not exist in your raw source schema. 
For example, if your shop has not incurred any refunds, you will not have a `refund` table yet until you do refund an order. -Thus, the source package will create empty (1 row of all `NULL` fields) staging models if these source tables do not exist in your Shopify schema yet, and the transform package will work seamlessly with these empty models. Once `refund`, `order_line_refund`, or `order_adjustment` exists in your schema, the source and transform packages will automatically reference the new populated table(s). ([example](https://github.com/fivetran/dbt_shopify_source/blob/main/models/tmp/stg_shopify__refund_tmp.sql)). +Thus, the source package will return **completely** empty staging models (ie `limit 0`) if these source tables do not exist in your Shopify schema yet, and the transform package will work seamlessly with these empty models. Once an anticipated source table exists in your schema, the source and transform packages will automatically reference the new populated table(s). ([example](https://github.com/fivetran/dbt_shopify_source/blob/main/models/tmp/stg_shopify__refund_tmp.sql)). -> In previous versions of the package, you had to manually enable or disable transforms of `refund`, `order_line_refund`, or `order_adjustment` through variables. Because this required you to monitor your Shopify account/schema and update the variable(s) accordingly, we decided to pursue a more automated solution. +The package will output a compiler message that the respective staging model is empty. This compiler warning can be turned off by the end user by setting the `fivetran__remove_empty_table_warnings` variable to `True` (see https://github.com/fivetran/dbt_fivetran_utils/tree/releases/v0.4.latest#union_data-source for details). + +> In previous versions of the package, you had to manually enable or disable transforms of `refund`, `order_line_refund`, or `order_adjustment` through variables. 
Because this required you to monitor your Shopify account/schema and update the variable(s) accordingly, we decided to pursue a more automated solution that works for all tables. ## Keeping Deleted Entities diff --git a/README.md b/README.md index e28b0d2..06d7e1e 100644 --- a/README.md +++ b/README.md @@ -133,7 +133,7 @@ models: shopify_source: +schema: my_new_schema_name # leave blank for just the target_schema ``` -### Change the source table references +### Change the source table references (not available if unioning multiple Shopify connectors) If an individual source table has a different name than the package expects, add the table name as it appears in your destination to the respective variable: > IMPORTANT: See this project's [`src_shopify.yml`](https://github.com/fivetran/dbt_shopify_source/blob/main/models/src_shopify.yml) for the default names. @@ -144,6 +144,23 @@ vars: shopify__identifier: your_table_name ``` +If you are making use of the `shopify_union_schemas` or `shopify_union_databases` variables, the package will assume individual tables to have their default names. + +### Disable Compiler Warnings for Empty Tables + +Empty staging models are created in the Shopify schema dynamically if the respective source tables do not exist in your raw source schema. For example, if your shop has not incurred any refunds, you will not have a `refund` table yet until you do refund an order, and the package will create an empty `stg_shopify__refund` model. + +The source package will return **completely** empty staging models (ie `limit 0`) if these source tables do not exist in your Shopify schema yet, and the transform package will work seamlessly with these empty models. Once an anticipated source table exists in your schema, the source and transform packages will automatically reference the new populated table(s). ([example](https://github.com/fivetran/dbt_shopify_source/blob/main/models/tmp/stg_shopify__refund_tmp.sql)). 
+ +The package will raise a compiler warning message that the respective staging model is empty. The compiler warning can be turned off by the end user by setting the `fivetran__remove_empty_table_warnings` variable to `True`. + +```yml +# dbt_project.yml + +vars: + fivetran__remove_empty_table_warnings: true # default = false +``` + ## (Optional) Step 7: Orchestrate your models with Fivetran Transformations for dbt Core™ diff --git a/dbt_project.yml b/dbt_project.yml index 7305b66..e3f84ec 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,5 +1,5 @@ name: 'shopify_source' -version: '0.8.1' +version: '0.8.2' config-version: 2 require-dbt-version: [">=1.3.0", "<2.0.0"] models: @@ -52,4 +52,4 @@ vars: order_pass_through_columns: [] product_pass_through_columns: [] product_variant_pass_through_columns: [] - transaction_pass_through_columns: [] \ No newline at end of file + transaction_pass_through_columns: [] diff --git a/integration_tests/ci/sample.profiles.yml b/integration_tests/ci/sample.profiles.yml index 5e5bbad..e76518c 100644 --- a/integration_tests/ci/sample.profiles.yml +++ b/integration_tests/ci/sample.profiles.yml @@ -16,13 +16,13 @@ integration_tests: pass: "{{ env_var('CI_REDSHIFT_DBT_PASS') }}" dbname: "{{ env_var('CI_REDSHIFT_DBT_DBNAME') }}" port: 5439 - schema: shopify_source_integration_tests_5 + schema: shopify_source_integration_tests_6 threads: 8 bigquery: type: bigquery method: service-account-json project: 'dbt-package-testing' - schema: shopify_source_integration_tests_5 + schema: shopify_source_integration_tests_6 threads: 8 keyfile_json: "{{ env_var('GCLOUD_SERVICE_KEY') | as_native }}" snowflake: @@ -33,7 +33,7 @@ integration_tests: role: "{{ env_var('CI_SNOWFLAKE_DBT_ROLE') }}" database: "{{ env_var('CI_SNOWFLAKE_DBT_DATABASE') }}" warehouse: "{{ env_var('CI_SNOWFLAKE_DBT_WAREHOUSE') }}" - schema: shopify_source_integration_tests_5 + schema: shopify_source_integration_tests_6 threads: 8 postgres: type: postgres @@ -42,13 +42,13 @@ 
integration_tests: pass: "{{ env_var('CI_POSTGRES_DBT_PASS') }}" dbname: "{{ env_var('CI_POSTGRES_DBT_DBNAME') }}" port: 5432 - schema: shopify_source_integration_tests_5 + schema: shopify_source_integration_tests_6 threads: 8 databricks: - catalog: null + catalog: "{{ env_var('CI_DATABRICKS_DBT_CATALOG') }}" host: "{{ env_var('CI_DATABRICKS_DBT_HOST') }}" http_path: "{{ env_var('CI_DATABRICKS_DBT_HTTP_PATH') }}" - schema: shopify_source_integration_tests_5 + schema: shopify_source_integration_tests_6 threads: 2 token: "{{ env_var('CI_DATABRICKS_DBT_TOKEN') }}" type: databricks diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index f11f6d9..aceb273 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -1,10 +1,10 @@ name: 'shopify_source_integration_tests' -version: '0.8.1' +version: '0.8.2' profile: 'integration_tests' config-version: 2 vars: - shopify_schema: shopify_source_integration_tests_5 + shopify_schema: shopify_source_integration_tests_6 shopify_source: shopify_customer_identifier: "shopify_customer_data" shopify_order_line_refund_identifier: "shopify_order_line_refund_data" @@ -136,6 +136,7 @@ seeds: shopify_tax_line_data: +column_types: price: float + rate: float shopify_price_rule_data: +column_types: prerequisite_subtotal_range: float @@ -166,8 +167,4 @@ seeds: markup: "{{ 'string' if target.type in ('bigquery', 'spark', 'databricks') else 'varchar' }}" price: float original_shop_markup: "{{ 'string' if target.type in ('bigquery', 'spark', 'databricks') else 'varchar' }}" - original_shop_price: "{{ 'string' if target.type in ('bigquery', 'spark', 'databricks') else 'varchar' }}" - shopify_tax_line_data: - +column_types: - price: float - rate: float \ No newline at end of file + original_shop_price: "{{ 'string' if target.type in ('bigquery', 'spark', 'databricks') else 'varchar' }}" \ No newline at end of file diff --git a/models/src_shopify.yml b/models/src_shopify.yml index 
3133b12..3a2a1d3 100644 --- a/models/src_shopify.yml +++ b/models/src_shopify.yml @@ -1,9 +1,9 @@ version: 2 sources: - - name: shopify + - name: shopify # This source will only be used if you are using a single Shopify source connector. If multiple sources are being unioned, their tables will be directly referenced via adapter.get_relation schema: "{{ var('shopify_schema', 'shopify') }}" - database: "{% if target.type != 'spark'%}{{ var('shopify_database', target.database) }}{% endif %}" + database: "{% if target.type not in ('spark', 'databricks') %}{{ var('shopify_database', target.database) }}{% endif %}" tables: - name: order description: Each record represents an order in Shopify. diff --git a/models/stg_shopify.yml b/models/stg_shopify.yml index 0741a75..bf88b54 100644 --- a/models/stg_shopify.yml +++ b/models/stg_shopify.yml @@ -667,6 +667,7 @@ models: description: The payment gateway used by the checkout. - name: checkout_id description: The ID for the checkout. + tests: - not_null - name: landing_site_base_url description: The URL for the page where the customer entered the shop. @@ -875,7 +876,7 @@ models: - dbt_utils.unique_combination_of_columns: combination_of_columns: - fulfillment_id - - source_relation + - source_relation columns: - name: _fivetran_synced description: "{{ doc('_fivetran_synced') }}" @@ -1230,7 +1231,7 @@ models: description: "{{ doc('_fivetran_synced') }}" - name: key description: Key of the tag pair. - tests: + tests: - not_null - name: order_id description: ID of the order url being tagged. @@ -1249,7 +1250,7 @@ models: - dbt_utils.unique_combination_of_columns: combination_of_columns: - price_rule_id - - source_relation + - source_relation columns: - name: _fivetran_synced description: "{{ doc('_fivetran_synced') }}" @@ -1718,6 +1719,12 @@ models: - name: stg_shopify__tax_line description: Represents a single tax applied to the associated line item. 
+ tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - order_line_id + - index + - source_relation columns: - name: _fivetran_synced description: "{{ doc('_fivetran_synced') }}" diff --git a/models/tmp/stg_shopify__discount_code_tmp.sql b/models/tmp/stg_shopify__discount_code_tmp.sql index eba47d3..eca13f4 100644 --- a/models/tmp/stg_shopify__discount_code_tmp.sql +++ b/models/tmp/stg_shopify__discount_code_tmp.sql @@ -1,14 +1,5 @@ -- this model will be all NULL until you create a discount code in Shopify -{%- set source_relation = adapter.get_relation( - database=source('shopify', 'discount_code').database, - schema=source('shopify', 'discount_code').schema, - identifier=source('shopify', 'discount_code').name) -%} - -{% set table_exists=source_relation is not none %} - -{% if table_exists %} - {{ fivetran_utils.union_data( table_identifier='discount_code', @@ -20,18 +11,4 @@ union_schema_variable='shopify_union_schemas', union_database_variable='shopify_union_databases' ) -}} - -{% else %} - -select - cast(null as {{ dbt.type_timestamp() }}) as _fivetran_synced, - cast(null as {{ dbt.type_string() }}) as code, - cast(null as {{ dbt.type_timestamp() }}) as created_at, - cast(null as {{ dbt.type_int() }}) as id, - cast(null as {{ dbt.type_int() }}) as price_rule_id, - cast(null as {{ dbt.type_timestamp() }}) as updated_at, - cast(null as {{ dbt.type_float() }}) as usage_count, - cast(null as {{ dbt.type_string() }}) as _dbt_source_relation - -{% endif %} \ No newline at end of file +}} \ No newline at end of file diff --git a/models/tmp/stg_shopify__order_adjustment_tmp.sql b/models/tmp/stg_shopify__order_adjustment_tmp.sql index 6ac4801..608f386 100644 --- a/models/tmp/stg_shopify__order_adjustment_tmp.sql +++ b/models/tmp/stg_shopify__order_adjustment_tmp.sql @@ -1,14 +1,5 @@ -- this model will be all NULL until you have made an order adjustment in Shopify -{%- set source_relation = adapter.get_relation( - database=source('shopify', 
'order_adjustment').database, - schema=source('shopify', 'order_adjustment').schema, - identifier=source('shopify', 'order_adjustment').name) -%} - -{% set table_exists=source_relation is not none %} - -{% if table_exists %} - {{ fivetran_utils.union_data( table_identifier='order_adjustment', @@ -20,21 +11,4 @@ union_schema_variable='shopify_union_schemas', union_database_variable='shopify_union_databases' ) -}} - -{% else %} - -select - cast(null as {{ dbt.type_timestamp() }}) as _fivetran_synced, - cast(null as {{ dbt.type_numeric() }}) as id, - cast(null as {{ dbt.type_numeric() }}) as order_id, - cast(null as {{ dbt.type_numeric() }}) as refund_id, - cast(null as {{ dbt.type_float() }}) as amount, - cast(null as {{ dbt.type_string() }}) as amount_set, - cast(null as {{ dbt.type_float() }}) as tax_amount, - cast(null as {{ dbt.type_string() }}) as tax_amount_set, - cast(null as {{ dbt.type_string() }}) as kind, - cast(null as {{ dbt.type_string() }}) as reason, - cast(null as {{ dbt.type_string() }}) as _dbt_source_relation - -{% endif %} \ No newline at end of file +}} \ No newline at end of file diff --git a/models/tmp/stg_shopify__order_line_refund_tmp.sql b/models/tmp/stg_shopify__order_line_refund_tmp.sql index 188bb1f..1d57168 100644 --- a/models/tmp/stg_shopify__order_line_refund_tmp.sql +++ b/models/tmp/stg_shopify__order_line_refund_tmp.sql @@ -1,14 +1,5 @@ -- this model will be all NULL until you have made an order line refund in Shopify -{%- set source_relation = adapter.get_relation( - database=source('shopify', 'order_line_refund').database, - schema=source('shopify', 'order_line_refund').schema, - identifier=source('shopify', 'order_line_refund').name) -%} - -{% set table_exists=source_relation is not none %} - -{% if table_exists %} - {{ fivetran_utils.union_data( table_identifier='order_line_refund', @@ -20,22 +11,4 @@ union_schema_variable='shopify_union_schemas', union_database_variable='shopify_union_databases' ) -}} - -{% else %} - -select - 
cast(null as {{ dbt.type_timestamp() }}) as _fivetran_synced, - cast(null as {{ dbt.type_numeric() }}) as id, - cast(null as {{ dbt.type_numeric() }}) as location_id, - cast(null as {{ dbt.type_numeric() }}) as order_line_id, - cast(null as {{ dbt.type_numeric() }}) as subtotal, - cast(null as {{ dbt.type_string() }}) as subtotal_set, - cast(null as {{ dbt.type_numeric() }}) as total_tax, - cast(null as {{ dbt.type_string() }}) as total_tax_set, - cast(null as {{ dbt.type_float() }}) as quantity, - cast(null as {{ dbt.type_numeric() }}) as refund_id, - cast(null as {{ dbt.type_string() }}) as restock_type, - cast(null as {{ dbt.type_string() }}) as _dbt_source_relation - -{% endif %} \ No newline at end of file +}} \ No newline at end of file diff --git a/models/tmp/stg_shopify__refund_tmp.sql b/models/tmp/stg_shopify__refund_tmp.sql index d57c0ce..7491be0 100644 --- a/models/tmp/stg_shopify__refund_tmp.sql +++ b/models/tmp/stg_shopify__refund_tmp.sql @@ -1,14 +1,4 @@ ---To disable this model, set the shopify__using_refund variable within your dbt_project.yml file to False. 
-{{ config(enabled=var('shopify__using_refund', True)) }} - -{%- set source_relation = adapter.get_relation( - database=source('shopify', 'refund').database, - schema=source('shopify', 'refund').schema, - identifier=source('shopify', 'refund').name) -%} - -{% set table_exists=source_relation is not none %} - -{% if table_exists %} +-- this model will be empty (zero rows) until you create a refund in Shopify {{ fivetran_utils.union_data( @@ -21,20 +11,4 @@ union_schema_variable='shopify_union_schemas', union_database_variable='shopify_union_databases' ) -}} - -{% else %} - -select - cast(null as {{ dbt.type_timestamp() }}) as _fivetran_synced, - cast(null as {{ dbt.type_timestamp() }}) as created_at, - cast(null as {{ dbt.type_numeric() }}) as id, - cast(null as {{ dbt.type_string() }}) as note, - cast(null as {{ dbt.type_numeric() }}) as order_id, - cast(null as {{ dbt.type_timestamp() }}) as processed_at, - cast(null as boolean) as restock, - cast(null as {{ dbt.type_string() }}) as total_duties_set, - cast(null as {{ dbt.type_numeric() }}) as user_id, - cast(null as {{ dbt.type_string() }}) as _dbt_source_relation - -{% endif %} \ No newline at end of file +}} \ No newline at end of file