Skip to content

Commit

Permalink
Add Trino Support (#294)
Browse files Browse the repository at this point in the history
* Fix expect_select_column_values_to_be_unique_within_record macro

Fix ORDER BY to order by the columns which are expected to have unique values across them, instead of a random column

* Fix timeseries_data_extended.row_value expect_column_distinct_count_ tests row_conditions

* Add Trino support

* Add Trino to CI

* Cast to float explicitly

Co-authored-by: Damian Owsianny <[email protected]>

---------

Co-authored-by: Claus Herther <[email protected]>
  • Loading branch information
damian3031 and clausherther authored Feb 19, 2024
1 parent 823c965 commit 19793de
Show file tree
Hide file tree
Showing 8 changed files with 85 additions and 9 deletions.
39 changes: 39 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,42 @@ jobs:
- store_artifacts:
path: ./logs


integration-tests-trino:
  # Runs the dbt integration test project against a Trino 432 server using
  # the `trino` target from the profiles in DBT_PROFILES_DIR.

  docker:
    - image: cimg/python:3.11
    - image: trinodb/trino:432

  resource_class: small

  environment:
    DBT_PROFILES_DIR: ./integration_tests/ci
    DBT_PROJECT_DIR: ./integration_tests
    DBT_VERSION: 1.7.*

  steps:
    - checkout
    # NOTE(review): dbt-spark is installed alongside dbt-trino here; confirm
    # whether the Trino job actually needs it.
    - run:
        name: Install dbt adapter packages
        command: |
          python3 -m venv venv
          . venv/bin/activate
          pip install dbt-spark "dbt-trino==$DBT_VERSION"
    - run: *dbt-deps
    - setup_remote_docker
    - run:
        name: Run Trino server
        command: |
          docker run --name trino -p 8080:8080 -d -v "$(pwd)/integration_tests/docker/trino/catalog:/etc/trino/catalog" trinodb/trino:432
          # Search the whole log rather than only its last line: the server may
          # log further lines after "SERVER STARTED", which would otherwise
          # make this loop spin until the 5 minute timeout.
          timeout 5m bash -c -- 'until docker logs trino 2>&1 | grep -q "SERVER STARTED"; do sleep 2; done'
    - run:
        name: "Run Tests - Trino"
        command: |
          . venv/bin/activate
          dbt build -t trino --project-dir "$DBT_PROJECT_DIR"
workflows:
version: 2
test-all:
Expand All @@ -145,3 +181,6 @@ workflows:
- integration-tests-spark-thrift:
requires:
- hold
- integration-tests-trino:
requires:
- hold
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ This package supports:
* BigQuery
* DuckDB
* Spark (experimental)
* Trino

For latest release, see [https://github.com/calogica/dbt-expectations/releases](https://github.com/calogica/dbt-expectations/releases)

Expand Down
11 changes: 11 additions & 0 deletions integration_tests/ci/profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,15 @@ integration_tests:
server_side_parameters:
"spark.sql.parser.escapedStringLiterals": "true"

# Trino target selected by `dbt build -t trino` in the Trino CI job.
trino:
type: trino
# NOTE(review): presumably means no authentication for the local test server — confirm against dbt-trino docs.
method: none
# Matches the port published by the CI job's `docker run -p 8080:8080`.
host: localhost
port: 8080
user: admin
catalog: memory
schema: default
timezone: UTC
threads: 4

target: postgres
14 changes: 7 additions & 7 deletions integration_tests/models/schema_tests/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ models:
regex: "(?i)[A-Z]"
flags: i
config:
enabled: "{{ target.type == 'bigquery' }}"
enabled: "{{ target.type in ['bigquery', 'trino'] }}"
# match all uppercase, case-sensitive (where implemented), should fail
- dbt_expectations.expect_column_values_to_match_regex:
regex: "[A-Z]"
flags: c
config:
enabled: "{{ target.type not in ['bigquery', 'spark' ] }}"
enabled: "{{ target.type not in ['bigquery', 'spark', 'trino'] }}"
error_if: "=0"
warn_if: "<4"
# do not match other non-email string, should pass
Expand Down Expand Up @@ -72,7 +72,7 @@ models:
regex_list: ["[A-G]", "[H-Z]"]
flags: c
config:
enabled: "{{ target.type not in ['bigquery', 'spark' ] }}"
enabled: "{{ target.type not in ['bigquery', 'spark', 'trino'] }}"
error_if: "=0"
warn_if: "<4"
# match email address or other string
Expand Down Expand Up @@ -242,7 +242,7 @@ models:
- dbt_expectations.expect_row_values_to_have_data_for_every_n_datepart:
date_col: date_day
date_part: day
exclusion_condition: not(date_day = '2021-10-19')
exclusion_condition: not(date_day = cast('2021-10-19' as date))
- dbt_expectations.expect_row_values_to_have_data_for_every_n_datepart:
date_col: date_day
date_part: day
Expand Down Expand Up @@ -329,14 +329,14 @@ models:
- name: row_value
tests:
- dbt_expectations.expect_column_distinct_count_to_equal:
row_condition: date_day = {{ dbt_date.yesterday() }}
row_condition: cast(date_day as date) = {{ dbt_date.yesterday() }}
value: 10
- dbt_expectations.expect_column_distinct_count_to_be_greater_than:
row_condition: date_day = {{ dbt_date.yesterday() }}
row_condition: cast(date_day as date) = {{ dbt_date.yesterday() }}
value: 1
- dbt_expectations.expect_column_distinct_count_to_be_less_than:
value: 11
row_condition: date_day = {{ dbt_date.yesterday() }}
row_condition: cast(date_day as date) = {{ dbt_date.yesterday() }}



Expand Down
17 changes: 17 additions & 0 deletions macros/regex/regexp_instr.sql
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,23 @@ regexp_matches({{ source_value }}, '{{ regexp }}', '{{ flags }}')
length(regexp_extract({{ source_value }}, '{{ regexp }}', 0))
{% endmacro %}

{% macro trino__regexp_instr(source_value, regexp, position, occurrence, is_raw, flags) %}
{#
    Trino implementation of regexp_instr, built on regexp_position.

    source_value: SQL expression (column or literal) to search.
    regexp:       regular expression, rendered inside a single-quoted SQL literal.
    position:     start position passed through to regexp_position.
    occurrence:   which occurrence to locate, passed through to regexp_position.
    is_raw:       not supported on this adapter; a warning is emitted and it is ignored.
    flags:        optional flags, validated and prepended to the pattern as "(?flags)".

    Renders an expression yielding the match position, or 0 when there is no match.
#}
{% if flags %}
{{ dbt_expectations._validate_re2_flags(flags) }}
{# Trino prepends "(?flags)" to set flags for current group #}
{%- set regexp = "(?" ~ flags ~ ")" ~ regexp -%}
{% endif %}
{% if is_raw %}
{{ exceptions.warn(
"is_raw option is not supported for this adapter "
~ "and is being ignored."
) }}
{% endif %}
{%- set regexp_query = "regexp_position(" ~ source_value ~ ", '" ~ regexp ~ "', " ~ position ~ ", " ~ occurrence ~ ")" -%}
{# Trino regexp_position returns -1 if not found. Clamp it to 0, to be consistent with other adapters. #}
{# greatest() renders the regexp_position call once, so the regex is not evaluated twice per row. #}
greatest({{ regexp_query }}, 0)
{% endmacro %}

{% macro _validate_flags(flags, alphabet) %}
{% for flag in flags %}
{% if flag not in alphabet %}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
strictly=False
) %}
{% set expression %}
count(distinct {{ column_name }})*1.0/count({{ column_name }})
cast(count(distinct {{ column_name }}) as {{ dbt.type_float() }})/count({{ column_name }})
{% endset %}
{{ dbt_expectations.expression_between(model,
expression=expression,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
with column_values as (

select
row_number() over(order by 1) as row_index,
row_number() over(order by {{ columns|join(', ') }}) as row_index,
{% for column in columns -%}
{{ column }}{% if not loop.last %},{% endif %}
{%- endfor %}
Expand Down
8 changes: 8 additions & 0 deletions macros/utils/datatypes.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
timestamp without time zone
{%- endmacro %}

{# Trino-specific timestamp type name: renders `timestamp(3)` with surrounding whitespace stripped. #}
{% macro trino__type_timestamp() -%}
timestamp(3)
{%- endmacro %}

{# datetime ------------------------------------------------- #}

{% macro type_datetime() -%}
Expand All @@ -41,3 +45,7 @@
{% macro spark__type_datetime() -%}
timestamp
{%- endmacro %}

{# Trino-specific datetime type name: renders `timestamp(3)` with surrounding whitespace stripped. #}
{% macro trino__type_datetime() -%}
timestamp(3)
{%- endmacro %}

0 comments on commit 19793de

Please sign in to comment.