Skip to content

Commit

Permalink
Cast core stage data types (#201)
Browse files Browse the repository at this point in the history
* casting data types before core union

* version

* fixed syntax

* added missing commas

* casting integration_test models

* added date macro

* fixes integration test
  • Loading branch information
thutuva authored Sep 6, 2023
1 parent 1e14f6a commit a64d4e7
Show file tree
Hide file tree
Showing 29 changed files with 980 additions and 782 deletions.
2 changes: 1 addition & 1 deletion dbt_project.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: 'the_tuva_project'
version: '0.5.0'
version: '0.5.1'
config-version: 2
require-dbt-version: ">=1.3.0"

Expand Down
2 changes: 1 addition & 1 deletion integration_tests/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ vars:
# Use the vars below to enable or disable sections of The Tuva Project.

## The vars directly below enable all models related to the type of healthcare data being used
# clinical_enabled: true
clinical_enabled: true
claims_enabled: true

## The vars directly below enable a single data mart. See the Quickstart section of
Expand Down
99 changes: 99 additions & 0 deletions integration_tests/macros/try_to_cast_date.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
{#

This macro takes in a date column and a date format (defaults to 'YYYY-MM-DD'),
then runs a try-to-cast macro based on the adapter type. Returns NULL
cast as a date if the cast fails.

#}

{#
    Entry point: resolves the adapter-specific try_to_cast_date implementation
    through adapter.dispatch and returns whatever SQL expression it renders.

    column_name: SQL expression (column name or literal) to convert to a date.
    date_format: format label forwarded to the implementation; defaults to 'YYYY-MM-DD'.
#}
{%- macro try_to_cast_date(column_name, date_format='YYYY-MM-DD') -%}

    {%- set adapter_impl = adapter.dispatch('try_to_cast_date') -%}
    {{ return(adapter_impl(column_name, date_format)) }}

{%- endmacro -%}

{#
    BigQuery implementation of try_to_cast_date.

    column_name: SQL expression to convert to a date.
    date_format: only 'YYYY-MM-DD HH:MI:SS' is treated specially; every other
                 value falls through to a direct safe_cast to date.

    Renders an expression that yields a DATE, or NULL when the value cannot be cast.
#}
{%- macro bigquery__try_to_cast_date(column_name, date_format) -%}

    {%- if date_format == 'YYYY-MM-DD HH:MI:SS' -%}
        {#- Go through datetime with safe_cast on both steps. The previous
            date(...) wrapper is not a safe function, so a value it could not
            handle raised an error before the outer safe_cast could return NULL. -#}
        safe_cast( safe_cast( {{ column_name }} as datetime ) as date )
    {%- else -%}
        safe_cast( {{ column_name }} as date )
    {%- endif -%}

{%- endmacro -%}

{#
    Default implementation of try_to_cast_date: renders a try_cast of the
    expression to date. date_format is accepted so the signature matches the
    other implementations, but it is not used here.
#}
{%- macro default__try_to_cast_date(column_name, date_format) -%}
    try_cast( {{ column_name }} as date )
{%- endmacro -%}

{#
    Postgres implementation of try_to_cast_date.

    column_name: SQL expression (text) to convert to a date.
    date_format: one of 'YYYY-MM-DD', 'YYYYMMDD', 'MM/DD/YYYY',
                 'YYYY-MM-DD HH:MI:SS'. Any other value renders date(NULL).

    Renders a CASE expression: when the value matches the digit/separator shape
    of the requested format it is parsed with to_date, otherwise date(NULL).
    NOTE(review): only the shape is checked (any digits pass), so an
    out-of-range value such as month 13 still reaches to_date.
#}
{%- macro postgres__try_to_cast_date(column_name, date_format) -%}

    {#- date_format -> [SIMILAR TO shape pattern, template passed to to_date].
        The timestamp entry parses with HH24 instead of HH: Postgres reads HH as
        the 12-hour clock and raises for hours outside 1-12 (for example
        00:00:00 or 13:00:00), which the two-digit shape pattern lets through. -#}
    {%- set supported_formats = {
        'YYYY-MM-DD': ['[0-9]{4}-[0-9]{2}-[0-9]{2}', 'YYYY-MM-DD'],
        'YYYYMMDD': ['[0-9]{4}[0-9]{2}[0-9]{2}', 'YYYYMMDD'],
        'MM/DD/YYYY': ['[0-9]{2}/[0-9]{2}/[0-9]{4}', 'MM/DD/YYYY'],
        'YYYY-MM-DD HH:MI:SS': ['[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}', 'YYYY-MM-DD HH24:MI:SS']
    } -%}

    {%- if date_format in supported_formats -%}
        case
            when {{ column_name }} similar to '{{ supported_formats[date_format][0] }}'
            then to_date( {{ column_name }}, '{{ supported_formats[date_format][1] }}')
            else date(NULL)
        end
    {%- else -%}
        date(NULL)
    {%- endif -%}

{%- endmacro -%}

{#
    Redshift implementation of try_to_cast_date.

    column_name: SQL expression to convert to a date.
    date_format: one of 'YYYY-MM-DD', 'YYYYMMDD', 'MM/DD/YYYY',
                 'YYYY-MM-DD HH:MI:SS'. Any other value renders date(NULL).

    Renders a CASE expression that parses the value with to_date when it matches
    the shape pattern of the requested format, and date(NULL) otherwise.
#}
{%- macro redshift__try_to_cast_date(column_name, date_format) -%}

    {#- Choose the SIMILAR TO pattern first. Block assignments are used so the
        pattern text (including its backslashes) is emitted exactly as written. -#}
    {%- set shape_pattern = none -%}
    {%- if date_format == 'YYYY-MM-DD' -%}
        {%- set shape_pattern -%}\\d{4}-\\d{2}-\\d{2}{%- endset -%}
    {%- elif date_format == 'YYYYMMDD' -%}
        {%- set shape_pattern -%}\\d{4}\\d{2}\\d{2}{%- endset -%}
    {%- elif date_format == 'MM/DD/YYYY' -%}
        {%- set shape_pattern -%}\\d{2}/\\d{2}/\\d{4}{%- endset -%}
    {%- elif date_format == 'YYYY-MM-DD HH:MI:SS' -%}
        {%- set shape_pattern -%}\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}{%- endset -%}
    {%- endif -%}

    {#- Unsupported format: nothing to parse, so render a typed NULL. -#}
    {%- if shape_pattern is none -%}
        date(NULL)
    {%- else -%}
        {#- For every supported format the to_date template equals date_format. -#}
        case
            when {{ column_name }} similar to '{{ shape_pattern }}'
            then to_date( {{ column_name }}, '{{ date_format }}')
            else date(NULL)
        end
    {%- endif -%}

{%- endmacro -%}

{#
    Snowflake implementation of try_to_cast_date: renders a try_cast of the
    expression to date. date_format is part of the shared signature but is not
    used by this implementation.
#}
{%- macro snowflake__try_to_cast_date(column_name, date_format) -%}
    try_cast( {{ column_name }} as date )
{%- endmacro -%}
40 changes: 20 additions & 20 deletions integration_tests/models/condition.sql
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
-- Placeholder `condition` model: every column is a NULL literal and `limit 0` keeps the result empty.
-- NOTE(review): this span looks like a rendered diff. The bare `null as ...` lines appear to be the
-- removed version and the typed cast / try_to_cast_date lines their replacement; confirm against
-- the real file before running it.
select
null as condition_id
, null as patient_id
, null as encounter_id
, null as claim_id
, null as recorded_date
, null as onset_date
, null as resolved_date
, null as status
, null as condition_type
, null as source_code_type
, null as source_code
, null as source_description
, null as normalized_code_type
, null as normalized_code
, null as normalized_description
, null as condition_rank
, null as present_on_admit_code
, null as present_on_admit_description
, null as data_source
, null as tuva_last_run
cast(null as {{ dbt.type_string() }} ) as condition_id
, cast(null as {{ dbt.type_string() }} ) as patient_id
, cast(null as {{ dbt.type_string() }} ) as encounter_id
, cast(null as {{ dbt.type_string() }} ) as claim_id
, {{ try_to_cast_date('null', 'YYYY-MM-DD') }} as recorded_date
, {{ try_to_cast_date('null', 'YYYY-MM-DD') }} as onset_date
, {{ try_to_cast_date('null', 'YYYY-MM-DD') }} as resolved_date
, cast(null as {{ dbt.type_string() }} ) as status
, cast(null as {{ dbt.type_string() }} ) as condition_type
, cast(null as {{ dbt.type_string() }} ) as source_code_type
, cast(null as {{ dbt.type_string() }} ) as source_code
, cast(null as {{ dbt.type_string() }} ) as source_description
, cast(null as {{ dbt.type_string() }} ) as normalized_code_type
, cast(null as {{ dbt.type_string() }} ) as normalized_code
, cast(null as {{ dbt.type_string() }} ) as normalized_description
, cast(null as {{ dbt.type_int() }} ) as condition_rank
, cast(null as {{ dbt.type_string() }} ) as present_on_admit_code
, cast(null as {{ dbt.type_string() }} ) as present_on_admit_description
, cast(null as {{ dbt.type_string() }} ) as data_source
, cast(null as {{ dbt.type_timestamp() }} ) as tuva_last_run
limit 0
50 changes: 25 additions & 25 deletions integration_tests/models/encounter.sql
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
-- Placeholder `encounter` model: every column is a NULL literal and `limit 0` keeps the result empty.
-- NOTE(review): this span looks like a rendered diff. The bare `null as ...` lines appear to be the
-- removed version and the typed cast / try_to_cast_date lines their replacement; confirm against
-- the real file before running it.
select
null as encounter_id
, null as patient_id
, null as encounter_type
, null as encounter_start_date
, null as encounter_end_date
, null as length_of_stay
, null as admit_source_code
, null as admit_source_description
, null as admit_type_code
, null as admit_type_description
, null as discharge_disposition_code
, null as discharge_disposition_description
, null as attending_provider_id
, null as facility_npi
, null as primary_diagnosis_code
, null as primary_diagnosis_description
, null as ms_drg_code
, null as ms_drg_description
, null as apr_drg_code
, null as apr_drg_description
, null as paid_amount
, null as allowed_amount
, null as charge_amount
, null as data_source
, null as tuva_last_run
cast(null as {{ dbt.type_string() }} ) as encounter_id
, cast(null as {{ dbt.type_string() }} ) as patient_id
, cast(null as {{ dbt.type_string() }} ) as encounter_type
, {{ try_to_cast_date('null', 'YYYY-MM-DD') }} as encounter_start_date
, {{ try_to_cast_date('null', 'YYYY-MM-DD') }} as encounter_end_date
, cast(null as {{ dbt.type_int() }} ) as length_of_stay
, cast(null as {{ dbt.type_string() }} ) as admit_source_code
, cast(null as {{ dbt.type_string() }} ) as admit_source_description
, cast(null as {{ dbt.type_string() }} ) as admit_type_code
, cast(null as {{ dbt.type_string() }} ) as admit_type_description
, cast(null as {{ dbt.type_string() }} ) as discharge_disposition_code
, cast(null as {{ dbt.type_string() }} ) as discharge_disposition_description
, cast(null as {{ dbt.type_string() }} ) as attending_provider_id
, cast(null as {{ dbt.type_string() }} ) as facility_npi
, cast(null as {{ dbt.type_string() }} ) as primary_diagnosis_code
, cast(null as {{ dbt.type_string() }} ) as primary_diagnosis_description
, cast(null as {{ dbt.type_string() }} ) as ms_drg_code
, cast(null as {{ dbt.type_string() }} ) as ms_drg_description
, cast(null as {{ dbt.type_string() }} ) as apr_drg_code
, cast(null as {{ dbt.type_string() }} ) as apr_drg_description
, cast(null as {{ dbt.type_float() }} ) as paid_amount
, cast(null as {{ dbt.type_float() }} ) as allowed_amount
, cast(null as {{ dbt.type_float() }} ) as charge_amount
, cast(null as {{ dbt.type_string() }} ) as data_source
, cast(null as {{ dbt.type_timestamp() }} ) as tuva_last_run
limit 0
56 changes: 28 additions & 28 deletions integration_tests/models/lab_result.sql
Original file line number Diff line number Diff line change
@@ -1,30 +1,30 @@
-- Placeholder `lab_result` model: every column is a NULL literal and `limit 0` keeps the result empty.
-- NOTE(review): this span looks like a rendered diff. The bare `null as ...` lines appear to be the
-- removed version and the typed cast / try_to_cast_date lines their replacement; confirm against
-- the real file before running it.
select
null as lab_result_id
, null as patient_id
, null as encounter_id
, null as accession_number
, null as source_code_type
, null as source_code
, null as source_description
, null as source_component
, null as normalized_code_type
, null as normalized_code
, null as normalized_description
, null as normalized_component
, null as status
, null as result
, null as result_date
, null as collection_date
, null as source_units
, null as normalized_units
, null as source_reference_range_low
, null as source_reference_range_high
, null as normalized_reference_range_low
, null as normalized_reference_range_high
, null as source_abnormal_flag
, null as normalized_abnormal_flag
, null as specimen
, null as ordering_practitioner_id
, null as data_source
, null as tuva_last_run
cast(null as {{ dbt.type_string() }} ) as lab_result_id
, cast(null as {{ dbt.type_string() }} ) as patient_id
, cast(null as {{ dbt.type_string() }} ) as encounter_id
, cast(null as {{ dbt.type_string() }} ) as accession_number
, cast(null as {{ dbt.type_string() }} ) as source_code_type
, cast(null as {{ dbt.type_string() }} ) as source_code
, cast(null as {{ dbt.type_string() }} ) as source_description
, cast(null as {{ dbt.type_string() }} ) as source_component
, cast(null as {{ dbt.type_string() }} ) as normalized_code_type
, cast(null as {{ dbt.type_string() }} ) as normalized_code
, cast(null as {{ dbt.type_string() }} ) as normalized_description
, cast(null as {{ dbt.type_string() }} ) as normalized_component
, cast(null as {{ dbt.type_string() }} ) as status
, cast(null as {{ dbt.type_string() }} ) as result
, {{ try_to_cast_date('null', 'YYYY-MM-DD') }} as result_date
, {{ try_to_cast_date('null', 'YYYY-MM-DD') }} as collection_date
, cast(null as {{ dbt.type_string() }} ) as source_units
, cast(null as {{ dbt.type_string() }} ) as normalized_units
, cast(null as {{ dbt.type_string() }} ) as source_reference_range_low
, cast(null as {{ dbt.type_string() }} ) as source_reference_range_high
, cast(null as {{ dbt.type_string() }} ) as normalized_reference_range_low
, cast(null as {{ dbt.type_string() }} ) as normalized_reference_range_high
, cast(null as {{ dbt.type_int() }} ) as source_abnormal_flag
, cast(null as {{ dbt.type_int() }} ) as normalized_abnormal_flag
, cast(null as {{ dbt.type_string() }} ) as specimen
, cast(null as {{ dbt.type_string() }} ) as ordering_practitioner_id
, cast(null as {{ dbt.type_string() }} ) as data_source
, cast(null as {{ dbt.type_timestamp() }} ) as tuva_last_run
limit 0
26 changes: 13 additions & 13 deletions integration_tests/models/location.sql
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
-- Placeholder `location` model: every column is a NULL literal and `limit 0` keeps the result empty.
-- NOTE(review): this span looks like a rendered diff. The bare `null as ...` lines appear to be the
-- removed version and the typed cast lines their replacement; confirm against the real file
-- before running it.
select
null as location_id
, null as npi
, null as name
, null as facility_type
, null as parent_organization
, null as address
, null as city
, null as state
, null as zip_code
, null as latitude
, null as longitude
, null as data_source
, null as tuva_last_run
cast(null as {{ dbt.type_string() }} ) as location_id
, cast(null as {{ dbt.type_string() }} ) as npi
, cast(null as {{ dbt.type_string() }} ) as name
, cast(null as {{ dbt.type_string() }} ) as facility_type
, cast(null as {{ dbt.type_string() }} ) as parent_organization
, cast(null as {{ dbt.type_string() }} ) as address
, cast(null as {{ dbt.type_string() }} ) as city
, cast(null as {{ dbt.type_string() }} ) as state
, cast(null as {{ dbt.type_string() }} ) as zip_code
, cast(null as {{ dbt.type_float() }} ) as latitude
, cast(null as {{ dbt.type_float() }} ) as longitude
, cast(null as {{ dbt.type_string() }} ) as data_source
, cast(null as {{ dbt.type_timestamp() }} ) as tuva_last_run
limit 0
44 changes: 22 additions & 22 deletions integration_tests/models/medication.sql
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
-- Placeholder `medication` model: every column is a NULL literal and `limit 0` keeps the result empty.
-- NOTE(review): this span looks like a rendered diff. The bare `null as ...` lines appear to be the
-- removed version and the typed cast / try_to_cast_date lines their replacement; confirm against
-- the real file before running it.
select
null as medication_id
, null as patient_id
, null as encounter_id
, null as dispensing_date
, null as prescribing_date
, null as source_code_type
, null as source_code
, null as source_description
, null as ndc_code
, null as ndc_description
, null as rxnorm_code
, null as rxnorm_description
, null as atc_code
, null as atc_description
, null as route
, null as strength
, null as quantity
, null as quantity_unit
, null as days_supply
, null as practitioner_id
, null as data_source
, null as tuva_last_run
cast(null as {{ dbt.type_string() }} ) as medication_id
, cast(null as {{ dbt.type_string() }} ) as patient_id
, cast(null as {{ dbt.type_string() }} ) as encounter_id
, {{ try_to_cast_date('null', 'YYYY-MM-DD') }} as dispensing_date
, {{ try_to_cast_date('null', 'YYYY-MM-DD') }} as prescribing_date
, cast(null as {{ dbt.type_string() }} ) as source_code_type
, cast(null as {{ dbt.type_string() }} ) as source_code
, cast(null as {{ dbt.type_string() }} ) as source_description
, cast(null as {{ dbt.type_string() }} ) as ndc_code
, cast(null as {{ dbt.type_string() }} ) as ndc_description
, cast(null as {{ dbt.type_string() }} ) as rxnorm_code
, cast(null as {{ dbt.type_string() }} ) as rxnorm_description
, cast(null as {{ dbt.type_string() }} ) as atc_code
, cast(null as {{ dbt.type_string() }} ) as atc_description
, cast(null as {{ dbt.type_string() }} ) as route
, cast(null as {{ dbt.type_string() }} ) as strength
, cast(null as {{ dbt.type_int() }} ) as quantity
, cast(null as {{ dbt.type_string() }} ) as quantity_unit
, cast(null as {{ dbt.type_int() }} ) as days_supply
, cast(null as {{ dbt.type_string() }} ) as practitioner_id
, cast(null as {{ dbt.type_string() }} ) as data_source
, cast(null as {{ dbt.type_timestamp() }} ) as tuva_last_run
limit 0
42 changes: 21 additions & 21 deletions integration_tests/models/observation.sql
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
-- Placeholder `observation` model: every column is a NULL literal and `limit 0` keeps the result empty.
-- NOTE(review): this span looks like a rendered diff. The bare `null as ...` lines appear to be the
-- removed version and the typed cast / try_to_cast_date lines their replacement; confirm against
-- the real file before running it.
select
null as observation_id
, null as patient_id
, null as encounter_id
, null as panel_id
, null as observation_date
, null as observation_type
, null as source_code_type
, null as source_code
, null as source_description
, null as normalized_code_type
, null as normalized_code
, null as normalized_description
, null as result
, null as source_units
, null as normalized_units
, null as source_reference_range_low
, null as source_reference_range_high
, null as normalized_reference_range_low
, null as normalized_reference_range_high
, null as data_source
, null as tuva_last_run
cast(null as {{ dbt.type_string() }} ) as observation_id
, cast(null as {{ dbt.type_string() }} ) as patient_id
, cast(null as {{ dbt.type_string() }} ) as encounter_id
, cast(null as {{ dbt.type_string() }} ) as panel_id
, {{ try_to_cast_date('null', 'YYYY-MM-DD') }} as observation_date
, cast(null as {{ dbt.type_string() }} ) as observation_type
, cast(null as {{ dbt.type_string() }} ) as source_code_type
, cast(null as {{ dbt.type_string() }} ) as source_code
, cast(null as {{ dbt.type_string() }} ) as source_description
, cast(null as {{ dbt.type_string() }} ) as normalized_code_type
, cast(null as {{ dbt.type_string() }} ) as normalized_code
, cast(null as {{ dbt.type_string() }} ) as normalized_description
, cast(null as {{ dbt.type_string() }} ) as result
, cast(null as {{ dbt.type_string() }} ) as source_units
, cast(null as {{ dbt.type_string() }} ) as normalized_units
, cast(null as {{ dbt.type_string() }} ) as source_reference_range_low
, cast(null as {{ dbt.type_string() }} ) as source_reference_range_high
, cast(null as {{ dbt.type_string() }} ) as normalized_reference_range_low
, cast(null as {{ dbt.type_string() }} ) as normalized_reference_range_high
, cast(null as {{ dbt.type_string() }} ) as data_source
, cast(null as {{ dbt.type_timestamp() }} ) as tuva_last_run
limit 0
Loading

0 comments on commit a64d4e7

Please sign in to comment.