From 12a2cf47095b21a7648089c8f62553fc3856e379 Mon Sep 17 00:00:00 2001 From: Erika Pacheco Date: Wed, 6 Nov 2024 16:00:41 -0800 Subject: [PATCH] Filter out NULL NTD_IDs from NTD Staging ridership models. - Add documentation for models and columns - Add Test for NTD_ID [#3519] --- warehouse/models/docs/_docs_ntd.md | 130 +++++++++ .../_stg_ntd_historical_ridership_tables.yml | 257 ++++++++++++++++++ ...ments_and_estimates__calendar_year_upt.sql | 29 +- ...ments_and_estimates__calendar_year_vrm.sql | 29 +- ...with_adjustments_and_estimates__master.sql | 29 +- ...ip_with_adjustments_and_estimates__upt.sql | 29 +- ...justments_and_estimates__upt_estimates.sql | 29 +- ...p_with_adjustments_and_estimates__voms.sql | 29 +- ...ip_with_adjustments_and_estimates__vrh.sql | 29 +- ...ip_with_adjustments_and_estimates__vrm.sql | 29 +- ...justments_and_estimates__vrm_estimates.sql | 29 +- 11 files changed, 501 insertions(+), 147 deletions(-) diff --git a/warehouse/models/docs/_docs_ntd.md b/warehouse/models/docs/_docs_ntd.md index 5c63138074..4ca3df7fc6 100644 --- a/warehouse/models/docs/_docs_ntd.md +++ b/warehouse/models/docs/_docs_ntd.md @@ -81,6 +81,10 @@ A system for carrying transit passengers described by specific right-of-way (ROW - Jitney (JT) {% enddocs %} +{% docs ntd_3_mode %} +A grouping of modes based upon whether the mode operates on rail, is a bus mode, is ferry boat service or other. +{% enddocs %} + {% docs ntd_time_period %} The time period for which data was collected. Valid values are: @@ -124,3 +128,129 @@ Actual vehicle hours exclude: - Operator training, - Vehicle maintenance testing. {% enddocs %} + +{% docs ntd_upt %} +Unlinked Passenger Trips (UPT) - +The number of passengers who board public transportation vehicles. +Passengers are counted each time they board vehicles no matter how +many vehicles they use to travel from their origin to their destination. 
+{% enddocs %} + +{% docs ntd_voms %} +Vehicles Operated in Annual Maximum Service (VOMS) - +The number of revenue vehicles operated to meet the annual maximum +service requirement. This is the revenue vehicle count during the peak +season of the year; on the week and day that maximum service is +provided. Vehicles operated in maximum service (VOMS) exclude: + - Atypical days; or + - One-time special events. +{% enddocs %} + +{% docs ntd_vrh %} +Vehicle Revenue Hours (VRH) - +The hours that vehicles are scheduled to or actually travel while in +revenue service. Vehicle revenue hours include: + - Layover / recovery time. +Vehicle revenue hours exclude: + - Deadhead; + - Operator training; + - Vehicle maintenance testing; and + - Other non-revenue uses of vehicles. +{% enddocs %} + +{% docs ntd_vrm %} +Vehicle Revenue Miles (VRM) - +The miles that vehicles are scheduled to or actually travel while in +revenue service. Vehicle revenue miles include: + - Layover / recovery time. +Vehicle revenue miles exclude: + - Deadhead; + - Operator training; + - Vehicle maintenance testing; and + - Other non-revenue uses of vehicles. +{% enddocs %} + +{% docs ntd_mode_type_of_service_status %} +Indicates whether a property reports (active) or not (inactive) during the most recent Annual report year. +{% enddocs %} + +{% docs ntd_last_closed_report_year %} +The property’s most-recent closed-out annual report year. +In cases where the agency mode type of service combination does not have a closed out report year, this value will be null. +{% enddocs %} + +{% docs ntd_last_closed_fy_end_month %} +The month the property’s fiscal year ends. +In cases where the agency mode type of service combination does not have a closed out report year, this value will be null. +{% enddocs %} + +{% docs ntd_last_closed_fy_end_year %} +The year in which the property’s most-recent closed-out fiscal year ended. 
+In cases where the agency mode type of service combination does not have a closed out report year, this value will be null. +{% enddocs %} + +{% docs ntd_passenger_miles_fy %} +Passenger miles for the most recent closed-out annual report year. +In cases where the agency mode type of service combination does not have a closed out report year, this value will be null. +{% enddocs %} + +{% docs ntd_unlinked_passenger_trips_fy %} +Unlinked Passenger Trips for the most recent closed-out annual report year. +In cases where the agency mode type of service combination does not have a closed out report year, this value will be null. +{% enddocs %} + +{% docs ntd_avg_trip_length_fy %} +The ratio of Passenger Miles to Unlinked Passenger Trips. +In cases where the agency mode type of service combination does not have a closed out report year, this value will be null. +{% enddocs %} + +{% docs ntd_fares_fy %} +The fare revenues collected during the most recent closed-out annual report year. +In cases where the agency mode type of service combination does not have a closed out report year, this value will be null. +{% enddocs %} + +{% docs ntd_operating_expenses_fy %} +The expenses associated with the operation of the transit agency, classified by function or activity and by the goods and services purchased, for the most recent closed-out annual report year. +{% enddocs %} + +{% docs ntd_avg_cost_per_trip_fy %} +The ratio of Total Operating Expenses to Unlinked Passenger Trips. +In cases where the agency mode type of service combination does not have a closed out report year, this value will be null. +{% enddocs %} + +{% docs ntd_avg_fares_per_trip_fy %} +The ratio of Fares Earned to Unlinked Passenger Trips. +In cases where the agency mode type of service combination does not have a closed out report year, this value will be null. +{% enddocs %} + +{% docs ntd_ridership_service_type %} +A summarization of modes into `Fixed Route`, `Demand Response`, or `Unknown`. 
+{% enddocs %} + +{% docs ntd_top_150 %} +Values are: `Y` (Yes) and `N` (No) +{% enddocs %} + +{% docs ntd_period_year %} +The Year for which data was collected. +{% enddocs %} + +{% docs ntd_period_month %} +The Month for which data was collected. +{% enddocs %} + +{% docs ntd_period_month_year %} +The Month and Year for which data was collected. +{% enddocs %} + +{% docs ntd_period_year_month %} +The Year and Month for which data was collected. +{% enddocs %} + +{% docs ntd_xlsx_dt %} +Date when the data was extracted. +{% enddocs %} + +{% docs ntd_xlsx_execution_ts %} +Date and Time when the data was extracted. +{% enddocs %} diff --git a/warehouse/models/staging/ntd_ridership/_stg_ntd_historical_ridership_tables.yml b/warehouse/models/staging/ntd_ridership/_stg_ntd_historical_ridership_tables.yml index 464dc63f2a..ae488d7756 100644 --- a/warehouse/models/staging/ntd_ridership/_stg_ntd_historical_ridership_tables.yml +++ b/warehouse/models/staging/ntd_ridership/_stg_ntd_historical_ridership_tables.yml @@ -1,12 +1,269 @@ version: 2 +x-common-fields: + - &ntd_id + name: ntd_id + description: '{{ doc("ntd_id") }}' + tests: + - not_null + - &legacy_ntd_id + name: legacy_ntd_id + description: '{{ doc("ntd_legacy_id") }}' + - &agency + name: agency + description: '{{ doc("ntd_agency") }}' + - &reporter_type + name: reporter_type + description: '{{ doc("ntd_reporter_type") }}' + - &uace_cd + name: uace_cd + description: '{{ doc("ntd_primary_uza_code") }}' + - &uza_name + name: uza_name + description: '{{ doc("ntd_primary_uza_name") }}' + - &tos + name: tos + description: '{{ doc("ntd_type_of_service") }}' + - &mode + name: mode + description: '{{ doc("ntd_mode") }}' + - &_3_mode + name: _3_mode + description: '{{ doc("ntd_3_mode") }}' + - &mode_type_of_service_status + name: mode_type_of_service_status + description: '{{ doc("ntd_mode_type_of_service_status") }}' + - &organization_type + name: organization_type + description: '{{ doc("ntd_organization_type") }}' + - 
&hq_city + name: hq_city + description: '{{ doc("ntd_city") }}' + - &hq_state + name: hq_state + description: '{{ doc("ntd_state") }}' + - &uza_sq_miles + name: uza_sq_miles + description: '{{ doc("ntd_primary_uza_area_sq_miles") }}' + - &uza_population + name: uza_population + description: '{{ doc("ntd_primary_uza_population") }}' + - &service_area_population + name: service_area_population + description: '{{ doc("ntd_service_area_population") }}' + - &service_area_sq_miles + name: service_area_sq_miles + description: '{{ doc("ntd_service_area_sq_miles") }}' + - &last_closed_report_year + name: last_closed_report_year + description: '{{ doc("ntd_last_closed_report_year") }}' + - &last_closed_fy_end_month + name: last_closed_fy_end_month + description: '{{ doc("ntd_last_closed_fy_end_month") }}' + - &last_closed_fy_end_year + name: last_closed_fy_end_year + description: '{{ doc("ntd_last_closed_fy_end_year") }}' + - &passenger_miles_fy + name: passenger_miles_fy + description: '{{ doc("ntd_passenger_miles_fy") }}' + - &unlinked_passenger_trips_fy + name: unlinked_passenger_trips_fy + description: '{{ doc("ntd_unlinked_passenger_trips_fy") }}' + - &avg_trip_length_fy + name: avg_trip_length_fy + description: '{{ doc("ntd_avg_trip_length_fy") }}' + - &fares_fy + name: fares_fy + description: '{{ doc("ntd_fares_fy") }}' + - &operating_expenses_fy + name: operating_expenses_fy + description: '{{ doc("ntd_operating_expenses_fy") }}' + - &avg_cost_per_trip_fy + name: avg_cost_per_trip_fy + description: '{{ doc("ntd_avg_cost_per_trip_fy") }}' + - &avg_fares_per_trip_fy + name: avg_fares_per_trip_fy + description: '{{ doc("ntd_avg_fares_per_trip_fy") }}' + - &top_150 + name: top_150 + description: '{{ doc("ntd_top_150") }}' + - &month_year + name: month + description: '{{ doc("ntd_period_month_year") }}' + - &year + name: year + description: '{{ doc("ntd_period_year") }}' + - &estimated_upt + name: estimated_upt + description: '{{ doc("ntd_upt") }}' + - &estimated_vrm + 
name: estimated_vrm + description: '{{ doc("ntd_vrm") }}' + - &dt + name: dt + description: '{{ doc("ntd_xlsx_dt") }}' + - &execution_ts + name: execution_ts + description: '{{ doc("ntd_xlsx_execution_ts") }}' + models: - name: stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_upt + description: Calendar year totals for UPT for each transit agency, by mode and type of service. + columns: + - *ntd_id + - *legacy_ntd_id + - *agency + - *mode_type_of_service_status + - *reporter_type + - *uace_cd + - *uza_name + - *mode + - *tos + - *_3_mode + - *dt + - *execution_ts + - name: stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_vrm + description: Calendar year totals for VRM for each transit agency, by mode and type of service. + columns: + - *ntd_id + - *legacy_ntd_id + - *agency + - *mode_type_of_service_status + - *reporter_type + - *uace_cd + - *uza_name + - *mode + - *tos + - *_3_mode + - *dt + - *execution_ts + - name: stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__master + description: A Master File of all properties that currently report or reported monthly data during the period January 2002 - Current Calendar Year. 
+ columns: + - *ntd_id + - *legacy_ntd_id + - *agency + - *mode + - *tos + - *_3_mode + - *mode_type_of_service_status + - *reporter_type + - *organization_type + - *hq_city + - *hq_state + - *uace_cd + - *uza_name + - *uza_sq_miles + - *uza_population + - *service_area_population + - *service_area_sq_miles + - *last_closed_report_year + - *last_closed_fy_end_month + - *last_closed_fy_end_year + - *passenger_miles_fy + - *unlinked_passenger_trips_fy + - *avg_trip_length_fy + - *fares_fy + - *operating_expenses_fy + - *avg_cost_per_trip_fy + - *avg_fares_per_trip_fy + - *dt + - *execution_ts + - name: stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt + description: '{{ doc("ntd_upt") }}' + columns: + - *ntd_id + - *legacy_ntd_id + - *agency + - *mode_type_of_service_status + - *reporter_type + - *uace_cd + - *uza_name + - *mode + - *tos + - *_3_mode + - *dt + - *execution_ts + - name: stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt_estimates + description: | + This dataset includes estimates for missing UPT data in the two most recent report years. + Estimates are calculated based on the monthly modal growth rates from an industry-wide sample of transit agencies. 
+ columns: + - *top_150 + - *ntd_id + - *agency + - *mode + - *tos + - *month_year + - *year + - *estimated_upt + - *dt + - *execution_ts + - name: stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__voms + description: '{{ doc("ntd_voms") }}' + columns: + - *ntd_id + - *legacy_ntd_id + - *agency + - *mode_type_of_service_status + - *reporter_type + - *uace_cd + - *uza_name + - *mode + - *tos + - *_3_mode + - *dt + - *execution_ts + - name: stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrh + description: '{{ doc("ntd_vrh") }}' + columns: + - *ntd_id + - *legacy_ntd_id + - *agency + - *mode_type_of_service_status + - *reporter_type + - *uace_cd + - *uza_name + - *mode + - *tos + - *_3_mode + - *dt + - *execution_ts + - name: stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm + description: '{{ doc("ntd_vrm") }}' + columns: + - *ntd_id + - *legacy_ntd_id + - *agency + - *mode_type_of_service_status + - *reporter_type + - *uace_cd + - *uza_name + - *mode + - *tos + - *_3_mode + - *dt + - *execution_ts + - name: stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm_estimates + description: | + This dataset includes estimates for missing VRM data in the two most recent report years. + Estimates are calculated based on the monthly modal growth rates from an industry-wide sample of transit agencies. 
+ columns: + - *top_150 + - *ntd_id + - *agency + - *mode + - *tos + - *month_year + - *year + - *estimated_vrm + - *dt + - *execution_ts diff --git a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_upt.sql b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_upt.sql index 9fb85a174a..d9b4d6f709 100644 --- a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_upt.sql +++ b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_upt.sql @@ -1,19 +1,16 @@ -WITH external_historical_complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_upt AS ( - SELECT * - FROM {{ source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_upt') }} -), +WITH + source AS ( + SELECT * + FROM {{ source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_upt') }} + WHERE ntd_id IS NOT NULL + -- Removing records without NTD_ID because they contain "estimated monthly industry totals for Rural reporters" from the bottom of the scraped file + ), -get_latest_extract AS( - - SELECT * - FROM external_historical_complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_upt - -- we pull the whole table every month in the pipeline, so this gets only the latest extract - QUALIFY DENSE_RANK() OVER (ORDER BY execution_ts DESC) = 1 -), - -stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_upt AS ( - SELECT * - FROM get_latest_extract -) + stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_upt AS( 
+ SELECT * + FROM source + -- we pull the whole table every month in the pipeline, so this gets only the latest extract + QUALIFY DENSE_RANK() OVER (ORDER BY execution_ts DESC) = 1 + ) SELECT * FROM stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_upt diff --git a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_vrm.sql b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_vrm.sql index 81e1bd790a..836776c939 100644 --- a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_vrm.sql +++ b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_vrm.sql @@ -1,19 +1,16 @@ -WITH external_historical_complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_vrm AS ( - SELECT * - FROM {{ source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_vrm') }} -), +WITH + source AS ( + SELECT * + FROM {{ source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_vrm') }} + WHERE ntd_id IS NOT NULL + -- Removing records without NTD_ID because they contain "estimated monthly industry totals for Rural reporters" from the bottom of the scraped file + ), -get_latest_extract AS( - - SELECT * - FROM external_historical_complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_vrm - -- we pull the whole table every month in the pipeline, so this gets only the latest extract - QUALIFY DENSE_RANK() OVER (ORDER BY execution_ts DESC) = 1 -), - 
-stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_vrm AS ( - SELECT * - FROM get_latest_extract -) + stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_vrm AS( + SELECT * + FROM source + -- we pull the whole table every month in the pipeline, so this gets only the latest extract + QUALIFY DENSE_RANK() OVER (ORDER BY execution_ts DESC) = 1 + ) SELECT * FROM stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_vrm diff --git a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__master.sql b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__master.sql index 484c47ebe9..dc5edcc28a 100644 --- a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__master.sql +++ b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__master.sql @@ -1,19 +1,14 @@ -WITH external_historical_complete_monthly_ridership_with_adjustments_and_estimates__master AS ( - SELECT * - FROM {{ source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__master') }} -), - -get_latest_extract AS( - - SELECT * - FROM external_historical_complete_monthly_ridership_with_adjustments_and_estimates__master - -- we pull the whole table every month in the pipeline, so this gets only the latest extract - QUALIFY DENSE_RANK() OVER (ORDER BY execution_ts DESC) = 1 -), - -stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__master AS ( - SELECT * - FROM get_latest_extract -) +WITH + source AS ( + SELECT * + FROM {{ source('external_ntd__ridership', 
'historical__complete_monthly_ridership_with_adjustments_and_estimates__master') }} + ), + + stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__master AS( + SELECT * + FROM source + -- we pull the whole table every month in the pipeline, so this gets only the latest extract. NOTE(review): unlike the sibling models, `source` here has no `ntd_id IS NOT NULL` filter although ntd_id carries a not_null test - confirm this table never has NULL ntd_id rows + QUALIFY DENSE_RANK() OVER (ORDER BY execution_ts DESC) = 1 + ) SELECT * FROM stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__master diff --git a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt.sql b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt.sql index 7657bcead2..a7fb450769 100644 --- a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt.sql +++ b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt.sql @@ -1,19 +1,16 @@ -WITH external_historical_complete_monthly_ridership_with_adjustments_and_estimates__upt AS ( - SELECT * - FROM {{ source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__upt') }} -), +WITH + source AS ( + SELECT * + FROM {{ source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__upt') }} + WHERE ntd_id IS NOT NULL + -- Removing records without NTD_ID because they contain "estimated monthly industry totals for Rural reporters" from the bottom of the scraped file + ), -get_latest_extract AS( - - SELECT * - FROM external_historical_complete_monthly_ridership_with_adjustments_and_estimates__upt - -- we pull the whole table every month in the pipeline, so this gets only the latest extract - QUALIFY DENSE_RANK() OVER (ORDER BY execution_ts DESC) = 1 -), - 
-stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt AS ( - SELECT * - FROM get_latest_extract -) + stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt AS( + SELECT * + FROM source + -- we pull the whole table every month in the pipeline, so this gets only the latest extract + QUALIFY DENSE_RANK() OVER (ORDER BY execution_ts DESC) = 1 + ) SELECT * FROM stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt diff --git a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt_estimates.sql b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt_estimates.sql index c3127f4a01..67b4118186 100644 --- a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt_estimates.sql +++ b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt_estimates.sql @@ -1,19 +1,14 @@ -WITH external_historical_complete_monthly_ridership_with_adjustments_and_estimates__upt_estimates AS ( - SELECT * - FROM {{ source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__upt_estimates') }} -), - -get_latest_extract AS( - - SELECT * - FROM external_historical_complete_monthly_ridership_with_adjustments_and_estimates__upt_estimates - -- we pull the whole table every month in the pipeline, so this gets only the latest extract - QUALIFY DENSE_RANK() OVER (ORDER BY execution_ts DESC) = 1 -), - -stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt_estimates AS ( - SELECT * - FROM get_latest_extract -) +WITH + source AS ( + SELECT * + FROM {{ source('external_ntd__ridership', 
'historical__complete_monthly_ridership_with_adjustments_and_estimates__upt_estimates') }} + ), + + stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt_estimates AS( + SELECT * + FROM source + -- we pull the whole table every month in the pipeline, so this gets only the latest extract. NOTE(review): unlike the sibling models, `source` here has no `ntd_id IS NOT NULL` filter although ntd_id carries a not_null test - confirm this table never has NULL ntd_id rows + QUALIFY DENSE_RANK() OVER (ORDER BY execution_ts DESC) = 1 + ) SELECT * FROM stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt_estimates diff --git a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__voms.sql b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__voms.sql index f6068c1d93..d4f99e0a38 100644 --- a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__voms.sql +++ b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__voms.sql @@ -1,19 +1,16 @@ -WITH external_historical_complete_monthly_ridership_with_adjustments_and_estimates__voms AS ( - SELECT * - FROM {{ source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__voms') }} -), +WITH + source AS ( + SELECT * + FROM {{ source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__voms') }} + WHERE ntd_id IS NOT NULL + -- Removing records without NTD_ID because they contain "estimated monthly industry totals for Rural reporters" from the bottom of the scraped file + ), -get_latest_extract AS( - - SELECT * - FROM external_historical_complete_monthly_ridership_with_adjustments_and_estimates__voms - -- we pull the whole table every month in the pipeline, so this gets only the latest extract - QUALIFY DENSE_RANK() OVER (ORDER BY execution_ts DESC) = 1 -), - 
-stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__voms AS ( - SELECT * - FROM get_latest_extract -) + stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__voms AS( + SELECT * + FROM source + -- we pull the whole table every month in the pipeline, so this gets only the latest extract + QUALIFY DENSE_RANK() OVER (ORDER BY execution_ts DESC) = 1 + ) SELECT * FROM stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__voms diff --git a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrh.sql b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrh.sql index cf5bdb4783..baa46625fd 100644 --- a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrh.sql +++ b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrh.sql @@ -1,19 +1,16 @@ -WITH external_historical_complete_monthly_ridership_with_adjustments_and_estimates__vrh AS ( - SELECT * - FROM {{ source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__vrh') }} -), +WITH + source AS ( + SELECT * + FROM {{ source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__vrh') }} + WHERE ntd_id IS NOT NULL + -- Removing records without NTD_ID because they contain "estimated monthly industry totals for Rural reporters" from the bottom of the scraped file + ), -get_latest_extract AS( - - SELECT * - FROM external_historical_complete_monthly_ridership_with_adjustments_and_estimates__vrh - -- we pull the whole table every month in the pipeline, so this gets only the latest extract - QUALIFY DENSE_RANK() OVER (ORDER BY execution_ts 
DESC) = 1 -), - -stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrh AS ( - SELECT * - FROM get_latest_extract -) + stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrh AS( + SELECT * + FROM source + -- we pull the whole table every month in the pipeline, so this gets only the latest extract + QUALIFY DENSE_RANK() OVER (ORDER BY execution_ts DESC) = 1 + ) SELECT * FROM stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrh diff --git a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm.sql b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm.sql index 6c436a9e92..0af9c590f4 100644 --- a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm.sql +++ b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm.sql @@ -1,19 +1,16 @@ -WITH external_historical_complete_monthly_ridership_with_adjustments_and_estimates__vrm AS ( - SELECT * - FROM {{ source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm') }} -), +WITH + source AS ( + SELECT * + FROM {{ source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm') }} + WHERE ntd_id IS NOT NULL + -- Removing records without NTD_ID because they contain "estimated monthly industry totals for Rural reporters" from the bottom of the scraped file + ), -get_latest_extract AS( - - SELECT * - FROM external_historical_complete_monthly_ridership_with_adjustments_and_estimates__vrm - -- we pull the whole table every month in the pipeline, so this gets only the latest extract - QUALIFY DENSE_RANK() OVER (ORDER BY 
execution_ts DESC) = 1 -), - -stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm AS ( - SELECT * - FROM get_latest_extract -) + stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm AS( + SELECT * + FROM source + -- we pull the whole table every month in the pipeline, so this gets only the latest extract + QUALIFY DENSE_RANK() OVER (ORDER BY execution_ts DESC) = 1 + ) SELECT * FROM stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm diff --git a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm_estimates.sql b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm_estimates.sql index 3cb5d759bf..15e103010b 100644 --- a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm_estimates.sql +++ b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm_estimates.sql @@ -1,19 +1,14 @@ -WITH external_historical_complete_monthly_ridership_with_adjustments_and_estimates__vrm_estimates AS ( - SELECT * - FROM {{ source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm_estimates') }} -), - -get_latest_extract AS( - - SELECT * - FROM external_historical_complete_monthly_ridership_with_adjustments_and_estimates__vrm_estimates - -- we pull the whole table every month in the pipeline, so this gets only the latest extract - QUALIFY DENSE_RANK() OVER (ORDER BY execution_ts DESC) = 1 -), - -stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm_estimates AS ( - SELECT * - FROM get_latest_extract -) +WITH + source AS ( + SELECT * + FROM {{ 
source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm_estimates') }} + ), + + stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm_estimates AS( + SELECT * + FROM source + -- we pull the whole table every month in the pipeline, so this gets only the latest extract. NOTE(review): unlike the sibling models, `source` here has no `ntd_id IS NOT NULL` filter although ntd_id carries a not_null test - confirm this table never has NULL ntd_id rows + QUALIFY DENSE_RANK() OVER (ORDER BY execution_ts DESC) = 1 + ) SELECT * FROM stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm_estimates