From f77bf8e50f2c2ff58e73b0d12ab306fa2c6b267e Mon Sep 17 00:00:00 2001 From: Erika Pacheco Date: Thu, 7 Nov 2024 13:47:52 -0800 Subject: [PATCH] Use new NTD monthly data sources for Staging and Mart tables - Also Format SQL files and reorder columns - Remove irrelevant year column from dim model - Filter out null NTD_ids - Edit description on documentation [#3519] --- script/scrape_ntd.py | 4 +- warehouse/models/docs/_docs_ntd.md | 53 ++++ warehouse/models/intermediate/ntd/int_ntd.yml | 20 +- ...thly_ridership_with_adjustments_joined.sql | 91 +++--- ...monthly_ridership_with_adjustments_upt.sql | 111 ++++--- ...onthly_ridership_with_adjustments_voms.sql | 111 ++++--- ...monthly_ridership_with_adjustments_vrh.sql | 111 ++++--- ...monthly_ridership_with_adjustments_vrm.sql | 111 ++++--- warehouse/models/mart/ntd/_mart_ntd.yml | 273 ++++++++---------- ...dim_monthly_ridership_with_adjustments.sql | 46 +++ ...ments_and_estimates__calendar_year_upt.sql | 2 + ...ments_and_estimates__calendar_year_vrm.sql | 2 + ...ip_with_adjustments_and_estimates__upt.sql | 2 + ...p_with_adjustments_and_estimates__voms.sql | 2 + ...ip_with_adjustments_and_estimates__vrh.sql | 2 + ...ip_with_adjustments_and_estimates__vrm.sql | 2 + 16 files changed, 497 insertions(+), 446 deletions(-) create mode 100644 warehouse/models/mart/ntd/dim_monthly_ridership_with_adjustments.sql diff --git a/script/scrape_ntd.py b/script/scrape_ntd.py index 64f84a2ec6..4147e1dcc1 100644 --- a/script/scrape_ntd.py +++ b/script/scrape_ntd.py @@ -24,7 +24,9 @@ poetry install then poetry run python scrape_ntd.py annual-database-agency-information 2021 https://www.transit.dot.gov/sites/fta.dot.gov/files/2022-10/2021%20Agency%20Information.xlsx -poetry run python scrape_ntd.py monthly-ridership-with-adjustments 2024 https://www.transit.dot.gov/sites/fta.dot.gov/files/2024-04/February%202024%20Complete%20Monthly%20Ridership%20%28with%20adjustments%20and%20estimates%29_240402_0.xlsx + +-- poetry run python scrape_ntd.py monthly-ridership-with-adjustments 2024 https://www.transit.dot.gov/sites/fta.dot.gov/files/2024-04/February%202024%20Complete%20Monthly%20Ridership%20%28with%20adjustments%20and%20estimates%29_240402_0.xlsx +-- REPLACED by "/airflow/plugins/operators/scrape_ntd_xlsx.py" """ import gzip diff --git a/warehouse/models/docs/_docs_ntd.md b/warehouse/models/docs/_docs_ntd.md index 5c63138074..f61b55525f 100644 --- a/warehouse/models/docs/_docs_ntd.md +++ b/warehouse/models/docs/_docs_ntd.md @@ -81,6 +81,10 @@ A system for carrying transit passengers described by specific right-of-way (ROW - Jitney (JT) {% enddocs %} +{% docs ntd_3_mode %} +A grouping of modes based upon whether the mode operates on rail, is a bus mode, is ferry boat service or other. +{% enddocs %} + {% docs ntd_time_period %} The time period for which data was collected. Valid values are: @@ -124,3 +128,52 @@ Actual vehicle hours exclude: - Operator training, - Vehicle maintenance testing. {% enddocs %} + +{% docs ntd_monthly_upt %} +Unlinked Passenger Trips (UPT) - +The number of passengers who board public transportation vehicles. +Passengers are counted each time they board vehicles no matter how +many vehicles they use to travel from their origin to their destination. +{% enddocs %} + +{% docs ntd_monthly_voms %} +Vehicles Operated in Annual Maximum Service (VOMS) - +The number of revenue vehicles operated to meet the annual maximum +service requirement. This is the revenue vehicle count during the peak +season of the year; on the week and day that maximum service is +provided. Vehicles operated in maximum service (VOMS) exclude: + - Atypical days; or + - One-time special events. +{% enddocs %} + +{% docs ntd_monthly_vrh %} +Vehicle Revenue Hours (VRH) - +The hours that vehicles are scheduled to or actually travel while in +revenue service. Vehicle revenue hours include: + - Layover / recovery time. +Vehicle revenue hours exclude: + - Deadhead; + - Operator training; + - Vehicle maintenance testing; and + - Other non-revenue uses of vehicles. +{% enddocs %} + +{% docs ntd_monthly_vrm %} +Vehicle Revenue Miles (VRM) - +The miles that vehicles are scheduled to or actually travel while in +revenue service. Vehicle revenue miles include: + - Layover / recovery time. +Vehicle revenue miles exclude: + - Deadhead; + - Operator training; + - Vehicle maintenance testing; and + - Other non-revenue uses of vehicles. +{% enddocs %} + +{% docs ntd_monthly_mode_type_of_service_status %} +Indicates whether a property reports (active) or not (inactive) during the most recent Annual report year. +{% enddocs %} + +{% docs ntd_monthly_service_type %} +A summarization of modes into `Fixed Route`, `Demand Response`, or `Unknown`. +{% enddocs %} diff --git a/warehouse/models/intermediate/ntd/int_ntd.yml b/warehouse/models/intermediate/ntd/int_ntd.yml index 4658c3d7b1..cef6f5b2cc 100644 --- a/warehouse/models/intermediate/ntd/int_ntd.yml +++ b/warehouse/models/intermediate/ntd/int_ntd.yml @@ -1,27 +1,27 @@ version: 2 + models: - name: int_ntd__monthly_ridership_with_adjustments_upt - description: | - Ridership - upt + description: '{{ doc("ntd_monthly_upt") }}' config: materialized: table + - name: int_ntd__monthly_ridership_with_adjustments_vrm - description: | - Ridership - vrm + description: '{{ doc("ntd_monthly_vrm") }}' config: materialized: table + - name: int_ntd__monthly_ridership_with_adjustments_vrh - description: | - Ridership - vrh + description: '{{ doc("ntd_monthly_vrh") }}' config: materialized: table + - name: int_ntd__monthly_ridership_with_adjustments_voms - description: | - Ridership - voms + description: '{{ doc("ntd_monthly_voms") }}' config: materialized: table + - name: int_ntd__monthly_ridership_with_adjustments_joined - description: | - Ridership - joined + description: Ridership - joined VRM, VOMS, VRH, and UPT config: materialized: table diff --git a/warehouse/models/intermediate/ntd/int_ntd__monthly_ridership_with_adjustments_joined.sql b/warehouse/models/intermediate/ntd/int_ntd__monthly_ridership_with_adjustments_joined.sql index 90c3f9d9d8..951e45cbb6 100644 --- a/warehouse/models/intermediate/ntd/int_ntd__monthly_ridership_with_adjustments_joined.sql +++ b/warehouse/models/intermediate/ntd/int_ntd__monthly_ridership_with_adjustments_joined.sql @@ -1,60 +1,57 @@ {{ config(materialized="table") }} -with - voms as ( - select * from {{ ref("int_ntd__monthly_ridership_with_adjustments_voms") }} +WITH + voms AS ( + SELECT * FROM {{ ref("int_ntd__monthly_ridership_with_adjustments_voms") }} ), - vrh as (select * from {{ ref("int_ntd__monthly_ridership_with_adjustments_vrh") }}), - vrm as (select * from {{ ref("int_ntd__monthly_ridership_with_adjustments_vrm") }}), - upt as (select * from {{ ref("int_ntd__monthly_ridership_with_adjustments_upt") }}), + vrh AS (SELECT * FROM {{ ref("int_ntd__monthly_ridership_with_adjustments_vrh") }}), + vrm AS (SELECT * FROM {{ ref("int_ntd__monthly_ridership_with_adjustments_vrm") }}), + upt AS (SELECT * FROM {{ ref("int_ntd__monthly_ridership_with_adjustments_upt") }}), - int_ntd__monthly_ridership_with_adjustments_joined as ( - select voms.*, upt.upt, vrm.vrm, vrh.vrh - from voms + int_ntd__monthly_ridership_with_adjustments_joined AS ( + SELECT voms.*, + upt.upt, + vrm.vrm, + vrh.vrh + FROM voms - full outer join - upt - on voms.ntd_id = upt.ntd_id - and voms.year = upt.year - and voms.mode = upt.mode - and voms.reporter_type = upt.reporter_type - and voms.agency = upt.agency - and voms._3_mode = upt._3_mode - and voms.period_month = upt.period_month - and voms.period_year = upt.period_year - and voms.tos = upt.tos - and voms.mode_type_of_service_status = upt.mode_type_of_service_status + FULL OUTER JOIN upt + ON voms.ntd_id = upt.ntd_id + AND voms.mode = upt.mode + AND voms.reporter_type = upt.reporter_type + AND voms.agency = upt.agency + AND voms._3_mode = upt._3_mode + AND voms.period_month = upt.period_month + AND voms.period_year = upt.period_year + AND voms.tos = upt.tos + AND voms.mode_type_of_service_status = upt.mode_type_of_service_status - full outer join - vrm - on voms.ntd_id = vrm.ntd_id - and voms.year = vrm.year - and voms.mode = vrm.mode - and voms.reporter_type = vrm.reporter_type - and voms.agency = vrm.agency - and voms._3_mode = vrm._3_mode - and voms.period_month = vrm.period_month - and voms.period_year = vrm.period_year - and voms.tos = vrm.tos - and voms.mode_type_of_service_status = vrm.mode_type_of_service_status + FULL OUTER JOIN vrm + ON voms.ntd_id = vrm.ntd_id + AND voms.mode = vrm.mode + AND voms.reporter_type = vrm.reporter_type + AND voms.agency = vrm.agency + AND voms._3_mode = vrm._3_mode + AND voms.period_month = vrm.period_month + AND voms.period_year = vrm.period_year + AND voms.tos = vrm.tos + AND voms.mode_type_of_service_status = vrm.mode_type_of_service_status - full outer join - vrh - on voms.ntd_id = vrh.ntd_id - and voms.year = vrh.year - and voms.mode = vrh.mode - and voms.reporter_type = vrh.reporter_type - and voms.agency = vrh.agency - and voms._3_mode = vrh._3_mode - and voms.period_month = vrh.period_month - and voms.period_year = vrh.period_year - and voms.tos = vrh.tos - and voms.mode_type_of_service_status = vrh.mode_type_of_service_status + FULL OUTER JOIN vrh + ON voms.ntd_id = vrh.ntd_id + AND voms.mode = vrh.mode + AND voms.reporter_type = vrh.reporter_type + AND voms.agency = vrh.agency + AND voms._3_mode = vrh._3_mode + AND voms.period_month = vrh.period_month + AND voms.period_year = vrh.period_year + AND voms.tos = vrh.tos + AND voms.mode_type_of_service_status = vrh.mode_type_of_service_status -- where voms.ntd_id not in ("10089", "20170", "30069", "90178", "90179") -- These agencies have null for uace_cd and uza_name and perhaps are not good to -- have in the dataset. -- If you don't want them then add that where clause back in. ) -select * -from int_ntd__monthly_ridership_with_adjustments_joined + +SELECT * FROM int_ntd__monthly_ridership_with_adjustments_joined diff --git a/warehouse/models/intermediate/ntd/int_ntd__monthly_ridership_with_adjustments_upt.sql b/warehouse/models/intermediate/ntd/int_ntd__monthly_ridership_with_adjustments_upt.sql index 21e5d4e224..7381443506 100644 --- a/warehouse/models/intermediate/ntd/int_ntd__monthly_ridership_with_adjustments_upt.sql +++ b/warehouse/models/intermediate/ntd/int_ntd__monthly_ridership_with_adjustments_upt.sql @@ -1,72 +1,67 @@ -with - source_pivoted as ( +WITH + source_pivoted AS ( {{ dbt_utils.unpivot( cast_to="int", - relation=ref("stg_ntd__monthly_ridership_with_adjustments_upt"), + relation=ref("stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt"), exclude=[ - "uza_name", - "uace_cd", - "dt", - "ts", - "year", - "ntd_id", - "reporter_type", + "_3_mode", "agency", - "mode_type_of_service_status", + "legacy_ntd_id", "mode", - "_3_mode", + "mode_type_of_service_status", + "ntd_id", + "reporter_type", "tos", - "legacy_ntd_id", + "uace_cd", + "uza_name", + "dt", + "execution_ts" ], field_name="period", value_name="upt", ) }} ), - int_ntd__monthly_ridership_with_adjustments_upt as ( - select - uza_name, - format("%05d", cast(uace_cd as int64)) as uace_cd, - dt as _dt, - ts, - year, - format("%05d", cast(ntd_id as int64)) as ntd_id, - legacy_ntd_id, - reporter_type, - agency, - mode_type_of_service_status, - mode, - _3_mode, - tos, - split(period, '_')[offset(2)] as period_year, - split(period, '_')[offset(1)] as period_month, - upt - from source_pivoted - where - mode in ( - "DR", - "FB", - "LR", - "MB", - "SR", - "TB", - "VP", - "CB", - "RB", - "CR", - "YR", - "MG", - "MO", - "AR", - "TR", - "HR", - "OR", - "IP", - "AG", - "PB", - "CC" - ) + + int_ntd__monthly_ridership_with_adjustments_upt AS ( + SELECT format("%05d", cast(cast(ntd_id AS NUMERIC) AS INT64)) AS ntd_id, + legacy_ntd_id, + agency, + reporter_type, + split(period, '_')[offset(2)] AS period_year, + split(period, '_')[offset(1)] AS period_month, + uza_name, + format("%05d", cast(uace_cd AS INT64)) AS uace_cd, + mode, + mode_type_of_service_status, + _3_mode, + tos, + upt, + dt AS _dt, + execution_ts + FROM source_pivoted + WHERE mode IN ("AG", + "AR", + "CB", + "CC", + "CR", + "DR", + "FB", + "HR", + "IP", + "LR", + "MB", + "MG", + "MO", + "OR", + "PB", + "RB", + "SR", + "TB", + "TR", + "VP", + "YR") ) -select * -from int_ntd__monthly_ridership_with_adjustments_upt + +SELECT * FROM int_ntd__monthly_ridership_with_adjustments_upt diff --git a/warehouse/models/intermediate/ntd/int_ntd__monthly_ridership_with_adjustments_voms.sql b/warehouse/models/intermediate/ntd/int_ntd__monthly_ridership_with_adjustments_voms.sql index 8598ce18e2..26d04e94d6 100644 --- a/warehouse/models/intermediate/ntd/int_ntd__monthly_ridership_with_adjustments_voms.sql +++ b/warehouse/models/intermediate/ntd/int_ntd__monthly_ridership_with_adjustments_voms.sql @@ -1,72 +1,67 @@ -with - source_pivoted as ( +WITH + source_pivoted AS ( {{ dbt_utils.unpivot( cast_to="int", - relation=ref("stg_ntd__monthly_ridership_with_adjustments_voms"), + relation=ref("stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__voms"), exclude=[ - "uza_name", - "uace_cd", - "dt", - "ts", - "year", - "ntd_id", - "reporter_type", + "_3_mode", "agency", - "mode_type_of_service_status", + "legacy_ntd_id", "mode", - "_3_mode", + "mode_type_of_service_status", + "ntd_id", + "reporter_type", "tos", - "legacy_ntd_id", + "uace_cd", + "uza_name", + "dt", + "execution_ts" ], field_name="period", value_name="voms", ) }} ), - int_ntd__monthly_ridership_with_adjustments_voms as ( - select - uza_name, - format("%05d", cast(uace_cd as int64)) as uace_cd, - dt as _dt, - ts, - year, - format("%05d", cast(ntd_id as int64)) as ntd_id, - legacy_ntd_id, - reporter_type, - agency, - mode_type_of_service_status, - mode, - _3_mode, - tos, - split(period, '_')[offset(2)] as period_year, - split(period, '_')[offset(1)] as period_month, - voms - from source_pivoted - where - mode in ( - "DR", - "FB", - "LR", - "MB", - "SR", - "TB", - "VP", - "CB", - "RB", - "CR", - "YR", - "MG", - "MO", - "AR", - "TR", - "HR", - "OR", - "IP", - "AG", - "PB", - "CC" - ) + + int_ntd__monthly_ridership_with_adjustments_voms AS ( + SELECT format("%05d", cast(cast(ntd_id AS NUMERIC) AS INT64)) AS ntd_id, + legacy_ntd_id, + agency, + reporter_type, + split(period, '_')[offset(2)] AS period_year, + split(period, '_')[offset(1)] AS period_month, + uza_name, + format("%05d", cast(uace_cd AS INT64)) AS uace_cd, + mode, + mode_type_of_service_status, + _3_mode, + tos, + voms, + dt AS _dt, + execution_ts + FROM source_pivoted + WHERE mode IN ("AG", + "AR", + "CB", + "CC", + "CR", + "DR", + "FB", + "HR", + "IP", + "LR", + "MB", + "MG", + "MO", + "OR", + "PB", + "RB", + "SR", + "TB", + "TR", + "VP", + "YR") ) -select * -from int_ntd__monthly_ridership_with_adjustments_voms + +SELECT * FROM int_ntd__monthly_ridership_with_adjustments_voms diff --git a/warehouse/models/intermediate/ntd/int_ntd__monthly_ridership_with_adjustments_vrh.sql b/warehouse/models/intermediate/ntd/int_ntd__monthly_ridership_with_adjustments_vrh.sql index 21e32e2b80..1a7b62af3b 100644 --- a/warehouse/models/intermediate/ntd/int_ntd__monthly_ridership_with_adjustments_vrh.sql +++ b/warehouse/models/intermediate/ntd/int_ntd__monthly_ridership_with_adjustments_vrh.sql @@ -1,72 +1,67 @@ -with - source_pivoted as ( +WITH + source_pivoted AS ( {{ dbt_utils.unpivot( cast_to="int", - relation=ref("stg_ntd__monthly_ridership_with_adjustments_vrh"), + relation=ref("stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrh"), exclude=[ - "uza_name", - "uace_cd", - "dt", - "ts", - "year", - "ntd_id", - "reporter_type", + "_3_mode", "agency", - "mode_type_of_service_status", + "legacy_ntd_id", "mode", - "_3_mode", + "mode_type_of_service_status", + "ntd_id", + "reporter_type", "tos", - "legacy_ntd_id", + "uace_cd", + "uza_name", + "dt", + "execution_ts" ], field_name="period", value_name="vrh", ) }} ), - int_ntd__monthly_ridership_with_adjustments_vrh as ( - select - uza_name, - format("%05d", cast(uace_cd as int64)) as uace_cd, - dt as _dt, - ts, - year, - format("%05d", cast(ntd_id as int64)) as ntd_id, - legacy_ntd_id, - reporter_type, - agency, - mode_type_of_service_status, - mode, - _3_mode, - tos, - split(period, '_')[offset(2)] as period_year, - split(period, '_')[offset(1)] as period_month, - vrh - from source_pivoted - where - mode in ( - "DR", - "FB", - "LR", - "MB", - "SR", - "TB", - "VP", - "CB", - "RB", - "CR", - "YR", - "MG", - "MO", - "AR", - "TR", - "HR", - "OR", - "IP", - "AG", - "PB", - "CC" - ) + + int_ntd__monthly_ridership_with_adjustments_vrh AS ( + SELECT format("%05d", cast(cast(ntd_id AS NUMERIC) AS INT64)) AS ntd_id, + legacy_ntd_id, + agency, + reporter_type, + split(period, '_')[offset(2)] AS period_year, + split(period, '_')[offset(1)] AS period_month, + uza_name, + format("%05d", cast(uace_cd AS INT64)) AS uace_cd, + mode, + mode_type_of_service_status, + _3_mode, + tos, + vrh, + dt AS _dt, + execution_ts + FROM source_pivoted + WHERE mode IN ("AG", + "AR", + "CB", + "CC", + "CR", + "DR", + "FB", + "HR", + "IP", + "LR", + "MB", + "MG", + "MO", + "OR", + "PB", + "RB", + "SR", + "TB", + "TR", + "VP", + "YR") ) -select * -from int_ntd__monthly_ridership_with_adjustments_vrh + +SELECT * FROM int_ntd__monthly_ridership_with_adjustments_vrh diff --git a/warehouse/models/intermediate/ntd/int_ntd__monthly_ridership_with_adjustments_vrm.sql b/warehouse/models/intermediate/ntd/int_ntd__monthly_ridership_with_adjustments_vrm.sql index 95aa345e59..b1ee536d4b 100644 --- a/warehouse/models/intermediate/ntd/int_ntd__monthly_ridership_with_adjustments_vrm.sql +++ b/warehouse/models/intermediate/ntd/int_ntd__monthly_ridership_with_adjustments_vrm.sql @@ -1,72 +1,67 @@ -with - source_pivoted as ( +WITH + source_pivoted AS ( {{ dbt_utils.unpivot( cast_to="int", - relation=ref("stg_ntd__monthly_ridership_with_adjustments_vrm"), + relation=ref("stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm"), exclude=[ - "uza_name", - "uace_cd", - "dt", - "ts", - "year", - "ntd_id", - "reporter_type", + "_3_mode", "agency", - "mode_type_of_service_status", + "legacy_ntd_id", "mode", - "_3_mode", + "mode_type_of_service_status", + "ntd_id", + "reporter_type", "tos", - "legacy_ntd_id", + "uace_cd", + "uza_name", + "dt", + "execution_ts" ], field_name="period", value_name="vrm", ) }} ), - int_ntd__monthly_ridership_with_adjustments_vrm as ( - select - uza_name, - format("%05d", cast(uace_cd as int64)) as uace_cd, - dt as _dt, - ts, - year, - format("%05d", cast(ntd_id as int64)) as ntd_id, - legacy_ntd_id, - reporter_type, - agency, - mode_type_of_service_status, - mode, - _3_mode, - tos, - split(period, '_')[offset(2)] as period_year, - split(period, '_')[offset(1)] as period_month, - vrm - from source_pivoted - where - mode in ( - "DR", - "FB", - "LR", - "MB", - "SR", - "TB", - "VP", - "CB", - "RB", - "CR", - "YR", - "MG", - "MO", - "AR", - "TR", - "HR", - "OR", - "IP", - "AG", - "PB", - "CC" - ) + + int_ntd__monthly_ridership_with_adjustments_vrm AS ( + SELECT format("%05d", cast(cast(ntd_id AS NUMERIC) AS INT64)) AS ntd_id, + legacy_ntd_id, + agency, + reporter_type, + split(period, '_')[offset(2)] AS period_year, + split(period, '_')[offset(1)] AS period_month, + uza_name, + format("%05d", cast(uace_cd AS INT64)) AS uace_cd, + mode, + mode_type_of_service_status, + _3_mode, + tos, + vrm, + dt AS _dt, + execution_ts + FROM source_pivoted + WHERE mode IN ("AG", + "AR", + "CB", + "CC", + "CR", + "DR", + "FB", + "HR", + "IP", + "LR", + "MB", + "MG", + "MO", + "OR", + "PB", + "RB", + "SR", + "TB", + "TR", + "VP", + "YR") ) -select * -from int_ntd__monthly_ridership_with_adjustments_vrm + +SELECT * FROM int_ntd__monthly_ridership_with_adjustments_vrm diff --git a/warehouse/models/mart/ntd/_mart_ntd.yml b/warehouse/models/mart/ntd/_mart_ntd.yml index 55fc65ce25..f6035369aa 100644 --- a/warehouse/models/mart/ntd/_mart_ntd.yml +++ b/warehouse/models/mart/ntd/_mart_ntd.yml @@ -52,6 +52,9 @@ x-common-fields: - &mode_name name: mode_name description: '{{ doc("ntd_mode_name") }}' + - &_3_mode + name: _3_mode + description: '{{ doc("ntd_3_mode") }}' - &time_period name: time_period description: '{{ doc("ntd_time_period") }}' @@ -64,6 +67,24 @@ x-common-fields: - &actual_vehicle_hours name: actual_vehicle_hours description: '{{ doc("ntd_actual_vehicle_hours") }}' + - &upt + name: upt + description: '{{ doc("ntd_monthly_upt") }}' + - &voms + name: voms + description: '{{ doc("ntd_monthly_voms") }}' + - &vrh + name: vrh + description: '{{ doc("ntd_monthly_vrh") }}' + - &vrm + name: vrm + description: '{{ doc("ntd_monthly_vrm") }}' + - &mode_type_of_service_status + name: mode_type_of_service_status + description: '{{ doc("ntd_monthly_mode_type_of_service_status") }}' + - &service_type + name: service_type + description: '{{ doc("ntd_monthly_service_type") }}' models: - name: dim_annual_ntd_agency_information @@ -690,7 +711,9 @@ models: name: passengers_per_hour_questionable - name: dim_monthly_ntd_ridership_with_adjustments - description: >- + description: | + ** THIS MODEL WILL BE REMOVED! Please use `dim_monthly_ridership_with_adjustments`. ** + Extracts of the NTD Monthly Ridership with adjustments - VRM, VOMS, VRH, and UPT @@ -701,100 +724,13 @@ models: https://www.transit.dot.gov/ntd/national-transit-database-ntd-glossary or the reporting manuals located at https://www.transit.dot.gov/ntd/manuals for more detailed information. - - A Master File of all properties that currently report or reported monthly - data during the period January 2002 - Current Calendar Year. The following - data elements are included in the master file and reflect the data - reported in the Annual module for the most recently reported fiscal year:" - - NTD ID – The Transit Property’s NTD identification number in the Next - Generation NTD Database - - Legacy NTD ID – The Transit Property’s NTD identification number in the - Legacy NTD Database - - Name – The Transit Property’s legal name - - Mode – Indicates the means of conveyance used for the service. The NTD - recognizes a number of different modes including Motorbus (MB), Heavy Rail - (HR), Light Rail (LR), and Demand Response (DR), among others. - - Type of Service – Indicates whether the service is directly operated by - the transit property, or is operated by a third party through a purchased - transportation agreement. - - 3 Mode - A grouping of modes based upon whether the mode operates on rail, - is a bus mode, is ferry boat service or other. - - Status – Indicates whether a property reports (active) or not (inactive) - during the most recent Annual report year. - - HQ City – The city where the property’s headquarters (HQ) is located. - - HQ State – The state where the property’s headquarters (HQ) is located. - - Reporter Type - Indicates the agency's reporter type (Building Reporter, - Full Reporter, Reduced Asset Reporter, Rural Reporter, Separate Service - Reporter or Small System Waiver Reporter) as of their most recent report - year. - - Organization Type – Indicates the agency's organization type as of their - most recent report year. - - Urban Area Census Code (UACE) – A unique numeric identifier associated - with the 2020 urban areas as defined by the U.S. Census. - - Urbanized Area Name – The name of the urbanized area as defined by the - U.S. Census - - Urbanized Area Population – The population of the urbanized area as - reported by the U.S. Census. This is the population count for the year the - census was made. No population updates are made until the next Census - (2020). - - Urbanized Area Square Miles – The square miles of the urbanized area as - reported by the U.S. census. - - Property Service Area – The square miles of the area served by the - property, as reported by the transit property. This may include some rural - (non-urbanized) areas. - - Property Service Area Population – The population of the area served by - the property, as reported by the transit property. - - Most Recent Report Year (Annual module) – The property’s most-recent - closed-out annual report year. - - Fiscal Year Month – The month the property’s fiscal year ends. - - Fiscal Year End – The year in which the property’s most-recent closed-out - fiscal year ended. - - Passenger Miles - For the most recent closed-out annual report year. - - Unlinked Passenger Trips – For the most recent closed-out annual report - year. - - Average Trip Length – The ratio of Passenger Miles per Unlinked Passenger - trips. - - Fares – The fare revenues collected during the most recent closed-out - annual report year. - - Operating Expenses – For the most recent closed-out annual report year. - - Avg. Cost per Trip – The ratio of Total Operating Expenses per Unlinked - Passenger Trips - - Avg. Fares per Trip – The ratio of Fares Earned per Unlinked Passenger - Trips columns: - *primary_uza_name - *primary_uza_code - name: _dt - description: "" + description: Date when the data was extracted. - name: ts - description: "" + description: Date and Time when the data was extracted. - <<: *ntd_id tests: - not_null @@ -803,80 +739,105 @@ models: as well. - *reporter_type - *agency - - name: mode_type_of_service_status - description: Status – Indicates whether a property reports (active) or not - (inactive) during the most recent Annual report year. + - *mode_type_of_service_status - *mode - - name: service_type - description: A summarization of modes into Fixed Route and Demand Response - - name: _3_mode - description: 3 Mode - A grouping of modes based upon whether the mode operates - on rail, is a bus mode, is ferry boat service or other. + - *service_type + - *_3_mode - <<: *type_of_service name: tos - *ntd_legacy_id - name: period_year_month - description: "" - name: period_month - description: "" - name: period_year - description: "" - - name: upt - description: >- - Unlinked Passenger Trips (UPT) - - The number of passengers who board public transportation vehicles. - Passengers are counted each time they board vehicles no matter how - many vehicles they use to travel from their origin to their - destination. - - name: vrm - description: >- - Vehicle Revenue Miles (VRM) - - The miles that vehicles are scheduled to or actually travel while in - revenue service. Vehicle revenue miles include: - - • Layover / recovery time. - - Vehicle revenue miles exclude: + - *upt + - *voms + - *vrm + - *vrh - • Deadhead; - - • Operator training; - - • Vehicle maintenance testing; and - - • Other non-revenue uses of vehicles. - - name: vrh - description: >- - Vehicle Revenue Hours (VRH) - - The hours that vehicles are scheduled to or actually travel while in - revenue service. Vehicle revenue hours include: - - • Layover / recovery time. - - Vehicle revenue hours exclude: - - • Deadhead; - - • Operator training; - - • Vehicle maintenance testing; and - - • Other non-revenue uses of vehicles. - - name: voms - description: >- - Vehicles Operated in Annual Maximum Service (VOMS) - - The number of revenue vehicles operated to meet the annual maximum - service requirement. This is the revenue vehicle count during the peak - season of the year; on the week and day that maximum service is - provided. Vehicles operated in maximum service (VOMS) exclude: - - • Atypical days; or - - • One-time special events. + - name: dim_monthly_ridership_with_adjustments + description: >- + Extracts of the NTD Complete Monthly Ridership with Adjustments and Estimates (UPT, VOMS, VRH, and VRM) + that contains all monthly module data reported to the NTD since January 2002. + Mode and Type of Service Changes and Impacts on this Time Series + + Monthly data are reported by mode and type of service. + From 2002 through 2011, there were 16 modes in the NTD. + NTD monthly ridership data is now reported according to refined modal classifications. + Service previously reported as bus (MB) now may be reported as either MB, Commuter Bus (CB), + or Bus Rapid Transit (RB). + Additionally, service previously categorized as Light Rail (LR) now may be reported as LR or + Streetcar (SR). + + Similarly, Types of Service were refined in Report Year 2019. + From 2002 - 2018, there were two types of service: + Directly Operated (DO) and + Purchased Transportation (PT). + + As of 2019, Purchased Transportation is now classified such that agencies report the + purchased transportation based on the type of contractor: + general third party (PT), + taxicab operator (TX), + or transportation network company (TN). + + FTA concurrently removed the "Demand Response Taxi" (DT/PT) mode in 2019. + FTA now considers all such service as Demand Response (DR) with Taxi (TX) type of service and + this time series has been updated to reflect this change. + + This time series also considers modes which have been combined, collapsing the + Automated Guideway (AG) and Monorail (MO) mode historical data into the + Monorail/Automated Guideway (MG) mode to reflect the current reporting arrangement and + allow for easier analysis of these modes. + + Finally, FTA introduced the Hybrid Rail (YR) mode in January 2012. While this mode combines + some systems previously classified as either Commuter Rail (CR) or Light Rail (LR), + this change could not be done uniformly; most operators of Hybrid Rail continued to + offer services presently defined as CR or LR. + Therefore, this time series does not attempt to combine historically. + Data users should therefore take caution that the following systems converted completely + from Commuter Rail to Hybrid Rail around this time: + - 60048 Capital Metropolitan Transportation Authority (Commuter Rail to Hybrid Rail) + - 60101 Denton County Transportation Authority (Commuter Rail to Hybrid Rail) + - 90030 North County Transit District (Light Rail to Hybrid Rail) + + The dataset is found at: + https://www.transit.dot.gov/ntd/data-product/monthly-module-adjusted-data-release. + + For more information use the glossary of transit terms located at: + https://www.transit.dot.gov/ntd/national-transit-database-ntd-glossary + or the reporting manuals located at: https://www.transit.dot.gov/ntd/manuals. + columns: + - name: key + tests: + - not_null + - unique + - name: _dt + description: Date when the data was extracted. + - name: execution_ts + description: Date and Time when the data was extracted. + - name: period_year_month + description: The Year and Month for which data was collected. + - name: period_month + description: The Month for which data was collected. + - name: period_year + description: The Year for which data was collected. + - <<: *ntd_id + tests: + - not_null + - *reporter_type + - *agency + - *mode_type_of_service_status + - *mode + - *_3_mode + - *service_type + - <<: *type_of_service + name: tos + - *ntd_legacy_id + - *primary_uza_name + - *primary_uza_code + - *upt + - *voms + - *vrh + - *vrm - name: fct_annual_service_modes description: diff --git a/warehouse/models/mart/ntd/dim_monthly_ridership_with_adjustments.sql b/warehouse/models/mart/ntd/dim_monthly_ridership_with_adjustments.sql new file mode 100644 index 0000000000..835990098e --- /dev/null +++ b/warehouse/models/mart/ntd/dim_monthly_ridership_with_adjustments.sql @@ -0,0 +1,46 @@ +{{ config(materialized="table") }} + +WITH + source AS ( + SELECT * FROM {{ ref("int_ntd__monthly_ridership_with_adjustments_joined") }} + ), + + ntd_modes AS ( + SELECT * FROM {{ ref("int_ntd__modes") }} + ), + + dim_monthly_ridership_with_adjustments AS ( + SELECT {{ dbt_utils.generate_surrogate_key(['ntd_id', 'mode', 'period_month', 'period_year', 'tos']) }} as key, + ntd_id, + legacy_ntd_id, + agency, + reporter_type, + concat(period_year, '-', lpad(period_month, 2, '0')) AS period_year_month, + period_year, + period_month, + uza_name, + uace_cd, + _3_mode, + mode, + ntd_mode_full_name AS mode_full_name, + CASE + WHEN mode IN ('AG', 'AR', 'CB', 'CC', 'CR', 'FB', 'HR', 'IP', 'IP', 'LR', 'MB', 'MG', 'MO', 'RB', 'SR', 'TB', 'TR', 'YR') + THEN 'Fixed Route' + WHEN mode IN ('DR', 'DT', 'VP', 'JT', 'PB') + THEN 'Demand Response' + ELSE 'Unknown' -- mode is null sometimes + END AS service_type, + mode_type_of_service_status, + tos, + upt, + vrm, + vrh, + voms, + _dt, + execution_ts + FROM source + LEFT JOIN ntd_modes + ON source.mode = ntd_modes.ntd_mode_abbreviation + ) + +SELECT * FROM dim_monthly_ridership_with_adjustments diff --git a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_upt.sql b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_upt.sql index 4218fa605a..d9b4d6f709 100644 --- a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_upt.sql +++ b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_upt.sql @@ -2,6 +2,8 @@ WITH source AS ( SELECT * FROM {{ source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_upt') }} + WHERE ntd_id IS NOT NULL + -- Removing records without NTD_ID because contains "estimated monthly industry totals for Rural reporters" from the bottom of the scraped file ), stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_upt AS( diff --git a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_vrm.sql b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_vrm.sql index 2fed755139..836776c939 100644 --- a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_vrm.sql +++ b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_vrm.sql @@ -2,6 +2,8 @@ WITH source AS ( SELECT * FROM {{ source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_vrm') }} + WHERE ntd_id IS NOT NULL + -- Removing records without NTD_ID because contains "estimated monthly industry totals for Rural reporters" from the bottom of the scraped file ), stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__calendar_year_vrm AS( diff --git a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt.sql b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt.sql index d2c8a80502..a7fb450769 100644 --- a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt.sql +++ b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt.sql @@ -2,6 +2,8 @@ WITH source AS ( SELECT * FROM {{ source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__upt') }} + WHERE ntd_id IS NOT NULL + -- Removing records without NTD_ID because contains "estimated monthly industry totals for Rural reporters" from the bottom of the scraped file ), stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt AS( diff --git a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__voms.sql b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__voms.sql index 0cfbcf8d10..d4f99e0a38 100644 --- a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__voms.sql +++ b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__voms.sql @@ -2,6 +2,8 @@ WITH source AS ( SELECT * FROM {{ source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__voms') }} + WHERE ntd_id IS NOT NULL + -- Removing records without NTD_ID because contains "estimated monthly industry totals for Rural reporters" from the bottom of the scraped file ), stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__voms AS( diff --git a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrh.sql b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrh.sql index 3a47e7370c..baa46625fd 100644 --- a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrh.sql +++ b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrh.sql @@ -2,6 +2,8 @@ WITH source AS ( SELECT * FROM {{ source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__vrh') }} + WHERE ntd_id IS NOT NULL + -- Removing records without NTD_ID because contains "estimated monthly industry totals for Rural reporters" from the bottom of the scraped file ), stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrh AS( diff --git a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm.sql b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm.sql index 3e0e48ef44..0af9c590f4 100644 --- a/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm.sql +++ b/warehouse/models/staging/ntd_ridership/stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm.sql @@ -2,6 +2,8 @@ WITH source AS ( SELECT * FROM {{ source('external_ntd__ridership', 'historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm') }} + WHERE ntd_id IS NOT NULL + -- Removing records without NTD_ID because contains "estimated monthly industry totals for Rural reporters" from the bottom of the scraped file ), stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__vrm AS(