Skip to content

Commit

Permalink
Use new NTD monthly data sources for Staging and Mart tables
Browse files Browse the repository at this point in the history

- Also Format SQL files and reorder columns
- Remove irrelevant year column from dim model
- Filter out null NTD_ids
- Edit description on documentation

[#3519]
  • Loading branch information
erikamov committed Nov 13, 2024
1 parent 142231c commit 53f4fb0
Show file tree
Hide file tree
Showing 16 changed files with 496 additions and 448 deletions.
4 changes: 3 additions & 1 deletion script/scrape_ntd.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@
poetry install
then
poetry run python scrape_ntd.py annual-database-agency-information 2021 https://www.transit.dot.gov/sites/fta.dot.gov/files/2022-10/2021%20Agency%20Information.xlsx
poetry run python scrape_ntd.py monthly-ridership-with-adjustments 2024 https://www.transit.dot.gov/sites/fta.dot.gov/files/2024-04/February%202024%20Complete%20Monthly%20Ridership%20%28with%20adjustments%20and%20estimates%29_240402_0.xlsx
-- poetry run python scrape_ntd.py monthly-ridership-with-adjustments 2024 https://www.transit.dot.gov/sites/fta.dot.gov/files/2024-04/February%202024%20Complete%20Monthly%20Ridership%20%28with%20adjustments%20and%20estimates%29_240402_0.xlsx
-- REPLACED by "/airflow/plugins/operators/scrape_ntd_xlsx.py"
"""

import gzip
Expand Down
53 changes: 53 additions & 0 deletions warehouse/models/docs/_docs_ntd.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ A system for carrying transit passengers described by specific right-of-way (ROW
- Jitney (JT)
{% enddocs %}

{% docs ntd_3_mode %}
A grouping of modes based on whether the mode operates on rail, is a bus mode, is a ferry boat service, or is another type of service.
{% enddocs %}

{% docs ntd_time_period %}
The time period for which data was collected.
Valid values are:
Expand Down Expand Up @@ -124,3 +128,52 @@ Actual vehicle hours exclude:
- Operator training,
- Vehicle maintenance testing.
{% enddocs %}

{% docs ntd_monthly_upt %}
Unlinked Passenger Trips (UPT) -
The number of passengers who board public transportation vehicles.
Passengers are counted each time they board vehicles no matter how
many vehicles they use to travel from their origin to their destination.
{% enddocs %}

{% docs ntd_monthly_voms %}
Vehicles Operated in Annual Maximum Service (VOMS) -
The number of revenue vehicles operated to meet the annual maximum
service requirement. This is the revenue vehicle count during the peak
season of the year, on the week and day that maximum service is
provided. Vehicles operated in maximum service (VOMS) exclude:

- Atypical days; or
- One-time special events.
{% enddocs %}

{% docs ntd_monthly_vrh %}
Vehicle Revenue Hours (VRH) -
The hours that vehicles are scheduled to or actually travel while in
revenue service. Vehicle revenue hours include:

- Layover / recovery time.

Vehicle revenue hours exclude:

- Deadhead;
- Operator training;
- Vehicle maintenance testing; and
- Other non-revenue uses of vehicles.
{% enddocs %}

{% docs ntd_monthly_vrm %}
Vehicle Revenue Miles (VRM) -
The miles that vehicles are scheduled to or actually travel while in
revenue service. Vehicle revenue miles include:

- Layover / recovery time.

Vehicle revenue miles exclude:

- Deadhead;
- Operator training;
- Vehicle maintenance testing; and
- Other non-revenue uses of vehicles.
{% enddocs %}

{% docs ntd_monthly_mode_type_of_service_status %}
Indicates whether a property reports (active) or not (inactive) during the most recent Annual report year.
{% enddocs %}

{% docs ntd_monthly_service_type %}
A summarization of modes into `Fixed Route`, `Demand Response`, or `Unknown`.
{% enddocs %}
20 changes: 10 additions & 10 deletions warehouse/models/intermediate/ntd/int_ntd.yml
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
version: 2

models:
- name: int_ntd__monthly_ridership_with_adjustments_upt
description: |
Ridership - upt
description: '{{ doc("ntd_monthly_upt") }}'
config:
materialized: table

- name: int_ntd__monthly_ridership_with_adjustments_vrm
description: |
Ridership - vrm
description: '{{ doc("ntd_monthly_vrm") }}'
config:
materialized: table

- name: int_ntd__monthly_ridership_with_adjustments_vrh
description: |
Ridership - vrh
description: '{{ doc("ntd_monthly_vrh") }}'
config:
materialized: table

- name: int_ntd__monthly_ridership_with_adjustments_voms
description: |
Ridership - voms
description: '{{ doc("ntd_monthly_voms") }}'
config:
materialized: table

- name: int_ntd__monthly_ridership_with_adjustments_joined
description: |
Ridership - joined
description: Ridership - joined VRM, VOMS, VRH, and UPT
config:
materialized: table
Original file line number Diff line number Diff line change
@@ -1,60 +1,54 @@
{{ config(materialized="table") }}

with
voms as (
select * from {{ ref("int_ntd__monthly_ridership_with_adjustments_voms") }}
),
WITH
voms AS (SELECT * FROM {{ ref("int_ntd__monthly_ridership_with_adjustments_voms") }}),
vrh AS (SELECT * FROM {{ ref("int_ntd__monthly_ridership_with_adjustments_vrh") }}),
vrm AS (SELECT * FROM {{ ref("int_ntd__monthly_ridership_with_adjustments_vrm") }}),
upt AS (SELECT * FROM {{ ref("int_ntd__monthly_ridership_with_adjustments_upt") }}),

vrh as (select * from {{ ref("int_ntd__monthly_ridership_with_adjustments_vrh") }}),
vrm as (select * from {{ ref("int_ntd__monthly_ridership_with_adjustments_vrm") }}),
upt as (select * from {{ ref("int_ntd__monthly_ridership_with_adjustments_upt") }}),
int_ntd__monthly_ridership_with_adjustments_joined AS (
SELECT voms.*,
upt.upt,
vrm.vrm,
vrh.vrh
FROM voms

int_ntd__monthly_ridership_with_adjustments_joined as (
select voms.*, upt.upt, vrm.vrm, vrh.vrh
from voms
FULL OUTER JOIN upt
ON voms.ntd_id = upt.ntd_id
AND voms.mode = upt.mode
AND voms.reporter_type = upt.reporter_type
AND voms.agency = upt.agency
AND voms._3_mode = upt._3_mode
AND voms.period_month = upt.period_month
AND voms.period_year = upt.period_year
AND voms.tos = upt.tos
AND voms.mode_type_of_service_status = upt.mode_type_of_service_status

full outer join
upt
on voms.ntd_id = upt.ntd_id
and voms.year = upt.year
and voms.mode = upt.mode
and voms.reporter_type = upt.reporter_type
and voms.agency = upt.agency
and voms._3_mode = upt._3_mode
and voms.period_month = upt.period_month
and voms.period_year = upt.period_year
and voms.tos = upt.tos
and voms.mode_type_of_service_status = upt.mode_type_of_service_status
FULL OUTER JOIN vrm
ON voms.ntd_id = vrm.ntd_id
AND voms.mode = vrm.mode
AND voms.reporter_type = vrm.reporter_type
AND voms.agency = vrm.agency
AND voms._3_mode = vrm._3_mode
AND voms.period_month = vrm.period_month
AND voms.period_year = vrm.period_year
AND voms.tos = vrm.tos
AND voms.mode_type_of_service_status = vrm.mode_type_of_service_status

full outer join
vrm
on voms.ntd_id = vrm.ntd_id
and voms.year = vrm.year
and voms.mode = vrm.mode
and voms.reporter_type = vrm.reporter_type
and voms.agency = vrm.agency
and voms._3_mode = vrm._3_mode
and voms.period_month = vrm.period_month
and voms.period_year = vrm.period_year
and voms.tos = vrm.tos
and voms.mode_type_of_service_status = vrm.mode_type_of_service_status

full outer join
vrh
on voms.ntd_id = vrh.ntd_id
and voms.year = vrh.year
and voms.mode = vrh.mode
and voms.reporter_type = vrh.reporter_type
and voms.agency = vrh.agency
and voms._3_mode = vrh._3_mode
and voms.period_month = vrh.period_month
and voms.period_year = vrh.period_year
and voms.tos = vrh.tos
and voms.mode_type_of_service_status = vrh.mode_type_of_service_status
FULL OUTER JOIN vrh
ON voms.ntd_id = vrh.ntd_id
AND voms.mode = vrh.mode
AND voms.reporter_type = vrh.reporter_type
AND voms.agency = vrh.agency
AND voms._3_mode = vrh._3_mode
AND voms.period_month = vrh.period_month
AND voms.period_year = vrh.period_year
AND voms.tos = vrh.tos
AND voms.mode_type_of_service_status = vrh.mode_type_of_service_status
-- where voms.ntd_id not in ("10089", "20170", "30069", "90178", "90179")
-- These agencies have null for uace_cd and uza_name and perhaps are not good to
-- have in the dataset.
-- If you don't want them then add that where clause back in.
)
select *
from int_ntd__monthly_ridership_with_adjustments_joined

SELECT * FROM int_ntd__monthly_ridership_with_adjustments_joined
Original file line number Diff line number Diff line change
@@ -1,72 +1,67 @@
with
source_pivoted as (
WITH
source_pivoted AS (
{{
dbt_utils.unpivot(
cast_to="int",
relation=ref("stg_ntd__monthly_ridership_with_adjustments_upt"),
relation=ref("stg_ntd_ridership_historical__complete_monthly_ridership_with_adjustments_and_estimates__upt"),
exclude=[
"uza_name",
"uace_cd",
"dt",
"ts",
"year",
"ntd_id",
"reporter_type",
"_3_mode",
"agency",
"mode_type_of_service_status",
"legacy_ntd_id",
"mode",
"_3_mode",
"mode_type_of_service_status",
"ntd_id",
"reporter_type",
"tos",
"legacy_ntd_id",
"uace_cd",
"uza_name",
"dt",
"execution_ts"
],
field_name="period",
value_name="upt",
)
}}
),
int_ntd__monthly_ridership_with_adjustments_upt as (
select
uza_name,
format("%05d", cast(uace_cd as int64)) as uace_cd,
dt as _dt,
ts,
year,
format("%05d", cast(ntd_id as int64)) as ntd_id,
legacy_ntd_id,
reporter_type,
agency,
mode_type_of_service_status,
mode,
_3_mode,
tos,
split(period, '_')[offset(2)] as period_year,
split(period, '_')[offset(1)] as period_month,
upt
from source_pivoted
where
mode in (
"DR",
"FB",
"LR",
"MB",
"SR",
"TB",
"VP",
"CB",
"RB",
"CR",
"YR",
"MG",
"MO",
"AR",
"TR",
"HR",
"OR",
"IP",
"AG",
"PB",
"CC"
)

int_ntd__monthly_ridership_with_adjustments_upt AS (
SELECT format("%05d", cast(cast(ntd_id AS NUMERIC) AS INT64)) AS ntd_id,
legacy_ntd_id,
agency,
reporter_type,
split(period, '_')[offset(2)] AS period_year,
split(period, '_')[offset(1)] AS period_month,
uza_name,
format("%05d", cast(uace_cd AS INT64)) AS uace_cd,
mode,
mode_type_of_service_status,
_3_mode,
tos,
upt,
dt AS _dt,
execution_ts
FROM source_pivoted
WHERE mode IN ("AG",
"AR",
"CB",
"CC",
"CR",
"DR",
"FB",
"HR",
"IP",
"LR",
"MB",
"MG",
"MO",
"OR",
"PB",
"RB",
"SR",
"TB",
"TR",
"VP",
"YR")
)
select *
from int_ntd__monthly_ridership_with_adjustments_upt

SELECT * FROM int_ntd__monthly_ridership_with_adjustments_upt
Loading

0 comments on commit 53f4fb0

Please sign in to comment.