From f59acc07af9a222056ed76307219436df5299efb Mon Sep 17 00:00:00 2001 From: Rafael Pinheiro Date: Thu, 16 Jan 2025 17:31:17 -0300 Subject: [PATCH] revisar ordem_servico_diaria --- .../br_rj_riodejaneiro_gtfs/flows.py | 2 +- queries/dbt_project.yml | 8 ++ .../dashboard_operacao_onibus/CHANGELOG.md | 7 ++ .../ordem_servico_diaria.sql | 18 ++++ .../dashboard_operacao_onibus/schema.yml | 24 ++++++ .../staging/aux_ordem_servico_diaria_v1.sql | 85 +++++++++++++++++++ .../staging/aux_ordem_servico_diaria_v2.sql | 71 ++++++++++++++++ .../staging/servicos_sentido.sql | 67 +++++++++++++++ queries/models/gtfs/CHANGELOG.md | 10 +++ queries/models/gtfs/ordem_servico_diaria.sql | 75 ---------------- .../gtfs/ordem_servico_viagens_planejadas.sql | 44 ---------- queries/models/gtfs/schema.yml | 43 ---------- queries/models/gtfs/servicos_sentido.sql | 81 ------------------ 13 files changed, 291 insertions(+), 244 deletions(-) create mode 100644 queries/models/dashboard_operacao_onibus/CHANGELOG.md create mode 100644 queries/models/dashboard_operacao_onibus/ordem_servico_diaria.sql create mode 100644 queries/models/dashboard_operacao_onibus/schema.yml create mode 100644 queries/models/dashboard_operacao_onibus/staging/aux_ordem_servico_diaria_v1.sql create mode 100644 queries/models/dashboard_operacao_onibus/staging/aux_ordem_servico_diaria_v2.sql create mode 100644 queries/models/dashboard_operacao_onibus/staging/servicos_sentido.sql delete mode 100644 queries/models/gtfs/ordem_servico_diaria.sql delete mode 100644 queries/models/gtfs/ordem_servico_viagens_planejadas.sql delete mode 100644 queries/models/gtfs/servicos_sentido.sql diff --git a/pipelines/migration/br_rj_riodejaneiro_gtfs/flows.py b/pipelines/migration/br_rj_riodejaneiro_gtfs/flows.py index b5e72a59b..eac3f44a7 100644 --- a/pipelines/migration/br_rj_riodejaneiro_gtfs/flows.py +++ b/pipelines/migration/br_rj_riodejaneiro_gtfs/flows.py @@ -2,7 +2,7 @@ """ Flows for gtfs -DBT 2025-01-13 +DBT 2025-01-16 """ from prefect import Parameter, case, task diff --git a/queries/dbt_project.yml b/queries/dbt_project.yml index ed8796ff1..2eba6b1e3 100644 --- a/queries/dbt_project.yml +++ b/queries/dbt_project.yml @@ -108,6 +108,7 @@ vars: shapes_version: "YYYY-MM-DD" frequencies_version: "YYYY-MM-DD" tipo_materializacao: "subsidio" + data_inicio_trips_shapes: "2023-04-01" # Feature penalidade de autuação por inoperância do ar condicionado (DECRETO RIO 51940/2023) DATA_SUBSIDIO_V2_INICIO: "2023-01-16" # Feature penalidade de autuação por segurança e limpeza/equipamento (DECRETO RIO 52820/2023) @@ -372,3 +373,10 @@ models: datario: +materialized: view +schema: datario + dashboard_operacao_onibus: + +materialized: incremental + +incremental_strategy: insert_overwrite + +schema: dashboard_operacao_onibus + staging: + +materialized: view + +schema: dashboard_operacao_onibus_staging diff --git a/queries/models/dashboard_operacao_onibus/CHANGELOG.md b/queries/models/dashboard_operacao_onibus/CHANGELOG.md new file mode 100644 index 000000000..ef3c74622 --- /dev/null +++ b/queries/models/dashboard_operacao_onibus/CHANGELOG.md @@ -0,0 +1,7 @@ +# Changelog - gtfs + +## [1.0.0] - 2025-01-16 + +### Adicionado + +- Cria novo dataset para views utilizadas no dashboard de operação dos ônibus \ No newline at end of file diff --git a/queries/models/dashboard_operacao_onibus/ordem_servico_diaria.sql b/queries/models/dashboard_operacao_onibus/ordem_servico_diaria.sql new file mode 100644 index 000000000..433775c5c --- /dev/null +++ b/queries/models/dashboard_operacao_onibus/ordem_servico_diaria.sql @@ -0,0 +1,18 @@ +{{ + config( + materialized="view", + labels={ + "dashboard": "yes", + }, + ) +}} + +select * +from {{ ref("aux_ordem_servico_diaria_v1") }} +where data < "{{ var('data_inicio_trips_shapes') }}" + +union all + +select * +from {{ ref("aux_ordem_servico_diaria_v2") }} +where data >= "{{ var('data_inicio_trips_shapes') }}" diff --git a/queries/models/dashboard_operacao_onibus/schema.yml b/queries/models/dashboard_operacao_onibus/schema.yml new file mode 100644 index 000000000..aaf2f0247 --- /dev/null +++ b/queries/models/dashboard_operacao_onibus/schema.yml @@ -0,0 +1,24 @@ +version: 2 + +models: + - name: ordem_servico_diaria + description: "Dados diários das linhas presentes no município." + columns: + - name: DATA + description: "Data da ordem de serviço." + - name: tipo_dia + description: "{{ doc('tipo_dia') }}" + - name: servico + description: "{{ doc('servico') }}" + - name: vista + description: "{{ doc('vista') }}" + - name: consorcio + description: "{{ doc('consorcio') }}" + - name: sentido + description: "{{ doc('sentido') }}" + - name: viagens_planejadas + description: "{{ doc('viagens_planejadas') }}" + - name: inicio_periodo + description: "{{ doc('inicio_periodo') }}" + - name: fim_periodo + description: "{{ doc('fim_periodo') }}" \ No newline at end of file diff --git a/queries/models/dashboard_operacao_onibus/staging/aux_ordem_servico_diaria_v1.sql b/queries/models/dashboard_operacao_onibus/staging/aux_ordem_servico_diaria_v1.sql new file mode 100644 index 000000000..b30c94426 --- /dev/null +++ b/queries/models/dashboard_operacao_onibus/staging/aux_ordem_servico_diaria_v1.sql @@ -0,0 +1,85 @@ +{{ config(materialized="ephemeral") }} + +with + feed_start_date as ( + select + feed_start_date, + feed_start_date as data_inicio, + coalesce( + date_sub( + lead(feed_start_date) over (order by feed_start_date), + interval 1 day + ), + last_day(feed_start_date, month) + ) as data_fim + from (select distinct feed_start_date from {{ ref("ordem_servico_gtfs") }}) + ), + ordem_servico_pivot as ( + select * + from + {{ ref("ordem_servico_gtfs") }} pivot ( + max(partidas_ida) as partidas_ida, + max(partidas_volta) as partidas_volta, + max(viagens_planejadas) as viagens_planejadas, + max(distancia_total_planejada) as km for + tipo_dia in ( + 'Dia Útil' as du, + 'Ponto Facultativo' as pf, + 'Sabado' as sab, + 'Domingo' as dom + ) + ) + ), + subsidio_feed_start_date_efetiva as ( + select + data, split(tipo_dia, " - ")[0] as tipo_dia, tipo_dia as tipo_dia_original + from {{ ref("subsidio_data_versao_efetiva") }} + ) +select + data, + tipo_dia_original as tipo_dia, + servico, + vista, + consorcio, + sentido, + case + {% set tipo_dia = { + "Dia Útil": "du", + "Ponto Facultativo": "pf", + "Sabado": "sab", + "Domingo": "dom", + } %} + {% set sentido = {"ida": ("I", "C"), "volta": "V"} %} + {%- for key_s, value_s in sentido.items() %} + {%- for key_td, value_td in tipo_dia.items() %} + when + sentido + {% if key_s == "ida" %} in {{ value_s }} + {% else %} = "{{ value_s }}" + {% endif %} and tipo_dia = "{{ key_td }}" + then + {% if key_td in ["Sabado", "Domingo"] %} + round( + safe_divide( + (partidas_{{ key_s }}_du * km_{{ value_td }}), km_du + ) + ) + {% else %} partidas_{{ key_s }}_{{ value_td }} + {% endif %} + {% endfor -%} + {% endfor -%} + end as viagens_planejadas, + horario_inicio as inicio_periodo, + horario_fim as fim_periodo +from + unnest( + generate_date_array( + (select min(data_inicio) from feed_start_date), + (select max(data_fim) from feed_start_date) + ) + ) as data +left join feed_start_date as d on data between d.data_inicio and d.data_fim +left join subsidio_feed_start_date_efetiva as sd using (data) +left join ordem_servico_pivot as o using (feed_start_date) +left join {{ ref("servicos_sentido") }} using (feed_start_date, servico) +where data < "{{ var('data_inicio_trips_shapes') }}" diff --git a/queries/models/dashboard_operacao_onibus/staging/aux_ordem_servico_diaria_v2.sql b/queries/models/dashboard_operacao_onibus/staging/aux_ordem_servico_diaria_v2.sql new file mode 100644 index 000000000..60cd84f23 --- /dev/null +++ b/queries/models/dashboard_operacao_onibus/staging/aux_ordem_servico_diaria_v2.sql @@ -0,0 +1,71 @@ +{{ config(materialized="ephemeral") }} + + +with + feed_info as (select * from `rj-smtr`.`gtfs`.`feed_info`), + ordem_servico_pivot as ( + select * + from + {{ ref("ordem_servico_gtfs") }} pivot ( + max(partidas_ida) as partidas_ida, + max(partidas_volta) as partidas_volta, + max(viagens_planejadas) as viagens_planejadas, + max(distancia_total_planejada) as km for tipo_dia in ( + 'Dia Útil' as du, + 'Ponto Facultativo' as pf, + 'Sabado' as sab, + 'Domingo' as dom + ) + ) + ), + subsidio_feed_start_date_efetiva as ( + select + data, + tipo_os, + split(tipo_dia, " - ")[0] as tipo_dia, + tipo_dia as tipo_dia_original + from {{ ref("subsidio_data_versao_efetiva") }} + ), + ordem_servico_trips_shapes as ( + select distinct feed_start_date, servico, tipo_os, sentido + from {{ ref("ordem_servico_trips_shapes_gtfs") }} + ) +select + data, + tipo_dia_original as tipo_dia, + servico, + vista, + consorcio, + sentido, + case + when sentido in ('I', 'C') and tipo_dia = "Dia Útil" + then partidas_ida_du + when sentido in ('I', 'C') and tipo_dia = "Ponto Facultativo" + then partidas_ida_pf + when sentido in ('I', 'C') and tipo_dia = "Sabado" + then round(safe_divide((partidas_ida_du * km_sab), km_du)) + when sentido in ('I', 'C') and tipo_dia = "Domingo" + then round(safe_divide((partidas_ida_du * km_dom), km_du)) + when sentido = "V" and tipo_dia = "Dia Útil" + then partidas_volta_du + when sentido = "V" and tipo_dia = "Ponto Facultativo" + then partidas_volta_pf + when sentido = "V" and tipo_dia = "Sabado" + then round(safe_divide((partidas_volta_du * km_sab), km_du)) + when sentido = "V" and tipo_dia = "Domingo" + then round(safe_divide((partidas_volta_du * km_dom), km_du)) + end as viagens_planejadas, + horario_inicio as inicio_periodo, + horario_fim as fim_periodo +from + unnest( + generate_date_array( + (select min(feed_start_date) from feed_info), + (select max(feed_end_date) from feed_info) + ) + ) as data +left join feed_info as d on data between d.feed_start_date and d.feed_end_date +left join ordem_servico_pivot as o using (feed_start_date) +inner join subsidio_feed_start_date_efetiva as sd using (data, tipo_os) +left join ordem_servico_trips_shapes using (feed_start_date, servico, tipo_os) +where data >= "{{ var('data_inicio_trips_shapes') }}" diff --git a/queries/models/dashboard_operacao_onibus/staging/servicos_sentido.sql b/queries/models/dashboard_operacao_onibus/staging/servicos_sentido.sql new file mode 100644 index 000000000..d000baa4f --- /dev/null +++ b/queries/models/dashboard_operacao_onibus/staging/servicos_sentido.sql @@ -0,0 +1,67 @@ +{{ config(materialized="view") }} + +with + servicos_exclusivos_sabado as ( + select distinct servico + from {{ ref("ordem_servico_gtfs") }} + where tipo_dia = "Dia Útil" and viagens_planejadas = 0 + ), + servicos as ( + select * except (versao_modelo, shape) + from {{ ref("trips_gtfs") }} as t + left join {{ ref("shapes_geom_gtfs") }} as s using (feed_start_date, shape_id) + where + ( + feed_start_date >= "2023-06-01" + and ( + trip_short_name not in (select * from servicos_exclusivos_sabado) + and (service_id like "U_R%" or service_id like "U_O%") + ) + or ( + trip_short_name in (select * from servicos_exclusivos_sabado) + and (service_id like "S_R%" or service_id like "S_O%") + ) + ) + or ( + feed_start_date < "2023-06-01" + and ( + trip_short_name not in (select * from servicos_exclusivos_sabado) + or ( + trip_short_name in (select * from servicos_exclusivos_sabado) + and service_id = "S" + ) + ) + ) + and shape_distance is not null + ), + servicos_rn as ( + select + *, + row_number() over ( + partition by feed_start_date, trip_short_name, direction_id + order by trip_short_name, service_id, shape_id, direction_id + ) as rn + from servicos + ), + servicos_filtrada as (select * except (rn) from servicos_rn where rn = 1), + servicos_potencialmente_circulares as ( + select + feed_start_date, trip_short_name, count(distinct direction_id) as q_direcoes + from servicos_filtrada + group by 1, 2 + having count(distinct direction_id) = 1 + ) +select + feed_start_date, + trip_short_name as servico, + case + when q_direcoes = 1 and st_distance(start_pt, end_pt) <= 50 + then "C" + when direction_id = "0" + then "I" + when direction_id = "1" + then "V" + end as sentido +from servicos_filtrada as sf +left join + servicos_potencialmente_circulares as spc using (feed_start_date, trip_short_name) diff --git a/queries/models/gtfs/CHANGELOG.md b/queries/models/gtfs/CHANGELOG.md index f54e43191..4e2d8de0c 100644 --- a/queries/models/gtfs/CHANGELOG.md +++ b/queries/models/gtfs/CHANGELOG.md @@ -1,5 +1,15 @@ # Changelog - gtfs +## [1.2.1] - 2025-01-16 + +### Alterado + +- Move modelos `ordem_servico_diaria.sql` e `servicos_sentido.sql` para o dataset `dashboard_operacao_onibus` + +### Removido + +- Modelo `ordem_servico_viagens_planejadas.sql` deletado + ## [1.2.0] - 2024-12-04 ### Alterado diff --git a/queries/models/gtfs/ordem_servico_diaria.sql b/queries/models/gtfs/ordem_servico_diaria.sql deleted file mode 100644 index 027dce3bc..000000000 --- a/queries/models/gtfs/ordem_servico_diaria.sql +++ /dev/null @@ -1,75 +0,0 @@ -{{ config( - materialized="view" -) }} - -WITH - feed_start_date AS ( - SELECT - feed_start_date, - feed_start_date AS data_inicio, - COALESCE(DATE_SUB(LEAD(feed_start_date) OVER (ORDER BY feed_start_date), INTERVAL 1 DAY), LAST_DAY(feed_start_date, MONTH)) AS data_fim - FROM ( - SELECT - DISTINCT feed_start_date, - FROM - {{ ref("ordem_servico_gtfs") }} )), - ordem_servico_pivot AS ( - SELECT - * - FROM - {{ ref("ordem_servico_gtfs") }} - PIVOT( MAX(partidas_ida) AS partidas_ida, - MAX(partidas_volta) AS partidas_volta, - MAX(viagens_planejadas) AS viagens_planejadas, - MAX(distancia_total_planejada) AS km FOR - tipo_dia IN ( - 'Dia Útil' AS du, - 'Ponto Facultativo' AS pf, - 'Sabado' AS sab, - 'Domingo' AS dom ))), - subsidio_feed_start_date_efetiva AS ( - SELECT - data, - SPLIT(tipo_dia, " - ")[0] AS tipo_dia, - tipo_dia AS tipo_dia_original - FROM - {{ ref("subsidio_data_versao_efetiva") }} ) -SELECT - DATA, - tipo_dia_original AS tipo_dia, - servico, - vista, - consorcio, - sentido, - CASE - {% set tipo_dia = {"Dia Útil": "du", "Ponto Facultativo": "pf", "Sabado": "sab", "Domingo": "dom"} %} - {% set sentido = {"ida": ("I", "C"), "volta": "V"} %} - {%- for key_s, value_s in sentido.items() %} - {%- for key_td, value_td in tipo_dia.items() %} - WHEN sentido {% if key_s == "ida" %} IN {{ value_s }} {% else %} = "{{ value_s }}" {% endif %} AND tipo_dia = "{{ key_td }}" THEN {% if key_td in ["Sabado", "Domingo"] %} ROUND(SAFE_DIVIDE((partidas_{{ key_s }}_du * km_{{ value_td }}), km_du)) {% else %} partidas_{{ key_s }}_{{ value_td }} {% endif %} - {% endfor -%} - {% endfor -%} -END - AS viagens_planejadas, - horario_inicio AS inicio_periodo, - horario_fim AS fim_periodo -FROM - UNNEST(GENERATE_DATE_ARRAY((SELECT MIN(data_inicio) FROM feed_start_date), (SELECT MAX(data_fim) FROM feed_start_date))) AS DATA -LEFT JOIN - feed_start_date AS d -ON - DATA BETWEEN d.data_inicio - AND d.data_fim -LEFT JOIN - subsidio_feed_start_date_efetiva AS sd -USING - (DATA) -LEFT JOIN - ordem_servico_pivot AS o -USING - (feed_start_date) -LEFT JOIN - {{ ref("servicos_sentido") }} -USING - (feed_start_date, - servico) diff --git a/queries/models/gtfs/ordem_servico_viagens_planejadas.sql b/queries/models/gtfs/ordem_servico_viagens_planejadas.sql deleted file mode 100644 index b490699b5..000000000 --- a/queries/models/gtfs/ordem_servico_viagens_planejadas.sql +++ /dev/null @@ -1,44 +0,0 @@ -{{ config( - materialized="view" -) }} - -WITH - data_versao AS ( - SELECT - feed_start_date, - feed_start_date AS data_inicio, - COALESCE(DATE_SUB(LEAD(feed_start_date) OVER (ORDER BY feed_start_date), INTERVAL 1 DAY), LAST_DAY(feed_start_date, MONTH)) AS data_fim - FROM ( - SELECT - DISTINCT feed_start_date, - FROM - {{ ref("ordem_servico_gtfs") }} )), - subsidio_data_versao_efetiva AS ( - SELECT - * EXCEPT(tipo_dia), - SPLIT(tipo_dia, " - ")[0] AS tipo_dia - FROM - {{ ref("subsidio_data_versao_efetiva") }} ) -SELECT - data, - sd.tipo_dia, - servico, - viagens_planejadas -FROM - UNNEST(GENERATE_DATE_ARRAY((SELECT MIN(data_inicio) FROM data_versao), (SELECT MAX(data_fim) FROM data_versao))) AS data -LEFT JOIN - data_versao AS d -ON - data BETWEEN d.data_inicio - AND d.data_fim -LEFT JOIN - subsidio_data_versao_efetiva AS sd -ON - data = sd.data - AND (d.feed_start_date = sd.feed_start_date - OR sd.feed_start_date IS NULL) -LEFT JOIN - {{ ref("ordem_servico_gtfs") }} AS o -ON - d.feed_start_date = o.feed_start_date - AND sd.tipo_dia = o.tipo_dia \ No newline at end of file diff --git a/queries/models/gtfs/schema.yml b/queries/models/gtfs/schema.yml index e26008e7a..1c728f139 100644 --- a/queries/models/gtfs/schema.yml +++ b/queries/models/gtfs/schema.yml @@ -372,49 +372,6 @@ models: - name: versao_modelo description: "Código de controle de versão (SHA do GitHub)." - - name: ordem_servico_diaria - description: "Dados diários das linhas presentes no município." - columns: - - name: DATA - description: "Data da ordem de serviço." - - name: tipo_dia - description: "{{ doc('tipo_dia') }}" - - name: servico - description: "{{ doc('servico') }}" - - name: vista - description: "{{ doc('vista') }}" - - name: consorcio - description: "{{ doc('consorcio') }}" - - name: sentido - description: "{{ doc('sentido') }}" - - name: viagens_planejadas - description: "{{ doc('viagens_planejadas') }}" - - name: inicio_periodo - description: "{{ doc('inicio_periodo') }}" - - name: fim_periodo - description: "{{ doc('fim_periodo') }}" - - - name: ordem_servico_viagens_planejadas - description: "Viagens planejadas das linhas presentes no município." - columns: - - name: data - description: "Data da ordem de serviço." - - name: tipo_dia - description: "{{ doc('tipo_dia') }}" - - name: servico - description: "{{ doc('servico') }}" - - name: viagens_planejadas - description: "{{ doc('viagens_planejadas') }}" - - - name: servicos_sentido - description: "Sentido dos serviços (I - Ida, V - Volta, C - Circular)" - columns: - - name: feed_start_date - description: "{{ doc('feed_start_date') }}" - - name: servico - description: "{{ doc('servico') }}" - - name: sentido - description: "{{ doc('sentido') }}" - name: ordem_servico_gtfs description: "Dados das linhas presentes no município." columns: diff --git a/queries/models/gtfs/servicos_sentido.sql b/queries/models/gtfs/servicos_sentido.sql deleted file mode 100644 index a9298242f..000000000 --- a/queries/models/gtfs/servicos_sentido.sql +++ /dev/null @@ -1,81 +0,0 @@ -{{ config( - materialized="view" -) }} - -WITH - servicos_exclusivos_sabado AS ( - SELECT - DISTINCT servico - FROM - {{ ref("ordem_servico_gtfs") }} - WHERE - tipo_dia = "Dia Útil" - AND viagens_planejadas = 0), - servicos AS ( - SELECT - * EXCEPT(versao_modelo, - shape) - FROM - {{ ref("trips_gtfs") }} AS t - LEFT JOIN - {{ ref("shapes_geom_gtfs") }} AS s - USING - (feed_start_date, - shape_id) - WHERE - (feed_start_date >= "2023-06-01" AND - ( trip_short_name NOT IN (SELECT * FROM servicos_exclusivos_sabado) - AND (service_id LIKE "U_R%" - OR service_id LIKE "U_O%") ) - OR ( trip_short_name IN (SELECT * FROM servicos_exclusivos_sabado) - AND (service_id LIKE "S_R%" - OR service_id LIKE "S_O%"))) - OR - (feed_start_date < "2023-06-01" AND - ( trip_short_name NOT IN (SELECT * FROM servicos_exclusivos_sabado) - OR ( trip_short_name IN (SELECT * FROM servicos_exclusivos_sabado) - AND service_id = "S"))) - AND shape_distance IS NOT NULL), - servicos_rn AS ( - SELECT - *, - ROW_NUMBER() OVER (PARTITION BY feed_start_date, trip_short_name, direction_id ORDER BY trip_short_name, service_id, shape_id, direction_id) AS rn - FROM - servicos ), - servicos_filtrada AS ( - SELECT - * EXCEPT(rn) - FROM - servicos_rn - WHERE - rn = 1), - servicos_potencialmente_circulares AS ( - SELECT - feed_start_date, - trip_short_name, - COUNT(DISTINCT direction_id) AS q_direcoes - FROM - servicos_filtrada - GROUP BY - 1, - 2 - HAVING - COUNT(DISTINCT direction_id) = 1 ) -SELECT - feed_start_date, - trip_short_name AS servico, - CASE - WHEN q_direcoes = 1 AND ST_DISTANCE(start_pt, end_pt) <= 50 THEN "C" - WHEN direction_id = "0" THEN "I" - WHEN direction_id = "1" THEN "V" -END - AS sentido -FROM - servicos_filtrada AS sf -LEFT JOIN - servicos_potencialmente_circulares AS spc -USING - (feed_start_date, - trip_short_name) - -