From 36fba27df261ac2bed327a9132042664934baa90 Mon Sep 17 00:00:00 2001 From: Gabriel Gazola Milan Date: Thu, 13 Oct 2022 13:57:43 -0300 Subject: [PATCH 01/10] feat: add siscob --- pipelines/constants.py | 2 + pipelines/rj_smi/__init__.py | 6 + pipelines/rj_smi/dump_db_siscob/__init__.py | 0 pipelines/rj_smi/dump_db_siscob/flows.py | 41 ++++++ pipelines/rj_smi/dump_db_siscob/schedules.py | 147 +++++++++++++++++++ 5 files changed, 196 insertions(+) create mode 100644 pipelines/rj_smi/__init__.py create mode 100644 pipelines/rj_smi/dump_db_siscob/__init__.py create mode 100644 pipelines/rj_smi/dump_db_siscob/flows.py create mode 100644 pipelines/rj_smi/dump_db_siscob/schedules.py diff --git a/pipelines/constants.py b/pipelines/constants.py index 32f269c81..ed1ea157d 100644 --- a/pipelines/constants.py +++ b/pipelines/constants.py @@ -37,6 +37,8 @@ class constants(Enum): # pylint: disable=c0103 RJ_SMFP_AGENT_LABEL = "rj-smfp" + RJ_SMI_AGENT_LABEL = "rj-smi" + RJ_SECONSERVA_AGENT_LABEL = "rj-seconserva" ###################################### diff --git a/pipelines/rj_smi/__init__.py b/pipelines/rj_smi/__init__.py new file mode 100644 index 000000000..2d765a859 --- /dev/null +++ b/pipelines/rj_smi/__init__.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +""" +Prefect flows for rj_smi project +""" + +from pipelines.rj_smi.dump_db_siscob.flows import * diff --git a/pipelines/rj_smi/dump_db_siscob/__init__.py b/pipelines/rj_smi/dump_db_siscob/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pipelines/rj_smi/dump_db_siscob/flows.py b/pipelines/rj_smi/dump_db_siscob/flows.py new file mode 100644 index 000000000..d76b173a9 --- /dev/null +++ b/pipelines/rj_smi/dump_db_siscob/flows.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +""" +Database dumping flows for segovi project +""" + +from copy import deepcopy + +from prefect.run_configs import KubernetesRun +from prefect.storage import GCS + +from pipelines.constants import constants +from 
pipelines.rj_smi.dump_db_siscob.schedules import ( + siscob_update_schedule, +) +from pipelines.utils.dump_db.flows import dump_sql_flow +from pipelines.utils.utils import set_default_parameters + + +dump_siscob_flow = deepcopy(dump_sql_flow) +dump_siscob_flow.name = "SMI: SISCOB - Ingerir tabelas de banco SQL" +dump_siscob_flow.storage = GCS(constants.GCS_FLOWS_BUCKET.value) +dump_siscob_flow.run_config = KubernetesRun( + image=constants.DOCKER_IMAGE.value, + labels=[ + constants.RJ_SMI_AGENT_LABEL.value, + ], +) + +siscob_default_parameters = { + "db_database": "SISCOB200", + "db_host": "10.70.1.34", + "db_port": "1433", + "db_type": "sql_server", + "vault_secret_path": "siscob", + "dataset_id": "infraestrutura_siscob_obras", +} +dump_siscob_flow = set_default_parameters( + dump_siscob_flow, default_parameters=siscob_default_parameters +) + +dump_siscob_flow.schedule = siscob_update_schedule diff --git a/pipelines/rj_smi/dump_db_siscob/schedules.py b/pipelines/rj_smi/dump_db_siscob/schedules.py new file mode 100644 index 000000000..68390a337 --- /dev/null +++ b/pipelines/rj_smi/dump_db_siscob/schedules.py @@ -0,0 +1,147 @@ +# -*- coding: utf-8 -*- +""" +Schedules for the database dump pipeline +""" + +from datetime import timedelta, datetime + +from prefect.schedules import Schedule +import pytz + +from pipelines.constants import constants +from pipelines.utils.dump_db.utils import generate_dump_db_schedules +from pipelines.utils.utils import untuple_clocks as untuple + + +##################################### +# +# SISCOB Schedules +# +##################################### + +siscob_queries = { + # "obra": { + # "materialize_after_dump": True, + # "materialization_mode": "prod", + # "dump_mode": "overwrite", + # "execute_query": """ + # Select + # CD_OBRA, + # DS_TITULO, + # ORGAO_CONTRATANTE, + # ORGAO_EXECUTOR, + # NR_PROCESSO, + # OBJETO, + # NM_FAVORECIDO, + # CNPJ, + # NR_LICITACAO, + # MODALIDADE, + # DT_ASS_CONTRATO, + # DT_INICIO_OBRA, + # 
DT_TERMINO_PREVISTO, + # DT_TERMINO_ATUAL, + # NR_CONTRATO, + # AA_EXERCICIO, + # SITUACAO, + # VL_ORCADO_C_BDI, + # VL_CONTRATADO, + # VL_VIGENTE, + # PC_MEDIDO + # from dbo.fuSEGOVI_Dados_da_Obra();""", + # }, + "medicao": { + "materialize_after_dump": True, + "materialization_mode": "prod", + "dump_mode": "overwrite", + "execute_query": """ + Select + CD_OBRA, + NR_MEDICAO, + CD_ETAPA, + TP_MEDICAO_D, + DT_INI_MEDICAO, + DT_FIM_MEDICAO, + VL_FINAL NUMERIC(12,2) + from dbo.fuSEGOVI_Medicoes(); + """, + }, + "termo_aditivo": { + "materialize_after_dump": True, + "materialization_mode": "prod", + "dump_mode": "overwrite", + "execute_query": """ + Select + CD_OBRA, + NR_DO_TERMO, + TP_ACERTO, + DT_DO, + DT_AUTORIZACAO, + VL_ACERTO + from dbo.fuSEGOVI_Termos_Aditivos(); + """, + }, + "cronograma_financeiro": { + "materialize_after_dump": True, + "materialization_mode": "prod", + "dump_mode": "overwrite", + "execute_query": """ + Select + CD_OBRA, + ETAPA, + DT_INICIO_ETAPA, + DT_FIM_ETAPA, + PC_PERCENTUAL, + VL_ESTIMADO + from dbo.fuSEGOVI_Cronograma_Financeiro(); + """, + }, + "localizacao": { + "materialize_after_dump": True, + "materialization_mode": "prod", + "dump_mode": "overwrite", + "execute_query": """ + Select + CD_OBRA, + ENDERECO, + NM_BAIRRO, + NM_RA, + NM_AP + from dbo.fuSEGOVI_Localizacoes_obra(); + """, + }, + "cronograma_alteracao": { + "materialize_after_dump": True, + "materialization_mode": "prod", + "dump_mode": "overwrite", + "execute_query": """ + Select + CD_OBRA, + NR_PROCESSO, + TP_ALTERACAO, + DT_PUBL_DO, + CD_ETAPA, + NR_PRAZO, + DT_VALIDADE, + DS_OBSERVACAO + from dbo.fuSEGOVI_Alteração_de_Cronograma(); + """, + }, +} + +# TODO: review +siscob_clocks = generate_dump_db_schedules( + interval=timedelta(days=7), + start_date=datetime(2022, 10, 2, 0, 0, tzinfo=pytz.timezone("America/Sao_Paulo")), + labels=[ + constants.RJ_SMI_AGENT_LABEL.value, + ], + db_database="SISCOB200", + db_host="10.70.1.34", + db_port="1433", + db_type="sql_server", + 
dataset_id="infraestrutura_siscob_obras", + vault_secret_path="siscob", + table_parameters=siscob_queries, +) + +siscob_update_schedule = Schedule(clocks=untuple(siscob_clocks)) From da63d0dc5b521953e455ec7c6c13fb06975e4a96 Mon Sep 17 00:00:00 2001 From: Gabriel Gazola Milan Date: Thu, 13 Oct 2022 13:58:15 -0300 Subject: [PATCH 02/10] chore: fix linting issues --- pipelines/rj_smi/dump_db_siscob/schedules.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pipelines/rj_smi/dump_db_siscob/schedules.py b/pipelines/rj_smi/dump_db_siscob/schedules.py index 68390a337..1f66d7abb 100644 --- a/pipelines/rj_smi/dump_db_siscob/schedules.py +++ b/pipelines/rj_smi/dump_db_siscob/schedules.py @@ -128,7 +128,6 @@ }, } -# TODO: review siscob_clocks = generate_dump_db_schedules( interval=timedelta(days=7), start_date=datetime(2022, 10, 2, 0, 0, tzinfo=pytz.timezone("America/Sao_Paulo")), From 4322c3249fbf47004f44da10058cc61466a72a00 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 13 Oct 2022 16:59:22 +0000 Subject: [PATCH 03/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/utils/dump_db/flows.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pipelines/utils/dump_db/flows.py b/pipelines/utils/dump_db/flows.py index aed48a56d..c87d6cafe 100644 --- a/pipelines/utils/dump_db/flows.py +++ b/pipelines/utils/dump_db/flows.py @@ -89,7 +89,9 @@ table_id = Parameter("table_id") dump_mode = Parameter("dump_mode", default="append") # overwrite or append batch_data_type = Parameter("batch_data_type", default="csv") # csv or parquet - dbt_model_secret_parameters = Parameter("dbt_model_secret_parameters", default={"hash_seed": "hash_seed" }) + dbt_model_secret_parameters = Parameter( + "dbt_model_secret_parameters", default={"hash_seed": "hash_seed"} + ) ##################################### # # Rename flow run From 
dbc1f5f6627b2cb460e2ded1166e425c470195f8 Mon Sep 17 00:00:00 2001 From: Gabriel Gazola Milan Date: Thu, 13 Oct 2022 14:10:38 -0300 Subject: [PATCH 04/10] fix: import `smi` flows --- pipelines/flows.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pipelines/flows.py b/pipelines/flows.py index e8f83e4b3..ab380e7ec 100644 --- a/pipelines/flows.py +++ b/pipelines/flows.py @@ -12,5 +12,6 @@ from pipelines.rj_segovi import * from pipelines.rj_sme import * from pipelines.rj_smfp import * +from pipelines.rj_smi import * from pipelines.rj_smtr import * from pipelines.utils import * From 091c1d0f7ce8ce2bc9a397f8fe1e9aee2172a38b Mon Sep 17 00:00:00 2001 From: Gabriel Gazola Milan Date: Thu, 13 Oct 2022 14:42:36 -0300 Subject: [PATCH 05/10] fix: `medicao` query --- pipelines/rj_smi/dump_db_siscob/schedules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/rj_smi/dump_db_siscob/schedules.py b/pipelines/rj_smi/dump_db_siscob/schedules.py index 1f66d7abb..11977ed41 100644 --- a/pipelines/rj_smi/dump_db_siscob/schedules.py +++ b/pipelines/rj_smi/dump_db_siscob/schedules.py @@ -61,7 +61,7 @@ TP_MEDICAO_D, DT_INI_MEDICAO, DT_FIM_MEDICAO, - VL_FINAL NUMERIC(12,2) + VL_FINAL from dbo.fuSEGOVI_Medicoes(); """, }, From 09f40a70c3a8314f96e08a2c91701abb979cefd3 Mon Sep 17 00:00:00 2001 From: d116626 Date: Fri, 4 Nov 2022 10:53:38 -0300 Subject: [PATCH 06/10] feat: add obras and programa_fonte to siscob_dump --- pipelines/rj_smi/dump_db_siscob/schedules.py | 76 ++++++++++++-------- 1 file changed, 47 insertions(+), 29 deletions(-) diff --git a/pipelines/rj_smi/dump_db_siscob/schedules.py b/pipelines/rj_smi/dump_db_siscob/schedules.py index 11977ed41..d2ba5641d 100644 --- a/pipelines/rj_smi/dump_db_siscob/schedules.py +++ b/pipelines/rj_smi/dump_db_siscob/schedules.py @@ -20,35 +20,37 @@ ##################################### siscob_queries = { - # "obra": { - # "materialize_after_dump": True, - # "materialization_mode": "prod", - # "dump_mode": "overwrite", 
- # "execute_query": """ - # Select - # CD_OBRA, - # DS_TITULO, - # ORGAO_CONTRATANTE, - # ORGAO_EXECUTOR, - # NR_PROCESSO, - # OBJETO, - # NM_FAVORECIDO, - # CNPJ, - # NR_LICITACAO, - # MODALIDADE, - # DT_ASS_CONTRATO, - # DT_INICIO_OBRA, - # DT_TERMINO_PREVISTO, - # DT_TERMINO_ATUAL, - # NR_CONTRATO, - # AA_EXERCICIO, - # SITUACAO, - # VL_ORCADO_C_BDI, - # VL_CONTRATADO, - # VL_VIGENTE, - # PC_MEDIDO - # from dbo.fuSEGOVI_Dados_da_Obra();""", - # }, + "obra": { + "materialize_after_dump": True, + "materialization_mode": "prod", + "dump_mode": "overwrite", + "execute_query": """ + Select + CD_OBRA, + DS_TITULO, + ORGAO_CONTRATANTE, + ORGAO_EXECUTOR, + NR_PROCESSO, + OBJETO, + NM_FAVORECIDO, + CNPJ, + NR_LICITACAO, + MODALIDADE, + DT_ASS_CONTRATO, + DT_INICIO_OBRA, + DT_TERMINO_PREVISTO, + DT_TERMINO_ATUAL, + NR_CONTRATO, + AA_EXERCICIO, + SITUACAO, + VL_ORCADO_C_BDI, + VL_CONTRATADO, + VL_VIGENTE, + PC_MEDIDO, + PRAZO_INICIAL + from dbo.fuSEGOVI_Dados_da_Obra(); + """, + }, "medicao": { "materialize_after_dump": True, "materialization_mode": "prod", @@ -126,6 +128,22 @@ from dbo.fuSEGOVI_Alteração_de_Cronograma(); """, }, + "programa_fonte": { + "materialize_after_dump": True, + "materialization_mode": "prod", + "dump_mode": "overwrite", + "execute_query": """ + Select + CD_OBRA, + CD_PRG_TRAB, + PROGRAMA_TRABALHO, + CD_FONTE_RECURSO, + FONTE_RECURSO, + CD_NATUREZA_DSP, + NATUREZA_DESPESA + from dbo.fuSEGOVI_Programa_Fonte(); + """, + }, } siscob_clocks = generate_dump_db_schedules( From 72f3c9befe4a75ddd9ac9d2cf8d9e4aafad17b92 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 4 Nov 2022 13:54:21 +0000 Subject: [PATCH 07/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/rj_smi/dump_db_siscob/schedules.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/rj_smi/dump_db_siscob/schedules.py 
b/pipelines/rj_smi/dump_db_siscob/schedules.py index d2ba5641d..152af5ead 100644 --- a/pipelines/rj_smi/dump_db_siscob/schedules.py +++ b/pipelines/rj_smi/dump_db_siscob/schedules.py @@ -25,7 +25,7 @@ "materialization_mode": "prod", "dump_mode": "overwrite", "execute_query": """ - Select + Select CD_OBRA, DS_TITULO, ORGAO_CONTRATANTE, @@ -133,7 +133,7 @@ "materialization_mode": "prod", "dump_mode": "overwrite", "execute_query": """ - Select + Select CD_OBRA, CD_PRG_TRAB, PROGRAMA_TRABALHO, From 606e65b0df3ff162c64f558983fbe20ad6013d73 Mon Sep 17 00:00:00 2001 From: d116626 Date: Fri, 4 Nov 2022 11:03:53 -0300 Subject: [PATCH 08/10] feat: add obras and programa_fonte to siscob_dump --- pipelines/rj_smi/dump_db_siscob/flows.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/rj_smi/dump_db_siscob/flows.py b/pipelines/rj_smi/dump_db_siscob/flows.py index d76b173a9..27bb68c1a 100644 --- a/pipelines/rj_smi/dump_db_siscob/flows.py +++ b/pipelines/rj_smi/dump_db_siscob/flows.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- """ -Database dumping flows for segovi project +Database dumping flows for segovi project (SISCOB) """ from copy import deepcopy From b78e8095bf7ffe814cc5299ec7327ff1f2ca175a Mon Sep 17 00:00:00 2001 From: d116626 Date: Fri, 4 Nov 2022 11:40:57 -0300 Subject: [PATCH 09/10] fix: generate_dump_db_schedules remove non parameter default field --- pipelines/utils/dump_db/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/utils/dump_db/utils.py b/pipelines/utils/dump_db/utils.py index bb4dd5683..f9c12b598 100644 --- a/pipelines/utils/dump_db/utils.py +++ b/pipelines/utils/dump_db/utils.py @@ -80,7 +80,7 @@ def generate_dump_db_schedules( # pylint: disable=too-many-arguments,too-many-l # Add remaining parameters if value is not None for key, value in parameters.items(): - if value is not None: + if value is not None and key not in ['interval']: parameter_defaults[key] = value new_interval =
parameters["interval"] if "interval" in parameters else interval From c4ef51f8f982fafc1c643a625fa926eb7f41ef62 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 4 Nov 2022 14:41:14 +0000 Subject: [PATCH 10/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/utils/dump_db/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/utils/dump_db/utils.py b/pipelines/utils/dump_db/utils.py index f9c12b598..a1e521c04 100644 --- a/pipelines/utils/dump_db/utils.py +++ b/pipelines/utils/dump_db/utils.py @@ -80,7 +80,7 @@ def generate_dump_db_schedules( # pylint: disable=too-many-arguments,too-many-l # Add remaining parameters if value is not None for key, value in parameters.items(): - if value is not None and key not in ['interval']: + if value is not None and key not in ["interval"]: parameter_defaults[key] = value new_interval = parameters["interval"] if "interval" in parameters else interval