From 7b28fb85a6a78ba8d0b0896e8a28259b68d8a1d5 Mon Sep 17 00:00:00 2001 From: ThiagoTrabach Date: Sat, 25 Nov 2023 20:34:36 -0300 Subject: [PATCH 01/25] Add new files and update imports --- pipelines/rj_sms/__init__.py | 1 + .../dump_api_prontuario_vitacare/constants.py | 31 ++++ .../dump_api_prontuario_vitacare/flows.py | 119 ++++++++++++ .../dump_api_prontuario_vitacare/tasks.py | 81 ++++++++ .../rj_sms/dump_api_prontuario_vitai/flows.py | 54 +++--- pipelines/rj_sms/dump_ftp_cnes/flows.py | 2 +- pipelines/rj_sms/dump_ftp_cnes/tasks.py | 2 +- pipelines/rj_sms/{utils.py => tasks.py} | 173 +++++++++++++++--- 8 files changed, 412 insertions(+), 51 deletions(-) create mode 100644 pipelines/rj_sms/dump_api_prontuario_vitacare/constants.py create mode 100644 pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py create mode 100644 pipelines/rj_sms/dump_api_prontuario_vitacare/tasks.py rename pipelines/rj_sms/{utils.py => tasks.py} (77%) diff --git a/pipelines/rj_sms/__init__.py b/pipelines/rj_sms/__init__.py index bb37a9dca..be9479389 100644 --- a/pipelines/rj_sms/__init__.py +++ b/pipelines/rj_sms/__init__.py @@ -6,3 +6,4 @@ from pipelines.rj_sms.dump_db_sivep.flows import * from pipelines.rj_sms.dump_ftp_cnes.flows import * from pipelines.rj_sms.dump_api_prontuario_vitai.flows import * +from pipelines.rj_sms.dump_api_prontuario_vitacare.flows import * diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/constants.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/constants.py new file mode 100644 index 000000000..2f16f1611 --- /dev/null +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/constants.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +# pylint: disable=C0103 +""" +Constants for utils. +""" +from enum import Enum + + +class constants(Enum): + """ + Constant values for the dump vitai flows + """ + + VAULT_PATH = "prontuario_vitacare" + DATASET_ID = "brutos_prontuario_vitacare" + BASE_URL = { + "10" : "http://consolidado-ap10.pepvitacare.com:8088", + "21" : "http://consolidado-ap21.pepvitacare.com:8088", + "22" : "http://consolidado-ap22.pepvitacare.com:8088", + "31" : "http://consolidado-ap31.pepvitacare.com:8089", + "32" : "http://consolidado-ap32.pepvitacare.com:8088", + "33" : "http://consolidado-ap33.pepvitacare.com:8089", + "40" : "http://consolidado-ap40.pepvitacare.com:8089", + "51" : "http://consolidado-ap51.pepvitacare.com:8089", + "52" : "http://consolidado-ap52.pepvitacare.com:8088", + "53" : "http://consolidado-ap53.pepvitacare.com:8090", + } + ENDPOINT = { + "posicao" : "/reports/pharmacy/stocks", + "movimento" : "/reports/pharmacy/movements" + } diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py new file mode 100644 index 000000000..63a6edcc5 --- /dev/null +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py @@ -0,0 +1,119 @@ +# -*- coding: utf-8 -*- +# pylint: disable=C0103 + +from prefect import Parameter, case +from prefect.run_configs import KubernetesRun +from prefect.storage import GCS +from pipelines.utils.decorators import Flow +from pipelines.constants import constants +from pipelines.utils.tasks import ( + rename_current_flow_run_dataset_table, +) +from pipelines.rj_sms.dump_api_prontuario_vitacare.constants import ( + constants as vitacare_constants, +) +from pipelines.rj_sms.tasks import ( + get_secret, + create_folders, + cloud_function_request, + create_partitions, + upload_to_datalake, +) +from pipelines.rj_sms.dump_api_prontuario_vitacare.tasks import ( + build_url, + 
build_params, + create_filename, + save_data_to_file, +) + +from pipelines.rj_sms.dump_api_prontuario_vitai.schedules import every_day_at_six_am + + +with Flow( + name="SMS: Dump VitaCare - Captura ", code_owners=["thiago"] +) as dump_vitacare: # noqa: E501 + # Set Parameters + # Vault + VAULT_PATH = vitacare_constants.VAULT_PATH.value + + # Vitacare API + AP = Parameter("ap", required=True, default="10") + ENDPOINT = Parameter("endpoint", required=True) + DATE = Parameter("date", default="today") + + # GCP + DATASET_ID = Parameter( + "DATASET_ID", default=vitacare_constants.DATASET_ID.value + ) # noqa: E501 + TABLE_ID = Parameter("table_id", required=True) + + + # Start run + + # rename_flow_run = rename_current_flow_run_dataset_table( + # prefix="SMS Dump VitaCare: ", + # dataset_id=TABLE_ID, + # table_id=f"ap{AP}" + # ) + + get_secret_task = get_secret(secret_path=VAULT_PATH) + + create_folders_task = create_folders() + create_folders_task.set_upstream(get_secret_task) # pylint: disable=E1101 + + build_url_task = build_url(ap=AP, endpoint=ENDPOINT) + + build_params_task = build_params(date_param=DATE) + build_params_task.set_upstream(create_folders_task) # pylint: disable=E1101 + + download_task = cloud_function_request( + url=build_url_task, + credential=get_secret_task, + request_type="GET", + body_params=None, + query_params=build_params_task, + env="prod", + ) + download_task.set_upstream(build_url_task) + + file_name_task = create_filename(table_id=TABLE_ID, ap=AP) + file_name_task.set_upstream(download_task) + + save_data_task = save_data_to_file( + data=download_task, + file_folder=create_folders_task["raw"], + table_id=TABLE_ID, + ap=AP, + add_load_date_to_filename=True, + load_date=build_params_task["date"] + ) + save_data_task.set_upstream(file_name_task) # pylint: disable=E1101 + + with case(save_data_task, True): + create_partitions_task = create_partitions( + data_path=create_folders_task["raw"], + partition_directory=create_folders_task["partition_directory"] + ) + create_partitions_task.set_upstream(save_data_task) + + upload_to_datalake_task = upload_to_datalake( + input_path=create_folders_task["partition_directory"], + dataset_id=DATASET_ID, + table_id=TABLE_ID, + if_exists="replace", + csv_delimiter=";", + if_storage_data_exists="replace", + biglake_table=True, + ) + upload_to_datalake_task.set_upstream(create_partitions_task) + + +dump_vitacare.storage = GCS(constants.GCS_FLOWS_BUCKET.value) +dump_vitacare.run_config = KubernetesRun( + image=constants.DOCKER_IMAGE.value, + labels=[ + constants.RJ_SMS_DEV_AGENT_LABEL.value, + ], +) + +dump_vitacare.schedule = every_day_at_six_am diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/tasks.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/tasks.py new file mode 100644 index 000000000..3cd6e9f64 --- /dev/null +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/tasks.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +""" +Tasks for dump_api_prontuario_vitacare +""" + +from datetime import ( + date, + datetime, + timedelta, +) + +from prefect import task + +from pipelines.rj_sms.dump_api_prontuario_vitacare.constants import constants as vitacare_constants +from pipelines.utils.utils import log +from pipelines.rj_sms.tasks import ( + from_json_to_csv, + add_load_date_column, + save_to_file +) + + + +@task +def build_url(ap: str, endpoint: str) -> str: + + url = f"{vitacare_constants.BASE_URL.value[ap]}{vitacare_constants.ENDPOINT.value[endpoint]}" # noqa: E501 + log(f"URL built: {url}") + return url + +@task +def 
build_params(date_param: str = "today"): + if date_param == "today": + params = {"date": str(date.today())} + elif date_param == "yesterday": + params = {"date": str(date.today() - timedelta(days=1))} + else: + try: + # check if date_param is a date string + datetime.datetime.strptime(date_param, '%Y-%m-%d') + params = {"date": date_param} + except ValueError: + raise ValueError("date_param must be a date string (YYYY-MM-DD)") + + log(f"Params built: {params}") + return params + +@task +def create_filename(table_id: str, ap: str) -> str: + return f"{table_id}_ap{ap}" + +@task +def save_data_to_file( + data: str, + file_folder: str, + table_id: str, + ap: str, + add_load_date_to_filename: bool = False, + load_date: str = None, +): + + file_path = save_to_file.run( + data=data, + file_folder=file_folder, + file_name=f"{table_id}_ap{ap}", + add_load_date_to_filename=add_load_date_to_filename, + load_date=load_date + ) + + with open(file_path, 'r', encoding="UTF-8") as f: + first_line = f.readline().strip() + + if first_line == '[]': + log("The json content is empty.") + return False + else: + csv_file_path = from_json_to_csv.run(input_path=file_path, sep=";") + + add_load_date_column.run(input_path=csv_file_path, sep=";") + return True + diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py index 21966990c..28cbfc396 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py @@ -12,7 +12,8 @@ from pipelines.rj_sms.dump_api_prontuario_vitai.constants import ( constants as vitai_constants, ) -from pipelines.rj_sms.utils import ( +from pipelines.rj_sms.tasks import ( + get_secret, create_folders, from_json_to_csv, download_from_api, @@ -32,22 +33,25 @@ ) as dump_vitai_posicao: # Parameters # Parameters for Vault - vault_path = vitai_constants.VAULT_PATH.value - vault_key = vitai_constants.VAULT_KEY.value + VAULT_PATH = vitai_constants.VAULT_PATH.value + VAULT_KEY = vitai_constants.VAULT_KEY.value # Paramenters for GCP - dataset_id = vitai_constants.DATASET_ID.value - table_id = vitai_constants.TABLE_POSICAO_ID.value + DATASET_ID = vitai_constants.DATASET_ID.value + TABLE_ID = vitai_constants.TABLE_POSICAO_ID.value # Start run + get_secret_task = get_secret(secret_path=VAULT_PATH, secret_key=VAULT_KEY) + create_folders_task = create_folders() + create_folders_task.set_upstream(get_secret_task) # pylint: disable=E1101 download_task = download_from_api( url="https://apidw.vitai.care/api/dw/v1/produtos/saldoAtual", - params=None, file_folder=create_folders_task["raw"], - file_name=table_id, - vault_path=vault_path, - vault_key=vault_key, + file_name=TABLE_ID, + params=None, + crendentials=get_secret_task, + auth_method="bearer", add_load_date_to_filename=True, ) download_task.set_upstream(create_folders_task) @@ -68,8 +72,8 @@ upload_to_datalake_task = upload_to_datalake( input_path=create_folders_task["partition_directory"], - dataset_id=dataset_id, - table_id=table_id, + dataset_id=DATASET_ID, + table_id=TABLE_ID, if_exists="replace", csv_delimiter=";", if_storage_data_exists="replace", @@ -94,30 +98,32 @@ ) as dump_vitai_movimentos: # Parameters # Parameters for Vault - vault_path = vitai_constants.VAULT_PATH.value - vault_key = vitai_constants.VAULT_KEY.value + VAULT_PATH = vitai_constants.VAULT_PATH.value # Paramenters for GCP - dataset_id = vitai_constants.DATASET_ID.value - table_id = vitai_constants.TABLE_MOVIMENTOS_ID.value + DATASET_ID = 
vitai_constants.DATASET_ID.value + TABLE_ID = vitai_constants.TABLE_MOVIMENTOS_ID.value # Parameters for Vitai - date = Parameter("date", default=None) + DATE = Parameter("date", default=None) # Start run + get_secret_task = get_secret(secret_path=VAULT_PATH, secret_key=VAULT_KEY) + create_folders_task = create_folders() + create_folders_task.set_upstream(get_secret_task) # pylint: disable=E1101 - build_date_task = build_movimentos_date(date_param=date) + build_date_task = build_movimentos_date(date_param=DATE) build_date_task.set_upstream(create_folders_task) - build_url_task = build_movimentos_url(date_param=date) + build_url_task = build_movimentos_url(date_param=DATE) build_url_task.set_upstream(build_date_task) download_task = download_from_api( url=build_url_task, - params=None, file_folder=create_folders_task["raw"], - file_name=table_id, - vault_path=vault_path, - vault_key=vault_key, + file_name=TABLE_ID, + params=None, + crendentials=get_secret_task, + auth_method="bearer", add_load_date_to_filename=True, load_date=build_date_task, ) @@ -139,8 +145,8 @@ upload_to_datalake_task = upload_to_datalake( input_path=create_folders_task["partition_directory"], - dataset_id=dataset_id, - table_id=table_id, + dataset_id=DATASET_ID, + table_id=TABLE_ID, if_exists="replace", csv_delimiter=";", if_storage_data_exists="replace", diff --git a/pipelines/rj_sms/dump_ftp_cnes/flows.py b/pipelines/rj_sms/dump_ftp_cnes/flows.py index 703da146e..b3f8498a1 100644 --- a/pipelines/rj_sms/dump_ftp_cnes/flows.py +++ b/pipelines/rj_sms/dump_ftp_cnes/flows.py @@ -10,7 +10,7 @@ from pipelines.utils.decorators import Flow from pipelines.constants import constants from pipelines.rj_sms.dump_ftp_cnes.constants import constants as cnes_constants -from pipelines.rj_sms.utils import create_folders, unzip_file +from pipelines.rj_sms.tasks import create_folders, unzip_file from pipelines.rj_sms.dump_ftp_cnes.tasks import ( conform_csv_to_gcp, create_partitions_and_upload_multiple_tables_to_datalake, diff --git a/pipelines/rj_sms/dump_ftp_cnes/tasks.py b/pipelines/rj_sms/dump_ftp_cnes/tasks.py index b0715008c..4b2c598f8 100644 --- a/pipelines/rj_sms/dump_ftp_cnes/tasks.py +++ b/pipelines/rj_sms/dump_ftp_cnes/tasks.py @@ -13,7 +13,7 @@ from prefect import task from pipelines.utils.utils import log from pipelines.rj_sms.dump_ftp_cnes.constants import constants -from pipelines.rj_sms.utils import ( +from pipelines.rj_sms.tasks import ( list_files_ftp, upload_to_datalake, download_ftp, diff --git a/pipelines/rj_sms/utils.py b/pipelines/rj_sms/tasks.py similarity index 77% rename from pipelines/rj_sms/utils.py rename to pipelines/rj_sms/tasks.py index d55eb6dfa..7902b77a9 100644 --- a/pipelines/rj_sms/utils.py +++ b/pipelines/rj_sms/tasks.py @@ -8,6 +8,7 @@ import re import shutil import sys +import json from datetime import datetime, date from pathlib import Path from ftplib import FTP @@ -16,9 +17,48 @@ import pytz import pandas as pd import basedosdados as bd +import google.oauth2.id_token +import google.auth.transport.requests from azure.storage.blob import BlobServiceClient from prefect import task -from pipelines.utils.utils import log, get_vault_secret +from pipelines.utils.utils import ( + log, + get_vault_secret, + get_username_and_password_from_secret, +) + + +@task +def get_secret(secret_path: str, secret_key: str = None): + """ + Retrieves a secret stored in a vault. + + Args: + secret_path (str): The path to the secret in the vault. + secret_key (str, optional): The key of the secret in the vault. 
If not provided, the entire secret will be returned. + + Returns: + str: The secret retrieved from the vault. + """ # noqa: E501 + + if secret_key is None: + return get_vault_secret(secret_path)["data"] + else: + return get_vault_secret(secret_path)["data"][secret_key] + + +@task +def get_username_and_password(secret_path: str): + """ + Retrieves the username and password from a secret stored in a vault. + + Args: + secret_path (str): The path to the secret in the vault. + + Returns: + Tuple[str, str]: A tuple containing the username and password retrieved from the secret. + """ + return get_username_and_password_from_secret(secret_path) @task @@ -61,8 +101,8 @@ def download_from_api( file_folder: str, file_name: str, params=None, - vault_path=None, - vault_key=None, + crendentials=None, + auth_method="bearer", add_load_date_to_filename=False, load_date=None, ): @@ -70,35 +110,33 @@ def download_from_api( Downloads data from an API and saves it to a local file. Args: - url (str): The URL of the API to download data from. + url (str): The URL of the API endpoint. file_folder (str): The folder where the downloaded file will be saved. file_name (str): The name of the downloaded file. - params (dict, optional): Additional parameters to include in the API request. - vault_path (str, optional): The path in Vault where the authentication token is stored. - vault_key (str, optional): The key in Vault where the authentication token is stored. - add_load_date_to_filename (bool, optional): Whether to add the current date to the filename. - load_date (str, optional): The specific date to add to the filename. + params (dict, optional): Additional parameters to be included in the API request. Defaults to None. + crendentials (str or tuple, optional): The credentials to be used for authentication. Defaults to None. + auth_method (str, optional): The authentication method to be used. Valid values are "bearer" and "basic". Defaults to "bearer". + add_load_date_to_filename (bool, optional): Whether to add the load date to the filename. Defaults to False. + load_date (str, optional): The load date to be added to the filename. Defaults to None. Returns: - str: The path of the downloaded file. - """ + str: The file path of the downloaded file. + + Raises: + ValueError: If the API call fails. 
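+
+    Example:
+        A minimal illustrative call (the URL is the Vitai stock endpoint already
+        used by these flows; the token value is a placeholder):
+
+        download_from_api.run(
+            url="https://apidw.vitai.care/api/dw/v1/produtos/saldoAtual",
+            file_folder="/tmp/raw",
+            file_name="estoque_posicao",
+            crendentials="<bearer token>",
+            auth_method="bearer",
+            add_load_date_to_filename=True,
+        )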
+ """ # noqa: E501 + # Retrieve the API key from Vault - auth_token = "" - if vault_key is not None: - try: - auth_token = get_vault_secret(secret_path=vault_path)["data"][vault_key] - log("Vault secret retrieved") - except Exception as e: - log(f"Not able to retrieve Vault secret {e}", level="error") - - # Download data from API - log("Downloading data from API") - headers = {} if auth_token == "" else {"Authorization": f"Bearer {auth_token}"} params = {} if params is None else params - try: + + log("Downloading data from API") + if auth_method == "bearer": + headers = {"Authorization": f"Bearer {crendentials}"} response = requests.get(url, headers=headers, params=params) - except Exception as e: - log(f"An error occurred: {e}", level="error") + elif auth_method == "basic": + response = requests.get(url, auth=crendentials, params=params) + else: + response = requests.get(url, params=params) if response.status_code == 200: api_data = response.json() @@ -246,6 +284,67 @@ def callback(block): return output_path +@task +def cloud_function_request( + url: str, + credential: None, + request_type: str = "GET", + body_params: list = None, + query_params: list = None, + env: str = "dev", +): + """ + Sends a request to an endpoint trough a cloud function. + This method is used when the endpoint is only accessible through a fixed IP. + + Args: + url (str): The URL of the endpoint. + request_type (str, optional): The type of the request (e.g., GET, POST). Defaults to "GET". + body_params (list, optional): The body parameters of the request. Defaults to None. + query_params (list, optional): The query parameters of the request. Defaults to None. + env (str, optional): The environment of the cloud function (e.g., staging, prod). Defaults to "staging". + credential (None): The credential for the request. Defaults to None. + + Returns: + requests.Response: The response from the cloud function. 
+ """ # noqa: E501 + + if env == "prod": + cloud_function_url = "https://us-central1-rj-sms.cloudfunctions.net/vitacare" + elif env == "dev": + cloud_function_url = "https://us-central1-rj-sms-dev.cloudfunctions.net/vitacare" + else: + raise ValueError("env must be 'prod' or 'dev'") + + TOKEN = os.environ.get("GOOGLE_TOKEN") + # request = google.auth.transport.requests.Request() + # TOKEN = google.oauth2.id_token.fetch_id_token(request, audience) + + payload = json.dumps( + { + "url": url, + "request_type": request_type, + "body_params": json.dumps(body_params), + "query_params": query_params, + "credential": credential, + } + ) + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {TOKEN}"} + response = requests.request("POST", cloud_function_url, headers=headers, data=payload) + + if response.status_code == 200: + log("Request to cloud function successful") + + if response.text.startswith("A solicitação não foi bem-sucedida"): + # TODO: melhorar a forma de verificar se a requisição foi bem sucedida + raise ValueError(f"Resquest to endpoint failed: {response.text}") + else: + log("Request to endpoint successful") + return response.json() + + else: + raise ValueError(f"Request to cloud function failed: {response.status_code} - {response.reason}") + @task def list_files_ftp(host, user, password, directory): @@ -272,6 +371,27 @@ def list_files_ftp(host, user, password, directory): return files +@task +def save_to_file(data, file_folder, file_name, add_load_date_to_filename, load_date): + # Save the API data to a local file + if add_load_date_to_filename: + if load_date is None: + destination_file_path = ( + f"{file_folder}/{file_name}_{str(date.today())}.json" + ) + else: + destination_file_path = f"{file_folder}/{file_name}_{load_date}.json" + else: + destination_file_path = f"{file_folder}/{file_name}.json" + + with open(destination_file_path, "w", encoding="utf-8") as file: + file.write(str(data)) + + log(f"API data downloaded to {destination_file_path}") + + return destination_file_path + + @task def unzip_file(file_path: str, output_path: str): """ @@ -489,6 +609,7 @@ def upload_to_datalake( csv_delimiter: str = ";", if_storage_data_exists: str = "replace", biglake_table: bool = True, + dataset_is_public: bool = False, dump_mode: str = "append", ): """ @@ -525,6 +646,7 @@ def upload_to_datalake( csv_delimiter=csv_delimiter, if_storage_data_exists=if_storage_data_exists, biglake_table=biglake_table, + dataset_is_public=dataset_is_public, ) else: if dump_mode == "append": @@ -557,6 +679,7 @@ def upload_to_datalake( csv_delimiter=csv_delimiter, if_storage_data_exists=if_storage_data_exists, biglake_table=biglake_table, + dataset_is_public=dataset_is_public, ) log("Data uploaded to BigQuery") From 478a290bf6c90004c32a16dbeb8ec99e00b84494 Mon Sep 17 00:00:00 2001 From: ThiagoTrabach Date: Sat, 25 Nov 2023 23:18:08 -0300 Subject: [PATCH 02/25] Refactor code and reorder columns in CSV file --- .../dump_api_prontuario_vitacare/flows.py | 4 +- .../dump_api_prontuario_vitacare/tasks.py | 97 ++++++++++++++++--- pipelines/rj_sms/tasks.py | 17 +++- 3 files changed, 99 insertions(+), 19 deletions(-) diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py index 63a6edcc5..5af9e5052 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py @@ -1,5 +1,8 @@ # -*- coding: utf-8 -*- # pylint: disable=C0103 +""" +Vitacare healthrecord dumping flows 
+""" from prefect import Parameter, case from prefect.run_configs import KubernetesRun @@ -47,7 +50,6 @@ ) # noqa: E501 TABLE_ID = Parameter("table_id", required=True) - # Start run # rename_flow_run = rename_current_flow_run_dataset_table( diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/tasks.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/tasks.py index 3cd6e9f64..a860cd5fc 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/tasks.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/tasks.py @@ -4,30 +4,28 @@ """ from datetime import ( - date, + date, datetime, timedelta, ) from prefect import task +import pandas as pd -from pipelines.rj_sms.dump_api_prontuario_vitacare.constants import constants as vitacare_constants -from pipelines.utils.utils import log -from pipelines.rj_sms.tasks import ( - from_json_to_csv, - add_load_date_column, - save_to_file +from pipelines.rj_sms.dump_api_prontuario_vitacare.constants import ( + constants as vitacare_constants, ) - +from pipelines.utils.utils import log +from pipelines.rj_sms.tasks import from_json_to_csv, add_load_date_column, save_to_file @task def build_url(ap: str, endpoint: str) -> str: - url = f"{vitacare_constants.BASE_URL.value[ap]}{vitacare_constants.ENDPOINT.value[endpoint]}" # noqa: E501 log(f"URL built: {url}") return url + @task def build_params(date_param: str = "today"): if date_param == "today": @@ -37,18 +35,84 @@ def build_params(date_param: str = "today"): else: try: # check if date_param is a date string - datetime.datetime.strptime(date_param, '%Y-%m-%d') + datetime.datetime.strptime(date_param, "%Y-%m-%d") params = {"date": date_param} except ValueError: raise ValueError("date_param must be a date string (YYYY-MM-DD)") - + log(f"Params built: {params}") return params + @task def create_filename(table_id: str, ap: str) -> str: return f"{table_id}_ap{ap}" + +@task +def fix_payload_column_order(filepath: str, table_id: str, sep: str = ";"): + """ + Load a CSV file into a pandas DataFrame, keeping all column types as string, + and reorder the columns in a specified order. + + Parameters: + - filepath: str + The file path of the CSV file to load. + + Returns: + - DataFrame + The loaded DataFrame with columns reordered. 
+ """ + columns_order = { + "estoque_posicao": [ + "ap", + "cnesUnidade", + "nomeUnidade", + "desigMedicamento", + "atc", + "code", + "lote", + "dtaCriLote", + "dtaValidadeLote", + "estoqueLote", + "id", + "_data_carga" + ], + "estoque_movimento": [ + "ap", + "cnesUnidade", + "nomeUnidade", + "desigMedicamento", + "atc", + "code", + "lote", + "dtaMovimento", + "tipoMovimento", + "motivoCorrecao", + "justificativa", + "cnsProfPrescritor", + "cpfPatient", + "cnsPatient", + "qtd", + "id", + "_data_carga" + ], + } + + # Specifying dtype as str to ensure all columns are read as strings + df = pd.read_csv(filepath, sep=sep, dtype=str, encoding="utf-8") + + # Specifying the desired column order + column_order = columns_order[table_id] + + # Reordering the columns + df = df[column_order] + + df.to_csv(filepath, sep=sep, index=False, encoding="utf-8") + + log(f"Columns reordered for {filepath}") + + @task def save_data_to_file( data: str, @@ -58,24 +122,25 @@ def save_data_to_file( add_load_date_to_filename: bool = False, load_date: str = None, ): - file_path = save_to_file.run( data=data, file_folder=file_folder, file_name=f"{table_id}_ap{ap}", add_load_date_to_filename=add_load_date_to_filename, - load_date=load_date + load_date=load_date, ) - with open(file_path, 'r', encoding="UTF-8") as f: + with open(file_path, "r", encoding="UTF-8") as f: first_line = f.readline().strip() - if first_line == '[]': + if first_line == "[]": log("The json content is empty.") return False else: csv_file_path = from_json_to_csv.run(input_path=file_path, sep=";") add_load_date_column.run(input_path=csv_file_path, sep=";") - return True + fix_payload_column_order.run(filepath=csv_file_path, table_id=table_id) + + return True diff --git a/pipelines/rj_sms/tasks.py b/pipelines/rj_sms/tasks.py index 7902b77a9..3ac17c540 100644 --- a/pipelines/rj_sms/tasks.py +++ b/pipelines/rj_sms/tasks.py @@ -284,6 +284,7 @@ def callback(block): return output_path + @task def cloud_function_request( url: str, @@ -343,7 +344,7 @@ def cloud_function_request( return response.json() else: - raise ValueError(f"Request to cloud function failed: {response.status_code} - {response.reason}") + raise ValueError(f"Request to cloud function failed: {response.status_code} - {response.reason}") # noqa: E501 @task @@ -373,7 +374,19 @@ def list_files_ftp(host, user, password, directory): @task def save_to_file(data, file_folder, file_name, add_load_date_to_filename, load_date): - # Save the API data to a local file + """ + Save the API data to a local file. + + Args: + data: The API data to be saved. + file_folder: The folder where the file will be saved. + file_name: The name of the file. + add_load_date_to_filename: A boolean indicating whether to add the load date to the filename. + load_date: The load date to be added to the filename. + + Returns: + The path of the saved file. 
+ """ # noqa: E501 if add_load_date_to_filename: if load_date is None: destination_file_path = ( From 0f4b5f48b28be6642607584c9f77f852be91155c Mon Sep 17 00:00:00 2001 From: ThiagoTrabach Date: Sun, 26 Nov 2023 01:13:57 -0300 Subject: [PATCH 03/25] Update API endpoints in constants.py --- .../dump_api_prontuario_vitacare/flows.py | 15 +-- .../dump_api_prontuario_vitacare/tasks.py | 18 ++- .../dump_api_prontuario_vitai/constants.py | 7 +- .../rj_sms/dump_api_prontuario_vitai/flows.py | 111 +++++------------- .../rj_sms/dump_api_prontuario_vitai/tasks.py | 55 ++++++--- 5 files changed, 91 insertions(+), 115 deletions(-) diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py index 5af9e5052..dbb2a6eb1 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py @@ -9,9 +9,6 @@ from prefect.storage import GCS from pipelines.utils.decorators import Flow from pipelines.constants import constants -from pipelines.utils.tasks import ( - rename_current_flow_run_dataset_table, -) from pipelines.rj_sms.dump_api_prontuario_vitacare.constants import ( constants as vitacare_constants, ) @@ -23,6 +20,7 @@ upload_to_datalake, ) from pipelines.rj_sms.dump_api_prontuario_vitacare.tasks import ( + rename_flow, build_url, build_params, create_filename, @@ -51,12 +49,11 @@ TABLE_ID = Parameter("table_id", required=True) # Start run - - # rename_flow_run = rename_current_flow_run_dataset_table( - # prefix="SMS Dump VitaCare: ", - # dataset_id=TABLE_ID, - # table_id=f"ap{AP}" - # ) + # TODO: Uncomment rename_flow before production + #rename_flow_task = rename_flow( + # table_id=TABLE_ID, + # ap=AP + #) get_secret_task = get_secret(secret_path=VAULT_PATH) diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/tasks.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/tasks.py index a860cd5fc..47e442e77 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/tasks.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/tasks.py @@ -16,9 +16,19 @@ constants as vitacare_constants, ) from pipelines.utils.utils import log +from pipelines.utils.tasks import ( + rename_current_flow_run_dataset_table, +) from pipelines.rj_sms.tasks import from_json_to_csv, add_load_date_column, save_to_file +@task +def rename_flow(table_id: str, ap: str): + rename_current_flow_run_dataset_table.run( + prefix="SMS Dump VitaCare: ", dataset_id=table_id, table_id=f"ap{ap}" + ) + + @task def build_url(ap: str, endpoint: str) -> str: url = f"{vitacare_constants.BASE_URL.value[ap]}{vitacare_constants.ENDPOINT.value[endpoint]}" # noqa: E501 @@ -37,8 +47,8 @@ def build_params(date_param: str = "today"): # check if date_param is a date string datetime.datetime.strptime(date_param, "%Y-%m-%d") params = {"date": date_param} - except ValueError: - raise ValueError("date_param must be a date string (YYYY-MM-DD)") + except ValueError as e: + raise ValueError("date_param must be a date string (YYYY-MM-DD)") from e log(f"Params built: {params}") return params @@ -76,7 +86,7 @@ def fix_payload_column_order(filepath: str, table_id: str, sep: str = ";"): "dtaValidadeLote", "estoqueLote", "id", - "_data_carga" + "_data_carga", ], "estoque_movimento": [ "ap", @@ -95,7 +105,7 @@ def fix_payload_column_order(filepath: str, table_id: str, sep: str = ";"): "cnsPatient", "qtd", "id", - "_data_carga" + "_data_carga", ], } diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/constants.py 
b/pipelines/rj_sms/dump_api_prontuario_vitai/constants.py index f315cb311..f7536572c 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/constants.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitai/constants.py @@ -14,5 +14,8 @@ class constants(Enum): VAULT_PATH = "estoque_vitai" VAULT_KEY = "token" DATASET_ID = "brutos_prontuario_vitai" - TABLE_POSICAO_ID = "estoque_posicao" - TABLE_MOVIMENTOS_ID = "estoque_movimento" + ENDPOINT = { + "posicao": "https://apidw.vitai.care/api/dw/v1/produtos/saldoAtual", + "movimento": "https://apidw.vitai.care/api/dw/v1/movimentacaoProduto/query/dataMovimentacao", # noqa: E501 + } + diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py index 28cbfc396..825c7ee42 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py @@ -12,6 +12,9 @@ from pipelines.rj_sms.dump_api_prontuario_vitai.constants import ( constants as vitai_constants, ) +from pipelines.utils.tasks import ( + rename_current_flow_run_dataset_table, +) from pipelines.rj_sms.tasks import ( get_secret, create_folders, @@ -22,100 +25,46 @@ upload_to_datalake, ) from pipelines.rj_sms.dump_api_prontuario_vitai.tasks import ( - build_movimentos_date, - build_movimentos_url, + build_date_param, + build_url ) from pipelines.rj_sms.dump_api_prontuario_vitai.schedules import every_day_at_six_am - with Flow( - name="SMS: Dump Vitai - Captura Posição de Estoque", code_owners=["thiago"] -) as dump_vitai_posicao: + name="SMS: Dump Vitai - Captura ", code_owners=["thiago"] +) as dump_vitai: # Parameters # Parameters for Vault VAULT_PATH = vitai_constants.VAULT_PATH.value VAULT_KEY = vitai_constants.VAULT_KEY.value - # Paramenters for GCP - DATASET_ID = vitai_constants.DATASET_ID.value - TABLE_ID = vitai_constants.TABLE_POSICAO_ID.value - - # Start run - get_secret_task = get_secret(secret_path=VAULT_PATH, secret_key=VAULT_KEY) - - create_folders_task = create_folders() - create_folders_task.set_upstream(get_secret_task) # pylint: disable=E1101 - - download_task = download_from_api( - url="https://apidw.vitai.care/api/dw/v1/produtos/saldoAtual", - file_folder=create_folders_task["raw"], - file_name=TABLE_ID, - params=None, - crendentials=get_secret_task, - auth_method="bearer", - add_load_date_to_filename=True, - ) - download_task.set_upstream(create_folders_task) - - conversion_task = from_json_to_csv(input_path=download_task, sep=";") - conversion_task.set_upstream(download_task) - - add_load_date_column_task = add_load_date_column( - input_path=conversion_task, sep=";" - ) - add_load_date_column_task.set_upstream(conversion_task) - - create_partitions_task = create_partitions( - data_path=create_folders_task["raw"], - partition_directory=create_folders_task["partition_directory"], - ) - create_partitions_task.set_upstream(add_load_date_column_task) - - upload_to_datalake_task = upload_to_datalake( - input_path=create_folders_task["partition_directory"], - dataset_id=DATASET_ID, - table_id=TABLE_ID, - if_exists="replace", - csv_delimiter=";", - if_storage_data_exists="replace", - biglake_table=True, - ) - upload_to_datalake_task.set_upstream(create_partitions_task) - - -dump_vitai_posicao.storage = GCS(constants.GCS_FLOWS_BUCKET.value) -dump_vitai_posicao.run_config = KubernetesRun( - image=constants.DOCKER_IMAGE.value, - labels=[ - constants.RJ_SMS_AGENT_LABEL.value, - ], -) - -dump_vitai_posicao.schedule = every_day_at_six_am + # Vitai API + ENDPOINT = Parameter("endpoint", 
required=True) + DATE = Parameter("date", default=None) -with Flow( - name="SMS: Dump Vitai - Captura Movimentos de Estoque", code_owners=["thiago"] -) as dump_vitai_movimentos: - # Parameters - # Parameters for Vault - VAULT_PATH = vitai_constants.VAULT_PATH.value # Paramenters for GCP - DATASET_ID = vitai_constants.DATASET_ID.value - TABLE_ID = vitai_constants.TABLE_MOVIMENTOS_ID.value - # Parameters for Vitai - DATE = Parameter("date", default=None) + DATASET_ID = Parameter( + "DATASET_ID", default=vitai_constants.DATASET_ID.value + ) # noqa: E501 + TABLE_ID = Parameter("table_id", required=True) # Start run + # TODO: Uncomment rename_flow before production + #rename_flow_task = rename_current_flow_run_dataset_table( + # prefix="SMS Dump Vitai: ", + # dataset_id=TABLE_ID, + # table_id="") + get_secret_task = get_secret(secret_path=VAULT_PATH, secret_key=VAULT_KEY) create_folders_task = create_folders() create_folders_task.set_upstream(get_secret_task) # pylint: disable=E1101 - build_date_task = build_movimentos_date(date_param=DATE) - build_date_task.set_upstream(create_folders_task) + build_date_param_task = build_date_param(date_param=DATE) + build_date_param_task.set_upstream(create_folders_task) - build_url_task = build_movimentos_url(date_param=DATE) - build_url_task.set_upstream(build_date_task) + build_url_task = build_url(endpoint=ENDPOINT, date_param=build_date_param_task) + build_url_task.set_upstream(build_date_param_task) download_task = download_from_api( url=build_url_task, @@ -125,15 +74,15 @@ crendentials=get_secret_task, auth_method="bearer", add_load_date_to_filename=True, - load_date=build_date_task, + load_date=build_date_param_task, ) - download_task.set_upstream(build_url_task) + download_task.set_upstream(create_folders_task) conversion_task = from_json_to_csv(input_path=download_task, sep=";") conversion_task.set_upstream(download_task) add_load_date_column_task = add_load_date_column( - input_path=conversion_task, sep=";", load_date=build_date_task + input_path=conversion_task, sep=";" ) add_load_date_column_task.set_upstream(conversion_task) @@ -155,12 +104,12 @@ upload_to_datalake_task.set_upstream(create_partitions_task) -dump_vitai_movimentos.storage = GCS(constants.GCS_FLOWS_BUCKET.value) -dump_vitai_movimentos.run_config = KubernetesRun( +dump_vitai.storage = GCS(constants.GCS_FLOWS_BUCKET.value) +dump_vitai.run_config = KubernetesRun( image=constants.DOCKER_IMAGE.value, labels=[ constants.RJ_SMS_AGENT_LABEL.value, ], ) -dump_vitai_movimentos.schedule = every_day_at_six_am +dump_vitai.schedule = every_day_at_six_am diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/tasks.py b/pipelines/rj_sms/dump_api_prontuario_vitai/tasks.py index 3077c115b..8359f9354 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/tasks.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitai/tasks.py @@ -3,44 +3,61 @@ Tasks for dump_api_prontuario_vitai """ -from datetime import date, timedelta +from datetime import timedelta, date, datetime + from prefect import task + from pipelines.utils.utils import log +from pipelines.rj_sms.dump_api_prontuario_vitai.constants import ( + constants as vitai_constants) @task -def build_movimentos_url(date_param=None): +def build_date_param(date_param: str = "today"): """ - Builds a URL for querying product movements from the Vitai API. + Builds a date parameter based on the given input. Args: - date_param (str, optional): The date to query in the format "YYYY-MM-DD". - Defaults to yesterday's date. 
+ date_param (str, optional): The date parameter. Defaults to "today". Returns: - str: The URL for querying product movements from the Vitai API. - """ - if date_param is None: - date_param = (date.today() + timedelta(days=-1)).strftime("%Y-%m-%d") + str: The built date parameter. - url = f"https://apidw.vitai.care/api/dw/v1/movimentacaoProduto/query/dataMovimentacao/{date_param}" # noqa: E501 - log(f"URL built: {url}") - return url + Raises: + ValueError: If the date_param is not a valid date string (YYYY-MM-DD). + """ + if date_param == "today": + date_param = str(date.today()) + elif date_param == "yesterday": + date_param = str(date.today() - timedelta(days=1)) + elif date_param is not None: + try: + # check if date_param is a date string + datetime.strptime(date_param, "%Y-%m-%d") + except ValueError as e: + raise ValueError("date_param must be a date string (YYYY-MM-DD)") from e + + log(f"Params built: {date_param}") + return date_param @task -def build_movimentos_date(date_param=None): +def build_url(endpoint: str, date_param: None) -> str: """ - Builds a date string in the format '%Y-%m-%d' based on the given date_param or yesterday's - date if date_param is None. + Build the URL for the given endpoint and date parameter. Args: - date_param (str, optional): A date string in the format '%Y-%m-%d'. Defaults to None. + endpoint (str): The endpoint for the URL. + date_param (None): The date parameter for the URL. Returns: - str: A date string in the format '%Y-%m-%d'. + str: The built URL. """ if date_param is None: - date_param = (date.today() + timedelta(days=-1)).strftime("%Y-%m-%d") + url = vitai_constants.ENDPOINT.value[endpoint] + else: + url = f"{vitai_constants.ENDPOINT.value[endpoint]}/{date_param}" - return date_param + log(f"URL built: {url}") + + return url From 430075e305ea2c7d3b1b950214c70abf508df75b Mon Sep 17 00:00:00 2001 From: ThiagoTrabach Date: Sun, 26 Nov 2023 01:14:34 -0300 Subject: [PATCH 04/25] Uncomment rename_flow_task in dump_api_prontuario_vitai/flows.py --- pipelines/rj_sms/dump_api_prontuario_vitai/flows.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py index 825c7ee42..45a8a52b2 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py @@ -50,10 +50,10 @@ # Start run # TODO: Uncomment rename_flow before production - #rename_flow_task = rename_current_flow_run_dataset_table( - # prefix="SMS Dump Vitai: ", - # dataset_id=TABLE_ID, - # table_id="") + rename_flow_task = rename_current_flow_run_dataset_table( + prefix="SMS Dump Vitai: ", + dataset_id=TABLE_ID, + table_id="") get_secret_task = get_secret(secret_path=VAULT_PATH, secret_key=VAULT_KEY) From 16501046511e7b6d5d900117fff711cbadf31502 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 26 Nov 2023 04:17:26 +0000 Subject: [PATCH 05/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../dump_api_prontuario_vitacare/constants.py | 24 +++++++++---------- .../dump_api_prontuario_vitacare/flows.py | 8 +++---- .../dump_api_prontuario_vitai/constants.py | 1 - .../rj_sms/dump_api_prontuario_vitai/flows.py | 14 ++++------- .../rj_sms/dump_api_prontuario_vitai/tasks.py | 3 ++- pipelines/rj_sms/tasks.py | 14 +++++++---- 6 files changed, 32 insertions(+), 32 deletions(-) diff --git 
a/pipelines/rj_sms/dump_api_prontuario_vitacare/constants.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/constants.py index 2f16f1611..609382cef 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/constants.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/constants.py @@ -14,18 +14,18 @@ class constants(Enum): VAULT_PATH = "prontuario_vitacare" DATASET_ID = "brutos_prontuario_vitacare" BASE_URL = { - "10" : "http://consolidado-ap10.pepvitacare.com:8088", - "21" : "http://consolidado-ap21.pepvitacare.com:8088", - "22" : "http://consolidado-ap22.pepvitacare.com:8088", - "31" : "http://consolidado-ap31.pepvitacare.com:8089", - "32" : "http://consolidado-ap32.pepvitacare.com:8088", - "33" : "http://consolidado-ap33.pepvitacare.com:8089", - "40" : "http://consolidado-ap40.pepvitacare.com:8089", - "51" : "http://consolidado-ap51.pepvitacare.com:8089", - "52" : "http://consolidado-ap52.pepvitacare.com:8088", - "53" : "http://consolidado-ap53.pepvitacare.com:8090", + "10": "http://consolidado-ap10.pepvitacare.com:8088", + "21": "http://consolidado-ap21.pepvitacare.com:8088", + "22": "http://consolidado-ap22.pepvitacare.com:8088", + "31": "http://consolidado-ap31.pepvitacare.com:8089", + "32": "http://consolidado-ap32.pepvitacare.com:8088", + "33": "http://consolidado-ap33.pepvitacare.com:8089", + "40": "http://consolidado-ap40.pepvitacare.com:8089", + "51": "http://consolidado-ap51.pepvitacare.com:8089", + "52": "http://consolidado-ap52.pepvitacare.com:8088", + "53": "http://consolidado-ap53.pepvitacare.com:8090", } ENDPOINT = { - "posicao" : "/reports/pharmacy/stocks", - "movimento" : "/reports/pharmacy/movements" + "posicao": "/reports/pharmacy/stocks", + "movimento": "/reports/pharmacy/movements", } diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py index dbb2a6eb1..6d62a2998 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py @@ -50,10 +50,10 @@ # Start run # TODO: Uncomment rename_flow before production - #rename_flow_task = rename_flow( + # rename_flow_task = rename_flow( # table_id=TABLE_ID, # ap=AP - #) + # ) get_secret_task = get_secret(secret_path=VAULT_PATH) @@ -84,14 +84,14 @@ table_id=TABLE_ID, ap=AP, add_load_date_to_filename=True, - load_date=build_params_task["date"] + load_date=build_params_task["date"], ) save_data_task.set_upstream(file_name_task) # pylint: disable=E1101 with case(save_data_task, True): create_partitions_task = create_partitions( data_path=create_folders_task["raw"], - partition_directory=create_folders_task["partition_directory"] + partition_directory=create_folders_task["partition_directory"], ) create_partitions_task.set_upstream(save_data_task) diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/constants.py b/pipelines/rj_sms/dump_api_prontuario_vitai/constants.py index f7536572c..c9f82aaea 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/constants.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitai/constants.py @@ -18,4 +18,3 @@ class constants(Enum): "posicao": "https://apidw.vitai.care/api/dw/v1/produtos/saldoAtual", "movimento": "https://apidw.vitai.care/api/dw/v1/movimentacaoProduto/query/dataMovimentacao", # noqa: E501 } - diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py index 45a8a52b2..9ffad27a4 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py +++ 
b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py @@ -24,15 +24,10 @@ create_partitions, upload_to_datalake, ) -from pipelines.rj_sms.dump_api_prontuario_vitai.tasks import ( - build_date_param, - build_url -) +from pipelines.rj_sms.dump_api_prontuario_vitai.tasks import build_date_param, build_url from pipelines.rj_sms.dump_api_prontuario_vitai.schedules import every_day_at_six_am -with Flow( - name="SMS: Dump Vitai - Captura ", code_owners=["thiago"] -) as dump_vitai: +with Flow(name="SMS: Dump Vitai - Captura ", code_owners=["thiago"]) as dump_vitai: # Parameters # Parameters for Vault VAULT_PATH = vitai_constants.VAULT_PATH.value @@ -51,9 +46,8 @@ # Start run # TODO: Uncomment rename_flow before production rename_flow_task = rename_current_flow_run_dataset_table( - prefix="SMS Dump Vitai: ", - dataset_id=TABLE_ID, - table_id="") + prefix="SMS Dump Vitai: ", dataset_id=TABLE_ID, table_id="" + ) get_secret_task = get_secret(secret_path=VAULT_PATH, secret_key=VAULT_KEY) diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/tasks.py b/pipelines/rj_sms/dump_api_prontuario_vitai/tasks.py index 8359f9354..1a8aa7e70 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/tasks.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitai/tasks.py @@ -9,7 +9,8 @@ from pipelines.utils.utils import log from pipelines.rj_sms.dump_api_prontuario_vitai.constants import ( - constants as vitai_constants) + constants as vitai_constants, +) @task diff --git a/pipelines/rj_sms/tasks.py b/pipelines/rj_sms/tasks.py index 3ac17c540..a45beca66 100644 --- a/pipelines/rj_sms/tasks.py +++ b/pipelines/rj_sms/tasks.py @@ -313,7 +313,9 @@ def cloud_function_request( if env == "prod": cloud_function_url = "https://us-central1-rj-sms.cloudfunctions.net/vitacare" elif env == "dev": - cloud_function_url = "https://us-central1-rj-sms-dev.cloudfunctions.net/vitacare" + cloud_function_url = ( + "https://us-central1-rj-sms-dev.cloudfunctions.net/vitacare" + ) else: raise ValueError("env must be 'prod' or 'dev'") @@ -331,7 +333,9 @@ def cloud_function_request( } ) headers = {"Content-Type": "application/json", "Authorization": f"Bearer {TOKEN}"} - response = requests.request("POST", cloud_function_url, headers=headers, data=payload) + response = requests.request( + "POST", cloud_function_url, headers=headers, data=payload + ) if response.status_code == 200: log("Request to cloud function successful") @@ -344,7 +348,9 @@ def cloud_function_request( return response.json() else: - raise ValueError(f"Request to cloud function failed: {response.status_code} - {response.reason}") # noqa: E501 + raise ValueError( + f"Request to cloud function failed: {response.status_code} - {response.reason}" + ) # noqa: E501 @task @@ -386,7 +392,7 @@ def save_to_file(data, file_folder, file_name, add_load_date_to_filename, load_d Returns: The path of the saved file. 
- """ # noqa: E501 + """ # noqa: E501 if add_load_date_to_filename: if load_date is None: destination_file_path = ( From aef7e47dc74c0a2ad1f9273f62a7cf1d390ae704 Mon Sep 17 00:00:00 2001 From: ThiagoTrabach Date: Sun, 26 Nov 2023 10:10:58 -0300 Subject: [PATCH 06/25] Update constants for Vitai and Vitacare --- pipelines/rj_sms/__init__.py | 1 + .../dump_api_prontuario_vitacare/constants.py | 2 +- .../dump_api_prontuario_vitacare/flows.py | 18 +-- .../dump_api_prontuario_vitai/constants.py | 2 +- .../rj_sms/dump_api_prontuario_vitai/flows.py | 17 +-- .../dump_azureblob_estoque_tpc/constants.py | 22 ++++ .../dump_azureblob_estoque_tpc/flows.py | 106 +++++++++++++++++ .../dump_azureblob_estoque_tpc/schedules.py | 22 ++++ .../dump_azureblob_estoque_tpc/tasks.py | 108 ++++++++++++++++++ pipelines/rj_sms/tasks.py | 16 +-- 10 files changed, 284 insertions(+), 30 deletions(-) create mode 100644 pipelines/rj_sms/dump_azureblob_estoque_tpc/constants.py create mode 100644 pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py create mode 100644 pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py create mode 100644 pipelines/rj_sms/dump_azureblob_estoque_tpc/tasks.py diff --git a/pipelines/rj_sms/__init__.py b/pipelines/rj_sms/__init__.py index be9479389..b29dec961 100644 --- a/pipelines/rj_sms/__init__.py +++ b/pipelines/rj_sms/__init__.py @@ -5,5 +5,6 @@ from pipelines.rj_sms.dump_db_sivep.flows import * from pipelines.rj_sms.dump_ftp_cnes.flows import * +from pipelines.rj_sms.dump_azureblob_estoque_tpc.flows import * from pipelines.rj_sms.dump_api_prontuario_vitai.flows import * from pipelines.rj_sms.dump_api_prontuario_vitacare.flows import * diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/constants.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/constants.py index 609382cef..bb23e8fdd 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/constants.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/constants.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # pylint: disable=C0103 """ -Constants for utils. +Constants for Vitacare. 
""" from enum import Enum diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py index 6d62a2998..1d4e6f56d 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py @@ -32,11 +32,12 @@ with Flow( name="SMS: Dump VitaCare - Captura ", code_owners=["thiago"] -) as dump_vitacare: # noqa: E501 +) as dump_vitacare: # Set Parameters + # Flow parameters + RENAME_FLOW = Parameter("rename_flow", default=True) # Vault VAULT_PATH = vitacare_constants.VAULT_PATH.value - # Vitacare API AP = Parameter("ap", required=True, default="10") ENDPOINT = Parameter("endpoint", required=True) @@ -45,15 +46,15 @@ # GCP DATASET_ID = Parameter( "DATASET_ID", default=vitacare_constants.DATASET_ID.value - ) # noqa: E501 + ) TABLE_ID = Parameter("table_id", required=True) # Start run - # TODO: Uncomment rename_flow before production - # rename_flow_task = rename_flow( - # table_id=TABLE_ID, - # ap=AP - # ) + with case(RENAME_FLOW, True): + rename_flow_task = rename_flow( + table_id=TABLE_ID, + ap=AP + ) get_secret_task = get_secret(secret_path=VAULT_PATH) @@ -103,6 +104,7 @@ csv_delimiter=";", if_storage_data_exists="replace", biglake_table=True, + dataset_is_public=False, ) upload_to_datalake_task.set_upstream(create_partitions_task) diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/constants.py b/pipelines/rj_sms/dump_api_prontuario_vitai/constants.py index c9f82aaea..c51f70d32 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/constants.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitai/constants.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # pylint: disable=C0103 """ -Constants for utils. +Constants for Vitai. """ from enum import Enum diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py index 9ffad27a4..c39f9dfb4 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py @@ -4,7 +4,7 @@ Vitai healthrecord dumping flows """ -from prefect import Parameter +from prefect import Parameter, case from prefect.run_configs import KubernetesRun from prefect.storage import GCS from pipelines.utils.decorators import Flow @@ -29,25 +29,25 @@ with Flow(name="SMS: Dump Vitai - Captura ", code_owners=["thiago"]) as dump_vitai: # Parameters + # Flow parameters + RENAME_FLOW = Parameter("rename_flow", default=True) # Parameters for Vault VAULT_PATH = vitai_constants.VAULT_PATH.value VAULT_KEY = vitai_constants.VAULT_KEY.value - # Vitai API ENDPOINT = Parameter("endpoint", required=True) DATE = Parameter("date", default=None) - # Paramenters for GCP DATASET_ID = Parameter( "DATASET_ID", default=vitai_constants.DATASET_ID.value - ) # noqa: E501 + ) TABLE_ID = Parameter("table_id", required=True) # Start run - # TODO: Uncomment rename_flow before production - rename_flow_task = rename_current_flow_run_dataset_table( - prefix="SMS Dump Vitai: ", dataset_id=TABLE_ID, table_id="" - ) + with case(RENAME_FLOW, True): + rename_flow_task = rename_current_flow_run_dataset_table( + prefix="SMS Dump Vitai: ", dataset_id=TABLE_ID, table_id="" + ) get_secret_task = get_secret(secret_path=VAULT_PATH, secret_key=VAULT_KEY) @@ -94,6 +94,7 @@ csv_delimiter=";", if_storage_data_exists="replace", biglake_table=True, + dataset_is_public=False, ) upload_to_datalake_task.set_upstream(create_partitions_task) diff --git a/pipelines/rj_sms/dump_azureblob_estoque_tpc/constants.py 
b/pipelines/rj_sms/dump_azureblob_estoque_tpc/constants.py new file mode 100644 index 000000000..fc141bb1d --- /dev/null +++ b/pipelines/rj_sms/dump_azureblob_estoque_tpc/constants.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +# pylint: disable=C0103 +""" +Constants for TPC. +""" +from enum import Enum + + +class constants(Enum): + """ + Constant values for the dump vitai flows + """ + + VAULT_PATH = "estoque_tpc" + VAULT_KEY = "token" + DATASET_ID = "brutos_estoque_central_tpc" + CONTAINER_NAME = "datalaketpc" + BLOB_PATH = { + "posicao" : "gold/logistico/cliente=prefeitura_rio/planta=sms_rio/estoque_local/estoque_local.csv", + "pedidos" : "gold/logistico/cliente=prefeitura_rio/planta=sms_rio/pedidos_depositante/pedidos_depositante.csv", + "recebimento" : "gold/logistico/cliente=prefeitura_rio/planta=sms_rio/recebimento_documental/recebimento_documental.csv", + } diff --git a/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py b/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py new file mode 100644 index 000000000..7cce08b6e --- /dev/null +++ b/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*- +# pylint: disable=C0103 +""" +TPC inventory dumping flows +""" + +from prefect import Parameter, case +from prefect.run_configs import KubernetesRun +from prefect.storage import GCS +from pipelines.utils.decorators import Flow +from pipelines.constants import constants +from pipelines.rj_sms.dump_azureblob_estoque_tpc.constants import ( + constants as tpc_constants, +) +from pipelines.utils.tasks import ( + rename_current_flow_run_dataset_table, +) +from pipelines.rj_sms.tasks import ( + get_secret, + download_azure_blob, + create_folders, + add_load_date_column, + create_partitions, + upload_to_datalake, +) +from pipelines.rj_sms.dump_azureblob_estoque_tpc.tasks import ( + get_blob_path, + conform_csv_to_gcp, +) +from pipelines.rj_sms.dump_azureblob_estoque_tpc.schedules import every_day_at_six_am + +with Flow(name="SMS: Dump TPC - Captura ", code_owners=["thiago"]) as dump_tpc: + # Parameters + # Flow parameters + RENAME_FLOW = Parameter("rename_flow", default=True) + # Parameters for Vault + VAULT_PATH = tpc_constants.VAULT_PATH.value + VAULT_KEY = tpc_constants.VAULT_KEY.value + # TPC Azure + CONTAINER_NAME = tpc_constants.CONTAINER_NAME.value + BLOB_FILE = Parameter("blob_file", required=True) + # Paramenters for GCP + DATASET_ID = Parameter( + "DATASET_ID", default=tpc_constants.DATASET_ID.value + ) + TABLE_ID = Parameter("table_id", required=True) + + # Start run + with case(RENAME_FLOW, True): + rename_flow_task = rename_current_flow_run_dataset_table( + prefix="SMS Dump TPC: ", dataset_id=TABLE_ID, table_id="" + ) + + get_secret_task = get_secret(secret_path=VAULT_PATH, secret_key=VAULT_KEY) + + create_folders_task = create_folders() + create_folders_task.set_upstream(get_secret_task) # pylint: disable=E1101 + + get_blob_path_task = get_blob_path(blob_file=BLOB_FILE) + get_blob_path_task.set_upstream(create_folders_task) + + download_task = download_azure_blob( + container_name=CONTAINER_NAME, + blob_path=get_blob_path_task, + file_folder=create_folders_task["raw"], + file_name=TABLE_ID, + credentials=get_secret_task, + add_load_date_to_filename=True, + ) + download_task.set_upstream(create_folders_task) + + conform_task = conform_csv_to_gcp( + filepath=download_task, + blob_file=BLOB_FILE) + conform_task.set_upstream(download_task) + + add_load_date_column_task = add_load_date_column(input_path=download_task, sep=";") + 
add_load_date_column_task.set_upstream(conform_task) + + create_partitions_task = create_partitions( + data_path=create_folders_task["raw"], + partition_directory=create_folders_task["partition_directory"], + ) + create_partitions_task.set_upstream(add_load_date_column_task) + + upload_to_datalake_task = upload_to_datalake( + input_path=create_folders_task["partition_directory"], + dataset_id=DATASET_ID, + table_id=TABLE_ID, + if_exists="replace", + csv_delimiter=";", + if_storage_data_exists="replace", + biglake_table=True, + dataset_is_public=False, + ) + upload_to_datalake_task.set_upstream(create_partitions_task) + +dump_tpc.storage = GCS(constants.GCS_FLOWS_BUCKET.value) +dump_tpc.run_config = KubernetesRun( + image=constants.DOCKER_IMAGE.value, + labels=[ + constants.RJ_SMS_DEV_AGENT_LABEL.value, + ], +) + +dump_tpc.schedule = every_day_at_six_am diff --git a/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py b/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py new file mode 100644 index 000000000..dd4aa0239 --- /dev/null +++ b/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +# pylint: disable=C0103 +""" +Schedules for the database dump pipeline +""" +from datetime import timedelta +import pendulum +from prefect.schedules import Schedule +from prefect.schedules.clocks import IntervalClock +from pipelines.constants import constants + +every_day_at_six_am = Schedule( + clocks=[ + IntervalClock( + interval=timedelta(days=1), + start_date=pendulum.datetime(2023, 1, 1, 6, 0, 0, tz="America/Sao_Paulo"), + labels=[ + constants.RJ_SMS_AGENT_LABEL.value, + ], + ) + ] +) \ No newline at end of file diff --git a/pipelines/rj_sms/dump_azureblob_estoque_tpc/tasks.py b/pipelines/rj_sms/dump_azureblob_estoque_tpc/tasks.py new file mode 100644 index 000000000..6753a8b4b --- /dev/null +++ b/pipelines/rj_sms/dump_azureblob_estoque_tpc/tasks.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +""" +Tasks for dump_azublob_estoque_tpc +""" + +import pandas as pd +from prefect import task +from pipelines.utils.utils import log +from pipelines.rj_sms.dump_azureblob_estoque_tpc.constants import ( + constants as tpc_constants, +) + + +@task +def get_blob_path(blob_file: str): + """ + Returns the blob path for the given blob file. + + Args: + blob_file (str): The name of the blob file. + + Returns: + str: The blob path for the given blob file. + """ + return tpc_constants.BLOB_PATH.value[blob_file] + +@task +def conform_csv_to_gcp(filepath: str, blob_file: str): + """ + Reads a CSV file from the given filepath, applies some data cleaning and formatting operations, + and saves the resulting dataframe back to the same file in a different format. + + Args: + filepath (str): The path to the CSV file to be processed. 
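+        blob_file (str): Which layout to conform ("posicao", "pedidos" or
+            "recebimento"), matching the keys of BLOB_PATH in the TPC constants.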
+ + Returns: + None + """ + log("Conforming CSV to GCP") + + # remove " from csv to avoid errors + with open(filepath, 'r') as f: + file_contents = f.read() + + file_contents = file_contents.replace('\"', '') + + with open(filepath, 'w') as f: + f.write(file_contents) + + + df = pd.read_csv(filepath, sep=";", dtype=str, keep_default_na=False) + + if blob_file == "posicao": + + # remove registros errados + df = df[df.sku != ""] + + # converte para valores numéricos + df["volume"] = df.volume.apply(lambda x: float(x.replace(",", "."))) + df["peso_bruto"] = df.peso_bruto.apply(lambda x: float(x.replace(",", "."))) + df["qtd_dispo"] = df.qtd_dispo.apply(lambda x: float(x.replace(",", "."))) + df["qtd_roma"] = df.qtd_roma.apply(lambda x: float(x.replace(",", "."))) + df["preco_unitario"] = df.preco_unitario.apply( + lambda x: float(x.replace(",", ".")) if x != "" else x + ) + + # converte as validades + df["validade"] = df.validade.apply(lambda x: x[:10]) + df["dt_situacao"] = df.dt_situacao.apply( + lambda x: x[-4:] + "-" + x[3:5] + "-" + x[:2] + ) + elif blob_file == "pedidos": + # converte para valores numéricos + df["valor"] = df.valor.apply( + lambda x: float(x.replace(",", ".")) if x != "" else x + ) + df["peso"] = df.peso.apply(lambda x: float(x.replace(",", "."))) + df["volume"] = df.volume.apply(lambda x: float(x.replace(",", "."))) + df["quantidade_peca"] = df.quantidade_peca.apply(lambda x: float(x.replace(",", "."))) + df["valor_total"] = df.valor_total.apply( + lambda x: float(x.replace(",", ".")) if x != "" else x + ) + elif blob_file == "recebimento": + + + df["qt"] = df.qt.apply( + lambda x: float(x.replace(",", ".")) if x != "" else x + ) + df["qt_fis"] = df.qt_fis.apply( + lambda x: float(x.replace(",", ".")) if x != "" else x + ) + df["pr_unit"] = df.pr_unit.apply( + lambda x: float(x.replace(",", ".")) if x != "" else x + ) + df["vl_merc"] = df.vl_merc.apply( + lambda x: float(x.replace(",", ".")) if x != "" else x + ) + df["vl_total"] = df.vl_total.apply( + lambda x: float(x.replace(",", ".")) if x != "" else x + ) + df["qt_rec"] = df.qt_rec.apply( + lambda x: float(x.replace(",", ".")) if x != "" else x + ) + + + df.to_csv(filepath, index=False, sep=";", encoding="utf-8", quoting=0, decimal=".") + + log("CSV now conform") \ No newline at end of file diff --git a/pipelines/rj_sms/tasks.py b/pipelines/rj_sms/tasks.py index a45beca66..e3bf40f05 100644 --- a/pipelines/rj_sms/tasks.py +++ b/pipelines/rj_sms/tasks.py @@ -171,8 +171,7 @@ def download_azure_blob( blob_path: str, file_folder: str, file_name: str, - vault_path: str, - vault_key: str, + credentials=None, add_load_date_to_filename=False, load_date=None, ): @@ -185,26 +184,19 @@ def download_azure_blob( file_folder (str): The folder where the downloaded file will be saved. file_name (str): The name of the downloaded file. params (dict, optional): Additional parameters to include in the API request. - vault_path (str, optional): The path in Vault where the authentication token is stored. - vault_key (str, optional): The key in Vault where the authentication token is stored. + credentials (str or tuple, optional): The credentials to be used for authentication. Defaults to None. add_load_date_to_filename (bool, optional): Whether to add the current date to the filename. load_date (str, optional): The specific date to add to the filename. Returns: str: The path of the downloaded file. 
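Stepping back from the diff: conform_csv_to_gcp normalizes pt-BR formatting at the string level, turning decimal commas into dots and re-slicing dd/mm/yyyy dates into ISO order. The conversions can be sanity-checked in isolation; a small sketch with made-up values, mirroring the lambdas above:

import pandas as pd

df = pd.DataFrame(
    {"qtd_dispo": ["1,5", "10,0"], "dt_situacao": ["29/11/2023", "01/12/2023"]}
)
df["qtd_dispo"] = df.qtd_dispo.apply(lambda x: float(x.replace(",", ".")))
# dd/mm/yyyy -> yyyy-mm-dd by slicing, as in conform_csv_to_gcp
df["dt_situacao"] = df.dt_situacao.apply(lambda x: x[-4:] + "-" + x[3:5] + "-" + x[:2])
print(df)  # qtd_dispo now holds floats; dt_situacao reads "2023-11-29", "2023-12-01"

Note the replace-based parsing assumes plain decimal commas with no thousands separators; values like "1.234,56" would need locale-aware handling instead.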
- """ - # Retrieve the API key from Vault - try: - credential = get_vault_secret(secret_path=vault_path)["data"][vault_key] - log("Vault secret retrieved") - except Exception as e: - log(f"Not able to retrieve Vault secret {e}", level="error") + """ # noqa: E501 # Download data from Blob Storage log(f"Downloading data from Azure Blob Storage: {blob_path}") blob_service_client = BlobServiceClient( account_url="https://datalaketpcgen2.blob.core.windows.net/", - credential=credential, + credential=credentials, ) blob_client = blob_service_client.get_blob_client( container=container_name, blob=blob_path From d2bc5e3bb82ca5086d60eab95f25af46bd9651cd Mon Sep 17 00:00:00 2001 From: ThiagoTrabach Date: Mon, 27 Nov 2023 13:58:44 -0300 Subject: [PATCH 07/25] Update API endpoints in constants.py --- pipelines/rj_sms/dump_api_prontuario_vitacare/constants.py | 6 +++--- pipelines/rj_sms/dump_api_prontuario_vitacare/tasks.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/constants.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/constants.py index bb23e8fdd..82cfdd214 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/constants.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/constants.py @@ -18,12 +18,12 @@ class constants(Enum): "21": "http://consolidado-ap21.pepvitacare.com:8088", "22": "http://consolidado-ap22.pepvitacare.com:8088", "31": "http://consolidado-ap31.pepvitacare.com:8089", - "32": "http://consolidado-ap32.pepvitacare.com:8088", + "32": "http://consolidado-ap32.pepvitacare.com:8090", "33": "http://consolidado-ap33.pepvitacare.com:8089", "40": "http://consolidado-ap40.pepvitacare.com:8089", - "51": "http://consolidado-ap51.pepvitacare.com:8089", + "51": "http://consolidado-ap51.pepvitacare.com:8091", "52": "http://consolidado-ap52.pepvitacare.com:8088", - "53": "http://consolidado-ap53.pepvitacare.com:8090", + "53": "http://consolidado-ap53.pepvitacare.com:8092", } ENDPOINT = { "posicao": "/reports/pharmacy/stocks", diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/tasks.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/tasks.py index 47e442e77..134ccceee 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/tasks.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/tasks.py @@ -45,7 +45,7 @@ def build_params(date_param: str = "today"): else: try: # check if date_param is a date string - datetime.datetime.strptime(date_param, "%Y-%m-%d") + datetime.strptime(date_param, "%Y-%m-%d") params = {"date": date_param} except ValueError as e: raise ValueError("date_param must be a date string (YYYY-MM-DD)") from e From 01bf5263f6ee395f0614a114f6cbbfac3c3ecc3d Mon Sep 17 00:00:00 2001 From: ThiagoTrabach Date: Mon, 27 Nov 2023 21:36:25 -0300 Subject: [PATCH 08/25] Add Google Sheets dumping functionality --- pipelines/rj_sms/__init__.py | 1 + pipelines/rj_sms/dump_sheets/constants.py | 14 ++++++ pipelines/rj_sms/dump_sheets/flows.py | 30 +++++++++++++ pipelines/rj_sms/dump_sheets/schedules.py | 52 +++++++++++++++++++++++ 4 files changed, 97 insertions(+) create mode 100644 pipelines/rj_sms/dump_sheets/constants.py create mode 100644 pipelines/rj_sms/dump_sheets/flows.py create mode 100644 pipelines/rj_sms/dump_sheets/schedules.py diff --git a/pipelines/rj_sms/__init__.py b/pipelines/rj_sms/__init__.py index b29dec961..ca74ca40a 100644 --- a/pipelines/rj_sms/__init__.py +++ b/pipelines/rj_sms/__init__.py @@ -8,3 +8,4 @@ from pipelines.rj_sms.dump_azureblob_estoque_tpc.flows import * from 
pipelines.rj_sms.dump_api_prontuario_vitai.flows import * from pipelines.rj_sms.dump_api_prontuario_vitacare.flows import * +from pipelines.rj_sms.dump_sheets.flows import * diff --git a/pipelines/rj_sms/dump_sheets/constants.py b/pipelines/rj_sms/dump_sheets/constants.py new file mode 100644 index 000000000..53017ff91 --- /dev/null +++ b/pipelines/rj_sms/dump_sheets/constants.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +# pylint: disable=C0103 +""" +Constants for sheets dump. +""" +from enum import Enum + + +class constants(Enum): + """ + Constant values for the dump sheets flows + """ + + DATASET_ID = "brutos_sheets" diff --git a/pipelines/rj_sms/dump_sheets/flows.py b/pipelines/rj_sms/dump_sheets/flows.py new file mode 100644 index 000000000..e5de7c3b3 --- /dev/null +++ b/pipelines/rj_sms/dump_sheets/flows.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +""" +Database dumping flows for sheets dump. +""" + +from copy import deepcopy + +from prefect.run_configs import KubernetesRun +from prefect.storage import GCS + +from pipelines.constants import constants +from pipelines.utils.dump_url.flows import dump_url_flow +from pipelines.rj_sms.dump_sheets.schedules import sms_sheets_daily_update_schedule + +# TODO: add code owner + +dump_sms_sheets_flow = deepcopy(dump_url_flow) +dump_sms_sheets_flow.name = ( + "SMS: Dump Google Sheets - Ingerir planilhas do Google Sheets" +) +dump_sms_sheets_flow.storage = GCS(constants.GCS_FLOWS_BUCKET.value) + +dump_sms_sheets_flow.run_config = KubernetesRun( + image=constants.DOCKER_IMAGE.value, + labels=[ + constants.RJ_SMS_AGENT_LABEL.value, + ], +) + +dump_sms_sheets_flow.schedule = sms_sheets_daily_update_schedule \ No newline at end of file diff --git a/pipelines/rj_sms/dump_sheets/schedules.py b/pipelines/rj_sms/dump_sheets/schedules.py new file mode 100644 index 000000000..18254f585 --- /dev/null +++ b/pipelines/rj_sms/dump_sheets/schedules.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +# pylint: disable=C0103 +""" +Schedules for the sheets dump pipeline +""" + +from datetime import timedelta, datetime + +from prefect.schedules import Schedule +import pytz + + +from pipelines.constants import constants +from pipelines.rj_sms.dump_sheets.constants import constants as sheets_constants +from pipelines.utils.dump_url.utils import generate_dump_url_schedules +from pipelines.utils.utils import untuple_clocks as untuple + + +table_parameters = { + "estabelecimento_auxiliar": { + "url": "https://docs.google.com/spreadsheets/d/1EkYfxuN2bWD_q4OhHL8hJvbmQKmQKFrk0KLf6D7nKS4/edit?usp=sharing", # noqa: E501 + "url_type": "google_sheet", + "gsheets_sheet_name": "Sheet1", + "dataset_id": sheets_constants.DATASET_ID.value, + "table_id": "estabelecimento_auxiliar", + "dump_mode": "overwrite", + "biglake_table": True, + }, + "material_remume": { + "url": "https://docs.google.com/spreadsheets/d/1p7tOI1VeeEgeuzP_mag5wGZHTetpb23g_ykwbcd2u00/edit?usp=sharing", # noqa: E501 + "url_type": "google_sheet", + "gsheets_sheet_name": "CONSOLIDADO", + "dataset_id": sheets_constants.DATASET_ID.value, + "table_id": "material_remume", + "dump_mode": "overwrite", + "biglake_table": True, + }, +} + + +sms_clocks = generate_dump_url_schedules( + interval=timedelta(days=1), + start_date=datetime(2023, 1, 1, 5, 0, tzinfo=pytz.timezone("America/Sao_Paulo")), + labels=[ + constants.RJ_SMS_AGENT_LABEL.value, + ], + dataset_id=sheets_constants.DATASET_ID.value, + table_parameters=table_parameters, + runs_interval_minutes=5, +) + +sms_sheets_daily_update_schedule = 
Schedule(clocks=untuple(sms_clocks)) From 5a7d546bda09aed8d683d455dc999a2ec87ccc59 Mon Sep 17 00:00:00 2001 From: ThiagoTrabach Date: Tue, 28 Nov 2023 12:10:22 -0300 Subject: [PATCH 09/25] Update schedules for vitacare and tpc data dumps --- pipelines/.DS_Store | Bin 6148 -> 6148 bytes .../dump_api_prontuario_vitacare/flows.py | 29 ++++++-- .../dump_api_prontuario_vitacare/schedules.py | 63 ++++++++++++++++++ .../rj_sms/dump_api_prontuario_vitai/flows.py | 36 +++++++--- .../dump_api_prontuario_vitai/schedules.py | 51 ++++++++++---- .../dump_azureblob_estoque_tpc/flows.py | 30 +++++++-- .../dump_azureblob_estoque_tpc/schedules.py | 58 +++++++++++----- pipelines/rj_sms/dump_sheets/schedules.py | 8 +-- pipelines/rj_sms/utils.py | 54 +++++++++++++++ 9 files changed, 273 insertions(+), 56 deletions(-) create mode 100644 pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py create mode 100644 pipelines/rj_sms/utils.py diff --git a/pipelines/.DS_Store b/pipelines/.DS_Store index ff03a594b25a86a6ca70873da7083634eb449fac..f8223c939d41efd22c6dee585350a7a001461e4c 100644 GIT binary patch delta 81 zcmV-X0IvUpFoZCW7XgQnaTbv-ApruBP&<<_6a nZ*FG*00058u{{T~2M7=d0f(`%_XCp;6fFS&vq2QY1Cf9PrmPnJ delta 57 zcmZoMXfc=|&e%4wP;8=}A|vC(0Ba!8qq^~|75l^oGMm{s1UMMmHeUSBJeglamz9x$ Mf#JYrTam-e01Ax}jQ{`u diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py index 1d4e6f56d..b158e395d 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py @@ -27,17 +27,23 @@ save_data_to_file, ) -from pipelines.rj_sms.dump_api_prontuario_vitai.schedules import every_day_at_six_am +from pipelines.rj_sms.dump_api_prontuario_vitacare.schedules import vitacare_clocks with Flow( - name="SMS: Dump VitaCare - Captura ", code_owners=["thiago"] + name="SMS: Dump VitaCare - Ingerir dados do prontuário VitaCare", code_owners=["thiago"] ) as dump_vitacare: - # Set Parameters - # Flow parameters + + ##################################### + # Parameters + ##################################### + + # Flow RENAME_FLOW = Parameter("rename_flow", default=True) + # Vault VAULT_PATH = vitacare_constants.VAULT_PATH.value + # Vitacare API AP = Parameter("ap", required=True, default="10") ENDPOINT = Parameter("endpoint", required=True) @@ -49,13 +55,20 @@ ) TABLE_ID = Parameter("table_id", required=True) - # Start run + ##################################### + # Rename flow run + #################################### + with case(RENAME_FLOW, True): rename_flow_task = rename_flow( table_id=TABLE_ID, ap=AP ) + #################################### + # Tasks section #1 - Get data + ##################################### + get_secret_task = get_secret(secret_path=VAULT_PATH) create_folders_task = create_folders() @@ -89,6 +102,10 @@ ) save_data_task.set_upstream(file_name_task) # pylint: disable=E1101 + ##################################### + # Tasks section #2 - Transform data and Create table + ##################################### + with case(save_data_task, True): create_partitions_task = create_partitions( data_path=create_folders_task["raw"], @@ -117,4 +134,4 @@ ], ) -dump_vitacare.schedule = every_day_at_six_am +dump_vitacare.schedule = vitacare_clocks diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py new file mode 100644 index 000000000..37f912d85 --- /dev/null +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py @@ -0,0 
+1,63 @@ +# -*- coding: utf-8 -*- +# pylint: disable=C0103 +""" +Schedules for the vitacare dump pipeline +""" + +from datetime import timedelta, datetime + +from prefect.schedules import Schedule +import pytz + + +from pipelines.constants import constants +from pipelines.rj_sms.dump_api_prontuario_vitacare.constants import constants as vitacare_constants +from pipelines.utils.utils import untuple_clocks as untuple +from pipelines.rj_sms.utils import ( + generate_dicts, + generate_dump_api_schedules +) + + +posicao_parameters = generate_dicts( + dict_template={ + "dataset_id": vitacare_constants.DATASET_ID.value, + "table_id": "estoque_posicao", + "AP": "", + "endpoint": "posicao", + "date": "today", + }, + key="AP", + values=["10", "21", "22", "31", "32", "33", "40", "51", "52", "53"] +) + +movimento_parameters = generate_dicts( + dict_template={ + "dataset_id": vitacare_constants.DATASET_ID.value, + "table_id": "estoque_movimento", + "AP": "", + "endpoint": "movimento", + "date": "yesterday", + }, + key="AP", + values=["10", "21", "22", "31", "32", "33", "40", "51", "52", "53"] +) + +flow_parameters = posicao_parameters + movimento_parameters + + +vitacare_clocks = generate_dump_api_schedules( + interval=timedelta(days=1), + start_date=datetime(2023, 1, 1, 13, 0, tzinfo=pytz.timezone("America/Sao_Paulo")), + labels=[ + constants.RJ_SMS_DEV_AGENT_LABEL.value, + ], + flow_run_parameters=flow_parameters, + runs_interval_minutes=2, +) + +vitacare_daily_update_schedule = Schedule(clocks=untuple(vitacare_clocks)) + + +if __name__ == "__main__": + print(vitacare_daily_update_schedule) diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py index c39f9dfb4..b8d0cfd19 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py @@ -25,30 +25,42 @@ upload_to_datalake, ) from pipelines.rj_sms.dump_api_prontuario_vitai.tasks import build_date_param, build_url -from pipelines.rj_sms.dump_api_prontuario_vitai.schedules import every_day_at_six_am +from pipelines.rj_sms.dump_api_prontuario_vitai.schedules import vitai_clocks -with Flow(name="SMS: Dump Vitai - Captura ", code_owners=["thiago"]) as dump_vitai: +with Flow( + name="SMS: Dump Vitai - Ingerir dados do prontuário Vitai", code_owners=["thiago"] +) as dump_vitai: + + ##################################### # Parameters - # Flow parameters + ##################################### + + # Flow RENAME_FLOW = Parameter("rename_flow", default=True) - # Parameters for Vault + + # Vault VAULT_PATH = vitai_constants.VAULT_PATH.value VAULT_KEY = vitai_constants.VAULT_KEY.value + # Vitai API ENDPOINT = Parameter("endpoint", required=True) DATE = Parameter("date", default=None) - # Paramenters for GCP - DATASET_ID = Parameter( - "DATASET_ID", default=vitai_constants.DATASET_ID.value - ) + + # GCP + DATASET_ID = Parameter("DATASET_ID", default=vitai_constants.DATASET_ID.value) TABLE_ID = Parameter("table_id", required=True) - # Start run + ##################################### + # Rename flow run + #################################### with case(RENAME_FLOW, True): rename_flow_task = rename_current_flow_run_dataset_table( prefix="SMS Dump Vitai: ", dataset_id=TABLE_ID, table_id="" ) + #################################### + # Tasks section #1 - Get data + ##################################### get_secret_task = get_secret(secret_path=VAULT_PATH, secret_key=VAULT_KEY) create_folders_task = create_folders() @@ -72,6 +84,10 @@ ) 
download_task.set_upstream(create_folders_task) + ##################################### + # Tasks section #2 - Transform data and Create table + ##################################### + conversion_task = from_json_to_csv(input_path=download_task, sep=";") conversion_task.set_upstream(download_task) @@ -107,4 +123,4 @@ ], ) -dump_vitai.schedule = every_day_at_six_am +dump_vitai.schedule = vitai_clocks diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/schedules.py b/pipelines/rj_sms/dump_api_prontuario_vitai/schedules.py index 3710726ac..67ec68c81 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/schedules.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitai/schedules.py @@ -1,23 +1,46 @@ # -*- coding: utf-8 -*- # pylint: disable=C0103 """ -Schedules for the database dump pipeline +Schedules for the vitacare dump pipeline """ -from datetime import timedelta -import pendulum +from datetime import timedelta, datetime + from prefect.schedules import Schedule -from prefect.schedules.clocks import IntervalClock +import pytz + + from pipelines.constants import constants +from pipelines.rj_sms.dump_api_prontuario_vitai.constants import ( + constants as vitai_constants, +) +from pipelines.utils.utils import untuple_clocks as untuple +from pipelines.rj_sms.utils import generate_dump_api_schedules -every_day_at_six_am = Schedule( - clocks=[ - IntervalClock( - interval=timedelta(days=1), - start_date=pendulum.datetime(2023, 1, 1, 6, 0, 0, tz="America/Sao_Paulo"), - labels=[ - constants.RJ_SMS_AGENT_LABEL.value, - ], - ) - ] + +flow_parameters = [ + { + "table_id": "estoque_posicao", + "dataset_id": vitai_constants.DATASET_ID.value, + "endpoint": "posicao", + }, + { + "table_id": "estoque_movimento", + "dataset_id": vitai_constants.DATASET_ID.value, + "endpoint": "movimento", + "date": "yesterday", + }, +] + + +vitai_clocks = generate_dump_api_schedules( + interval=timedelta(days=1), + start_date=datetime(2023, 1, 1, 13, 20, tzinfo=pytz.timezone("America/Sao_Paulo")), + labels=[ + constants.RJ_SMS_DEV_AGENT_LABEL.value, + ], + flow_run_parameters=flow_parameters, + runs_interval_minutes=2, ) + +vitai_daily_update_schedule = Schedule(clocks=untuple(vitai_clocks)) diff --git a/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py b/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py index 7cce08b6e..d9d0ed784 100644 --- a/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py +++ b/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py @@ -29,28 +29,42 @@ ) from pipelines.rj_sms.dump_azureblob_estoque_tpc.schedules import every_day_at_six_am -with Flow(name="SMS: Dump TPC - Captura ", code_owners=["thiago"]) as dump_tpc: +with Flow(name="SMS: Dump TPC - Ingerir dados do estoque TPC", code_owners=["thiago"]) as dump_tpc: + + ##################################### # Parameters - # Flow parameters + ##################################### + + # Flow RENAME_FLOW = Parameter("rename_flow", default=True) - # Parameters for Vault + + # Vault VAULT_PATH = tpc_constants.VAULT_PATH.value VAULT_KEY = tpc_constants.VAULT_KEY.value + # TPC Azure CONTAINER_NAME = tpc_constants.CONTAINER_NAME.value BLOB_FILE = Parameter("blob_file", required=True) - # Paramenters for GCP + + # GCP DATASET_ID = Parameter( "DATASET_ID", default=tpc_constants.DATASET_ID.value ) TABLE_ID = Parameter("table_id", required=True) - # Start run + ##################################### + # Rename flow run + #################################### + with case(RENAME_FLOW, True): rename_flow_task = rename_current_flow_run_dataset_table( prefix="SMS 
Dump TPC: ", dataset_id=TABLE_ID, table_id="" ) + #################################### + # Tasks section #1 - Get data + ##################################### + get_secret_task = get_secret(secret_path=VAULT_PATH, secret_key=VAULT_KEY) create_folders_task = create_folders() @@ -69,6 +83,10 @@ ) download_task.set_upstream(create_folders_task) + ##################################### + # Tasks section #2 - Transform data and Create table + ##################################### + conform_task = conform_csv_to_gcp( filepath=download_task, blob_file=BLOB_FILE) @@ -94,7 +112,7 @@ dataset_is_public=False, ) upload_to_datalake_task.set_upstream(create_partitions_task) - + dump_tpc.storage = GCS(constants.GCS_FLOWS_BUCKET.value) dump_tpc.run_config = KubernetesRun( image=constants.DOCKER_IMAGE.value, diff --git a/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py b/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py index dd4aa0239..8f1551b55 100644 --- a/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py +++ b/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py @@ -1,22 +1,50 @@ # -*- coding: utf-8 -*- # pylint: disable=C0103 """ -Schedules for the database dump pipeline +Schedules for the vitacare dump pipeline """ -from datetime import timedelta -import pendulum + +from datetime import timedelta, datetime + from prefect.schedules import Schedule -from prefect.schedules.clocks import IntervalClock +import pytz + + from pipelines.constants import constants +from pipelines.rj_sms.dump_azureblob_estoque_tpc.constants import ( + constants as tpc_constants, +) +from pipelines.utils.utils import untuple_clocks as untuple +from pipelines.rj_sms.utils import generate_dump_api_schedules + + +flow_parameters = [ + { + "table_id": "estoque_posicao", + "dataset_id": tpc_constants.DATASET_ID.value, + "blob_file": "posicao", + }, + { + "table_id": "estoque_pedidos_abastecimento", + "dataset_id": tpc_constants.DATASET_ID.value, + "blob_file": "pedidos", + }, + { + "table_id": "estoque_recebimento", + "dataset_id": tpc_constants.DATASET_ID.value, + "blob_file": "recebimento", + }, +] + + +tpc_clocks = generate_dump_api_schedules( + interval=timedelta(days=1), + start_date=datetime(2023, 1, 1, 13, 20, tzinfo=pytz.timezone("America/Sao_Paulo")), + labels=[ + constants.RJ_SMS_DEV_AGENT_LABEL.value, + ], + flow_run_parameters=flow_parameters, + runs_interval_minutes=2, +) -every_day_at_six_am = Schedule( - clocks=[ - IntervalClock( - interval=timedelta(days=1), - start_date=pendulum.datetime(2023, 1, 1, 6, 0, 0, tz="America/Sao_Paulo"), - labels=[ - constants.RJ_SMS_AGENT_LABEL.value, - ], - ) - ] -) \ No newline at end of file +tpc_daily_update_schedule = Schedule(clocks=untuple(tpc_clocks)) diff --git a/pipelines/rj_sms/dump_sheets/schedules.py b/pipelines/rj_sms/dump_sheets/schedules.py index 18254f585..54f0277f5 100644 --- a/pipelines/rj_sms/dump_sheets/schedules.py +++ b/pipelines/rj_sms/dump_sheets/schedules.py @@ -22,7 +22,6 @@ "url_type": "google_sheet", "gsheets_sheet_name": "Sheet1", "dataset_id": sheets_constants.DATASET_ID.value, - "table_id": "estabelecimento_auxiliar", "dump_mode": "overwrite", "biglake_table": True, }, @@ -31,7 +30,6 @@ "url_type": "google_sheet", "gsheets_sheet_name": "CONSOLIDADO", "dataset_id": sheets_constants.DATASET_ID.value, - "table_id": "material_remume", "dump_mode": "overwrite", "biglake_table": True, }, @@ -40,13 +38,13 @@ sms_clocks = generate_dump_url_schedules( interval=timedelta(days=1), - start_date=datetime(2023, 1, 1, 5, 0, 
tzinfo=pytz.timezone("America/Sao_Paulo")), + start_date=datetime(2023, 1, 1, 13, 20, tzinfo=pytz.timezone("America/Sao_Paulo")), labels=[ - constants.RJ_SMS_AGENT_LABEL.value, + constants.RJ_SMS_DEV_AGENT_LABEL.value, ], dataset_id=sheets_constants.DATASET_ID.value, table_parameters=table_parameters, - runs_interval_minutes=5, + runs_interval_minutes=2, ) sms_sheets_daily_update_schedule = Schedule(clocks=untuple(sms_clocks)) diff --git a/pipelines/rj_sms/utils.py b/pipelines/rj_sms/utils.py new file mode 100644 index 000000000..d7b30b24d --- /dev/null +++ b/pipelines/rj_sms/utils.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +# pylint: disable=C0103 +""" +General utilities for sms pipelines. +""" + +from datetime import datetime, timedelta +from typing import List + +from prefect.schedules.clocks import IntervalClock + + +def generate_dump_api_schedules( # pylint: disable=too-many-arguments,too-many-locals + interval: timedelta, + start_date: datetime, + labels: List[str], + flow_run_parameters: List[dict], + runs_interval_minutes: int = 2, +) -> List[IntervalClock]: + """ + Generates multiple schedules for vitacare dumping. + """ + clocks = [] + for count, parameters in enumerate(flow_run_parameters): + new_interval = parameters["interval"] if "interval" in parameters else interval + + clocks.append( + IntervalClock( + interval=new_interval, + start_date=start_date + + timedelta(minutes=runs_interval_minutes * count), + labels=labels, + parameter_defaults=parameters, + ) + ) + return clocks + + +def generate_dicts(dict_template: dict, key: str, values: list) -> list: + """ + Generates a list of dictionaries from a template dictionary and a list of + values to be used in the template. + + Args: + dict_template (dict): Template dictionary to be used in the generation + of the list of dictionaries. + key (str): Key to be used in the template. + values (list): List of values to be used in the template. + + Returns: + list: List of dictionaries generated from the template dictionary and + the list of values. 
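generate_dump_api_schedules emits one IntervalClock per parameter dict, attaching the dict as parameter_defaults and offsetting each start by runs_interval_minutes * count, so the daily runs fan out a few minutes apart instead of all firing at once. A quick illustration of the resulting fire times, with hypothetical inputs:

from datetime import datetime, timedelta

start = datetime(2023, 1, 1, 13, 0)
for count, params in enumerate([{"ap": "10"}, {"ap": "21"}, {"ap": "22"}]):
    print(params["ap"], start + timedelta(minutes=2 * count))
# 10 2023-01-01 13:00:00
# 21 2023-01-01 13:02:00
# 22 2023-01-01 13:04:00  -> one clock, hence one staggered daily run, per dict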
+ """ + return [dict_template | {key: value} for value in values] From f258929d0b8c71e0c7caee2466f8b19faf40c476 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 28 Nov 2023 15:10:46 +0000 Subject: [PATCH 10/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../dump_api_prontuario_vitacare/flows.py | 13 ++++------- .../dump_api_prontuario_vitacare/schedules.py | 13 +++++------ .../rj_sms/dump_api_prontuario_vitai/flows.py | 1 - .../dump_azureblob_estoque_tpc/constants.py | 8 +++---- .../dump_azureblob_estoque_tpc/flows.py | 13 +++++------ .../dump_azureblob_estoque_tpc/tasks.py | 22 ++++++++----------- pipelines/rj_sms/dump_sheets/flows.py | 2 +- 7 files changed, 29 insertions(+), 43 deletions(-) diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py index b158e395d..7cc2d3559 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py @@ -31,9 +31,9 @@ with Flow( - name="SMS: Dump VitaCare - Ingerir dados do prontuário VitaCare", code_owners=["thiago"] + name="SMS: Dump VitaCare - Ingerir dados do prontuário VitaCare", + code_owners=["thiago"], ) as dump_vitacare: - ##################################### # Parameters ##################################### @@ -50,9 +50,7 @@ DATE = Parameter("date", default="today") # GCP - DATASET_ID = Parameter( - "DATASET_ID", default=vitacare_constants.DATASET_ID.value - ) + DATASET_ID = Parameter("DATASET_ID", default=vitacare_constants.DATASET_ID.value) TABLE_ID = Parameter("table_id", required=True) ##################################### @@ -60,10 +58,7 @@ #################################### with case(RENAME_FLOW, True): - rename_flow_task = rename_flow( - table_id=TABLE_ID, - ap=AP - ) + rename_flow_task = rename_flow(table_id=TABLE_ID, ap=AP) #################################### # Tasks section #1 - Get data diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py index 37f912d85..68226fb9a 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py @@ -11,12 +11,11 @@ from pipelines.constants import constants -from pipelines.rj_sms.dump_api_prontuario_vitacare.constants import constants as vitacare_constants -from pipelines.utils.utils import untuple_clocks as untuple -from pipelines.rj_sms.utils import ( - generate_dicts, - generate_dump_api_schedules +from pipelines.rj_sms.dump_api_prontuario_vitacare.constants import ( + constants as vitacare_constants, ) +from pipelines.utils.utils import untuple_clocks as untuple +from pipelines.rj_sms.utils import generate_dicts, generate_dump_api_schedules posicao_parameters = generate_dicts( @@ -28,7 +27,7 @@ "date": "today", }, key="AP", - values=["10", "21", "22", "31", "32", "33", "40", "51", "52", "53"] + values=["10", "21", "22", "31", "32", "33", "40", "51", "52", "53"], ) movimento_parameters = generate_dicts( @@ -40,7 +39,7 @@ "date": "yesterday", }, key="AP", - values=["10", "21", "22", "31", "32", "33", "40", "51", "52", "53"] + values=["10", "21", "22", "31", "32", "33", "40", "51", "52", "53"], ) flow_parameters = posicao_parameters + movimento_parameters diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py index 
b8d0cfd19..9a6731296 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py @@ -30,7 +30,6 @@ with Flow( name="SMS: Dump Vitai - Ingerir dados do prontuário Vitai", code_owners=["thiago"] ) as dump_vitai: - ##################################### # Parameters ##################################### diff --git a/pipelines/rj_sms/dump_azureblob_estoque_tpc/constants.py b/pipelines/rj_sms/dump_azureblob_estoque_tpc/constants.py index fc141bb1d..db8e0f8a0 100644 --- a/pipelines/rj_sms/dump_azureblob_estoque_tpc/constants.py +++ b/pipelines/rj_sms/dump_azureblob_estoque_tpc/constants.py @@ -16,7 +16,7 @@ class constants(Enum): DATASET_ID = "brutos_estoque_central_tpc" CONTAINER_NAME = "datalaketpc" BLOB_PATH = { - "posicao" : "gold/logistico/cliente=prefeitura_rio/planta=sms_rio/estoque_local/estoque_local.csv", - "pedidos" : "gold/logistico/cliente=prefeitura_rio/planta=sms_rio/pedidos_depositante/pedidos_depositante.csv", - "recebimento" : "gold/logistico/cliente=prefeitura_rio/planta=sms_rio/recebimento_documental/recebimento_documental.csv", - } + "posicao": "gold/logistico/cliente=prefeitura_rio/planta=sms_rio/estoque_local/estoque_local.csv", + "pedidos": "gold/logistico/cliente=prefeitura_rio/planta=sms_rio/pedidos_depositante/pedidos_depositante.csv", + "recebimento": "gold/logistico/cliente=prefeitura_rio/planta=sms_rio/recebimento_documental/recebimento_documental.csv", + } diff --git a/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py b/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py index d9d0ed784..e5d781e88 100644 --- a/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py +++ b/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py @@ -29,8 +29,9 @@ ) from pipelines.rj_sms.dump_azureblob_estoque_tpc.schedules import every_day_at_six_am -with Flow(name="SMS: Dump TPC - Ingerir dados do estoque TPC", code_owners=["thiago"]) as dump_tpc: - +with Flow( + name="SMS: Dump TPC - Ingerir dados do estoque TPC", code_owners=["thiago"] +) as dump_tpc: ##################################### # Parameters ##################################### @@ -47,9 +48,7 @@ BLOB_FILE = Parameter("blob_file", required=True) # GCP - DATASET_ID = Parameter( - "DATASET_ID", default=tpc_constants.DATASET_ID.value - ) + DATASET_ID = Parameter("DATASET_ID", default=tpc_constants.DATASET_ID.value) TABLE_ID = Parameter("table_id", required=True) ##################################### @@ -87,9 +86,7 @@ # Tasks section #2 - Transform data and Create table ##################################### - conform_task = conform_csv_to_gcp( - filepath=download_task, - blob_file=BLOB_FILE) + conform_task = conform_csv_to_gcp(filepath=download_task, blob_file=BLOB_FILE) conform_task.set_upstream(download_task) add_load_date_column_task = add_load_date_column(input_path=download_task, sep=";") diff --git a/pipelines/rj_sms/dump_azureblob_estoque_tpc/tasks.py b/pipelines/rj_sms/dump_azureblob_estoque_tpc/tasks.py index 6753a8b4b..b9af52a23 100644 --- a/pipelines/rj_sms/dump_azureblob_estoque_tpc/tasks.py +++ b/pipelines/rj_sms/dump_azureblob_estoque_tpc/tasks.py @@ -24,6 +24,7 @@ def get_blob_path(blob_file: str): """ return tpc_constants.BLOB_PATH.value[blob_file] + @task def conform_csv_to_gcp(filepath: str, blob_file: str): """ @@ -39,19 +40,17 @@ def conform_csv_to_gcp(filepath: str, blob_file: str): log("Conforming CSV to GCP") # remove " from csv to avoid errors - with open(filepath, 'r') as f: + with open(filepath, "r") as f: file_contents = f.read() - 
file_contents = file_contents.replace('\"', '') + file_contents = file_contents.replace('"', "") - with open(filepath, 'w') as f: + with open(filepath, "w") as f: f.write(file_contents) - df = pd.read_csv(filepath, sep=";", dtype=str, keep_default_na=False) if blob_file == "posicao": - # remove registros errados df = df[df.sku != ""] @@ -76,16 +75,14 @@ def conform_csv_to_gcp(filepath: str, blob_file: str): ) df["peso"] = df.peso.apply(lambda x: float(x.replace(",", "."))) df["volume"] = df.volume.apply(lambda x: float(x.replace(",", "."))) - df["quantidade_peca"] = df.quantidade_peca.apply(lambda x: float(x.replace(",", "."))) + df["quantidade_peca"] = df.quantidade_peca.apply( + lambda x: float(x.replace(",", ".")) + ) df["valor_total"] = df.valor_total.apply( lambda x: float(x.replace(",", ".")) if x != "" else x ) elif blob_file == "recebimento": - - - df["qt"] = df.qt.apply( - lambda x: float(x.replace(",", ".")) if x != "" else x - ) + df["qt"] = df.qt.apply(lambda x: float(x.replace(",", ".")) if x != "" else x) df["qt_fis"] = df.qt_fis.apply( lambda x: float(x.replace(",", ".")) if x != "" else x ) @@ -102,7 +99,6 @@ def conform_csv_to_gcp(filepath: str, blob_file: str): lambda x: float(x.replace(",", ".")) if x != "" else x ) - df.to_csv(filepath, index=False, sep=";", encoding="utf-8", quoting=0, decimal=".") - log("CSV now conform") \ No newline at end of file + log("CSV now conform") diff --git a/pipelines/rj_sms/dump_sheets/flows.py b/pipelines/rj_sms/dump_sheets/flows.py index e5de7c3b3..db3c561ee 100644 --- a/pipelines/rj_sms/dump_sheets/flows.py +++ b/pipelines/rj_sms/dump_sheets/flows.py @@ -27,4 +27,4 @@ ], ) -dump_sms_sheets_flow.schedule = sms_sheets_daily_update_schedule \ No newline at end of file +dump_sms_sheets_flow.schedule = sms_sheets_daily_update_schedule From 519af1243a5d65cbfdb0ee6b0ad6ca5171a11f54 Mon Sep 17 00:00:00 2001 From: ThiagoTrabach Date: Tue, 28 Nov 2023 12:23:54 -0300 Subject: [PATCH 11/25] Update schedule for TPC daily update --- pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py b/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py index e5d781e88..5926e8941 100644 --- a/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py +++ b/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py @@ -27,7 +27,7 @@ get_blob_path, conform_csv_to_gcp, ) -from pipelines.rj_sms.dump_azureblob_estoque_tpc.schedules import every_day_at_six_am +from pipelines.rj_sms.dump_azureblob_estoque_tpc.schedules import tpc_daily_update_schedule with Flow( name="SMS: Dump TPC - Ingerir dados do estoque TPC", code_owners=["thiago"] @@ -118,4 +118,4 @@ ], ) -dump_tpc.schedule = every_day_at_six_am +dump_tpc.schedule = tpc_daily_update_schedule From 0e2ea2c2a9482b80d822c1a433a3bd61a34edfaf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 28 Nov 2023 15:24:26 +0000 Subject: [PATCH 12/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py b/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py index 5926e8941..547b421ee 100644 --- a/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py +++ b/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py @@ -27,7 +27,9 @@ 
get_blob_path, conform_csv_to_gcp, ) -from pipelines.rj_sms.dump_azureblob_estoque_tpc.schedules import tpc_daily_update_schedule +from pipelines.rj_sms.dump_azureblob_estoque_tpc.schedules import ( + tpc_daily_update_schedule, +) with Flow( name="SMS: Dump TPC - Ingerir dados do estoque TPC", code_owners=["thiago"] From 84dc346b0ef70fdff1f41794e79f8ab21a36a426 Mon Sep 17 00:00:00 2001 From: ThiagoTrabach Date: Tue, 28 Nov 2023 12:56:49 -0300 Subject: [PATCH 13/25] Update schedules for daily data dump --- pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py | 4 ++-- pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py | 5 +---- pipelines/rj_sms/dump_api_prontuario_vitai/flows.py | 4 ++-- pipelines/rj_sms/dump_api_prontuario_vitai/schedules.py | 2 +- pipelines/rj_sms/dump_azureblob_estoque_tpc/constants.py | 1 + pipelines/rj_sms/dump_sheets/flows.py | 4 ++-- pipelines/rj_sms/dump_sheets/schedules.py | 2 +- 7 files changed, 10 insertions(+), 12 deletions(-) diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py index 7cc2d3559..9107e3b7d 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py @@ -27,7 +27,7 @@ save_data_to_file, ) -from pipelines.rj_sms.dump_api_prontuario_vitacare.schedules import vitacare_clocks +from pipelines.rj_sms.dump_api_prontuario_vitacare.schedules import vitacare_daily_update_schedule with Flow( @@ -129,4 +129,4 @@ ], ) -dump_vitacare.schedule = vitacare_clocks +dump_vitacare.schedule = vitacare_daily_update_schedule diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py index 68226fb9a..6c847f0d6 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py @@ -47,7 +47,7 @@ vitacare_clocks = generate_dump_api_schedules( interval=timedelta(days=1), - start_date=datetime(2023, 1, 1, 13, 0, tzinfo=pytz.timezone("America/Sao_Paulo")), + start_date=datetime(2023, 1, 1, 13, 10, tzinfo=pytz.timezone("America/Sao_Paulo")), labels=[ constants.RJ_SMS_DEV_AGENT_LABEL.value, ], @@ -57,6 +57,3 @@ vitacare_daily_update_schedule = Schedule(clocks=untuple(vitacare_clocks)) - -if __name__ == "__main__": - print(vitacare_daily_update_schedule) diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py index 9a6731296..cb95ae7db 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py @@ -25,7 +25,7 @@ upload_to_datalake, ) from pipelines.rj_sms.dump_api_prontuario_vitai.tasks import build_date_param, build_url -from pipelines.rj_sms.dump_api_prontuario_vitai.schedules import vitai_clocks +from pipelines.rj_sms.dump_api_prontuario_vitai.schedules import vitai_daily_update_schedule with Flow( name="SMS: Dump Vitai - Ingerir dados do prontuário Vitai", code_owners=["thiago"] @@ -122,4 +122,4 @@ ], ) -dump_vitai.schedule = vitai_clocks +dump_vitai.schedule = vitai_daily_update_schedule diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/schedules.py b/pipelines/rj_sms/dump_api_prontuario_vitai/schedules.py index 67ec68c81..fc1675e9c 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/schedules.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitai/schedules.py @@ -35,7 +35,7 @@ vitai_clocks = generate_dump_api_schedules( interval=timedelta(days=1), - 
start_date=datetime(2023, 1, 1, 13, 20, tzinfo=pytz.timezone("America/Sao_Paulo")), + start_date=datetime(2023, 1, 1, 13, 30, tzinfo=pytz.timezone("America/Sao_Paulo")), labels=[ constants.RJ_SMS_DEV_AGENT_LABEL.value, ], diff --git a/pipelines/rj_sms/dump_azureblob_estoque_tpc/constants.py b/pipelines/rj_sms/dump_azureblob_estoque_tpc/constants.py index db8e0f8a0..9a1529b52 100644 --- a/pipelines/rj_sms/dump_azureblob_estoque_tpc/constants.py +++ b/pipelines/rj_sms/dump_azureblob_estoque_tpc/constants.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # pylint: disable=C0103 +# flake8: noqa: E501 """ Constants for TPC. """ diff --git a/pipelines/rj_sms/dump_sheets/flows.py b/pipelines/rj_sms/dump_sheets/flows.py index db3c561ee..39c080b7b 100644 --- a/pipelines/rj_sms/dump_sheets/flows.py +++ b/pipelines/rj_sms/dump_sheets/flows.py @@ -10,7 +10,7 @@ from pipelines.constants import constants from pipelines.utils.dump_url.flows import dump_url_flow -from pipelines.rj_sms.dump_sheets.schedules import sms_sheets_daily_update_schedule +from pipelines.rj_sms.dump_sheets.schedules import sheets_daily_update_schedule # TODO: add code owner @@ -27,4 +27,4 @@ ], ) -dump_sms_sheets_flow.schedule = sms_sheets_daily_update_schedule +dump_sms_sheets_flow.schedule = sheets_daily_update_schedule diff --git a/pipelines/rj_sms/dump_sheets/schedules.py b/pipelines/rj_sms/dump_sheets/schedules.py index 54f0277f5..b96acb584 100644 --- a/pipelines/rj_sms/dump_sheets/schedules.py +++ b/pipelines/rj_sms/dump_sheets/schedules.py @@ -47,4 +47,4 @@ runs_interval_minutes=2, ) -sms_sheets_daily_update_schedule = Schedule(clocks=untuple(sms_clocks)) +sheets_daily_update_schedule = Schedule(clocks=untuple(sms_clocks)) From fe5f6e7620770e143ff9a0b71256d86a381430c7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 28 Nov 2023 15:59:12 +0000 Subject: [PATCH 14/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py | 4 +++- pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py | 1 - pipelines/rj_sms/dump_api_prontuario_vitai/flows.py | 4 +++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py index 9107e3b7d..187af4598 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py @@ -27,7 +27,9 @@ save_data_to_file, ) -from pipelines.rj_sms.dump_api_prontuario_vitacare.schedules import vitacare_daily_update_schedule +from pipelines.rj_sms.dump_api_prontuario_vitacare.schedules import ( + vitacare_daily_update_schedule, +) with Flow( diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py index 6c847f0d6..bdb003b36 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py @@ -56,4 +56,3 @@ ) vitacare_daily_update_schedule = Schedule(clocks=untuple(vitacare_clocks)) - diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py index cb95ae7db..5bd81fffb 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py @@ -25,7 +25,9 @@ upload_to_datalake, ) from 
pipelines.rj_sms.dump_api_prontuario_vitai.tasks import build_date_param, build_url -from pipelines.rj_sms.dump_api_prontuario_vitai.schedules import vitai_daily_update_schedule +from pipelines.rj_sms.dump_api_prontuario_vitai.schedules import ( + vitai_daily_update_schedule, +) with Flow( name="SMS: Dump Vitai - Ingerir dados do prontuário Vitai", code_owners=["thiago"] From 27c046bf447cc1c4440add2a985f6f483df5609b Mon Sep 17 00:00:00 2001 From: ThiagoTrabach Date: Tue, 28 Nov 2023 13:12:03 -0300 Subject: [PATCH 15/25] Update schedules.py with new start dates --- .../rj_sms/dump_api_prontuario_vitacare/schedules.py | 10 +++++----- .../rj_sms/dump_api_prontuario_vitai/schedules.py | 2 +- .../rj_sms/dump_azureblob_estoque_tpc/schedules.py | 2 +- pipelines/rj_sms/dump_sheets/schedules.py | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py index bdb003b36..a34163ea6 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py @@ -22,11 +22,11 @@ dict_template={ "dataset_id": vitacare_constants.DATASET_ID.value, "table_id": "estoque_posicao", - "AP": "", + "ap": "", "endpoint": "posicao", "date": "today", }, - key="AP", + key="ap", values=["10", "21", "22", "31", "32", "33", "40", "51", "52", "53"], ) @@ -34,11 +34,11 @@ dict_template={ "dataset_id": vitacare_constants.DATASET_ID.value, "table_id": "estoque_movimento", - "AP": "", + "ap": "", "endpoint": "movimento", "date": "yesterday", }, - key="AP", + key="ap", values=["10", "21", "22", "31", "32", "33", "40", "51", "52", "53"], ) @@ -47,7 +47,7 @@ vitacare_clocks = generate_dump_api_schedules( interval=timedelta(days=1), - start_date=datetime(2023, 1, 1, 13, 10, tzinfo=pytz.timezone("America/Sao_Paulo")), + start_date=datetime(2023, 1, 1, 13, 30, tzinfo=pytz.timezone("America/Sao_Paulo")), labels=[ constants.RJ_SMS_DEV_AGENT_LABEL.value, ], diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/schedules.py b/pipelines/rj_sms/dump_api_prontuario_vitai/schedules.py index fc1675e9c..8b8093957 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/schedules.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitai/schedules.py @@ -35,7 +35,7 @@ vitai_clocks = generate_dump_api_schedules( interval=timedelta(days=1), - start_date=datetime(2023, 1, 1, 13, 30, tzinfo=pytz.timezone("America/Sao_Paulo")), + start_date=datetime(2023, 1, 1, 13, 40, tzinfo=pytz.timezone("America/Sao_Paulo")), labels=[ constants.RJ_SMS_DEV_AGENT_LABEL.value, ], diff --git a/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py b/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py index 8f1551b55..fc86e9e99 100644 --- a/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py +++ b/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py @@ -39,7 +39,7 @@ tpc_clocks = generate_dump_api_schedules( interval=timedelta(days=1), - start_date=datetime(2023, 1, 1, 13, 20, tzinfo=pytz.timezone("America/Sao_Paulo")), + start_date=datetime(2023, 1, 1, 13, 40, tzinfo=pytz.timezone("America/Sao_Paulo")), labels=[ constants.RJ_SMS_DEV_AGENT_LABEL.value, ], diff --git a/pipelines/rj_sms/dump_sheets/schedules.py b/pipelines/rj_sms/dump_sheets/schedules.py index b96acb584..2918eed65 100644 --- a/pipelines/rj_sms/dump_sheets/schedules.py +++ b/pipelines/rj_sms/dump_sheets/schedules.py @@ -38,7 +38,7 @@ sms_clocks = generate_dump_url_schedules( interval=timedelta(days=1), - 
start_date=datetime(2023, 1, 1, 13, 20, tzinfo=pytz.timezone("America/Sao_Paulo")), + start_date=datetime(2023, 1, 1, 13, 40, tzinfo=pytz.timezone("America/Sao_Paulo")), labels=[ constants.RJ_SMS_DEV_AGENT_LABEL.value, ], From 53b0f889fea2c81caf570ca2140bc1e0ef925d1a Mon Sep 17 00:00:00 2001 From: ThiagoTrabach Date: Wed, 29 Nov 2023 14:00:19 -0300 Subject: [PATCH 16/25] Update GCP dataset_id and start_date in RJ SMS flows and schedules --- pipelines/constants.py | 4 ++++ .../dump_api_prontuario_vitacare/flows.py | 2 +- .../dump_api_prontuario_vitacare/schedules.py | 2 +- .../rj_sms/dump_api_prontuario_vitai/flows.py | 2 +- .../dump_azureblob_estoque_tpc/flows.py | 2 +- .../dump_azureblob_estoque_tpc/schedules.py | 20 +++++++++---------- pipelines/rj_sms/dump_sheets/schedules.py | 2 +- pipelines/rj_sms/tasks.py | 7 +++---- 8 files changed, 22 insertions(+), 19 deletions(-) diff --git a/pipelines/constants.py b/pipelines/constants.py index 1040ca077..e8550c2bb 100644 --- a/pipelines/constants.py +++ b/pipelines/constants.py @@ -150,4 +150,8 @@ class constants(Enum): # pylint: disable=c0103 "user_id": "222842688117014528", "type": "user_nickname", }, + "danilo": { + "user_id": "1147152438487416873", + "type": "user_nickname", + }, } diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py index 187af4598..406653719 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py @@ -52,7 +52,7 @@ DATE = Parameter("date", default="today") # GCP - DATASET_ID = Parameter("DATASET_ID", default=vitacare_constants.DATASET_ID.value) + DATASET_ID = Parameter("dataset_id", default=vitacare_constants.DATASET_ID.value) TABLE_ID = Parameter("table_id", required=True) ##################################### diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py index a34163ea6..7acdd7164 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py @@ -47,7 +47,7 @@ vitacare_clocks = generate_dump_api_schedules( interval=timedelta(days=1), - start_date=datetime(2023, 1, 1, 13, 30, tzinfo=pytz.timezone("America/Sao_Paulo")), + start_date=datetime(2023, 1, 1, 14, 30, tzinfo=pytz.timezone("America/Sao_Paulo")), labels=[ constants.RJ_SMS_DEV_AGENT_LABEL.value, ], diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py index 5bd81fffb..5552d08bb 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py @@ -48,7 +48,7 @@ DATE = Parameter("date", default=None) # GCP - DATASET_ID = Parameter("DATASET_ID", default=vitai_constants.DATASET_ID.value) + DATASET_ID = Parameter("dataset_id", default=vitai_constants.DATASET_ID.value) TABLE_ID = Parameter("table_id", required=True) ##################################### diff --git a/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py b/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py index 547b421ee..cd47af067 100644 --- a/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py +++ b/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py @@ -50,7 +50,7 @@ BLOB_FILE = Parameter("blob_file", required=True) # GCP - DATASET_ID = Parameter("DATASET_ID", default=tpc_constants.DATASET_ID.value) + DATASET_ID = Parameter("dataset_id", default=tpc_constants.DATASET_ID.value) 
TABLE_ID = Parameter("table_id", required=True) ##################################### diff --git a/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py b/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py index fc86e9e99..6bdd22d61 100644 --- a/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py +++ b/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py @@ -24,16 +24,16 @@ "dataset_id": tpc_constants.DATASET_ID.value, "blob_file": "posicao", }, - { - "table_id": "estoque_pedidos_abastecimento", - "dataset_id": tpc_constants.DATASET_ID.value, - "blob_file": "pedidos", - }, - { - "table_id": "estoque_recebimento", - "dataset_id": tpc_constants.DATASET_ID.value, - "blob_file": "recebimento", - }, + #{ + # "table_id": "estoque_pedidos_abastecimento", + # "dataset_id": tpc_constants.DATASET_ID.value, + # "blob_file": "pedidos", + #}, + #{ + # "table_id": "estoque_recebimento", + # "dataset_id": tpc_constants.DATASET_ID.value, + # "blob_file": "recebimento", + #}, ] diff --git a/pipelines/rj_sms/dump_sheets/schedules.py b/pipelines/rj_sms/dump_sheets/schedules.py index 2918eed65..b96acb584 100644 --- a/pipelines/rj_sms/dump_sheets/schedules.py +++ b/pipelines/rj_sms/dump_sheets/schedules.py @@ -38,7 +38,7 @@ sms_clocks = generate_dump_url_schedules( interval=timedelta(days=1), - start_date=datetime(2023, 1, 1, 13, 40, tzinfo=pytz.timezone("America/Sao_Paulo")), + start_date=datetime(2023, 1, 1, 13, 20, tzinfo=pytz.timezone("America/Sao_Paulo")), labels=[ constants.RJ_SMS_DEV_AGENT_LABEL.value, ], diff --git a/pipelines/rj_sms/tasks.py b/pipelines/rj_sms/tasks.py index e3bf40f05..4794372b5 100644 --- a/pipelines/rj_sms/tasks.py +++ b/pipelines/rj_sms/tasks.py @@ -27,7 +27,6 @@ get_username_and_password_from_secret, ) - @task def get_secret(secret_path: str, secret_key: str = None): """ @@ -311,9 +310,9 @@ def cloud_function_request( else: raise ValueError("env must be 'prod' or 'dev'") - TOKEN = os.environ.get("GOOGLE_TOKEN") - # request = google.auth.transport.requests.Request() - # TOKEN = google.oauth2.id_token.fetch_id_token(request, audience) + request = google.auth.transport.requests.Request() + TOKEN = google.oauth2.id_token.fetch_id_token(request, cloud_function_url) + payload = json.dumps( { From 089da7107a14fd3b48339a24f0d58e1478f72000 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 29 Nov 2023 17:00:51 +0000 Subject: [PATCH 17/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py | 8 ++++---- pipelines/rj_sms/tasks.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py b/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py index 6bdd22d61..ecc86241a 100644 --- a/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py +++ b/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py @@ -24,16 +24,16 @@ "dataset_id": tpc_constants.DATASET_ID.value, "blob_file": "posicao", }, - #{ + # { # "table_id": "estoque_pedidos_abastecimento", # "dataset_id": tpc_constants.DATASET_ID.value, # "blob_file": "pedidos", - #}, - #{ + # }, + # { # "table_id": "estoque_recebimento", # "dataset_id": tpc_constants.DATASET_ID.value, # "blob_file": "recebimento", - #}, + # }, ] diff --git a/pipelines/rj_sms/tasks.py b/pipelines/rj_sms/tasks.py index 4794372b5..6e3966ca5 100644 --- a/pipelines/rj_sms/tasks.py +++ 
b/pipelines/rj_sms/tasks.py @@ -27,6 +27,7 @@ get_username_and_password_from_secret, ) + @task def get_secret(secret_path: str, secret_key: str = None): """ @@ -313,7 +314,6 @@ def cloud_function_request( request = google.auth.transport.requests.Request() TOKEN = google.oauth2.id_token.fetch_id_token(request, cloud_function_url) - payload = json.dumps( { "url": url, From dde5d4888fd64351adb157a49f29b288d3cf0b14 Mon Sep 17 00:00:00 2001 From: ThiagoTrabach Date: Wed, 29 Nov 2023 20:36:16 -0300 Subject: [PATCH 18/25] Update code owners and schedules --- .../dump_api_prontuario_vitacare/constants.py | 4 ++-- .../rj_sms/dump_api_prontuario_vitacare/flows.py | 14 +++++++------- .../dump_api_prontuario_vitacare/schedules.py | 6 +++--- .../rj_sms/dump_api_prontuario_vitai/flows.py | 5 +++-- .../rj_sms/dump_api_prontuario_vitai/schedules.py | 4 ++-- .../rj_sms/dump_azureblob_estoque_tpc/flows.py | 5 +++-- .../rj_sms/dump_azureblob_estoque_tpc/schedules.py | 4 ++-- pipelines/rj_sms/dump_ftp_cnes/flows.py | 7 ++++--- pipelines/rj_sms/dump_sheets/flows.py | 1 + pipelines/rj_sms/tasks.py | 13 +++++++------ 10 files changed, 34 insertions(+), 29 deletions(-) diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/constants.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/constants.py index 82cfdd214..4b2476b0b 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/constants.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/constants.py @@ -15,8 +15,8 @@ class constants(Enum): DATASET_ID = "brutos_prontuario_vitacare" BASE_URL = { "10": "http://consolidado-ap10.pepvitacare.com:8088", - "21": "http://consolidado-ap21.pepvitacare.com:8088", - "22": "http://consolidado-ap22.pepvitacare.com:8088", + "21": "http://consolidado-ap21.pepvitacare.com:8090", + "22": "http://consolidado-ap22.pepvitacare.com:8091", "31": "http://consolidado-ap31.pepvitacare.com:8089", "32": "http://consolidado-ap32.pepvitacare.com:8090", "33": "http://consolidado-ap33.pepvitacare.com:8089", diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py index 406653719..5a40752a9 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py @@ -34,7 +34,7 @@ with Flow( name="SMS: Dump VitaCare - Ingerir dados do prontuário VitaCare", - code_owners=["thiago"], + code_owners=["thiago", "andre", "danilo"], ) as dump_vitacare: ##################################### # Parameters @@ -76,6 +76,9 @@ build_params_task = build_params(date_param=DATE) build_params_task.set_upstream(create_folders_task) # pylint: disable=E1101 + file_name_task = create_filename(table_id=TABLE_ID, ap=AP) + file_name_task.set_upstream(build_params_task) + download_task = cloud_function_request( url=build_url_task, credential=get_secret_task, @@ -84,10 +87,7 @@ query_params=build_params_task, env="prod", ) - download_task.set_upstream(build_url_task) - - file_name_task = create_filename(table_id=TABLE_ID, ap=AP) - file_name_task.set_upstream(download_task) + download_task.set_upstream(file_name_task) # pylint: disable=E1101 save_data_task = save_data_to_file( data=download_task, @@ -97,7 +97,7 @@ add_load_date_to_filename=True, load_date=build_params_task["date"], ) - save_data_task.set_upstream(file_name_task) # pylint: disable=E1101 + save_data_task.set_upstream(download_task) # pylint: disable=E1101 ##################################### # Tasks section #2 - Transform data and Create table @@ -127,7 +127,7 @@ 
dump_vitacare.run_config = KubernetesRun( image=constants.DOCKER_IMAGE.value, labels=[ - constants.RJ_SMS_DEV_AGENT_LABEL.value, + constants.RJ_SMS_AGENT_LABEL.value, ], ) diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py index 7acdd7164..9f194f609 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/schedules.py @@ -47,12 +47,12 @@ vitacare_clocks = generate_dump_api_schedules( interval=timedelta(days=1), - start_date=datetime(2023, 1, 1, 14, 30, tzinfo=pytz.timezone("America/Sao_Paulo")), + start_date=datetime(2023, 1, 1, 5, 0, tzinfo=pytz.timezone("America/Sao_Paulo")), labels=[ - constants.RJ_SMS_DEV_AGENT_LABEL.value, + constants.RJ_SMS_AGENT_LABEL.value, ], flow_run_parameters=flow_parameters, - runs_interval_minutes=2, + runs_interval_minutes=1, ) vitacare_daily_update_schedule = Schedule(clocks=untuple(vitacare_clocks)) diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py index 5552d08bb..a2728b0dd 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py @@ -30,7 +30,8 @@ ) with Flow( - name="SMS: Dump Vitai - Ingerir dados do prontuário Vitai", code_owners=["thiago"] + name="SMS: Dump Vitai - Ingerir dados do prontuário Vitai", + code_owners=["thiago", "andre", "danilo"], ) as dump_vitai: ##################################### # Parameters @@ -78,7 +79,7 @@ file_folder=create_folders_task["raw"], file_name=TABLE_ID, params=None, - crendentials=get_secret_task, + credentials=get_secret_task, auth_method="bearer", add_load_date_to_filename=True, load_date=build_date_param_task, diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/schedules.py b/pipelines/rj_sms/dump_api_prontuario_vitai/schedules.py index 8b8093957..261e8bce0 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/schedules.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitai/schedules.py @@ -35,9 +35,9 @@ vitai_clocks = generate_dump_api_schedules( interval=timedelta(days=1), - start_date=datetime(2023, 1, 1, 13, 40, tzinfo=pytz.timezone("America/Sao_Paulo")), + start_date=datetime(2023, 1, 1, 5, 30, tzinfo=pytz.timezone("America/Sao_Paulo")), labels=[ - constants.RJ_SMS_DEV_AGENT_LABEL.value, + constants.RJ_SMS_AGENT_LABEL.value, ], flow_run_parameters=flow_parameters, runs_interval_minutes=2, diff --git a/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py b/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py index cd47af067..1db771ed9 100644 --- a/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py +++ b/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py @@ -32,7 +32,8 @@ ) with Flow( - name="SMS: Dump TPC - Ingerir dados do estoque TPC", code_owners=["thiago"] + name="SMS: Dump TPC - Ingerir dados do estoque TPC", + code_owners=["thiago", "andre", "danilo"], ) as dump_tpc: ##################################### # Parameters @@ -116,7 +117,7 @@ dump_tpc.run_config = KubernetesRun( image=constants.DOCKER_IMAGE.value, labels=[ - constants.RJ_SMS_DEV_AGENT_LABEL.value, + constants.RJ_SMS_AGENT_LABEL.value, ], ) diff --git a/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py b/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py index ecc86241a..d10055ff5 100644 --- a/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py +++ b/pipelines/rj_sms/dump_azureblob_estoque_tpc/schedules.py @@ -39,9 +39,9 @@ tpc_clocks = 
generate_dump_api_schedules( interval=timedelta(days=1), - start_date=datetime(2023, 1, 1, 13, 40, tzinfo=pytz.timezone("America/Sao_Paulo")), + start_date=datetime(2023, 1, 1, 5, 30, tzinfo=pytz.timezone("America/Sao_Paulo")), labels=[ - constants.RJ_SMS_DEV_AGENT_LABEL.value, + constants.RJ_SMS_AGENT_LABEL.value, ], flow_run_parameters=flow_parameters, runs_interval_minutes=2, diff --git a/pipelines/rj_sms/dump_ftp_cnes/flows.py b/pipelines/rj_sms/dump_ftp_cnes/flows.py index b3f8498a1..a7da70d90 100644 --- a/pipelines/rj_sms/dump_ftp_cnes/flows.py +++ b/pipelines/rj_sms/dump_ftp_cnes/flows.py @@ -22,7 +22,8 @@ with Flow( - name="SMS: Dump CNES - Captura de dados CNES", code_owners=["thiago"] + name="SMS: Dump CNES - Captura de dados CNES", + code_owners=["thiago", "andre", "danilo"], ) as dump_cnes: # Parameters # Parameters for GCP @@ -52,7 +53,7 @@ ) create_folders_task = create_folders() - create_folders_task.set_upstream(file_to_download_task) + create_folders_task.set_upstream(file_to_download_task) # pylint: disable=E1101 download_task = download_ftp_cnes( host=ftp_server, @@ -70,7 +71,7 @@ unzip_task.set_upstream(download_task) conform_task = conform_csv_to_gcp(create_folders_task["raw"]) - conform_task.set_upstream(unzip_task) + conform_task.set_upstream(unzip_task) # pylint: disable=E1101 add_multiple_date_column_task = add_multiple_date_column( directory=create_folders_task["raw"], diff --git a/pipelines/rj_sms/dump_sheets/flows.py b/pipelines/rj_sms/dump_sheets/flows.py index 39c080b7b..17acf8788 100644 --- a/pipelines/rj_sms/dump_sheets/flows.py +++ b/pipelines/rj_sms/dump_sheets/flows.py @@ -18,6 +18,7 @@ dump_sms_sheets_flow.name = ( "SMS: Dump Google Sheets - Ingerir planilhas do Google Sheets" ) +dump_sms_sheets_flow.code_owners = ["thiago", "andre", "danilo"] dump_sms_sheets_flow.storage = GCS(constants.GCS_FLOWS_BUCKET.value) dump_sms_sheets_flow.run_config = KubernetesRun( diff --git a/pipelines/rj_sms/tasks.py b/pipelines/rj_sms/tasks.py index 6e3966ca5..923d4bd8c 100644 --- a/pipelines/rj_sms/tasks.py +++ b/pipelines/rj_sms/tasks.py @@ -9,7 +9,7 @@ import shutil import sys import json -from datetime import datetime, date +from datetime import datetime, date, timedelta from pathlib import Path from ftplib import FTP import zipfile @@ -101,7 +101,7 @@ def download_from_api( file_folder: str, file_name: str, params=None, - crendentials=None, + credentials=None, auth_method="bearer", add_load_date_to_filename=False, load_date=None, @@ -114,7 +114,7 @@ def download_from_api( file_folder (str): The folder where the downloaded file will be saved. file_name (str): The name of the downloaded file. params (dict, optional): Additional parameters to be included in the API request. Defaults to None. - crendentials (str or tuple, optional): The credentials to be used for authentication. Defaults to None. + credentials (str or tuple, optional): The credentials to be used for authentication. Defaults to None. auth_method (str, optional): The authentication method to be used. Valid values are "bearer" and "basic". Defaults to "bearer". add_load_date_to_filename (bool, optional): Whether to add the load date to the filename. Defaults to False. load_date (str, optional): The load date to be added to the filename. Defaults to None. 
@@ -131,10 +131,10 @@ def download_from_api( log("Downloading data from API") if auth_method == "bearer": - headers = {"Authorization": f"Bearer {crendentials}"} + headers = {"Authorization": f"Bearer {credentials}"} response = requests.get(url, headers=headers, params=params) elif auth_method == "basic": - response = requests.get(url, auth=crendentials, params=params) + response = requests.get(url, auth=credentials, params=params) else: response = requests.get(url, params=params) @@ -277,7 +277,7 @@ def callback(block): return output_path -@task +@task(max_retries=2, retry_delay=timedelta(seconds=5), timeout=timedelta(seconds=240),) def cloud_function_request( url: str, credential: None, @@ -311,6 +311,7 @@ def cloud_function_request( else: raise ValueError("env must be 'prod' or 'dev'") + # TOKEN = os.environ.get("GOOGLE_TOKEN") request = google.auth.transport.requests.Request() TOKEN = google.oauth2.id_token.fetch_id_token(request, cloud_function_url) From eb3d983b5177650d4ecf71d65c4440fd9bfcbf65 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 29 Nov 2023 23:36:39 +0000 Subject: [PATCH 19/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/rj_sms/tasks.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pipelines/rj_sms/tasks.py b/pipelines/rj_sms/tasks.py index 923d4bd8c..82d16b98f 100644 --- a/pipelines/rj_sms/tasks.py +++ b/pipelines/rj_sms/tasks.py @@ -277,7 +277,11 @@ def callback(block): return output_path -@task(max_retries=2, retry_delay=timedelta(seconds=5), timeout=timedelta(seconds=240),) +@task( + max_retries=2, + retry_delay=timedelta(seconds=5), + timeout=timedelta(seconds=240), +) def cloud_function_request( url: str, credential: None, From 1ac32b4e1b795ee29a1fbcf2483586d7b4fc1b71 Mon Sep 17 00:00:00 2001 From: ThiagoTrabach Date: Wed, 29 Nov 2023 20:49:29 -0300 Subject: [PATCH 20/25] Remove "danilo" from code_owners list --- pipelines/constants.py | 6 +----- pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py | 2 +- pipelines/rj_sms/dump_api_prontuario_vitai/flows.py | 2 +- pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py | 2 +- pipelines/rj_sms/dump_ftp_cnes/flows.py | 2 +- pipelines/rj_sms/dump_sheets/flows.py | 2 +- 6 files changed, 6 insertions(+), 10 deletions(-) diff --git a/pipelines/constants.py b/pipelines/constants.py index e8550c2bb..4ddd398fa 100644 --- a/pipelines/constants.py +++ b/pipelines/constants.py @@ -150,8 +150,4 @@ class constants(Enum): # pylint: disable=c0103 "user_id": "222842688117014528", "type": "user_nickname", }, - "danilo": { - "user_id": "1147152438487416873", - "type": "user_nickname", - }, - } + } \ No newline at end of file diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py index 5a40752a9..7e06a83e2 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py @@ -34,7 +34,7 @@ with Flow( name="SMS: Dump VitaCare - Ingerir dados do prontuário VitaCare", - code_owners=["thiago", "andre", "danilo"], + code_owners=["thiago", "andre",], ) as dump_vitacare: ##################################### # Parameters diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py index a2728b0dd..6e215456d 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py +++ 
b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py @@ -31,7 +31,7 @@ with Flow( name="SMS: Dump Vitai - Ingerir dados do prontuário Vitai", - code_owners=["thiago", "andre", "danilo"], + code_owners=["thiago", "andre",], ) as dump_vitai: ##################################### # Parameters diff --git a/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py b/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py index 1db771ed9..80996c54b 100644 --- a/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py +++ b/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py @@ -33,7 +33,7 @@ with Flow( name="SMS: Dump TPC - Ingerir dados do estoque TPC", - code_owners=["thiago", "andre", "danilo"], + code_owners=["thiago", "andre",], ) as dump_tpc: ##################################### # Parameters diff --git a/pipelines/rj_sms/dump_ftp_cnes/flows.py b/pipelines/rj_sms/dump_ftp_cnes/flows.py index a7da70d90..ba8b6badb 100644 --- a/pipelines/rj_sms/dump_ftp_cnes/flows.py +++ b/pipelines/rj_sms/dump_ftp_cnes/flows.py @@ -23,7 +23,7 @@ with Flow( name="SMS: Dump CNES - Captura de dados CNES", - code_owners=["thiago", "andre", "danilo"], + code_owners=["thiago", "andre",], ) as dump_cnes: # Parameters # Parameters for GCP diff --git a/pipelines/rj_sms/dump_sheets/flows.py b/pipelines/rj_sms/dump_sheets/flows.py index 17acf8788..5f386c66e 100644 --- a/pipelines/rj_sms/dump_sheets/flows.py +++ b/pipelines/rj_sms/dump_sheets/flows.py @@ -18,7 +18,7 @@ dump_sms_sheets_flow.name = ( "SMS: Dump Google Sheets - Ingerir planilhas do Google Sheets" ) -dump_sms_sheets_flow.code_owners = ["thiago", "andre", "danilo"] +dump_sms_sheets_flow.code_owners = ["thiago", "andre"] dump_sms_sheets_flow.storage = GCS(constants.GCS_FLOWS_BUCKET.value) dump_sms_sheets_flow.run_config = KubernetesRun( From 7488ada8bb9834a69175a85dc1332960bef5ba0e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 29 Nov 2023 23:49:47 +0000 Subject: [PATCH 21/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pipelines/constants.py | 2 +- pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py | 5 ++++- pipelines/rj_sms/dump_api_prontuario_vitai/flows.py | 5 ++++- pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py | 5 ++++- pipelines/rj_sms/dump_ftp_cnes/flows.py | 5 ++++- 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/pipelines/constants.py b/pipelines/constants.py index 4ddd398fa..1040ca077 100644 --- a/pipelines/constants.py +++ b/pipelines/constants.py @@ -150,4 +150,4 @@ class constants(Enum): # pylint: disable=c0103 "user_id": "222842688117014528", "type": "user_nickname", }, - } \ No newline at end of file + } diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py index 7e06a83e2..3968e8059 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py @@ -34,7 +34,10 @@ with Flow( name="SMS: Dump VitaCare - Ingerir dados do prontuário VitaCare", - code_owners=["thiago", "andre",], + code_owners=[ + "thiago", + "andre", + ], ) as dump_vitacare: ##################################### # Parameters diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py index 6e215456d..d67640ad9 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py @@ -31,7 
+31,10 @@ with Flow( name="SMS: Dump Vitai - Ingerir dados do prontuário Vitai", - code_owners=["thiago", "andre",], + code_owners=[ + "thiago", + "andre", + ], ) as dump_vitai: ##################################### # Parameters diff --git a/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py b/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py index 80996c54b..bb3140948 100644 --- a/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py +++ b/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py @@ -33,7 +33,10 @@ with Flow( name="SMS: Dump TPC - Ingerir dados do estoque TPC", - code_owners=["thiago", "andre",], + code_owners=[ + "thiago", + "andre", + ], ) as dump_tpc: ##################################### # Parameters diff --git a/pipelines/rj_sms/dump_ftp_cnes/flows.py b/pipelines/rj_sms/dump_ftp_cnes/flows.py index ba8b6badb..6030e75f2 100644 --- a/pipelines/rj_sms/dump_ftp_cnes/flows.py +++ b/pipelines/rj_sms/dump_ftp_cnes/flows.py @@ -23,7 +23,10 @@ with Flow( name="SMS: Dump CNES - Captura de dados CNES", - code_owners=["thiago", "andre",], + code_owners=[ + "thiago", + "andre", + ], ) as dump_cnes: # Parameters # Parameters for GCP From 80f65b0ebfe63bd1aa1e1fb86a5a81583594aa57 Mon Sep 17 00:00:00 2001 From: ThiagoTrabach Date: Wed, 29 Nov 2023 21:01:11 -0300 Subject: [PATCH 22/25] Update start time for SMS clocks --- pipelines/rj_sms/dump_sheets/schedules.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/rj_sms/dump_sheets/schedules.py b/pipelines/rj_sms/dump_sheets/schedules.py index b96acb584..ad53ddf10 100644 --- a/pipelines/rj_sms/dump_sheets/schedules.py +++ b/pipelines/rj_sms/dump_sheets/schedules.py @@ -38,9 +38,9 @@ sms_clocks = generate_dump_url_schedules( interval=timedelta(days=1), - start_date=datetime(2023, 1, 1, 13, 20, tzinfo=pytz.timezone("America/Sao_Paulo")), + start_date=datetime(2023, 1, 1, 5, 30, tzinfo=pytz.timezone("America/Sao_Paulo")), labels=[ - constants.RJ_SMS_DEV_AGENT_LABEL.value, + constants.RJ_SMS_AGENT_LABEL.value, ], dataset_id=sheets_constants.DATASET_ID.value, table_parameters=table_parameters, From 53a18cac2a17f3dcb18975d0629634ffe32488c1 Mon Sep 17 00:00:00 2001 From: ThiagoTrabach Date: Thu, 30 Nov 2023 08:45:18 -0300 Subject: [PATCH 23/25] Add .DS_Store file to pipelines/rj_sms directory --- .DS_Store | Bin 0 -> 10244 bytes pipelines/rj_sms/.DS_Store | Bin 0 -> 6148 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 .DS_Store create mode 100644 pipelines/rj_sms/.DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..0834290813c66665fcb3b543e52dcb221427cf2f GIT binary patch literal 10244 [base85-encoded binary data omitted]
ztQQA0O`x`tV;5>A`)bV&myH2c6|Mc-()vY<265W6xHZf8jwpo<3KXzG|F|8jqy(bfN53 zDm0ZKYJ2E-I(>9*KbuhC>rHC{XQPuZm z_GuJ%#yVLY%pBBI17+oi{yvQ&(CCoWem$pY28c>#G&8PJ6dCHj>E5ryP52AE0slnA z9^8yuaVHMpA$*87?=v__+jkyk@e&r%B)8^H?P3+LVt^mQ8|2t4_z`>pKZT#c&*GEh z+@B}+=Fa_P{0csgU&Zg@_wWV$KK=lIh%e!f@h9Z&zr^3-AMs|)iBGjUG5)Q+6W8;; z>v%5D4*5?>H)Tp5U_(oo8%O;-?Ru^9SPWz)t{t zyw}>DjaqW;EpKCoGIg2@XN)t~&Kl2Tn=|*mx3PAMkv5h@mfLKpQ=Arjk?GQ`TG`;7 zLvx1PL8B9Vx9W*ALbD#hlIsp+V4WOOwwvVm?f?IO2aY@$Qo=yOz<-JX>=?_BjgYm* zDKKuBe{0XueVJ~4alI8O1%w`4g|~~(@l@fTxp7<)ddD}v8+*@>Rb41h}d zD5#+rzkQnTuwCL`bb#k>D05a&Mcw&3dr|7+wA3jWe0w~_B;WicApshjR%SW%YF4OaNlD;nTRXn=&Ojk`(RQW;#MV5o|?oCl;dNtF_Dyu~snG>Gx`|;@R zdOe*y9tQ`H9&EwtY-dBbkfwVeab0p|c5;P*pdF|^nklvf8T*#ZFjNK2s2AB(IFEQS_agE)gwMhY}i zWw#j0$dL~$FSOViG;&gQ^P%jOmEEBzxjOm}gib0n=xXPHb70$nJ##qX_5b;w_y5}> zcjO#!4*XXRsQyuOG{cbW+PX71UTXuC4=5bWYYkpV(2%7VTwaRrphzGea03`xYz-oV Pa32C Date: Thu, 30 Nov 2023 08:49:17 -0300 Subject: [PATCH 24/25] Remove .DS_Store files and update .gitignore --- .DS_Store | Bin 10244 -> 10244 bytes .gitignore | 6 ++++-- pipelines/.DS_Store | Bin 6148 -> 0 bytes 3 files changed, 4 insertions(+), 2 deletions(-) delete mode 100644 pipelines/.DS_Store diff --git a/.DS_Store b/.DS_Store index 0834290813c66665fcb3b543e52dcb221427cf2f..b8d4d053f8f501939124d130c7dbe67439c109ea 100644 GIT binary patch delta 75 zcmZn(XbITREX=rda*J@GifDDUfsTT?X|0YzwS@_gX=!RuTg%BIs;qAv6rY`wo0s1{ eSw=*jv177Q;o?FB_(2>*f=?%7|(4JzlZJ9zEXrZjwD8u-aQ3@Qx=OaZ!KR zee8GJ*cmtHj87cPNu1$VA{T2zIvULY From d158c9b9c174bc567ef3103d28253d7ec21a847b Mon Sep 17 00:00:00 2001 From: ThiagoTrabach Date: Thu, 30 Nov 2023 09:00:06 -0300 Subject: [PATCH 25/25] Add "danilo" as a code owner in multiple flows --- pipelines/constants.py | 4 ++++ pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py | 1 + pipelines/rj_sms/dump_api_prontuario_vitai/flows.py | 1 + pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py | 1 + pipelines/rj_sms/dump_ftp_cnes/flows.py | 1 + pipelines/rj_sms/dump_sheets/flows.py | 6 +++++- 6 files changed, 13 insertions(+), 1 deletion(-) diff --git a/pipelines/constants.py b/pipelines/constants.py index 1040ca077..e8550c2bb 100644 --- a/pipelines/constants.py +++ b/pipelines/constants.py @@ -150,4 +150,8 @@ class constants(Enum): # pylint: disable=c0103 "user_id": "222842688117014528", "type": "user_nickname", }, + "danilo": { + "user_id": "1147152438487416873", + "type": "user_nickname", + }, } diff --git a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py index 3968e8059..78ec60291 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitacare/flows.py @@ -37,6 +37,7 @@ code_owners=[ "thiago", "andre", + "danilo", ], ) as dump_vitacare: ##################################### diff --git a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py index d67640ad9..753e7fb84 100644 --- a/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py +++ b/pipelines/rj_sms/dump_api_prontuario_vitai/flows.py @@ -34,6 +34,7 @@ code_owners=[ "thiago", "andre", + "danilo", ], ) as dump_vitai: ##################################### diff --git a/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py b/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py index bb3140948..8c6dded02 100644 --- a/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py +++ b/pipelines/rj_sms/dump_azureblob_estoque_tpc/flows.py @@ -36,6 +36,7 @@ code_owners=[ "thiago", "andre", + "danilo", ], ) as 
dump_tpc: ##################################### diff --git a/pipelines/rj_sms/dump_ftp_cnes/flows.py b/pipelines/rj_sms/dump_ftp_cnes/flows.py index 6030e75f2..9509a8835 100644 --- a/pipelines/rj_sms/dump_ftp_cnes/flows.py +++ b/pipelines/rj_sms/dump_ftp_cnes/flows.py @@ -26,6 +26,7 @@ code_owners=[ "thiago", "andre", + "danilo", ], ) as dump_cnes: # Parameters diff --git a/pipelines/rj_sms/dump_sheets/flows.py b/pipelines/rj_sms/dump_sheets/flows.py index 5f386c66e..343f1c1ca 100644 --- a/pipelines/rj_sms/dump_sheets/flows.py +++ b/pipelines/rj_sms/dump_sheets/flows.py @@ -18,7 +18,11 @@ dump_sms_sheets_flow.name = ( "SMS: Dump Google Sheets - Ingerir planilhas do Google Sheets" ) -dump_sms_sheets_flow.code_owners = ["thiago", "andre"] +dump_sms_sheets_flow.code_owners = [ + "thiago", + "andre", + "danilo", +] dump_sms_sheets_flow.storage = GCS(constants.GCS_FLOWS_BUCKET.value) dump_sms_sheets_flow.run_config = KubernetesRun(