From c689b4e67531c494476d55f10277a2b863113e50 Mon Sep 17 00:00:00 2001
From: Rafael Carvalho Pinheiro <74972217+pixuimpou@users.noreply.github.com>
Date: Thu, 5 Oct 2023 11:26:43 -0300
Subject: [PATCH] Create generic materialization flow + add Jaé transaction
 processing (#513)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* create default materialization flow

* create tasks for default materialization flow

* make generate_execute_schedules more generic

* create bilhetagem materialization flow

* adapt bilhetagem schedules to the new model

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

* add run config and storage

* Update utils.py

* fix sub tasks

* fix fetch_dataset_sha run

* add run_date variable to materialization flow

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

* remove discord notifications for testing

* add manual date_range / fix flow run name

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

* fix missing table_id logic

* fix empty return

* fix empty return

* add flag_date_range when var_params is blank

* change rename logic when there are date variables

* change return values of create_dbt_run_vars

* create dict aux function

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

* remove *args from task

* change coalesce task

* fix rename task

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

* fix task order

* add docstrings

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

* fix line too long

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

* pre-commit hook

* adjust tasks

* restructure the materialization flow

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

* add bilhetagem schedule

* add schedule to the materialization flow

* adjust datetime column name

* adjust column name

* change date column to datetime_transacao

* adjust manual date_range variable

* rename dbt variable parameter

* create materialization orchestration flow

* restore discord notifications

* adjust wait_flow_run

* change query for testing

* revert test query

* use copy on the date variables dictionary

* adjust constant run interval

* remove commented-out function

* change flow naming pattern

* remove commented-out imports

* remove unused schedules

* remove commented-out task

* switch agent to production

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
Co-authored-by: Rodrigo Cunha <66736583+eng-rodrigocunha@users.noreply.github.com>
---
 .../br_rj_riodejaneiro_bilhetagem/flows.py | 116 +++++++++++++++--
 .../schedules.py                           |  21 +---
 pipelines/rj_smtr/constants.py             |  32 +++--
 pipelines/rj_smtr/flows.py                 |  84 ++++++++++++-
 pipelines/rj_smtr/tasks.py                 | 117 +++++++++++++++++-
 pipelines/rj_smtr/utils.py                 |  14 ++-
 6 files changed, 344 insertions(+), 40 deletions(-)
diff --git a/pipelines/rj_smtr/br_rj_riodejaneiro_bilhetagem/flows.py b/pipelines/rj_smtr/br_rj_riodejaneiro_bilhetagem/flows.py
index d7f44e3b9..568f96154 100644
--- a/pipelines/rj_smtr/br_rj_riodejaneiro_bilhetagem/flows.py
+++ b/pipelines/rj_smtr/br_rj_riodejaneiro_bilhetagem/flows.py
@@ -7,26 +7,46 @@
 from prefect.run_configs import KubernetesRun
 from prefect.storage import GCS
+from prefect.tasks.prefect import create_flow_run, wait_for_flow_run
+from prefect.utilities.edges import unmapped
 
 # EMD Imports #
 
 from pipelines.constants import constants as emd_constants
+from pipelines.utils.decorators import Flow
+from pipelines.utils.tasks import (
+    rename_current_flow_run_now_time,
+    get_current_flow_labels,
+)
+
+
+from pipelines.utils.utils import set_default_parameters
 
 # SMTR Imports #
 
-from pipelines.rj_smtr.flows import default_capture_flow
+from pipelines.rj_smtr.flows import (
+    default_capture_flow,
+    default_materialization_flow,
+)
+
+from pipelines.rj_smtr.tasks import (
+    get_current_timestamp,
+)
 
 from pipelines.rj_smtr.br_rj_riodejaneiro_bilhetagem.schedules import (
-    bilhetagem_principal_schedule,
     bilhetagem_transacao_schedule,
 )
+from pipelines.rj_smtr.constants import constants
+
+from pipelines.rj_smtr.schedules import every_hour
 
 # Flows #
 
 # BILHETAGEM TRANSAÇÃO - CAPTURA A CADA MINUTO #
 
 bilhetagem_transacao_captura = deepcopy(default_capture_flow)
-bilhetagem_transacao_captura.name = "SMTR: Bilhetagem Transação (captura)"
+bilhetagem_transacao_captura.name = "SMTR: Bilhetagem Transação - Captura"
 bilhetagem_transacao_captura.storage = GCS(emd_constants.GCS_FLOWS_BUCKET.value)
 bilhetagem_transacao_captura.run_config = KubernetesRun(
     image=emd_constants.DOCKER_IMAGE.value,
@@ -34,13 +54,91 @@
 )
 bilhetagem_transacao_captura.schedule = bilhetagem_transacao_schedule
 
-# BILHETAGEM PRINCIPAL - CAPTURA DIÁRIA DE DIVERSAS TABELAS #
+# BILHETAGEM AUXILIAR - SUBFLOW RUN BEFORE EACH MATERIALIZATION #
+
+bilhetagem_auxiliar_captura = deepcopy(default_capture_flow)
+bilhetagem_auxiliar_captura.name = "SMTR: Bilhetagem Auxiliar - Captura (subflow)"
+bilhetagem_auxiliar_captura.storage = GCS(emd_constants.GCS_FLOWS_BUCKET.value)
+bilhetagem_auxiliar_captura.run_config = KubernetesRun(
+    image=emd_constants.DOCKER_IMAGE.value,
+    labels=[emd_constants.RJ_SMTR_AGENT_LABEL.value],
+)
+
+bilhetagem_auxiliar_captura = set_default_parameters(
+    flow=bilhetagem_auxiliar_captura,
+    default_parameters={
+        "dataset_id": constants.BILHETAGEM_DATASET_ID.value,
+        "secret_path": constants.BILHETAGEM_SECRET_PATH.value,
+        "source_type": constants.BILHETAGEM_GENERAL_CAPTURE_PARAMS.value["source_type"],
+    },
+)
+
+# MATERIALIZAÇÃO - MATERIALIZATION SUBFLOW #
+bilhetagem_materializacao = deepcopy(default_materialization_flow)
+bilhetagem_materializacao.name = "SMTR: Bilhetagem Transação - Materialização (subflow)"
+bilhetagem_materializacao.storage = GCS(emd_constants.GCS_FLOWS_BUCKET.value)
+bilhetagem_materializacao.run_config = KubernetesRun(
+    image=emd_constants.DOCKER_IMAGE.value,
+    labels=[emd_constants.RJ_SMTR_AGENT_LABEL.value],
+)
+
+bilhetagem_materializacao_parameters = {
+    "dataset_id": constants.BILHETAGEM_DATASET_ID.value
+} | constants.BILHETAGEM_MATERIALIZACAO_PARAMS.value
+
+bilhetagem_materializacao = set_default_parameters(
+    flow=bilhetagem_materializacao,
+    default_parameters=bilhetagem_materializacao_parameters,
+)
+
+# TRATAMENTO - RUNS HOURLY: AUXILIARY CAPTURE + MATERIALIZATION #
+with Flow(
+    "SMTR: Bilhetagem Transação - Tratamento",
+    code_owners=["caio", "fernanda", "boris", "rodrigo"],
+) as bilhetagem_transacao_tratamento:
+    timestamp = get_current_timestamp()
+
+    rename_flow_run = rename_current_flow_run_now_time(
+        prefix=bilhetagem_transacao_tratamento.name + " ",
+        now_time=timestamp,
+    )
+
+    LABELS = get_current_flow_labels()
+
+    # Capture
+    runs_captura = create_flow_run.map(
+        flow_name=unmapped(bilhetagem_auxiliar_captura.name),
+        project_name=unmapped(emd_constants.PREFECT_DEFAULT_PROJECT.value),
+        parameters=constants.BILHETAGEM_CAPTURE_PARAMS.value,
+        labels=unmapped(LABELS),
+    )
+
+    wait_captura = wait_for_flow_run.map(
+        runs_captura,
+        stream_states=unmapped(True),
+        stream_logs=unmapped(True),
+        raise_final_state=unmapped(True),
+    )
+
+    # Materialization
+    run_materializacao = create_flow_run(
+        flow_name=bilhetagem_materializacao.name,
+        project_name=emd_constants.PREFECT_DEFAULT_PROJECT.value,
+        labels=LABELS,
+        upstream_tasks=[wait_captura],
+    )
+
+    wait_materializacao = wait_for_flow_run(
+        run_materializacao,
+        stream_states=True,
+        stream_logs=True,
+        raise_final_state=True,
+    )
+
-bilhetagem_principal_captura = deepcopy(default_capture_flow)
-bilhetagem_principal_captura.name = "SMTR: Bilhetagem Principal (captura)"
-bilhetagem_principal_captura.storage = GCS(emd_constants.GCS_FLOWS_BUCKET.value)
-bilhetagem_principal_captura.run_config = KubernetesRun(
+bilhetagem_transacao_tratamento.storage = GCS(emd_constants.GCS_FLOWS_BUCKET.value)
+bilhetagem_transacao_tratamento.run_config = KubernetesRun(
     image=emd_constants.DOCKER_IMAGE.value,
     labels=[emd_constants.RJ_SMTR_AGENT_LABEL.value],
 )
-bilhetagem_principal_captura.schedule = bilhetagem_principal_schedule
+bilhetagem_transacao_tratamento.schedule = every_hour
+# bilhetagem_materializacao.schedule = bilhetagem_materializacao_schedule
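
Note on the bilhetagem_materializacao_parameters construction above: it uses Python
3.9's dict-union operator (PEP 584), where keys from the right-hand operand win on
conflict. A self-contained illustration with placeholder values (the real ones live in
constants.py below):

    # Placeholders standing in for constants.BILHETAGEM_DATASET_ID.value and
    # constants.BILHETAGEM_MATERIALIZACAO_PARAMS.value.
    base = {"dataset_id": "br_rj_riodejaneiro_bilhetagem"}
    materializacao = {"table_id": "transacao", "upstream": True}

    merged = base | materializacao  # right-hand side wins on duplicate keys
    assert merged == {
        "dataset_id": "br_rj_riodejaneiro_bilhetagem",
        "table_id": "transacao",
        "upstream": True,
    }
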
diff --git a/pipelines/rj_smtr/br_rj_riodejaneiro_bilhetagem/schedules.py b/pipelines/rj_smtr/br_rj_riodejaneiro_bilhetagem/schedules.py
index 2f7804811..c2ee21164 100644
--- a/pipelines/rj_smtr/br_rj_riodejaneiro_bilhetagem/schedules.py
+++ b/pipelines/rj_smtr/br_rj_riodejaneiro_bilhetagem/schedules.py
@@ -15,27 +15,10 @@
     generate_execute_schedules,
 )
 
-bilhetagem_principal_clocks = generate_execute_schedules(
-    clock_interval=timedelta(
-        **constants.BILHETAGEM_GENERAL_CAPTURE_PARAMS.value["principal_run_interval"]
-    ),
-    labels=[
-        emd_constants.RJ_SMTR_AGENT_LABEL.value,
-    ],
-    table_parameters=constants.BILHETAGEM_CAPTURE_PARAMS.value,
-    dataset_id=constants.BILHETAGEM_DATASET_ID.value,
-    secret_path=constants.BILHETAGEM_SECRET_PATH.value,
-    source_type=constants.BILHETAGEM_GENERAL_CAPTURE_PARAMS.value["source_type"],
-    runs_interval_minutes=constants.BILHETAGEM_GENERAL_CAPTURE_PARAMS.value[
-        "principal_runs_interval_minutes"
-    ],
-)
-
-bilhetagem_principal_schedule = Schedule(clocks=untuple(bilhetagem_principal_clocks))
-
+BILHETAGEM_TRANSACAO_INTERVAL = timedelta(minutes=1)
 bilhetagem_transacao_clocks = generate_execute_schedules(
     clock_interval=timedelta(
-        **constants.BILHETAGEM_GENERAL_CAPTURE_PARAMS.value["transacao_run_interval"]
+        **constants.BILHETAGEM_CAPTURE_RUN_INTERVAL.value["transacao_run_interval"]
     ),
     labels=[
         emd_constants.RJ_SMTR_AGENT_LABEL.value,
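
Note on the clock_interval change above: generate_execute_schedules receives
timedelta(**constants.BILHETAGEM_CAPTURE_RUN_INTERVAL.value["transacao_run_interval"]),
i.e. the inner dicts of the new constant are keyword arguments for datetime.timedelta.
A minimal sketch of that unpacking, using the values introduced in constants.py below:

    from datetime import timedelta

    # Values copied from the BILHETAGEM_CAPTURE_RUN_INTERVAL constant in this patch
    BILHETAGEM_CAPTURE_RUN_INTERVAL = {
        "transacao_run_interval": {"minutes": 1},
        "principal_run_interval": {"days": 1},
    }

    # schedules.py unpacks the inner dict as timedelta kwargs
    clock_interval = timedelta(**BILHETAGEM_CAPTURE_RUN_INTERVAL["transacao_run_interval"])
    assert clock_interval == timedelta(minutes=1)
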
diff --git a/pipelines/rj_smtr/constants.py b/pipelines/rj_smtr/constants.py
index 52e30d9f8..ee8a22cd2 100644
--- a/pipelines/rj_smtr/constants.py
+++ b/pipelines/rj_smtr/constants.py
@@ -183,12 +183,15 @@ class constants(Enum):  # pylint: disable=c0103
         },
         "vpn_url": "http://vpn-jae.mobilidade.rio/",
         "source_type": "api-json",
-        "transacao_run_interval": {"minutes": 1},
-        "principal_run_interval": {"days": 1},
         "transacao_runs_interval_minutes": 0,
         "principal_runs_interval_minutes": 5,
     }
 
+    BILHETAGEM_CAPTURE_RUN_INTERVAL = {
+        "transacao_run_interval": {"minutes": 1},
+        "principal_run_interval": {"days": 1},
+    }
+
     BILHETAGEM_TRANSACAO_CAPTURE_PARAMS = {
         "table_id": "transacao",
         "partition_date_only": False,
@@ -203,11 +206,13 @@ class constants(Enum):  # pylint: disable=c0103
                     data_processamento BETWEEN '{start}'
                     AND '{end}'
             """,
-            "run_interval": BILHETAGEM_GENERAL_CAPTURE_PARAMS["transacao_run_interval"],
+            "run_interval": BILHETAGEM_CAPTURE_RUN_INTERVAL["transacao_run_interval"],
         },
         "primary_key": ["id"],  # id column to nest data on
     }
 
+    BILHETAGEM_SECRET_PATH = "smtr_jae_access_data"
+
     BILHETAGEM_CAPTURE_PARAMS = [
         {
             "table_id": "linha",
@@ -222,7 +227,7 @@ class constants(Enum):  # pylint: disable=c0103
                 WHERE
                     DT_INCLUSAO >= '{start}'
             """,
-                "run_interval": BILHETAGEM_GENERAL_CAPTURE_PARAMS[
+                "run_interval": BILHETAGEM_CAPTURE_RUN_INTERVAL[
                     "principal_run_interval"
                 ],
             },
@@ -241,7 +246,7 @@ class constants(Enum):  # pylint: disable=c0103
                 WHERE
                     DT_INCLUSAO >= '{start}'
             """,
-                "run_interval": BILHETAGEM_GENERAL_CAPTURE_PARAMS[
+                "run_interval": BILHETAGEM_CAPTURE_RUN_INTERVAL[
                     "principal_run_interval"
                 ],
             },
@@ -260,7 +265,7 @@ class constants(Enum):  # pylint: disable=c0103
                 WHERE
                     DT_INCLUSAO >= '{start}'
             """,
-                "run_interval": BILHETAGEM_GENERAL_CAPTURE_PARAMS[
+                "run_interval": BILHETAGEM_CAPTURE_RUN_INTERVAL[
                     "principal_run_interval"
                 ],
             },
@@ -279,7 +284,7 @@ class constants(Enum):  # pylint: disable=c0103
                 WHERE
                     dt_inclusao >= '{start}'
             """,
-                "run_interval": BILHETAGEM_GENERAL_CAPTURE_PARAMS[
+                "run_interval": BILHETAGEM_CAPTURE_RUN_INTERVAL[
                     "principal_run_interval"
                 ],
             },
@@ -289,4 +294,15 @@ class constants(Enum):  # pylint: disable=c0103
             ],  # id column to nest data on
         },
     ]
-    BILHETAGEM_SECRET_PATH = "smtr_jae_access_data"
+
+    BILHETAGEM_MATERIALIZACAO_PARAMS = {
+        "table_id": BILHETAGEM_TRANSACAO_CAPTURE_PARAMS["table_id"],
+        "upstream": True,
+        "dbt_vars": {
+            "date_range": {
+                "table_run_datetime_column_name": "datetime_transacao",
+                "delay_hours": 1,
+            },
+            "version": {},
+        },
+    }
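
Note on the dbt_vars block in BILHETAGEM_MATERIALIZACAO_PARAMS above: it is what
create_dbt_run_vars (added in tasks.py below) branches on. When date_range already
carries both date_range_start and date_range_end, the range is taken as manual;
otherwise it is resolved from Redis. A reduced sketch of just that branch, with the
Redis lookup stubbed out as fetch_from_redis (a hypothetical stand-in):

    def resolve_date_range(date_range_params: dict, fetch_from_redis) -> tuple[dict, bool]:
        """Return (date_var, came_from_redis), mirroring the branch in create_dbt_run_vars."""
        if {"date_range_start", "date_range_end"} <= date_range_params.keys():
            # Manual range passed in the flow parameters
            return (
                {
                    "date_range_start": date_range_params["date_range_start"],
                    "date_range_end": date_range_params["date_range_end"],
                },
                False,
            )
        # Otherwise derive the range from the last materialized timestamp (Redis-backed)
        return fetch_from_redis(), True
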
Parameter("raw_table_id", default=None) + dbt_alias = Parameter("dbt_alias", default=False) + upstream = Parameter("upstream", default=None) + downstream = Parameter("downstream", default=None) + exclude = Parameter("exclude", default=None) + flags = Parameter("flags", default=None) + dbt_vars = Parameter("dbt_vars", default=dict()) + + # treated_table_params = treat_dbt_table_params(table_params=table_params) + + LABELS = get_current_flow_labels() + MODE = get_current_flow_mode(LABELS) + + _vars, date_var, flag_date_range = create_dbt_run_vars( + dataset_id=dataset_id, + dbt_vars=dbt_vars, + table_id=table_id, + raw_dataset_id=dataset_id, + raw_table_id=raw_table_id, + mode=MODE, + ) + + # Rename flow run + + flow_name_prefix = coalesce_task([table_id, dataset_id]) + + flow_name_now_time = coalesce_task([date_var, get_now_time()]) + + rename_flow_run = rename_current_flow_run_now_time( + prefix=default_materialization_flow.name + " " + flow_name_prefix + ": ", + now_time=flow_name_now_time, + ) + + dbt_client = get_k8s_dbt_client(mode=MODE, wait=rename_flow_run) + + RUNS = run_dbt_model.map( + dbt_client=unmapped(dbt_client), + dataset_id=unmapped(dataset_id), + table_id=unmapped(table_id), + _vars=_vars, + dbt_alias=unmapped(dbt_alias), + upstream=unmapped(upstream), + downstream=unmapped(downstream), + exclude=unmapped(exclude), + flags=unmapped(flags), + ) + + with case(flag_date_range, True): + set_last_run_timestamp( + dataset_id=dataset_id, + table_id=table_id, + timestamp=date_var["date_range_end"], + wait=RUNS, + mode=MODE, + ) + + +default_materialization_flow.storage = GCS(emd_constants.GCS_FLOWS_BUCKET.value) +default_materialization_flow.run_config = KubernetesRun( + image=emd_constants.DOCKER_IMAGE.value, + labels=[emd_constants.RJ_SMTR_AGENT_LABEL.value], +) diff --git a/pipelines/rj_smtr/tasks.py b/pipelines/rj_smtr/tasks.py index a846851b5..f7d687dea 100644 --- a/pipelines/rj_smtr/tasks.py +++ b/pipelines/rj_smtr/tasks.py @@ -8,7 +8,7 @@ import os from pathlib import Path import traceback -from typing import Dict, List, Union +from typing import Dict, List, Union, Iterable import io from basedosdados import Storage, Table @@ -28,6 +28,7 @@ get_last_run_timestamp, log_critical, data_info_str, + dict_contains_keys, get_raw_data_api, get_raw_data_gcs, upload_run_logs_to_bq, @@ -1095,3 +1096,117 @@ def transform_raw_to_nested_structure( log(f"[CATCHED] Task failed with error: \n{error}", level="error") return error, filepath + + +@task(checkpoint=False) +def coalesce_task(value_list: Iterable): + """ + Task to get the first non None value of a list + + Args: + value_list (Iterable): a iterable object with the values + Returns: + any: value_list's first non None item + """ + + try: + return next(value for value in value_list if value is not None) + except StopIteration: + return + + +@task(checkpoint=False, nout=3) +def create_dbt_run_vars( + dataset_id: str, + dbt_vars: dict, + table_id: str, + raw_dataset_id: str, + raw_table_id: str, + mode: str, +) -> tuple[list[dict], Union[list[dict], dict, None], bool]: + """ + Create the variables to be used in dbt materialization based on a dict + + Args: + dataset_id (str): the dataset_id to get the variables + dbt_vars (dict): dict containing the parameters + table_id (str): the table_id get the date_range variable + raw_dataset_id (str): the raw_dataset_id get the date_range variable + raw_table_id (str): the raw_table_id get the date_range variable + mode (str): the mode to get the date_range variable + + Returns: + 
diff --git a/pipelines/rj_smtr/tasks.py b/pipelines/rj_smtr/tasks.py
index a846851b5..f7d687dea 100644
--- a/pipelines/rj_smtr/tasks.py
+++ b/pipelines/rj_smtr/tasks.py
@@ -8,7 +8,7 @@
 import os
 from pathlib import Path
 import traceback
-from typing import Dict, List, Union
+from typing import Dict, List, Union, Iterable
 import io
 
 from basedosdados import Storage, Table
@@ -28,6 +28,7 @@
     get_last_run_timestamp,
     log_critical,
     data_info_str,
+    dict_contains_keys,
     get_raw_data_api,
     get_raw_data_gcs,
     upload_run_logs_to_bq,
@@ -1095,3 +1096,117 @@
         log(f"[CATCHED] Task failed with error: \n{error}", level="error")
 
     return error, filepath
+
+
+@task(checkpoint=False)
+def coalesce_task(value_list: Iterable):
+    """
+    Task to get the first non-None value of a list
+
+    Args:
+        value_list (Iterable): an iterable object with the values
+    Returns:
+        any: value_list's first non-None item
+    """
+
+    try:
+        return next(value for value in value_list if value is not None)
+    except StopIteration:
+        return
+
+
+@task(checkpoint=False, nout=3)
+def create_dbt_run_vars(
+    dataset_id: str,
+    dbt_vars: dict,
+    table_id: str,
+    raw_dataset_id: str,
+    raw_table_id: str,
+    mode: str,
+) -> tuple[list[dict], Union[list[dict], dict, None], bool]:
+    """
+    Create the variables to be used in dbt materialization based on a dict
+
+    Args:
+        dataset_id (str): the dataset_id to get the variables
+        dbt_vars (dict): dict containing the parameters
+        table_id (str): the table_id to get the date_range variable
+        raw_dataset_id (str): the raw_dataset_id to get the date_range variable
+        raw_table_id (str): the raw_table_id to get the date_range variable
+        mode (str): the mode to get the date_range variable
+
+    Returns:
+        list[dict]: the variables to be used in DBT
+        Union[list[dict], dict, None]: the date variable (date_range or run_date)
+        bool: a flag that indicates whether the date_range variable came from Redis
+    """
+
+    log(f"Creating DBT variables. Parameter received: {dbt_vars}")
+
+    if (not dbt_vars) or (not table_id):
+        log("dbt_vars or table_id is blank. Skipping task")
+        return [None], None, False
+
+    final_vars = []
+    date_var = None
+    flag_date_range = False
+
+    if "date_range" in dbt_vars.keys():
+        log("Creating date_range variable")
+
+        # Set date_range variable manually
+        if dict_contains_keys(
+            dbt_vars["date_range"], ["date_range_start", "date_range_end"]
+        ):
+            date_var = {
+                "date_range_start": dbt_vars["date_range"]["date_range_start"],
+                "date_range_end": dbt_vars["date_range"]["date_range_end"],
+            }
+        # Create date_range using Redis
+        else:
+            raw_table_id = raw_table_id or table_id
+
+            date_var = get_materialization_date_range.run(
+                dataset_id=dataset_id,
+                table_id=table_id,
+                raw_dataset_id=raw_dataset_id,
+                raw_table_id=raw_table_id,
+                table_run_datetime_column_name=dbt_vars["date_range"].get(
+                    "table_run_datetime_column_name"
+                ),
+                mode=mode,
+                delay_hours=dbt_vars["date_range"].get("delay_hours", 0),
+            )
+
+            flag_date_range = True
+
+        final_vars.append(date_var.copy())
+
+        log(f"date_range created: {date_var}")
+
+    elif "run_date" in dbt_vars.keys():
+        log("Creating run_date variable")
+
+        date_var = get_run_dates.run(
+            dbt_vars["run_date"].get("date_range_start"),
+            dbt_vars["run_date"].get("date_range_end"),
+        )
+
+        final_vars.append([d.copy() for d in date_var])
+
+        log(f"run_date created: {date_var}")
+
+    if "version" in dbt_vars.keys():
+        log("Creating version variable")
+        dataset_sha = fetch_dataset_sha.run(dataset_id=dataset_id)
+
+        # if there are other variables in the list, add the version variable to each item
+        if final_vars:
+            final_vars = get_join_dict.run(dict_list=final_vars, new_dict=dataset_sha)
+        else:
+            final_vars.append(dataset_sha)
+
+        log(f"version created: {dataset_sha}")
+
+    log(f"All variables were created, final value is: {final_vars}")
+
+    return final_vars, date_var, flag_date_range
diff --git a/pipelines/rj_smtr/utils.py b/pipelines/rj_smtr/utils.py
index 1d71dd3dd..f9b98afab 100644
--- a/pipelines/rj_smtr/utils.py
+++ b/pipelines/rj_smtr/utils.py
@@ -434,7 +434,6 @@ def generate_execute_schedules(  # pylint: disable=too-many-arguments,too-many-l
     clocks = []
     for count, parameters in enumerate(table_parameters):
         parameter_defaults = parameters | general_flow_params
-        log(f"parameter_defaults: {parameter_defaults}")
 
         clocks.append(
             IntervalClock(
@@ -448,6 +447,19 @@ def generate_execute_schedules(  # pylint: disable=too-many-arguments,too-many-l
     return clocks
 
 
+def dict_contains_keys(input_dict: dict, keys: list[str]) -> bool:
+    """
+    Check if the input dict contains all keys present in the list
+
+    Args:
+        input_dict (dict): the dict to check for the keys
+        keys (list[str]): the list containing the keys to check
+    Returns:
+        bool: True if input_dict has all the keys, otherwise False
+    """
+    return all(x in input_dict.keys() for x in keys)
+
+
 def save_raw_local_func(
     data: Union[dict, str], filepath: str, mode: str = "raw", filetype: str = "json"
 ) -> str:
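
For reference, the two small helpers added above can be sanity-checked outside a flow
run. Prefect 1.x tasks expose their underlying function via .run() (the patch itself
uses this style inside create_dbt_run_vars), and dict_contains_keys is a plain
function; assuming `from pipelines.rj_smtr.tasks import coalesce_task` and
`from pipelines.rj_smtr.utils import dict_contains_keys`:

    assert dict_contains_keys({"a": 1, "b": 2}, ["a", "b"]) is True
    assert dict_contains_keys({"a": 1}, ["a", "b"]) is False

    assert coalesce_task.run([None, None, "x", "y"]) == "x"
    assert coalesce_task.run([None, None]) is None  # StopIteration -> None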