From 7c76e7ac10b87c78b3f5bb016d09d17b51f7211d Mon Sep 17 00:00:00 2001 From: mgwinner Date: Tue, 13 Feb 2024 14:39:33 +0100 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Delete=20get=5Fpromoted=20?= =?UTF-8?q?tests=20and=20get=5Fpromoted=20path?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../flows/test_adls_to_azure_sql.py | 35 ------------------- viadot/flows/adls_to_azure_sql.py | 16 --------- 2 files changed, 51 deletions(-) diff --git a/tests/integration/flows/test_adls_to_azure_sql.py b/tests/integration/flows/test_adls_to_azure_sql.py index b83a59e51..34cef2f9e 100644 --- a/tests/integration/flows/test_adls_to_azure_sql.py +++ b/tests/integration/flows/test_adls_to_azure_sql.py @@ -9,41 +9,6 @@ from viadot.flows.adls_to_azure_sql import check_dtypes_sort, df_to_csv_task -def test_get_promoted_adls_path_parquet_file(): - adls_path_file = "raw/supermetrics/adls_ga_load_times_fr_test/2021-07-14T13%3A09%3A02.997357%2B00%3A00.parquet" - flow = ADLSToAzureSQL(name="test", adls_path=adls_path_file) - promoted_path = flow.get_promoted_path(env="conformed") - assert promoted_path == "conformed/supermetrics/adls_ga_load_times_fr_test.csv" - - -def test_get_promoted_adls_path_file_starts_with_slash(): - adls_path_dir_starts_with_slash = "/raw/supermetrics/adls_ga_load_times_fr_test/" - flow = ADLSToAzureSQL(name="test", adls_path=adls_path_dir_starts_with_slash) - promoted_path = flow.get_promoted_path(env="conformed") - assert promoted_path == "conformed/supermetrics/adls_ga_load_times_fr_test.csv" - - -def test_get_promoted_adls_path_dir_slash(): - adls_path_dir_slash = "raw/supermetrics/adls_ga_load_times_fr_test/" - flow = ADLSToAzureSQL(name="test", adls_path=adls_path_dir_slash) - promoted_path = flow.get_promoted_path(env="conformed") - assert promoted_path == "conformed/supermetrics/adls_ga_load_times_fr_test.csv" - - -def test_get_promoted_adls_path_dir(): - adls_path_dir = "raw/supermetrics/adls_ga_load_times_fr_test" - flow = ADLSToAzureSQL(name="test", adls_path=adls_path_dir) - promoted_path = flow.get_promoted_path(env="conformed") - assert promoted_path == "conformed/supermetrics/adls_ga_load_times_fr_test.csv" - - -def test_get_promoted_adls_path_dir_starts_with_slash(): - adls_path_dir_starts_with_slash = "/raw/supermetrics/adls_ga_load_times_fr_test/" - flow = ADLSToAzureSQL(name="test", adls_path=adls_path_dir_starts_with_slash) - promoted_path = flow.get_promoted_path(env="conformed") - assert promoted_path == "conformed/supermetrics/adls_ga_load_times_fr_test.csv" - - def test_df_to_csv_task(): d = {"col1": ["rat", "\tdog"], "col2": ["cat", 4]} df = pd.DataFrame(data=d) diff --git a/viadot/flows/adls_to_azure_sql.py b/viadot/flows/adls_to_azure_sql.py index abac388b3..d41ed129a 100644 --- a/viadot/flows/adls_to_azure_sql.py +++ b/viadot/flows/adls_to_azure_sql.py @@ -221,8 +221,6 @@ def __init__( self.overwrite_adls = overwrite_adls self.if_empty = if_empty self.adls_sp_credentials_secret = adls_sp_credentials_secret - self.adls_path_conformed = self.get_promoted_path(env="conformed") - self.adls_path_operations = self.get_promoted_path(env="operations") # AzureSQLCreateTable self.table = table @@ -257,20 +255,6 @@ def _map_if_exists(if_exists: str) -> str: def slugify(name): return name.replace(" ", "_").lower() - def get_promoted_path(self, env: str) -> str: - adls_path_clean = self.adls_path.strip("/") - extension = adls_path_clean.split(".")[-1].strip() - if extension == "parquet": - file_name = adls_path_clean.split("/")[-2] + ".csv" - common_path = "/".join(adls_path_clean.split("/")[1:-2]) - else: - file_name = adls_path_clean.split("/")[-1] - common_path = "/".join(adls_path_clean.split("/")[1:-1]) - - promoted_path = os.path.join(env, common_path, file_name) - - return promoted_path - def gen_flow(self) -> Flow: lake_to_df_task = AzureDataLakeToDF(timeout=self.timeout) df = lake_to_df_task.bind(