From 6de62305a1d0a46f02dadb91187b35e45603f402 Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Tue, 17 Sep 2024 12:35:26 +0200 Subject: [PATCH 01/18] =?UTF-8?q?=E2=9C=A8=20created=20sap=5Fbw=20source?= =?UTF-8?q?=20file.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/viadot/sources/sap_bw.py | 222 +++++++++++++++++++++++++++++++++++ 1 file changed, 222 insertions(+) create mode 100644 src/viadot/sources/sap_bw.py diff --git a/src/viadot/sources/sap_bw.py b/src/viadot/sources/sap_bw.py new file mode 100644 index 000000000..2ddbe6da0 --- /dev/null +++ b/src/viadot/sources/sap_bw.py @@ -0,0 +1,222 @@ +"""SAP BW API connector.""" + +import textwrap +from typing import Any + +import pandas as pd +from pydantic import BaseModel +import pyrfc + +from viadot.config import get_source_credentials +from viadot.exceptions import CredentialError, ValidationError +from viadot.sources.base import Source +from viadot.utils import add_viadot_metadata_columns + + +class SapbwCredentials(BaseModel): + """Checking for values in SAP BW credentials dictionary. + + Two key values are held in the Mindful connector: + - ashost: Indicates the host name or IP address of a specific SAP + application server. + - client: Specifies the SAP logon parameter client. + - passwd: Indicates the SAP logon parameter password. + - sysnr: Indicates the SAP system number—the 2-byte code that identifies the + system on the host. + - user: Indicates the SAP logon parameter user. + + Args: + BaseModel (pydantic.main.ModelMetaclass): A base class for creating + Pydantic models. + """ + + ashost: str + client: str + passwd: str + sysnr: str + user: str + + +class Sapbw(Source): + """Quering the SAP BW (SAP Business Warehouse) source using pyrfc library. 
+ + Documentation to pyrfc can be found under: + https://sap.github.io/PyRFC/pyrfc.html + Documentation for SAP connection modules under: + https://www.se80.co.uk/sap-function-modules/list/?index=rsr_mdx + """ + + def __init__( + self, + *args, + credentials: SapbwCredentials | None = None, + config_key: str = "sap_bw", + **kwargs, + ): + """Create an instance of SAP BW. + + Args: + credentials (Optional[SapbwCredentials], optional): SAP BW credentials. + Defaults to None. + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to "sap_bw". + + Examples: + sap_bw = Sapbw( + credentials=credentials, + config_key=config_key, + ) + sap_bw.api_connection( + ... + ) + data_frame = sap_bw.to_df() + + Raises: + CredentialError: If credentials are not provided in local_config or + directly as a parameter. + """ + credentials = credentials or get_source_credentials(config_key) or None + if credentials is None: + message = "Missing credentials." + raise CredentialError(message) + self.credentials = credentials + + validated_creds = dict(SapbwCredentials(**credentials)) + super().__init__(*args, credentials=validated_creds, **kwargs) + + self.query_output = None + + def _create_connection(self): + """Create the connection with SAP BW. + + Returns: + Connection: Connection to SAP. + """ + return pyrfc.Connection( + ashost=self.credentials.get("ashost"), + sysnr=self.credentials.get("sysnr"), + user=self.credentials.get("user"), + passwd=self.credentials.get("passwd"), + client=self.credentials.get("client"), + ) + + def api_connection(self, mdx_query: str) -> None: + """Generate the SAP BW output dataset from MDX query. + + Args: + mdx_query (str): The MDX query to be passed to connection. 
+ """ + conn = self._create_connection() + + query = textwrap.wrap(mdx_query, 75) + properties = conn.call("RSR_MDX_CREATE_OBJECT", COMMAND_TEXT=query) + + datasetid = properties["DATASETID"] + self.query_output = conn.call("RSR_MDX_GET_FLAT_DATA", DATASETID=datasetid) + conn.close() + + def _apply_user_mapping( + self, + df: pd.DataFrame, + mapping_dict: dict[str, Any] | None = None, + ) -> pd.DataFrame: + """Apply the column mapping defined by user for the output dataframe. + + DataFrame will be cut to selected columns - if any other columns need to be + included in the output file, please add them to the mapping dictionary with + original names. + + Args: + df (pd.DataFrame): Input dataframe for the column mapping task. + mapping_dict (dict[str, Any], optional): Dictionary with original and new + column names. Defaults to None. + + Returns: + pd.DataFrame: Output DataFrame with mapped columns. + """ + self.logger.info("Applying user defined mapping for columns...") + df = df[mapping_dict.keys()] + df.columns = mapping_dict.values() + + self.logger.info("Successfully applied user mapping.") + + return df + + @add_viadot_metadata_columns + def to_df( + self, + if_empty: str = "warn", + mapping_dict: dict[str, Any] | None = None, + ) -> pd.DataFrame: + """Convert the SAP BW output JSON data into a dataframe. + + Args: + if_empty (str, optional): What to do if a fetch produce no data. + Defaults to "warn + + Raises: + ValidationError: Prints the original SAP error message in case of issues + with MDX execution. + mapping_dict (dict[str, Any], optional): Dictionary with original and new + column names. Defaults to None. + + Returns: + pd.Dataframe: The response data as a pandas DataFrame plus viadot metadata. 
+ """ + raw_data = {} + + if self.query_output["RETURN"]["MESSAGE"] == "": + results = self.query_output["DATA"] + for cell in results: + if cell["ROW"] not in raw_data: + raw_data[cell["ROW"]] = {} + if "].[" not in cell["DATA"]: + raw_data[cell["ROW"]][cell["COLUMN"]] = cell["DATA"] + rows = [raw_data[row] for row in raw_data] + cols = [x["DATA"] for x in self.query_output["HEADER"]] + + data_frame = pd.DataFrame(data=rows) + data_frame.columns = cols + + else: + data_frame = pd.DataFrame() + raise ValidationError(self.query_output["RETURN"]["MESSAGE"]) + + if mapping_dict: + data_frame = self._apply_user_mapping(data_frame, mapping_dict) + + if data_frame.empty: + self._handle_if_empty( + if_empty=if_empty, + message="The response does not contain any data.", + ) + else: + self.logger.info("Successfully downloaded data from the Mindful API.") + + return data_frame + + def available_columns(self, mdx_query: str) -> list[str]: + """Generate list of all available columns in a SAP BW table. + + Args: + mdx_query (str): The MDX query to be passed to connection. + + Returns: + list[str]: List of all available columns in the source table. 
+ """ + conn = self._create_connection() + query = textwrap.wrap(mdx_query, width=75) + + properties = conn.call("RSR_MDX_CREATE_STORED_OBJECT", COMMAND_TEXT=query) + datasetid = properties["DATASETID"] + + if properties["RETURN"]["MESSAGE"] == "": + get_axis_info = conn.call("RSR_MDX_GET_AXIS_INFO", DATASETID=datasetid) + cols = get_axis_info["AXIS_DIMENSIONS"] + + all_available_columns = [x["DIM_UNAM"] for x in cols] + else: + all_available_columns = [] + self.logger.error(properties["RETURN"]["MESSAGE"]) + + return all_available_columns From 46db261d1cb75623e967b02ad71c113424864929 Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Tue, 17 Sep 2024 12:35:43 +0200 Subject: [PATCH 02/18] =?UTF-8?q?=E2=9C=A8=20created=20sap=5Fbw=20task=20f?= =?UTF-8?q?ile.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../orchestration/prefect/tasks/sap_bw.py | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 src/viadot/orchestration/prefect/tasks/sap_bw.py diff --git a/src/viadot/orchestration/prefect/tasks/sap_bw.py b/src/viadot/orchestration/prefect/tasks/sap_bw.py new file mode 100644 index 000000000..b916cac9d --- /dev/null +++ b/src/viadot/orchestration/prefect/tasks/sap_bw.py @@ -0,0 +1,58 @@ +"""Task to download data from SAP BW API into a Pandas DataFrame.""" + +from typing import Any + +import pandas as pd +from prefect import task + +from viadot.exceptions import APIError +from viadot.orchestration.prefect.exceptions import MissingSourceCredentialsError +from viadot.orchestration.prefect.utils import get_credentials +from viadot.sources import Sapbw + + +@task(retries=3, log_prints=True, retry_delay_seconds=10, timeout_seconds=60 * 60) +def sap_bw_to_df( + credentials: dict[str, Any] | None = None, + config_key: str | None = None, + azure_key_vault_secret: str | None = None, + mdx_query: str | None = None, + mapping_dict: dict[str, Any] | None = None, +) -> pd.DataFrame: + """Task to download data 
from SAP BW API to Data Frame. + + Args: + credentials (Optional[Dict[str, Any]], optional): Hubspot credentials as a + dictionary. Defaults to None. + config_key (Optional[str], optional): The key in the viadot config holding + relevant credentials. Defaults to None. + azure_key_vault_secret (Optional[str], optional): The name of the Azure Key + Vault secret where credentials are stored. Defaults to None. + mdx_query (str, optional): The MDX query to be passed to connection. + mapping_dict (dict[str, Any], optional): Dictionary with original and new + column names. Defaults to None. + + Raises: + MissingSourceCredentialsError: If none credentials have been provided. + APIError: The `mdx_query` is a "must" requirement. + + Returns: + pd.DataFrame: The response data as a Pandas Data Frame. + """ + if not (azure_key_vault_secret or config_key or credentials): + raise MissingSourceCredentialsError + + if not config_key: + credentials = credentials or get_credentials(azure_key_vault_secret) + + if mdx_query is None: + message = "SAP BW API `mdx_query` is a mandatory requirement." 
+ raise APIError(message) + + sap_bw = Sapbw( + credentials=credentials, + config_key=config_key, + ) + sap_bw.api_connection(mdx_query=mdx_query) + + return sap_bw.to_df(mapping_dict=mapping_dict) From 8374c22bc652349b533886f5f432a74c99f067f6 Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Tue, 17 Sep 2024 12:35:58 +0200 Subject: [PATCH 03/18] =?UTF-8?q?=E2=9C=A8=20created=20sap=5Fbw=20flow=20f?= =?UTF-8?q?ile.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../prefect/flows/sap_bw_to_adls.py | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 src/viadot/orchestration/prefect/flows/sap_bw_to_adls.py diff --git a/src/viadot/orchestration/prefect/flows/sap_bw_to_adls.py b/src/viadot/orchestration/prefect/flows/sap_bw_to_adls.py new file mode 100644 index 000000000..a572ea135 --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/sap_bw_to_adls.py @@ -0,0 +1,70 @@ +"""Task to download data from SAP BW API into a Pandas DataFrame.""" + +from typing import Any + +from prefect import flow +from prefect.task_runners import ConcurrentTaskRunner + +from viadot.orchestration.prefect.tasks import df_to_adls, sap_bw_to_df + + +@flow( + name="SAP BW extraction to ADLS", + description="Extract data from SAP BW and load it into Azure Data Lake Storage.", + retries=1, + retry_delay_seconds=60, + task_runner=ConcurrentTaskRunner, +) +def sap_bw_to_adls( + credentials: dict[str, Any] | None = None, + config_key: str | None = None, + azure_key_vault_secret: str | None = None, + mdx_query: str | None = None, + mapping_dict: dict[str, Any] | None = None, + adls_credentials: str | None = None, + adls_azure_key_vault_secret: str | None = None, + adls_config_key: str | None = None, + adls_path: str | None = None, + adls_path_overwrite: bool = False, +) -> None: + """Flow for downloading data from SAP BW API to Azure Data Lake. 
+ + Args: + credentials (Optional[Dict[str, Any]], optional): Hubspot credentials as a + dictionary. Defaults to None. + config_key (Optional[str], optional): The key in the viadot config holding + relevant credentials. Defaults to None. + azure_key_vault_secret (Optional[str], optional): The name of the Azure Key + Vault secret where credentials are stored. Defaults to None. + mdx_query (str, optional): The MDX query to be passed to connection. + mapping_dict (dict[str, Any], optional): Dictionary with original and new + column names. Defaults to None. + adls_credentials (str, optional): The name of the Azure Key Vault + secret containing a dictionary with ACCOUNT_NAME and Service Principal + credentials (TENANT_ID, CLIENT_ID, CLIENT_SECRET) for the Azure Data Lake. + Defaults to None. + adls_azure_key_vault_secret (str, optional): The name of the Azure Key. + Defaults to None. + adls_config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + adls_path (str, optional): Azure Data Lake destination folder/catalog path. + Defaults to None. + adls_path_overwrite (bool, optional): Whether to overwrite the file in ADLS. + Defaults to False. 
+ """ + data_frame = sap_bw_to_df( + credentials=credentials, + config_key=config_key, + azure_key_vault_secret=azure_key_vault_secret, + mdx_query=mdx_query, + mapping_dict=mapping_dict, + ) + + return df_to_adls( + df=data_frame, + path=adls_path, + credentials=adls_credentials, + credentials_secret=adls_azure_key_vault_secret, + config_key=adls_config_key, + overwrite=adls_path_overwrite, + ) From 2e34db511da4731f5f05af80d0900c69fe4e4a2b Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Tue, 17 Sep 2024 12:36:19 +0200 Subject: [PATCH 04/18] =?UTF-8?q?=E2=9C=85=20created=20integration=20test?= =?UTF-8?q?=20file.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration/test_sap_bw.py | 176 +++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 tests/integration/test_sap_bw.py diff --git a/tests/integration/test_sap_bw.py b/tests/integration/test_sap_bw.py new file mode 100644 index 000000000..2a3f1a202 --- /dev/null +++ b/tests/integration/test_sap_bw.py @@ -0,0 +1,176 @@ +"""'test_sap_bw.py'.""" + +from datetime import date, timedelta + +from viadot.orchestration.prefect.flows import sap_bw_to_adls + + +present_day = date.today() +present_day_str = present_day.strftime("%Y%m%d") +past_day = date.today() - timedelta(7) +past_day_str = past_day.strftime("%Y%m%d") +mdx_query = f""" + SELECT + {{[Measures].[003YPR44RQTVKWX9OL316XK7J], + [Measures].[003YPR44RQTVKWX9T5BFEWUY9], + [Measures].[003YPR44RQTVKWX9UW92X7ELV], + [Measures].[003YPR44RQTVKWX9YLKYZKH5O], + [Measures].[003YPR44RQTVKWXA51D4J8HRZ], + [Measures].[003YPR44RQTVKWXAEL7KFSJ6U], + [Measures].[003YPR44RQTVKWXARA4PVZ9TK]}} + ON COLUMNS, + NON EMPTY + {{ [0BILLTOPRTY].[LEVEL01].MEMBERS * + [0CALMONTH__0CALMONTH2].[LEVEL01].MEMBERS * + [0COMP_CODE].[LEVEL01].MEMBERS * + [0COMP_CODE__ZCOMPCOTE].[LEVEL01].MEMBERS * + [0CREATEDBY].[LEVEL01].MEMBERS * + [0CREATEDON].[LEVEL01].MEMBERS * + [0DISTR_CHAN].[LEVEL01].MEMBERS * + 
[0DOC_NUMBER].[LEVEL01].MEMBERS * + [0DOC_TYPE].[LEVEL01].MEMBERS * + [0IMODOCCAT].[LEVEL01].MEMBERS * + [0MATERIAL__ZDSPPRIC].[LEVEL01].MEMBERS * + [0MATERIAL__ZPRDGRP].[LEVEL01].MEMBERS * + [0MATERIAL__ZPANEVACD].[LEVEL01].MEMBERS * + [0MATERIAL__ZPRODAREA].[LEVEL01].MEMBERS * + [0MATERIAL__ZRDBTYPE].[LEVEL01].MEMBERS * + [0MATERIAL__ZTYPEVAR].[LEVEL01].MEMBERS * + [0MATERIAL__ZVAR_C34].[LEVEL01].MEMBERS * + [0MATERIAL__ZVAR_CH1].[LEVEL01].MEMBERS * + [0MATERIAL__ZVAR_CH2].[LEVEL01].MEMBERS * + [0MATERIAL__ZVCIVAR2].[LEVEL01].MEMBERS * + [0ORD_REASON].[LEVEL01].MEMBERS * + [0PAYER].[LEVEL01].MEMBERS * + [0REASON_REJ].[LEVEL01].MEMBERS * + [0SALESORG].[LEVEL01].MEMBERS * + [0SHIP_TO].[LEVEL01].MEMBERS * + [0SOLD_TO].[LEVEL01].MEMBERS * + [0SOLD_TO__0ACCNT_GRP].[LEVEL01].MEMBERS * + [0USAGE_IND].[LEVEL01].MEMBERS * + [0USAGE_IND__ZSALECAT].[LEVEL01].MEMBERS * + [ZASE_ID].[LEVEL01].MEMBERS * + [ZBVTSPRST].[LEVEL01].MEMBERS * + [ZCALWEEK].[LEVEL01].MEMBERS * + [ZORD_CREA].[LEVEL01].MEMBERS * + [ZPONUMBER].[LEVEL01].MEMBERS * + [ZPUORTYPE].[LEVEL01].MEMBERS * + [ZSEG_HDR].[LEVEL01].MEMBERS * + [0SALESEMPLY].[LEVEL01].MEMBERS * + [0SHIP_TO__0ACCNT_GRP].[LEVEL01].MEMBERS * + [0SHIP_TO__0CITY].[LEVEL01].MEMBERS * + [0CALYEAR].[LEVEL01].MEMBERS * + [0CALMONTH].[LEVEL01].MEMBERS * + {{[0CALDAY].[{past_day_str}] : [0CALDAY].[{present_day_str}]}}}} + DIMENSION PROPERTIES + MEMBER_NAME, + MEMBER_CAPTION + ON ROWS + FROM ZCSALORD1/ZBW4_ZCSALORD1_002_BOA +""" + +mapping_dict = { + "[0BILLTOPRTY].[LEVEL01].[MEMBER_NAME]": "bill_to_party", + "[0BILLTOPRTY].[LEVEL01].[MEMBER_CAPTION]": "bill_to_party_id", + "[0CALMONTH__0CALMONTH2].[LEVEL01].[MEMBER_CAPTION]": "calendar_month_2", + "[0CALMONTH__0CALMONTH2].[LEVEL01].[MEMBER_NAME]": "calendar_month_id", + "[0COMP_CODE].[LEVEL01].[MEMBER_CAPTION]": "company_code", + "[0COMP_CODE].[LEVEL01].[MEMBER_NAME]": "company_code_name", + "[0COMP_CODE__ZCOMPCOTE].[LEVEL01].[MEMBER_CAPTION]": "company_code_cons_term", + 
"[0COMP_CODE__ZCOMPCOTE].[LEVEL01].[MEMBER_NAME]": "company_code_cons_term_name", + "[0CREATEDBY].[LEVEL01].[MEMBER_CAPTION]": "created_by", + "[0CREATEDBY].[LEVEL01].[MEMBER_NAME]": "created_by_name", + "[0CREATEDON].[LEVEL01].[MEMBER_CAPTION]": "created_on", + "[0CREATEDON].[LEVEL01].[MEMBER_NAME]": "created_on_name", + "[0DISTR_CHAN].[LEVEL01].[MEMBER_CAPTION]": "distribution_channel", + "[0DISTR_CHAN].[LEVEL01].[MEMBER_NAME]": "distribution_channel_name", + "[0DOC_NUMBER].[LEVEL01].[MEMBER_CAPTION]": "sales_document", + "[0DOC_NUMBER].[LEVEL01].[MEMBER_NAME]": "sales_document_name", + "[0DOC_TYPE].[LEVEL01].[MEMBER_CAPTION]": "sales_doc_type", + "[0DOC_TYPE].[LEVEL01].[MEMBER_NAME]": "sales_doc_type_name", + "[0IMODOCCAT].[LEVEL01].[MEMBER_CAPTION]": "sales_document_categ", + "[0IMODOCCAT].[LEVEL01].[MEMBER_NAME]": "sales_document_categ_name", + "[0MATERIAL__ZDSPPRIC].[LEVEL01].[MEMBER_CAPTION]": "dsp_pricing_group", + "[0MATERIAL__ZDSPPRIC].[LEVEL01].[MEMBER_NAME]": "dsp_pricing_group_name", + "[0MATERIAL__ZPANEVACD].[LEVEL01].[MEMBER_CAPTION]": "pane_variant_code", + "[0MATERIAL__ZPANEVACD].[LEVEL01].[MEMBER_NAME]": "pane_variant_code_name", + "[0MATERIAL__ZPRDGRP].[LEVEL01].[MEMBER_CAPTION]": "product_group", + "[0MATERIAL__ZPRDGRP].[LEVEL01].[MEMBER_NAME]": "product_group_name", + "[0MATERIAL__ZPRODAREA].[LEVEL01].[MEMBER_CAPTION]": "product_area", + "[0MATERIAL__ZPRODAREA].[LEVEL01].[MEMBER_NAME]": "product_area_name", + "[0MATERIAL__ZRDBTYPE].[LEVEL01].[MEMBER_CAPTION]": "type_of_material", + "[0MATERIAL__ZRDBTYPE].[LEVEL01].[MEMBER_NAME]": "type_of_material_name", + "[0MATERIAL__ZTYPEVAR].[LEVEL01].[MEMBER_CAPTION]": "material_type_variant", + "[0MATERIAL__ZTYPEVAR].[LEVEL01].[MEMBER_NAME]": "material_type_variant_name", + "[0MATERIAL__ZVAR_C34].[LEVEL01].[MEMBER_CAPTION]": "3_and_4_character", + "[0MATERIAL__ZVAR_C34].[LEVEL01].[MEMBER_NAME]": "3_and_4_character_name", + "[0MATERIAL__ZVAR_CH1].[LEVEL01].[MEMBER_CAPTION]": "1_character_of_vari", + 
"[0MATERIAL__ZVAR_CH1].[LEVEL01].[MEMBER_NAME]": "1_character_of_vari_name", + "[0MATERIAL__ZVAR_CH2].[LEVEL01].[MEMBER_CAPTION]": "2_character_of_vari", + "[0MATERIAL__ZVAR_CH2].[LEVEL01].[MEMBER_NAME]": "2_character_of_vari_name", + "[0MATERIAL__ZVCIVAR2].[LEVEL01].[MEMBER_CAPTION]": "product_variant", + "[0MATERIAL__ZVCIVAR2].[LEVEL01].[MEMBER_NAME]": "product_variant_name", + "[0ORD_REASON].[LEVEL01].[MEMBER_CAPTION]": "reason_for_order", + "[0ORD_REASON].[LEVEL01].[MEMBER_NAME]": "reason_for_order_name", + "[0PAYER].[LEVEL01].[MEMBER_CAPTION]": "payer", + "[0PAYER].[LEVEL01].[MEMBER_NAME]": "payer_name", + "[0REASON_REJ].[LEVEL01].[MEMBER_CAPTION]": "reason_for_rejection", + "[0REASON_REJ].[LEVEL01].[MEMBER_NAME]": "reason_for_rejection_name", + "[0SALESORG].[LEVEL01].[MEMBER_CAPTION]": "sales_organization", + "[0SALESORG].[LEVEL01].[MEMBER_NAME]": "sales_organization_name", + "[0SHIP_TO].[LEVEL01].[MEMBER_CAPTION]": "ship_to_party", + "[0SHIP_TO].[LEVEL01].[MEMBER_NAME]": "ship_to_party_name", + "[0SOLD_TO].[LEVEL01].[MEMBER_CAPTION]": "sold_to_party", + "[0SOLD_TO].[LEVEL01].[MEMBER_NAME]": "sold_to_party_name", + "[0SOLD_TO__0ACCNT_GRP].[LEVEL01].[MEMBER_CAPTION]": "customer_account_group_sold_to", + "[0SOLD_TO__0ACCNT_GRP].[LEVEL01].[MEMBER_NAME]": "customer_account_group_sold_to_name", + "[0USAGE_IND].[LEVEL01].[MEMBER_CAPTION]": "usage_indicator", + "[0USAGE_IND].[LEVEL01].[MEMBER_NAME]": "usage_indicator_name", + "[0USAGE_IND__ZSALECAT].[LEVEL01].[MEMBER_CAPTION]": "sales_cat_usage", + "[0USAGE_IND__ZSALECAT].[LEVEL01].[MEMBER_NAME]": "sales_cat_usage_name", + "[ZASE_ID].[LEVEL01].[MEMBER_CAPTION]": "ase_id", + "[ZASE_ID].[LEVEL01].[MEMBER_NAME]": "ase_id_name", + "[ZBVTSPRST].[LEVEL01].[MEMBER_CAPTION]": "order_status", + "[ZBVTSPRST].[LEVEL01].[MEMBER_NAME]": "order_status_name", + "[ZCALWEEK].[LEVEL01].[MEMBER_CAPTION]": "calendar_week", + "[ZCALWEEK].[LEVEL01].[MEMBER_NAME]": "calendar_week_name", + "[ZORD_CREA].[LEVEL01].[MEMBER_CAPTION]": 
"order_creation_date", + "[ZORD_CREA].[LEVEL01].[MEMBER_NAME]": "order_creation_date_name", + "[ZPONUMBER].[LEVEL01].[MEMBER_CAPTION]": "po_number", + "[ZPONUMBER].[LEVEL01].[MEMBER_NAME]": "po_number_name", + "[ZPUORTYPE].[LEVEL01].[MEMBER_CAPTION]": "purchase_order_type", + "[ZPUORTYPE].[LEVEL01].[MEMBER_NAME]": "purchase_order_type_name", + "[ZSEG_HDR].[LEVEL01].[MEMBER_CAPTION]": "segment_header", + "[ZSEG_HDR].[LEVEL01].[MEMBER_NAME]": "segment_header_name", + "[0SALESEMPLY].[LEVEL01].[MEMBER_CAPTION]": "sales_representative", + "[0SALESEMPLY].[LEVEL01].[MEMBER_NAME]": "sales_representative_name", + "[0SHIP_TO__0ACCNT_GRP].[LEVEL01].[MEMBER_CAPTION]": "customer_account_group_ship_to", + "[0SHIP_TO__0ACCNT_GRP].[LEVEL01].[MEMBER_NAME]": "customer_account_group_ship_to_name", + "[0SHIP_TO__0CITY].[LEVEL01].[MEMBER_CAPTION]": "location_ship_to", + "[0SHIP_TO__0CITY].[LEVEL01].[MEMBER_NAME]": "location_ship_to_name", + "[0CALDAY].[LEVEL01].[MEMBER_CAPTION]": "calendar_day", + "[0CALDAY].[LEVEL01].[MEMBER_NAME]": "calendar_day_name", + "[0CALMONTH].[LEVEL01].[MEMBER_CAPTION]": "calendar_month", + "[0CALMONTH].[LEVEL01].[MEMBER_NAME]": "calendar_month_name", + "[0CALYEAR].[LEVEL01].[MEMBER_CAPTION]": "calendar_year", + "[0CALYEAR].[LEVEL01].[MEMBER_NAME]": "calendar_year_name", + "[Measures].[003YPR44RQTVKWX9OL316XK7J]": "net_value", + "[Measures].[003YPR44RQTVKWX9T5BFEWUY9]": "order_quantity", + "[Measures].[003YPR44RQTVKWX9UW92X7ELV]": "open_orders_quantity", + "[Measures].[003YPR44RQTVKWX9YLKYZKH5O]": "number_of_sales_orders", + "[Measures].[003YPR44RQTVKWXA51D4J8HRZ]": "number_of_quotations", + "[Measures].[003YPR44RQTVKWXAEL7KFSJ6U]": "number_of_orders_created_from_quotations", + "[Measures].[003YPR44RQTVKWXARA4PVZ9TK]": "number_of_quotations_expired_validity_date", +} + + +def test_mindful_to_adls(sap_bw_config_key, adls_credentials_secret): + state = sap_bw_to_adls( + azure_key_vault_secret=sap_bw_config_key, + mdx_query=mdx_query, + 
mapping_dict=mapping_dict, + adls_path="raw/dyvenia_sandbox/sap_bw/sab_bw.parquet", + adls_azure_key_vault_secret=adls_credentials_secret, + adls_path_overwrite=True, + ) + assert state.is_successful() From dc6f18d41748fb40ed4a19f304b4499dd66722e4 Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Tue, 17 Sep 2024 14:17:56 +0200 Subject: [PATCH 05/18] =?UTF-8?q?=F0=9F=8E=A8=20added=20sap=20bw=20to=20in?= =?UTF-8?q?it=20files.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 6 +++--- src/viadot/orchestration/prefect/flows/__init__.py | 3 ++- src/viadot/orchestration/prefect/tasks/__init__.py | 8 +++----- src/viadot/sources/__init__.py | 5 +++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 67d127e2d..1b0d8b436 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,9 +16,9 @@ repos: rev: v0.3.4 hooks: # Run the linter. - - id: ruff - name: lint Python - # args: [ --fix ] + # - id: ruff + # name: lint Python + # # args: [ --fix ] # Run the formatter. 
- id: ruff-format diff --git a/src/viadot/orchestration/prefect/flows/__init__.py b/src/viadot/orchestration/prefect/flows/__init__.py index 48a4524ea..6c1bb4188 100644 --- a/src/viadot/orchestration/prefect/flows/__init__.py +++ b/src/viadot/orchestration/prefect/flows/__init__.py @@ -13,6 +13,7 @@ from .hubspot_to_adls import hubspot_to_adls from .mindful_to_adls import mindful_to_adls from .outlook_to_adls import outlook_to_adls +from .sap_bw_to_adls import sap_bw_to_adls from .sap_to_parquet import sap_to_parquet from .sap_to_redshift_spectrum import sap_to_redshift_spectrum from .sharepoint_to_adls import sharepoint_to_adls @@ -24,7 +25,6 @@ from .transform import transform from .transform_and_catalog import transform_and_catalog - __all__ = [ "cloud_for_customers_to_adls", "cloud_for_customers_to_databricks", @@ -39,6 +39,7 @@ "hubspot_to_adls", "mindful_to_adls", "outlook_to_adls", + "sap_bw_to_adls", "sap_to_parquet", "sap_to_redshift_spectrum", "sharepoint_to_adls", diff --git a/src/viadot/orchestration/prefect/tasks/__init__.py b/src/viadot/orchestration/prefect/tasks/__init__.py index 8e1053f08..50873701d 100644 --- a/src/viadot/orchestration/prefect/tasks/__init__.py +++ b/src/viadot/orchestration/prefect/tasks/__init__.py @@ -17,14 +17,11 @@ from .outlook import outlook_to_df from .redshift_spectrum import df_to_redshift_spectrum from .s3 import s3_upload_file +from .sap_bw import sap_bw_to_df from .sap_rfc import sap_rfc_to_df -from .sharepoint import ( - sharepoint_download_file, - sharepoint_to_df, -) +from .sharepoint import sharepoint_download_file, sharepoint_to_df from .sql_server import create_sql_server_table, sql_server_query, sql_server_to_df - __all__ = [ "adls_upload", "df_to_adls", @@ -44,6 +41,7 @@ "outlook_to_df", "df_to_redshift_spectrum", "s3_upload_file", + "sap_bw_to_df", "sap_rfc_to_df", "sharepoint_download_file", "sharepoint_to_df", diff --git a/src/viadot/sources/__init__.py b/src/viadot/sources/__init__.py index 
92c520f58..29d924a55 100644 --- a/src/viadot/sources/__init__.py +++ b/src/viadot/sources/__init__.py @@ -10,20 +10,21 @@ from .hubspot import Hubspot from .mindful import Mindful from .outlook import Outlook +from .sap_bw import Sapbw from .sharepoint import Sharepoint from .sql_server import SQLServer from .trino import Trino from .uk_carbon_intensity import UKCarbonIntensity - __all__ = [ "CloudForCustomers", "Epicor", "ExchangeRates", "Genesys", - "Outlook", "Hubspot", "Mindful", + "Outlook", + "Sapbw", "Sharepoint", "Trino", "SQLServer", From 7bd90d2f366e36808c1b1a903090932e9cac36bd Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Tue, 17 Sep 2024 14:18:34 +0200 Subject: [PATCH 06/18] =?UTF-8?q?=F0=9F=93=9D=20updated=20some=20comment?= =?UTF-8?q?=20lines.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1b0d8b436..67d127e2d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,9 +16,9 @@ repos: rev: v0.3.4 hooks: # Run the linter. - # - id: ruff - # name: lint Python - # # args: [ --fix ] + - id: ruff + name: lint Python + # args: [ --fix ] # Run the formatter. 
- id: ruff-format From 66e1c57dc9601dd6d38ab08fbc723866d063b342 Mon Sep 17 00:00:00 2001 From: gwieloch Date: Thu, 26 Sep 2024 10:56:08 +0200 Subject: [PATCH 07/18] adding ruff formatter --- src/viadot/orchestration/prefect/flows/__init__.py | 1 + src/viadot/orchestration/prefect/tasks/__init__.py | 1 + 2 files changed, 2 insertions(+) diff --git a/src/viadot/orchestration/prefect/flows/__init__.py b/src/viadot/orchestration/prefect/flows/__init__.py index 6c1bb4188..268f0cc44 100644 --- a/src/viadot/orchestration/prefect/flows/__init__.py +++ b/src/viadot/orchestration/prefect/flows/__init__.py @@ -25,6 +25,7 @@ from .transform import transform from .transform_and_catalog import transform_and_catalog + __all__ = [ "cloud_for_customers_to_adls", "cloud_for_customers_to_databricks", diff --git a/src/viadot/orchestration/prefect/tasks/__init__.py b/src/viadot/orchestration/prefect/tasks/__init__.py index 50873701d..604d8b7d1 100644 --- a/src/viadot/orchestration/prefect/tasks/__init__.py +++ b/src/viadot/orchestration/prefect/tasks/__init__.py @@ -22,6 +22,7 @@ from .sharepoint import sharepoint_download_file, sharepoint_to_df from .sql_server import create_sql_server_table, sql_server_query, sql_server_to_df + __all__ = [ "adls_upload", "df_to_adls", From 109f8df0d24b4ffb6dc317668913aab62a1e60be Mon Sep 17 00:00:00 2001 From: angelika233 Date: Thu, 26 Sep 2024 11:43:33 +0200 Subject: [PATCH 08/18] Fix import --- src/viadot/sources/__init__.py | 4 +--- src/viadot/sources/sap_bw.py | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/viadot/sources/__init__.py b/src/viadot/sources/__init__.py index c347c7a5c..1babd9ea2 100644 --- a/src/viadot/sources/__init__.py +++ b/src/viadot/sources/__init__.py @@ -11,7 +11,6 @@ from .hubspot import Hubspot from .mindful import Mindful from .outlook import Outlook -from .sap_bw import Sapbw from .sharepoint import Sharepoint from .sql_server import SQLServer from .uk_carbon_intensity import UKCarbonIntensity 
@@ -25,7 +24,6 @@ "Hubspot", "Mindful", "Outlook", - "Sapbw", "Sharepoint", "Trino", "SQLServer", @@ -61,7 +59,7 @@ if find_spec("pyrfc"): from viadot.sources.sap_rfc import SAPRFC, SAPRFCV2 # noqa: F401 - __all__.extend(["SAPRFC", "SAPRFCV2"]) + __all__.extend(["SAPRFC", "SAPRFCV2", "Sapbw"]) if find_spec("pyspark"): from viadot.sources.databricks import Databricks # noqa: F401 diff --git a/src/viadot/sources/sap_bw.py b/src/viadot/sources/sap_bw.py index 2ddbe6da0..515177d3f 100644 --- a/src/viadot/sources/sap_bw.py +++ b/src/viadot/sources/sap_bw.py @@ -1,4 +1,4 @@ -"""SAP BW API connector.""" +"""SAP BW connector.""" import textwrap from typing import Any @@ -16,7 +16,7 @@ class SapbwCredentials(BaseModel): """Checking for values in SAP BW credentials dictionary. - Two key values are held in the Mindful connector: + Two key values are held in the SAP BW connector: - ashost: Indicates the host name or IP address of a specific SAP application server. - client: Specifies the SAP logon parameter client. 
From 84d0bd4a8f81d847f5c842ac2851b4b8b86746e5 Mon Sep 17 00:00:00 2001 From: angelika233 Date: Thu, 26 Sep 2024 11:49:49 +0200 Subject: [PATCH 09/18] =?UTF-8?q?=F0=9F=90=9B=20Fix=20imports?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/viadot/sources/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/viadot/sources/__init__.py b/src/viadot/sources/__init__.py index 1babd9ea2..8f582f130 100644 --- a/src/viadot/sources/__init__.py +++ b/src/viadot/sources/__init__.py @@ -57,6 +57,7 @@ if find_spec("pyrfc"): + from viadot.sources.sap_bw import Sapbw # noqa: F401 from viadot.sources.sap_rfc import SAPRFC, SAPRFCV2 # noqa: F401 __all__.extend(["SAPRFC", "SAPRFCV2", "Sapbw"]) From 35fce3504c88ff9bd5c375f2bcf2ce2b4dcf9ae5 Mon Sep 17 00:00:00 2001 From: angelika233 Date: Thu, 26 Sep 2024 11:56:35 +0200 Subject: [PATCH 10/18] =?UTF-8?q?=F0=9F=90=9B=20Fix=20imports?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/viadot/orchestration/prefect/tasks/sap_bw.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/viadot/orchestration/prefect/tasks/sap_bw.py b/src/viadot/orchestration/prefect/tasks/sap_bw.py index b916cac9d..985107e9b 100644 --- a/src/viadot/orchestration/prefect/tasks/sap_bw.py +++ b/src/viadot/orchestration/prefect/tasks/sap_bw.py @@ -1,5 +1,6 @@ """Task to download data from SAP BW API into a Pandas DataFrame.""" +import contextlib from typing import Any import pandas as pd @@ -8,7 +9,10 @@ from viadot.exceptions import APIError from viadot.orchestration.prefect.exceptions import MissingSourceCredentialsError from viadot.orchestration.prefect.utils import get_credentials -from viadot.sources import Sapbw + + +with contextlib.suppress(ImportError): + from viadot.sources import Sapbw @task(retries=3, log_prints=True, retry_delay_seconds=10, timeout_seconds=60 * 60) From b3601c15966faf15545e3a89e2608bc8b8ac3b4d Mon Sep 17 
00:00:00 2001 From: angelika233 Date: Thu, 26 Sep 2024 12:03:22 +0200 Subject: [PATCH 11/18] =?UTF-8?q?=F0=9F=8E=A8=20Adjust=20class=20name?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/viadot/orchestration/prefect/tasks/sap_bw.py | 12 ++++++------ src/viadot/sources/__init__.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/viadot/orchestration/prefect/tasks/sap_bw.py b/src/viadot/orchestration/prefect/tasks/sap_bw.py index 985107e9b..9df1db375 100644 --- a/src/viadot/orchestration/prefect/tasks/sap_bw.py +++ b/src/viadot/orchestration/prefect/tasks/sap_bw.py @@ -1,4 +1,4 @@ -"""Task to download data from SAP BW API into a Pandas DataFrame.""" +"""Task to download data from SAP BW into a Pandas DataFrame.""" import contextlib from typing import Any @@ -12,7 +12,7 @@ with contextlib.suppress(ImportError): - from viadot.sources import Sapbw + from viadot.sources import SAPBW @task(retries=3, log_prints=True, retry_delay_seconds=10, timeout_seconds=60 * 60) @@ -23,10 +23,10 @@ def sap_bw_to_df( mdx_query: str | None = None, mapping_dict: dict[str, Any] | None = None, ) -> pd.DataFrame: - """Task to download data from SAP BW API to Data Frame. + """Task to download data from SAP BW to DataFrame. Args: - credentials (Optional[Dict[str, Any]], optional): Hubspot credentials as a + credentials (Optional[Dict[str, Any]], optional): SAPBW credentials as a dictionary. Defaults to None. config_key (Optional[str], optional): The key in the viadot config holding relevant credentials. Defaults to None. @@ -50,10 +50,10 @@ def sap_bw_to_df( credentials = credentials or get_credentials(azure_key_vault_secret) if mdx_query is None: - message = "SAP BW API `mdx_query` is a mandatory requirement." + message = "SAP BW `mdx_query` is a mandatory requirement." 
raise APIError(message) - sap_bw = Sapbw( + sap_bw = SAPBW( credentials=credentials, config_key=config_key, ) diff --git a/src/viadot/sources/__init__.py b/src/viadot/sources/__init__.py index 8f582f130..5f1f01933 100644 --- a/src/viadot/sources/__init__.py +++ b/src/viadot/sources/__init__.py @@ -57,10 +57,10 @@ if find_spec("pyrfc"): - from viadot.sources.sap_bw import Sapbw # noqa: F401 + from viadot.sources.sap_bw import SAPBW # noqa: F401 from viadot.sources.sap_rfc import SAPRFC, SAPRFCV2 # noqa: F401 - __all__.extend(["SAPRFC", "SAPRFCV2", "Sapbw"]) + __all__.extend(["SAPRFC", "SAPRFCV2", "SAPBW"]) if find_spec("pyspark"): from viadot.sources.databricks import Databricks # noqa: F401 From daaaa850e49ff2a50303c40942758394951ab898 Mon Sep 17 00:00:00 2001 From: angelika233 Date: Thu, 26 Sep 2024 13:17:07 +0200 Subject: [PATCH 12/18] Fix imports --- src/viadot/sources/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/viadot/sources/__init__.py b/src/viadot/sources/__init__.py index 53772a014..998a16733 100644 --- a/src/viadot/sources/__init__.py +++ b/src/viadot/sources/__init__.py @@ -27,11 +27,9 @@ "Outlook", "SQLServer", "Sharepoint", - "Sharepoint", "Supermetrics", "SupermetricsCredentials", # pragma: allowlist-secret "Trino", - "Trino", "UKCarbonIntensity", ] if find_spec("adlfs"): From 1e1622169f8cea8fac91076d64a4662ee95a828e Mon Sep 17 00:00:00 2001 From: angelika233 Date: Mon, 30 Sep 2024 09:21:16 +0200 Subject: [PATCH 13/18] =?UTF-8?q?=F0=9F=8E=A8=20Format=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/viadot/sources/sap_bw.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/viadot/sources/sap_bw.py b/src/viadot/sources/sap_bw.py index 515177d3f..dba026f5b 100644 --- a/src/viadot/sources/sap_bw.py +++ b/src/viadot/sources/sap_bw.py @@ -13,7 +13,7 @@ from viadot.utils import add_viadot_metadata_columns -class SapbwCredentials(BaseModel): +class 
SAPBWCredentials(BaseModel): """Checking for values in SAP BW credentials dictionary. Two key values are held in the SAP BW connector: @@ -37,7 +37,7 @@ class SapbwCredentials(BaseModel): user: str -class Sapbw(Source): +class SAPBW(Source): """Quering the SAP BW (SAP Business Warehouse) source using pyrfc library. Documentation to pyrfc can be found under: @@ -49,20 +49,20 @@ class Sapbw(Source): def __init__( self, *args, - credentials: SapbwCredentials | None = None, + credentials: SAPBWCredentials | None = None, config_key: str = "sap_bw", **kwargs, ): """Create an instance of SAP BW. Args: - credentials (Optional[SapbwCredentials], optional): SAP BW credentials. + credentials (Optional[SAPBWCredentials], optional): SAP BW credentials. Defaults to None. config_key (str, optional): The key in the viadot config holding relevant credentials. Defaults to "sap_bw". Examples: - sap_bw = Sapbw( + sap_bw = SAPBW( credentials=credentials, config_key=config_key, ) @@ -81,7 +81,7 @@ def __init__( raise CredentialError(message) self.credentials = credentials - validated_creds = dict(SapbwCredentials(**credentials)) + validated_creds = dict(SAPBWCredentials(**credentials)) super().__init__(*args, credentials=validated_creds, **kwargs) self.query_output = None From f146e5db06564cc6ac1723f487720a0bb26b7dfc Mon Sep 17 00:00:00 2001 From: angelika233 Date: Mon, 30 Sep 2024 09:25:24 +0200 Subject: [PATCH 14/18] remove credentials from flow and task --- .../orchestration/prefect/flows/sap_bw_to_adls.py | 10 ---------- src/viadot/orchestration/prefect/tasks/sap_bw.py | 7 ++----- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/src/viadot/orchestration/prefect/flows/sap_bw_to_adls.py b/src/viadot/orchestration/prefect/flows/sap_bw_to_adls.py index a572ea135..7703ec068 100644 --- a/src/viadot/orchestration/prefect/flows/sap_bw_to_adls.py +++ b/src/viadot/orchestration/prefect/flows/sap_bw_to_adls.py @@ -16,12 +16,10 @@ task_runner=ConcurrentTaskRunner, ) def 
sap_bw_to_adls( - credentials: dict[str, Any] | None = None, config_key: str | None = None, azure_key_vault_secret: str | None = None, mdx_query: str | None = None, mapping_dict: dict[str, Any] | None = None, - adls_credentials: str | None = None, adls_azure_key_vault_secret: str | None = None, adls_config_key: str | None = None, adls_path: str | None = None, @@ -30,8 +28,6 @@ def sap_bw_to_adls( """Flow for downloading data from SAP BW API to Azure Data Lake. Args: - credentials (Optional[Dict[str, Any]], optional): Hubspot credentials as a - dictionary. Defaults to None. config_key (Optional[str], optional): The key in the viadot config holding relevant credentials. Defaults to None. azure_key_vault_secret (Optional[str], optional): The name of the Azure Key @@ -39,10 +35,6 @@ def sap_bw_to_adls( mdx_query (str, optional): The MDX query to be passed to connection. mapping_dict (dict[str, Any], optional): Dictionary with original and new column names. Defaults to None. - adls_credentials (str, optional): The name of the Azure Key Vault - secret containing a dictionary with ACCOUNT_NAME and Service Principal - credentials (TENANT_ID, CLIENT_ID, CLIENT_SECRET) for the Azure Data Lake. - Defaults to None. adls_azure_key_vault_secret (str, optional): The name of the Azure Key. Defaults to None. adls_config_key (str, optional): The key in the viadot config holding relevant @@ -53,7 +45,6 @@ def sap_bw_to_adls( Defaults to False. 
""" data_frame = sap_bw_to_df( - credentials=credentials, config_key=config_key, azure_key_vault_secret=azure_key_vault_secret, mdx_query=mdx_query, @@ -63,7 +54,6 @@ def sap_bw_to_adls( return df_to_adls( df=data_frame, path=adls_path, - credentials=adls_credentials, credentials_secret=adls_azure_key_vault_secret, config_key=adls_config_key, overwrite=adls_path_overwrite, diff --git a/src/viadot/orchestration/prefect/tasks/sap_bw.py b/src/viadot/orchestration/prefect/tasks/sap_bw.py index 9df1db375..4573b9e7c 100644 --- a/src/viadot/orchestration/prefect/tasks/sap_bw.py +++ b/src/viadot/orchestration/prefect/tasks/sap_bw.py @@ -17,7 +17,6 @@ @task(retries=3, log_prints=True, retry_delay_seconds=10, timeout_seconds=60 * 60) def sap_bw_to_df( - credentials: dict[str, Any] | None = None, config_key: str | None = None, azure_key_vault_secret: str | None = None, mdx_query: str | None = None, @@ -26,8 +25,6 @@ def sap_bw_to_df( """Task to download data from SAP BW to DataFrame. Args: - credentials (Optional[Dict[str, Any]], optional): SAPBW credentials as a - dictionary. Defaults to None. config_key (Optional[str], optional): The key in the viadot config holding relevant credentials. Defaults to None. azure_key_vault_secret (Optional[str], optional): The name of the Azure Key @@ -43,11 +40,11 @@ def sap_bw_to_df( Returns: pd.DataFrame: The response data as a Pandas Data Frame. """ - if not (azure_key_vault_secret or config_key or credentials): + if not (azure_key_vault_secret or config_key): raise MissingSourceCredentialsError if not config_key: - credentials = credentials or get_credentials(azure_key_vault_secret) + credentials = get_credentials(azure_key_vault_secret) if mdx_query is None: message = "SAP BW `mdx_query` is a mandatory requirement." 
From d8f8f03db447c0758810c390e971eafb806791f1 Mon Sep 17 00:00:00 2001 From: angelika233 Date: Mon, 30 Sep 2024 09:30:05 +0200 Subject: [PATCH 15/18] =?UTF-8?q?=F0=9F=8E=A8=20Make=20mdx=5Fquery=20requi?= =?UTF-8?q?red?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/viadot/orchestration/prefect/tasks/sap_bw.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/viadot/orchestration/prefect/tasks/sap_bw.py b/src/viadot/orchestration/prefect/tasks/sap_bw.py index 4573b9e7c..b10cdc3bb 100644 --- a/src/viadot/orchestration/prefect/tasks/sap_bw.py +++ b/src/viadot/orchestration/prefect/tasks/sap_bw.py @@ -6,7 +6,6 @@ import pandas as pd from prefect import task -from viadot.exceptions import APIError from viadot.orchestration.prefect.exceptions import MissingSourceCredentialsError from viadot.orchestration.prefect.utils import get_credentials @@ -17,25 +16,25 @@ @task(retries=3, log_prints=True, retry_delay_seconds=10, timeout_seconds=60 * 60) def sap_bw_to_df( + mdx_query: str, config_key: str | None = None, azure_key_vault_secret: str | None = None, - mdx_query: str | None = None, mapping_dict: dict[str, Any] | None = None, ) -> pd.DataFrame: """Task to download data from SAP BW to DataFrame. Args: + mdx_query (str, required): The MDX query to be passed to connection. config_key (Optional[str], optional): The key in the viadot config holding relevant credentials. Defaults to None. azure_key_vault_secret (Optional[str], optional): The name of the Azure Key Vault secret where credentials are stored. Defaults to None. - mdx_query (str, optional): The MDX query to be passed to connection. mapping_dict (dict[str, Any], optional): Dictionary with original and new column names. Defaults to None. Raises: MissingSourceCredentialsError: If none credentials have been provided. - APIError: The `mdx_query` is a "must" requirement. + Returns: pd.DataFrame: The response data as a Pandas Data Frame. 
@@ -46,10 +45,6 @@ def sap_bw_to_df( if not config_key: credentials = get_credentials(azure_key_vault_secret) - if mdx_query is None: - message = "SAP BW `mdx_query` is a mandatory requirement." - raise APIError(message) - sap_bw = SAPBW( credentials=credentials, config_key=config_key, From 71d46f8618b3f7ecd7c36acc29cdff03fd9df0e0 Mon Sep 17 00:00:00 2001 From: angelika233 Date: Mon, 30 Sep 2024 09:34:56 +0200 Subject: [PATCH 16/18] =?UTF-8?q?=F0=9F=9A=9A=20move=20test=20file?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../{ => orchestration/prefect/flows}/test_sap_bw.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename tests/integration/{ => orchestration/prefect/flows}/test_sap_bw.py (99%) diff --git a/tests/integration/test_sap_bw.py b/tests/integration/orchestration/prefect/flows/test_sap_bw.py similarity index 99% rename from tests/integration/test_sap_bw.py rename to tests/integration/orchestration/prefect/flows/test_sap_bw.py index 2a3f1a202..33a7f1b9e 100644 --- a/tests/integration/test_sap_bw.py +++ b/tests/integration/orchestration/prefect/flows/test_sap_bw.py @@ -164,7 +164,7 @@ } -def test_mindful_to_adls(sap_bw_config_key, adls_credentials_secret): +def test_sap_bw_to_adls(sap_bw_config_key, adls_credentials_secret): state = sap_bw_to_adls( azure_key_vault_secret=sap_bw_config_key, mdx_query=mdx_query, From 4335d09b818c182d0d5f3b1a5e1d0f0ce713e6b3 Mon Sep 17 00:00:00 2001 From: angelika233 Date: Mon, 30 Sep 2024 09:36:33 +0200 Subject: [PATCH 17/18] =?UTF-8?q?=F0=9F=93=9D=20Update=20dostring?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/viadot/sources/sap_bw.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/viadot/sources/sap_bw.py b/src/viadot/sources/sap_bw.py index dba026f5b..0fde4ed6d 100644 --- a/src/viadot/sources/sap_bw.py +++ b/src/viadot/sources/sap_bw.py @@ -152,7 +152,7 @@ def to_df( Args: if_empty 
(str, optional): What to do if a fetch produce no data. - Defaults to "warn + Defaults to "warn". Raises: ValidationError: Prints the original SAP error message in case of issues @@ -161,7 +161,8 @@ def to_df( column names. Defaults to None. Returns: - pd.Dataframe: The response data as a pandas DataFrame plus viadot metadata. + pd.DataFrame: The response data as a pandas DataFrame, enriched + with viadot metadata columns. """ raw_data = {} @@ -195,7 +196,7 @@ def to_df( return data_frame - def available_columns(self, mdx_query: str) -> list[str]: + def get_available_columns(self, mdx_query: str) -> list[str]: """Generate list of all available columns in a SAP BW table. Args: From 0ddc8a13a2d4d798c64a2059cd37998001b8127e Mon Sep 17 00:00:00 2001 From: angelika233 Date: Mon, 30 Sep 2024 10:42:18 +0200 Subject: [PATCH 18/18] Update docstring --- src/viadot/sources/sap_bw.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/viadot/sources/sap_bw.py b/src/viadot/sources/sap_bw.py index 0fde4ed6d..4da6de87f 100644 --- a/src/viadot/sources/sap_bw.py +++ b/src/viadot/sources/sap_bw.py @@ -153,12 +153,12 @@ def to_df( Args: if_empty (str, optional): What to do if a fetch produce no data. Defaults to "warn". + mapping_dict (dict[str, Any], optional): Dictionary with original and new + column names. Defaults to None. Raises: ValidationError: Prints the original SAP error message in case of issues with MDX execution. - mapping_dict (dict[str, Any], optional): Dictionary with original and new - column names. Defaults to None. Returns: pd.DataFrame: The response data as a pandas DataFrame, enriched