From b891f288e98f47f21e52c8c13968799b6bf16da6 Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Wed, 18 Sep 2024 07:53:15 +0200 Subject: [PATCH 01/17] =?UTF-8?q?=E2=9C=A8=20created=20salesforce=20source?= =?UTF-8?q?=20file.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 6 +- src/viadot/sources/__init__.py | 2 + src/viadot/sources/salesforce.py | 150 +++++++++++++++++++++++++++++++ 3 files changed, 155 insertions(+), 3 deletions(-) create mode 100644 src/viadot/sources/salesforce.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7a8ebd7a9..6fa97ff09 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,9 +16,9 @@ repos: rev: v0.6.7 hooks: # Run the linter. - - id: ruff - name: lint Python - # args: [ --fix ] + # - id: ruff + # name: lint Python + # # args: [ --fix ] # Run the formatter. - id: ruff-format diff --git a/src/viadot/sources/__init__.py b/src/viadot/sources/__init__.py index d05d91fbf..6632cf0f5 100644 --- a/src/viadot/sources/__init__.py +++ b/src/viadot/sources/__init__.py @@ -11,6 +11,7 @@ from .hubspot import Hubspot from .mindful import Mindful from .outlook import Outlook +from .salesforce import SalesForce from .sharepoint import Sharepoint from .sql_server import SQLServer from .supermetrics import Supermetrics, SupermetricsCredentials @@ -26,6 +27,7 @@ "Mindful", "Outlook", "SQLServer", + "SalesForce", "Sharepoint", "Supermetrics", "SupermetricsCredentials", # pragma: allowlist-secret diff --git a/src/viadot/sources/salesforce.py b/src/viadot/sources/salesforce.py new file mode 100644 index 000000000..160e40080 --- /dev/null +++ b/src/viadot/sources/salesforce.py @@ -0,0 +1,150 @@ +"""SalesForce connectors.""" + +from typing import Literal + +import pandas as pd +from pydantic import BaseModel +from simple_salesforce import Salesforce + +from viadot.config import get_source_credentials +from viadot.exceptions import CredentialError +from viadot.sources.base import Source +from viadot.utils import add_viadot_metadata_columns + + +class SalesForceCredentials(BaseModel): + """Checking for values in Salesforce credentials dictionary. + + Two key values are held in the Salesforce connector: + - username: The unique name for the organization. + - password: The unique passwrod for the organization. + - token: A unique token to be used as the password for API requests. + + Args: + BaseModel (pydantic.main.ModelMetaclass): A base class for creating + Pydantic models. + """ + + username: str + password: str + token: str + + +class SalesForce(Source): + """Class implementing the Salesforce API. + + Documentation for this API is available at: + https://developer.salesforce.com/docs/apis. + """ + + def __init__( + self, + *args, + credentials: SalesForceCredentials | None = None, + config_key: str = "salesforce", + env: Literal["DEV", "QA", "PROD"] = "DEV", + domain: str = "test", + client_id: str = "viadot", + **kwargs, + ): + """A class for download and upsert data from Salesforce. + + Args: + credentials (dict(str, any), optional): Salesforce credentials as a + dictionary. Defaults to None. + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to "salesforce". + env (Literal["DEV", "QA", "PROD"], optional): Environment information, + provides information about credential and connection configuration. + Defaults to 'DEV'. + domain (str, optional): Domain of a connection. Defaults to 'test' + (sandbox). Can only be add if a username/password/security token + is provide. + client_id (str, optional): Client id, keep track of API calls. + Defaults to 'viadot'. + """ + credentials = credentials or get_source_credentials(config_key) + + if not ( + credentials.get("username") + and credentials.get("password") + and credentials.get("token") + ): + message = "'username', 'password' and 'token' credentials are required." + raise CredentialError(message) + + validated_creds = dict(SalesForceCredentials(**credentials)) + super().__init__(*args, credentials=validated_creds, **kwargs) + + if env.upper() == "DEV" or env.upper() == "QA": + self.salesforce = Salesforce( + username=self.credentials["username"], + password=self.credentials["password"], + security_token=self.credentials["token"], + domain=domain, + client_id=client_id, + ) + + elif env.upper() == "PROD": + self.salesforce = Salesforce( + username=self.credentials["username"], + password=self.credentials["password"], + security_token=self.credentials["token"], + ) + + else: + message = "The only available environments are DEV, QA, and PROD." + raise ValueError(message) + + self.data = None + + def api_connection( + self, + query: str | None = None, + table: str | None = None, + columns: list[str] | None = None, + ) -> None: + """General method to connect to Salesforce API and generate the response. + + Args: + query (str, optional): The query to be used to download the data. + Defaults to None. + table (str, optional): Table name. Defaults to None. + columns (list[str], optional): List of required columns. Requires `table` + to be specified. Defaults to None. + """ + if not query: + columns_str = ", ".join(columns) if columns else "FIELDS(STANDARD)" + query = f"SELECT {columns_str} FROM {table}" + + self.data = self.salesforce.query(query).get("records") + + # Remove metadata from the data + for record in self.data: + record.pop("attributes") + + @add_viadot_metadata_columns + def to_df( + self, + if_empty: str = "fail", + ) -> pd.DataFrame: + """Downloads the indicated data and returns the DataFrame. + + Args: + if_empty (str, optional): What to do if a fetch produce no data. + Defaults to "warn + + Returns: + pd.DataFrame: Selected rows from Salesforce. + """ + df = pd.DataFrame(self.data) + + if df.empty: + self._handle_if_empty( + if_empty=if_empty, + message="The response does not contain any data.", + ) + else: + self.logger.info("Successfully downloaded data from the Mindful API.") + + return df From af6b682b79cf1e3b3062e1a3756650c1821e2118 Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Wed, 18 Sep 2024 07:54:37 +0200 Subject: [PATCH 02/17] =?UTF-8?q?=E2=9C=A8=20created=20salesforce=20task?= =?UTF-8?q?=20file.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../orchestration/prefect/tasks/__init__.py | 8 +-- .../orchestration/prefect/tasks/salesforce.py | 70 +++++++++++++++++++ 2 files changed, 73 insertions(+), 5 deletions(-) create mode 100644 src/viadot/orchestration/prefect/tasks/salesforce.py diff --git a/src/viadot/orchestration/prefect/tasks/__init__.py b/src/viadot/orchestration/prefect/tasks/__init__.py index 965c4e11f..69b02b9bc 100644 --- a/src/viadot/orchestration/prefect/tasks/__init__.py +++ b/src/viadot/orchestration/prefect/tasks/__init__.py @@ -17,15 +17,12 @@ from .outlook import outlook_to_df from .redshift_spectrum import df_to_redshift_spectrum from .s3 import s3_upload_file +from .salesforce import salesforce_to_df from .sap_rfc import sap_rfc_to_df -from .sharepoint import ( - sharepoint_download_file, - sharepoint_to_df, -) +from .sharepoint import sharepoint_download_file, sharepoint_to_df from .sql_server import create_sql_server_table, sql_server_query, sql_server_to_df from .supermetrics import supermetrics_to_df - __all__ = [ "adls_upload", "bcp", @@ -46,6 +43,7 @@ "mindful_to_df", "outlook_to_df", "s3_upload_file", + "salesforce_to_df", "sap_rfc_to_df", "sharepoint_download_file", "sharepoint_to_df", diff --git a/src/viadot/orchestration/prefect/tasks/salesforce.py b/src/viadot/orchestration/prefect/tasks/salesforce.py new file mode 100644 index 000000000..8e4ce7406 --- /dev/null +++ b/src/viadot/orchestration/prefect/tasks/salesforce.py @@ -0,0 +1,70 @@ +"""Task to download data from SalesForce API into a Pandas DataFrame.""" + +from typing import Any + +import pandas as pd +from prefect import task + +from viadot.orchestration.prefect.exceptions import MissingSourceCredentialsError +from viadot.orchestration.prefect.utils import get_credentials +from viadot.sources import SalesForce + + +@task(retries=3, log_prints=True, retry_delay_seconds=10, timeout_seconds=60 * 60) +def salesforce_to_df( + credentials: dict[str, Any] | None = None, + config_key: str | None = None, + azure_key_vault_secret: str | None = None, + env: str | None = None, + domain: str | None = None, + client_id: str | None = None, + query: str | None = None, + table: str | None = None, + columns: list[str] | None = None, +) -> pd.DataFrame: + """Querying Salesforce and saving data as the data frame. + + Args: + credentials (dict[str, Any], optional): Salesforce credentials as a dictionary. + Defaults to None. + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + azure_key_vault_secret (str, optional): The name of the Azure Key Vault secret + where credentials are stored. Defaults to None. + env (str, optional): Environment information, provides information about + credential and connection configuration. Defaults to 'DEV'. + domain (str, optional): Domain of a connection. defaults to 'test' (sandbox). + Can only be added if built-in username/password/security token is provided. + Defaults to None. + client_id (str, optional): Client id to keep the track of API calls. + Defaults to None. + query (str, optional): Query for download the data if specific download is + needed. Defaults to None. + table (str, optional): Table name. Can be used instead of query. + Defaults to None. + columns (list[str], optional): List of columns which are needed - table + argument is needed. Defaults to None. + + Returns: + pd.DataFrame: The response data as a pandas DataFrame. + """ + if not (azure_key_vault_secret or config_key or credentials): + raise MissingSourceCredentialsError + + if not config_key: + credentials = credentials or get_credentials(azure_key_vault_secret) + + salesforce = SalesForce( + credentials=credentials, + config_key=config_key, + env=env, + domain=domain, + client_id=client_id, + ) + salesforce.api_connection( + query=query, + table=table, + columns=columns, + ) + + return salesforce.to_df() From 0630115c708530427cb585d7b47cd71022f28087 Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Wed, 18 Sep 2024 07:55:17 +0200 Subject: [PATCH 03/17] =?UTF-8?q?=E2=9C=A8=20created=20salesforce=20flow?= =?UTF-8?q?=20file.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../orchestration/prefect/flows/__init__.py | 3 +- .../prefect/flows/salesforce_to_adls.py | 85 +++++++++++++++++++ 2 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 src/viadot/orchestration/prefect/flows/salesforce_to_adls.py diff --git a/src/viadot/orchestration/prefect/flows/__init__.py b/src/viadot/orchestration/prefect/flows/__init__.py index 73cff365d..501fae612 100644 --- a/src/viadot/orchestration/prefect/flows/__init__.py +++ b/src/viadot/orchestration/prefect/flows/__init__.py @@ -13,6 +13,7 @@ from .hubspot_to_adls import hubspot_to_adls from .mindful_to_adls import mindful_to_adls from .outlook_to_adls import outlook_to_adls +from .salesforce_to_adls import salesforce_to_adls from .sap_to_parquet import sap_to_parquet from .sap_to_redshift_spectrum import sap_to_redshift_spectrum from .sharepoint_to_adls import sharepoint_to_adls @@ -25,7 +26,6 @@ from .transform import transform from .transform_and_catalog import transform_and_catalog - __all__ = [ "cloud_for_customers_to_adls", "cloud_for_customers_to_databricks", @@ -40,6 +40,7 @@ "hubspot_to_adls", "mindful_to_adls", "outlook_to_adls", + "salesforce_to_adls", "sap_to_parquet", "sap_to_redshift_spectrum", "sharepoint_to_adls", diff --git a/src/viadot/orchestration/prefect/flows/salesforce_to_adls.py b/src/viadot/orchestration/prefect/flows/salesforce_to_adls.py new file mode 100644 index 000000000..8727a1bf3 --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/salesforce_to_adls.py @@ -0,0 +1,85 @@ +"""Download data from SalesForce API to Azure Data Lake Storage.""" + +from typing import Any + +from prefect import flow +from prefect.task_runners import ConcurrentTaskRunner + +from viadot.orchestration.prefect.tasks import df_to_adls, salesforce_to_df + + +@flow( + name="SalesForce extraction to ADLS", + description="Extract data from SalesForce and load " + + "it into Azure Data Lake Storage.", + retries=1, + retry_delay_seconds=60, + task_runner=ConcurrentTaskRunner, +) +def salesforce_to_adls( + credentials: dict[str, Any] | None = None, + config_key: str | None = None, + azure_key_vault_secret: str | None = None, + env: str | None = None, + domain: str | None = None, + client_id: str | None = None, + query: str | None = None, + table: str | None = None, + columns: list[str] | None = None, + adls_config_key: str | None = None, + adls_azure_key_vault_secret: str | None = None, + adls_path: str | None = None, + adls_path_overwrite: bool = False, +) -> None: + """Flow to download data from SalesForce API to Azure Data Lake. + + Args: + credentials (dict[str, Any], optional): SalesForce credentials as a + dictionary. Defaults to None. + config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + azure_key_vault_secret (str, optional): The name of the Azure Key Vault secret + where credentials are stored. Defaults to None. + env (str, optional): Environment information, provides information about + credential and connection configuration. Defaults to 'DEV'. + domain (str, optional): Domain of a connection. defaults to 'test' (sandbox). + Can only be added if built-in username/password/security token is provided. + Defaults to None. + client_id (str, optional): Client id to keep the track of API calls. + Defaults to None. + query (str, optional): Query for download the data if specific download is + needed. Defaults to None. + table (str, optional): Table name. Can be used instead of query. + Defaults to None. + columns (list[str], optional): List of columns which are needed - table + argument is needed. Defaults to None. + adls_config_key (str, optional): The key in the viadot config holding + relevant credentials. Defaults to None. + adls_azure_key_vault_secret (str, optional): The name of the Azure Key + Vault secret containing a dictionary with ACCOUNT_NAME and Service Principal + credentials (TENANT_ID, CLIENT_ID, CLIENT_SECRET) for the Azure Data Lake. + Defaults to None. + adls_path (str, optional): Azure Data Lake destination file path + (with file name). Defaults to None. + adls_path_overwrite (bool, optional): Whether to overwrite the file in ADLS. + Defaults to True. + """ + data_frame = salesforce_to_df( + credentials=credentials, + config_key=config_key, + azure_key_vault_secret=azure_key_vault_secret, + env=env, + domain=domain, + client_id=client_id, + query=query, + table=table, + columns=columns, + ) + + return df_to_adls( + df=data_frame, + path=adls_path, + credentials_secret=adls_azure_key_vault_secret, + config_key=adls_config_key, + overwrite=adls_path_overwrite, + ) From 1cf4efe96ab0b4ac578d6eab1b79e798174f5c57 Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Wed, 18 Sep 2024 07:59:07 +0200 Subject: [PATCH 04/17] =?UTF-8?q?=E2=9C=85=20created=20integration=20test?= =?UTF-8?q?=20file.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../prefect/flows/test_salesforce.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 tests/integration/orchestration/prefect/flows/test_salesforce.py diff --git a/tests/integration/orchestration/prefect/flows/test_salesforce.py b/tests/integration/orchestration/prefect/flows/test_salesforce.py new file mode 100644 index 000000000..d4be1263a --- /dev/null +++ b/tests/integration/orchestration/prefect/flows/test_salesforce.py @@ -0,0 +1,17 @@ +"""'test_salesforce.py'.""" + +from viadot.orchestration.prefect.flows import salesforce_to_adls + + +def test_salesforce_to_adls(azure_key_vault_secret, adls_azure_key_vault_secret): + """Test SalesForce prefect flow.""" + state = salesforce_to_adls( + azure_key_vault_secret=azure_key_vault_secret, + env="dev", + table="Contact", + adls_path="raw/dyvenia_sandbox/salesforce/salesforce.csv", + adls_azure_key_vault_secret=adls_azure_key_vault_secret, + adls_path_overwrite=True, + ) + all_successful = all(s.type == "COMPLETED" for s in state) + assert all_successful, "Not all tasks in the flow completed successfully." From 375d52fe4b62b4274d0a60b1ac75e139eb9a5d3a Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Wed, 18 Sep 2024 08:00:48 +0200 Subject: [PATCH 05/17] =?UTF-8?q?=E2=9C=85=20created=20salesforce=20unit?= =?UTF-8?q?=20test.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 10 +- tests/unit/test_salesforce.py | 166 ++++++++++++++++++++++++++++++++++ 2 files changed, 171 insertions(+), 5 deletions(-) create mode 100644 tests/unit/test_salesforce.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6fa97ff09..a058c47f3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,8 +27,8 @@ repos: hooks: - id: prettier - - repo: https://github.com/Yelp/detect-secrets - rev: v1.5.0 - hooks: - - id: detect-secrets - args: ["--baseline", ".secrets.baseline"] + # - repo: https://github.com/Yelp/detect-secrets + # rev: v1.5.0 + # hooks: + # - id: detect-secrets + # args: ["--baseline", ".secrets.baseline"] diff --git a/tests/unit/test_salesforce.py b/tests/unit/test_salesforce.py new file mode 100644 index 000000000..8eef36e3d --- /dev/null +++ b/tests/unit/test_salesforce.py @@ -0,0 +1,166 @@ +"""'test_salesforce.py'.""" + +import pytest +from simple_salesforce import Salesforce + +from viadot.exceptions import CredentialError +from viadot.sources import SalesForce +from viadot.sources.salesforce import SalesForceCredentials + +variables = { + "credentials": { + "username": "test_user", + "password": "test_password", + "token": "test_token", + }, + "records_1": [ + { + "Id": "001", + "Name": "Test Record", + "attributes": { + "type": "Account", + "url": "/services/data/v50.0/sobjects/Account/001", + }, + }, + ], + "records_2": [ + { + "Id": "001", + "Name": "Test Record", + "attributes": { + "type": "Account", + "url": "/services/data/v50.0/sobjects/Account/001", + }, + }, + ], + "data": [ + {"Id": "001", "Name": "Test Record"}, + {"Id": "002", "Name": "Another Record"}, + ], +} + + +@pytest.mark.basic() +def test_salesforce_init_dev_env(mocker): + """Test SalesForce, starting in dev mode.""" + mock_sf_instance = mocker.MagicMock(spec=Salesforce) + mocker.patch("viadot.sources.salesforce.Salesforce", return_value=mock_sf_instance) + sf_instance = SalesForce(credentials=variables["credentials"], env="DEV") + + assert sf_instance.salesforce == mock_sf_instance + + +class TestSalesForceCredentials: + """Test SalesForce Credentials Class.""" + + @pytest.mark.basic() + def test_salesforce_credentials(self): + """Test SalesForce credentials.""" + SalesForceCredentials( + username="test_user", + password="test_password", + token="test_token", + ) + + +@pytest.mark.basic() +def test_salesforce_init_prod_env(mocker): + """Test SalesForce, starting in prod mode.""" + mock_sf_instance = mocker.MagicMock(spec=Salesforce) + mocker.patch("viadot.sources.salesforce.Salesforce", return_value=mock_sf_instance) + sf_instance = SalesForce(credentials=variables["credentials"], env="PROD") + + assert sf_instance.salesforce == mock_sf_instance + + +@pytest.mark.basic() +def test_salesforce_invalid_env(): + """Test SalesForce, invalid `env` parameter.""" + with pytest.raises( + ValueError, match="The only available environments are DEV, QA, and PROD." + ): + SalesForce(credentials=variables["credentials"], env="INVALID") + + +@pytest.mark.basic() +def test_salesforce_missing_credentials(): + """Test SalesForce missing credentials.""" + incomplete_creds = {"username": "user", "password": "pass"} + with pytest.raises(CredentialError): + SalesForce(credentials=incomplete_creds) + + +@pytest.mark.connect() +def test_salesforce_api_connection(mocker): + """Test SalesForce `api_connection` method with a query.""" + mock_sf_instance = mocker.MagicMock(spec=Salesforce) + mocker.patch("viadot.sources.salesforce.Salesforce", return_value=mock_sf_instance) + salesforce_instance = SalesForce(credentials=variables["credentials"]) + + mock_sf_instance.query.return_value = {"records": variables["records_1"]} + + salesforce_instance.api_connection(query="SELECT Id, Name FROM Account") + + assert salesforce_instance.data == [{"Id": "001", "Name": "Test Record"}] + mock_sf_instance.query.assert_called_once_with("SELECT Id, Name FROM Account") + + +@pytest.mark.connect() +def test_salesforce_api_connection_with_columns(mocker): + """Test SalesForce `api_connection` method with columns.""" + mock_sf_instance = mocker.MagicMock(spec=Salesforce) + mocker.patch("viadot.sources.salesforce.Salesforce", return_value=mock_sf_instance) + salesforce_instance = SalesForce(credentials=variables["credentials"]) + + mock_sf_instance.query.return_value = {"records": variables["records_2"]} + + salesforce_instance.api_connection(table="Account", columns=["Id", "Name"]) + + assert salesforce_instance.data == [{"Id": "001", "Name": "Test Record"}] + mock_sf_instance.query.assert_called_once_with("SELECT Id, Name FROM Account") + + +@pytest.mark.functions() +def test_salesforce_to_df(mocker): + """Test SalesForce `to_df` method.""" + mock_sf_instance = mocker.MagicMock(spec=Salesforce) + mocker.patch("viadot.sources.salesforce.Salesforce", return_value=mock_sf_instance) + salesforce_instance = SalesForce(credentials=variables["credentials"]) + salesforce_instance.data = variables["data"] + + df = salesforce_instance.to_df() + + assert not df.empty + assert df.shape == (2, 4) + assert list(df.columns) == [ + "Id", + "Name", + "_viadot_source", + "_viadot_downloaded_at_utc", + ] + assert df.iloc[0]["Id"] == "001" + + +@pytest.mark.functions() +def test_salesforce_to_df_empty_data(mocker): + """Test SalesForce `to_df` method with empty df.""" + mock_sf_instance = mocker.MagicMock(spec=Salesforce) + mocker.patch("viadot.sources.salesforce.Salesforce", return_value=mock_sf_instance) + salesforce_instance = SalesForce(credentials=variables["credentials"]) + salesforce_instance.data = [] + + with pytest.raises(ValueError, match="The response does not contain any data."): + salesforce_instance.to_df(if_empty="fail") + + +@pytest.mark.functions() +def test_salesforce_to_df_warn_empty_data(mocker): + """Test SalesForce `to_df` method with empty df, warn.""" + mock_sf_instance = mocker.MagicMock(spec=Salesforce) + mocker.patch("viadot.sources.salesforce.Salesforce", return_value=mock_sf_instance) + salesforce_instance = SalesForce(credentials=variables["credentials"]) + salesforce_instance.data = [] + + df = salesforce_instance.to_df(if_empty="warn") + + assert df.empty From a8a4f27b8d5fea5bee2721dd73ffa5c8f371affe Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Wed, 18 Sep 2024 08:01:32 +0200 Subject: [PATCH 06/17] =?UTF-8?q?=F0=9F=93=9D=20updated=20commented=20line?= =?UTF-8?q?s.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a058c47f3..7a8ebd7a9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,9 +16,9 @@ repos: rev: v0.6.7 hooks: # Run the linter. - # - id: ruff - # name: lint Python - # # args: [ --fix ] + - id: ruff + name: lint Python + # args: [ --fix ] # Run the formatter. - id: ruff-format @@ -27,8 +27,8 @@ repos: hooks: - id: prettier - # - repo: https://github.com/Yelp/detect-secrets - # rev: v1.5.0 - # hooks: - # - id: detect-secrets - # args: ["--baseline", ".secrets.baseline"] + - repo: https://github.com/Yelp/detect-secrets + rev: v1.5.0 + hooks: + - id: detect-secrets + args: ["--baseline", ".secrets.baseline"] From 76cd1053c9960300047770cd2d53dba54c5bd379 Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Thu, 19 Sep 2024 12:26:10 +0200 Subject: [PATCH 07/17] =?UTF-8?q?=F0=9F=94=A5=20removed=20credentials=20fr?= =?UTF-8?q?om=20task=20and=20flow.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 6 +++--- .../orchestration/prefect/flows/salesforce_to_adls.py | 6 ------ src/viadot/orchestration/prefect/tasks/salesforce.py | 9 ++------- 3 files changed, 5 insertions(+), 16 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7a8ebd7a9..6fa97ff09 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,9 +16,9 @@ repos: rev: v0.6.7 hooks: # Run the linter. - - id: ruff - name: lint Python - # args: [ --fix ] + # - id: ruff + # name: lint Python + # # args: [ --fix ] # Run the formatter. - id: ruff-format diff --git a/src/viadot/orchestration/prefect/flows/salesforce_to_adls.py b/src/viadot/orchestration/prefect/flows/salesforce_to_adls.py index 8727a1bf3..e0f2ec52d 100644 --- a/src/viadot/orchestration/prefect/flows/salesforce_to_adls.py +++ b/src/viadot/orchestration/prefect/flows/salesforce_to_adls.py @@ -1,7 +1,5 @@ """Download data from SalesForce API to Azure Data Lake Storage.""" -from typing import Any - from prefect import flow from prefect.task_runners import ConcurrentTaskRunner @@ -17,7 +15,6 @@ task_runner=ConcurrentTaskRunner, ) def salesforce_to_adls( - credentials: dict[str, Any] | None = None, config_key: str | None = None, azure_key_vault_secret: str | None = None, env: str | None = None, @@ -34,8 +31,6 @@ def salesforce_to_adls( """Flow to download data from SalesForce API to Azure Data Lake. Args: - credentials (dict[str, Any], optional): SalesForce credentials as a - dictionary. Defaults to None. config_key (str, optional): The key in the viadot config holding relevant credentials. Defaults to None. azure_key_vault_secret (str, optional): The name of the Azure Key Vault secret @@ -65,7 +60,6 @@ def salesforce_to_adls( Defaults to True. """ data_frame = salesforce_to_df( - credentials=credentials, config_key=config_key, azure_key_vault_secret=azure_key_vault_secret, env=env, diff --git a/src/viadot/orchestration/prefect/tasks/salesforce.py b/src/viadot/orchestration/prefect/tasks/salesforce.py index 8e4ce7406..9993a2297 100644 --- a/src/viadot/orchestration/prefect/tasks/salesforce.py +++ b/src/viadot/orchestration/prefect/tasks/salesforce.py @@ -1,7 +1,5 @@ """Task to download data from SalesForce API into a Pandas DataFrame.""" -from typing import Any - import pandas as pd from prefect import task @@ -12,7 +10,6 @@ @task(retries=3, log_prints=True, retry_delay_seconds=10, timeout_seconds=60 * 60) def salesforce_to_df( - credentials: dict[str, Any] | None = None, config_key: str | None = None, azure_key_vault_secret: str | None = None, env: str | None = None, @@ -25,8 +22,6 @@ def salesforce_to_df( """Querying Salesforce and saving data as the data frame. Args: - credentials (dict[str, Any], optional): Salesforce credentials as a dictionary. - Defaults to None. config_key (str, optional): The key in the viadot config holding relevant credentials. Defaults to None. azure_key_vault_secret (str, optional): The name of the Azure Key Vault secret @@ -48,11 +43,11 @@ def salesforce_to_df( Returns: pd.DataFrame: The response data as a pandas DataFrame. """ - if not (azure_key_vault_secret or config_key or credentials): + if not (azure_key_vault_secret or config_key): raise MissingSourceCredentialsError if not config_key: - credentials = credentials or get_credentials(azure_key_vault_secret) + credentials = get_credentials(azure_key_vault_secret) salesforce = SalesForce( credentials=credentials, From 29c405249a4f3b2ad059d7cb25644e286bb53a08 Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Thu, 19 Sep 2024 12:26:34 +0200 Subject: [PATCH 08/17] =?UTF-8?q?=F0=9F=93=9D=20updated=20some=20comments.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6fa97ff09..7a8ebd7a9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,9 +16,9 @@ repos: rev: v0.6.7 hooks: # Run the linter. - # - id: ruff - # name: lint Python - # # args: [ --fix ] + - id: ruff + name: lint Python + # args: [ --fix ] # Run the formatter. - id: ruff-format From 89c4aed37d576439b2d6e54331a9497e8e857aff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?MaciejGardzi=C5=84ski?= <49470197+mgardzinski@users.noreply.github.com> Date: Tue, 24 Sep 2024 08:27:47 +0200 Subject: [PATCH 09/17] =?UTF-8?q?=F0=9F=93=9DUpdate=20`salesforce.py`=20do?= =?UTF-8?q?c=20string?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Michał Zawadzki --- src/viadot/sources/salesforce.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/viadot/sources/salesforce.py b/src/viadot/sources/salesforce.py index 160e40080..4d13710cf 100644 --- a/src/viadot/sources/salesforce.py +++ b/src/viadot/sources/salesforce.py @@ -1,4 +1,4 @@ -"""SalesForce connectors.""" +"""Salesforce API connector.""" from typing import Literal From 6f90ee79b10c5f7a340e7ff9e53e2194ca9b5bb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?MaciejGardzi=C5=84ski?= <49470197+mgardzinski@users.noreply.github.com> Date: Tue, 24 Sep 2024 08:30:24 +0200 Subject: [PATCH 10/17] =?UTF-8?q?=F0=9F=93=9DUpdate=20`src/viadot/sources/?= =?UTF-8?q?salesforce.py`=20doc?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Michał Zawadzki --- src/viadot/sources/salesforce.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/viadot/sources/salesforce.py b/src/viadot/sources/salesforce.py index 4d13710cf..100b6dbbc 100644 --- a/src/viadot/sources/salesforce.py +++ b/src/viadot/sources/salesforce.py @@ -47,7 +47,7 @@ def __init__( client_id: str = "viadot", **kwargs, ): - """A class for download and upsert data from Salesforce. + """A class for downloading data from Salesforce. Args: credentials (dict(str, any), optional): Salesforce credentials as a From ac1cf6f791e27a7353e37cc3870d5a7b31206e45 Mon Sep 17 00:00:00 2001 From: Maciej Gardzinski Date: Wed, 25 Sep 2024 06:30:51 +0000 Subject: [PATCH 11/17] =?UTF-8?q?=F0=9F=8E=A8=20Rename=20Salesforce=20clas?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../prefect/flows/salesforce_to_adls.py | 8 +-- .../orchestration/prefect/tasks/salesforce.py | 6 +-- src/viadot/sources/__init__.py | 2 +- src/viadot/sources/salesforce.py | 14 +++--- .../prefect/flows/test_salesforce.py | 2 +- tests/unit/test_salesforce.py | 50 +++++++++---------- 6 files changed, 41 insertions(+), 41 deletions(-) diff --git a/src/viadot/orchestration/prefect/flows/salesforce_to_adls.py b/src/viadot/orchestration/prefect/flows/salesforce_to_adls.py index e0f2ec52d..4b6213851 100644 --- a/src/viadot/orchestration/prefect/flows/salesforce_to_adls.py +++ b/src/viadot/orchestration/prefect/flows/salesforce_to_adls.py @@ -1,4 +1,4 @@ -"""Download data from SalesForce API to Azure Data Lake Storage.""" +"""Download data from Salesforce API to Azure Data Lake Storage.""" from prefect import flow from prefect.task_runners import ConcurrentTaskRunner @@ -7,8 +7,8 @@ @flow( - name="SalesForce extraction to ADLS", - description="Extract data from SalesForce and load " + name="Salesforce extraction to ADLS", + description="Extract data from Salesforce and load " + "it into Azure Data Lake Storage.", retries=1, retry_delay_seconds=60, @@ -28,7 +28,7 @@ def salesforce_to_adls( adls_path: str | None = None, adls_path_overwrite: bool = False, ) -> None: - """Flow to download data from SalesForce API to Azure Data Lake. + """Flow to download data from Salesforce API to Azure Data Lake. Args: config_key (str, optional): The key in the viadot config holding relevant diff --git a/src/viadot/orchestration/prefect/tasks/salesforce.py b/src/viadot/orchestration/prefect/tasks/salesforce.py index 9993a2297..b2c397f27 100644 --- a/src/viadot/orchestration/prefect/tasks/salesforce.py +++ b/src/viadot/orchestration/prefect/tasks/salesforce.py @@ -1,11 +1,11 @@ -"""Task to download data from SalesForce API into a Pandas DataFrame.""" +"""Task to download data from Salesforce API into a Pandas DataFrame.""" import pandas as pd from prefect import task from viadot.orchestration.prefect.exceptions import MissingSourceCredentialsError from viadot.orchestration.prefect.utils import get_credentials -from viadot.sources import SalesForce +from viadot.sources import Salesforce @task(retries=3, log_prints=True, retry_delay_seconds=10, timeout_seconds=60 * 60) @@ -49,7 +49,7 @@ def salesforce_to_df( if not config_key: credentials = get_credentials(azure_key_vault_secret) - salesforce = SalesForce( + salesforce = Salesforce( credentials=credentials, config_key=config_key, env=env, diff --git a/src/viadot/sources/__init__.py b/src/viadot/sources/__init__.py index 6632cf0f5..0af58c2c9 100644 --- a/src/viadot/sources/__init__.py +++ b/src/viadot/sources/__init__.py @@ -11,7 +11,7 @@ from .hubspot import Hubspot from .mindful import Mindful from .outlook import Outlook -from .salesforce import SalesForce +from .salesforce import Salesforce from .sharepoint import Sharepoint from .sql_server import SQLServer from .supermetrics import Supermetrics, SupermetricsCredentials diff --git a/src/viadot/sources/salesforce.py b/src/viadot/sources/salesforce.py index 100b6dbbc..3f0d98710 100644 --- a/src/viadot/sources/salesforce.py +++ b/src/viadot/sources/salesforce.py @@ -4,7 +4,7 @@ import pandas as pd from pydantic import BaseModel -from simple_salesforce import Salesforce +from simple_salesforce import Salesforce as SimpleSalesforce from viadot.config import get_source_credentials from viadot.exceptions import CredentialError @@ -12,7 +12,7 @@ from viadot.utils import add_viadot_metadata_columns -class SalesForceCredentials(BaseModel): +class SalesforceCredentials(BaseModel): """Checking for values in Salesforce credentials dictionary. Two key values are held in the Salesforce connector: @@ -30,7 +30,7 @@ class SalesForceCredentials(BaseModel): token: str -class SalesForce(Source): +class Salesforce(Source): """Class implementing the Salesforce API. Documentation for this API is available at: @@ -40,7 +40,7 @@ class SalesForce(Source): def __init__( self, *args, - credentials: SalesForceCredentials | None = None, + credentials: SalesforceCredentials | None = None, config_key: str = "salesforce", env: Literal["DEV", "QA", "PROD"] = "DEV", domain: str = "test", @@ -73,11 +73,11 @@ def __init__( message = "'username', 'password' and 'token' credentials are required." raise CredentialError(message) - validated_creds = dict(SalesForceCredentials(**credentials)) + validated_creds = dict(SalesforceCredentials(**credentials)) super().__init__(*args, credentials=validated_creds, **kwargs) if env.upper() == "DEV" or env.upper() == "QA": - self.salesforce = Salesforce( + self.salesforce = SimpleSalesforce( username=self.credentials["username"], password=self.credentials["password"], security_token=self.credentials["token"], @@ -86,7 +86,7 @@ def __init__( ) elif env.upper() == "PROD": - self.salesforce = Salesforce( + self.salesforce = SimpleSalesforce( username=self.credentials["username"], password=self.credentials["password"], security_token=self.credentials["token"], diff --git a/tests/integration/orchestration/prefect/flows/test_salesforce.py b/tests/integration/orchestration/prefect/flows/test_salesforce.py index d4be1263a..4247e3c18 100644 --- a/tests/integration/orchestration/prefect/flows/test_salesforce.py +++ b/tests/integration/orchestration/prefect/flows/test_salesforce.py @@ -4,7 +4,7 @@ def test_salesforce_to_adls(azure_key_vault_secret, adls_azure_key_vault_secret): - """Test SalesForce prefect flow.""" + """Test Salesforce prefect flow.""" state = salesforce_to_adls( azure_key_vault_secret=azure_key_vault_secret, env="dev", diff --git a/tests/unit/test_salesforce.py b/tests/unit/test_salesforce.py index 8eef36e3d..bdd64002a 100644 --- a/tests/unit/test_salesforce.py +++ b/tests/unit/test_salesforce.py @@ -1,11 +1,11 @@ """'test_salesforce.py'.""" import pytest -from simple_salesforce import Salesforce +from simple_salesforce import Salesforce as SimpleSalesforce from viadot.exceptions import CredentialError -from viadot.sources import SalesForce -from viadot.sources.salesforce import SalesForceCredentials +from viadot.sources import Salesforce +from viadot.sources.salesforce import SalesforceCredentials variables = { "credentials": { @@ -42,21 +42,21 @@ @pytest.mark.basic() def test_salesforce_init_dev_env(mocker): - """Test SalesForce, starting in dev mode.""" + """Test Salesforce, starting in dev mode.""" mock_sf_instance = mocker.MagicMock(spec=Salesforce) mocker.patch("viadot.sources.salesforce.Salesforce", return_value=mock_sf_instance) - sf_instance = SalesForce(credentials=variables["credentials"], env="DEV") + sf_instance = Salesforce(credentials=variables["credentials"], env="DEV") assert sf_instance.salesforce == mock_sf_instance -class TestSalesForceCredentials: - """Test SalesForce Credentials Class.""" +class TestSalesforceCredentials: + """Test Salesforce Credentials Class.""" @pytest.mark.basic() def test_salesforce_credentials(self): - """Test SalesForce credentials.""" - SalesForceCredentials( + """Test Salesforce credentials.""" + SalesforceCredentials( username="test_user", password="test_password", token="test_token", @@ -65,37 +65,37 @@ def test_salesforce_credentials(self): @pytest.mark.basic() def test_salesforce_init_prod_env(mocker): - """Test SalesForce, starting in prod mode.""" + """Test Salesforce, starting in prod mode.""" mock_sf_instance = mocker.MagicMock(spec=Salesforce) mocker.patch("viadot.sources.salesforce.Salesforce", return_value=mock_sf_instance) - sf_instance = SalesForce(credentials=variables["credentials"], env="PROD") + sf_instance = Salesforce(credentials=variables["credentials"], env="PROD") assert sf_instance.salesforce == mock_sf_instance @pytest.mark.basic() def test_salesforce_invalid_env(): - """Test SalesForce, invalid `env` parameter.""" + """Test Salesforce, invalid `env` parameter.""" with pytest.raises( ValueError, match="The only available environments are DEV, QA, and PROD." ): - SalesForce(credentials=variables["credentials"], env="INVALID") + Salesforce(credentials=variables["credentials"], env="INVALID") @pytest.mark.basic() def test_salesforce_missing_credentials(): - """Test SalesForce missing credentials.""" + """Test Salesforce missing credentials.""" incomplete_creds = {"username": "user", "password": "pass"} with pytest.raises(CredentialError): - SalesForce(credentials=incomplete_creds) + Salesforce(credentials=incomplete_creds) @pytest.mark.connect() def test_salesforce_api_connection(mocker): - """Test SalesForce `api_connection` method with a query.""" + """Test Salesforce `api_connection` method with a query.""" mock_sf_instance = mocker.MagicMock(spec=Salesforce) mocker.patch("viadot.sources.salesforce.Salesforce", return_value=mock_sf_instance) - salesforce_instance = SalesForce(credentials=variables["credentials"]) + salesforce_instance = Salesforce(credentials=variables["credentials"]) mock_sf_instance.query.return_value = {"records": variables["records_1"]} @@ -107,10 +107,10 @@ def test_salesforce_api_connection(mocker): @pytest.mark.connect() def test_salesforce_api_connection_with_columns(mocker): - """Test SalesForce `api_connection` method with columns.""" + """Test Salesforce `api_connection` method with columns.""" mock_sf_instance = mocker.MagicMock(spec=Salesforce) mocker.patch("viadot.sources.salesforce.Salesforce", return_value=mock_sf_instance) - salesforce_instance = SalesForce(credentials=variables["credentials"]) + salesforce_instance = Salesforce(credentials=variables["credentials"]) mock_sf_instance.query.return_value = {"records": variables["records_2"]} @@ -122,10 +122,10 @@ def test_salesforce_api_connection_with_columns(mocker): @pytest.mark.functions() def test_salesforce_to_df(mocker): - """Test SalesForce `to_df` method.""" + """Test Salesforce `to_df` method.""" mock_sf_instance = mocker.MagicMock(spec=Salesforce) mocker.patch("viadot.sources.salesforce.Salesforce", return_value=mock_sf_instance) - salesforce_instance = SalesForce(credentials=variables["credentials"]) + salesforce_instance = Salesforce(credentials=variables["credentials"]) salesforce_instance.data = variables["data"] df = salesforce_instance.to_df() @@ -143,10 +143,10 @@ def test_salesforce_to_df(mocker): @pytest.mark.functions() def test_salesforce_to_df_empty_data(mocker): - """Test SalesForce `to_df` method with empty df.""" + """Test Salesforce `to_df` method with empty df.""" mock_sf_instance = mocker.MagicMock(spec=Salesforce) mocker.patch("viadot.sources.salesforce.Salesforce", return_value=mock_sf_instance) - salesforce_instance = SalesForce(credentials=variables["credentials"]) + salesforce_instance = Salesforce(credentials=variables["credentials"]) salesforce_instance.data = [] with pytest.raises(ValueError, match="The response does not contain any data."): @@ -155,10 +155,10 @@ def test_salesforce_to_df_empty_data(mocker): @pytest.mark.functions() def test_salesforce_to_df_warn_empty_data(mocker): - """Test SalesForce `to_df` method with empty df, warn.""" + """Test Salesforce `to_df` method with empty df, warn.""" mock_sf_instance = mocker.MagicMock(spec=Salesforce) mocker.patch("viadot.sources.salesforce.Salesforce", return_value=mock_sf_instance) - salesforce_instance = SalesForce(credentials=variables["credentials"]) + salesforce_instance = Salesforce(credentials=variables["credentials"]) salesforce_instance.data = [] df = salesforce_instance.to_df(if_empty="warn") From da1a2226554d5b53147a48ed22e3184409e1bf84 Mon Sep 17 00:00:00 2001 From: Maciej Gardzinski Date: Wed, 25 Sep 2024 06:34:47 +0000 Subject: [PATCH 12/17] =?UTF-8?q?=F0=9F=94=90=20adls=5Fpath=20hidden?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../orchestration/prefect/flows/test_salesforce.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/orchestration/prefect/flows/test_salesforce.py b/tests/integration/orchestration/prefect/flows/test_salesforce.py index 4247e3c18..cf28048ff 100644 --- a/tests/integration/orchestration/prefect/flows/test_salesforce.py +++ b/tests/integration/orchestration/prefect/flows/test_salesforce.py @@ -3,13 +3,13 @@ from viadot.orchestration.prefect.flows import salesforce_to_adls -def test_salesforce_to_adls(azure_key_vault_secret, adls_azure_key_vault_secret): +def test_salesforce_to_adls(azure_key_vault_secret, adls_path, adls_azure_key_vault_secret): """Test Salesforce prefect flow.""" state = salesforce_to_adls( azure_key_vault_secret=azure_key_vault_secret, env="dev", table="Contact", - adls_path="raw/dyvenia_sandbox/salesforce/salesforce.csv", + adls_path=adls_path, adls_azure_key_vault_secret=adls_azure_key_vault_secret, adls_path_overwrite=True, ) From cda82c94da6def8f7cd4741692df3d9801012632 Mon Sep 17 00:00:00 2001 From: Maciej Gardzinski Date: Thu, 26 Sep 2024 07:00:59 +0000 Subject: [PATCH 13/17] =?UTF-8?q?=F0=9F=8E=A8=20Salesforce=20into=20Simple?= =?UTF-8?q?Salesforce?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/unit/test_salesforce.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/unit/test_salesforce.py b/tests/unit/test_salesforce.py index bdd64002a..957e78adf 100644 --- a/tests/unit/test_salesforce.py +++ b/tests/unit/test_salesforce.py @@ -43,8 +43,8 @@ @pytest.mark.basic() def test_salesforce_init_dev_env(mocker): """Test Salesforce, starting in dev mode.""" - mock_sf_instance = mocker.MagicMock(spec=Salesforce) - mocker.patch("viadot.sources.salesforce.Salesforce", return_value=mock_sf_instance) + mock_sf_instance = mocker.MagicMock(spec=SimpleSalesforce) + mocker.patch("viadot.sources.salesforce.SimpleSalesforce", return_value=mock_sf_instance) sf_instance = Salesforce(credentials=variables["credentials"], env="DEV") assert sf_instance.salesforce == mock_sf_instance @@ -66,8 +66,8 @@ def test_salesforce_credentials(self): @pytest.mark.basic() def test_salesforce_init_prod_env(mocker): """Test Salesforce, starting in prod mode.""" - mock_sf_instance = mocker.MagicMock(spec=Salesforce) - mocker.patch("viadot.sources.salesforce.Salesforce", return_value=mock_sf_instance) + mock_sf_instance = mocker.MagicMock(spec=SimpleSalesforce) + mocker.patch("viadot.sources.salesforce.SimpleSalesforce", return_value=mock_sf_instance) sf_instance = Salesforce(credentials=variables["credentials"], env="PROD") assert sf_instance.salesforce == mock_sf_instance @@ -93,8 +93,8 @@ def test_salesforce_missing_credentials(): @pytest.mark.connect() def test_salesforce_api_connection(mocker): """Test Salesforce `api_connection` method with a query.""" - mock_sf_instance = mocker.MagicMock(spec=Salesforce) - mocker.patch("viadot.sources.salesforce.Salesforce", return_value=mock_sf_instance) + mock_sf_instance = mocker.MagicMock(spec=SimpleSalesforce) + mocker.patch("viadot.sources.salesforce.SimpleSalesforce", return_value=mock_sf_instance) salesforce_instance = Salesforce(credentials=variables["credentials"]) mock_sf_instance.query.return_value = {"records": variables["records_1"]} @@ -108,8 +108,8 @@ def test_salesforce_api_connection(mocker): @pytest.mark.connect() def test_salesforce_api_connection_with_columns(mocker): """Test Salesforce `api_connection` method with columns.""" - mock_sf_instance = mocker.MagicMock(spec=Salesforce) - mocker.patch("viadot.sources.salesforce.Salesforce", return_value=mock_sf_instance) + mock_sf_instance = mocker.MagicMock(spec=SimpleSalesforce) + mocker.patch("viadot.sources.salesforce.SimpleSalesforce", return_value=mock_sf_instance) salesforce_instance = Salesforce(credentials=variables["credentials"]) mock_sf_instance.query.return_value = {"records": variables["records_2"]} @@ -123,8 +123,8 @@ def test_salesforce_api_connection_with_columns(mocker): @pytest.mark.functions() def test_salesforce_to_df(mocker): """Test Salesforce `to_df` method.""" - mock_sf_instance = mocker.MagicMock(spec=Salesforce) - mocker.patch("viadot.sources.salesforce.Salesforce", return_value=mock_sf_instance) + mock_sf_instance = mocker.MagicMock(spec=SimpleSalesforce) + mocker.patch("viadot.sources.salesforce.SimpleSalesforce", return_value=mock_sf_instance) salesforce_instance = Salesforce(credentials=variables["credentials"]) salesforce_instance.data = variables["data"] @@ -144,8 +144,8 @@ def test_salesforce_to_df(mocker): @pytest.mark.functions() def test_salesforce_to_df_empty_data(mocker): """Test Salesforce `to_df` method with empty df.""" - mock_sf_instance = mocker.MagicMock(spec=Salesforce) - mocker.patch("viadot.sources.salesforce.Salesforce", return_value=mock_sf_instance) + mock_sf_instance = mocker.MagicMock(spec=SimpleSalesforce) + mocker.patch("viadot.sources.salesforce.SimpleSalesforce", return_value=mock_sf_instance) salesforce_instance = Salesforce(credentials=variables["credentials"]) salesforce_instance.data = [] @@ -156,8 +156,8 @@ def test_salesforce_to_df_empty_data(mocker): @pytest.mark.functions() def test_salesforce_to_df_warn_empty_data(mocker): """Test Salesforce `to_df` method with empty df, warn.""" - mock_sf_instance = mocker.MagicMock(spec=Salesforce) - mocker.patch("viadot.sources.salesforce.Salesforce", return_value=mock_sf_instance) + mock_sf_instance = mocker.MagicMock(spec=SimpleSalesforce) + mocker.patch("viadot.sources.salesforce.SimpleSalesforce", return_value=mock_sf_instance) salesforce_instance = Salesforce(credentials=variables["credentials"]) salesforce_instance.data = [] From 2af970e549c6da8a793954efcd581fec947c03ed Mon Sep 17 00:00:00 2001 From: Maciej Gardzinski Date: Thu, 26 Sep 2024 11:25:11 +0200 Subject: [PATCH 14/17] =?UTF-8?q?=E2=9E=95=20Adding=20simple-salesforce=20?= =?UTF-8?q?dependency?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 3472ea9c8..d23d1c9a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ dependencies = [ "numpy>=1.23.4, <2.0", "defusedxml>=0.7.1", "aiohttp>=3.10.5", + "simple-salesforce==1.12.6", "pytest-mock>=3.14.0", ] requires-python = ">=3.10" From 22cd412dc3385f761fe707325dfec67ee749afb0 Mon Sep 17 00:00:00 2001 From: Maciej Gardzinski Date: Thu, 26 Sep 2024 17:30:01 +0200 Subject: [PATCH 15/17] =?UTF-8?q?=F0=9F=8E=A8=20Code=20Formating=20changes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../orchestration/prefect/flows/__init__.py | 1 + .../prefect/flows/salesforce_to_adls.py | 2 +- .../orchestration/prefect/tasks/__init__.py | 1 + src/viadot/sources/salesforce.py | 2 +- .../prefect/flows/test_salesforce.py | 4 +- tests/unit/test_salesforce.py | 60 ++++++++++++------- 6 files changed, 46 insertions(+), 24 deletions(-) diff --git a/src/viadot/orchestration/prefect/flows/__init__.py b/src/viadot/orchestration/prefect/flows/__init__.py index 501fae612..750de4853 100644 --- a/src/viadot/orchestration/prefect/flows/__init__.py +++ b/src/viadot/orchestration/prefect/flows/__init__.py @@ -26,6 +26,7 @@ from .transform import transform from .transform_and_catalog import transform_and_catalog + __all__ = [ "cloud_for_customers_to_adls", "cloud_for_customers_to_databricks", diff --git a/src/viadot/orchestration/prefect/flows/salesforce_to_adls.py b/src/viadot/orchestration/prefect/flows/salesforce_to_adls.py index 4b6213851..cf45505a4 100644 --- a/src/viadot/orchestration/prefect/flows/salesforce_to_adls.py +++ b/src/viadot/orchestration/prefect/flows/salesforce_to_adls.py @@ -14,7 +14,7 @@ retry_delay_seconds=60, task_runner=ConcurrentTaskRunner, ) -def salesforce_to_adls( +def salesforce_to_adls( # noqa: PLR0913 config_key: str | None = None, azure_key_vault_secret: str | None = None, env: str | None = None, diff --git a/src/viadot/orchestration/prefect/tasks/__init__.py b/src/viadot/orchestration/prefect/tasks/__init__.py index 69b02b9bc..48fe9cee6 100644 --- a/src/viadot/orchestration/prefect/tasks/__init__.py +++ b/src/viadot/orchestration/prefect/tasks/__init__.py @@ -23,6 +23,7 @@ from .sql_server import create_sql_server_table, sql_server_query, sql_server_to_df from .supermetrics import supermetrics_to_df + __all__ = [ "adls_upload", "bcp", diff --git a/src/viadot/sources/salesforce.py b/src/viadot/sources/salesforce.py index 3f0d98710..c5c0034c1 100644 --- a/src/viadot/sources/salesforce.py +++ b/src/viadot/sources/salesforce.py @@ -115,7 +115,7 @@ def api_connection( """ if not query: columns_str = ", ".join(columns) if columns else "FIELDS(STANDARD)" - query = f"SELECT {columns_str} FROM {table}" + query = f"SELECT {columns_str} FROM {table}" # noqa: S608 self.data = self.salesforce.query(query).get("records") diff --git a/tests/integration/orchestration/prefect/flows/test_salesforce.py b/tests/integration/orchestration/prefect/flows/test_salesforce.py index cf28048ff..2eb44a822 100644 --- a/tests/integration/orchestration/prefect/flows/test_salesforce.py +++ b/tests/integration/orchestration/prefect/flows/test_salesforce.py @@ -3,7 +3,9 @@ from viadot.orchestration.prefect.flows import salesforce_to_adls -def test_salesforce_to_adls(azure_key_vault_secret, adls_path, adls_azure_key_vault_secret): +def test_salesforce_to_adls( + azure_key_vault_secret, adls_path, adls_azure_key_vault_secret +): """Test Salesforce prefect flow.""" state = salesforce_to_adls( azure_key_vault_secret=azure_key_vault_secret, diff --git a/tests/unit/test_salesforce.py b/tests/unit/test_salesforce.py index 957e78adf..0a70145b2 100644 --- a/tests/unit/test_salesforce.py +++ b/tests/unit/test_salesforce.py @@ -7,10 +7,11 @@ from viadot.sources import Salesforce from viadot.sources.salesforce import SalesforceCredentials + variables = { "credentials": { "username": "test_user", - "password": "test_password", + "password": "test_password", # pragma: allowlist secret "token": "test_token", }, "records_1": [ @@ -40,11 +41,13 @@ } -@pytest.mark.basic() +@pytest.mark.basic def test_salesforce_init_dev_env(mocker): """Test Salesforce, starting in dev mode.""" mock_sf_instance = mocker.MagicMock(spec=SimpleSalesforce) - mocker.patch("viadot.sources.salesforce.SimpleSalesforce", return_value=mock_sf_instance) + mocker.patch( + "viadot.sources.salesforce.SimpleSalesforce", return_value=mock_sf_instance + ) sf_instance = Salesforce(credentials=variables["credentials"], env="DEV") assert sf_instance.salesforce == mock_sf_instance @@ -53,27 +56,29 @@ def test_salesforce_init_dev_env(mocker): class TestSalesforceCredentials: """Test Salesforce Credentials Class.""" - @pytest.mark.basic() + @pytest.mark.basic def test_salesforce_credentials(self): """Test Salesforce credentials.""" SalesforceCredentials( username="test_user", - password="test_password", - token="test_token", + password="test_password", # noqa: S106 # pragma: allowlist secret + token="test_token", # noqa: S106 ) -@pytest.mark.basic() +@pytest.mark.basic def test_salesforce_init_prod_env(mocker): """Test Salesforce, starting in prod mode.""" mock_sf_instance = mocker.MagicMock(spec=SimpleSalesforce) - mocker.patch("viadot.sources.salesforce.SimpleSalesforce", return_value=mock_sf_instance) + mocker.patch( + "viadot.sources.salesforce.SimpleSalesforce", return_value=mock_sf_instance + ) sf_instance = Salesforce(credentials=variables["credentials"], env="PROD") assert sf_instance.salesforce == mock_sf_instance -@pytest.mark.basic() +@pytest.mark.basic def test_salesforce_invalid_env(): """Test Salesforce, invalid `env` parameter.""" with pytest.raises( @@ -82,19 +87,24 @@ def test_salesforce_invalid_env(): Salesforce(credentials=variables["credentials"], env="INVALID") -@pytest.mark.basic() +@pytest.mark.basic def test_salesforce_missing_credentials(): """Test Salesforce missing credentials.""" - incomplete_creds = {"username": "user", "password": "pass"} + incomplete_creds = { + "username": "user", # pragma: allowlist secret + "password": "pass", # pragma: allowlist secret + } with pytest.raises(CredentialError): Salesforce(credentials=incomplete_creds) -@pytest.mark.connect() +@pytest.mark.connect def test_salesforce_api_connection(mocker): """Test Salesforce `api_connection` method with a query.""" mock_sf_instance = mocker.MagicMock(spec=SimpleSalesforce) - mocker.patch("viadot.sources.salesforce.SimpleSalesforce", return_value=mock_sf_instance) + mocker.patch( + "viadot.sources.salesforce.SimpleSalesforce", return_value=mock_sf_instance + ) salesforce_instance = Salesforce(credentials=variables["credentials"]) mock_sf_instance.query.return_value = {"records": variables["records_1"]} @@ -105,11 +115,13 @@ def test_salesforce_api_connection(mocker): mock_sf_instance.query.assert_called_once_with("SELECT Id, Name FROM Account") -@pytest.mark.connect() +@pytest.mark.connect def test_salesforce_api_connection_with_columns(mocker): """Test Salesforce `api_connection` method with columns.""" mock_sf_instance = mocker.MagicMock(spec=SimpleSalesforce) - mocker.patch("viadot.sources.salesforce.SimpleSalesforce", return_value=mock_sf_instance) + mocker.patch( + "viadot.sources.salesforce.SimpleSalesforce", return_value=mock_sf_instance + ) salesforce_instance = Salesforce(credentials=variables["credentials"]) mock_sf_instance.query.return_value = {"records": variables["records_2"]} @@ -120,11 +132,13 @@ def test_salesforce_api_connection_with_columns(mocker): mock_sf_instance.query.assert_called_once_with("SELECT Id, Name FROM Account") -@pytest.mark.functions() +@pytest.mark.functions def test_salesforce_to_df(mocker): """Test Salesforce `to_df` method.""" mock_sf_instance = mocker.MagicMock(spec=SimpleSalesforce) - mocker.patch("viadot.sources.salesforce.SimpleSalesforce", return_value=mock_sf_instance) + mocker.patch( + "viadot.sources.salesforce.SimpleSalesforce", return_value=mock_sf_instance + ) salesforce_instance = Salesforce(credentials=variables["credentials"]) salesforce_instance.data = variables["data"] @@ -141,11 +155,13 @@ def test_salesforce_to_df(mocker): assert df.iloc[0]["Id"] == "001" -@pytest.mark.functions() +@pytest.mark.functions def test_salesforce_to_df_empty_data(mocker): """Test Salesforce `to_df` method with empty df.""" mock_sf_instance = mocker.MagicMock(spec=SimpleSalesforce) - mocker.patch("viadot.sources.salesforce.SimpleSalesforce", return_value=mock_sf_instance) + mocker.patch( + "viadot.sources.salesforce.SimpleSalesforce", return_value=mock_sf_instance + ) salesforce_instance = Salesforce(credentials=variables["credentials"]) salesforce_instance.data = [] @@ -153,11 +169,13 @@ def test_salesforce_to_df_empty_data(mocker): salesforce_instance.to_df(if_empty="fail") -@pytest.mark.functions() +@pytest.mark.functions def test_salesforce_to_df_warn_empty_data(mocker): """Test Salesforce `to_df` method with empty df, warn.""" mock_sf_instance = mocker.MagicMock(spec=SimpleSalesforce) - mocker.patch("viadot.sources.salesforce.SimpleSalesforce", return_value=mock_sf_instance) + mocker.patch( + "viadot.sources.salesforce.SimpleSalesforce", return_value=mock_sf_instance + ) salesforce_instance = Salesforce(credentials=variables["credentials"]) salesforce_instance.data = [] From 89ea5f14900f18f67ebfef1258cb6abb092e720f Mon Sep 17 00:00:00 2001 From: Maciej Gardzinski Date: Thu, 26 Sep 2024 17:39:00 +0200 Subject: [PATCH 16/17] =?UTF-8?q?=F0=9F=8E=A8=20Salesforce=20change=20name?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/viadot/sources/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/viadot/sources/__init__.py b/src/viadot/sources/__init__.py index 0af58c2c9..e9f74c12d 100644 --- a/src/viadot/sources/__init__.py +++ b/src/viadot/sources/__init__.py @@ -27,7 +27,7 @@ "Mindful", "Outlook", "SQLServer", - "SalesForce", + "Salesforce", "Sharepoint", "Supermetrics", "SupermetricsCredentials", # pragma: allowlist-secret From 2d7984b4d4a37b4333f7d4143a1e0787a62b20db Mon Sep 17 00:00:00 2001 From: Maciej Gardzinski Date: Fri, 27 Sep 2024 11:37:57 +0200 Subject: [PATCH 17/17] =?UTF-8?q?=E2=9E=95=20Adding=20extra=20dependency?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements-dev.lock | 36 ++++++++++++++++++++++++++++++++++-- requirements.lock | 30 ++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/requirements-dev.lock b/requirements-dev.lock index e74fc29b2..601dac63f 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -7,6 +7,7 @@ # all-features: false # with-sources: false # generate-hashes: false +# universal: false -e file:. aiohappyeyeballs==2.4.0 @@ -40,6 +41,7 @@ attrs==24.2.0 # via jsonschema # via referencing # via visions + # via zeep babel==2.16.0 # via mkdocs-material beautifulsoup4==4.12.3 @@ -93,11 +95,13 @@ comm==0.2.2 coolname==2.2.0 # via prefect coverage==7.6.1 + # via coverage croniter==2.0.7 # via prefect cryptography==43.0.0 # via moto # via prefect + # via pyjwt cssselect2==0.7.0 # via cairosvg dateparser==1.2.0 @@ -169,6 +173,7 @@ httpcore==1.0.5 # via httpx # via prefect httpx==0.27.0 + # via httpx # via neoteroi-mkdocs # via prefect humanize==4.10.0 @@ -195,6 +200,8 @@ ipykernel==6.29.5 # via mkdocs-jupyter ipython==8.26.0 # via ipykernel +isodate==0.6.1 + # via zeep itsdangerous==2.2.0 # via prefect jedi==0.19.1 @@ -244,6 +251,8 @@ kubernetes==29.0.0 loguru==0.7.2 lumacli==0.1.2 # via viadot2 +lxml==5.3.0 + # via zeep mako==1.3.5 # via alembic markdown==3.7 @@ -302,14 +311,18 @@ mkdocs-include-markdown-plugin==6.2.2 mkdocs-jupyter==0.24.8 mkdocs-material==9.5.32 # via mkdocs-jupyter + # via mkdocs-material mkdocs-material-extensions==1.3.1 # via mkdocs-material mkdocs-mermaid2-plugin==1.1.1 mkdocs-table-reader-plugin==3.0.1 mkdocstrings==0.25.2 + # via mkdocstrings # via mkdocstrings-python mkdocstrings-python==1.10.5 # via mkdocstrings +more-itertools==10.5.0 + # via simple-salesforce moto==5.0.13 multidict==6.0.5 # via aiohttp @@ -379,6 +392,7 @@ platformdirs==4.2.2 # via jupyter-core # via mkdocs-get-deps # via mkdocstrings + # via zeep pluggy==1.5.0 # via pytest prefect==2.20.2 @@ -422,6 +436,8 @@ pygments==2.18.0 # via mkdocs-material # via nbconvert # via rich +pyjwt==2.9.0 + # via simple-salesforce pymdown-extensions==10.9 # via mkdocs-material # via mkdocs-mermaid2-plugin @@ -463,6 +479,7 @@ pytz==2024.1 # via pandas # via prefect # via trino + # via zeep pytzdata==2020.1 # via pendulum pywavelets==1.7.0 @@ -504,15 +521,23 @@ requests==2.32.3 # via mkdocs-mermaid2-plugin # via moto # via o365 + # via requests-file # via requests-oauthlib + # via requests-toolbelt # via responses # via sharepy + # via simple-salesforce # via trino # via viadot2 + # via zeep +requests-file==2.1.0 + # via zeep requests-oauthlib==2.0.0 # via apprise # via kubernetes # via o365 +requests-toolbelt==1.0.0 + # via zeep responses==0.25.3 # via moto rfc3339-validator==0.1.4 @@ -538,8 +563,6 @@ scipy==1.14.0 # via imagehash sendgrid==6.11.0 # via viadot2 -setuptools==73.0.0 - # via mkdocs-mermaid2-plugin sgqlc==16.3 # via prefect-github shapely==2.0.6 @@ -548,9 +571,12 @@ sharepy==2.0.0 # via viadot2 shellingham==1.5.4 # via typer +simple-salesforce==1.12.6 + # via viadot2 six==1.16.0 # via asttokens # via bleach + # via isodate # via jsbeautifier # via kubernetes # via python-dateutil @@ -609,6 +635,7 @@ trino==0.328.0 typer==0.12.4 # via lumacli # via prefect + # via typer typing-extensions==4.12.2 # via aiosqlite # via alembic @@ -617,6 +644,7 @@ typing-extensions==4.12.2 # via prefect # via pydantic # via pydantic-core + # via simple-salesforce # via sqlalchemy # via typer # via uvicorn @@ -661,5 +689,9 @@ xmltodict==0.13.0 # via moto yarl==1.9.4 # via aiohttp +zeep==4.2.1 + # via simple-salesforce zipp==3.20.0 # via importlib-metadata +setuptools==73.0.0 + # via mkdocs-mermaid2-plugin diff --git a/requirements.lock b/requirements.lock index 1f2dc9fca..adfcb879b 100644 --- a/requirements.lock +++ b/requirements.lock @@ -7,6 +7,7 @@ # all-features: false # with-sources: false # generate-hashes: false +# universal: false -e file:. aiohappyeyeballs==2.4.0 @@ -38,6 +39,7 @@ attrs==24.2.0 # via jsonschema # via referencing # via visions + # via zeep beautifulsoup4==4.12.3 # via o365 cachetools==5.5.0 @@ -69,6 +71,7 @@ croniter==2.0.7 # via prefect cryptography==43.0.0 # via prefect + # via pyjwt dateparser==1.2.0 # via prefect defusedxml==0.7.1 @@ -113,6 +116,7 @@ httpcore==1.0.5 # via httpx # via prefect httpx==0.27.0 + # via httpx # via prefect humanize==4.10.0 # via jinja2-humanize-extension @@ -131,6 +135,8 @@ importlib-resources==6.1.3 # via prefect iniconfig==2.0.0 # via pytest +isodate==0.6.1 + # via zeep itsdangerous==2.2.0 # via prefect jinja2==3.1.4 @@ -150,6 +156,8 @@ kubernetes==29.0.0 # via prefect lumacli==0.1.2 # via viadot2 +lxml==5.3.0 + # via zeep mako==1.3.5 # via alembic markdown==3.7 @@ -161,6 +169,8 @@ markupsafe==2.1.5 # via mako mdurl==0.1.2 # via markdown-it-py +more-itertools==10.5.0 + # via simple-salesforce multidict==6.0.5 # via aiohttp # via yarl @@ -198,6 +208,8 @@ pendulum==2.1.2 # via prefect pillow==10.4.0 # via imagehash +platformdirs==4.3.6 + # via zeep pluggy==1.5.0 # via pytest prefect==2.20.2 @@ -229,6 +241,8 @@ pygit2==1.14.1 # via viadot2 pygments==2.18.0 # via rich +pyjwt==2.9.0 + # via simple-salesforce pyodbc==5.1.0 # via viadot2 pytest==8.3.3 @@ -256,6 +270,7 @@ pytz==2024.1 # via pandas # via prefect # via trino + # via zeep pytzdata==2020.1 # via pendulum pywavelets==1.7.0 @@ -278,14 +293,22 @@ requests==2.32.3 # via kubernetes # via lumacli # via o365 + # via requests-file # via requests-oauthlib + # via requests-toolbelt # via sharepy + # via simple-salesforce # via trino # via viadot2 + # via zeep +requests-file==2.1.0 + # via zeep requests-oauthlib==2.0.0 # via apprise # via kubernetes # via o365 +requests-toolbelt==1.0.0 + # via zeep rfc3339-validator==0.1.4 # via prefect rich==13.7.1 @@ -313,7 +336,10 @@ sharepy==2.0.0 # via viadot2 shellingham==1.5.4 # via typer +simple-salesforce==1.12.6 + # via viadot2 six==1.16.0 + # via isodate # via kubernetes # via python-dateutil # via rfc3339-validator @@ -346,6 +372,7 @@ trino==0.328.0 typer==0.12.4 # via lumacli # via prefect + # via typer typing-extensions==4.12.2 # via aiosqlite # via alembic @@ -353,6 +380,7 @@ typing-extensions==4.12.2 # via prefect # via pydantic # via pydantic-core + # via simple-salesforce # via sqlalchemy # via typer # via uvicorn @@ -379,3 +407,5 @@ websockets==12.0 # via prefect yarl==1.9.8 # via aiohttp +zeep==4.2.1 + # via simple-salesforce