diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 67d127e2d..7a8ebd7a9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,7 +13,7 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.3.4 + rev: v0.6.7 hooks: # Run the linter. - id: ruff diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 000000000..3845d6cca --- /dev/null +++ b/.prettierignore @@ -0,0 +1 @@ +docs/references/sources/sql.md diff --git a/.secrets.baseline b/.secrets.baseline index af5b48317..3fc276484 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -90,6 +90,10 @@ { "path": "detect_secrets.filters.allowlist.is_line_allowlisted" }, + { + "path": "detect_secrets.filters.common.is_baseline_file", + "filename": ".secrets.baseline" + }, { "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", "min_level": 2 @@ -223,7 +227,7 @@ "filename": "tests/integration/orchestration/prefect/flows/test_duckdb_to_sql_server.py", "hashed_secret": "74fb0b9813045340022392630bada6fd3d555fe6", "is_verified": false, - "line_number": 43 + "line_number": 44 } ], "tests/integration/orchestration/prefect/flows/test_genesys_to_adls.py": [ @@ -282,7 +286,7 @@ "filename": "tests/integration/orchestration/prefect/tasks/test_sql_server.py", "hashed_secret": "74fb0b9813045340022392630bada6fd3d555fe6", "is_verified": false, - "line_number": 28 + "line_number": 29 } ], "tests/integration/test_sharepoint.py": [ @@ -291,7 +295,7 @@ "filename": "tests/integration/test_sharepoint.py", "hashed_secret": "9fb7fe1217aed442b04c0f5e43b5d5a7d3287097", "is_verified": false, - "line_number": 14 + "line_number": 15 } ], "tests/resources/metadata/model/manifest.json": [ @@ -339,7 +343,7 @@ "filename": "tests/unit/test_config.py", "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", "is_verified": false, - "line_number": 8 + "line_number": 9 } ], "tests/unit/test_genesys.py": [ @@ -348,7 +352,7 @@ "filename": 
"tests/unit/test_genesys.py", "hashed_secret": "1089adfb1f11b95df31344030507912b5abdf57a", "is_verified": false, - "line_number": 17 + "line_number": 18 } ], "tests/unit/test_outlook.py": [ @@ -357,14 +361,14 @@ "filename": "tests/unit/test_outlook.py", "hashed_secret": "b235838f76594bf21886c6eec9c06a207e9ec5ce", "is_verified": false, - "line_number": 17 + "line_number": 18 }, { "type": "Secret Keyword", "filename": "tests/unit/test_outlook.py", "hashed_secret": "1089adfb1f11b95df31344030507912b5abdf57a", "is_verified": false, - "line_number": 48 + "line_number": 49 } ], "tests/unit/test_redshift_spectrum.py": [ @@ -373,7 +377,7 @@ "filename": "tests/unit/test_redshift_spectrum.py", "hashed_secret": "dc724af18fbdd4e59189f5fe768a5f8311527050", "is_verified": false, - "line_number": 19 + "line_number": 21 } ], "tests/unit/test_sharepoint.py": [ @@ -382,16 +386,16 @@ "filename": "tests/unit/test_sharepoint.py", "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", "is_verified": false, - "line_number": 13 + "line_number": 14 }, { "type": "Secret Keyword", "filename": "tests/unit/test_sharepoint.py", "hashed_secret": "5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8", "is_verified": false, - "line_number": 46 + "line_number": 47 } ] }, - "generated_at": "2024-08-30T12:50:18Z" + "generated_at": "2024-09-23T12:45:10Z" } diff --git a/docs/developer_guide/creating_a_prefect_flow.md b/docs/developer_guide/creating_a_prefect_flow.md index a06984466..20d7b8189 100644 --- a/docs/developer_guide/creating_a_prefect_flow.md +++ b/docs/developer_guide/creating_a_prefect_flow.md @@ -32,7 +32,9 @@ def postgresql_to_df(credentials_key: str | None = None, credentials_secret: str if not (credentials_secret or config_key): raise MissingSourceCredentialsError - credentials = credentials or get_credentials(credentials_secret) + if not config_key: + credentials = get_credentials(credentials_secret) + postgres = PostgreSQL(credentials=credentials, config_key=config_key) return 
postgres.to_df(...) ``` diff --git a/docs/references/orchestration/prefect/flows.md b/docs/references/orchestration/prefect/flows.md index 03cc53e78..e1c291f72 100644 --- a/docs/references/orchestration/prefect/flows.md +++ b/docs/references/orchestration/prefect/flows.md @@ -4,6 +4,8 @@ ::: viadot.orchestration.prefect.flows.exchange_rates_to_adls +::: viadot.orchestration.prefect.flows.exchange_rates_to_databricks + ::: viadot.orchestration.prefect.flows.sap_to_redshift_spectrum ::: viadot.orchestration.prefect.flows.sharepoint_to_adls @@ -17,3 +19,27 @@ ::: viadot.orchestration.prefect.flows.transform ::: viadot.orchestration.prefect.flows.transform_and_catalog + +::: viadot.orchestration.prefect.flows.duckdb_to_parquet + +::: viadot.orchestration.prefect.flows.duckdb_to_sql_server + +::: viadot.orchestration.prefect.flows.duckdb_transform + +::: viadot.orchestration.prefect.flows.epicor_to_parquet + +::: viadot.orchestration.prefect.flows.exchange_rates_api_to_redshift_spectrum + +::: viadot.orchestration.prefect.flows.genesys_to_adls + +::: viadot.orchestration.prefect.flows.hubspot_to_adls + +::: viadot.orchestration.prefect.flows.mindful_to_adls + +::: viadot.orchestration.prefect.flows.outlook_to_adls + +::: viadot.orchestration.prefect.flows.sap_to_parquet + +::: viadot.orchestration.prefect.flows.sql_server_to_minio + +::: viadot.orchestration.prefect.flows.sql_server_to_parquet diff --git a/docs/references/orchestration/prefect/tasks.md b/docs/references/orchestration/prefect/tasks.md index 78ebf516b..67e6cccdb 100644 --- a/docs/references/orchestration/prefect/tasks.md +++ b/docs/references/orchestration/prefect/tasks.md @@ -21,3 +21,25 @@ ::: viadot.orchestration.prefect.tasks.sharepoint_download_file ::: viadot.orchestration.prefect.tasks.sharepoint_to_df + +::: viadot.orchestration.prefect.tasks.duckdb_query + +::: viadot.orchestration.prefect.tasks.epicor_to_df + +::: viadot.orchestration.prefect.tasks.genesys_to_df + +::: 
viadot.orchestration.prefect.tasks.hubspot_to_df + +::: viadot.orchestration.prefect.tasks.mindful_to_df + +::: viadot.orchestration.prefect.tasks.outlook_to_df + +::: viadot.orchestration.prefect.tasks.sap_rfc_to_df + +::: viadot.orchestration.prefect.tasks.bcp + +::: viadot.orchestration.prefect.tasks.create_sql_server_table + +::: viadot.orchestration.prefect.tasks.sql_server_query + +::: viadot.orchestration.prefect.tasks.sql_server_to_df diff --git a/docs/references/sources/api_sources.md b/docs/references/sources/api.md similarity index 71% rename from docs/references/sources/api_sources.md rename to docs/references/sources/api.md index 91e64177c..f106b3663 100644 --- a/docs/references/sources/api_sources.md +++ b/docs/references/sources/api.md @@ -8,10 +8,16 @@ ::: viadot.sources.cloud_for_customers.CloudForCustomers -::: viadot.sources.sap_rfc.SAPRFC - ::: viadot.sources.sharepoint.Sharepoint ::: viadot.sources.genesys.Genesys +::: viadot.sources.outlook.Outlook + +::: viadot.sources.hubspot.Hubspot + +::: viadot.sources.epicor.Epicor + +::: viadot.sources.mindful.Mindful + ::: viadot.sources.minio.MinIO diff --git a/docs/references/sources/sql_sources.md b/docs/references/sources/sql.md similarity index 69% rename from docs/references/sources/sql_sources.md rename to docs/references/sources/sql.md index f2f57ebb1..80b107ef3 100644 --- a/docs/references/sources/sql_sources.md +++ b/docs/references/sources/sql.md @@ -16,4 +16,10 @@ ::: viadot.sources.databricks.Databricks -::: viadot.sources.trino.Trino +::: viadot.sources._trino.Trino + +::: viadot.sources._duckdb.DuckDB + +::: viadot.sources.sap_rfc.SAPRFC + +::: viadot.sources.sap_rfc.SAPRFCV2 diff --git a/mkdocs.yml b/mkdocs.yml index 1541d743e..caad33a62 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -21,8 +21,8 @@ nav: - References: - Sources: - - SQL Sources: references/sources/sql_sources.md - - API Sources: references/sources/api_sources.md + - SQL Sources: references/sources/sql.md + - API Sources: 
references/sources/api.md - Orchestration: - Prefect: - Tasks: references/orchestration/prefect/tasks.md diff --git a/pyproject.toml b/pyproject.toml index c26e344ab..155ee859e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "viadot2" -version = "2.1.17" +version = "2.1.19" description = "A simple data ingestion library to guide data flows from some places to other places." authors = [ { name = "acivitillo", email = "acivitillo@dyvenia.com" }, @@ -80,7 +80,7 @@ dev-dependencies = [ "mkdocs-git-revision-date-plugin>=0.3.2", "mkdocs-glightbox>=0.4.0", "pytest>=8.2.2", - "ruff>=0.5.2", + "ruff>=0.6.6", "pytest-asyncio>=0.23.8", "moto>=5.0.13", ] diff --git a/src/viadot/orchestration/prefect/flows/__init__.py b/src/viadot/orchestration/prefect/flows/__init__.py index 80200c7a6..bafa8b1ff 100644 --- a/src/viadot/orchestration/prefect/flows/__init__.py +++ b/src/viadot/orchestration/prefect/flows/__init__.py @@ -9,6 +9,7 @@ from .epicor_to_parquet import epicor_to_parquet from .exchange_rates_to_adls import exchange_rates_to_adls from .exchange_rates_to_databricks import exchange_rates_to_databricks +from .exchange_rates_to_redshift_spectrum import exchange_rates_api_to_redshift_spectrum from .genesys_to_adls import genesys_to_adls from .hubspot_to_adls import hubspot_to_adls from .mindful_to_adls import mindful_to_adls @@ -35,6 +36,7 @@ "epicor_to_parquet", "exchange_rates_to_adls", "exchange_rates_to_databricks", + "exchange_rates_api_to_redshift_spectrum", "genesys_to_adls", "hubspot_to_adls", "mindful_to_adls", diff --git a/src/viadot/orchestration/prefect/flows/exchange_rates_to_redshift_spectrum.py b/src/viadot/orchestration/prefect/flows/exchange_rates_to_redshift_spectrum.py new file mode 100644 index 000000000..f221bd2c6 --- /dev/null +++ b/src/viadot/orchestration/prefect/flows/exchange_rates_to_redshift_spectrum.py @@ -0,0 +1,93 @@ +"""Download data from Exchange Rates API and upload it to AWS Redshift Spectrum.""" + +from 
datetime import datetime +from typing import Literal + +from prefect import flow + +from viadot.orchestration.prefect.tasks import ( + df_to_redshift_spectrum, + exchange_rates_to_df, +) +from viadot.orchestration.prefect.tasks.exchange_rates import Currency + + +@flow( + name="extract--exchange-rates-api--redshift_spectrum", + description="Extract data from Exchange Rates API and load it into AWS Redshift Spectrum.", + retries=1, + retry_delay_seconds=60, +) +def exchange_rates_api_to_redshift_spectrum( # noqa: PLR0913 + to_path: str, + schema_name: str, + table: str, + currency: Currency = "USD", + start_date: str = datetime.today().strftime("%Y-%m-%d"), + end_date: str = datetime.today().strftime("%Y-%m-%d"), + symbols: list[str] | None = None, + if_exists: Literal["overwrite", "append"] = "overwrite", + partition_cols: list[str] | None = None, + compression: str | None = None, + table_description: str | None = None, + aws_config_key: str | None = None, + aws_credentials_secret: str | None = None, + exchange_rates_api_credentials_secret: str | None = None, + exchange_rates_api_config_key: str | None = None, +) -> None: + """Extract data from Exchange Rates API and load it into AWS Redshift Spectrum. + + Args: + currency (Currency, optional): Base currency to which prices of searched + currencies are related. Defaults to "USD". + start_date (str, optional): Initial date for data search. + Data range is start_date -> end_date, + supported format 'yyyy-mm-dd'. + Defaults to datetime.today().strftime("%Y-%m-%d"). + end_date (str, optional): See above. + Defaults to datetime.today().strftime("%Y-%m-%d"). + symbols (list[str], optional): List of ISO codes of currencies for which + exchange rates from base currency will be fetched. Defaults to + ["USD","EUR","GBP","CHF","PLN","DKK","COP","CZK","SEK","NOK","ISK"]. + to_path (str): Path to a S3 folder where the table will be located. Defaults to + None. + schema_name (str): AWS Glue catalog database name. 
+ table (str): AWS Glue catalog table name. + if_exists (str, optional): 'overwrite' to recreate any possible existing table + or 'append' to keep any possible existing table. Defaults to overwrite. + partition_cols (list[str], optional): List of column names that will be used to + create partitions. Only takes effect if dataset=True. Defaults to None. + compression (str, optional): Compression style (None, snappy, gzip, zstd). + sep (str, optional): Field delimiter for the output file. Defaults to ','. + table_description (str, optional): AWS Glue catalog table description. Defaults + to None. + aws_config_key (str, optional): The key in the viadot config holding relevant + credentials. Defaults to None. + aws_credentials_secret (str, optional): The name of a secret block in Prefect + that stores AWS credentials. Defaults to None. + exchange_rates_api_credentials_secret (str, optional): The name of the secret + storing Exchange Rates API key. Defaults to None. + More info on: https://docs.prefect.io/concepts/blocks/ + exchange_rates_api_config_key (str, optional): The key in the viadot config + holding relevant credentials. Defaults to None. 
+ """ + df = exchange_rates_to_df( + currency=currency, + start_date=start_date, + end_date=end_date, + symbols=symbols, + credentials_secret=exchange_rates_api_credentials_secret, + config_key=exchange_rates_api_config_key, + ) + df_to_redshift_spectrum( + df=df, + to_path=to_path, + schema_name=schema_name, + table=table, + if_exists=if_exists, + partition_cols=partition_cols, + compression=compression, + description=table_description, + config_key=aws_config_key, + credentials_secret=aws_credentials_secret, + ) diff --git a/src/viadot/orchestration/prefect/tasks/exchange_rates.py b/src/viadot/orchestration/prefect/tasks/exchange_rates.py index 1f5b63103..567c0d2c5 100644 --- a/src/viadot/orchestration/prefect/tasks/exchange_rates.py +++ b/src/viadot/orchestration/prefect/tasks/exchange_rates.py @@ -1,7 +1,7 @@ """Tasks for interacting with the Exchange Rates API.""" from datetime import datetime -from typing import Any, Literal +from typing import Literal import pandas as pd from prefect import task @@ -20,7 +20,6 @@ def exchange_rates_to_df( currency: Currency = "USD", credentials_secret: str | None = None, - credentials: dict[str, Any] | None = None, config_key: str | None = None, start_date: str = datetime.today().strftime("%Y-%m-%d"), end_date: str = datetime.today().strftime("%Y-%m-%d"), @@ -35,8 +34,6 @@ def exchange_rates_to_df( credentials_secret (str, optional): The name of the secret storing the credentials. Defaults to None. More info on: https://docs.prefect.io/concepts/blocks/ - credentials (dict[str, str], optional): The credentials as a dictionary. - Defaults to None. config_key (str, optional): The key in the viadot config holding relevant credentials. Defaults to None. @@ -56,7 +53,7 @@ def exchange_rates_to_df( Returns: pd.DataFrame: The pandas `DataFrame` containing data from the file. 
""" - if not (credentials_secret or config_key or credentials): + if not (credentials_secret or config_key): raise MissingSourceCredentialsError if not symbols: @@ -74,7 +71,9 @@ def exchange_rates_to_df( "ISK", ] - credentials = credentials or get_credentials(credentials_secret) + if not config_key: + credentials = get_credentials(credentials_secret) + e = ExchangeRates( currency=currency, start_date=start_date, diff --git a/src/viadot/sources/__init__.py b/src/viadot/sources/__init__.py index 0105447e0..e1d1fa6f5 100644 --- a/src/viadot/sources/__init__.py +++ b/src/viadot/sources/__init__.py @@ -3,8 +3,9 @@ from importlib.util import find_spec from .business_core import BusinessCore +from ._duckdb import DuckDB +from ._trino import Trino from .cloud_for_customers import CloudForCustomers -from .duckdb import DuckDB from .epicor import Epicor from .exchange_rates import ExchangeRates from .genesys import Genesys @@ -13,7 +14,6 @@ from .outlook import Outlook from .sharepoint import Sharepoint from .sql_server import SQLServer -from .trino import Trino from .uk_carbon_intensity import UKCarbonIntensity @@ -38,7 +38,7 @@ __all__.extend(["AzureDataLake"]) if find_spec("duckdb"): - from viadot.sources.duckdb import DuckDB # noqa: F401 + from viadot.sources._duckdb import DuckDB # noqa: F401 __all__.extend(["DuckDB"]) diff --git a/src/viadot/sources/duckdb.py b/src/viadot/sources/_duckdb.py similarity index 100% rename from src/viadot/sources/duckdb.py rename to src/viadot/sources/_duckdb.py diff --git a/src/viadot/sources/trino.py b/src/viadot/sources/_trino.py similarity index 100% rename from src/viadot/sources/trino.py rename to src/viadot/sources/_trino.py diff --git a/src/viadot/sources/s3.py b/src/viadot/sources/s3.py index a46f8dad4..efb950e29 100644 --- a/src/viadot/sources/s3.py +++ b/src/viadot/sources/s3.py @@ -1,9 +1,9 @@ """A module for working with Amazon S3 as a data source.""" -from collections.abc import Iterable +from collections.abc import 
Iterable, Iterator import os from pathlib import Path -from typing import Literal +from typing import Any, Literal try: @@ -310,3 +310,32 @@ def download(self, from_path: str, to_path: str) -> None: to_path (str): Path to local file(s) to be stored. """ wr.s3.download(boto3_session=self.session, path=from_path, local_file=to_path) + + def get_page_iterator( + self, + bucket_name: str, + directory_path: str, + operation_name: str = "list_objects_v2", + **kwargs, + ) -> Iterator[dict[str, Any]]: + """Returns an iterator to paginate through the objects in an S3 bucket directory. + + This method uses the S3 paginator to list objects under a specified directory + path in a given S3 bucket. It can accept additional optional parameters + through **kwargs, which will be passed to the paginator. + + Args: + bucket_name (str): The name of the S3 bucket. + directory_path (str): The directory path (prefix) in the bucket to list + objects from. + operation_name (str): The operation name. This is the same name as + the method name on the client. Defaults to "list_objects_v2". + **kwargs: Additional arguments to pass to the paginator (optional). + + Returns: + Iterator: An iterator to paginate through the S3 objects. 
+ """ + client = self.session.client("s3") + paginator = client.get_paginator(operation_name=operation_name) + + return paginator.paginate(Bucket=bucket_name, Prefix=directory_path, **kwargs) diff --git a/src/viadot/sources/sap_rfc.py b/src/viadot/sources/sap_rfc.py index d756e677a..cf2ab2b23 100755 --- a/src/viadot/sources/sap_rfc.py +++ b/src/viadot/sources/sap_rfc.py @@ -983,8 +983,7 @@ def query(self, sql: str, sep: str | None = None) -> None: # noqa: C901, PLR091 self.rfc_total_col_width_character_limit - col_length_reference_column ) - if local_limit < character_limit: - character_limit = local_limit + character_limit = min(local_limit, character_limit) else: character_limit = self.rfc_total_col_width_character_limit @@ -1151,7 +1150,10 @@ def to_df(self, tests: dict | None = None) -> pd.DataFrame: # noqa: C901, PLR09 chunk += 1 elif not response["DATA"]: logger.warning("No data returned from SAP.") - df = df.loc[:, columns] + if not df.empty: + # It is used to filter out columns which are not in select query + # for example columns passed only as unique column + df = df.loc[:, columns] if self.client_side_filters: filter_query = self._build_pandas_filter_query(self.client_side_filters) diff --git a/tests/integration/orchestration/prefect/flows/test_duckdb_to_sql_server.py b/tests/integration/orchestration/prefect/flows/test_duckdb_to_sql_server.py index 7eee9f786..dd2a8a5c1 100644 --- a/tests/integration/orchestration/prefect/flows/test_duckdb_to_sql_server.py +++ b/tests/integration/orchestration/prefect/flows/test_duckdb_to_sql_server.py @@ -1,6 +1,7 @@ from pathlib import Path import pytest + from viadot.orchestration.prefect.flows import duckdb_to_sql_server from viadot.orchestration.prefect.tasks import sql_server_query from viadot.sources import DuckDB, SQLServer @@ -12,13 +13,13 @@ DUCKDB_CREDS = {"database": DATABASE_PATH, "read_only": False} -@pytest.fixture() +@pytest.fixture def sql_server(): # Initialize the SQLServer instance with the test 
credentials. return SQLServer(config_key="sql_server") -@pytest.fixture() +@pytest.fixture def duckdb(): # Initialize the SQLServer instance with the test credentials. duckdb = DuckDB(credentials=DUCKDB_CREDS) diff --git a/tests/integration/orchestration/prefect/flows/test_duckdb_transform.py b/tests/integration/orchestration/prefect/flows/test_duckdb_transform.py index e7441e505..d68c3b7e9 100644 --- a/tests/integration/orchestration/prefect/flows/test_duckdb_transform.py +++ b/tests/integration/orchestration/prefect/flows/test_duckdb_transform.py @@ -1,6 +1,7 @@ from pathlib import Path import pytest + from viadot.orchestration.prefect.flows import duckdb_transform from viadot.sources import DuckDB diff --git a/tests/integration/orchestration/prefect/flows/test_sap_to_parquet.py b/tests/integration/orchestration/prefect/flows/test_sap_to_parquet.py index 0d8c42a3c..5d2ef1a24 100644 --- a/tests/integration/orchestration/prefect/flows/test_sap_to_parquet.py +++ b/tests/integration/orchestration/prefect/flows/test_sap_to_parquet.py @@ -1,6 +1,7 @@ from pathlib import Path import pandas as pd + from viadot.orchestration.prefect.flows import sap_to_parquet diff --git a/tests/integration/orchestration/prefect/tasks/test_adls.py b/tests/integration/orchestration/prefect/tasks/test_adls.py index 6c8f17773..1cb8f1881 100644 --- a/tests/integration/orchestration/prefect/tasks/test_adls.py +++ b/tests/integration/orchestration/prefect/tasks/test_adls.py @@ -1,5 +1,6 @@ import pandas as pd from prefect import flow, task + from viadot.orchestration.prefect.tasks import df_to_adls from viadot.utils import skip_test_on_missing_extra diff --git a/tests/integration/orchestration/prefect/tasks/test_databricks.py b/tests/integration/orchestration/prefect/tasks/test_databricks.py index 9215d4d9f..d2ef03742 100644 --- a/tests/integration/orchestration/prefect/tasks/test_databricks.py +++ b/tests/integration/orchestration/prefect/tasks/test_databricks.py @@ -2,6 +2,7 @@ from prefect 
import flow import pytest + from viadot.exceptions import TableDoesNotExistError from viadot.utils import skip_test_on_missing_extra diff --git a/tests/integration/orchestration/prefect/tasks/test_dbt.py b/tests/integration/orchestration/prefect/tasks/test_dbt.py index 4175027b1..b156ce47e 100644 --- a/tests/integration/orchestration/prefect/tasks/test_dbt.py +++ b/tests/integration/orchestration/prefect/tasks/test_dbt.py @@ -1,4 +1,5 @@ from prefect import flow + from viadot.orchestration.prefect.tasks import dbt_task diff --git a/tests/integration/orchestration/prefect/tasks/test_duckdb.py b/tests/integration/orchestration/prefect/tasks/test_duckdb.py index b9cf469fa..c938bb9ee 100644 --- a/tests/integration/orchestration/prefect/tasks/test_duckdb.py +++ b/tests/integration/orchestration/prefect/tasks/test_duckdb.py @@ -1,6 +1,7 @@ from pathlib import Path import pytest + from viadot.orchestration.prefect.tasks import duckdb_query from viadot.sources import DuckDB diff --git a/tests/integration/orchestration/prefect/tasks/test_git.py b/tests/integration/orchestration/prefect/tasks/test_git.py index 02c90d308..96192fa7c 100644 --- a/tests/integration/orchestration/prefect/tasks/test_git.py +++ b/tests/integration/orchestration/prefect/tasks/test_git.py @@ -2,6 +2,7 @@ import shutil from loguru import logger + from viadot.orchestration.prefect.tasks import clone_repo diff --git a/tests/integration/orchestration/prefect/tasks/test_luma.py b/tests/integration/orchestration/prefect/tasks/test_luma.py index ebf61a755..08a52b30a 100644 --- a/tests/integration/orchestration/prefect/tasks/test_luma.py +++ b/tests/integration/orchestration/prefect/tasks/test_luma.py @@ -1,9 +1,10 @@ from loguru import logger import pytest + from viadot.orchestration.prefect.tasks import luma_ingest_task -@pytest.mark.asyncio() +@pytest.mark.asyncio async def test_luma_ingest_task_model_metadata(LUMA_URL): logs = await luma_ingest_task.fn( metadata_kind="model", @@ -18,7 +19,7 @@ async def 
test_luma_ingest_task_model_metadata(LUMA_URL): assert success_message in log -@pytest.mark.asyncio() +@pytest.mark.asyncio async def test_luma_ingest_task_model_run_metadata(LUMA_URL): logs = await luma_ingest_task.fn( metadata_kind="model_run", @@ -33,7 +34,7 @@ async def test_luma_ingest_task_model_run_metadata(LUMA_URL): assert success_message in log -@pytest.mark.asyncio() +@pytest.mark.asyncio async def test_luma_ingest_task_model_run_metadata_follow(LUMA_URL): logs = await luma_ingest_task.fn( metadata_kind="model_run", diff --git a/tests/integration/orchestration/prefect/tasks/test_minio.py b/tests/integration/orchestration/prefect/tasks/test_minio.py index 1d48e1864..f6676fcb4 100644 --- a/tests/integration/orchestration/prefect/tasks/test_minio.py +++ b/tests/integration/orchestration/prefect/tasks/test_minio.py @@ -1,4 +1,5 @@ import pandas as pd + from viadot.orchestration.prefect.tasks import df_to_minio from viadot.orchestration.prefect.utils import get_credentials from viadot.sources import MinIO diff --git a/tests/integration/orchestration/prefect/tasks/test_redshift_spectrum.py b/tests/integration/orchestration/prefect/tasks/test_redshift_spectrum.py index 12c2d2a0b..1a0b053fc 100644 --- a/tests/integration/orchestration/prefect/tasks/test_redshift_spectrum.py +++ b/tests/integration/orchestration/prefect/tasks/test_redshift_spectrum.py @@ -3,6 +3,7 @@ import pandas as pd from prefect import flow import pytest + from viadot.orchestration.prefect.tasks import df_to_redshift_spectrum from viadot.utils import skip_test_on_missing_extra diff --git a/tests/integration/orchestration/prefect/tasks/test_s3.py b/tests/integration/orchestration/prefect/tasks/test_s3.py index 269a64244..58d120aa8 100644 --- a/tests/integration/orchestration/prefect/tasks/test_s3.py +++ b/tests/integration/orchestration/prefect/tasks/test_s3.py @@ -4,6 +4,7 @@ import pandas as pd from prefect import flow import pytest + from viadot.orchestration.prefect.tasks import 
s3_upload_file from viadot.utils import skip_test_on_missing_extra @@ -23,7 +24,7 @@ def s3(aws_config_key): return S3(config_key=aws_config_key) -@pytest.fixture() +@pytest.fixture def TEST_FILE_PATH(): path = "test.csv" df = pd.DataFrame( diff --git a/tests/integration/orchestration/prefect/tasks/test_sap_rfc.py b/tests/integration/orchestration/prefect/tasks/test_sap_rfc.py index 8c2daf1b7..4f07aec77 100644 --- a/tests/integration/orchestration/prefect/tasks/test_sap_rfc.py +++ b/tests/integration/orchestration/prefect/tasks/test_sap_rfc.py @@ -1,4 +1,5 @@ from prefect import flow + from viadot.orchestration.prefect.tasks import sap_rfc_to_df diff --git a/tests/integration/orchestration/prefect/tasks/test_sharepoint_tasks.py b/tests/integration/orchestration/prefect/tasks/test_sharepoint_tasks.py index f558d28fa..f1b05f050 100644 --- a/tests/integration/orchestration/prefect/tasks/test_sharepoint_tasks.py +++ b/tests/integration/orchestration/prefect/tasks/test_sharepoint_tasks.py @@ -3,6 +3,7 @@ import pandas as pd from prefect import flow + from viadot.orchestration.prefect.tasks import ( sharepoint_download_file, sharepoint_to_df, diff --git a/tests/integration/orchestration/prefect/tasks/test_sql_server.py b/tests/integration/orchestration/prefect/tasks/test_sql_server.py index f7fdca69c..895ec955a 100644 --- a/tests/integration/orchestration/prefect/tasks/test_sql_server.py +++ b/tests/integration/orchestration/prefect/tasks/test_sql_server.py @@ -1,4 +1,5 @@ import pytest + from viadot.orchestration.prefect.tasks import ( create_sql_server_table, sql_server_query, @@ -11,7 +12,7 @@ SCHEMA = "sandbox" -@pytest.fixture() +@pytest.fixture def sql_server(): # Initialize the SQLServer instance with the test credentials. 
return SQLServer(config_key="sql_server") diff --git a/tests/integration/test_azure_data_lake.py b/tests/integration/test_azure_data_lake.py index 3a42cbbd4..1f3b2000c 100644 --- a/tests/integration/test_azure_data_lake.py +++ b/tests/integration/test_azure_data_lake.py @@ -1,4 +1,5 @@ import pandas as pd + from viadot.utils import skip_test_on_missing_extra diff --git a/tests/integration/test_cloud_for_customers.py b/tests/integration/test_cloud_for_customers.py index 199eb820c..9a20dc961 100644 --- a/tests/integration/test_cloud_for_customers.py +++ b/tests/integration/test_cloud_for_customers.py @@ -3,6 +3,7 @@ from datetime import datetime, timedelta import pandas as pd + from viadot.sources.cloud_for_customers import CloudForCustomers diff --git a/tests/integration/test_databricks.py b/tests/integration/test_databricks.py index 15ce93707..7730e557c 100644 --- a/tests/integration/test_databricks.py +++ b/tests/integration/test_databricks.py @@ -2,12 +2,14 @@ import pandas as pd import pytest + from viadot.exceptions import TableDoesNotExistError from viadot.utils import add_viadot_metadata_columns, skip_test_on_missing_extra try: from pyspark.sql.utils import AnalysisException + from viadot.sources import Databricks except ImportError: skip_test_on_missing_extra(source_name="Databricks", extra="databricks") @@ -78,7 +80,7 @@ def databricks(databricks_config_key): databricks.session.stop() -@pytest.mark.dependency() +@pytest.mark.dependency def test_create_schema(databricks): with contextlib.suppress(AnalysisException): databricks.drop_schema(TEST_SCHEMA_2) @@ -110,7 +112,7 @@ def test_drop_schema(databricks): databricks.create_schema(TEST_SCHEMA) -@pytest.mark.dependency() +@pytest.mark.dependency def test_create_table(databricks): exists = databricks._check_if_table_exists(schema=TEST_SCHEMA, table=TEST_TABLE) assert exists is False @@ -174,7 +176,7 @@ def to_df(self): databricks.drop_table(schema=TEST_SCHEMA, table=TEST_TABLE) -@pytest.mark.dependency() 
+@pytest.mark.dependency def test_create_table_replace(databricks): # Setup. with contextlib.suppress(Exception): diff --git a/tests/integration/test_epicor.py b/tests/integration/test_epicor.py index b6f61cd19..4a17a0b0c 100644 --- a/tests/integration/test_epicor.py +++ b/tests/integration/test_epicor.py @@ -1,5 +1,6 @@ import pandas as pd import pytest + from viadot.config import get_source_credentials from viadot.exceptions import DataRangeError from viadot.sources import Epicor diff --git a/tests/integration/test_exchange_rates.py b/tests/integration/test_exchange_rates.py index ece18a883..470add703 100644 --- a/tests/integration/test_exchange_rates.py +++ b/tests/integration/test_exchange_rates.py @@ -1,5 +1,6 @@ import pandas as pd import pytest + from viadot.sources import ExchangeRates diff --git a/tests/integration/test_minio.py b/tests/integration/test_minio.py index 6a5b07da2..b838505ed 100644 --- a/tests/integration/test_minio.py +++ b/tests/integration/test_minio.py @@ -1,6 +1,7 @@ from contextlib import nullcontext as does_not_raise import pytest + from viadot.utils import skip_test_on_missing_extra diff --git a/tests/integration/test_redshift_spectrum.py b/tests/integration/test_redshift_spectrum.py index c5593d662..b7ed3c0c0 100644 --- a/tests/integration/test_redshift_spectrum.py +++ b/tests/integration/test_redshift_spectrum.py @@ -3,6 +3,7 @@ import pandas as pd import pytest + from viadot.utils import skip_test_on_missing_extra diff --git a/tests/integration/test_s3.py b/tests/integration/test_s3.py index 53777a0d3..bf725a562 100644 --- a/tests/integration/test_s3.py +++ b/tests/integration/test_s3.py @@ -3,6 +3,7 @@ import pandas as pd import pytest + from viadot.utils import skip_test_on_missing_extra diff --git a/tests/integration/test_sharepoint.py b/tests/integration/test_sharepoint.py index 620921349..58b10e415 100644 --- a/tests/integration/test_sharepoint.py +++ b/tests/integration/test_sharepoint.py @@ -2,6 +2,7 @@ from pathlib import 
Path import pytest + from viadot.exceptions import CredentialError from viadot.sources import Sharepoint diff --git a/tests/integration/test_sql_server.py b/tests/integration/test_sql_server.py index 1d59a9028..92abbd3e7 100644 --- a/tests/integration/test_sql_server.py +++ b/tests/integration/test_sql_server.py @@ -2,10 +2,11 @@ import struct import pytest + from viadot.sources import SQLServer -@pytest.fixture() +@pytest.fixture def sql_server(): # Initialize the SQLServer instance with the test credentials. return SQLServer(config_key="sql_server") diff --git a/tests/integration/test_trino.py b/tests/integration/test_trino.py index 67cf0adc3..eeaf08e2e 100644 --- a/tests/integration/test_trino.py +++ b/tests/integration/test_trino.py @@ -2,6 +2,7 @@ import pyarrow as pa import pytest + from viadot.sources import Trino diff --git a/tests/unit/orchestration/prefect/test_git.py b/tests/unit/orchestration/prefect/test_git.py index 8b4ef3960..6bc55457b 100644 --- a/tests/unit/orchestration/prefect/test_git.py +++ b/tests/unit/orchestration/prefect/test_git.py @@ -2,6 +2,7 @@ import shutil from loguru import logger + from viadot.orchestration.prefect.tasks import clone_repo diff --git a/tests/unit/test_cloud_for_customers.py b/tests/unit/test_cloud_for_customers.py index d1323d0a7..e35869c37 100644 --- a/tests/unit/test_cloud_for_customers.py +++ b/tests/unit/test_cloud_for_customers.py @@ -2,6 +2,7 @@ from pydantic import SecretStr import pytest + from viadot.exceptions import CredentialError from viadot.sources.cloud_for_customers import ( CloudForCustomers, @@ -24,7 +25,7 @@ def test_is_configured_invalid(): CloudForCustomersCredentials.is_configured(credentials) -@pytest.fixture() +@pytest.fixture def c4c(): credentials = { "username": "user@tenant.com", diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index ca07cf405..ccac9d2fb 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -2,13 +2,14 @@ from pathlib import Path 
import pytest + from viadot.config import Config, get_source_config, get_source_credentials FAKE_SOURCE_CONFIG = {"fake_source": {"credentials": {"api_key": "test"}}} -@pytest.fixture() +@pytest.fixture def TEST_CONFIG_PATH(): """Creates and deletes a test config file for each test. @@ -29,7 +30,7 @@ def TEST_CONFIG_PATH(): config_path.unlink() -@pytest.fixture() +@pytest.fixture def TEST_CONFIG_PATH_JSON(): """Creates and deletes a test config file for each test. diff --git a/tests/unit/test_duckdb.py b/tests/unit/test_duckdb.py index 48677faff..c4e1cdd5a 100644 --- a/tests/unit/test_duckdb.py +++ b/tests/unit/test_duckdb.py @@ -3,7 +3,8 @@ from duckdb import BinderException import pandas as pd import pytest -from viadot.sources.duckdb import DuckDB + +from viadot.sources import DuckDB TABLE = "test_table" diff --git a/tests/unit/test_genesys.py b/tests/unit/test_genesys.py index 84325b3be..9d0333a01 100644 --- a/tests/unit/test_genesys.py +++ b/tests/unit/test_genesys.py @@ -5,6 +5,7 @@ import pandas as pd import pytest + from viadot.exceptions import APIError, CredentialError from viadot.sources import Genesys @@ -91,7 +92,7 @@ } -@pytest.fixture() +@pytest.fixture def genesys(): """Return Genesys instance.""" return Genesys(credentials=variables["credentials"], verbose=True) @@ -129,7 +130,7 @@ def test_headers(mock_handle_api_response, genesys): @pytest.mark.skip(reason="Needs to be fixed.") @patch("aiohttp.ClientSession.post", new_callable=AsyncMock) -@pytest.mark.asyncio() +@pytest.mark.asyncio async def test_api_call_post_success(mock_post, genesys): """Test Genesys `_api_call()` method called with POST.""" mock_response = AsyncMock() @@ -152,7 +153,7 @@ async def test_api_call_post_success(mock_post, genesys): @pytest.mark.skip(reason="Needs to be fixed.") @patch("aiohttp.ClientSession.post", new_callable=AsyncMock) -@pytest.mark.asyncio() +@pytest.mark.asyncio def test_api_call_get_success(mock_get, genesys): """Test Genesys `_api_call()` method called 
with GET.""" mock_response = AsyncMock() @@ -176,7 +177,7 @@ def test_api_call_get_success(mock_get, genesys): @pytest.mark.skip(reason="Needs to be fixed.") @patch("aiohttp.ClientSession.post", new_callable=AsyncMock) -@pytest.mark.asyncio() +@pytest.mark.asyncio async def test_api_call_post_failure(mock_post, genesys): """Test Genesys `_api_call` method failing when called with POST.""" mock_response = AsyncMock() diff --git a/tests/unit/test_hubspot.py b/tests/unit/test_hubspot.py index 5ecf74fbc..0865a2643 100644 --- a/tests/unit/test_hubspot.py +++ b/tests/unit/test_hubspot.py @@ -7,6 +7,7 @@ import pandas as pd import pytest from requests.models import Response + from viadot.exceptions import APIError, CredentialError from viadot.sources import Hubspot from viadot.sources.hubspot import HubspotCredentials diff --git a/tests/unit/test_mindful.py b/tests/unit/test_mindful.py index 022852241..9c6b770ea 100644 --- a/tests/unit/test_mindful.py +++ b/tests/unit/test_mindful.py @@ -7,6 +7,7 @@ import pytest from requests.auth import HTTPBasicAuth from requests.models import Response + from viadot.exceptions import APIError, CredentialError from viadot.sources import Mindful from viadot.sources.mindful import MindfulCredentials diff --git a/tests/unit/test_outlook.py b/tests/unit/test_outlook.py index c83d0b293..710ad64c5 100644 --- a/tests/unit/test_outlook.py +++ b/tests/unit/test_outlook.py @@ -6,6 +6,7 @@ from O365.message import Message import pandas as pd import pytest + from viadot.exceptions import CredentialError from viadot.sources import Outlook from viadot.sources.outlook import OutlookCredentials diff --git a/tests/unit/test_redshift_spectrum.py b/tests/unit/test_redshift_spectrum.py index 393788409..060b0839b 100644 --- a/tests/unit/test_redshift_spectrum.py +++ b/tests/unit/test_redshift_spectrum.py @@ -2,17 +2,19 @@ import moto import pytest + from viadot.utils import skip_test_on_missing_extra try: import boto3 + from viadot.sources import 
RedshiftSpectrum except ImportError: skip_test_on_missing_extra("RedshiftSpectrum", extra="aws") -@pytest.fixture() +@pytest.fixture def _aws_credentials(): """Mocked AWS Credentials for moto.""" os.environ["AWS_ACCESS_KEY_ID"] = "testing" @@ -22,7 +24,7 @@ def _aws_credentials(): os.environ["AWS_DEFAULT_REGION"] = "us-east-1" -@pytest.fixture() +@pytest.fixture def _mocked_aws(_aws_credentials): """Mock all AWS interactions. @@ -32,7 +34,7 @@ def _mocked_aws(_aws_credentials): yield -@pytest.fixture() +@pytest.fixture def redshift_spectrum(_mocked_aws): conn = boto3.client("s3") conn.create_bucket(Bucket="test_bucket") diff --git a/tests/unit/test_sharepoint.py b/tests/unit/test_sharepoint.py index 177d29fe0..d0012947f 100644 --- a/tests/unit/test_sharepoint.py +++ b/tests/unit/test_sharepoint.py @@ -5,6 +5,7 @@ import pytest import sharepy from sharepy.errors import AuthError + from viadot.exceptions import CredentialError from viadot.sources import Sharepoint from viadot.sources.sharepoint import SharepointCredentials @@ -34,7 +35,7 @@ def _download_file_stream(self, url: str | None = None, **kwargs): # noqa: ARG0 return pd.ExcelFile(Path("tests/unit/test_file.xlsx")) -@pytest.fixture() +@pytest.fixture def sharepoint_mock(): return SharepointMock(credentials=DUMMY_CREDS) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index c83b7adfe..ec2c74e8a 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -4,6 +4,7 @@ import pandas as pd import pytest + from viadot.utils import ( _cast_df_cols, add_viadot_metadata_columns,