Skip to content

Commit

Permalink
Merge branch '2.0' into business_core_2.0
Browse files Browse the repository at this point in the history
  • Loading branch information
judynah committed Sep 24, 2024
2 parents c5d4aa5 + 2550b7b commit 983dcce
Show file tree
Hide file tree
Showing 55 changed files with 288 additions and 57 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ repos:

- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.3.4
rev: v0.6.7
hooks:
# Run the linter.
- id: ruff
Expand Down
1 change: 1 addition & 0 deletions .prettierignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
docs/references/sources/sql.md
26 changes: 15 additions & 11 deletions .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@
{
"path": "detect_secrets.filters.allowlist.is_line_allowlisted"
},
{
"path": "detect_secrets.filters.common.is_baseline_file",
"filename": ".secrets.baseline"
},
{
"path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies",
"min_level": 2
Expand Down Expand Up @@ -223,7 +227,7 @@
"filename": "tests/integration/orchestration/prefect/flows/test_duckdb_to_sql_server.py",
"hashed_secret": "74fb0b9813045340022392630bada6fd3d555fe6",
"is_verified": false,
"line_number": 43
"line_number": 44
}
],
"tests/integration/orchestration/prefect/flows/test_genesys_to_adls.py": [
Expand Down Expand Up @@ -282,7 +286,7 @@
"filename": "tests/integration/orchestration/prefect/tasks/test_sql_server.py",
"hashed_secret": "74fb0b9813045340022392630bada6fd3d555fe6",
"is_verified": false,
"line_number": 28
"line_number": 29
}
],
"tests/integration/test_sharepoint.py": [
Expand All @@ -291,7 +295,7 @@
"filename": "tests/integration/test_sharepoint.py",
"hashed_secret": "9fb7fe1217aed442b04c0f5e43b5d5a7d3287097",
"is_verified": false,
"line_number": 14
"line_number": 15
}
],
"tests/resources/metadata/model/manifest.json": [
Expand Down Expand Up @@ -339,7 +343,7 @@
"filename": "tests/unit/test_config.py",
"hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3",
"is_verified": false,
"line_number": 8
"line_number": 9
}
],
"tests/unit/test_genesys.py": [
Expand All @@ -348,7 +352,7 @@
"filename": "tests/unit/test_genesys.py",
"hashed_secret": "1089adfb1f11b95df31344030507912b5abdf57a",
"is_verified": false,
"line_number": 17
"line_number": 18
}
],
"tests/unit/test_outlook.py": [
Expand All @@ -357,14 +361,14 @@
"filename": "tests/unit/test_outlook.py",
"hashed_secret": "b235838f76594bf21886c6eec9c06a207e9ec5ce",
"is_verified": false,
"line_number": 17
"line_number": 18
},
{
"type": "Secret Keyword",
"filename": "tests/unit/test_outlook.py",
"hashed_secret": "1089adfb1f11b95df31344030507912b5abdf57a",
"is_verified": false,
"line_number": 48
"line_number": 49
}
],
"tests/unit/test_redshift_spectrum.py": [
Expand All @@ -373,7 +377,7 @@
"filename": "tests/unit/test_redshift_spectrum.py",
"hashed_secret": "dc724af18fbdd4e59189f5fe768a5f8311527050",
"is_verified": false,
"line_number": 19
"line_number": 21
}
],
"tests/unit/test_sharepoint.py": [
Expand All @@ -382,16 +386,16 @@
"filename": "tests/unit/test_sharepoint.py",
"hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3",
"is_verified": false,
"line_number": 13
"line_number": 14
},
{
"type": "Secret Keyword",
"filename": "tests/unit/test_sharepoint.py",
"hashed_secret": "5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8",
"is_verified": false,
"line_number": 46
"line_number": 47
}
]
},
"generated_at": "2024-08-30T12:50:18Z"
"generated_at": "2024-09-23T12:45:10Z"
}
4 changes: 3 additions & 1 deletion docs/developer_guide/creating_a_prefect_flow.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ def postgresql_to_df(credentials_key: str | None = None, credentials_secret: str
if not (credentials_secret or config_key):
raise MissingSourceCredentialsError

credentials = credentials or get_credentials(credentials_secret)
if not config_key:
credentials = get_credentials(credentials_secret)

postgres = PostgreSQL(credentials=credentials, config_key=config_key)
return postgres.to_df(...)
```
Expand Down
26 changes: 26 additions & 0 deletions docs/references/orchestration/prefect/flows.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

::: viadot.orchestration.prefect.flows.exchange_rates_to_adls

::: viadot.orchestration.prefect.flows.exchange_rates_to_databricks

::: viadot.orchestration.prefect.flows.sap_to_redshift_spectrum

::: viadot.orchestration.prefect.flows.sharepoint_to_adls
Expand All @@ -17,3 +19,27 @@
::: viadot.orchestration.prefect.flows.transform

::: viadot.orchestration.prefect.flows.transform_and_catalog

::: viadot.orchestration.prefect.flows.duckdb_to_parquet

::: viadot.orchestration.prefect.flows.duckdb_to_sql_server

::: viadot.orchestration.prefect.flows.duckdb_transform

::: viadot.orchestration.prefect.flows.epicor_to_parquet

::: viadot.orchestration.prefect.flows.exchange_rates_api_to_redshift_spectrum

::: viadot.orchestration.prefect.flows.genesys_to_adls

::: viadot.orchestration.prefect.flows.hubspot_to_adls

::: viadot.orchestration.prefect.flows.mindful_to_adls

::: viadot.orchestration.prefect.flows.outlook_to_adls

::: viadot.orchestration.prefect.flows.sap_to_parquet

::: viadot.orchestration.prefect.flows.sql_server_to_minio

::: viadot.orchestration.prefect.flows.sql_server_to_parquet
22 changes: 22 additions & 0 deletions docs/references/orchestration/prefect/tasks.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,25 @@
::: viadot.orchestration.prefect.tasks.sharepoint_download_file

::: viadot.orchestration.prefect.tasks.sharepoint_to_df

::: viadot.orchestration.prefect.tasks.duckdb_query

::: viadot.orchestration.prefect.tasks.epicor_to_df

::: viadot.orchestration.prefect.tasks.genesys_to_df

::: viadot.orchestration.prefect.tasks.hubspot_to_df

::: viadot.orchestration.prefect.tasks.mindful_to_df

::: viadot.orchestration.prefect.tasks.outlook_to_df

::: viadot.orchestration.prefect.tasks.sap_rfc_to_df

::: viadot.orchestration.prefect.tasks.bcp

::: viadot.orchestration.prefect.tasks.create_sql_server_table

::: viadot.orchestration.prefect.tasks.sql_server_query

::: viadot.orchestration.prefect.tasks.sql_server_to_df
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,16 @@

::: viadot.sources.cloud_for_customers.CloudForCustomers

::: viadot.sources.sap_rfc.SAPRFC

::: viadot.sources.sharepoint.Sharepoint

::: viadot.sources.genesys.Genesys

::: viadot.sources.outlook.Outlook

::: viadot.sources.hubspot.Hubspot

::: viadot.sources.epicor.Epicor

::: viadot.sources.mindful.Mindful

::: viadot.sources.minio.MinIO
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,10 @@

::: viadot.sources.databricks.Databricks

::: viadot.sources.trino.Trino
::: viadot.sources._trino.Trino

::: viadot.sources._duckdb.DuckDB

::: viadot.sources.sap_rfc.SAPRFC

::: viadot.sources.sap_rfc.SAPRFCV2
4 changes: 2 additions & 2 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ nav:

- References:
- Sources:
- SQL Sources: references/sources/sql_sources.md
- API Sources: references/sources/api_sources.md
- SQL Sources: references/sources/sql.md
- API Sources: references/sources/api.md
- Orchestration:
- Prefect:
- Tasks: references/orchestration/prefect/tasks.md
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "viadot2"
version = "2.1.17"
version = "2.1.19"
description = "A simple data ingestion library to guide data flows from some places to other places."
authors = [
{ name = "acivitillo", email = "[email protected]" },
Expand Down Expand Up @@ -80,7 +80,7 @@ dev-dependencies = [
"mkdocs-git-revision-date-plugin>=0.3.2",
"mkdocs-glightbox>=0.4.0",
"pytest>=8.2.2",
"ruff>=0.5.2",
"ruff>=0.6.6",
"pytest-asyncio>=0.23.8",
"moto>=5.0.13",
]
Expand Down
2 changes: 2 additions & 0 deletions src/viadot/orchestration/prefect/flows/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .epicor_to_parquet import epicor_to_parquet
from .exchange_rates_to_adls import exchange_rates_to_adls
from .exchange_rates_to_databricks import exchange_rates_to_databricks
from .exchange_rates_to_redshift_spectrum import exchange_rates_api_to_redshift_spectrum
from .genesys_to_adls import genesys_to_adls
from .hubspot_to_adls import hubspot_to_adls
from .mindful_to_adls import mindful_to_adls
Expand All @@ -35,6 +36,7 @@
"epicor_to_parquet",
"exchange_rates_to_adls",
"exchange_rates_to_databricks",
"exchange_rates_api_to_redshift_spectrum",
"genesys_to_adls",
"hubspot_to_adls",
"mindful_to_adls",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""Download data from Exchange Rates API and upload it to AWS Redshift Spectrum."""

from datetime import datetime
from typing import Literal

from prefect import flow

from viadot.orchestration.prefect.tasks import (
df_to_redshift_spectrum,
exchange_rates_to_df,
)
from viadot.orchestration.prefect.tasks.exchange_rates import Currency


@flow(
name="extract--exchange-rates-api--redshift_spectrum",
description="Extract data from Exchange Rates API and load it into AWS Redshift Spectrum.",
retries=1,
retry_delay_seconds=60,
)
def exchange_rates_api_to_redshift_spectrum( # noqa: PLR0913
to_path: str,
schema_name: str,
table: str,
currency: Currency = "USD",
start_date: str = datetime.today().strftime("%Y-%m-%d"),
end_date: str = datetime.today().strftime("%Y-%m-%d"),
symbols: list[str] | None = None,
if_exists: Literal["overwrite", "append"] = "overwrite",
partition_cols: list[str] | None = None,
compression: str | None = None,
table_description: str | None = None,
aws_config_key: str | None = None,
aws_credentials_secret: str | None = None,
exchange_rates_api_credentials_secret: str | None = None,
exchange_rates_api_config_key: str | None = None,
) -> None:
"""Extract data from Exchange Rates API and load it into AWS Redshift Spectrum.
Args:
currency (Currency, optional): Base currency to which prices of searched
currencies are related. Defaults to "USD".
start_date (str, optional): Initial date for data search.
Data range is start_date -> end_date,
supported format 'yyyy-mm-dd'.
Defaults to datetime.today().strftime("%Y-%m-%d").
end_date (str, optional): See above.
Defaults to datetime.today().strftime("%Y-%m-%d").
symbols (list[str], optional): List of ISO codes of currencies for which
exchange rates from base currency will be fetched. Defaults to
["USD","EUR","GBP","CHF","PLN","DKK","COP","CZK","SEK","NOK","ISK"].
to_path (str): Path to a S3 folder where the table will be located. Defaults to
None.
schema_name (str): AWS Glue catalog database name.
table (str): AWS Glue catalog table name.
if_exists (str, optional): 'overwrite' to recreate any possible existing table
or 'append' to keep any possible existing table. Defaults to overwrite.
partition_cols (list[str], optional): List of column names that will be used to
create partitions. Only takes effect if dataset=True. Defaults to None.
compression (str, optional): Compression style (None, snappy, gzip, zstd).
sep (str, optional): Field delimiter for the output file. Defaults to ','.
table_description (str, optional): AWS Glue catalog table description. Defaults
to None.
aws_config_key (str, optional): The key in the viadot config holding relevant
credentials. Defaults to None.
aws_credentials_secret (str, optional): The name of a secret block in Prefect
that stores AWS credentials. Defaults to None.
exchange_rates_api_credentials_secret (str, optional): The name of the secret
storing Exchange Rates API API key. Defaults to None.
More info on: https://docs.prefect.io/concepts/blocks/
exchange_rates_api_config_key (str, optional): The key in the viadot config
holding relevant credentials. Defaults to None.
"""
df = exchange_rates_to_df(
currency=currency,
start_date=start_date,
end_date=end_date,
symbols=symbols,
credentials_secret=exchange_rates_api_credentials_secret,
config_key=exchange_rates_api_config_key,
)
df_to_redshift_spectrum(
df=df,
to_path=to_path,
schema_name=schema_name,
table=table,
if_exists=if_exists,
partition_cols=partition_cols,
compression=compression,
description=table_description,
config_key=aws_config_key,
credentials_secret=aws_credentials_secret,
)
Loading

0 comments on commit 983dcce

Please sign in to comment.