From b05193310e5d7831c79f379c554b9f66410bd992 Mon Sep 17 00:00:00 2001 From: Marek Dobransky Date: Fri, 7 Jun 2024 13:01:28 +0200 Subject: [PATCH 1/2] initial oss commit --- .flake8 | 16 + .github/CODEOWNERS | 1 + .github/workflows/autodoc.yml | 62 + .github/workflows/pull_rq.yml | 36 + .github/workflows/release-merge.yml | 25 + .github/workflows/release.yml | 40 + .gitignore | 10 + .pre-commit-config.yaml | 35 + CHANGELOG.md | 17 + README.md | 550 ++++++ docs/Makefile | 20 + docs/make.bat | 35 + docs/source/conf.py | 46 + docs/source/index.rst | 23 + docs/source/readme.rst | 5 + docs/source/rialto.common.rst | 29 + docs/source/rialto.jobs.decorators.rst | 45 + docs/source/rialto.jobs.rst | 18 + docs/source/rialto.loader.rst | 45 + docs/source/rialto.maker.rst | 45 + docs/source/rialto.metadata.data_classes.rst | 29 + docs/source/rialto.metadata.rst | 45 + docs/source/rialto.runner.rst | 61 + poetry.lock | 1532 +++++++++++++++++ pyproject.toml | 50 + rialto/__init__.py | 13 + rialto/common/__init__.py | 15 + rialto/common/table_reader.py | 176 ++ rialto/common/utils.py | 83 + rialto/jobs/__init__.py | 13 + rialto/jobs/configuration/config_holder.py | 130 ++ rialto/jobs/decorators/__init__.py | 15 + rialto/jobs/decorators/decorators.py | 100 ++ rialto/jobs/decorators/job_base.py | 135 ++ rialto/jobs/decorators/resolver.py | 110 ++ rialto/jobs/decorators/test_utils.py | 61 + rialto/loader/__init__.py | 16 + rialto/loader/config_loader.py | 48 + rialto/loader/data_loader.py | 45 + rialto/loader/interfaces.py | 76 + rialto/loader/pyspark_feature_loader.py | 211 +++ rialto/maker/__init__.py | 19 + rialto/maker/containers.py | 91 + rialto/maker/feature_maker.py | 264 +++ rialto/maker/utils.py | 46 + rialto/maker/wrappers.py | 194 +++ rialto/metadata/__init__.py | 19 + rialto/metadata/data_classes/__init__.py | 13 + .../metadata/data_classes/feature_metadata.py | 76 + .../metadata/data_classes/group_metadata.py | 84 + rialto/metadata/enums.py | 35 + rialto/metadata/metadata_manager.py | 120 ++ rialto/metadata/utils.py | 34 + rialto/runner/__init__.py | 16 + rialto/runner/config_loader.py | 88 + rialto/runner/date_manager.py | 107 ++ rialto/runner/runner.py | 410 +++++ rialto/runner/table.py | 55 + rialto/runner/tracker.py | 261 +++ rialto/runner/transformation.py | 48 + tests/__init__.py | 13 + tests/common/conftest.py | 36 + tests/common/test_utils.py | 44 + tests/jobs/conftest.py | 37 + tests/jobs/resources.py | 43 + tests/jobs/test_config_holder.py | 100 ++ tests/jobs/test_decorators.py | 65 + tests/jobs/test_job/test_job.py | 40 + tests/jobs/test_job_base.py | 93 + tests/jobs/test_resolver.py | 65 + tests/jobs/test_test_utils.py | 48 + tests/loader/__init__.py | 13 + .../loader/metadata_config/full_example.yaml | 33 + .../missing_field_example.yaml | 27 + .../missing_value_example.yaml | 28 + .../metadata_config/no_map_example.yaml | 30 + .../metadata_config/test_main_config.py | 56 + tests/loader/pyspark/dataframe_builder.py | 27 + tests/loader/pyspark/dummy_loaders.py | 24 + tests/loader/pyspark/example_cfg.yaml | 24 + tests/loader/pyspark/resources.py | 44 + tests/loader/pyspark/test_from_cfg.py | 137 ++ tests/maker/__init__.py | 13 + tests/maker/conftest.py | 36 + tests/maker/test_FeatureFunction.py | 74 + tests/maker/test_FeatureHolder.py | 36 + tests/maker/test_FeatureMaker.py | 187 ++ tests/maker/test_features/__init__.py | 13 + .../aggregated_num_sum_outbound.py | 27 + .../test_features/aggregated_num_sum_txn.py | 27 + .../test_features/dependent_features_fail.py | 29 + 
.../test_features/dependent_features_fail2.py | 23 + .../test_features/dependent_features_ok.py | 47 + .../test_features/sequential_avg_outbound.py | 22 + .../maker/test_features/sequential_avg_txn.py | 22 + .../test_features/sequential_for_testing.py | 25 + .../test_features/sequential_outbound.py | 22 + .../sequential_outbound_with_param.py | 25 + tests/maker/test_wrappers.py | 116 ++ tests/metadata/__init__.py | 13 + tests/metadata/conftest.py | 56 + tests/metadata/resources.py | 64 + tests/metadata/test_metadata_connector.py | 43 + tests/runner/__init__.py | 13 + tests/runner/conftest.py | 44 + tests/runner/runner_resources.py | 38 + tests/runner/test_date_manager.py | 171 ++ tests/runner/test_runner.py | 360 ++++ tests/runner/test_table.py | 28 + tests/runner/transformations/__init__.py | 14 + tests/runner/transformations/config.yaml | 82 + tests/runner/transformations/config2.yaml | 45 + tests/runner/transformations/simple_group.py | 34 + 113 files changed, 8823 insertions(+) create mode 100644 .flake8 create mode 100644 .github/CODEOWNERS create mode 100644 .github/workflows/autodoc.yml create mode 100644 .github/workflows/pull_rq.yml create mode 100644 .github/workflows/release-merge.yml create mode 100644 .github/workflows/release.yml create mode 100644 .gitignore create mode 100644 .pre-commit-config.yaml create mode 100644 CHANGELOG.md create mode 100644 README.md create mode 100644 docs/Makefile create mode 100644 docs/make.bat create mode 100644 docs/source/conf.py create mode 100644 docs/source/index.rst create mode 100644 docs/source/readme.rst create mode 100644 docs/source/rialto.common.rst create mode 100644 docs/source/rialto.jobs.decorators.rst create mode 100644 docs/source/rialto.jobs.rst create mode 100644 docs/source/rialto.loader.rst create mode 100644 docs/source/rialto.maker.rst create mode 100644 docs/source/rialto.metadata.data_classes.rst create mode 100644 docs/source/rialto.metadata.rst create mode 100644 docs/source/rialto.runner.rst create mode 100644 poetry.lock create mode 100644 pyproject.toml create mode 100644 rialto/__init__.py create mode 100644 rialto/common/__init__.py create mode 100644 rialto/common/table_reader.py create mode 100644 rialto/common/utils.py create mode 100644 rialto/jobs/__init__.py create mode 100644 rialto/jobs/configuration/config_holder.py create mode 100644 rialto/jobs/decorators/__init__.py create mode 100644 rialto/jobs/decorators/decorators.py create mode 100644 rialto/jobs/decorators/job_base.py create mode 100644 rialto/jobs/decorators/resolver.py create mode 100644 rialto/jobs/decorators/test_utils.py create mode 100644 rialto/loader/__init__.py create mode 100644 rialto/loader/config_loader.py create mode 100644 rialto/loader/data_loader.py create mode 100644 rialto/loader/interfaces.py create mode 100644 rialto/loader/pyspark_feature_loader.py create mode 100644 rialto/maker/__init__.py create mode 100644 rialto/maker/containers.py create mode 100644 rialto/maker/feature_maker.py create mode 100644 rialto/maker/utils.py create mode 100644 rialto/maker/wrappers.py create mode 100644 rialto/metadata/__init__.py create mode 100644 rialto/metadata/data_classes/__init__.py create mode 100644 rialto/metadata/data_classes/feature_metadata.py create mode 100644 rialto/metadata/data_classes/group_metadata.py create mode 100644 rialto/metadata/enums.py create mode 100644 rialto/metadata/metadata_manager.py create mode 100644 rialto/metadata/utils.py create mode 100644 rialto/runner/__init__.py create mode 100644 
rialto/runner/config_loader.py create mode 100644 rialto/runner/date_manager.py create mode 100644 rialto/runner/runner.py create mode 100644 rialto/runner/table.py create mode 100644 rialto/runner/tracker.py create mode 100644 rialto/runner/transformation.py create mode 100644 tests/__init__.py create mode 100644 tests/common/conftest.py create mode 100644 tests/common/test_utils.py create mode 100644 tests/jobs/conftest.py create mode 100644 tests/jobs/resources.py create mode 100644 tests/jobs/test_config_holder.py create mode 100644 tests/jobs/test_decorators.py create mode 100644 tests/jobs/test_job/test_job.py create mode 100644 tests/jobs/test_job_base.py create mode 100644 tests/jobs/test_resolver.py create mode 100644 tests/jobs/test_test_utils.py create mode 100644 tests/loader/__init__.py create mode 100644 tests/loader/metadata_config/full_example.yaml create mode 100644 tests/loader/metadata_config/missing_field_example.yaml create mode 100644 tests/loader/metadata_config/missing_value_example.yaml create mode 100644 tests/loader/metadata_config/no_map_example.yaml create mode 100644 tests/loader/metadata_config/test_main_config.py create mode 100644 tests/loader/pyspark/dataframe_builder.py create mode 100644 tests/loader/pyspark/dummy_loaders.py create mode 100644 tests/loader/pyspark/example_cfg.yaml create mode 100644 tests/loader/pyspark/resources.py create mode 100644 tests/loader/pyspark/test_from_cfg.py create mode 100644 tests/maker/__init__.py create mode 100644 tests/maker/conftest.py create mode 100644 tests/maker/test_FeatureFunction.py create mode 100644 tests/maker/test_FeatureHolder.py create mode 100644 tests/maker/test_FeatureMaker.py create mode 100644 tests/maker/test_features/__init__.py create mode 100644 tests/maker/test_features/aggregated_num_sum_outbound.py create mode 100644 tests/maker/test_features/aggregated_num_sum_txn.py create mode 100644 tests/maker/test_features/dependent_features_fail.py create mode 100644 tests/maker/test_features/dependent_features_fail2.py create mode 100644 tests/maker/test_features/dependent_features_ok.py create mode 100644 tests/maker/test_features/sequential_avg_outbound.py create mode 100644 tests/maker/test_features/sequential_avg_txn.py create mode 100644 tests/maker/test_features/sequential_for_testing.py create mode 100644 tests/maker/test_features/sequential_outbound.py create mode 100644 tests/maker/test_features/sequential_outbound_with_param.py create mode 100644 tests/maker/test_wrappers.py create mode 100644 tests/metadata/__init__.py create mode 100644 tests/metadata/conftest.py create mode 100644 tests/metadata/resources.py create mode 100644 tests/metadata/test_metadata_connector.py create mode 100644 tests/runner/__init__.py create mode 100644 tests/runner/conftest.py create mode 100644 tests/runner/runner_resources.py create mode 100644 tests/runner/test_date_manager.py create mode 100644 tests/runner/test_runner.py create mode 100644 tests/runner/test_table.py create mode 100644 tests/runner/transformations/__init__.py create mode 100644 tests/runner/transformations/config.yaml create mode 100644 tests/runner/transformations/config2.yaml create mode 100644 tests/runner/transformations/simple_group.py diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..21099b7 --- /dev/null +++ b/.flake8 @@ -0,0 +1,16 @@ +[flake8] +max-line-length = 120 +ban-relative-imports = true +inline-quotes = double +max-complexity = 15 +multiline-quotes = double +per-file-ignores = + __init__.py:F401 + tests/*:D 
+extend-ignore = + N812, + E800, + D400, + D100, + D104, + D107, diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..ef3a340 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +/rialto/ @MDobransky diff --git a/.github/workflows/autodoc.yml b/.github/workflows/autodoc.yml new file mode 100644 index 0000000..b98119b --- /dev/null +++ b/.github/workflows/autodoc.yml @@ -0,0 +1,62 @@ +# Simple workflow for deploying static content to GitHub Pages +name: Sphinx Autodoc to Github Pages + +on: + # Runs on pushes targeting the default branch + push: + branches: [ "master" ] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow one concurrent deployment +concurrency: + group: "pages" + cancel-in-progress: true + +jobs: + # Single deploy job since we're just deploying + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + + runs-on: ubuntu-20.04 + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: "3.10.6" + + - name: Setup Poetry + uses: abatilo/actions-poetry@v2.1.5 + with: + poetry-version: 1.5.1 + + - name: Install dependencies + run: | + poetry install + + - name: Publish Sphinx Pages + run: | + cd docs && poetry run make html + + - name: Setup Pages + uses: actions/configure-pages@v2 + + - name: Upload artifact + uses: actions/upload-pages-artifact@v1 + with: + path: 'docs/build/html' + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v1 diff --git a/.github/workflows/pull_rq.yml b/.github/workflows/pull_rq.yml new file mode 100644 index 0000000..22accb7 --- /dev/null +++ b/.github/workflows/pull_rq.yml @@ -0,0 +1,36 @@ +name: Testing + +on: + pull_request: + branches: [ master, develop ] + +env: + LANG: C.UTF-8 + +jobs: + run-tests: + runs-on: ubuntu-20.04 + steps: + - name: Checkout Rialto + uses: actions/checkout@v3 + + - uses: actions/setup-java@v1 + with: + java-version: "8" + + - uses: actions/setup-python@v4 + with: + python-version: "3.10.6" + + - name: Setup Poetry + uses: abatilo/actions-poetry@v2.1.5 + with: + poetry-version: 1.5.1 + + - name: Install dependencies + run: | + poetry install + + - name: ✅ Test with pytest + run: | + poetry run pytest diff --git a/.github/workflows/release-merge.yml b/.github/workflows/release-merge.yml new file mode 100644 index 0000000..5c0477a --- /dev/null +++ b/.github/workflows/release-merge.yml @@ -0,0 +1,25 @@ +name: Merge to develop + +on: + pull_request: + branches: [master] + types: [closed] +jobs: + merge-master-back-to-dev: + if: github.event.pull_request.merged == true + timeout-minutes: 2 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set Git config + run: | + git config --local user.email "actions@github.com" + git config --local user.name "Github Actions" + - name: Merge master back to dev + run: | + git fetch --unshallow + git pull + git checkout develop + git pull + git merge --ff-only master + git push diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..89e547b --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,40 @@ +name: Release + +on: + push: + branches: [ master ] + +env: + LC_ALL: C.UTF-8 + LANG: C.UTF-8 + +jobs: + publish: + environment: jfrog + runs-on: 
ubuntu-20.04 + steps: + - name: Checkout Rialto + uses: actions/checkout@v3 + + - name: Install system dependencies + run: | + apt-get update + apt-get install -y openssl libssl-dev wget + + - uses: actions/setup-python@v4 + with: + python-version: "3.10.6" + + - name: Setup Poetry + uses: abatilo/actions-poetry@v2.1.5 + with: + poetry-version: 1.5.1 + + - name: Install dependencies + run: | + poetry install + + - name: Build and publish the wheel to jfrog + run: | + poetry build +# poetry publish diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2c1426c --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +.DS_store +.idea +__pycache__ +.vscode +.env +.coverage +coverage.xml +.ipynb_checkpoints/ +tmp +dist diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..78f252d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,35 @@ +minimum_pre_commit_version: 3.4.0 +#files: ^rialto/ + +repos: +- hooks: + - additional_dependencies: + - toml + id: isort + repo: https://github.com/timothycrosley/isort + rev: 5.12.0 +- hooks: + - id: black + name: Format code (black) + language_version: python3.10 + repo: https://github.com/psf/black + rev: 23.7.0 +- hooks: + - id: end-of-file-fixer + - id: trailing-whitespace + - id: check-json + - id: check-toml + - id: check-yaml + - id: check-merge-conflict + repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + +- hooks: + - id: flake8 + additional_dependencies: + - flake8-docstrings==1.7.0 + - flake8-broken-line==1.0.0 + - pep8-naming==0.13.3 + name: Lint code (flake8) + repo: https://github.com/PyCQA/flake8 + rev: 6.1.0 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..e17c3e7 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,17 @@ +# Change Log +All notable changes to this project will be documented in this file. + + +## [Unreleased] - yyyy-mm-dd + + +### Added + - passing dependencies from runner to a Transformation + - optional dependency names in the config that could be recalled via dictionary to access paths and date columns + - Rialto now adds rialto_date_column property to written tables + +### Changed +- signature of Transformation +- Allowed future dependencies + +[//]: # (### Fixed) diff --git a/README.md b/README.md new file mode 100644 index 0000000..6611ec2 --- /dev/null +++ b/README.md @@ -0,0 +1,550 @@ + + +# Rialto + +Rialto is a framework for building and deploying machine learning features in a scalable and reusable way. It provides a set of tools that make it easy to define and deploy new features, and it provides a way to orchestrate the execution of these features. + +The name Rialto is a reference to the Rialto Bridge in Venice, Italy. The Rialto Bridge was a major marketplace for goods and ideas in the Middle Ages. +Rialto is intended to be a foundation of a similar marketplace for machine learning features, where users can find and share reusable features. + +Sphinx-generated autodoc pages are available **[here](https://legendary-winner-93rlnzn.pages.github.io/)**. + +# Contents +1. [Installation](#install) +2. [Library Overview](#overview) +3. [Contributing](#contributing) + +# 1. Installation +The package is not yet available on public PyPI. +```bash +pip install rialto +``` + +# 2. Library Overview +This library currently contains: +1. [runner](#runner) +2. [maker](#maker) +3. [jobs](#jobs) +4. [loader](#loader) +5. [metadata](#metadata) +6. [common](#common) + +## 2.1 - runner + +Runner is the orchestrator and scheduler of Rialto.
It can be used to execute any [job](#jobs) but is primarily designed to execute feature [maker](#maker) jobs. +The core of the runner is the execution of a Transformation class, which can be extended for any purpose, driven by an execution configuration that defines the handling of I/O, scheduling, dependencies and reporting. + +### Transformation +For the details on the interface, see the [implementation](rialto/runner/transformation.py). +Inside the transformation you have access to a [TableReader](#common), the date of running and, if provided to the Runner, a live spark session and a [metadata manager](#metadata). +You can implement your jobs either directly, by extending the Transformation class, or by using the [jobs](#jobs) abstraction. + +### Runner + +Below is the minimal code necessary to execute the runner. + +```python +from rialto.runner import Runner +# Create the runner instance +runner = Runner(spark, config_path=cfg) +# Execute the run +runner() +``` + +By default, the runner executes all the jobs provided in the configuration file, for all the viable execution dates for which the job has not yet run successfully (i.e. the date partition doesn't exist in the storage). +This behavior can be modified by the parameters and switches listed below (a combined usage sketch is shown at the end of this subsection). + +* **feature_metadata_schema** - path to the schema where feature metadata are read and stored, needed for [maker](#maker) jobs and jobs that utilize the feature [loader](#loader) +* **run_date** - date at which the runner is triggered (defaults to the day of running) +* **date_from** - starting date (defaults to run_date - config watch period) +* **date_until** - end date (defaults to run_date) +* **feature_store_schema** - location of features, needed for jobs utilizing the feature [loader](#loader) +* **custom_job_config** - dictionary with key-value pairs that will be accessible under the "config" variable in your rialto jobs +* **rerun** - rerun all jobs even if they already succeeded in past runs +* **op** - run only the selected operation / pipeline + + + +Transformations are not included in the runner itself; it imports them dynamically according to the configuration, so they need to be installed locally. + +A table created by the runner automatically gets the **rialto_date_column** table property, set according to the target partition column in the configuration.
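The following is an illustrative sketch of combining the switches listed above. The keyword names mirror the parameter list, but the exact `Runner` constructor signature is an assumption and should be verified against the implementation:

```python
from datetime import date

from rialto.runner import Runner

# Hypothetical invocation combining the switches described above;
# argument names follow the parameter list and may differ from the actual API.
runner = Runner(
    spark,
    config_path="pipelines.yaml",
    feature_metadata_schema="catalog.metadata_schema",  # needed for maker / loader jobs
    feature_store_schema="catalog.feature_schema",      # needed for feature loader jobs
    run_date=date(2024, 1, 31),
    rerun=True,        # re-execute even already-successful partitions
    op="Pipeline1",    # run only this pipeline
)
runner()
```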
+ +### Configuration + +```yaml +general: + target_schema: catalog.schema # schema where tables will be created, must exist + target_partition_column: INFORMATION_DATE # date to partition new tables on + source_date_column_property: rialto_date_column # name of the date property on source tables + watched_period_units: "months" # unit of default run period + watched_period_value: 2 # value of default run period + job: "run" # run for running the pipelines, check for only checking dependencies + mail: + to: # a list of email addresses + - name@host.domain + - name2@host.domain + sender: rialto.noreply@domain # what a sender should say + smtp: smtp.server.url # your smtp server + subject: "Rialto report" # Email subject header + +pipelines: # a list of pipelines to run +- name: Pipeline1 #Pipeline name & a name of table it creates (table will be converted to have _ instead of uppercase letters) + module: # Python transformation class location + python_module: module_name + python_class: Pipeline1Class + schedule: + frequency: weekly # daily/weekly/monthly + day: 7 # day of the week or month + info_date_shift: #Optional shift in the written information date from the scheduled day + units: "days" # days/weeks/months/years + value: 5 # subtracted from scheduled day + dependencies: # list of dependent tables + - table: catalog.schema.table1 + name: "table1" # Optional table name, used to recall dependency details in transformation + date_col: generation_date # Optional date column name, takes priority + interval: # mandatory availability interval, subtracted from scheduled day + units: "days" + value: 1 + - table: catalog.schema.table2 + name: "table2" + interval: + units: "months" + value: 1 + +- name: PipelineTable1 # will be written as pipeline_table1 + module: + python_module: module_name + python_class: Pipeline2Class + schedule: + frequency: monthly + day: 6 + info_date_shift: # can be combined as a list + - units: "days" + value: 5 + - units: "months" + value: 3 + dependencies: + - table: catalog.schema.table3 + interval: + units: "days" + value: 6 +``` + + + +## 2.2 - maker +The purpose of (feature) maker is to simplify feature creation, allow for consistent feature implementation that is standardized and easy to test. + +Maker provides the utilities to define feature generating functions by wrapping python functions with provided decorators and a framework to execute these functions. + +### FeatureMaker +FeatureMaker is pre-initialized class that provides the means to execute pre-defined feature functions. It has 2 modes, sequential features and aggregated features. +Features need to be defined in a separate importable module as in the examples. + +Since there is always only one FeatureMaker object, you can use its state variables from inside feature functions. Specifically +**FeatureMaker.make_date** and **FeatureMaker.key**. These variables are set when calling a FeatureMaker.make(...) function to the values of its parameters. + +#### Sequential +Sequential feature generation can be simply interpreted as appending a new column for every feature to the existing dataframe. + +```python +from rialto.maker import FeatureMaker +from my_features import simple_features + +features, metadata = FeatureMaker.make(df=input_data, key="KEY", make_date=run_date, features_module=simple_features, keep_preexisting=True) +``` + +#### Aggregated +In aggregated generation, the source dataframe is grouped by the key or keys and the features themselves are the aggregations put into the spark agg function. 
+ +```python +from rialto.maker import FeatureMaker +from my_features import agg_features + +features, metadata = FeatureMaker.make_aggregated(df=input_data, key="KEY", make_date=run_date, features_module=agg_features) +``` + +There are also **make_single_feature** and **make_single_agg_feature** available, intended to be used in tests (see the full documentation). + +### Features +Features, whether sequential or aggregated, are defined as simple Python functions that return a PySpark **column expression** that generates the desired column. +In the sequential case, this expression is passed to PySpark's withColumn function as follows: + +```python +data_frame.withColumn("feature_name", RETURNED_FUNCTION_GOES_HERE) +``` + +In the case of aggregated features, it is used as follows: + +```python +data_frame.groupBy("key").agg(RETURNED_FUNCTION_GOES_HERE.alias("feature_name")) +``` + +All the features in one Python module are processed in a single call of FeatureMaker's make functions; therefore you can't mix aggregated and sequential features in one module. + +### Decorators +To define Rialto features, the framework provides a set of decorators. + +#### @feature +This registers the function as a feature. Every feature has to be decorated with @feature as the **outermost** wrapper. + +```python +import pyspark.sql.functions as F +import rialto.maker as rfm +from pyspark.sql import Column +from rialto.metadata import ValueType as VT + +@rfm.feature(VT.numerical) +def RECENCY() -> Column: + return F.months_between(F.lit(rfm.FeatureMaker.make_date), F.col("DATE")) +``` + +@feature takes one parameter, the [metadata](#metadata) value type enum. + +#### @desc +Provides an option to pass a string describing the feature, which is then saved as part of the feature [metadata](#metadata). + +```python +import pyspark.sql.functions as F +import rialto.maker as rfm +from pyspark.sql import Column +from rialto.metadata import ValueType as VT + +@rfm.feature(VT.numerical) +@rfm.desc("Recency of the action") +def RECENCY() -> Column: + return F.months_between(F.lit(rfm.FeatureMaker.make_date), F.col("DATE")) +``` + +#### @param +Inspired by @pytest.mark.parametrize, it has a similar interface and fulfills a similar role. It allows you to invoke the feature function multiple times with different values of the parameter. +If multiple @params are used, the number of final features will be the product of all parameter value counts. The feature function has to accept a parameter with the same name as the @param's name. + +```python +import pyspark.sql.functions as F +import rialto.maker as fml +from pyspark.sql import Column +from rialto.metadata import ValueType as VT + +@fml.feature(VT.numerical) +@fml.param("product", ["A", "B", "C"]) +@fml.param("status", ["ACTIVE", "INACTIVE"]) +@fml.desc("Number of given products with given status") +def NUM(product: str, status: str) -> Column: + return F.count(F.when((F.col("PRODUCT") == product) & (F.col("STATUS") == status), F.lit(1))) +``` + +The above code creates the following features: +* NUM_PRODUCT_A_STATUS_ACTIVE +* NUM_PRODUCT_A_STATUS_INACTIVE +* NUM_PRODUCT_B_STATUS_ACTIVE +* NUM_PRODUCT_B_STATUS_INACTIVE +* NUM_PRODUCT_C_STATUS_ACTIVE +* NUM_PRODUCT_C_STATUS_INACTIVE + + +#### @depends +@depends allows for the definition of features that depend on each other / need to be calculated in a certain order. +**Dependency resolution uses the raw feature names (function names)**, not parameter-expanded names.
+ +```python +import pyspark.sql.functions as F +import rialto.maker as rfm +from pyspark.sql import Column +from rialto.metadata import ValueType as VT + +@rfm.feature(VT.numerical) +@rfm.desc("Recency of the action") +def RECENCY() -> Column: + return F.months_between(F.lit(rfm.FeatureMaker.make_date), F.col("DATE")) + + +@rfm.feature(VT.numerical) +@rfm.desc("Action month delay") +@rfm.param("month", [1, 2, 3]) +@rfm.depends("RECENCY") +def DELAY(month) -> Column: + recency = F.col("RECENCY") + return F.when((recency < month) & (recency >= month - 1), F.lit(1)) +``` + +In this example, @depends ensures that RECENCY is calculated before DELAY. + +### Feature name +Publicly available as rialto.maker.utils.feature_name(...), this function is used to create the final names of all features. +It concatenates the name of the Python function with any given @params and their values used in that instance of the feature. + + +## 2.3 - jobs +Rialto jobs simplify the creation of runner transformations. Instead of having to inherit the base *Transformation* class, this module offers two decorators: *datasource* and *job*. + +As the names might suggest, +* *datasource* registers the decorated function as a valid datasource, which can be used as a dependency +* *job* registers the decorated function as a Rialto transformation. + +The output / return value of both functions **has to be** a PySpark DataFrame *(or nothing for jobs, more on that later)*. + +### rialto job dependencies +Both jobs and datasources can request dependencies as function arguments. Rialto will attempt to resolve those dependencies and provide them during the job run(s). + +We have a set of pre-defined dependencies: +* **run_date** gives you the *date* on which the job is supposed to run +* **spark** contains the *spark session* +* **config** returns the custom config you can specify with each job run +* **dependencies** returns a dictionary containing the job dependencies config +* **table_reader** returns a *TableReader* +* **feature_loader** provides a *PysparkFeatureLoader* + +Apart from that, each **datasource** also becomes a fully usable dependency. Note that this means that datasources can also depend on other datasources - just beware of any circular dependencies! + +With that sorted out, we can now provide a quick example of the *rialto.jobs* module's capabilities: + +```python +import datetime + +import pyspark.sql.functions as F +from pyspark.sql import DataFrame +from rialto.common import TableReader +from rialto.jobs.decorators import job, datasource + +@datasource +def my_datasource(run_date: datetime.date, table_reader: TableReader) -> DataFrame: + return table_reader.get_latest("my_catalog.my_schema.my_table", until=run_date) + + +@job +def my_job(my_datasource: DataFrame) -> DataFrame: + return my_datasource.withColumn("HelloWorld", F.lit(1)) +``` +This piece of code creates a Rialto transformation called *my_job*, which is then callable by the Rialto runner. It first sources *my_datasource* and then runs *my_job* on top of that datasource. + +### job naming / outputs +The Rialto runner creates the final table according to the job's name. Therefore, we support two ways of creating jobs: +```python +@job("my_custom_name") +def f(...): + ... + +@job +def my_custom_name(...): + ... +``` +Both work; it's up to you. Just note that any *WeirdCaseNames* will be transformed to *lower_case_with_underscores*. + +### notes / rules +The rules for the dependencies are fairly straightforward.
+Both **jobs** and **datasources** can only depend on *pre-defined* dependencies and other *datasources*. Meaning: +* *datasource -> datasource -> job* is perfectly fine, +* *datasource -> job -> datasource* will result in an error. + +Secondly, jobs can, but **don't necessarily have to**, output a dataframe. +If your job doesn't output a dataframe, the framework returns a small placeholder one instead, which ensures that Rialto registers the job as having run successfully. +This can be useful in **model training**. + +Finally, remember that your jobs are still just *Rialto Transformations* internally. +Meaning that at the end of the day, you should always read some data, do some operations on it and either return a PySpark DataFrame, or not return anything and let the framework return the placeholder one. + +## 2.4 - loader +This module is used to load features from the feature store into your models and scripts. The loader provides options to load single features, whole feature groups, as well as a selection of features from multiple groups defined in a config file and served as a single dataframe. It also provides an interface to access feature metadata. + +Two public classes are exposed from this module: **DatabricksLoader** (DataLoader) and **PysparkFeatureLoader** (FeatureLoaderInterface). + +### DatabricksLoader +This is a support class for the feature loader and provides the data reading capability from the feature store. + +This class needs to be instantiated with an active spark session and a path to the feature store schema (in the format of "catalog_name.schema_name"). +Optionally, a date_column can be passed; otherwise it defaults to INFORMATION_DATE. +```python +from rialto.loader import DatabricksLoader + +data_loader = DatabricksLoader(spark=spark_instance, schema="catalog.schema", date_column="INFORMATION_DATE") +``` + +This class provides one method, read_group(...), which returns a whole feature group for a selected date. This is mostly used inside the feature loader. + +### PysparkFeatureLoader + +This class needs to be instantiated with an active spark session, a data loader and a path to the metadata schema (in the format of "catalog_name.schema_name"). + +```python +from rialto.loader import PysparkFeatureLoader + +feature_loader = PysparkFeatureLoader(spark=spark_instance, data_loader=data_loader_instance, metadata_schema="catalog.schema") +``` + +#### Single feature + +```python +from rialto.loader import DatabricksLoader, PysparkFeatureLoader +from datetime import datetime + +data_loader = DatabricksLoader(spark, "feature_catalog.feature_schema") +feature_loader = PysparkFeatureLoader(spark, data_loader, "metadata_catalog.metadata_schema") +my_date = datetime.strptime("2020-01-01", "%Y-%m-%d").date() + +feature = feature_loader.get_feature(group_name="CustomerFeatures", feature_name="AGE", information_date=my_date) +metadata = feature_loader.get_feature_metadata(group_name="CustomerFeatures", feature_name="AGE") +``` + +#### Feature group +This method of data access is only recommended for experimentation, as the group schema can evolve over time.
+ +```python +from rialto.loader import DatabricksLoader, PysparkFeatureLoader +from datetime import datetime + +data_loader = DatabricksLoader(spark, "feature_catalog.feature_schema") +feature_loader = PysparkFeatureLoader(spark, data_loader, "metadata_catalog.metadata_schema") +my_date = datetime.strptime("2020-01-01", "%Y-%m-%d").date() + +features = feature_loader.get_group(group_name="CustomerFeatures", information_date=my_date) +metadata = feature_loader.get_group_metadata(group_name="CustomerFeatures") +``` + +#### Configuration + +```python +from rialto.loader import DatabricksLoader, PysparkFeatureLoader +from datetime import datetime + +data_loader = DatabricksLoader(spark, "feature_catalog.feature_schema") +feature_loader = PysparkFeatureLoader(spark, data_loader, "metadata_catalog.metadata_schema") +my_date = datetime.strptime("2020-01-01", "%Y-%m-%d").date() + +features = feature_loader.get_features_from_cfg(path="local/configuration/file.yaml", information_date=my_date) +metadata = feature_loader.get_metadata_from_cfg(path="local/configuration/file.yaml") +``` + +The configuration file is expected to be in YAML format and has 3 sections: selection, base and maps. +* **selection** is a list of feature groups and the desired features in those groups. Each group also needs a prefix defined, which will prefix the name of every feature from that group +* **base** is a feature group with a set of keys defined; a unique selection of these keys from this group forms the base of the resulting dataframe, and all features are left-joined onto it +* **maps** is an optional list of dataframes that are joined onto the base as a whole; their purpose is to bridge feature sets with different primary keys, e.g. a map linking IDnumbers and AccountNumbers so that feature groups based on either primary key can be used. + + +```yaml +selection: + - group: Group_A + prefix: A_PREFIX + features: + - Feature_A1 + - Feature_A2 + - group: Group_B + prefix: B_PREFIX + features: + - Feature_B1 + - Feature_B2 +base: + group: Group_D + keys: + - Column_Key1 + - Column_Key2 +maps: + - MapGroup1 + - MapGroup2 +``` + + +## 2.5 - metadata +The Rialto metadata module is designed specifically to work with features. It's a support function to pass data from [maker](#maker) to [loader](#loader), and to have additional metadata available for further feature processing. + +The metadata module consists of 3 parts: enums, metadata data classes and the MetadataManager. + +### enums +There are two enums used across Rialto, **Schedule** and **ValueType**. Schedule defines the frequency of execution (daily, weekly, monthly, yearly) and ValueType defines the feature type (nominal, ordinal, numerical). + +### data classes + +There are 2 metadata data classes: FeatureMetadata and GroupMetadata. They are used as containers to pass metadata information around, and are the return types of metadata requests in the loader.
+ +```python +FeatureMetadata + value_type: ValueType # type of feature value + name: str # feature name + description: str # feature description + group: GroupMetadata # feature group this feature belongs to +``` + +```python +GroupMetadata + name: str # group name (original name mirroring the transformation class name) + frequency: Schedule # generation frequency + description: str # group description + key: List[str] # group primary keys + fs_name: str = None # actual table name of this feature group in Databricks + features: List[str] = None # a list of feature names belonging to this group +``` + +### MetadataManager +This class manages the metadata and provides the interface to fetch or write it. Metadata is stored in 2 dataframes, **group_metadata** and **feature_metadata**, in the schema provided as a parameter. + + + +## 2.6 - common +Common houses functions and utilities used throughout the whole framework. +Most importantly, it defines the _DataReader_ class, which is implemented as _TableReader_ with two public functions, _get_latest(...)_ and _get_table(...)_. + +_TableReader_ can be used as a standalone delta or parquet table loader. + +initialization: +```python +from rialto.common import TableReader + +reader = TableReader(spark=spark_instance) +``` + +usage of _get_table_: +```python +# get whole table +df = reader.get_table(table="catalog.schema.table", date_column="information_date") + +# get a slice of the table +from datetime import datetime + +start = datetime.strptime("2020-01-01", "%Y-%m-%d").date() +end = datetime.strptime("2024-01-01", "%Y-%m-%d").date() + +df = reader.get_table(table="catalog.schema.table", info_date_from=start, info_date_to=end) +``` + +usage of _get_latest_: +```python +# most recent partition +df = reader.get_latest(table="catalog.schema.table", date_column="information_date") + +# most recent partition until +until = datetime.strptime("2020-01-01", "%Y-%m-%d").date() + +df = reader.get_latest(table="catalog.schema.table", until=until, date_column="information_date") + +``` +For full information on parameters and their optionality, see the technical documentation. + +_TableReader_ needs an active spark session and information about which column is the **date column**. +There are three ways to pass that information on. + +In order of priority from highest: +* Explicit _date_column_ parameter in _get_table_ and _get_latest_ +```python +reader.get_latest(table="catalog.schema.table", date_column="information_date") +``` +* Inferred from delta metadata; triggered by an init parameter, this only works on delta tables (e.g. it doesn't work on views) +```python +reader = TableReader(spark=spark_instance, infer_partition=True) +reader.get_latest(table="catalog.schema.table") +``` +* A custom SQL table property containing the date column name; the property name defaults to _rialto_date_column_ +```python +reader = TableReader(spark=spark_instance, date_property="rialto_date_column") +reader.get_latest(table="catalog.schema.table") +``` + +# 3.
Contributing +Contributing: +* Implement the change in a custom branch split off **develop** +* Create a pull request from your **branch** to **develop** +* Get PR approved, merged to **develop** + +Releasing: +* Create a Release/1.x.y branch off **develop** +* Bump all necessary versions +* Create PR from release branch to **MASTER** +* Merge to **master** with a **merge commit** +* Actions will fast-forward new commits from master to develop and deploy new version to pypi repository diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..747ffb7 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..1ce2224 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,46 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Configuration file for the Sphinx documentation builder. 
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +import os +import sys + +sys.path.insert(0, os.path.abspath("../rialto/")) + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "rialto" +copyright = "2022, Marek Dobransky" +author = "Marek Dobransky" +release = "1.3.0" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = ["sphinx.ext.todo", "sphinx.ext.viewcode", "sphinx.ext.autodoc", "sphinx_mdinclude"] + +templates_path = ["_templates"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "sphinx_rtd_theme" +html_static_path = ["_static"] diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..1c389b9 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,23 @@ +.. rialto documentation master file, created by + sphinx-quickstart on Thu Feb 29 10:39:58 2024. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to rialto's documentation! +================================== + +Welcome to the rialto package documentation! + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + rialto.runner + rialto.maker + rialto.jobs + rialto.loader + rialto.metadata + rialto.common + README.md + +.. mdinclude:: ../../README.md diff --git a/docs/source/readme.rst b/docs/source/readme.rst new file mode 100644 index 0000000..bc43f34 --- /dev/null +++ b/docs/source/readme.rst @@ -0,0 +1,5 @@ +----------- +Readme File +----------- + +.. mdinclude:: ../../README.md diff --git a/docs/source/rialto.common.rst b/docs/source/rialto.common.rst new file mode 100644 index 0000000..595dd98 --- /dev/null +++ b/docs/source/rialto.common.rst @@ -0,0 +1,29 @@ +rialto.common package +===================== + +Submodules +---------- + +rialto.common.table\_reader module +---------------------------------- + +.. automodule:: rialto.common.table_reader + :members: + :undoc-members: + :show-inheritance: + +rialto.common.utils module +-------------------------- + +.. automodule:: rialto.common.utils + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: rialto.common + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/rialto.jobs.decorators.rst b/docs/source/rialto.jobs.decorators.rst new file mode 100644 index 0000000..78adb79 --- /dev/null +++ b/docs/source/rialto.jobs.decorators.rst @@ -0,0 +1,45 @@ +rialto.jobs.decorators package +============================== + +Submodules +---------- + +rialto.jobs.decorators.decorators module +---------------------------------------- + +.. automodule:: rialto.jobs.decorators.decorators + :members: + :undoc-members: + :show-inheritance: + +rialto.jobs.decorators.job\_base module +--------------------------------------- + +.. automodule:: rialto.jobs.decorators.job_base + :members: + :undoc-members: + :show-inheritance: + +rialto.jobs.decorators.resolver module +-------------------------------------- + +.. 
automodule:: rialto.jobs.decorators.resolver + :members: + :undoc-members: + :show-inheritance: + +rialto.jobs.decorators.test\_utils module +----------------------------------------- + +.. automodule:: rialto.jobs.decorators.test_utils + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: rialto.jobs.decorators + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/rialto.jobs.rst b/docs/source/rialto.jobs.rst new file mode 100644 index 0000000..48bd502 --- /dev/null +++ b/docs/source/rialto.jobs.rst @@ -0,0 +1,18 @@ +rialto.jobs package +=================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + rialto.jobs.decorators + +Module contents +--------------- + +.. automodule:: rialto.jobs + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/rialto.loader.rst b/docs/source/rialto.loader.rst new file mode 100644 index 0000000..58fe708 --- /dev/null +++ b/docs/source/rialto.loader.rst @@ -0,0 +1,45 @@ +rialto.loader package +===================== + +Submodules +---------- + +rialto.loader.config\_loader module +----------------------------------- + +.. automodule:: rialto.loader.config_loader + :members: + :undoc-members: + :show-inheritance: + +rialto.loader.data\_loader module +--------------------------------- + +.. automodule:: rialto.loader.data_loader + :members: + :undoc-members: + :show-inheritance: + +rialto.loader.interfaces module +------------------------------- + +.. automodule:: rialto.loader.interfaces + :members: + :undoc-members: + :show-inheritance: + +rialto.loader.pyspark\_feature\_loader module +--------------------------------------------- + +.. automodule:: rialto.loader.pyspark_feature_loader + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: rialto.loader + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/rialto.maker.rst b/docs/source/rialto.maker.rst new file mode 100644 index 0000000..88a3de7 --- /dev/null +++ b/docs/source/rialto.maker.rst @@ -0,0 +1,45 @@ +rialto.maker package +==================== + +Submodules +---------- + +rialto.maker.containers module +------------------------------ + +.. automodule:: rialto.maker.containers + :members: + :undoc-members: + :show-inheritance: + +rialto.maker.feature\_maker module +---------------------------------- + +.. automodule:: rialto.maker.feature_maker + :members: + :undoc-members: + :show-inheritance: + +rialto.maker.utils module +------------------------- + +.. automodule:: rialto.maker.utils + :members: + :undoc-members: + :show-inheritance: + +rialto.maker.wrappers module +---------------------------- + +.. automodule:: rialto.maker.wrappers + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: rialto.maker + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/rialto.metadata.data_classes.rst b/docs/source/rialto.metadata.data_classes.rst new file mode 100644 index 0000000..44353ff --- /dev/null +++ b/docs/source/rialto.metadata.data_classes.rst @@ -0,0 +1,29 @@ +rialto.metadata.data\_classes package +===================================== + +Submodules +---------- + +rialto.metadata.data\_classes.feature\_metadata module +------------------------------------------------------ + +.. 
automodule:: rialto.metadata.data_classes.feature_metadata + :members: + :undoc-members: + :show-inheritance: + +rialto.metadata.data\_classes.group\_metadata module +---------------------------------------------------- + +.. automodule:: rialto.metadata.data_classes.group_metadata + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: rialto.metadata.data_classes + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/rialto.metadata.rst b/docs/source/rialto.metadata.rst new file mode 100644 index 0000000..08933bf --- /dev/null +++ b/docs/source/rialto.metadata.rst @@ -0,0 +1,45 @@ +rialto.metadata package +======================= + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + rialto.metadata.data_classes + +Submodules +---------- + +rialto.metadata.enums module +---------------------------- + +.. automodule:: rialto.metadata.enums + :members: + :undoc-members: + :show-inheritance: + +rialto.metadata.metadata\_manager module +---------------------------------------- + +.. automodule:: rialto.metadata.metadata_manager + :members: + :undoc-members: + :show-inheritance: + +rialto.metadata.utils module +---------------------------- + +.. automodule:: rialto.metadata.utils + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: rialto.metadata + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/rialto.runner.rst b/docs/source/rialto.runner.rst new file mode 100644 index 0000000..bbdc42f --- /dev/null +++ b/docs/source/rialto.runner.rst @@ -0,0 +1,61 @@ +rialto.runner package +===================== + +Submodules +---------- + +rialto.runner.config\_loader module +----------------------------------- + +.. automodule:: rialto.runner.config_loader + :members: + :undoc-members: + :show-inheritance: + +rialto.runner.date\_manager module +---------------------------------- + +.. automodule:: rialto.runner.date_manager + :members: + :undoc-members: + :show-inheritance: + +rialto.runner.runner module +--------------------------- + +.. automodule:: rialto.runner.runner + :members: + :undoc-members: + :show-inheritance: + +rialto.runner.table module +-------------------------- + +.. automodule:: rialto.runner.table + :members: + :undoc-members: + :show-inheritance: + +rialto.runner.tracker module +---------------------------- + +.. automodule:: rialto.runner.tracker + :members: + :undoc-members: + :show-inheritance: + +rialto.runner.transformation module +----------------------------------- + +.. automodule:: rialto.runner.transformation + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: rialto.runner + :members: + :undoc-members: + :show-inheritance: diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..b516a14 --- /dev/null +++ b/poetry.lock @@ -0,0 +1,1532 @@ +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. 
+ +[[package]] +name = "alabaster" +version = "0.7.16" +description = "A light, configurable Sphinx theme" +optional = false +python-versions = ">=3.9" +files = [ + {file = "alabaster-0.7.16-py3-none-any.whl", hash = "sha256:b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92"}, + {file = "alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65"}, +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +description = "Reusable constraint types to use with typing.Annotated" +optional = false +python-versions = ">=3.8" +files = [ + {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, + {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, +] + +[[package]] +name = "babel" +version = "2.15.0" +description = "Internationalization utilities" +optional = false +python-versions = ">=3.8" +files = [ + {file = "Babel-2.15.0-py3-none-any.whl", hash = "sha256:08706bdad8d0a3413266ab61bd6c34d0c28d6e1e7badf40a2cebe67644e2e1fb"}, + {file = "babel-2.15.0.tar.gz", hash = "sha256:8daf0e265d05768bc6c7a314cf1321e9a123afc328cc635c18622a2f30a04413"}, +] + +[package.extras] +dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"] + +[[package]] +name = "black" +version = "23.12.1" +description = "The uncompromising code formatter." +optional = false +python-versions = ">=3.8" +files = [ + {file = "black-23.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0aaf6041986767a5e0ce663c7a2f0e9eaf21e6ff87a5f95cbf3675bfd4c41d2"}, + {file = "black-23.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c88b3711d12905b74206227109272673edce0cb29f27e1385f33b0163c414bba"}, + {file = "black-23.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a920b569dc6b3472513ba6ddea21f440d4b4c699494d2e972a1753cdc25df7b0"}, + {file = "black-23.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:3fa4be75ef2a6b96ea8d92b1587dd8cb3a35c7e3d51f0738ced0781c3aa3a5a3"}, + {file = "black-23.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8d4df77958a622f9b5a4c96edb4b8c0034f8434032ab11077ec6c56ae9f384ba"}, + {file = "black-23.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:602cfb1196dc692424c70b6507593a2b29aac0547c1be9a1d1365f0d964c353b"}, + {file = "black-23.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c4352800f14be5b4864016882cdba10755bd50805c95f728011bcb47a4afd59"}, + {file = "black-23.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:0808494f2b2df923ffc5723ed3c7b096bd76341f6213989759287611e9837d50"}, + {file = "black-23.12.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:25e57fd232a6d6ff3f4478a6fd0580838e47c93c83eaf1ccc92d4faf27112c4e"}, + {file = "black-23.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2d9e13db441c509a3763a7a3d9a49ccc1b4e974a47be4e08ade2a228876500ec"}, + {file = "black-23.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d1bd9c210f8b109b1762ec9fd36592fdd528485aadb3f5849b2740ef17e674e"}, + {file = "black-23.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:ae76c22bde5cbb6bfd211ec343ded2163bba7883c7bc77f6b756a1049436fbb9"}, + {file = "black-23.12.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1fa88a0f74e50e4487477bc0bb900c6781dbddfdfa32691e780bf854c3b4a47f"}, + {file = "black-23.12.1-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:a4d6a9668e45ad99d2f8ec70d5c8c04ef4f32f648ef39048d010b0689832ec6d"}, + {file = "black-23.12.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b18fb2ae6c4bb63eebe5be6bd869ba2f14fd0259bda7d18a46b764d8fb86298a"}, + {file = "black-23.12.1-cp38-cp38-win_amd64.whl", hash = "sha256:c04b6d9d20e9c13f43eee8ea87d44156b8505ca8a3c878773f68b4e4812a421e"}, + {file = "black-23.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3e1b38b3135fd4c025c28c55ddfc236b05af657828a8a6abe5deec419a0b7055"}, + {file = "black-23.12.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4f0031eaa7b921db76decd73636ef3a12c942ed367d8c3841a0739412b260a54"}, + {file = "black-23.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97e56155c6b737854e60a9ab1c598ff2533d57e7506d97af5481141671abf3ea"}, + {file = "black-23.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:dd15245c8b68fe2b6bd0f32c1556509d11bb33aec9b5d0866dd8e2ed3dba09c2"}, + {file = "black-23.12.1-py3-none-any.whl", hash = "sha256:78baad24af0f033958cad29731e27363183e140962595def56423e626f4bee3e"}, + {file = "black-23.12.1.tar.gz", hash = "sha256:4ce3ef14ebe8d9509188014d96af1c456a910d5b5cbf434a09fef7e024b3d0d5"}, +] + +[package.dependencies] +click = ">=8.0.0" +mypy-extensions = ">=0.4.3" +packaging = ">=22.0" +pathspec = ">=0.9.0" +platformdirs = ">=2" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] + +[[package]] +name = "certifi" +version = "2024.2.2" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"}, + {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"}, +] + +[[package]] +name = "cfgv" +version = "3.4.0" +description = "Validate configuration and produce human readable error messages." +optional = false +python-versions = ">=3.8" +files = [ + {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, + {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = 
"charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = 
"charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = 
"sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + +[[package]] +name = "chispa" +version = "0.9.4" +description = "Pyspark test helper library" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "chispa-0.9.4-py3-none-any.whl", hash = "sha256:ba79aef7b45f524dbdb164f225f18401bc24f703abd486edc591e6fb216d2fd3"}, + {file = "chispa-0.9.4.tar.gz", hash = "sha256:9b9d3c5c3096edd938fc96796897f02e46449e392b445efecfaabbc66b7d8d1f"}, +] + +[[package]] +name = "click" +version = "8.1.7" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.7" +files = [ + {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." 
+optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "coverage" +version = "7.5.1" +description = "Code coverage measurement for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "coverage-7.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0884920835a033b78d1c73b6d3bbcda8161a900f38a488829a83982925f6c2e"}, + {file = "coverage-7.5.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:39afcd3d4339329c5f58de48a52f6e4e50f6578dd6099961cf22228feb25f38f"}, + {file = "coverage-7.5.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a7b0ceee8147444347da6a66be737c9d78f3353b0681715b668b72e79203e4a"}, + {file = "coverage-7.5.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a9ca3f2fae0088c3c71d743d85404cec8df9be818a005ea065495bedc33da35"}, + {file = "coverage-7.5.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fd215c0c7d7aab005221608a3c2b46f58c0285a819565887ee0b718c052aa4e"}, + {file = "coverage-7.5.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4bf0655ab60d754491004a5efd7f9cccefcc1081a74c9ef2da4735d6ee4a6223"}, + {file = "coverage-7.5.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:61c4bf1ba021817de12b813338c9be9f0ad5b1e781b9b340a6d29fc13e7c1b5e"}, + {file = "coverage-7.5.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:db66fc317a046556a96b453a58eced5024af4582a8dbdc0c23ca4dbc0d5b3146"}, + {file = "coverage-7.5.1-cp310-cp310-win32.whl", hash = "sha256:b016ea6b959d3b9556cb401c55a37547135a587db0115635a443b2ce8f1c7228"}, + {file = "coverage-7.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:df4e745a81c110e7446b1cc8131bf986157770fa405fe90e15e850aaf7619bc8"}, + {file = "coverage-7.5.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:796a79f63eca8814ca3317a1ea443645c9ff0d18b188de470ed7ccd45ae79428"}, + {file = "coverage-7.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4fc84a37bfd98db31beae3c2748811a3fa72bf2007ff7902f68746d9757f3746"}, + {file = "coverage-7.5.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6175d1a0559986c6ee3f7fccfc4a90ecd12ba0a383dcc2da30c2b9918d67d8a3"}, + {file = "coverage-7.5.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fc81d5878cd6274ce971e0a3a18a8803c3fe25457165314271cf78e3aae3aa2"}, + {file = "coverage-7.5.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:556cf1a7cbc8028cb60e1ff0be806be2eded2daf8129b8811c63e2b9a6c43bca"}, + {file = "coverage-7.5.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:9981706d300c18d8b220995ad22627647be11a4276721c10911e0e9fa44c83e8"}, + {file = "coverage-7.5.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:d7fed867ee50edf1a0b4a11e8e5d0895150e572af1cd6d315d557758bfa9c057"}, + {file = "coverage-7.5.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ef48e2707fb320c8f139424a596f5b69955a85b178f15af261bab871873bb987"}, + {file = "coverage-7.5.1-cp311-cp311-win32.whl", hash = 
"sha256:9314d5678dcc665330df5b69c1e726a0e49b27df0461c08ca12674bcc19ef136"}, + {file = "coverage-7.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:5fa567e99765fe98f4e7d7394ce623e794d7cabb170f2ca2ac5a4174437e90dd"}, + {file = "coverage-7.5.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b6cf3764c030e5338e7f61f95bd21147963cf6aa16e09d2f74f1fa52013c1206"}, + {file = "coverage-7.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2ec92012fefebee89a6b9c79bc39051a6cb3891d562b9270ab10ecfdadbc0c34"}, + {file = "coverage-7.5.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16db7f26000a07efcf6aea00316f6ac57e7d9a96501e990a36f40c965ec7a95d"}, + {file = "coverage-7.5.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:beccf7b8a10b09c4ae543582c1319c6df47d78fd732f854ac68d518ee1fb97fa"}, + {file = "coverage-7.5.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8748731ad392d736cc9ccac03c9845b13bb07d020a33423fa5b3a36521ac6e4e"}, + {file = "coverage-7.5.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7352b9161b33fd0b643ccd1f21f3a3908daaddf414f1c6cb9d3a2fd618bf2572"}, + {file = "coverage-7.5.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:7a588d39e0925f6a2bff87154752481273cdb1736270642aeb3635cb9b4cad07"}, + {file = "coverage-7.5.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:68f962d9b72ce69ea8621f57551b2fa9c70509af757ee3b8105d4f51b92b41a7"}, + {file = "coverage-7.5.1-cp312-cp312-win32.whl", hash = "sha256:f152cbf5b88aaeb836127d920dd0f5e7edff5a66f10c079157306c4343d86c19"}, + {file = "coverage-7.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:5a5740d1fb60ddf268a3811bcd353de34eb56dc24e8f52a7f05ee513b2d4f596"}, + {file = "coverage-7.5.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e2213def81a50519d7cc56ed643c9e93e0247f5bbe0d1247d15fa520814a7cd7"}, + {file = "coverage-7.5.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5037f8fcc2a95b1f0e80585bd9d1ec31068a9bcb157d9750a172836e98bc7a90"}, + {file = "coverage-7.5.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c3721c2c9e4c4953a41a26c14f4cef64330392a6d2d675c8b1db3b645e31f0e"}, + {file = "coverage-7.5.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca498687ca46a62ae590253fba634a1fe9836bc56f626852fb2720f334c9e4e5"}, + {file = "coverage-7.5.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0cdcbc320b14c3e5877ee79e649677cb7d89ef588852e9583e6b24c2e5072661"}, + {file = "coverage-7.5.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:57e0204b5b745594e5bc14b9b50006da722827f0b8c776949f1135677e88d0b8"}, + {file = "coverage-7.5.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8fe7502616b67b234482c3ce276ff26f39ffe88adca2acf0261df4b8454668b4"}, + {file = "coverage-7.5.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:9e78295f4144f9dacfed4f92935fbe1780021247c2fabf73a819b17f0ccfff8d"}, + {file = "coverage-7.5.1-cp38-cp38-win32.whl", hash = "sha256:1434e088b41594baa71188a17533083eabf5609e8e72f16ce8c186001e6b8c41"}, + {file = "coverage-7.5.1-cp38-cp38-win_amd64.whl", hash = "sha256:0646599e9b139988b63704d704af8e8df7fa4cbc4a1f33df69d97f36cb0a38de"}, + {file = "coverage-7.5.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4cc37def103a2725bc672f84bd939a6fe4522310503207aae4d56351644682f1"}, + {file = 
"coverage-7.5.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fc0b4d8bfeabd25ea75e94632f5b6e047eef8adaed0c2161ada1e922e7f7cece"}, + {file = "coverage-7.5.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d0a0f5e06881ecedfe6f3dd2f56dcb057b6dbeb3327fd32d4b12854df36bf26"}, + {file = "coverage-7.5.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9735317685ba6ec7e3754798c8871c2f49aa5e687cc794a0b1d284b2389d1bd5"}, + {file = "coverage-7.5.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d21918e9ef11edf36764b93101e2ae8cc82aa5efdc7c5a4e9c6c35a48496d601"}, + {file = "coverage-7.5.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c3e757949f268364b96ca894b4c342b41dc6f8f8b66c37878aacef5930db61be"}, + {file = "coverage-7.5.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:79afb6197e2f7f60c4824dd4b2d4c2ec5801ceb6ba9ce5d2c3080e5660d51a4f"}, + {file = "coverage-7.5.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d1d0d98d95dd18fe29dc66808e1accf59f037d5716f86a501fc0256455219668"}, + {file = "coverage-7.5.1-cp39-cp39-win32.whl", hash = "sha256:1cc0fe9b0b3a8364093c53b0b4c0c2dd4bb23acbec4c9240b5f284095ccf7981"}, + {file = "coverage-7.5.1-cp39-cp39-win_amd64.whl", hash = "sha256:dde0070c40ea8bb3641e811c1cfbf18e265d024deff6de52c5950677a8fb1e0f"}, + {file = "coverage-7.5.1-pp38.pp39.pp310-none-any.whl", hash = "sha256:6537e7c10cc47c595828b8a8be04c72144725c383c4702703ff4e42e44577312"}, + {file = "coverage-7.5.1.tar.gz", hash = "sha256:54de9ef3a9da981f7af93eafde4ede199e0846cd819eb27c88e2b712aae9708c"}, +] + +[package.dependencies] +tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} + +[package.extras] +toml = ["tomli"] + +[[package]] +name = "delta-spark" +version = "2.4.0" +description = "Python APIs for using Delta Lake with Apache Spark" +optional = false +python-versions = ">=3.6" +files = [ + {file = "delta-spark-2.4.0.tar.gz", hash = "sha256:ef776e325e80d98e3920cab982c747b094acc46599d62dfcdc9035fb112ba6a9"}, + {file = "delta_spark-2.4.0-py3-none-any.whl", hash = "sha256:7204142a97ef16367403b020d810d0c37f4ae8275b4997de4056423cf69b3a4b"}, +] + +[package.dependencies] +importlib-metadata = ">=1.0.0" +pyspark = ">=3.4.0,<3.5.0" + +[[package]] +name = "distlib" +version = "0.3.8" +description = "Distribution utilities" +optional = false +python-versions = "*" +files = [ + {file = "distlib-0.3.8-py2.py3-none-any.whl", hash = "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784"}, + {file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"}, +] + +[[package]] +name = "docutils" +version = "0.20.1" +description = "Docutils -- Python Documentation Utilities" +optional = false +python-versions = ">=3.7" +files = [ + {file = "docutils-0.20.1-py3-none-any.whl", hash = "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6"}, + {file = "docutils-0.20.1.tar.gz", hash = "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b"}, +] + +[[package]] +name = "exceptiongroup" +version = "1.2.1" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, + {file = "exceptiongroup-1.2.1.tar.gz", hash = 
"sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "filelock" +version = "3.14.0" +description = "A platform independent file lock." +optional = false +python-versions = ">=3.8" +files = [ + {file = "filelock-3.14.0-py3-none-any.whl", hash = "sha256:43339835842f110ca7ae60f1e1c160714c5a6afd15a2873419ab185334975c0f"}, + {file = "filelock-3.14.0.tar.gz", hash = "sha256:6ea72da3be9b8c82afd3edcf99f2fffbb5076335a5ae4d03248bb5b6c3eae78a"}, +] + +[package.extras] +docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"] +typing = ["typing-extensions (>=4.8)"] + +[[package]] +name = "flake8" +version = "6.1.0" +description = "the modular source code checker: pep8 pyflakes and co" +optional = false +python-versions = ">=3.8.1" +files = [ + {file = "flake8-6.1.0-py2.py3-none-any.whl", hash = "sha256:ffdfce58ea94c6580c77888a86506937f9a1a227dfcd15f245d694ae20a6b6e5"}, + {file = "flake8-6.1.0.tar.gz", hash = "sha256:d5b3857f07c030bdb5bf41c7f53799571d75c4491748a3adcd47de929e34cd23"}, +] + +[package.dependencies] +mccabe = ">=0.7.0,<0.8.0" +pycodestyle = ">=2.11.0,<2.12.0" +pyflakes = ">=3.1.0,<3.2.0" + +[[package]] +name = "flake8-broken-line" +version = "1.0.0" +description = "Flake8 plugin to forbid backslashes for line breaks" +optional = false +python-versions = ">=3.8,<4.0" +files = [ + {file = "flake8_broken_line-1.0.0-py3-none-any.whl", hash = "sha256:96c964336024a5030dc536a9f6fb02aa679e2d2a6b35b80a558b5136c35832a9"}, + {file = "flake8_broken_line-1.0.0.tar.gz", hash = "sha256:e2c6a17f8d9a129e99c1320fce89b33843e2963871025c4c2bb7b8b8d8732a85"}, +] + +[package.dependencies] +flake8 = ">5" + +[[package]] +name = "flake8-docstrings" +version = "1.7.0" +description = "Extension for flake8 which uses pydocstyle to check docstrings" +optional = false +python-versions = ">=3.7" +files = [ + {file = "flake8_docstrings-1.7.0-py2.py3-none-any.whl", hash = "sha256:51f2344026da083fc084166a9353f5082b01f72901df422f74b4d953ae88ac75"}, + {file = "flake8_docstrings-1.7.0.tar.gz", hash = "sha256:4c8cc748dc16e6869728699e5d0d685da9a10b0ea718e090b1ba088e67a941af"}, +] + +[package.dependencies] +flake8 = ">=3" +pydocstyle = ">=2.1" + +[[package]] +name = "identify" +version = "2.5.36" +description = "File identification library for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "identify-2.5.36-py2.py3-none-any.whl", hash = "sha256:37d93f380f4de590500d9dba7db359d0d3da95ffe7f9de1753faa159e71e7dfa"}, + {file = "identify-2.5.36.tar.gz", hash = "sha256:e5e00f54165f9047fbebeb4a560f9acfb8af4c88232be60a488e9b68d122745d"}, +] + +[package.extras] +license = ["ukkonen"] + +[[package]] +name = "idna" +version = "3.7" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, + {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, +] + +[[package]] +name = "imagesize" +version = "1.4.1" +description = "Getting image size from png/jpeg/jpeg2000/gif file" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = 
"imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b"}, + {file = "imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a"}, +] + +[[package]] +name = "importlib-metadata" +version = "7.1.0" +description = "Read metadata from Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "importlib_metadata-7.1.0-py3-none-any.whl", hash = "sha256:30962b96c0c223483ed6cc7280e7f0199feb01a0e40cfae4d4450fc6fab1f570"}, + {file = "importlib_metadata-7.1.0.tar.gz", hash = "sha256:b78938b926ee8d5f020fc4772d487045805a55ddbad2ecf21c6d60938dc7fcd2"}, +] + +[package.dependencies] +zipp = ">=0.5" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +perf = ["ipython"] +testing = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "isort" +version = "5.13.2" +description = "A Python utility / library to sort Python imports." +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "isort-5.13.2-py3-none-any.whl", hash = "sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6"}, + {file = "isort-5.13.2.tar.gz", hash = "sha256:48fdfcb9face5d58a4f6dde2e72a1fb8dcaf8ab26f95ab49fab84c2ddefb0109"}, +] + +[package.extras] +colors = ["colorama (>=0.4.6)"] + +[[package]] +name = "jinja2" +version = "3.1.4" +description = "A very fast and expressive template engine." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, + {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"}, +] + +[package.dependencies] +MarkupSafe = ">=2.0" + +[package.extras] +i18n = ["Babel (>=2.7)"] + +[[package]] +name = "loguru" +version = "0.7.2" +description = "Python logging made (stupidly) simple" +optional = false +python-versions = ">=3.5" +files = [ + {file = "loguru-0.7.2-py3-none-any.whl", hash = "sha256:003d71e3d3ed35f0f8984898359d65b79e5b21943f78af86aa5491210429b8eb"}, + {file = "loguru-0.7.2.tar.gz", hash = "sha256:e671a53522515f34fd406340ee968cb9ecafbc4b36c679da03c18fd8d0bd51ac"}, +] + +[package.dependencies] +colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""} +win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} + +[package.extras] +dev = ["Sphinx (==7.2.5)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.2.2)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.4.1)", "mypy (==v1.5.1)", "pre-commit (==3.4.0)", "pytest (==6.1.2)", "pytest (==7.4.0)", "pytest-cov (==2.12.1)", "pytest-cov (==4.1.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.0.0)", "sphinx-autobuild (==2021.3.14)", "sphinx-rtd-theme (==1.3.0)", "tox (==3.27.1)", "tox (==4.11.0)"] + +[[package]] +name = "markupsafe" +version = "2.1.5" +description = "Safely add untrusted strings to HTML/XML markup." +optional = false +python-versions = ">=3.7" +files = [ + {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2"}, + 
{file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52"}, + {file = 
"MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-win32.whl", hash = "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-win_amd64.whl", hash = "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-win32.whl", hash = "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6"}, + {file = 
"MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5"}, + {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, +] + +[[package]] +name = "mccabe" +version = "0.7.0" +description = "McCabe checker, plugin for flake8" +optional = false +python-versions = ">=3.6" +files = [ + {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, + {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, +] + +[[package]] +name = "mistune" +version = "2.0.5" +description = "A sane Markdown parser with useful plugins and renderers" +optional = false +python-versions = "*" +files = [ + {file = "mistune-2.0.5-py2.py3-none-any.whl", hash = "sha256:bad7f5d431886fcbaf5f758118ecff70d31f75231b34024a1341120340a65ce8"}, + {file = "mistune-2.0.5.tar.gz", hash = "sha256:0246113cb2492db875c6be56974a7c893333bf26cd92891c85f63151cee09d34"}, +] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." +optional = false +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + +[[package]] +name = "nodeenv" +version = "1.8.0" +description = "Node.js virtual environment builder" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" +files = [ + {file = "nodeenv-1.8.0-py2.py3-none-any.whl", hash = "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec"}, + {file = "nodeenv-1.8.0.tar.gz", hash = "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2"}, +] + +[package.dependencies] +setuptools = "*" + +[[package]] +name = "numpy" +version = "1.26.4" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, + {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2"}, + {file = "numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07"}, + {file = "numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = 
"sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a"}, + {file = "numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20"}, + {file = "numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0"}, + {file = "numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110"}, + {file = "numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c"}, + {file = "numpy-1.26.4-cp39-cp39-win32.whl", hash = "sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6"}, + {file = "numpy-1.26.4-cp39-cp39-win_amd64.whl", hash = 
"sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0"}, + {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, +] + +[[package]] +name = "packaging" +version = "24.0" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"}, + {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, +] + +[[package]] +name = "pandas" +version = "2.2.2" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"}, + {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"}, + {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"}, + {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"}, + {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"}, + {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99"}, + {file = "pandas-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772"}, + {file = "pandas-2.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288"}, + {file = "pandas-2.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151"}, + {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b"}, + {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee"}, + {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db"}, + {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1"}, + {file = "pandas-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24"}, + {file = "pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef"}, + {file = 
"pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce"}, + {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad"}, + {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad"}, + {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76"}, + {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"}, + {file = "pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"}, + {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"}, + {file = "pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"}, + {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"}, + {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"}, + {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"}, + {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57"}, + {file = "pandas-2.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4"}, + {file = "pandas-2.2.2.tar.gz", hash = "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.22.4", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, +] +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.7" + +[package.extras] +all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"] +aws = ["s3fs (>=2022.11.0)"] +clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"] +compression = ["zstandard (>=0.19.0)"] +computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", 
"python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"] +feather = ["pyarrow (>=10.0.1)"] +fss = ["fsspec (>=2022.11.0)"] +gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"] +hdf5 = ["tables (>=3.8.0)"] +html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"] +mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"] +parquet = ["pyarrow (>=10.0.1)"] +performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] +plot = ["matplotlib (>=3.6.3)"] +postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] +pyarrow = ["pyarrow (>=10.0.1)"] +spss = ["pyreadstat (>=1.2.0)"] +sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.9.2)"] + +[[package]] +name = "pathlib" +version = "1.0.1" +description = "Object-oriented filesystem paths" +optional = false +python-versions = "*" +files = [ + {file = "pathlib-1.0.1-py3-none-any.whl", hash = "sha256:f35f95ab8b0f59e6d354090350b44a80a80635d22efdedfa84c7ad1cf0a74147"}, + {file = "pathlib-1.0.1.tar.gz", hash = "sha256:6940718dfc3eff4258203ad5021090933e5c04707d5ca8cc9e73c94a7894ea9f"}, +] + +[[package]] +name = "pathspec" +version = "0.12.1" +description = "Utility library for gitignore style pattern matching of file paths." +optional = false +python-versions = ">=3.8" +files = [ + {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, + {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, +] + +[[package]] +name = "pep8-naming" +version = "0.13.3" +description = "Check PEP-8 naming conventions, plugin for flake8" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pep8-naming-0.13.3.tar.gz", hash = "sha256:1705f046dfcd851378aac3be1cd1551c7c1e5ff363bacad707d43007877fa971"}, + {file = "pep8_naming-0.13.3-py3-none-any.whl", hash = "sha256:1a86b8c71a03337c97181917e2b472f0f5e4ccb06844a0d6f0a33522549e7a80"}, +] + +[package.dependencies] +flake8 = ">=5.0.0" + +[[package]] +name = "platformdirs" +version = "4.2.2" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "platformdirs-4.2.2-py3-none-any.whl", hash = "sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee"}, + {file = "platformdirs-4.2.2.tar.gz", hash = "sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3"}, +] + +[package.extras] +docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] +type = ["mypy (>=1.8)"] + +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pre-commit" +version = "3.7.1" +description = "A framework for managing and maintaining multi-language pre-commit hooks." +optional = false +python-versions = ">=3.9" +files = [ + {file = "pre_commit-3.7.1-py2.py3-none-any.whl", hash = "sha256:fae36fd1d7ad7d6a5a1c0b0d5adb2ed1a3bda5a21bf6c3e5372073d7a11cd4c5"}, + {file = "pre_commit-3.7.1.tar.gz", hash = "sha256:8ca3ad567bc78a4972a3f1a477e94a79d4597e8140a6e0b651c5e33899c3654a"}, +] + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +virtualenv = ">=20.10.0" + +[[package]] +name = "py4j" +version = "0.10.9.7" +description = "Enables Python programs to dynamically access arbitrary Java objects" +optional = false +python-versions = "*" +files = [ + {file = "py4j-0.10.9.7-py2.py3-none-any.whl", hash = "sha256:85defdfd2b2376eb3abf5ca6474b51ab7e0de341c75a02f46dc9b5976f5a5c1b"}, + {file = "py4j-0.10.9.7.tar.gz", hash = "sha256:0b6e5315bb3ada5cf62ac651d107bb2ebc02def3dee9d9548e3baac644ea8dbb"}, +] + +[[package]] +name = "pycodestyle" +version = "2.11.1" +description = "Python style guide checker" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"}, + {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"}, +] + +[[package]] +name = "pydantic" +version = "2.7.1" +description = "Data validation using Python type hints" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic-2.7.1-py3-none-any.whl", hash = "sha256:e029badca45266732a9a79898a15ae2e8b14840b1eabbb25844be28f0b33f3d5"}, + {file = "pydantic-2.7.1.tar.gz", hash = "sha256:e9dbb5eada8abe4d9ae5f46b9939aead650cd2b68f249bb3a8139dbe125803cc"}, +] + +[package.dependencies] +annotated-types = ">=0.4.0" +pydantic-core = "2.18.2" +typing-extensions = ">=4.6.1" + +[package.extras] +email = ["email-validator (>=2.0.0)"] + +[[package]] +name = "pydantic-core" +version = "2.18.2" +description = "Core functionality for Pydantic validation and serialization" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic_core-2.18.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9e08e867b306f525802df7cd16c44ff5ebbe747ff0ca6cf3fde7f36c05a59a81"}, + {file = "pydantic_core-2.18.2-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:f0a21cbaa69900cbe1a2e7cad2aa74ac3cf21b10c3efb0fa0b80305274c0e8a2"}, + {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0680b1f1f11fda801397de52c36ce38ef1c1dc841a0927a94f226dea29c3ae3d"}, + {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:95b9d5e72481d3780ba3442eac863eae92ae43a5f3adb5b4d0a1de89d42bb250"}, + {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fcf5cd9c4b655ad666ca332b9a081112cd7a58a8b5a6ca7a3104bc950f2038"}, + {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b5155ff768083cb1d62f3e143b49a8a3432e6789a3abee8acd005c3c7af1c74"}, + {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:553ef617b6836fc7e4df130bb851e32fe357ce36336d897fd6646d6058d980af"}, + {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89ed9eb7d616ef5714e5590e6cf7f23b02d0d539767d33561e3675d6f9e3857"}, + {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:75f7e9488238e920ab6204399ded280dc4c307d034f3924cd7f90a38b1829563"}, + {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ef26c9e94a8c04a1b2924149a9cb081836913818e55681722d7f29af88fe7b38"}, + {file = "pydantic_core-2.18.2-cp310-none-win32.whl", hash = "sha256:182245ff6b0039e82b6bb585ed55a64d7c81c560715d1bad0cbad6dfa07b4027"}, + {file = "pydantic_core-2.18.2-cp310-none-win_amd64.whl", hash = "sha256:e23ec367a948b6d812301afc1b13f8094ab7b2c280af66ef450efc357d2ae543"}, + {file = "pydantic_core-2.18.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:219da3f096d50a157f33645a1cf31c0ad1fe829a92181dd1311022f986e5fbe3"}, + {file = "pydantic_core-2.18.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cc1cfd88a64e012b74e94cd00bbe0f9c6df57049c97f02bb07d39e9c852e19a4"}, + {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05b7133a6e6aeb8df37d6f413f7705a37ab4031597f64ab56384c94d98fa0e90"}, + {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:224c421235f6102e8737032483f43c1a8cfb1d2f45740c44166219599358c2cd"}, + {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b14d82cdb934e99dda6d9d60dc84a24379820176cc4a0d123f88df319ae9c150"}, + {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2728b01246a3bba6de144f9e3115b532ee44bd6cf39795194fb75491824a1413"}, + {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:470b94480bb5ee929f5acba6995251ada5e059a5ef3e0dfc63cca287283ebfa6"}, + {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:997abc4df705d1295a42f95b4eec4950a37ad8ae46d913caeee117b6b198811c"}, + {file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:75250dbc5290e3f1a0f4618db35e51a165186f9034eff158f3d490b3fed9f8a0"}, + {file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4456f2dca97c425231d7315737d45239b2b51a50dc2b6f0c2bb181fce6207664"}, + {file = "pydantic_core-2.18.2-cp311-none-win32.whl", hash = "sha256:269322dcc3d8bdb69f054681edff86276b2ff972447863cf34c8b860f5188e2e"}, + {file = 
"pydantic_core-2.18.2-cp311-none-win_amd64.whl", hash = "sha256:800d60565aec896f25bc3cfa56d2277d52d5182af08162f7954f938c06dc4ee3"}, + {file = "pydantic_core-2.18.2-cp311-none-win_arm64.whl", hash = "sha256:1404c69d6a676245199767ba4f633cce5f4ad4181f9d0ccb0577e1f66cf4c46d"}, + {file = "pydantic_core-2.18.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:fb2bd7be70c0fe4dfd32c951bc813d9fe6ebcbfdd15a07527796c8204bd36242"}, + {file = "pydantic_core-2.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6132dd3bd52838acddca05a72aafb6eab6536aa145e923bb50f45e78b7251043"}, + {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d904828195733c183d20a54230c0df0eb46ec746ea1a666730787353e87182"}, + {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c9bd70772c720142be1020eac55f8143a34ec9f82d75a8e7a07852023e46617f"}, + {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b8ed04b3582771764538f7ee7001b02e1170223cf9b75dff0bc698fadb00cf3"}, + {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6dac87ddb34aaec85f873d737e9d06a3555a1cc1a8e0c44b7f8d5daeb89d86f"}, + {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ca4ae5a27ad7a4ee5170aebce1574b375de390bc01284f87b18d43a3984df72"}, + {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:886eec03591b7cf058467a70a87733b35f44707bd86cf64a615584fd72488b7c"}, + {file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ca7b0c1f1c983e064caa85f3792dd2fe3526b3505378874afa84baf662e12241"}, + {file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4b4356d3538c3649337df4074e81b85f0616b79731fe22dd11b99499b2ebbdf3"}, + {file = "pydantic_core-2.18.2-cp312-none-win32.whl", hash = "sha256:8b172601454f2d7701121bbec3425dd71efcb787a027edf49724c9cefc14c038"}, + {file = "pydantic_core-2.18.2-cp312-none-win_amd64.whl", hash = "sha256:b1bd7e47b1558ea872bd16c8502c414f9e90dcf12f1395129d7bb42a09a95438"}, + {file = "pydantic_core-2.18.2-cp312-none-win_arm64.whl", hash = "sha256:98758d627ff397e752bc339272c14c98199c613f922d4a384ddc07526c86a2ec"}, + {file = "pydantic_core-2.18.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:9fdad8e35f278b2c3eb77cbdc5c0a49dada440657bf738d6905ce106dc1de439"}, + {file = "pydantic_core-2.18.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1d90c3265ae107f91a4f279f4d6f6f1d4907ac76c6868b27dc7fb33688cfb347"}, + {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:390193c770399861d8df9670fb0d1874f330c79caaca4642332df7c682bf6b91"}, + {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:82d5d4d78e4448683cb467897fe24e2b74bb7b973a541ea1dcfec1d3cbce39fb"}, + {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4774f3184d2ef3e14e8693194f661dea5a4d6ca4e3dc8e39786d33a94865cefd"}, + {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4d938ec0adf5167cb335acb25a4ee69a8107e4984f8fbd2e897021d9e4ca21b"}, + {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0e8b1be28239fc64a88a8189d1df7fad8be8c1ae47fcc33e43d4be15f99cc70"}, + {file = 
"pydantic_core-2.18.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:868649da93e5a3d5eacc2b5b3b9235c98ccdbfd443832f31e075f54419e1b96b"}, + {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:78363590ef93d5d226ba21a90a03ea89a20738ee5b7da83d771d283fd8a56761"}, + {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:852e966fbd035a6468fc0a3496589b45e2208ec7ca95c26470a54daed82a0788"}, + {file = "pydantic_core-2.18.2-cp38-none-win32.whl", hash = "sha256:6a46e22a707e7ad4484ac9ee9f290f9d501df45954184e23fc29408dfad61350"}, + {file = "pydantic_core-2.18.2-cp38-none-win_amd64.whl", hash = "sha256:d91cb5ea8b11607cc757675051f61b3d93f15eca3cefb3e6c704a5d6e8440f4e"}, + {file = "pydantic_core-2.18.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:ae0a8a797a5e56c053610fa7be147993fe50960fa43609ff2a9552b0e07013e8"}, + {file = "pydantic_core-2.18.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:042473b6280246b1dbf530559246f6842b56119c2926d1e52b631bdc46075f2a"}, + {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a388a77e629b9ec814c1b1e6b3b595fe521d2cdc625fcca26fbc2d44c816804"}, + {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25add29b8f3b233ae90ccef2d902d0ae0432eb0d45370fe315d1a5cf231004b"}, + {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f459a5ce8434614dfd39bbebf1041952ae01da6bed9855008cb33b875cb024c0"}, + {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eff2de745698eb46eeb51193a9f41d67d834d50e424aef27df2fcdee1b153845"}, + {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8309f67285bdfe65c372ea3722b7a5642680f3dba538566340a9d36e920b5f0"}, + {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f93a8a2e3938ff656a7c1bc57193b1319960ac015b6e87d76c76bf14fe0244b4"}, + {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:22057013c8c1e272eb8d0eebc796701167d8377441ec894a8fed1af64a0bf399"}, + {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cfeecd1ac6cc1fb2692c3d5110781c965aabd4ec5d32799773ca7b1456ac636b"}, + {file = "pydantic_core-2.18.2-cp39-none-win32.whl", hash = "sha256:0d69b4c2f6bb3e130dba60d34c0845ba31b69babdd3f78f7c0c8fae5021a253e"}, + {file = "pydantic_core-2.18.2-cp39-none-win_amd64.whl", hash = "sha256:d9319e499827271b09b4e411905b24a426b8fb69464dfa1696258f53a3334641"}, + {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a1874c6dd4113308bd0eb568418e6114b252afe44319ead2b4081e9b9521fe75"}, + {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:ccdd111c03bfd3666bd2472b674c6899550e09e9f298954cfc896ab92b5b0e6d"}, + {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e18609ceaa6eed63753037fc06ebb16041d17d28199ae5aba0052c51449650a9"}, + {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e5c584d357c4e2baf0ff7baf44f4994be121e16a2c88918a5817331fc7599d7"}, + {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43f0f463cf89ace478de71a318b1b4f05ebc456a9b9300d027b4b57c1a2064fb"}, + {file = 
"pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e1b395e58b10b73b07b7cf740d728dd4ff9365ac46c18751bf8b3d8cca8f625a"}, + {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:0098300eebb1c837271d3d1a2cd2911e7c11b396eac9661655ee524a7f10587b"}, + {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:36789b70d613fbac0a25bb07ab3d9dba4d2e38af609c020cf4d888d165ee0bf3"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3f9a801e7c8f1ef8718da265bba008fa121243dfe37c1cea17840b0944dfd72c"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:3a6515ebc6e69d85502b4951d89131ca4e036078ea35533bb76327f8424531ce"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20aca1e2298c56ececfd8ed159ae4dde2df0781988c97ef77d5c16ff4bd5b400"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:223ee893d77a310a0391dca6df00f70bbc2f36a71a895cecd9a0e762dc37b349"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2334ce8c673ee93a1d6a65bd90327588387ba073c17e61bf19b4fd97d688d63c"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:cbca948f2d14b09d20268cda7b0367723d79063f26c4ffc523af9042cad95592"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b3ef08e20ec49e02d5c6717a91bb5af9b20f1805583cb0adfe9ba2c6b505b5ae"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c6fdc8627910eed0c01aed6a390a252fe3ea6d472ee70fdde56273f198938374"}, + {file = "pydantic_core-2.18.2.tar.gz", hash = "sha256:2e29d20810dfc3043ee13ac7d9e25105799817683348823f305ab3f349b9386e"}, +] + +[package.dependencies] +typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" + +[[package]] +name = "pydocstyle" +version = "6.3.0" +description = "Python docstring style checker" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pydocstyle-6.3.0-py3-none-any.whl", hash = "sha256:118762d452a49d6b05e194ef344a55822987a462831ade91ec5c06fd2169d019"}, + {file = "pydocstyle-6.3.0.tar.gz", hash = "sha256:7ce43f0c0ac87b07494eb9c0b462c0b73e6ff276807f204d6b53edc72b7e44e1"}, +] + +[package.dependencies] +snowballstemmer = ">=2.2.0" + +[package.extras] +toml = ["tomli (>=1.2.3)"] + +[[package]] +name = "pyflakes" +version = "3.1.0" +description = "passive checker of Python programs" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyflakes-3.1.0-py2.py3-none-any.whl", hash = "sha256:4132f6d49cb4dae6819e5379898f2b8cce3c5f23994194c24b77d5da2e36f774"}, + {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"}, +] + +[[package]] +name = "pygments" +version = "2.18.0" +description = "Pygments is a syntax highlighting package written in Python." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a"}, + {file = "pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199"}, +] + +[package.extras] +windows-terminal = ["colorama (>=0.4.6)"] + +[[package]] +name = "pyspark" +version = "3.4.3" +description = "Apache Spark Python API" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pyspark-3.4.3.tar.gz", hash = "sha256:8d7025fa274830cb6c3bd592228be3d9345cb3b8b1e324018c2aa6e75f48a208"}, +] + +[package.dependencies] +py4j = "0.10.9.7" + +[package.extras] +connect = ["googleapis-common-protos (>=1.56.4)", "grpcio (>=1.48.1)", "grpcio-status (>=1.48.1)", "numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=1.0.0)"] +ml = ["numpy (>=1.15)"] +mllib = ["numpy (>=1.15)"] +pandas-on-spark = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=1.0.0)"] +sql = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=1.0.0)"] + +[[package]] +name = "pytest" +version = "7.4.4" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, + {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-cov" +version = "4.1.0" +description = "Pytest plugin for measuring coverage." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-cov-4.1.0.tar.gz", hash = "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6"}, + {file = "pytest_cov-4.1.0-py3-none-any.whl", hash = "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a"}, +] + +[package.dependencies] +coverage = {version = ">=5.2.1", extras = ["toml"]} +pytest = ">=4.6" + +[package.extras] +testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] + +[[package]] +name = "pytest-mock" +version = "3.14.0" +description = "Thin-wrapper around the mock package for easier use with pytest" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0"}, + {file = "pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f"}, +] + +[package.dependencies] +pytest = ">=6.2.5" + +[package.extras] +dev = ["pre-commit", "pytest-asyncio", "tox"] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pytz" +version = "2024.1" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ + {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, + {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, +] + +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = 
"PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] + +[[package]] +name = "requests" +version = "2.32.2" +description = "Python HTTP for Humans." +optional = false +python-versions = ">=3.8" +files = [ + {file = "requests-2.32.2-py3-none-any.whl", hash = "sha256:fc06670dd0ed212426dfeb94fc1b983d917c4f9847c863f313c9dfaaffb7c23c"}, + {file = "requests-2.32.2.tar.gz", hash = "sha256:dd951ff5ecf3e3b3aa26b40703ba77495dab41da839ae72ef3c8e5d8e2433289"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "setuptools" +version = "70.0.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "setuptools-70.0.0-py3-none-any.whl", hash = "sha256:54faa7f2e8d2d11bcd2c07bed282eef1046b5c080d1c32add737d7b5817b1ad4"}, + {file = "setuptools-70.0.0.tar.gz", hash = "sha256:f211a66637b8fa059bb28183da127d4e86396c991a942b028c6650d4319c3fd0"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = 
"snowballstemmer" +version = "2.2.0" +description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms." +optional = false +python-versions = "*" +files = [ + {file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"}, + {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"}, +] + +[[package]] +name = "sphinx" +version = "7.3.7" +description = "Python documentation generator" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinx-7.3.7-py3-none-any.whl", hash = "sha256:413f75440be4cacf328f580b4274ada4565fb2187d696a84970c23f77b64d8c3"}, + {file = "sphinx-7.3.7.tar.gz", hash = "sha256:a4a7db75ed37531c05002d56ed6948d4c42f473a36f46e1382b0bd76ca9627bc"}, +] + +[package.dependencies] +alabaster = ">=0.7.14,<0.8.0" +babel = ">=2.9" +colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} +docutils = ">=0.18.1,<0.22" +imagesize = ">=1.3" +Jinja2 = ">=3.0" +packaging = ">=21.0" +Pygments = ">=2.14" +requests = ">=2.25.0" +snowballstemmer = ">=2.0" +sphinxcontrib-applehelp = "*" +sphinxcontrib-devhelp = "*" +sphinxcontrib-htmlhelp = ">=2.0.0" +sphinxcontrib-jsmath = "*" +sphinxcontrib-qthelp = "*" +sphinxcontrib-serializinghtml = ">=1.1.9" +tomli = {version = ">=2", markers = "python_version < \"3.11\""} + +[package.extras] +docs = ["sphinxcontrib-websupport"] +lint = ["flake8 (>=3.5.0)", "importlib_metadata", "mypy (==1.9.0)", "pytest (>=6.0)", "ruff (==0.3.7)", "sphinx-lint", "tomli", "types-docutils", "types-requests"] +test = ["cython (>=3.0)", "defusedxml (>=0.7.1)", "pytest (>=6.0)", "setuptools (>=67.0)"] + +[[package]] +name = "sphinx-mdinclude" +version = "0.5.4" +description = "Markdown extension for Sphinx" +optional = false +python-versions = ">=3.8" +files = [ + {file = "sphinx_mdinclude-0.5.4-py3-none-any.whl", hash = "sha256:c8b04b7bbc82aa47d7567841c3b030301dbdfa1cbd188ef2d2f117dcca79f199"}, + {file = "sphinx_mdinclude-0.5.4.tar.gz", hash = "sha256:4a58c8f02e95a2b7ba0eb77a851e096e42c66958f9f7ffe575e427cf659e1789"}, +] + +[package.dependencies] +docutils = ">=0.16,<1.0" +mistune = ">=2.0,<3.0" +pygments = ">=2.8" + +[package.extras] +dev = ["attribution (==1.6.2)", "black (==24.4.0)", "coverage (==7.3.2)", "docutils (==0.20.1)", "docutils (==0.21.1)", "flake8 (==7.0.0)", "flit (==3.9.0)", "mistune (==2.0.4)", "mypy (==1.9.0)", "sphinx (==7.1.2)", "sphinx (==7.3.7)", "ufmt (==2.5.1)", "usort (==1.0.7)"] + +[[package]] +name = "sphinx-rtd-theme" +version = "2.0.0" +description = "Read the Docs theme for Sphinx" +optional = false +python-versions = ">=3.6" +files = [ + {file = "sphinx_rtd_theme-2.0.0-py2.py3-none-any.whl", hash = "sha256:ec93d0856dc280cf3aee9a4c9807c60e027c7f7b461b77aeffed682e68f0e586"}, + {file = "sphinx_rtd_theme-2.0.0.tar.gz", hash = "sha256:bd5d7b80622406762073a04ef8fadc5f9151261563d47027de09910ce03afe6b"}, +] + +[package.dependencies] +docutils = "<0.21" +sphinx = ">=5,<8" +sphinxcontrib-jquery = ">=4,<5" + +[package.extras] +dev = ["bump2version", "sphinxcontrib-httpdomain", "transifex-client", "wheel"] + +[[package]] +name = "sphinxcontrib-applehelp" +version = "1.0.8" +description = "sphinxcontrib-applehelp is a Sphinx extension which outputs Apple help books" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinxcontrib_applehelp-1.0.8-py3-none-any.whl", hash = 
"sha256:cb61eb0ec1b61f349e5cc36b2028e9e7ca765be05e49641c97241274753067b4"}, + {file = "sphinxcontrib_applehelp-1.0.8.tar.gz", hash = "sha256:c40a4f96f3776c4393d933412053962fac2b84f4c99a7982ba42e09576a70619"}, +] + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +standalone = ["Sphinx (>=5)"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-devhelp" +version = "1.0.6" +description = "sphinxcontrib-devhelp is a sphinx extension which outputs Devhelp documents" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinxcontrib_devhelp-1.0.6-py3-none-any.whl", hash = "sha256:6485d09629944511c893fa11355bda18b742b83a2b181f9a009f7e500595c90f"}, + {file = "sphinxcontrib_devhelp-1.0.6.tar.gz", hash = "sha256:9893fd3f90506bc4b97bdb977ceb8fbd823989f4316b28c3841ec128544372d3"}, +] + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +standalone = ["Sphinx (>=5)"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-htmlhelp" +version = "2.0.5" +description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinxcontrib_htmlhelp-2.0.5-py3-none-any.whl", hash = "sha256:393f04f112b4d2f53d93448d4bce35842f62b307ccdc549ec1585e950bc35e04"}, + {file = "sphinxcontrib_htmlhelp-2.0.5.tar.gz", hash = "sha256:0dc87637d5de53dd5eec3a6a01753b1ccf99494bd756aafecd74b4fa9e729015"}, +] + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +standalone = ["Sphinx (>=5)"] +test = ["html5lib", "pytest"] + +[[package]] +name = "sphinxcontrib-jquery" +version = "4.1" +description = "Extension to include jQuery on newer Sphinx releases" +optional = false +python-versions = ">=2.7" +files = [ + {file = "sphinxcontrib-jquery-4.1.tar.gz", hash = "sha256:1620739f04e36a2c779f1a131a2dfd49b2fd07351bf1968ced074365933abc7a"}, + {file = "sphinxcontrib_jquery-4.1-py2.py3-none-any.whl", hash = "sha256:f936030d7d0147dd026a4f2b5a57343d233f1fc7b363f68b3d4f1cb0993878ae"}, +] + +[package.dependencies] +Sphinx = ">=1.8" + +[[package]] +name = "sphinxcontrib-jsmath" +version = "1.0.1" +description = "A sphinx extension which renders display math in HTML via JavaScript" +optional = false +python-versions = ">=3.5" +files = [ + {file = "sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"}, + {file = "sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178"}, +] + +[package.extras] +test = ["flake8", "mypy", "pytest"] + +[[package]] +name = "sphinxcontrib-qthelp" +version = "1.0.7" +description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp documents" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinxcontrib_qthelp-1.0.7-py3-none-any.whl", hash = "sha256:e2ae3b5c492d58fcbd73281fbd27e34b8393ec34a073c792642cd8e529288182"}, + {file = "sphinxcontrib_qthelp-1.0.7.tar.gz", hash = "sha256:053dedc38823a80a7209a80860b16b722e9e0209e32fea98c90e4e6624588ed6"}, +] + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +standalone = ["Sphinx (>=5)"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-serializinghtml" +version = "1.1.10" +description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinxcontrib_serializinghtml-1.1.10-py3-none-any.whl", hash = 
"sha256:326369b8df80a7d2d8d7f99aa5ac577f51ea51556ed974e7716cfd4fca3f6cb7"}, + {file = "sphinxcontrib_serializinghtml-1.1.10.tar.gz", hash = "sha256:93f3f5dc458b91b192fe10c397e324f262cf163d79f3282c158e8436a2c4511f"}, +] + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +standalone = ["Sphinx (>=5)"] +test = ["pytest"] + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + +[[package]] +name = "typing-extensions" +version = "4.12.0" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typing_extensions-4.12.0-py3-none-any.whl", hash = "sha256:b349c66bea9016ac22978d800cfff206d5f9816951f12a7d0ec5578b0a819594"}, + {file = "typing_extensions-4.12.0.tar.gz", hash = "sha256:8cbcdc8606ebcb0d95453ad7dc5065e6237b6aa230a31e81d0f440c30fed5fd8"}, +] + +[[package]] +name = "tzdata" +version = "2024.1" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ + {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"}, + {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, +] + +[[package]] +name = "urllib3" +version = "2.2.1" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=3.8" +files = [ + {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"}, + {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "virtualenv" +version = "20.26.2" +description = "Virtual Python Environment builder" +optional = false +python-versions = ">=3.7" +files = [ + {file = "virtualenv-20.26.2-py3-none-any.whl", hash = "sha256:a624db5e94f01ad993d476b9ee5346fdf7b9de43ccaee0e0197012dc838a0e9b"}, + {file = "virtualenv-20.26.2.tar.gz", hash = "sha256:82bf0f4eebbb78d36ddaee0283d43fe5736b53880b8a8cdcd37390a07ac3741c"}, +] + +[package.dependencies] +distlib = ">=0.3.7,<1" +filelock = ">=3.12.2,<4" +platformdirs = ">=3.9.1,<5" + +[package.extras] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] + +[[package]] +name = "win32-setctime" +version = "1.1.0" +description = "A small Python utility to set file creation time on Windows" +optional = false +python-versions = ">=3.5" +files = [ + {file = "win32_setctime-1.1.0-py3-none-any.whl", hash = 
"sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"}, + {file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"}, +] + +[package.extras] +dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] + +[[package]] +name = "zipp" +version = "3.18.2" +description = "Backport of pathlib-compatible object wrapper for zip files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "zipp-3.18.2-py3-none-any.whl", hash = "sha256:dce197b859eb796242b0622af1b8beb0a722d52aa2f57133ead08edd5bf5374e"}, + {file = "zipp-3.18.2.tar.gz", hash = "sha256:6278d9ddbcfb1f1089a88fde84481528b07b0e10474e09dcfe53dad4069fa059"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] + +[metadata] +lock-version = "2.0" +python-versions = ">=3.10,<4.0" +content-hash = "33525bd539e6e5417fe95d87a461413b0f4310a1388a808a16d3aab4885b97e0" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..e08b86b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,50 @@ +[tool.poetry] +name = "rialto" + +version = "1.3.0" + +packages = [ + { include = "rialto" }, +] + +description = "Rialto" +authors = ["Marek Dobransky "] +keywords = ["feature", "featureslib", "featuremaker", "metadata", "featureloader", "loader"] + +[tool.poetry.dependencies] +python = ">=3.10,<4.0" +pydantic = "^2.2.1" +pathlib = "^1.0.1" +pyyaml = "^6.0.1" +delta-spark = "^2.4.0" +python-dateutil = "^2.8.2" +pytest-mock = "^3.11.1" +pandas = "^2.1.0" +flake8-broken-line = "^1.0.0" +loguru = "^0.7.2" + +[tool.poetry.dev-dependencies] +pyspark = "^3.4.1" +chispa = "^0.9.2" +pytest = "^7.4.0" +black = "^23.7.0" +isort = "^5.12.0" +pytest-cov = "^4.1.0" +flake8 = "^6.1.0" +flake8-docstrings = "^1.7.0" +pep8-naming = "^0.13.3" +pre-commit = "^3.4.0" +sphinx = "^7.2.6" +sphinx-rtd-theme = "^2.0.0" +sphinx-mdinclude = "^0.5.3" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" + +[tool.black] +line-length = 120 +target-version = ["py36"] + +[tool.isort] +profile = "black" diff --git a/rialto/__init__.py b/rialto/__init__.py new file mode 100644 index 0000000..79c3773 --- /dev/null +++ b/rialto/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/rialto/common/__init__.py b/rialto/common/__init__.py new file mode 100644 index 0000000..93e8922 --- /dev/null +++ b/rialto/common/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rialto.common.table_reader import TableReader diff --git a/rialto/common/table_reader.py b/rialto/common/table_reader.py new file mode 100644 index 0000000..1aef614 --- /dev/null +++ b/rialto/common/table_reader.py @@ -0,0 +1,176 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ["DataReader", "TableReader"] + +import abc +import datetime +from typing import Optional + +import pyspark.sql.functions as F +from pyspark.sql import DataFrame, SparkSession + +from rialto.common.utils import get_date_col_property, get_delta_partition + + +class DataReader(metaclass=abc.ABCMeta): + """ + This is an abstract class defining the interface for readers of Spark tables + + Data reader provides two public functions, get_latest and get_table. + get_latest reads a single snapshot of the given table, while get_table reads the whole table or multiple snapshots.
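+
+    An illustrative usage sketch via the concrete TableReader defined below (an active
+    SparkSession named ``spark`` and a date-partitioned table are assumed; the table
+    name and dates are placeholders):
+
+        import datetime
+        from rialto.common import TableReader
+
+        reader = TableReader(spark, infer_partition=True)
+        # latest available snapshot on or before the given date
+        snapshot = reader.get_latest("my_catalog.my_schema.my_table", until=datetime.date(2024, 1, 31))
+        # all snapshots between two dates (both inclusive)
+        window = reader.get_table(
+            "my_catalog.my_schema.my_table",
+            info_date_from=datetime.date(2024, 1, 1),
+            info_date_to=datetime.date(2024, 1, 31),
+        )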
+ """ + + @abc.abstractmethod + def get_latest( + self, + table: str, + until: Optional[datetime.date] = None, + date_column: str = None, + uppercase_columns: bool = False, + ) -> DataFrame: + """ + Get latest available date partition of the table until specified date + + :param table: input table path + :param until: Optional until date (inclusive) + :param date_column: column to filter dates on, takes highest priority + :param uppercase_columns: Option to refactor all column names to uppercase + :return: Dataframe + """ + raise NotImplementedError + + @abc.abstractmethod + def get_table( + self, + table: str, + info_date_from: Optional[datetime.date] = None, + info_date_to: Optional[datetime.date] = None, + date_column: str = None, + uppercase_columns: bool = False, + ) -> DataFrame: + """ + Get a whole table or a slice by selected dates + + :param table: input table path + :param info_date_from: Optional date from (inclusive) + :param info_date_to: Optional date to (inclusive) + :param date_column: column to filter dates on, takes highest priority + :param uppercase_columns: Option to refactor all column names to uppercase + :return: Dataframe + """ + raise NotImplementedError + + +class TableReader(DataReader): + """An implementation of data reader for databricks tables""" + + def __init__(self, spark: SparkSession, date_property: str = "rialto_date_column", infer_partition: bool = False): + """ + Init + + :param spark: + :param date_property: Databricks table property specifying date column, take priority over inference + :param infer_partition: infer date column as tables partition from delta metadata + """ + self.spark = spark + self.date_property = date_property + self.infer_partition = infer_partition + super().__init__() + + def _uppercase_column_names(self, df: DataFrame) -> DataFrame: + """ + Change the case of all column names to uppercase + + :param df: Dataframe + :return: renamed Dataframe + """ + for col in df.columns: + df = df.withColumnRenamed(col, col.upper()) + return df + + def _get_latest_available_date(self, df: DataFrame, date_col: str, until: Optional[datetime.date]) -> datetime.date: + if until: + df = df.filter(F.col(date_col) <= until) + df = df.select(F.max(date_col)).alias("latest") + return df.head()[0] + + def _get_date_col(self, table: str, date_column: str): + """ + Get tables date column + + column specified at get_table/get_latest takes priority, if inference is enabled it + takes 2nd place, last resort is table property + """ + if date_column: + return date_column + elif self.infer_partition: + return get_delta_partition(self.spark, table) + else: + return get_date_col_property(self.spark, table, self.date_property) + + def get_latest( + self, + table: str, + until: Optional[datetime.date] = None, + date_column: str = None, + uppercase_columns: bool = False, + ) -> DataFrame: + """ + Get latest available date partition of the table until specified date + + :param table: input table path + :param until: Optional until date (inclusive) + :param date_column: column to filter dates on, takes highest priority + :param uppercase_columns: Option to refactor all column names to uppercase + :return: Dataframe + """ + date_col = self._get_date_col(table, date_column) + df = self.spark.read.table(table) + + selected_date = self._get_latest_available_date(df, date_col, until) + df = df.filter(F.col(date_col) == selected_date) + + if uppercase_columns: + df = self._uppercase_column_names(df) + return df + + def get_table( + self, + table: str, + info_date_from: 
Optional[datetime.date] = None, + info_date_to: Optional[datetime.date] = None, + date_column: str = None, + uppercase_columns: bool = False, + ) -> DataFrame: + """ + Get a whole table or a slice by selected dates + + :param table: input table path + :param info_date_from: Optional date from (inclusive) + :param info_date_to: Optional date to (inclusive) + :param date_column: column to filter dates on, takes highest priority + :param uppercase_columns: Option to refactor all column names to uppercase + :return: Dataframe + """ + date_col = self._get_date_col(table, date_column) + df = self.spark.read.table(table) + + if info_date_from: + df = df.filter(F.col(date_col) >= info_date_from) + if info_date_to: + df = df.filter(F.col(date_col) <= info_date_to) + if uppercase_columns: + df = self._uppercase_column_names(df) + return df diff --git a/rialto/common/utils.py b/rialto/common/utils.py new file mode 100644 index 0000000..c5527a8 --- /dev/null +++ b/rialto/common/utils.py @@ -0,0 +1,83 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ["load_yaml", "get_date_col_property", "get_delta_partition"] + +import os +from typing import Any + +import pyspark.sql.functions as F +import yaml +from pyspark.sql import DataFrame +from pyspark.sql.types import FloatType + + +def load_yaml(path: str) -> Any: + """ + YAML loader + + :param path: file path + :return: Parsed yaml + """ + if not os.path.isfile(path): + raise FileNotFoundError(f"Can't find {path}.") + + with open(path, "r") as stream: + return yaml.safe_load(stream) + + +def get_date_col_property(spark, table: str, property: str) -> str: + """ + Retrieve a date column name from a given table property + + :param spark: spark session + :param table: path to table + :param property: name of the property + :return: date column name + """ + props = spark.sql(f"show tblproperties {table}") + date_col = props.filter(F.col("key") == property).select("value").collect() + if len(date_col): + return date_col[0].value + else: + raise RuntimeError(f"Table {table} has no property {property}.") + + +def get_delta_partition(spark, table: str) -> str: + """ + Select first partition column of the delta table + + :param table: full table name + :return: partition column name + """ + columns = spark.catalog.listColumns(table) + partition_columns = list(filter(lambda c: c.isPartition, columns)) + if len(partition_columns): + return partition_columns[0].name + else: + raise RuntimeError(f"Delta table has no partitions: {table}.") + + +def cast_decimals_to_floats(df: DataFrame) -> DataFrame: + """ + Find all decimal types in the table and cast them to floats. Fixes errors in .toPandas() conversions.
+ + :param df: pyspark DataFrame + :return: pyspark DataFrame with fixed types + """ + decimal_cols = [col_name for col_name, data_type in df.dtypes if "decimal" in data_type] + for c in decimal_cols: + df = df.withColumn(c, F.col(c).cast(FloatType())) + + return df diff --git a/rialto/jobs/__init__.py b/rialto/jobs/__init__.py new file mode 100644 index 0000000..79c3773 --- /dev/null +++ b/rialto/jobs/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/rialto/jobs/configuration/config_holder.py b/rialto/jobs/configuration/config_holder.py new file mode 100644 index 0000000..161c61a --- /dev/null +++ b/rialto/jobs/configuration/config_holder.py @@ -0,0 +1,130 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ["ConfigException", "FeatureStoreConfig", "ConfigHolder"] + +import datetime +import typing + +from pydantic import BaseModel + + +class ConfigException(Exception): + """Wrong Configuration Exception""" + + pass + + +class FeatureStoreConfig(BaseModel): + """Configuration of Feature Store Paths""" + + feature_store_schema: str = None + feature_metadata_schema: str = None + + +class ConfigHolder: + """ + Main Rialto Jobs config holder. + + Configured via job_runner and then called from job_base / job decorators. 
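+
+    An illustrative sketch of the accessors (the values below are placeholders; in a
+    deployment this setup is typically performed by the runner rather than by hand):
+
+        import datetime
+
+        ConfigHolder.set_run_date(datetime.date(2024, 1, 31))
+        ConfigHolder.set_feature_store_config("feature_store", "feature_metadata")
+        ConfigHolder.set_custom_config(target_schema="my_schema")
+
+        run_date = ConfigHolder.get_run_date()
+        custom = ConfigHolder.get_config()  # {"target_schema": "my_schema"}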
+ """ + + _config = {} + _dependencies = {} + _run_date = None + _feature_store_config: FeatureStoreConfig = None + + @classmethod + def set_run_date(cls, run_date: datetime.date) -> None: + """ + Initialize run date + + :param run_date: datetime.date, run date + :return: None + """ + cls._run_date = run_date + + @classmethod + def get_run_date(cls) -> datetime.date: + """ + Run date + + :return: datetime.date, Run date + """ + if cls._run_date is None: + raise ConfigException("Run Date not Set !") + return cls._run_date + + @classmethod + def set_feature_store_config(cls, feature_store_schema: str, feature_metadata_schema: str) -> None: + """ + Initialize feature store config + + :param feature_store_schema: str, schema name + :param feature_metadata_schema: str, metadata schema name + :return: None + """ + cls._feature_store_config = FeatureStoreConfig( + feature_store_schema=feature_store_schema, feature_metadata_schema=feature_metadata_schema + ) + + @classmethod + def get_feature_store_config(cls) -> FeatureStoreConfig: + """ + Feature Store Config + + :return: FeatureStoreConfig + """ + if cls._feature_store_config is None: + raise ConfigException("Feature Store Config not Set !") + + return cls._feature_store_config + + @classmethod + def get_config(cls) -> typing.Dict: + """ + Get config dictionary + + :return: dictionary of key-value pairs + """ + return cls._config.copy() + + @classmethod + def set_custom_config(cls, **kwargs) -> None: + """ + Set custom key-value pairs for custom config + + :param kwargs: key-value pairs to setup + :return: None + """ + cls._config.update(kwargs) + + @classmethod + def get_dependency_config(cls) -> typing.Dict: + """ + Get rialto job dependency config + + :return: dictionary with dependency config + """ + return cls._dependencies + + @classmethod + def set_dependency_config(cls, dependencies: typing.Dict) -> None: + """ + Set rialto job dependency config + + :param dependencies: dictionary with the config + :return: None + """ + cls._dependencies = dependencies diff --git a/rialto/jobs/decorators/__init__.py b/rialto/jobs/decorators/__init__.py new file mode 100644 index 0000000..ba62141 --- /dev/null +++ b/rialto/jobs/decorators/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .decorators import datasource, job diff --git a/rialto/jobs/decorators/decorators.py b/rialto/jobs/decorators/decorators.py new file mode 100644 index 0000000..894f682 --- /dev/null +++ b/rialto/jobs/decorators/decorators.py @@ -0,0 +1,100 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ["datasource", "job"] + +import importlib.metadata +import inspect +import typing + +from loguru import logger + +from rialto.jobs.decorators.job_base import JobBase +from rialto.jobs.decorators.resolver import Resolver + + +def datasource(ds_getter: typing.Callable) -> typing.Callable: + """ + Dataset reader functions decorator. + + Registers a data-reading function as a rialto job prerequisite. + You can then request it via job function arguments. + + :param ds_getter: dataset reader function + :return: raw reader function, unchanged + """ + Resolver.register_callable(ds_getter) + return ds_getter + + +def _get_module(stack: typing.List) -> typing.Any: + last_stack = stack[1] + mod = inspect.getmodule(last_stack[0]) + return mod + + +def _get_version(module: typing.Any) -> str: + try: + parent_name, _, _ = module.__name__.partition(".") + return importlib.metadata.version(parent_name) + + except Exception: + logger.warning(f"Failed to get library {module.__name__} version!") + return "N/A" + + +def _generate_rialto_job(callable: typing.Callable, module: object, class_name: str, version: str) -> typing.Type: + generated_class = type( + class_name, + (JobBase,), + { + "get_custom_callable": lambda _: callable, + "get_job_version": lambda _: version, + "get_job_name": lambda _: class_name, + }, + ) + + generated_class.__module__ = module.__name__ + setattr(module, class_name, generated_class) + + logger.info(f"Registered {class_name} in {module}") + return generated_class + + +def job(name_or_callable: typing.Union[str, typing.Callable]) -> typing.Union[typing.Callable, typing.Type]: + """ + Rialto jobs decorator. + + Transforms a python function into a rialto transformation, which can be imported and run by Rialto Runner. + Allows a custom name via @job("custom_name_here"), or can be used as plain @job, in which case the function's name is used. + + :param name_or_callable: str with a custom job name, or the run function itself. + :return: One more job wrapper for the run function (if a custom name is specified). + Otherwise, generates a Rialto Transformation Type and returns it for in-module registration. + """ + stack = inspect.stack() + + module = _get_module(stack) + version = _get_version(module) + + if type(name_or_callable) is str: + + def inner_wrapper(callable): + return _generate_rialto_job(callable, module, name_or_callable, version) + + return inner_wrapper + + else: + name = name_or_callable.__name__ + return _generate_rialto_job(name_or_callable, module, name, version) diff --git a/rialto/jobs/decorators/job_base.py b/rialto/jobs/decorators/job_base.py new file mode 100644 index 0000000..c55e09c --- /dev/null +++ b/rialto/jobs/decorators/job_base.py @@ -0,0 +1,135 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
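# --- editor's note: illustrative usage sketch, not part of the committed patch ---
# How the decorators above are meant to be combined in a job module; the table
# name and columns are hypothetical.
from pyspark.sql import DataFrame

from rialto.jobs.decorators import datasource, job


@datasource
def accounts(table_reader, run_date) -> DataFrame:
    # "table_reader" and "run_date" are injected by the Resolver at run time
    return table_reader.get_latest("catalog.schema.accounts", until=run_date)


@job("account_count_job")
def account_count(accounts: DataFrame) -> DataFrame:
    # "accounts" is resolved from the datasource registered above
    return accounts.groupBy("ACCOUNT_TYPE").count()
# --- end of editor's note ---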
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ["JobBase"] + +import abc +import datetime +import typing +from contextlib import contextmanager + +import pyspark.sql.functions as F +from loguru import logger +from pyspark.sql import DataFrame, SparkSession + +from rialto.common import TableReader +from rialto.jobs.configuration.config_holder import ConfigHolder +from rialto.jobs.decorators.resolver import Resolver +from rialto.loader import DatabricksLoader, PysparkFeatureLoader +from rialto.metadata import MetadataManager +from rialto.runner import Transformation + + +class JobBase(Transformation): + """A Base Class for Rialto Jobs. Serves as a foundation into which the @job decorators are inserted.""" + + @abc.abstractmethod + def get_custom_callable(self) -> typing.Callable: + """Getter - Custom callable (i.e. job transformation function)""" + pass + + @abc.abstractmethod + def get_job_version(self) -> str: + """Job version getter""" + pass + + @abc.abstractmethod + def get_job_name(self) -> str: + """Job name getter""" + pass + + @contextmanager + def _setup_resolver(self, run_date: datetime.date) -> None: + Resolver.register_callable(lambda: run_date, "run_date") + + Resolver.register_callable(ConfigHolder.get_config, "config") + Resolver.register_callable(ConfigHolder.get_dependency_config, "dependencies") + + Resolver.register_callable(self._get_spark, "spark") + Resolver.register_callable(self._get_table_reader, "table_reader") + Resolver.register_callable(self._get_feature_loader, "feature_loader") + + try: + yield + finally: + Resolver.cache_clear() + + def _setup( + self, spark: SparkSession, run_date: datetime.date, table_reader: TableReader, dependencies: typing.Dict = None + ) -> None: + self._spark = spark + self._table_reader = table_reader + + ConfigHolder.set_dependency_config(dependencies) + ConfigHolder.set_run_date(run_date) + + def _get_spark(self) -> SparkSession: + return self._spark + + def _get_table_reader(self) -> TableReader: + return self._table_reader + + def _get_feature_loader(self) -> PysparkFeatureLoader: + config = ConfigHolder.get_feature_store_config() + + databricks_loader = DatabricksLoader(self._spark, config.feature_store_schema) + feature_loader = PysparkFeatureLoader(self._spark, databricks_loader, config.feature_metadata_schema) + + return feature_loader + + def _get_timestamp_holder_result(self) -> DataFrame: + spark = self._get_spark() + return spark.createDataFrame( + [(self.get_job_name(), datetime.datetime.now())], schema="JOB_NAME string, CREATION_TIME timestamp" + ) + + def _add_job_version(self, df: DataFrame) -> DataFrame: + version = self.get_job_version() + return df.withColumn("VERSION", F.lit(version)) + + def _run_main_callable(self, run_date: datetime.date) -> DataFrame: + with self._setup_resolver(run_date): + custom_callable = self.get_custom_callable() + raw_result = Resolver.register_resolve(custom_callable) + + if raw_result is None: + raw_result = self._get_timestamp_holder_result() + + result_with_version = self._add_job_version(raw_result) + return result_with_version + + def run( + self, + reader: TableReader, + run_date: datetime.date, + spark:
SparkSession = None, + metadata_manager: MetadataManager = None, + dependencies: typing.Dict = None, + ) -> DataFrame: + """ + Rialto transformation run + + :param reader: data store api object + :param run_date: run date + :param spark: spark session + :param metadata_manager: metadata api object + :param dependencies: rialto job dependencies + :return: dataframe + """ + try: + self._setup(spark, run_date, reader, dependencies) + return self._run_main_callable(run_date) + except Exception as e: + logger.exception(e) + raise e diff --git a/rialto/jobs/decorators/resolver.py b/rialto/jobs/decorators/resolver.py new file mode 100644 index 0000000..9f90e5a --- /dev/null +++ b/rialto/jobs/decorators/resolver.py @@ -0,0 +1,110 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ["ResolverException", "Resolver"] + +import inspect +import typing +from functools import cache + + +class ResolverException(Exception): + """Resolver Errors Class - In most cases your dependency tree is not complete.""" + + pass + + +class Resolver: + """ + Resolver handles dependency management between datasets and jobs. + + We register different callables, which can depend on other callables. + Calling resolve() attempts to resolve these dependencies. + """ + + _storage = {} + + @classmethod + def _get_args_for_call(cls, function: typing.Callable) -> typing.Dict[str, typing.Any]: + result_dict = {} + signature = inspect.signature(function) + + for param in signature.parameters.values(): + result_dict[param.name] = cls.resolve(param.name) + + return result_dict + + @classmethod + def register_callable(cls, callable: typing.Callable, name: str = None) -> str: + """ + Register callable with a given name for later resolution. + + In case name isn't present, function's __name__ attribute will be used. + + :param callable: callable to register (getter) + :param name: str, custom name, f.__name__ will be used otherwise + :return: str, name under which the callable has been registered + """ + if name is None: + name = getattr(callable, "__name__", repr(callable)) + + cls._storage[name] = callable + return name + + @classmethod + @cache + def resolve(cls, name: str) -> typing.Any: + """ + Search for a callable registered prior and attempt to call it with the correct arguments. + + Arguments are resolved recursively according to requirements; for example, if we have + a(b, c), b(d), and c(), d() registered, then we recursively call resolve() methods until we resolve + c, d -> b -> a + + :param name: name of the callable to resolve + :return: result of the callable + """ + if name not in cls._storage.keys(): + raise ResolverException(f"{name} declaration not found!") + + getter = cls._storage[name] + args = cls._get_args_for_call(getter) + + return getter(**args) + + @classmethod + def register_resolve(cls, callable: typing.Callable) -> typing.Any: + """ + Register and Resolve a callable. + + Combination of the register() and resolve() methods for a simplified execution.
+ + :param callable: callable to register and immediately resolve + :return: result of the callable + """ + name = cls.register_callable(callable) + return cls.resolve(name) + + @classmethod + def cache_clear(cls) -> None: + """ + Clear resolver cache. + + The resolve method caches its results to avoid duplication of resolutions. + However, in case we re-register some callables, we need to clear the cache + in order to ensure re-execution of all resolutions. + + :return: None + """ + cls.resolve.cache_clear() diff --git a/rialto/jobs/decorators/test_utils.py b/rialto/jobs/decorators/test_utils.py new file mode 100644 index 0000000..bd21dba --- /dev/null +++ b/rialto/jobs/decorators/test_utils.py @@ -0,0 +1,61 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ["disable_job_decorators"] + +import importlib +import typing
+from contextlib import contextmanager +from unittest.mock import patch + + +def _passthrough_decorator(x: typing.Callable) -> typing.Callable: + if type(x) is str: + return _passthrough_decorator + + else: + return x + + +@contextmanager +def _disable_job_decorators() -> None: + patches = [ + patch("rialto.jobs.decorators.datasource", _passthrough_decorator), + patch("rialto.jobs.decorators.decorators.datasource", _passthrough_decorator), + patch("rialto.jobs.decorators.job", _passthrough_decorator), + patch("rialto.jobs.decorators.decorators.job", _passthrough_decorator), + ] + + for i in patches: + i.start() + + yield + + for i in patches: + i.stop() + + +@contextmanager +def disable_job_decorators(module) -> None: + """ + Disables job decorators in a python module. Useful for testing your rialto jobs and datasources. + + :param module: python module with the decorated functions. + :return: None + """ + with _disable_job_decorators(): + importlib.reload(module) + yield + + importlib.reload(module) diff --git a/rialto/loader/__init__.py b/rialto/loader/__init__.py new file mode 100644 index 0000000..7adc52d --- /dev/null +++ b/rialto/loader/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
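# --- editor's note: illustrative sketch, not part of the committed patch ---
# A minimal demonstration of the recursive resolution described in Resolver:
# "b" declares a parameter "a", so resolving "b" first resolves "a".
from rialto.jobs.decorators.resolver import Resolver

Resolver.register_callable(lambda: 40, "a")
Resolver.register_callable(lambda a: a + 2, "b")

assert Resolver.resolve("b") == 42

Resolver.cache_clear()  # drop memoized results before re-registering callables
# --- end of editor's note ---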
+ +from rialto.loader.data_loader import DatabricksLoader +from rialto.loader.pyspark_feature_loader import PysparkFeatureLoader diff --git a/rialto/loader/config_loader.py b/rialto/loader/config_loader.py new file mode 100644 index 0000000..ead2705 --- /dev/null +++ b/rialto/loader/config_loader.py @@ -0,0 +1,48 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ["get_feature_config"] + +from typing import List, Optional + +from pydantic import BaseModel + +from rialto.common.utils import load_yaml + + +class GroupConfig(BaseModel): + group: str + prefix: str + features: List[str] + + +class BaseConfig(BaseModel): + group: str + keys: List[str] + + +class FeatureConfig(BaseModel): + selection: List[GroupConfig] + base: BaseConfig + maps: Optional[List[str]] = None + + +def get_feature_config(path) -> FeatureConfig: + """ + Read yaml and parse it + + :param path: config path + :return: Pydantic feature config object + """ + return FeatureConfig(**load_yaml(path)) diff --git a/rialto/loader/data_loader.py b/rialto/loader/data_loader.py new file mode 100644 index 0000000..930c2b0 --- /dev/null +++ b/rialto/loader/data_loader.py @@ -0,0 +1,45 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
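# --- editor's note: illustrative sketch, not part of the committed patch ---
# A hypothetical feature-selection YAML matching the FeatureConfig, GroupConfig
# and BaseConfig models defined in config_loader.py above:
#
#   base:
#     group: CustomerBase
#     keys: [CUSTOMER_ID]
#   maps:
#     - CustomerAccountMap
#   selection:
#     - group: TransactionFeatures
#       prefix: TXN
#       features: [AMOUNT_SUM_WINDOW_30, AMOUNT_SUM_WINDOW_90]
#
# from rialto.loader.config_loader import get_feature_config
# cfg = get_feature_config("feature_selection.yaml")  # hypothetical path
# assert cfg.base.group == "CustomerBase"
# --- end of editor's note ---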
+ +__all__ = ["DatabricksLoader"] + +from datetime import date + +from pyspark.sql import DataFrame, SparkSession + +from rialto.common.table_reader import TableReader +from rialto.loader.interfaces import DataLoader + + +class DatabricksLoader(DataLoader): + """Implementation of DataLoader using TableReader to access feature tables""" + + def __init__(self, spark: SparkSession, schema: str, date_column: str = "INFORMATION_DATE"): + super().__init__() + + self.reader = TableReader(spark) + self.schema = schema + self.date_col = date_column + + def read_group(self, group: str, information_date: date) -> DataFrame: + """ + Read a feature group by getting the latest partition by date + + :param group: group name + :param information_date: partition date + :return: dataframe + """ + return self.reader.get_latest( + f"{self.schema}.{group}", until=information_date, date_column=self.date_col, uppercase_columns=True + ) diff --git a/rialto/loader/interfaces.py b/rialto/loader/interfaces.py new file mode 100644 index 0000000..dad08e6 --- /dev/null +++ b/rialto/loader/interfaces.py @@ -0,0 +1,76 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ["DataLoader", "FeatureLoaderInterface"] + +import abc +from datetime import date +from typing import Dict + + +class DataLoader(metaclass=abc.ABCMeta): + """ + An interface to read feature groups from storage + + Requires read_group function. + """ + + @abc.abstractmethod + def read_group(self, group: str, information_date: date): + """ + Read one feature group + + :param group: Group name + :param information_date: date + """ + raise NotImplementedError + + +class FeatureLoaderInterface(metaclass=abc.ABCMeta): + """ + A definition of feature loading interface + + Provides functionality to read features, feature groups and selections of features according to configs. + Also provides an interface to access metadata of said features. 
+ """ + + @abc.abstractmethod + def get_feature(self, group_name: str, feature_name: str, information_date: date): + """Get single feature""" + raise NotImplementedError + + @abc.abstractmethod + def get_feature_metadata(self, group_name: str, feature_name: str) -> Dict: + """Get single feature's metadata""" + raise NotImplementedError + + @abc.abstractmethod + def get_group(self, group_name: str, information_date: date): + """Get feature group""" + raise NotImplementedError + + @abc.abstractmethod + def get_group_metadata(self, group_name: str) -> Dict: + """Get feature group's metadata""" + raise NotImplementedError + + @abc.abstractmethod + def get_features_from_cfg(self, path: str, information_date: date): + """Get features from multiple groups as defined by configuration""" + raise NotImplementedError + + @abc.abstractmethod + def get_metadata_from_cfg(self, path: str) -> Dict: + """Get metadata from multiple groups as defined by configuration""" + raise NotImplementedError diff --git a/rialto/loader/pyspark_feature_loader.py b/rialto/loader/pyspark_feature_loader.py new file mode 100644 index 0000000..d0eef20 --- /dev/null +++ b/rialto/loader/pyspark_feature_loader.py @@ -0,0 +1,211 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +__all__ = ["PysparkFeatureLoader"] + +from collections import namedtuple +from datetime import date +from typing import Dict, List + +from pyspark.sql import DataFrame, SparkSession + +from rialto.common.utils import cast_decimals_to_floats +from rialto.loader.config_loader import FeatureConfig, GroupConfig, get_feature_config +from rialto.loader.data_loader import DataLoader +from rialto.loader.interfaces import FeatureLoaderInterface +from rialto.metadata.metadata_manager import ( + FeatureMetadata, + GroupMetadata, + MetadataManager, +) + + +class PysparkFeatureLoader(FeatureLoaderInterface): + """Implementation of feature loader for pyspark environment""" + + def __init__(self, spark: SparkSession, data_loader: DataLoader, metadata_schema: str): + """ + Init + + :param spark: spark session + :param data_loader: data loader + :param metadata_schema: schema location of metadata tables + """ + super().__init__() + self.spark = spark + self.data_loader = data_loader + self.metadata = MetadataManager(spark, metadata_schema) + + KeyMap = namedtuple("KeyMap", ["df", "key"]) + + def get_feature(self, group_name: str, feature_name: str, information_date: date) -> DataFrame: + """ + Get single feature + + :param group_name: feature group name + :param feature_name: feature name + :param information_date: selected date + :return: A dataframe containing feature group key and selected feature + """ + print("This function is untested, use with caution!") + key = self.get_group_metadata(group_name).key + return self.data_loader.read_group(self.get_group_fs_name(group_name), information_date).select( + *key, feature_name + ) + + def get_feature_metadata(self, group_name: str, feature_name: str) -> FeatureMetadata: + """ + Get single features metadata + + :param group_name: feature group name + :param feature_name: feature name + :return: metadata dictionary + """ + return self.metadata.get_feature(group_name, feature_name) + + def get_group(self, group_name: str, information_date: date) -> DataFrame: + """ + Get feature group + + :param group_name: feature group name + :param information_date: selected date + :return: A dataframe containing feature group key + """ + print("This function is untested, use with caution!") + return self.data_loader.read_group(self.get_group_fs_name(group_name), information_date) + + def get_group_metadata(self, group_name: str) -> GroupMetadata: + """ + Get feature groups metadata + + :param group_name: feature group name + :return: metadata dictionary + """ + return self.metadata.get_group(group_name) + + def get_group_fs_name(self, group_name: str) -> str: + """ + Return groups file system name + + If given name matches databricks path, i.e. has two dot separators, do nothing. + Else assume it's a class name and search for fs name in metadata. 
+ :param group_name: Group name + :return: group filesystem name + """ + if len(group_name.split(sep=".")) == 3: + return group_name + return self.metadata.get_group(group_name).fs_name + + def _are_all_keys_in(self, keys: List[str], columns: List[str]) -> bool: + """ + Check if all presented keys are in presented list of columns + + :param keys: list of keys + :param columns: list of columns + :return: True/False + """ + for key in keys: + if key not in columns: + return False + return True + + def _add_prefix(self, df: DataFrame, prefix: str, key: List[str]) -> DataFrame: + """ + Prefixes all column names except for key + + :param df: dataframe + :param prefix: prefix + :param key: list of keys + :return: renamed dataframe + """ + for col in df.columns: + if col not in key: + df = df.withColumnRenamed(col, f"{prefix}_{col}") + return df + + def _get_keymaps(self, config: FeatureConfig, information_date: date) -> List[KeyMap]: + """ + Read all key mapping tables specified by configuration + + :param config: configuration object + :param information_date: date + :return: List of tuples of loaded dataframes and their keys + """ + key_maps = [] + for mapping in config.maps: + df = self.data_loader.read_group(self.get_group_fs_name(mapping), information_date).drop("INFORMATION_DATE") + key = self.metadata.get_group(mapping).key + key_maps.append(PysparkFeatureLoader.KeyMap(df, key)) + return key_maps + + def _join_keymaps(self, base: DataFrame, key_maps: List[KeyMap]) -> DataFrame: + if len(key_maps): + for mapping in key_maps: + if self._are_all_keys_in(mapping.key, base.columns): + base = base.join(mapping.df, mapping.key, "inner") + key_maps.remove(mapping) + return self._join_keymaps(base, key_maps) + raise KeyError(f"None of {[x.key for x in key_maps]} can be joined onto {base.columns}") + else: + return base + + def _add_feature_group(self, base: DataFrame, df: DataFrame, group_cfg: GroupConfig) -> DataFrame: + group_key = self.metadata.get_group(group_cfg.group).key + df = df.select(group_cfg.features + group_key) + df = self._add_prefix(df, group_cfg.prefix, group_key) + return base.join(df, group_key, "left") + + def get_features_from_cfg(self, path: str, information_date: date) -> DataFrame: + """Get multiple features across many groups, fetches latest features in relation to the provided date + + :param path: configuration location + :param information_date: date for extraction + """ + config = get_feature_config(path) + # 1 select keys from base + base = self.data_loader.read_group(self.get_group_fs_name(config.base.group), information_date).select( + config.base.keys + ) + # 2 join maps onto base (resolve keys) + if config.maps: + key_maps = self._get_keymaps(config, information_date) + base = self._join_keymaps(base, key_maps) + + # 3 read, select and join other tables + for group_cfg in config.selection: + df = self.data_loader.read_group(self.get_group_fs_name(group_cfg.group), information_date) + base = self._add_feature_group(base, df, group_cfg) + + # 4 fix dtypes for pandas conversion + base = cast_decimals_to_floats(base) + + return base + + def get_metadata_from_cfg(self, path: str) -> Dict[str, FeatureMetadata]: + """ + Get multiple features metadata from config + + :param path: configuration path + :return: dictionary feature_name : FeatureMetadata + """ + result = {} + config = get_feature_config(path) + + for group_cfg in config.selection: + for feature in group_cfg.features: + feature_metadata = self.get_feature_metadata(group_cfg.group, feature) + feature_name = 
f"{group_cfg.prefix}_{feature}" + result[feature_name] = feature_metadata + + return result diff --git a/rialto/maker/__init__.py b/rialto/maker/__init__.py new file mode 100644 index 0000000..d31cd4a --- /dev/null +++ b/rialto/maker/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rialto.maker.containers import FeatureFunction, FeatureHolder +from rialto.maker.feature_maker import FeatureMaker +from rialto.maker.utils import feature_name +from rialto.maker.wrappers import depends, desc, feature, param +from rialto.metadata import ValueType diff --git a/rialto/maker/containers.py b/rialto/maker/containers.py new file mode 100644 index 0000000..9a93417 --- /dev/null +++ b/rialto/maker/containers.py @@ -0,0 +1,91 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+__all__ = ["FeatureFunction", "FeatureHolder"] + +import typing + +from rialto.maker.utils import feature_name +from rialto.metadata import FeatureMetadata, ValueType + + +class FeatureFunction: + """ + A container for feature generating object + + contains a callable object, it's name, parameters + """ + + def __init__(self, name: str, callable_object: typing.Callable, value_type: ValueType = ValueType.nominal): + self.name = name + self.callable = callable_object + self.parameters: typing.Dict[str, typing.Any] = {} + self.dependencies: typing.List[str] = [] + self.type = value_type + self.description = "basic feature" + + def __str__(self) -> str: + """ + Serialize to string for logging + + :return: string serialized object + """ + return ( + f"Name: {self.name}\n\t" + f"Parameters: {self.parameters}\n\t" + f"Type: {self.get_type()}\n\t" + f"Description: {self.description}" + ) + + def metadata(self) -> FeatureMetadata: + """ + Return functions metadata + + :return: metadata dict + """ + return FeatureMetadata(name=self.get_feature_name(), value_type=self.type, description=self.description) + + def get_feature_name(self) -> str: + """ + Get feature name enhanced by parameters + + :return: full feature name + """ + return feature_name(self.name, self.parameters) + + def get_type(self) -> str: + """ + Get feature value type + + :return: value type + """ + return self.type.value + + +class FeatureHolder(list): + """ + A container for FeatureFunctions used in the feature building process + + Basically just a plain list with unique type name + """ + + def __init__(self): + super().__init__() + + def get_metadata(self) -> typing.List[FeatureMetadata]: + """ + Concats metadata from all functions + + :return: List of metadata + """ + return [func.metadata() for func in self] diff --git a/rialto/maker/feature_maker.py b/rialto/maker/feature_maker.py new file mode 100644 index 0000000..6cbc1ee --- /dev/null +++ b/rialto/maker/feature_maker.py @@ -0,0 +1,264 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ["FeatureMaker"] + +import datetime +import inspect +import types +import typing + +import pyspark.sql.functions as F +from loguru import logger +from pyspark.sql import DataFrame + +from rialto.maker.containers import FeatureFunction, FeatureHolder + + +class _FeatureMaker: + """ + A framework for feature making + + Enables registration of callable feature functions and then executes them. 
+ """ + + def __init__(self): + self.feature_functions = FeatureHolder() + self.data_frame = None + self.make_date = None + self.key = None + + def _set_values(self, df: DataFrame, key: typing.Union[str, typing.List[str]], make_date: datetime.date): + """ + Instance value setter + + :param df: DataFrame with input data + :param key: simple or compound string key + :param make_date: a date to make feature from + :return: None + """ + self.data_frame = df + self.key = key + self.make_date = make_date + + def _order_by_dependencies(self, feature_holders: typing.List[FeatureHolder]) -> typing.List[FeatureHolder]: + """ + Order features like directional graph + + Simple O(n^2) solution, in each pass try to find one feature group with dependencies resolved + :param feature_holders: List of feature holders each with one group of feature_functions + :return: ordered list of feature holders each with one group of feature_functions + """ + logger.trace("Resolving module dependencies") + + ordered = [] + resolved = set() + leftover = feature_holders + while True: + dep_is_resolved = True + i = 0 + for i in range(len(leftover)): + dep_is_resolved = True + for dep in leftover[i][0].dependencies: + if dep not in resolved: + dep_is_resolved = False + break + if dep_is_resolved: + break + if dep_is_resolved: + resolved.add(leftover[i][0].name) + ordered.append(leftover.pop(i)) + else: + raise Exception("Feature dependencies can't be resolved!") + + if len(leftover) == 0: + break + return ordered + + def _load_features(self, features_module: types.ModuleType) -> typing.List: + """ + Find feature function definitions inside a given python module + + :param features_module: a python module with definitions of feature functions using wrappers + :return: List of FeatureHolders + """ + return [value for (member, value) in inspect.getmembers(features_module) if isinstance(value, FeatureHolder)] + + def _register_module(self, features_module: types.ModuleType) -> FeatureHolder: + """ + Find feature function definitions inside a given python module and registers them for use in make call + + :param features_module: a python module with definitions of feature functions using FML wrappers + :return: FeatureHolder + """ + logger.trace(f"Registering module: {features_module}") + feature_functions = FeatureHolder() + + ordered_feature_holders = self._order_by_dependencies(self._load_features(features_module)) + for feature_holder in ordered_feature_holders: + for feature_function in feature_holder: + logger.trace(f"Registering feature:\n\t{feature_function}") + feature_functions.append(feature_function) + return feature_functions + + def _filter_null_keys(self, df: DataFrame): + if isinstance(self.key, str): + return df.filter(F.col(self.key).isNotNull()) + else: + for k in self.key: + df = df.filter(F.col(k).isNotNull()) + return df + + def _make_sequential(self, keep_preexisting: bool) -> DataFrame: + """ + Make features by creating new columns on the existing dataframe + + :param keep_preexisting: Bool, keep preexisting data in the dataframe after making features + :return: DataFrame + """ + feature_names = [] + for feature_function in self.feature_functions: + logger.trace(f"Making feature:\n\t{feature_function}") + feature_names.append(feature_function.get_feature_name()) + self.data_frame = self.data_frame.withColumn( + feature_function.get_feature_name(), feature_function.callable() + ) + if not keep_preexisting: + logger.info("Dropping non-selected columns") + self.data_frame = self.data_frame.select(self.key, 
*feature_names) + return self._filter_null_keys(self.data_frame) + + def _make_aggregated(self) -> DataFrame: + """ + Make features by creating new columns via aggregation on defined key + + :return: DataFrame + """ + aggregates = [] + for feature_function in self.feature_functions: + logger.trace(f"Creating aggregate: \n\t{feature_function.__str__()}") + aggregates.append(feature_function.callable().alias(feature_function.get_feature_name())) + + self.data_frame = self.data_frame.groupBy(self.key).agg(*aggregates) + return self._filter_null_keys(self.data_frame) + + def make( + self, + df: DataFrame, + key: typing.Union[str, typing.List[str]], + make_date: datetime.date, + features_module: types.ModuleType, + keep_preexisting: bool = False, + ) -> (DataFrame, typing.Dict): + """ + Make features by creating new columns based on definitions in imported module + + :param df: DataFrame with input data + :param key: simple or compound string key + :param make_date: a date to make feature from + :param features_module: a python module with definitions of feature functions using FML wrappers + :param keep_preexisting: bool to decide whether to keep input data + :return: DataFrame with features, Dict with feature metadata + """ + logger.info(f"Making sequential for \n\tkey: {key} \n\ton {make_date} \n\tfrom {features_module}") + self._set_values(df, key, make_date) + self.feature_functions = self._register_module(features_module) + features = self._make_sequential(keep_preexisting) + logger.info(f"Finished making sequential features from {features_module}") + return features, self.feature_functions.get_metadata() + + def make_aggregated( + self, + df: DataFrame, + key: typing.Union[str, typing.List[str]], + make_date: datetime.date, + features_module: types.ModuleType, + ) -> (DataFrame, typing.Dict): + """ + Make features by creating new columns based on definitions in imported module + + :param df: DataFrame with input data + :param key: simple or compound string key + :param make_date: a date to make feature from + :param features_module: a python module with definitions of feature functions using FML wrappers + :return: DataFrame with features, Dict with feature metadata + """ + logger.info(f"Making aggregated for \n\tkey: {key} \n\ton {make_date} \n\tfrom {features_module}") + self._set_values(df, key, make_date) + self.feature_functions = self._register_module(features_module) + features = self._make_aggregated() + logger.info(f"Finished making aggregated features from {features_module}") + return features, self.feature_functions.get_metadata() + + def _find_feature(self, name: str, feature_functions: FeatureHolder) -> FeatureFunction: + """ + Find a single feature function by name + + :param name: str name of the feature + :param feature_functions: FeatureHolder with all feature functions + :return: wanted FeatureFunction + """ + for feature_function in feature_functions: + if feature_function.get_feature_name() == name: + return feature_function + raise Exception(f"Feature {name} is not defined!") + + def make_single_feature( + self, + df: DataFrame, + name: typing.Union[str, typing.List[str]], + features_module: types.ModuleType, + make_date: datetime.date = None, + ) -> DataFrame: + """ + Make single feature by creating new columns based on definition in imported module + + Intended for being able to test single features + :param df: DataFrame with input data + :param name: name of the feature + :param features_module: a python module with definitions of feature functions using FML 
wrappers + :param make_date: an optional date to make feature from + :return: DataFrame with features + """ + self.make_date = make_date + feature_functions = self._register_module(features_module) + feature = self._find_feature(name, feature_functions) + return df.withColumn(feature.get_feature_name(), feature.callable()).select(feature.get_feature_name()) + + def make_single_agg_feature( + self, + df: DataFrame, + name: str, + key: typing.Union[str, typing.List[str]], + features_module: types.ModuleType, + make_date: datetime.date = None, + ) -> DataFrame: + """ + Make single feature by creating new columns based on definition in imported module + + Intended for being able to test single features + :param df: DataFrame with input data + :param name: name of the feature + :param key: key to aggregate on + :param features_module: a python module with definitions of feature functions using FML wrappers + :param make_date: an optional date to make feature from + :return: DataFrame with features + """ + self.make_date = make_date + feature_functions = self._register_module(features_module) + feature = self._find_feature(name, feature_functions) + return df.groupBy(key).agg(feature.callable().alias(feature.get_feature_name())) + + +FeatureMaker = _FeatureMaker() diff --git a/rialto/maker/utils.py b/rialto/maker/utils.py new file mode 100644 index 0000000..829dc60 --- /dev/null +++ b/rialto/maker/utils.py @@ -0,0 +1,46 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ["feature_name"] + +import typing + + +def _get_feature_parameter_suffix(parameters: typing.Dict) -> str: + """ + Collate all parameter names and their value in format p1_v1_p2_v2... + + :param parameters: unordered dict of parameters and values + :return: string serialized parameters + """ + if len(parameters) == 0: + return "" + params = "" + for parameter_name, value in sorted(parameters.items()): + params += f"_{parameter_name}_{value}" + return params + + +def feature_name(name: str, parameters: typing.Dict) -> str: + """ + Join feature function name with parameters to create a feature name + + :param name: string name of feature + :param parameters: unordered dict of parameters and values + :return: feature name + """ + params = _get_feature_parameter_suffix(parameters) + if len(params) == 0: + return name.upper() + return f"{name}{params}".upper() diff --git a/rialto/maker/wrappers.py b/rialto/maker/wrappers.py new file mode 100644 index 0000000..a7a4103 --- /dev/null +++ b/rialto/maker/wrappers.py @@ -0,0 +1,194 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
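# --- editor's note: illustrative sketch, not part of the committed patch ---
# The naming scheme implemented by feature_name above: the function name is
# joined with sorted parameters and uppercased.
from rialto.maker.utils import feature_name

assert feature_name("amount_sum", {}) == "AMOUNT_SUM"
assert feature_name("amount_sum", {"window": 30}) == "AMOUNT_SUM_WINDOW_30"
# --- end of editor's note ---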
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ["feature", "desc", "param", "depends"] + +import typing +from copy import deepcopy +from functools import partial, wraps + +from loguru import logger + +from rialto.maker.containers import FeatureFunction, FeatureHolder +from rialto.metadata import ValueType + + +def decorator_with_args(decorator_to_enhance): + """ + Wrap decorators to use args + + This function is supposed to be used as a decorator. + It must decorate another function that is intended to be used as a decorator. + It will allow any decorator to accept an arbitrary number of arguments. + """ + + @wraps(decorator_to_enhance) + def decorator_maker(*args, **kwargs): + """ + Pass arguments to inner wrapper + + We create on the fly a decorator that accepts only a function + but keeps the passed arguments from the maker. + """ + + def decorator_wrapper(func): + """ + Pass arguments to the wrapped function + + We return the result of the original decorator + the decorator must have this specific signature otherwise it won't work: + """ + return decorator_to_enhance(func, *args, **kwargs) + + return decorator_wrapper + + return decorator_maker + + +@decorator_with_args +def feature(feature_functions: typing.Union[typing.Callable, FeatureHolder], feature_type: ValueType): + """ + Wrap a feature definition. Use as the outermost decorator! + + The purpose of this wrapper is to wrap functions that have no other decorator inside a FeatureHolder + and define the feature type + :param feature_functions: FeatureHolder or pure function + :param feature_type: ValueType enum (numerical, ordinal, nominal) + :return: FeatureHolder + """ + logger.trace(f"Wrapping {feature_functions} as feature") + + def wrapper() -> FeatureHolder: + if isinstance(feature_functions, FeatureHolder): + for f in feature_functions: + f.type = feature_type + return feature_functions + else: + func_list = FeatureHolder() + new_feature_f = FeatureFunction(feature_functions.__name__, feature_functions, feature_type) + func_list.append(new_feature_f) + return func_list + + return wrapper() + + +@decorator_with_args +def desc(feature_functions: typing.Union[typing.Callable, FeatureHolder], desc: str): + """ + Wrap a feature with a string description, used in metadata + + :param feature_functions: FeatureHolder or pure function + :param desc: string description of the feature + :return: FeatureHolder + """ + logger.trace(f"Wrapping {feature_functions} with description") + + def wrapper() -> FeatureHolder: + if isinstance(feature_functions, FeatureHolder): + for f in feature_functions: + f.description = desc + return feature_functions + else: + func_list = FeatureHolder() + new_feature_f = FeatureFunction(feature_functions.__name__, feature_functions) + new_feature_f.description = desc + func_list.append(new_feature_f) + return func_list + + return wrapper() + + +@decorator_with_args +def param(feature_functions: typing.Union[typing.Callable, FeatureHolder], parameter_name: str, values: typing.List): + """ + Wrap a feature function with a custom parameter; can be chained + + Creates a cartesian product of all previous parameters with new ones + 
:param feature_functions: FeatureHolder or pure function + :param parameter_name: string name of the parameter + :param values: a list of parameter values + :return: FeatureHolder + """ + logger.trace(f"Parametrize \n\tfunc: {feature_functions}\n\tparam:{parameter_name}\n\tvalues{values}") + + def expander() -> FeatureHolder: + func_list = FeatureHolder() + if isinstance(feature_functions, FeatureHolder): + for f in feature_functions: + for value in values: + func_list.append(_copy_with_new_parameter(f, parameter_name, value)) + else: + for value in values: + func_list.append(_create_new_with_parameter(feature_functions, parameter_name, value)) + return func_list + + return expander() + + +def _copy_with_new_parameter(func: FeatureFunction, parameter: str, value: typing.Any) -> FeatureFunction: + """ + Create a deep copy of FeatureFunction and store new parameter in it + + :param func: FeatureFunction + :param parameter: string parameter name + :param value: any parameter value + :return: FeatureFunction + """ + logger.trace(f"Extending \n\tfunction: {func} \n\tby parameter: {parameter} \n\twith value: {value}") + new_feature_f = deepcopy(func) + new_feature_f.callable = partial(func.callable, **{parameter: value}) + new_feature_f.parameters[parameter] = value + return new_feature_f + + +def _create_new_with_parameter(func: typing.Callable, parameter: str, value: typing.Any) -> FeatureFunction: + """ + Create a new FeatureFunction from passed callable and store new parameter in it + + :param func: callable object + :param parameter: string parameter name + :param value: any parameter value + :return: FeatureFunction + """ + logger.trace(f"Creating \n\tfunction: {func} \n\twith parameter: {parameter} \n\twith value: {value}") + new_feature_f = FeatureFunction(func.__name__, partial(func, **{parameter: value})) + new_feature_f.parameters[parameter] = value + return new_feature_f + + +@decorator_with_args +def depends(feature_functions: typing.Union[typing.Callable, FeatureHolder], dependency: str): + """ + Specify dependency features + + The purpose of this wrapper is to define inter-feature dependency and order the execution accordingly + :param feature_functions: FeatureHolder or pure function + :param dependency: str name of required function + :return: FeatureHolder + """ + logger.trace(f"Wrapping {feature_functions} with dependency {dependency}") + + def wrapper() -> FeatureHolder: + if isinstance(feature_functions, FeatureHolder): + for f in feature_functions: + f.dependencies.append(dependency) + return feature_functions + else: + func_list = FeatureHolder() + new_feature_f = FeatureFunction(feature_functions.__name__, feature_functions) + new_feature_f.dependencies.append(dependency) + func_list.append(new_feature_f) + return func_list + + return wrapper() diff --git a/rialto/metadata/__init__.py b/rialto/metadata/__init__.py new file mode 100644 index 0000000..5e8893c --- /dev/null +++ b/rialto/metadata/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
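# --- editor's note: illustrative usage sketch, not part of the committed patch ---
# How the maker wrappers above are typically combined in a feature module; the
# column names are hypothetical. @feature stays outermost, @param expands the
# function into one FeatureFunction per value.
import pyspark.sql.functions as F

from rialto.maker import ValueType, desc, feature, param


@feature(ValueType.numerical)
@desc("Sum of transaction amounts over a parametrized window")
@param("window", [30, 90])
def amount_sum(window=None):
    return F.sum(F.when(F.col("DAYS_BACK") <= window, F.col("AMOUNT")).otherwise(0))


# FeatureMaker.make_aggregated(...) on a module containing the definition above
# would produce the columns AMOUNT_SUM_WINDOW_30 and AMOUNT_SUM_WINDOW_90;
# @depends("amount_sum") on another feature would force it to be made afterwards.
# --- end of editor's note ---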
+# See the License for the specific language governing permissions and +# limitations under the License. + +from rialto.metadata.data_classes.feature_metadata import FeatureMetadata +from rialto.metadata.data_classes.group_metadata import GroupMetadata +from rialto.metadata.enums import Schedule, ValueType +from rialto.metadata.metadata_manager import MetadataManager +from rialto.metadata.utils import class_to_catalog_name diff --git a/rialto/metadata/data_classes/__init__.py b/rialto/metadata/data_classes/__init__.py new file mode 100644 index 0000000..79c3773 --- /dev/null +++ b/rialto/metadata/data_classes/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/rialto/metadata/data_classes/feature_metadata.py b/rialto/metadata/data_classes/feature_metadata.py new file mode 100644 index 0000000..cff0039 --- /dev/null +++ b/rialto/metadata/data_classes/feature_metadata.py @@ -0,0 +1,76 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +__all__ = ["FeatureMetadata"] + +from dataclasses import dataclass +from typing import Tuple + +from pyspark.sql import Row +from typing_extensions import Self + +from rialto.metadata.data_classes.group_metadata import GroupMetadata +from rialto.metadata.enums import ValueType + + +@dataclass +class FeatureMetadata: + """A dataclass to hold all information about a feature""" + + value_type: ValueType + name: str + description: str + group: GroupMetadata = None + + def __repr__(self) -> str: + """Serialize object to string""" + return ( + "FeatureMetadata(" + f"name={self.name!r}, value_type={self.value_type!r}, " + f"description={self.description!r}, group={self.group!r}, " + ")" + ) + + def to_tuple(self, group_name: str) -> Tuple: + """ + Serialize to tuple + + :param group_name: Feature group name + :return: tuple with feature information + """ + return (self.name, self.value_type.value, self.description, group_name) + + def add_group(self, group: GroupMetadata) -> Self: + """ + Add group information to metadata + + :param group: Group name + :return: self + """ + self.group = group + return self + + @classmethod + def from_spark(cls, record: Row) -> Self: + """ + Create new instance from spark row + + :param record: spark row + :return: new instance + """ + return FeatureMetadata( + value_type=ValueType[record.feature_type], + name=record.feature_name, + description=record.feature_description, + ) diff --git a/rialto/metadata/data_classes/group_metadata.py b/rialto/metadata/data_classes/group_metadata.py new file mode 100644 index 0000000..4c30210 --- /dev/null +++ b/rialto/metadata/data_classes/group_metadata.py @@ -0,0 +1,84 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +__all__ = ["GroupMetadata"] + +from dataclasses import dataclass +from typing import List, Tuple + +from pyspark.sql import Row +from typing_extensions import Self + +from rialto.metadata.enums import Schedule +from rialto.metadata.utils import class_to_catalog_name + + +@dataclass +class GroupMetadata: + """A dataclass to hold all information about a feature group""" + + name: str + frequency: Schedule + description: str + key: List[str] + fs_name: str = None + features: List[str] = None + + def __repr__(self) -> str: + """Serialize object to string""" + return ( + "GroupMetadata(" + f"name={self.name!r}, frequency={self.frequency!r}, " + f"feature store name={self.fs_name!r}," + f"description={self.description!r}, key={self.key!r}, " + f"features={self.features!r}" + ")" + ) + + def add_features(self, features: List[str]) -> Self: + """ + Add feature list belonging to the group + + :param group: list of feature names + :return: self + """ + if len(features): + self.features = features + return self + + def to_tuple(self) -> Tuple: + """ + Serialize to tuple + + :return: tuple with feature group information + """ + if not self.fs_name: + self.fs_name = class_to_catalog_name(self.name) + return (self.name, self.frequency.value, self.description, self.key, self.fs_name) + + @classmethod + def from_spark(cls, schema: Row) -> Self: + """ + Create new instance from spark row + + :param record: spark row + :return: new instance + """ + return GroupMetadata( + name=schema.group_name, + fs_name=schema.group_fs_name, + frequency=Schedule[schema.group_frequency], + description=schema.group_description, + key=schema.group_key, + ) diff --git a/rialto/metadata/enums.py b/rialto/metadata/enums.py new file mode 100644 index 0000000..be9ea5b --- /dev/null +++ b/rialto/metadata/enums.py @@ -0,0 +1,35 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ["Schedule", "ValueType"] + +from enum import Enum + + +class Schedule(Enum): + """Schedule options""" + + weekly = "weekly" + daily = "daily" + monthly = "monthly" + yearly = "yearly" + unscheduled = "unscheduled" + + +class ValueType(Enum): + """Value options""" + + numerical = "numerical" + ordinal = "ordinal" + nominal = "nominal" diff --git a/rialto/metadata/metadata_manager.py b/rialto/metadata/metadata_manager.py new file mode 100644 index 0000000..3a7e5e5 --- /dev/null +++ b/rialto/metadata/metadata_manager.py @@ -0,0 +1,120 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ["MetadataManager"] + +from typing import List + +from delta.tables import DeltaTable + +from rialto.metadata.data_classes.feature_metadata import FeatureMetadata +from rialto.metadata.data_classes.group_metadata import GroupMetadata + + +class MetadataManager: + """Metadata storage i/o""" + + def __init__(self, session, schema_path: str = None): + self.spark = session + + self.groups_path = f"{schema_path}.group_metadata" + self.features_path = f"{schema_path}.feature_metadata" + + self.groups = None + self.features = None + + def _load_metadata(self): + if self.groups is None: + self.groups = self.spark.read.table(self.groups_path) + if self.features is None: + self.features = self.spark.read.table(self.features_path) + + def _fetch_group_by_name(self, group_name: str) -> GroupMetadata: + group = self.groups.filter(self.groups.group_name == group_name).collect() + if not len(group): + raise LookupError(f"Group {group_name} not found!") + return GroupMetadata.from_spark(group[0]) + + def _fetch_feature_by_name(self, feature_name: str, group_name: str) -> FeatureMetadata: + feature = ( + self.features.filter(self.features.group_name == group_name) + .filter(self.features.feature_name == feature_name) + .collect() + ) + if not len(feature): + raise LookupError(f"Feature {feature_name} in group {group_name} not found!") + return FeatureMetadata.from_spark(feature[0]) + + def _fetch_features(self, group_name: str) -> List: + return self.features.filter(self.features.group_name == group_name).collect() + + def _add_group(self, group_md: GroupMetadata) -> None: + groups = DeltaTable.forName(self.spark, self.groups_path) + df = self.spark.createDataFrame([group_md.to_tuple()], self.groups.schema) + + groups.alias("groups").merge( + df.alias("updates"), "groups.group_name = updates.group_name " + ).whenMatchedUpdateAll().whenNotMatchedInsertAll().execute() + + def _add_features(self, feature_md: List[FeatureMetadata], group_name: str) -> None: + features = DeltaTable.forName(self.spark, self.features_path) + feature_data = [md.to_tuple(group_name) for md in feature_md] + df = self.spark.createDataFrame(feature_data, self.features.schema) + + features.alias("features").merge( + df.alias("updates"), + "features.feature_name = updates.feature_name and features.group_name = updates.group_name", + ).whenMatchedUpdateAll().whenNotMatchedInsertAll().execute() + + def update( + self, + group_md: GroupMetadata, + features_md: List[FeatureMetadata], + ): + """ + Save or refresh information about generated features and their groups + + :param group_md: GroupMetadata object + :param features_md: list of FeatureMetadata objects + :return: + """ + self._load_metadata() + self._add_group(group_md) + self._add_features(features_md, group_md.name) + + def get_feature(self, group_name: str, feature_name: str) -> FeatureMetadata: + """ + Get metadata of one feature + + :param group_name: string name of feature group + :param feature_name: string name of feature + :return: FeatureMetadata object + """ + self._load_metadata() + group = self.get_group(group_name) + feature = self._fetch_feature_by_name(feature_name, group_name) + return feature.add_group(group) + + def get_group(self, group_name: str) -> GroupMetadata: + """ + Get metadata of one feature group + + :param group_name: string name of feature group + :return: GroupMetadata object + """ + self._load_metadata() + group = 
self._fetch_group_by_name(group_name)
+        features = self._fetch_features(group_name)
+        group.add_features([f.feature_name for f in features])
+        return group
diff --git a/rialto/metadata/utils.py b/rialto/metadata/utils.py
new file mode 100644
index 0000000..0cb591c
--- /dev/null
+++ b/rialto/metadata/utils.py
@@ -0,0 +1,34 @@
+# Copyright 2022 ABSA Group Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__all__ = ["class_to_catalog_name"]
+
+
+def class_to_catalog_name(class_name) -> str:
+    """
+    Map a Python feature group class name (CamelCase) to a Databricks-compatible format (lowercase with underscores)
+
+    :param class_name: Python class name
+    :return: feature storage name
+    """
+    res = []
+    for i in range(0, len(class_name)):
+        c = class_name[i]
+        if c.isupper():
+            if i != 0:
+                res.append("_")
+            res.append(c.lower())
+        else:
+            res.append(c)
+    return "".join(res)
diff --git a/rialto/runner/__init__.py b/rialto/runner/__init__.py
new file mode 100644
index 0000000..6ae343f
--- /dev/null
+++ b/rialto/runner/__init__.py
@@ -0,0 +1,16 @@
+# Copyright 2022 ABSA Group Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from rialto.runner.runner import Runner
+from rialto.runner.transformation import Transformation
diff --git a/rialto/runner/config_loader.py b/rialto/runner/config_loader.py
new file mode 100644
index 0000000..af6640b
--- /dev/null
+++ b/rialto/runner/config_loader.py
@@ -0,0 +1,88 @@
+# Copyright 2022 ABSA Group Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +__all__ = ["get_pipelines_config", "transform_dependencies"] + +from typing import Dict, List, Optional, Union + +from pydantic import BaseModel + +from rialto.common.utils import load_yaml + + +class IntervalConfig(BaseModel): + units: str + value: int + + +class ScheduleConfig(BaseModel): + frequency: str + day: Optional[int] = 0 + info_date_shift: Union[Optional[IntervalConfig], List[IntervalConfig]] = IntervalConfig(units="days", value=0) + + +class DependencyConfig(BaseModel): + table: str + name: Optional[str] = None + date_col: Optional[str] = None + interval: IntervalConfig + + +class ModuleConfig(BaseModel): + python_module: str + python_class: str + + +class MailConfig(BaseModel): + sender: str + to: List[str] + smtp: str + subject: str + sent_empty: Optional[bool] = False + + +class GeneralConfig(BaseModel): + target_schema: str + target_partition_column: str + source_date_column_property: Optional[str] = None + watched_period_units: str + watched_period_value: int + job: str + mail: MailConfig + + +class PipelineConfig(BaseModel): + name: str + module: Optional[ModuleConfig] = None + schedule: ScheduleConfig + dependencies: List[DependencyConfig] = [] + + +class PipelinesConfig(BaseModel): + general: GeneralConfig + pipelines: list[PipelineConfig] + + +def get_pipelines_config(path) -> PipelinesConfig: + """Load and parse yaml config""" + return PipelinesConfig(**load_yaml(path)) + + +def transform_dependencies(dependencies: List[DependencyConfig]) -> Dict: + """Transform dependency config list into a dictionary""" + res = {} + for dep in dependencies: + if dep.name: + res[dep.name] = dep + return res diff --git a/rialto/runner/date_manager.py b/rialto/runner/date_manager.py new file mode 100644 index 0000000..1bcef7b --- /dev/null +++ b/rialto/runner/date_manager.py @@ -0,0 +1,107 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +__all__ = ["DateManager"] + +from datetime import date, datetime +from typing import List + +from dateutil.relativedelta import relativedelta + +from rialto.runner.config_loader import ScheduleConfig + + +class DateManager: + """Date generation and shifts based on configuration""" + + @staticmethod + def str_to_date(str_date: str) -> date: + """ + Convert YYYY-MM-DD string to date + + :param str_date: string date + :return: date + """ + return datetime.strptime(str_date, "%Y-%m-%d").date() + + @staticmethod + def date_subtract(run_date: date, units: str, value: int) -> date: + """ + Generate starting date from given date and config + + :param run_date: base date + :param units: units: years, months, weeks, days + :param value: number of units to subtract + :return: Starting date + """ + if units == "years": + return run_date - relativedelta(years=value) + if units == "months": + return run_date - relativedelta(months=value) + if units == "weeks": + return run_date - relativedelta(weeks=value) + if units == "days": + return run_date - relativedelta(days=value) + raise ValueError(f"Unknown time unit {units}") + + @staticmethod + def all_dates(date_from: date, date_to: date) -> List[date]: + """ + Get list of all dates between, inclusive + + :param date_from: starting date + :param date_to: ending date + :return: List[date] + """ + if date_to < date_from: + date_to, date_from = date_from, date_to + + return [date_from + relativedelta(days=n) for n in range((date_to - date_from).days + 1)] + + @staticmethod + def run_dates(date_from: date, date_to: date, schedule: ScheduleConfig) -> List[date]: + """ + Select dates inside given interval depending on frequency and selected day + + :param date_from: interval start + :param date_to: interval end + :param schedule: schedule config + :return: list of dates + """ + options = DateManager.all_dates(date_from, date_to) + if schedule.frequency == "daily": + return options + if schedule.frequency == "weekly": + return [x for x in options if x.isoweekday() == schedule.day] + if schedule.frequency == "monthly": + return [x for x in options if x.day == schedule.day] + raise ValueError(f"Unknown frequency {schedule.frequency}") + + @staticmethod + def to_info_date(date: date, schedule: ScheduleConfig) -> date: + """ + Shift given date according to config + + :param date: input date + :param schedule: schedule config + :return: date + """ + if isinstance(schedule.info_date_shift, List): + for shift in schedule.info_date_shift: + date = DateManager.date_subtract(date, units=shift.units, value=shift.value) + else: + date = DateManager.date_subtract( + date, units=schedule.info_date_shift.units, value=schedule.info_date_shift.value + ) + return date diff --git a/rialto/runner/runner.py b/rialto/runner/runner.py new file mode 100644 index 0000000..ade89ff --- /dev/null +++ b/rialto/runner/runner.py @@ -0,0 +1,410 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +__all__ = ["Runner"] + +import datetime +from datetime import date +from importlib import import_module +from typing import List, Tuple + +import pyspark.sql.functions as F +from loguru import logger +from pyspark.sql import DataFrame, SparkSession + +from rialto.common import TableReader +from rialto.common.utils import get_date_col_property, get_delta_partition +from rialto.jobs.configuration.config_holder import ConfigHolder +from rialto.metadata import MetadataManager +from rialto.runner.config_loader import ( + DependencyConfig, + ModuleConfig, + PipelineConfig, + ScheduleConfig, + get_pipelines_config, + transform_dependencies, +) +from rialto.runner.date_manager import DateManager +from rialto.runner.table import Table +from rialto.runner.tracker import Record, Tracker +from rialto.runner.transformation import Transformation + + +class Runner: + """A scheduler and dependency checker for feature runs""" + + def __init__( + self, + spark: SparkSession, + config_path: str, + feature_metadata_schema: str = None, + run_date: str = None, + date_from: str = None, + date_until: str = None, + feature_store_schema: str = None, + custom_job_config: dict = None, + rerun: bool = False, + op: str = None, + ): + self.spark = spark + self.config = get_pipelines_config(config_path) + self.reader = TableReader( + spark, date_property=self.config.general.source_date_column_property, infer_partition=False + ) + if feature_metadata_schema: + self.metadata = MetadataManager(spark, feature_metadata_schema) + else: + self.metadata = None + self.date_from = date_from + self.date_until = date_until + self.rerun = rerun + self.op = op + self.tracker = Tracker(self.config.general.target_schema) + + if (feature_store_schema is not None) and (feature_metadata_schema is not None): + ConfigHolder.set_feature_store_config(feature_store_schema, feature_metadata_schema) + + if custom_job_config is not None: + ConfigHolder.set_custom_config(**custom_job_config) + + if run_date: + run_date = DateManager.str_to_date(run_date) + else: + run_date = date.today() + if self.date_from: + self.date_from = DateManager.str_to_date(date_from) + if self.date_until: + self.date_until = DateManager.str_to_date(date_until) + + if not self.date_from: + self.date_from = DateManager.date_subtract( + run_date=run_date, + units=self.config.general.watched_period_units, + value=self.config.general.watched_period_value, + ) + if not self.date_until: + self.date_until = run_date + if self.date_from > self.date_until: + raise ValueError(f"Invalid date range from {self.date_from} until {self.date_until}") + logger.info(f"Running period from {self.date_from} until {self.date_until}") + + def _load_module(self, cfg: ModuleConfig) -> Transformation: + """ + Load feature group + + :param cfg: Feature configuration + :return: Transformation object + """ + module = import_module(cfg.python_module) + class_obj = getattr(module, cfg.python_class) + return class_obj() + + def _generate( + self, instance: Transformation, run_date: date, dependencies: List[DependencyConfig] = None + ) -> DataFrame: + """ + Run feature group + + :param instance: Instance of Transformation + :param run_date: date to run for + :return: Dataframe + """ + if dependencies is not None: + dependencies = transform_dependencies(dependencies) + df = instance.run( + reader=self.reader, + run_date=run_date, + spark=self.spark, + metadata_manager=self.metadata, + dependencies=dependencies, + ) + logger.info(f"Generated {df.count()} records") + + return df + + def 
_table_exists(self, table: str) -> bool: + """ + Check table exists in spark catalog + + :param table: full table path + :return: bool + """ + return self.spark.catalog.tableExists(table) + + def _write(self, df: DataFrame, info_date: date, table: Table) -> None: + """ + Write dataframe to storage + + :param df: dataframe to write + :param info_date: date to partition + :param table: path to write to + :return: None + """ + df = df.withColumn(table.partition, F.lit(info_date)) + df.write.partitionBy(table.partition).mode("overwrite").saveAsTable(table.get_table_path()) + logger.info(f"Results writen to {table.get_table_path()}") + + try: + get_date_col_property(self.spark, table.get_table_path(), "rialto_date_column") + except RuntimeError: + sql_query = ( + f"ALTER TABLE {table.get_table_path()} SET TBLPROPERTIES ('rialto_date_column' = '{table.partition}')" + ) + self.spark.sql(sql_query) + logger.info(f"Set table property rialto_date_column to {table.partition}") + + def _delta_partition(self, table: str) -> str: + """ + Select first partition column, should be only one + + :param table: full table name + :return: partition column name + """ + columns = self.spark.catalog.listColumns(table) + partition_columns = list(filter(lambda c: c.isPartition, columns)) + if len(partition_columns): + return partition_columns[0].name + else: + raise RuntimeError(f"Delta table has no partitions: {table}.") + + def _get_partitions(self, table: Table) -> List[date]: + """ + Get partition values + + :param table: Table object + :return: List of partition values + """ + rows = ( + self.reader.get_table(table.get_table_path(), date_column=table.partition) + .select(table.partition) + .distinct() + .collect() + ) + return [r[table.partition] for r in rows] + + def check_dates_have_partition(self, table: Table, dates: List[date]) -> List[bool]: + """ + For given list of dates, check if there is a matching partition for each + + :param table: Table object + :param dates: list of dates to check + :return: list of bool + """ + if self._table_exists(table.get_table_path()): + partitions = self._get_partitions(table) + return [(date in partitions) for date in dates] + else: + logger.info(f"Table {table.get_table_path()} doesn't exist!") + return [False for _ in dates] + + def check_dependencies(self, pipeline: PipelineConfig, run_date: date) -> bool: + """ + Check for all dependencies in config if they have available partitions + + :param pipeline: configuration + :param run_date: run date + :return: bool + """ + logger.info(f"{pipeline.name} checking dependencies for {run_date}") + for dependency in pipeline.dependencies: + dep_from = DateManager.date_subtract(run_date, dependency.interval.units, dependency.interval.value) + logger.info(f"Looking for {dependency.table} from {dep_from} until {run_date}") + + possible_dep_dates = DateManager.all_dates(dep_from, run_date) + + # date column options prioritization (manual column, table property, inferred from delta) + if dependency.date_col: + date_col = dependency.date_col + elif self.config.general.source_date_column_property: + date_col = get_date_col_property( + self.spark, dependency.table, self.config.general.source_date_column_property + ) + else: + date_col = get_delta_partition(self.spark, dependency.table) + logger.debug(f"Date column for {dependency.table} is {date_col}") + + source = Table(table_path=dependency.table, partition=date_col) + if True in self.check_dates_have_partition(source, possible_dep_dates): + logger.info(f"Dependency for 
{dependency.table} from {dep_from} until {run_date} is fulfilled") + continue + else: + msg = f"Missing dependency for {dependency.table} from {dep_from} until {run_date}" + logger.info(msg) + self.tracker.last_error = msg + return False + return True + + def get_possible_run_dates(self, schedule: ScheduleConfig) -> List[date]: + """ + List possible run dates according to parameters and config + + :param schedule: schedule config + :return: List of dates + """ + return DateManager.run_dates(self.date_from, self.date_until, schedule) + + def get_info_dates(self, schedule: ScheduleConfig, run_dates: List[date]) -> List[date]: + """ + Transform given dates into info dates according to the config + + :param schedule: schedule config + :param run_dates: date list + :return: list of modified dates + """ + return [DateManager.to_info_date(x, schedule) for x in run_dates] + + def _get_completion(self, target: Table, info_dates: List[date]) -> List[bool]: + """ + Check if model has run for given dates + + :param target_path: Table object + :param info_dates: list of dates + :return: bool list + """ + if self.rerun: + return [False for _ in info_dates] + else: + return self.check_dates_have_partition(target, info_dates) + + def _select_run_dates(self, pipeline: PipelineConfig, table: Table) -> Tuple[List, List]: + """ + Select run dates and info dates based on completion + + :param pipeline: pipeline config + :param table: table path + :return: list of run dates and list of info dates + """ + possible_run_dates = self.get_possible_run_dates(pipeline.schedule) + possible_info_dates = self.get_info_dates(pipeline.schedule, possible_run_dates) + current_state = self._get_completion(table, possible_info_dates) + + selection = [ + (run, info) for run, info, state in zip(possible_run_dates, possible_info_dates, current_state) if not state + ] + + if not len(selection): + logger.info(f"{pipeline.name} has no dates to run") + return [], [] + + selected_run_dates, selected_info_dates = zip(*selection) + logger.info(f"{pipeline.name} identified to run for {selected_run_dates}") + + return list(selected_run_dates), list(selected_info_dates) + + def _run_one_date(self, pipeline: PipelineConfig, run_date: date, info_date: date, target: Table) -> int: + """ + Run one pipeline for one date + + :param pipeline: pipeline cfg + :param run_date: run date + :param info_date: information date + :param target: target Table + :return: success bool + """ + if self.check_dependencies(pipeline, run_date): + logger.info(f"Running {pipeline.name} for {run_date}") + + if self.config.general.job == "run": + feature_group = self._load_module(pipeline.module) + df = self._generate(feature_group, run_date, pipeline.dependencies) + records = df.count() + if records > 0: + self._write(df, info_date, target) + return records + else: + raise RuntimeError("No records generated") + return 0 + + def _run_pipeline(self, pipeline: PipelineConfig): + """ + Run single pipeline for all required dates + + :param pipeline: pipeline cfg + :return: success bool + """ + target = Table( + schema_path=self.config.general.target_schema, + class_name=pipeline.module.python_class, + partition=self.config.general.target_partition_column, + ) + logger.info(f"Loaded pipeline {pipeline.name}") + + selected_run_dates, selected_info_dates = self._select_run_dates(pipeline, target) + + # ----------- Checking dependencies available ---------- + for run_date, info_date in zip(selected_run_dates, selected_info_dates): + run_start = datetime.datetime.now() + try: 
+ records = self._run_one_date(pipeline, run_date, info_date, target) + if records > 0: + status = "Success" + message = "" + else: + status = "Failure" + message = self.tracker.last_error + self.tracker.add( + Record( + job=pipeline.name, + target=target.get_table_path(), + date=info_date, + time=datetime.datetime.now() - run_start, + records=records, + status=status, + reason=message, + ) + ) + except Exception as error: + print(f"An exception occurred in pipeline {pipeline.name}") + print(error) + self.tracker.add( + Record( + job=pipeline.name, + target=target.get_table_path(), + date=info_date, + time=datetime.datetime.now() - run_start, + records=0, + status="Error", + reason="Exception", + exception=str(error), + ) + ) + except KeyboardInterrupt: + print(f"Pipeline {pipeline.name} interrupted") + self.tracker.add( + Record( + job=pipeline.name, + target=target.get_table_path(), + date=info_date, + time=datetime.datetime.now() - run_start, + records=0, + status="Error", + reason="Interrupted by user", + ) + ) + raise KeyboardInterrupt + + def __call__(self): + """Execute pipelines""" + try: + if self.op: + selected = [p for p in self.config.pipelines if p.name == self.op] + if len(selected) < 1: + raise ValueError(f"Unknown operation selected: {self.op}") + self._run_pipeline(selected[0]) + else: + for pipeline in self.config.pipelines: + self._run_pipeline(pipeline) + finally: + print(self.tracker.records) + self.tracker.report(self.config.general.mail) diff --git a/rialto/runner/table.py b/rialto/runner/table.py new file mode 100644 index 0000000..416bedb --- /dev/null +++ b/rialto/runner/table.py @@ -0,0 +1,55 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
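+
+# Illustrative usage (names are examples, not part of the library API surface):
+# Table(table_path="catalog.schema.my_table", partition="DATE") gives
+# get_schema_path() == "catalog.schema" and get_table_path() == "catalog.schema.my_table".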
+
+__all__ = ["Table"]
+
+from rialto.metadata import class_to_catalog_name
+
+
+class Table:
+    """Handler for Databricks catalog paths"""
+
+    def __init__(
+        self,
+        catalog: str = None,
+        schema: str = None,
+        table: str = None,
+        schema_path: str = None,
+        table_path: str = None,
+        class_name: str = None,
+        partition: str = None,
+    ):
+        self.catalog = catalog
+        self.schema = schema
+        self.table = table
+        self.partition = partition
+        if schema_path:
+            schema_path = schema_path.split(".")
+            self.catalog = schema_path[0]
+            self.schema = schema_path[1]
+        if table_path:
+            table_path = table_path.split(".")
+            self.catalog = table_path[0]
+            self.schema = table_path[1]
+            self.table = table_path[2]
+        if class_name:
+            self.table = class_to_catalog_name(class_name)
+
+    def get_schema_path(self):
+        """Get path of table's schema"""
+        return f"{self.catalog}.{self.schema}"
+
+    def get_table_path(self):
+        """Get full table path"""
+        return f"{self.catalog}.{self.schema}.{self.table}"
diff --git a/rialto/runner/tracker.py b/rialto/runner/tracker.py
new file mode 100644
index 0000000..de97fb0
--- /dev/null
+++ b/rialto/runner/tracker.py
@@ -0,0 +1,261 @@
+# Copyright 2022 ABSA Group Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__all__ = ["Record", "Tracker"]
+
+import smtplib
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+from email.mime.multipart import MIMEMultipart
+from email.mime.text import MIMEText
+from typing import List, Optional
+
+from rialto.runner.config_loader import MailConfig
+
+
+@dataclass
+class Record:
+    """Dataclass with information about one run of one pipeline."""
+
+    job: str
+    target: str
+    date: datetime.date
+    time: timedelta
+    records: int
+    status: str
+    reason: str
+    exception: Optional[str] = None
+
+
+class Tracker:
+    """Collect information about runs and send them out via email"""
+
+    def __init__(self, target_schema: str):
+        self.target_schema = target_schema
+        self.records = []
+        self.last_error = None
+        self.pipeline_start = datetime.now()
+        self.exceptions = []
+
+    def add(self, record: Record) -> None:
+        """Add record for one run"""
+        self.records.append(record)
+
+    def report(self, mail_cfg: MailConfig):
+        """Create and send html report"""
+        if len(self.records) or mail_cfg.sent_empty:
+            report = HTMLMessage.make_report(self.target_schema, self.pipeline_start, self.records)
+            for receiver in mail_cfg.to:
+                message = Mailer.create_message(
+                    subject=mail_cfg.subject, sender=mail_cfg.sender, receiver=receiver, body=report
+                )
+                Mailer.send_mail(mail_cfg.smtp, message)
+
+
+class HTMLMessage:
+    bck_colors = ["#00ded6", "#acfcfa"]
+    borderless_table = 'role="presentation" style="border:0;border-spacing:0;"'
+    bordered_table = (
+        'role="presentation" style="background-repeat:no-repeat; margin:0;" cellpadding="1" cellspacing="1" border="1"'
+    )
+
+    @staticmethod
+    def _get_status_color(status: str):
+        if status == "Success":
+            return "#398f00"
+        elif status == "Error":
+            return "#ff0000"
+        else:
+            return "#ff8800"
+
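+    # The _make_* helpers below assemble the HTML report that Tracker.report() sends out:
+    # _make_rows renders one table row per Record, _make_overview and _make_insights build
+    # the section tables, and _make_exceptions adds an expandable block per captured traceback.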
+    @staticmethod
+    def _make_rows(rows):
+        html = ""
+        data_options = 'align="center"'
+        for row, i in zip(rows, range(len(rows))):
+            r = f"""
+                <tr bgcolor="{HTMLMessage.bck_colors[i % 2]}">
+                    <td {data_options}>{row.job}</td>
+                    <td {data_options}>{row.target.split('.')[0]}.<br>
+                        {row.target.split('.')[1]}.<br>
+                        {row.target.split('.')[2]}</td>
+                    <td {data_options}>{row.date}</td>
+                    <td {data_options}>{str(row.time).split(".")[0]}</td>
+                    <td {data_options}>{f'{row.records:,}'}</td>
+                    <td {data_options} bgcolor="{HTMLMessage._get_status_color(row.status)}">{row.status}</td>
+                    <td {data_options}>{row.reason}</td>
+                </tr>
+            """
+            html += r
+        return html
+
+    @staticmethod
+    def _make_overview_header():
+        return """
+            <tr>
+                <th>Job</th>
+                <th>Target</th>
+                <th>Date</th>
+                <th>Time elapsed</th>
+                <th>Rows created</th>
+                <th>Status</th>
+                <th>Reason</th>
+            </tr>
+        """
+
+    @staticmethod
+    def _make_header(target: str, start: datetime):
+        return f"""
+            <table {HTMLMessage.borderless_table} width="100%">
+                <tr>
+                    <td align="center">
+                        <h1>This is the Rialto Feature Runner report</h1>
+                    </td>
+                </tr>
+                <tr>
+                    <td align="center">
+                        Jobs started {str(start).split('.')[0]}, targeting {target}
+                    </td>
+                </tr>
+            </table>
+        """
+
+    @staticmethod
+    def _make_overview(records: List[Record]):
+        return f"""
+            <table {HTMLMessage.borderless_table} width="100%">
+                <tr>
+                    <td align="center">
+                        <h2>Overview</h2>
+                    </td>
+                </tr>
+            </table>
+            <table {HTMLMessage.bordered_table} width="100%">
+                {HTMLMessage._make_overview_header()}
+                {HTMLMessage._make_rows(records)}
+            </table>
+        """
+
+    @staticmethod
+    def _head():
+        return """
+            <head>
+                <meta charset="utf-8">
+                <meta name="viewport" content="width=device-width, initial-scale=1">
+                <title>Rialto Feature Runner report</title>
+            </head>
+        """
+
+    @staticmethod
+    def _body_open():
+        return """
+            <body style="margin:0;padding:0;">
+                <div style="margin:auto;">
+        """
+
+    @staticmethod
+    def _body_close():
+        return """
+                </div>
+            </body>
+        """
+
+    @staticmethod
+    def _make_exceptions(records: List[Record]):
+        html = ""
+        for record, i in zip(records, range(len(records))):
+            if record.exception is not None:
+                r = f"""
+                    <table {HTMLMessage.borderless_table} width="100%">
+                        <tr bgcolor="{HTMLMessage.bck_colors[i % 2]}">
+                            <td>{record.job}</td>
+                            <td>{record.date}</td>
+                        </tr>
+                    </table>
+                    <details>
+                        <summary>Expand</summary>
+                        <table {HTMLMessage.borderless_table} width="100%">
+                            <tr>
+                                <td>{record.exception}</td>
+                            </tr>
+                        </table>
+                    </details>
+                """
+                html += r
+        return html
+
+    @staticmethod
+    def _make_insights(records: List[Record]):
+        return f"""
+            <table {HTMLMessage.borderless_table} width="100%">
+                <tr>
+                    <td align="center">
+                        <h2>Exceptions</h2>
+                    </td>
+                </tr>
+            </table>
+ {HTMLMessage._make_exceptions(records)} + """ + + @staticmethod + def make_report(target: str, start: datetime, records: List[Record]) -> str: + """Create html email report""" + html = [ + """ + """, + HTMLMessage._head(), + HTMLMessage._body_open(), + HTMLMessage._make_header(target, start), + HTMLMessage._make_overview(records), + HTMLMessage._make_insights(records), + HTMLMessage._body_close(), + ] + return "\n".join(html) + + +class Mailer: + @staticmethod + def create_message(subject: str, sender: str, receiver: str, body: str) -> MIMEMultipart: + msg = MIMEMultipart() + msg["Subject"] = subject + msg["From"] = sender + msg["To"] = receiver + body = MIMEText(body, "html") + msg.attach(body) + return msg + + @staticmethod + def send_mail(smtp: str, message: MIMEMultipart): + s = smtplib.SMTP(host=smtp, port=25) + s.sendmail(from_addr=message["From"], to_addrs=message["To"], msg=message.as_string()) + s.quit() diff --git a/rialto/runner/transformation.py b/rialto/runner/transformation.py new file mode 100644 index 0000000..210cb0b --- /dev/null +++ b/rialto/runner/transformation.py @@ -0,0 +1,48 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ["Transformation"] + +import abc +import datetime +from typing import Dict + +from pyspark.sql import DataFrame, SparkSession + +from rialto.common import TableReader +from rialto.metadata import MetadataManager + + +class Transformation(metaclass=abc.ABCMeta): + """Interface for feature implementation""" + + @abc.abstractmethod + def run( + self, + reader: TableReader, + run_date: datetime.date, + spark: SparkSession = None, + metadata_manager: MetadataManager = None, + dependencies: Dict = None, + ) -> DataFrame: + """ + Run the transformation + + :param reader: data store api object + :param run_date: date + :param spark: spark session + :param metadata_manager: metadata api object + :return: dataframe + """ + raise NotImplementedError diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..79c3773 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/tests/common/conftest.py b/tests/common/conftest.py new file mode 100644 index 0000000..79455ff --- /dev/null +++ b/tests/common/conftest.py @@ -0,0 +1,36 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pytest +from pyspark.sql import SparkSession + + +@pytest.fixture(scope="session") +def spark(request): + """fixture for creating a spark session + Args: + request: pytest.FixtureRequest object + """ + + spark = ( + SparkSession.builder.master("local[2]") + .appName("pytest-pyspark-local-testing") + .config("spark.ui.enabled", "false") + .config("spark.driver.bindAddress", "127.0.0.1") + .config("spark.driver.host", "127.0.0.1") + .getOrCreate() + ) + + request.addfinalizer(lambda: spark.stop()) + + return spark diff --git a/tests/common/test_utils.py b/tests/common/test_utils.py new file mode 100644 index 0000000..cd3ebd9 --- /dev/null +++ b/tests/common/test_utils.py @@ -0,0 +1,44 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pyspark.sql.functions as F +import pytest +from numpy import dtype + +from rialto.common.utils import cast_decimals_to_floats + + +@pytest.fixture +def sample_df(spark): + df = spark.createDataFrame( + [(1, 2.33, "str", 4.55, 5.66), (1, 2.33, "str", 4.55, 5.66), (1, 2.33, "str", 4.55, 5.66)], + schema="a long, b float, c string, d float, e float", + ) + + return df.select("a", "b", "c", F.col("d").cast("decimal"), F.col("e").cast("decimal(18,5)")) + + +def test_cast_decimals_to_floats(sample_df): + df_fixed = cast_decimals_to_floats(sample_df) + + assert df_fixed.dtypes[3] == ("d", "float") + assert df_fixed.dtypes[4] == ("e", "float") + + +def test_cast_decimals_to_floats_topandas_works(sample_df): + df_fixed = cast_decimals_to_floats(sample_df) + df_pd = df_fixed.toPandas() + + assert df_pd.dtypes[3] == dtype("float32") + assert df_pd.dtypes[4] == dtype("float32") diff --git a/tests/jobs/conftest.py b/tests/jobs/conftest.py new file mode 100644 index 0000000..dda863d --- /dev/null +++ b/tests/jobs/conftest.py @@ -0,0 +1,37 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pytest +from pyspark.sql import SparkSession + + +@pytest.fixture(scope="session") +def spark(request): + """fixture for creating a spark session + :param request: pytest.FixtureRequest object + """ + + spark = ( + SparkSession.builder.master("local[3]") + .appName("pytest-pyspark-local-testing") + .config("spark.ui.enabled", "false") + .config("spark.driver.bindAddress", "127.0.0.1") + .config("spark.driver.host", "127.0.0.1") + .getOrCreate() + ) + + request.addfinalizer(lambda: spark.stop()) + + return spark diff --git a/tests/jobs/resources.py b/tests/jobs/resources.py new file mode 100644 index 0000000..4d33fad --- /dev/null +++ b/tests/jobs/resources.py @@ -0,0 +1,43 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import typing + +import pandas as pd + +from rialto.jobs.decorators.job_base import JobBase + + +def custom_callable(): + pass + + +class CustomJobNoReturnVal(JobBase): + def get_job_name(self) -> str: + return "job_name" + + def get_job_version(self) -> str: + return "job_version" + + def get_custom_callable(self) -> typing.Callable: + return custom_callable + + +class CustomJobReturnsDataFrame(CustomJobNoReturnVal): + def get_custom_callable(self) -> typing.Callable: + def f(spark): + df = pd.DataFrame([["A", 1], ["B", 2]], columns=["FIRST", "SECOND"]) + + return spark.createDataFrame(df) + + return f diff --git a/tests/jobs/test_config_holder.py b/tests/jobs/test_config_holder.py new file mode 100644 index 0000000..38fadb1 --- /dev/null +++ b/tests/jobs/test_config_holder.py @@ -0,0 +1,100 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from datetime import date + +import pytest + +from rialto.jobs.configuration.config_holder import ( + ConfigException, + ConfigHolder, + FeatureStoreConfig, +) + + +def test_run_date_unset(): + with pytest.raises(ConfigException): + ConfigHolder.get_run_date() + + +def test_run_date(): + dt = date(2023, 1, 1) + + ConfigHolder.set_run_date(dt) + + assert ConfigHolder.get_run_date() == dt + + +def test_feature_store_config_unset(): + with pytest.raises(ConfigException): + ConfigHolder.get_feature_store_config() + + +def test_feature_store_config(): + ConfigHolder.set_feature_store_config("store_schema", "metadata_schema") + + fsc = ConfigHolder.get_feature_store_config() + + assert type(fsc) is FeatureStoreConfig + assert fsc.feature_store_schema == "store_schema" + assert fsc.feature_metadata_schema == "metadata_schema" + + +def test_config_unset(): + config = ConfigHolder.get_config() + + assert type(config) is type({}) + assert len(config.items()) == 0 + + +def test_config_dict_copied_not_ref(): + """Test that config holder config can't be set from outside""" + config = ConfigHolder.get_config() + + config["test"] = 123 + + assert "test" not in ConfigHolder.get_config() + + +def test_config(): + ConfigHolder.set_custom_config(hello=123) + ConfigHolder.set_custom_config(world="test") + + config = ConfigHolder.get_config() + + assert config["hello"] == 123 + assert config["world"] == "test" + + +def test_config_from_dict(): + ConfigHolder.set_custom_config(**{"dict_item_1": 123, "dict_item_2": 456}) + + config = ConfigHolder.get_config() + + assert config["dict_item_1"] == 123 + assert config["dict_item_2"] == 456 + + +def test_dependencies_unset(): + deps = ConfigHolder.get_dependency_config() + assert len(deps.keys()) == 0 + + +def test_dependencies(): + ConfigHolder.set_dependency_config({"hello": 123}) + + deps = ConfigHolder.get_dependency_config() + + assert deps["hello"] == 123 diff --git a/tests/jobs/test_decorators.py b/tests/jobs/test_decorators.py new file mode 100644 index 0000000..e896cec --- /dev/null +++ b/tests/jobs/test_decorators.py @@ -0,0 +1,65 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from importlib import import_module + +from rialto.jobs.configuration.config_holder import ConfigHolder +from rialto.jobs.decorators.job_base import JobBase +from rialto.jobs.decorators.resolver import Resolver + + +def test_dataset_decorator(): + _ = import_module("tests.jobs.test_job.test_job") + test_dataset = Resolver.resolve("dataset") + + assert test_dataset == "dataset_return" + + +def _rialto_import_stub(module_name, class_name): + module = import_module(module_name) + class_obj = getattr(module, class_name) + return class_obj() + + +def test_job_function_type(): + result_class = _rialto_import_stub("tests.jobs.test_job.test_job", "job_function") + assert issubclass(type(result_class), JobBase) + + +def test_job_function_callables_filled(): + result_class = _rialto_import_stub("tests.jobs.test_job.test_job", "job_function") + + custom_callable = result_class.get_custom_callable() + assert custom_callable() == "job_function_return" + + version = result_class.get_job_version() + assert version == "N/A" + + job_name = result_class.get_job_name() + assert job_name == "job_function" + + +def test_custom_name_function(): + result_class = _rialto_import_stub("tests.jobs.test_job.test_job", "custom_job_name") + assert issubclass(type(result_class), JobBase) + + custom_callable = result_class.get_custom_callable() + assert custom_callable() == "custom_job_name_return" + + +def test_job_dependencies_registered(spark): + ConfigHolder.set_custom_config(value=123) + job_class = _rialto_import_stub("tests.jobs.test_job.test_job", "job_asking_for_all_deps") + # asserts part of the run + job_class.run(spark=spark, run_date=456, reader=789, metadata_manager=None, dependencies=1011) diff --git a/tests/jobs/test_job/test_job.py b/tests/jobs/test_job/test_job.py new file mode 100644 index 0000000..12baec9 --- /dev/null +++ b/tests/jobs/test_job/test_job.py @@ -0,0 +1,40 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from rialto.jobs.decorators import datasource, job + + +@datasource +def dataset(): + return "dataset_return" + + +@job +def job_function(): + return "job_function_return" + + +@job("custom_job_name") +def custom_name_job_function(): + return "custom_job_name_return" + + +@job +def job_asking_for_all_deps(spark, run_date, config, dependencies, table_reader): + assert spark is not None + assert run_date == 456 + assert config["value"] == 123 + assert table_reader == 789 + assert dependencies == 1011 diff --git a/tests/jobs/test_job_base.py b/tests/jobs/test_job_base.py new file mode 100644 index 0000000..2cdc741 --- /dev/null +++ b/tests/jobs/test_job_base.py @@ -0,0 +1,93 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import datetime +from unittest.mock import MagicMock, patch + +import pyspark.sql.types + +import tests.jobs.resources as resources +from rialto.jobs.configuration.config_holder import ConfigHolder, FeatureStoreConfig +from rialto.jobs.decorators.resolver import Resolver +from rialto.loader import PysparkFeatureLoader + + +def test_setup_except_feature_loader(spark): + table_reader = MagicMock() + date = datetime.date(2023, 1, 1) + + ConfigHolder.set_custom_config(hello=1, world=2) + + resources.CustomJobNoReturnVal().run( + reader=table_reader, run_date=date, spark=spark, metadata_manager=None, dependencies={1: 1} + ) + + assert Resolver.resolve("run_date") == date + assert Resolver.resolve("config") == ConfigHolder.get_config() + assert Resolver.resolve("dependencies") == ConfigHolder.get_dependency_config() + assert Resolver.resolve("spark") == spark + assert Resolver.resolve("table_reader") == table_reader + + +@patch( + "rialto.jobs.configuration.config_holder.ConfigHolder.get_feature_store_config", + return_value=FeatureStoreConfig(feature_store_schema="schema", feature_metadata_schema="metadata_schema"), +) +def test_setup_feature_loader(spark): + table_reader = MagicMock() + date = datetime.date(2023, 1, 1) + + resources.CustomJobNoReturnVal().run(reader=table_reader, run_date=date, spark=spark, metadata_manager=None) + + assert type(Resolver.resolve("feature_loader")) == PysparkFeatureLoader + + +def test_custom_callable_called(spark, mocker): + spy_cc = mocker.spy(resources, "custom_callable") + + table_reader = MagicMock() + date = datetime.date(2023, 1, 1) + + resources.CustomJobNoReturnVal().run(reader=table_reader, run_date=date, spark=spark, metadata_manager=None) + + spy_cc.assert_called_once() + + +def test_no_return_vaue_adds_version_timestamp_dataframe(spark): + table_reader = MagicMock() + date = datetime.date(2023, 1, 1) + + result = resources.CustomJobNoReturnVal().run( + reader=table_reader, run_date=date, spark=spark, metadata_manager=None + ) + + assert type(result) is pyspark.sql.DataFrame + assert result.columns == ["JOB_NAME", "CREATION_TIME", "VERSION"] + assert result.first()["VERSION"] == "job_version" + assert result.count() == 1 + + +def test_return_dataframe_forwarded_with_version(spark): + table_reader = MagicMock() + date = datetime.date(2023, 1, 1) + + result = resources.CustomJobReturnsDataFrame().run( + reader=table_reader, run_date=date, spark=spark, metadata_manager=None + ) + + assert type(result) is pyspark.sql.DataFrame + assert result.columns == ["FIRST", "SECOND", "VERSION"] + assert result.first()["VERSION"] == "job_version" + assert result.count() == 2 diff --git a/tests/jobs/test_resolver.py b/tests/jobs/test_resolver.py new file mode 100644 index 0000000..df56b72 --- /dev/null +++ b/tests/jobs/test_resolver.py @@ -0,0 +1,65 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pytest + +from rialto.jobs.decorators.resolver import Resolver, ResolverException + + +def test_simple_resolve_custom_name(): + def f(): + return 7 + + Resolver.register_callable(f, "hello") + + assert Resolver.resolve("hello") == 7 + + +def test_simple_resolve_infer_f_name(): + def f(): + return 7 + + Resolver.register_callable(f) + + assert Resolver.resolve("f") == 7 + + +def test_dependency_resolve(): + def f(): + return 7 + + def g(f): + return f + 1 + + Resolver.register_callable(f) + Resolver.register_callable(g) + + assert Resolver.resolve("g") == 8 + + +def test_resolve_non_defined(): + with pytest.raises(ResolverException): + Resolver.resolve("whatever") + + +def test_register_resolve(mocker): + def f(): + return 7 + + mocker.patch("rialto.jobs.decorators.resolver.Resolver.register_callable", return_value="f") + mocker.patch("rialto.jobs.decorators.resolver.Resolver.resolve") + + Resolver.register_resolve(f) + + Resolver.register_callable.assert_called_once_with(f) + Resolver.resolve.assert_called_once_with("f") diff --git a/tests/jobs/test_test_utils.py b/tests/jobs/test_test_utils.py new file mode 100644 index 0000000..a6b31b2 --- /dev/null +++ b/tests/jobs/test_test_utils.py @@ -0,0 +1,48 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import rialto.jobs.decorators as decorators +import tests.jobs.test_job.test_job as test_job +from rialto.jobs.decorators.resolver import Resolver +from rialto.jobs.decorators.test_utils import disable_job_decorators + + +def test_raw_dataset_patch(mocker): + spy_rc = mocker.spy(Resolver, "register_callable") + spy_dec = mocker.spy(decorators, "datasource") + + with disable_job_decorators(test_job): + assert test_job.dataset() == "dataset_return" + + spy_dec.assert_not_called() + spy_rc.assert_not_called() + + +def test_job_function_patch(mocker): + spy_dec = mocker.spy(decorators, "job") + + with disable_job_decorators(test_job): + assert test_job.job_function() == "job_function_return" + + spy_dec.assert_not_called() + + +def test_custom_name_job_function_patch(mocker): + spy_dec = mocker.spy(decorators, "job") + + with disable_job_decorators(test_job): + assert test_job.custom_name_job_function() == "custom_job_name_return" + + spy_dec.assert_not_called() diff --git a/tests/loader/__init__.py b/tests/loader/__init__.py new file mode 100644 index 0000000..79c3773 --- /dev/null +++ b/tests/loader/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/loader/metadata_config/full_example.yaml b/tests/loader/metadata_config/full_example.yaml new file mode 100644 index 0000000..9ad780c --- /dev/null +++ b/tests/loader/metadata_config/full_example.yaml @@ -0,0 +1,33 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +selection: + - group: A + prefix: A + features: + - A1 + - A2 + - group: B + prefix: B + features: + - B1 + - B2 +base: + group: D + keys: + - K + - L +maps: + - M + - N diff --git a/tests/loader/metadata_config/missing_field_example.yaml b/tests/loader/metadata_config/missing_field_example.yaml new file mode 100644 index 0000000..1caf3b0 --- /dev/null +++ b/tests/loader/metadata_config/missing_field_example.yaml @@ -0,0 +1,27 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +selection: + - group: A + prefix: A + features: + - A1 + - A2 + - group: B + features: + - B1 + - B2 +base: + group: D + keys: K diff --git a/tests/loader/metadata_config/missing_value_example.yaml b/tests/loader/metadata_config/missing_value_example.yaml new file mode 100644 index 0000000..844e25f --- /dev/null +++ b/tests/loader/metadata_config/missing_value_example.yaml @@ -0,0 +1,28 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +selection: + - group: A + prefix: A + features: + - A1 + - A2 + - group: B + prefix: B + features: + - B1 + - B2 +base: + group: D + keys: diff --git a/tests/loader/metadata_config/no_map_example.yaml b/tests/loader/metadata_config/no_map_example.yaml new file mode 100644 index 0000000..d3679fa --- /dev/null +++ b/tests/loader/metadata_config/no_map_example.yaml @@ -0,0 +1,30 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +selection: + - group: A + prefix: A + features: + - A1 + - A2 + - group: B + prefix: B + features: + - B1 + - B2 +base: + group: D + keys: + - K + - L diff --git a/tests/loader/metadata_config/test_main_config.py b/tests/loader/metadata_config/test_main_config.py new file mode 100644 index 0000000..b09f155 --- /dev/null +++ b/tests/loader/metadata_config/test_main_config.py @@ -0,0 +1,56 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest +from pydantic import ValidationError + +from rialto.loader.config_loader import get_feature_config + + +def test_get_config_full_cfg(): + cfg = get_feature_config("tests/loader/metadata_config/full_example.yaml") + assert len(cfg.selection) == 2 + assert cfg.selection[0].group == "A" + assert cfg.selection[0].prefix == "A" + assert cfg.selection[0].features == ["A1", "A2"] + assert cfg.selection[1].group == "B" + assert cfg.selection[1].prefix == "B" + assert cfg.selection[1].features == ["B1", "B2"] + assert cfg.base.group == "D" + assert cfg.base.keys == ["K", "L"] + assert cfg.maps == ["M", "N"] + + +def test_get_config_no_map_cfg(): + cfg = get_feature_config("tests/loader/metadata_config/no_map_example.yaml") + assert len(cfg.selection) == 2 + assert cfg.selection[0].group == "A" + assert cfg.selection[0].prefix == "A" + assert cfg.selection[0].features == ["A1", "A2"] + assert cfg.selection[1].group == "B" + assert cfg.selection[1].prefix == "B" + assert cfg.selection[1].features == ["B1", "B2"] + assert cfg.base.group == "D" + assert cfg.base.keys == ["K", "L"] + assert cfg.maps is None + + +def test_get_config_no_base_key(): + with pytest.raises(ValidationError): + get_feature_config("tests/loader/metadata_config/missing_value_example.yaml") + + +def test_get_config_no_prefix_field(): + with pytest.raises(ValidationError): + get_feature_config("tests/loader/metadata_config/missing_field_example.yaml") diff --git a/tests/loader/pyspark/dataframe_builder.py b/tests/loader/pyspark/dataframe_builder.py new file mode 100644 index 0000000..94a755e --- /dev/null +++ b/tests/loader/pyspark/dataframe_builder.py @@ -0,0 +1,27 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +from pyspark.sql.types import DataType, StructField, StructType + + +def dataframe_builder( + spark, data: typing.List, columns: typing.List[typing.Union[typing.Tuple[str, typing.Type[DataType]]]] +): + schema_builder = [] + for name, data_type in columns: + schema_builder.append(StructField(name, data_type, True)) + schema = StructType(schema_builder) + return spark.createDataFrame(data, schema) diff --git a/tests/loader/pyspark/dummy_loaders.py b/tests/loader/pyspark/dummy_loaders.py new file mode 100644 index 0000000..a2b0cb8 --- /dev/null +++ b/tests/loader/pyspark/dummy_loaders.py @@ -0,0 +1,24 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from datetime import date + +from rialto.loader.data_loader import DataLoader + + +class DummyDataLoader(DataLoader): + def __init__(self): + super().__init__() + + def read_group(self, group: str, information_date: date): + return None diff --git a/tests/loader/pyspark/example_cfg.yaml b/tests/loader/pyspark/example_cfg.yaml new file mode 100644 index 0000000..6b19277 --- /dev/null +++ b/tests/loader/pyspark/example_cfg.yaml @@ -0,0 +1,24 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +selection: + - group: B + prefix: B + features: + - F1 + - F3 +base: + group: D + keys: + - KEY1 diff --git a/tests/loader/pyspark/resources.py b/tests/loader/pyspark/resources.py new file mode 100644 index 0000000..64a8363 --- /dev/null +++ b/tests/loader/pyspark/resources.py @@ -0,0 +1,44 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from pyspark.sql.types import FloatType, IntegerType, StringType + +feature_group_a_data = [("K1", 1, "A"), ("K2", 2, "A"), ("K3", 3, None)] +feature_group_a_columns = [("KEY", StringType()), ("A1", IntegerType()), ("A2", StringType())] + +base_frame_data = [("K1",), ("K2",), ("K3",)] +base_frame_columns = [("KEY1", StringType())] + +mapping1_data = [("M1", "K1", "N11"), ("M2", "K3", "N23")] +mapping1_columns = [("KEY2", StringType()), ("KEY1", StringType()), ("KEY3", StringType())] + +mapping2_data = [("K1", "M1"), ("K2", "M1"), ("K3", "M2"), ("K4", "M2")] +mapping2_columns = [("KEY1", StringType()), ("KEY2", StringType())] + +mapping3_data = [("N11", "H5"), ("N23", "H6")] +mapping3_columns = [("KEY3", StringType()), ("KEY4", StringType())] + +expected_mapping_data = [("K1", "M1", "N11", "H5"), ("K3", "M2", "N23", "H6")] +expected_mapping_columns = [ + ("KEY1", StringType()), + ("KEY2", StringType()), + ("KEY3", StringType()), + ("KEY4", StringType()), +] + +feature_group_b_data = [("K1", "A", 5, None), ("K3", "B", 7, 0.36)] +feature_group_b_columns = [("KEY1", StringType()), ("F1", StringType()), ("F2", IntegerType()), ("F3", FloatType())] + +expected_features_b_data = [("K1", "A", None), ("K2", None, None), ("K3", "B", 0.36)] +expected_features_b_columns = [("KEY1", StringType()), ("B_F1", StringType()), ("B_F3", FloatType())] diff --git a/tests/loader/pyspark/test_from_cfg.py b/tests/loader/pyspark/test_from_cfg.py new file mode 100644 index 0000000..3ad653e --- /dev/null +++ b/tests/loader/pyspark/test_from_cfg.py @@ -0,0 +1,137 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from unittest.mock import MagicMock + +import pytest +from chispa import assert_df_equality +from pyspark.sql import SparkSession + +import tests.loader.pyspark.resources as r +from rialto.loader.config_loader import get_feature_config +from rialto.loader.pyspark_feature_loader import PysparkFeatureLoader +from tests.loader.pyspark.dataframe_builder import dataframe_builder as dfb +from tests.loader.pyspark.dummy_loaders import DummyDataLoader + + +@pytest.fixture(scope="session") +def spark(request): + """fixture for creating a spark session + :param request: pytest.FixtureRequest object + """ + spark = ( + SparkSession.builder.master("local[2]") + .appName("pytest-pyspark-local-testing") + .config("spark.ui.enabled", "false") + .config("spark.driver.bindAddress", "127.0.0.1") + .config("spark.driver.host", "127.0.0.1") + .getOrCreate() + ) + + request.addfinalizer(lambda: spark.stop()) + + return spark + + +@pytest.fixture(scope="session") +def loader(spark): + return PysparkFeatureLoader(spark, DummyDataLoader(), MagicMock()) + + +VALID_LIST = [(["a"], ["a"]), (["a"], ["a", "b", "c"]), (["c", "a"], ["a", "b", "c"])] + + +@pytest.mark.parametrize("valid_terms", VALID_LIST) +def test_all_keys_in_true(loader, valid_terms): + assert loader._are_all_keys_in(valid_terms[0], valid_terms[1]) is True + + +INVALID_LIST = [(["d"], ["a"]), (["a", "d"], ["a", "b", "c"]), (["c", "a", "b"], ["a", "c"])] + + +@pytest.mark.parametrize("invalid_terms", INVALID_LIST) +def test_all_keys_in_false(loader, invalid_terms): + assert loader._are_all_keys_in(invalid_terms[0], invalid_terms[1]) is False + + +def test_add_prefix(loader): + df = dfb(loader.spark, data=r.feature_group_a_data, columns=r.feature_group_a_columns) + assert loader._add_prefix(df, "A", ["KEY"]).columns == ["KEY", "A_A1", "A_A2"] + + +def test_join_keymaps(loader, spark): + key_maps = [ + PysparkFeatureLoader.KeyMap(dfb(spark, data=r.mapping1_data, columns=r.mapping1_columns), ["KEY1", "KEY2"]), + PysparkFeatureLoader.KeyMap(dfb(spark, data=r.mapping2_data, columns=r.mapping2_columns), ["KEY1"]), + PysparkFeatureLoader.KeyMap(dfb(spark, data=r.mapping3_data, columns=r.mapping3_columns), ["KEY3"]), + ] + mapped = loader._join_keymaps(dfb(spark, data=r.base_frame_data, columns=r.base_frame_columns), key_maps) + expected = dfb(spark, data=r.expected_mapping_data, columns=r.expected_mapping_columns) + assert_df_equality(mapped, expected, ignore_column_order=True, ignore_row_order=True) + + +def test_add_group(spark, monkeypatch): + class GroupMd: + def __init__(self): + self.key = ["KEY1"] + + def __call__(self, *args, **kwargs): + return self + + metadata = MagicMock() + monkeypatch.setattr(metadata, "get_group", GroupMd()) + loader = PysparkFeatureLoader(spark, DummyDataLoader(), "") + loader.metadata = metadata + + base = dfb(spark, data=r.base_frame_data, columns=r.base_frame_columns) + df = dfb(spark, data=r.feature_group_b_data, columns=r.feature_group_b_columns) + group_cfg = get_feature_config("tests/loader/pyspark/example_cfg.yaml").selection[0] + + features = loader._add_feature_group(base, df, group_cfg) + expected = dfb(spark, data=r.expected_features_b_data, columns=r.expected_features_b_columns) + assert_df_equality(features, expected, ignore_column_order=True, ignore_row_order=True) + + +def test_get_group_metadata(spark, mocker): + mocker.patch("rialto.loader.pyspark_feature_loader.MetadataManager.get_group", return_value=7) + + loader = PysparkFeatureLoader(spark, DummyDataLoader(), "") + ret_val = 
loader.get_group_metadata("group_name") + + assert ret_val == 7 + loader.metadata.get_group.assert_called_once_with("group_name") + + +def test_get_feature_metadata(spark, mocker): + mocker.patch("rialto.loader.pyspark_feature_loader.MetadataManager.get_feature", return_value=8) + + loader = PysparkFeatureLoader(spark, DummyDataLoader(), "") + ret_val = loader.get_feature_metadata("group_name", "feature") + + assert ret_val == 8 + loader.metadata.get_feature.assert_called_once_with("group_name", "feature") + + +def test_get_metadata_from_cfg(spark, mocker): + mocker.patch( + "rialto.loader.pyspark_feature_loader.MetadataManager.get_feature", + side_effect=lambda g, f: {"B": {"F1": 1, "F3": 2}}[g][f], + ) + mocker.patch("rialto.loader.pyspark_feature_loader.MetadataManager.get_group", side_effect=lambda g: {"B": 10}[g]) + + loader = PysparkFeatureLoader(spark, DummyDataLoader(), "") + metadata = loader.get_metadata_from_cfg("tests/loader/pyspark/example_cfg.yaml") + + assert metadata["B_F1"] == 1 + assert metadata["B_F3"] == 2 + assert len(metadata.keys()) == 2 diff --git a/tests/maker/__init__.py b/tests/maker/__init__.py new file mode 100644 index 0000000..79c3773 --- /dev/null +++ b/tests/maker/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/maker/conftest.py b/tests/maker/conftest.py new file mode 100644 index 0000000..79455ff --- /dev/null +++ b/tests/maker/conftest.py @@ -0,0 +1,36 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pytest +from pyspark.sql import SparkSession + + +@pytest.fixture(scope="session") +def spark(request): + """fixture for creating a spark session + Args: + request: pytest.FixtureRequest object + """ + + spark = ( + SparkSession.builder.master("local[2]") + .appName("pytest-pyspark-local-testing") + .config("spark.ui.enabled", "false") + .config("spark.driver.bindAddress", "127.0.0.1") + .config("spark.driver.host", "127.0.0.1") + .getOrCreate() + ) + + request.addfinalizer(lambda: spark.stop()) + + return spark diff --git a/tests/maker/test_FeatureFunction.py b/tests/maker/test_FeatureFunction.py new file mode 100644 index 0000000..43590ae --- /dev/null +++ b/tests/maker/test_FeatureFunction.py @@ -0,0 +1,74 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from unittest.mock import Mock + +import pytest + +from rialto.maker.containers import FeatureFunction +from rialto.metadata import ValueType + + +def test_name_generation_no_parameters(): + func = FeatureFunction("feature", Mock()) + assert func.get_feature_name() == "FEATURE" + + +def test_name_generation_with_parameter(): + func = FeatureFunction("feature", Mock()) + func.parameters["param"] = 6 + assert func.get_feature_name() == "FEATURE_PARAM_6" + + +def test_name_generation_multiple_params(): + func = FeatureFunction("feature", Mock()) + func.parameters["paramC"] = 1 + func.parameters["paramA"] = 4 + func.parameters["paramB"] = 6 + assert func.get_feature_name() == "FEATURE_PARAMA_4_PARAMB_6_PARAMC_1" + + +def test_feature_type_default_is_nominal(): + func = FeatureFunction("feature", Mock()) + assert func.type == ValueType.nominal + + +@pytest.mark.parametrize( + "feature_type", + [(ValueType.nominal, "nominal"), (ValueType.ordinal, "ordinal"), (ValueType.numerical, "numerical")], +) +def test_feature_type_getter(feature_type: tuple): + func = FeatureFunction("feature", Mock(), feature_type[0]) + assert func.get_type() == feature_type[1] + + +def test_serialization(): + func = FeatureFunction("feature", Mock()) + func.parameters["paramC"] = 1 + func.parameters["paramA"] = 4 + assert ( + func.__str__() + == "Name: feature\n\tParameters: {'paramC': 1, 'paramA': 4}\n\tType: nominal\n\tDescription: basic feature" + ) + + +def test_metadata(): + func = FeatureFunction("feature", Mock(), ValueType.ordinal) + func.parameters["paramC"] = 1 + func.parameters["paramA"] = 4 + func.dependencies = ["featureB", "featureC"] + func.description = "nice feature" + + assert func.metadata().name == "FEATURE_PARAMA_4_PARAMC_1" + assert func.metadata().value_type == ValueType.ordinal + assert func.metadata().description == "nice feature" diff --git a/tests/maker/test_FeatureHolder.py b/tests/maker/test_FeatureHolder.py new file mode 100644 index 0000000..5c00cdb --- /dev/null +++ b/tests/maker/test_FeatureHolder.py @@ -0,0 +1,36 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
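The naming tests above pin down the convention used throughout the maker: the function name is upper-cased and any parameters are appended alphabetically as NAME_VALUE pairs. For example (the callable is just a stand-in):

    from unittest.mock import Mock

    from rialto.maker.containers import FeatureFunction

    func = FeatureFunction("transactions_outbound_value", Mock())
    func.parameters["v_type"] = "A"
    func.get_feature_name()  # -> "TRANSACTIONS_OUTBOUND_VALUE_V_TYPE_A"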
+from unittest.mock import Mock + +from rialto.maker.containers import FeatureFunction, FeatureHolder +from rialto.metadata import ValueType + + +def test_metadata_return_type_empty(): + assert isinstance(FeatureHolder().get_metadata(), list) + + +def test_metadata_return_type(): + fh = FeatureHolder() + fh.append(FeatureFunction("feature_nominal", Mock(), ValueType.nominal)) + assert isinstance(fh.get_metadata(), list) + + +def test_metadata_value(): + fh = FeatureHolder() + ff = FeatureFunction("feature_ordinal", Mock(), ValueType.ordinal) + ff.parameters["param"] = 3 + fh.append(ff) + metadata = fh.get_metadata() + assert metadata[0].value_type == ValueType.ordinal diff --git a/tests/maker/test_FeatureMaker.py b/tests/maker/test_FeatureMaker.py new file mode 100644 index 0000000..b8f9c1a --- /dev/null +++ b/tests/maker/test_FeatureMaker.py @@ -0,0 +1,187 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from datetime import date + +import pandas as pd +import pytest + +from rialto.maker.feature_maker import FeatureMaker +from rialto.metadata import ValueType +from tests.maker.test_features import ( + aggregated_num_sum_outbound, + aggregated_num_sum_txn, + dependent_features_fail, + dependent_features_fail2, + dependent_features_ok, + sequential_avg_outbound, + sequential_avg_txn, + sequential_for_testing, + sequential_outbound, + sequential_outbound_with_param, +) + + +@pytest.fixture +def input_df(spark): + df = pd.DataFrame( + [ + [42, "A", "C_1"], + [-35, "A", "C_1"], + [-12, "B", "C_1"], + [-65, "B", "C_1"], + [12, "A", "C_2"], + [16, "A", "C_2"], + [-10, "A", "C_2"], + ], + columns=["AMT", "TYPE", "CUSTOMER_KEY"], + ) + return spark.createDataFrame(df) + + +def test_sequential_column_exists(input_df): + df, _ = FeatureMaker.make(input_df, "CUSTOMER_KEY", date.today(), sequential_outbound, keep_preexisting=True) + assert "TRANSACTIONS_OUTBOUND_VALUE" in df.columns + + +def test_sequential_multi_key(input_df): + df, _ = FeatureMaker.make( + input_df, ["CUSTOMER_KEY", "TYPE"], date.today(), sequential_outbound, keep_preexisting=True + ) + assert "TRANSACTIONS_OUTBOUND_VALUE" in df.columns + + +def test_sequential_keeps(input_df): + df, _ = FeatureMaker.make(input_df, "CUSTOMER_KEY", date.today(), sequential_outbound, keep_preexisting=True) + assert "AMT" in df.columns + + +def test_sequential_drops(input_df): + df, _ = FeatureMaker.make(input_df, "CUSTOMER_KEY", date.today(), sequential_outbound, keep_preexisting=False) + assert "AMT" not in df.columns + + +def test_sequential_key_not_dropped(input_df): + df, _ = FeatureMaker.make(input_df, "CUSTOMER_KEY", date.today(), sequential_outbound, keep_preexisting=False) + assert "CUSTOMER_KEY" in df.columns + + +def test_sequential_with_params_column_exists(input_df): + df, _ = FeatureMaker.make( + input_df, "CUSTOMER_KEY", date.today(), sequential_outbound_with_param, keep_preexisting=False + ) + assert "TRANSACTIONS_OUTBOUND_VALUE_V_TYPE_A" in df.columns + + +def test_aggregated_column_exists(input_df): 
+ df, _ = FeatureMaker.make_aggregated(input_df, "CUSTOMER_KEY", date.today(), aggregated_num_sum_txn) + assert "TRANSACTIONS_NUM_TRANSACTIONS" in df.columns + + +def test_aggregated_key_exists(input_df): + df, _ = FeatureMaker.make_aggregated(input_df, "CUSTOMER_KEY", date.today(), aggregated_num_sum_txn) + assert "CUSTOMER_KEY" in df.columns + + +def test_aggregated_multi_key_exists(input_df): + df, _ = FeatureMaker.make_aggregated(input_df, ["CUSTOMER_KEY", "TYPE"], date.today(), aggregated_num_sum_txn) + assert "CUSTOMER_KEY" in df.columns and "TYPE" in df.columns + + +def test_maker_metadata(input_df): + df, metadata = FeatureMaker.make_aggregated(input_df, "CUSTOMER_KEY", date.today(), aggregated_num_sum_txn) + assert metadata[0].value_type == ValueType.numerical + + +def test_double_chained_makers_column_exists(input_df): + df, _ = FeatureMaker.make_aggregated(input_df, "CUSTOMER_KEY", date.today(), aggregated_num_sum_txn) + df, _ = FeatureMaker.make(df, "CUSTOMER_KEY", date.today(), sequential_avg_txn) + assert "TRANSACTIONS_AVG_TRANSACTION" in df.columns + + +def test_tripple_chained_makers_column_exists(input_df): + # create outbound column + df, _ = FeatureMaker.make(input_df, "CUSTOMER_KEY", date.today(), sequential_outbound) + # agg outbound sum and num + df, _ = FeatureMaker.make_aggregated(df, "CUSTOMER_KEY", date.today(), aggregated_num_sum_outbound) + # create outbound avg + df, _ = FeatureMaker.make(df, "CUSTOMER_KEY", date.today(), sequential_avg_outbound) + assert "TRANSACTIONS_AVG_OUTBOUND" in df.columns + + +def test_tripple_chained_makers_key_exists(input_df): + # create outbound column + df, _ = FeatureMaker.make(input_df, "CUSTOMER_KEY", date.today(), sequential_outbound) + # agg outbound sum and num + df, _ = FeatureMaker.make_aggregated(df, "CUSTOMER_KEY", date.today(), aggregated_num_sum_outbound) + # create outbound avg + df, _ = FeatureMaker.make(df, "CUSTOMER_KEY", date.today(), sequential_avg_outbound) + assert "CUSTOMER_KEY" in df.columns + + +def test_dependency_resolution(input_df): + ordered = FeatureMaker._order_by_dependencies(FeatureMaker._load_features(dependent_features_ok)) + ordered = [f[0].name for f in ordered] + assert ordered.index("f4_raw") == 0 + assert ordered.index("f3_depends_f2") < ordered.index("f1_depends_f3_f5") + assert ordered.index("f5_depends_f4") < ordered.index("f1_depends_f3_f5") + assert ordered.index("f4_raw") < ordered.index("f2_depends_f4") + assert ordered.index("f2_depends_f4") < ordered.index("f3_depends_f2") + assert ordered.index("f4_raw") < ordered.index("f5_depends_f4") + + +def test_dependency_resolution_cycle(input_df): + with pytest.raises(Exception, match="Feature dependencies can't be resolved!"): + FeatureMaker._order_by_dependencies(FeatureMaker._load_features(dependent_features_fail)) + + +def test_dependency_resolution_self_reference(input_df): + with pytest.raises(Exception, match="Feature dependencies can't be resolved!"): + FeatureMaker._order_by_dependencies(FeatureMaker._load_features(dependent_features_fail2)) + + +def test_find_single_feature(): + features = FeatureMaker._register_module(sequential_for_testing) + feature = FeatureMaker._find_feature("FOR_TESTING_PARAM_B", features) + assert feature.get_feature_name() == "FOR_TESTING_PARAM_B" + + +def test_make_single_feature_column_exists(input_df): + out = FeatureMaker.make_single_feature(input_df, "FOR_TESTING_PARAM_B", sequential_for_testing) + assert "FOR_TESTING_PARAM_B" in out.columns + + +def test_make_single_feature_column_single(input_df): + 
out = FeatureMaker.make_single_feature(input_df, "FOR_TESTING_PARAM_B", sequential_for_testing) + assert len(out.columns) == 1 + + +def test_make_single_agg_feature_column_exists(input_df): + out = FeatureMaker.make_single_agg_feature( + input_df, "TRANSACTIONS_SUM_TRANSACTIONS", "CUSTOMER_KEY", aggregated_num_sum_txn + ) + assert "TRANSACTIONS_SUM_TRANSACTIONS" in out.columns + + +def test_make_single_agg_feature_column_single(input_df): + out = FeatureMaker.make_single_agg_feature( + input_df, "TRANSACTIONS_SUM_TRANSACTIONS", "CUSTOMER_KEY", aggregated_num_sum_txn + ) + assert len(out.columns) == 2 + + +def test_make_single_agg_feature_multikey(input_df): + out = FeatureMaker.make_single_agg_feature( + input_df, "TRANSACTIONS_SUM_TRANSACTIONS", ["CUSTOMER_KEY", "TYPE"], aggregated_num_sum_txn + ) + assert len(out.columns) == 3 diff --git a/tests/maker/test_features/__init__.py b/tests/maker/test_features/__init__.py new file mode 100644 index 0000000..79c3773 --- /dev/null +++ b/tests/maker/test_features/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/maker/test_features/aggregated_num_sum_outbound.py b/tests/maker/test_features/aggregated_num_sum_outbound.py new file mode 100644 index 0000000..ce3937b --- /dev/null +++ b/tests/maker/test_features/aggregated_num_sum_outbound.py @@ -0,0 +1,27 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pyspark.sql.functions as F +from pyspark.sql import Column + +from rialto import maker + + +@maker.feature(maker.ValueType.numerical) +def transactions_num_outbound() -> Column: + return F.count(F.col("transactions_outbound_value")) + + +@maker.feature(maker.ValueType.numerical) +def transactions_sum_outbound() -> Column: + return F.sum(F.col("transactions_outbound_value")) diff --git a/tests/maker/test_features/aggregated_num_sum_txn.py b/tests/maker/test_features/aggregated_num_sum_txn.py new file mode 100644 index 0000000..6c807af --- /dev/null +++ b/tests/maker/test_features/aggregated_num_sum_txn.py @@ -0,0 +1,27 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pyspark.sql.functions as F +from pyspark.sql import Column + +from rialto import maker + + +@maker.feature(maker.ValueType.numerical) +def transactions_num_transactions() -> Column: + return F.count(F.col("AMT")) + + +@maker.feature(maker.ValueType.numerical) +def transactions_sum_transactions() -> Column: + return F.sum(F.col("AMT")) diff --git a/tests/maker/test_features/dependent_features_fail.py b/tests/maker/test_features/dependent_features_fail.py new file mode 100644 index 0000000..d9a8c7f --- /dev/null +++ b/tests/maker/test_features/dependent_features_fail.py @@ -0,0 +1,29 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pyspark.sql.functions as F +from pyspark.sql import Column + +from rialto import maker + + +@maker.feature(maker.ValueType.nominal) +@maker.depends("f2_depends_f1") +def f1_depends_f2() -> Column: + return F.col("CUSTOMER_KEY") + + +@maker.feature(maker.ValueType.nominal) +@maker.depends("f1_depends_f2") +def f2_depends_f1() -> Column: + return F.col("CUSTOMER_KEY") diff --git a/tests/maker/test_features/dependent_features_fail2.py b/tests/maker/test_features/dependent_features_fail2.py new file mode 100644 index 0000000..4964c8a --- /dev/null +++ b/tests/maker/test_features/dependent_features_fail2.py @@ -0,0 +1,23 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pyspark.sql.functions as F +from pyspark.sql import Column + +from rialto import maker + + +@maker.feature(maker.ValueType.nominal) +@maker.depends("f5") +def f1_dependsf5() -> Column: + return F.col("CUSTOMER_KEY") diff --git a/tests/maker/test_features/dependent_features_ok.py b/tests/maker/test_features/dependent_features_ok.py new file mode 100644 index 0000000..232f08b --- /dev/null +++ b/tests/maker/test_features/dependent_features_ok.py @@ -0,0 +1,47 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pyspark.sql.functions as F +from pyspark.sql import Column + +from rialto import maker + + +@maker.feature(maker.ValueType.nominal) +@maker.depends("f3_depends_f2") +@maker.depends("f5_depends_f4") +def f1_depends_f3_f5() -> Column: + return F.col("CUSTOMER_KEY") + + +@maker.feature(maker.ValueType.nominal) +@maker.depends("f4_raw") +def f2_depends_f4() -> Column: + return F.col("CUSTOMER_KEY") + + +@maker.feature(maker.ValueType.nominal) +@maker.depends("f2_depends_f4") +def f3_depends_f2() -> Column: + return F.col("CUSTOMER_KEY") + + +@maker.feature(maker.ValueType.nominal) +def f4_raw() -> Column: + return F.col("CUSTOMER_KEY") + + +@maker.feature(maker.ValueType.nominal) +@maker.depends("f4_raw") +def f5_depends_f4() -> Column: + return F.col("CUSTOMER_KEY") diff --git a/tests/maker/test_features/sequential_avg_outbound.py b/tests/maker/test_features/sequential_avg_outbound.py new file mode 100644 index 0000000..cedad5f --- /dev/null +++ b/tests/maker/test_features/sequential_avg_outbound.py @@ -0,0 +1,22 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pyspark.sql.functions as F +from pyspark.sql import Column + +from rialto import maker + + +@maker.feature(maker.ValueType.numerical) +def transactions_avg_outbound() -> Column: + return F.col("transactions_sum_outbound") / F.col("transactions_num_outbound") diff --git a/tests/maker/test_features/sequential_avg_txn.py b/tests/maker/test_features/sequential_avg_txn.py new file mode 100644 index 0000000..65d1f7f --- /dev/null +++ b/tests/maker/test_features/sequential_avg_txn.py @@ -0,0 +1,22 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import pyspark.sql.functions as F +from pyspark.sql import Column + +from rialto import maker + + +@maker.feature(maker.ValueType.numerical) +def transactions_avg_transaction() -> Column: + return F.col("transactions_sum_transactions") / F.col("transactions_num_transactions") diff --git a/tests/maker/test_features/sequential_for_testing.py b/tests/maker/test_features/sequential_for_testing.py new file mode 100644 index 0000000..5a8de84 --- /dev/null +++ b/tests/maker/test_features/sequential_for_testing.py @@ -0,0 +1,25 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pyspark.sql.functions as F +from pyspark.sql import Column + +from rialto import maker + + +@maker.feature(maker.ValueType.numerical) +@maker.param("param", ["A", "B"]) +def for_testing(param) -> Column: + filtered = F.col("TYPE") == param + outbound = F.when(F.col("AMT") < 0, F.col("AMT")).otherwise(None) + return F.when(filtered, outbound) diff --git a/tests/maker/test_features/sequential_outbound.py b/tests/maker/test_features/sequential_outbound.py new file mode 100644 index 0000000..6b0764e --- /dev/null +++ b/tests/maker/test_features/sequential_outbound.py @@ -0,0 +1,22 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pyspark.sql.functions as F +from pyspark.sql import Column + +from rialto import maker + + +@maker.feature(maker.ValueType.numerical) +def transactions_outbound_value() -> Column: + return F.when(F.col("AMT") < 0, F.col("AMT")).otherwise(None) diff --git a/tests/maker/test_features/sequential_outbound_with_param.py b/tests/maker/test_features/sequential_outbound_with_param.py new file mode 100644 index 0000000..eb50d80 --- /dev/null +++ b/tests/maker/test_features/sequential_outbound_with_param.py @@ -0,0 +1,25 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
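Feature modules such as sequential_for_testing above are consumed by FeatureMaker, which expands every value declared with @maker.param into its own output column. A sketch of that call, assuming input_df is a Spark DataFrame with AMT, TYPE and CUSTOMER_KEY columns as in the maker tests:

    from datetime import date

    from rialto.maker.feature_maker import FeatureMaker
    from tests.maker.test_features import sequential_for_testing

    # yields FOR_TESTING_PARAM_A and FOR_TESTING_PARAM_B, one column per parameter value
    df, metadata = FeatureMaker.make(input_df, "CUSTOMER_KEY", date.today(), sequential_for_testing)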
+import pyspark.sql.functions as F +from pyspark.sql import Column + +from rialto import maker + + +@maker.feature(maker.ValueType.numerical) +@maker.param("v_type", ["A", "B"]) +def transactions_outbound_value(v_type) -> Column: + filtered = F.col("TYPE") == v_type + outbound = F.when(F.col("AMT") < 0, F.col("AMT")).otherwise(None) + return F.when(filtered, outbound) diff --git a/tests/maker/test_wrappers.py b/tests/maker/test_wrappers.py new file mode 100644 index 0000000..135b4ad --- /dev/null +++ b/tests/maker/test_wrappers.py @@ -0,0 +1,116 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from rialto.maker.containers import FeatureHolder +from rialto.maker.wrappers import depends, desc, feature, param +from rialto.metadata import ValueType + + +def dummy_feature_function(): + return None + + +def dummy_feature_with_args(parameter_1, parameter_2, parameter_3): + return parameter_1 + parameter_2 + parameter_3 + + +def test_feature_from_holder(): + val = feature(ValueType.numerical)(FeatureHolder()) + assert isinstance(val, FeatureHolder) + + +def test_feature_from_function_return_type(): + val = feature(ValueType.numerical)(dummy_feature_function) + assert isinstance(val, FeatureHolder) + + +def test_feature_from_function_function_name(): + val = feature(ValueType.numerical)(dummy_feature_function) + assert val[0].get_feature_name() == "DUMMY_FEATURE_FUNCTION" + + +def test_feature_from_function_function_object(): + val = feature(ValueType.numerical)(dummy_feature_function) + assert val[0].callable == dummy_feature_function + + +def test_parametrize_from_function_return_type(): + val = param("parameter", [1, 2, 3])(dummy_feature_with_args) + assert isinstance(val, FeatureHolder) + + +def test_parametrize_from_function_size(): + val = param("parameter", [1, 2, 3])(dummy_feature_with_args) + assert len(val) == 3 + + +def test_parametrize_chained_size(): + val = param("parameter_1", [1, 2, 3])(dummy_feature_with_args) + val = param("parameter_2", [4, 5, 6])(val) + assert len(val) == 9 + + +def test_parametrize_chained_values(): + val = param("parameter_1", [1, 2, 3])(dummy_feature_with_args) + val = param("parameter_2", [4, 5, 6])(val) + val = param("parameter_3", [7, 8, 9])(val) + # expecting ordered combinations (1,4,7)(1,4,8)(1,4,9)(1,5,7)(1,5,8)..... 
+ assert ( + val[13].parameters["parameter_1"] == 2 + and val[13].parameters["parameter_2"] == 5 + and val[13].parameters["parameter_3"] == 8 + ) + + +def test_parametrize_chained_callable(): + val = param("parameter_1", [1, 2, 3])(dummy_feature_with_args) + val = param("parameter_2", [4, 5, 6])(val) + val = param("parameter_3", [7, 8, 9])(val) + assert val[13].callable() == 15 + + +def test_feature_keeps_size(): + val = feature(ValueType.ordinal)(dummy_feature_function) + assert len(val) == 1 + + +def test_depends(): + val = depends("previous")(dummy_feature_function) + assert val[0].dependencies[0] == "previous" + + +def test_depends_keeps_size(): + val = depends("previous")(dummy_feature_function) + assert len(val) == 1 + + +def test_description(): + val = desc("Feature A")(dummy_feature_function) + assert val[0].description == "Feature A" + + +def test_description_keeps_size(): + val = desc("Feature A")(dummy_feature_function) + assert len(val) == 1 + + +def test_chaining(): + f = desc("Feature A")(dummy_feature_function) + f = param("Param", ["B"])(f) + f = depends("previous")(f) + f = feature(ValueType.ordinal)(f) + assert f[0].description == "Feature A" + assert f[0].dependencies[0] == "previous" + assert f[0].get_type() == "ordinal" + assert f[0].get_feature_name() == "DUMMY_FEATURE_FUNCTION_PARAM_B" + assert len(f) == 1 diff --git a/tests/metadata/__init__.py b/tests/metadata/__init__.py new file mode 100644 index 0000000..79c3773 --- /dev/null +++ b/tests/metadata/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/metadata/conftest.py b/tests/metadata/conftest.py new file mode 100644 index 0000000..b0cd24e --- /dev/null +++ b/tests/metadata/conftest.py @@ -0,0 +1,56 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
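test_chaining above composes the wrappers as plain function calls; in the feature modules the same composition is written as stacked decorators. An equivalent, purely illustrative definition using the same imports as test_wrappers.py:

    import pyspark.sql.functions as F
    from pyspark.sql import Column

    from rialto.maker.wrappers import depends, desc, feature, param
    from rialto.metadata import ValueType


    @feature(ValueType.ordinal)
    @depends("previous")
    @param("Param", ["B"])
    @desc("Feature A")
    def dummy_feature_function() -> Column:
        return F.col("CUSTOMER_KEY")  # placeholder expression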
+import pytest +from pyspark.sql import SparkSession + +from rialto.metadata.metadata_manager import MetadataManager +from tests.metadata.resources import ( + feature_base, + feature_schema, + group_base, + group_schema, +) + + +@pytest.fixture(scope="session") +def spark(request): + """fixture for creating a spark session + :param request: pytest.FixtureRequest object + """ + + spark = ( + SparkSession.builder.master("local[3]") + .appName("pytest-pyspark-local-testing") + .config("spark.ui.enabled", "false") + .config("spark.driver.bindAddress", "127.0.0.1") + .config("spark.driver.host", "127.0.0.1") + .getOrCreate() + ) + + request.addfinalizer(lambda: spark.stop()) + + return spark + + +@pytest.fixture(scope="function") +def mdc(spark): + """ + Metadata manager fixture with mocked metadata + :param spark: spark + :return: pytest fixture + """ + mdc = MetadataManager(spark) + mdc.groups = spark.createDataFrame(group_base, group_schema) + mdc.features = spark.createDataFrame(feature_base, feature_schema) + return mdc diff --git a/tests/metadata/resources.py b/tests/metadata/resources.py new file mode 100644 index 0000000..46f0ab1 --- /dev/null +++ b/tests/metadata/resources.py @@ -0,0 +1,64 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from pyspark.sql.types import ArrayType, StringType, StructField, StructType + +from rialto.metadata import FeatureMetadata, GroupMetadata, Schedule, ValueType + +group_schema = StructType( + [ + StructField("group_name", StringType(), False), + StructField("group_frequency", StringType(), False), + StructField("group_description", StringType(), False), + StructField("group_key", ArrayType(StringType(), True), False), + StructField("group_fs_name", StringType(), False), + ] +) + +feature_schema = StructType( + [ + StructField("feature_name", StringType(), True), + StructField("feature_type", StringType(), True), + StructField("feature_description", StringType(), True), + StructField("group_name", StringType(), True), + ] +) + +group_base = [ + ("Group1", "weekly", "group1", ["key1"], "group_1"), + ("Group2", "monthly", "group2", ["key2", "key3"], "group_2"), +] + +feature_base = [ + ("Feature1", "nominal", "feature1", "Group2"), + ("Feature2", "nominal", "feature2", "Group2"), +] + +group_md1 = GroupMetadata( + name="Group1", + fs_name="group_1", + frequency=Schedule.weekly, + description="group1", + key=["key1"], +) + +group_md2 = GroupMetadata( + name="Group2", + fs_name="group_2", + frequency=Schedule.monthly, + description="group2", + key=["key2", "key3"], + features=["Feature1", "Feature2"], +) + +feature_md1 = FeatureMetadata(name="Feature1", value_type=ValueType.nominal, description="feature1", group=group_md2) diff --git a/tests/metadata/test_metadata_connector.py b/tests/metadata/test_metadata_connector.py new file mode 100644 index 0000000..6594e6c --- /dev/null +++ b/tests/metadata/test_metadata_connector.py @@ -0,0 +1,43 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pytest + +from tests.metadata.resources import feature_md1, group_md1, group_md2 + + +def test_get_group_no_features(mdc): + assert str(mdc.get_group("Group1")) == str(group_md1) + + +def test_get_group_w_features(mdc): + assert str(mdc.get_group("Group2")) == str(group_md2) + + +def test_get_group_none(mdc): + with pytest.raises(Exception): + mdc.get_group("Group42") + + +def test_get_feature(mdc): + assert str(mdc.get_feature("Group2", "Feature1")) == str(feature_md1) + + +def test_get_feature_none_group(mdc): + with pytest.raises(Exception): + mdc.get_feature("Group42", "Feature1") + + +def test_get_feature_none_feature(mdc): + with pytest.raises(Exception): + mdc.get_feature("Group2", "Feature8") diff --git a/tests/runner/__init__.py b/tests/runner/__init__.py new file mode 100644 index 0000000..79c3773 --- /dev/null +++ b/tests/runner/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
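The connector tests above go through the mdc fixture; the same lookups outside the fixture look roughly like this, assuming the group and feature metadata tables are already populated for the session:

    from rialto.metadata.metadata_manager import MetadataManager

    manager = MetadataManager(spark)
    group_md = manager.get_group("Group2")                  # raises for unknown groups
    feature_md = manager.get_feature("Group2", "Feature1")  # raises for unknown groups or features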
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/runner/conftest.py b/tests/runner/conftest.py new file mode 100644 index 0000000..44f0c09 --- /dev/null +++ b/tests/runner/conftest.py @@ -0,0 +1,44 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pytest +from pyspark.sql import SparkSession + +from rialto.runner import Runner + + +@pytest.fixture(scope="session") +def spark(request): + """fixture for creating a spark session + :param request: pytest.FixtureRequest object + """ + + spark = ( + SparkSession.builder.master("local[3]") + .appName("pytest-pyspark-local-testing") + .config("spark.ui.enabled", "false") + .config("spark.driver.bindAddress", "127.0.0.1") + .config("spark.driver.host", "127.0.0.1") + .getOrCreate() + ) + + request.addfinalizer(lambda: spark.stop()) + + return spark + + +@pytest.fixture(scope="function") +def basic_runner(spark): + return Runner( + spark, config_path="tests/runner/transformations/config.yaml", feature_metadata_schema="", run_date="2023-03-31" + ) diff --git a/tests/runner/runner_resources.py b/tests/runner/runner_resources.py new file mode 100644 index 0000000..bd39947 --- /dev/null +++ b/tests/runner/runner_resources.py @@ -0,0 +1,38 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from pyspark.sql.types import DateType, StringType, StructField, StructType + +from rialto.runner.date_manager import DateManager + +simple_group_data = [ + ("A", DateManager.str_to_date("2023-03-05")), + ("B", DateManager.str_to_date("2023-03-12")), + ("C", DateManager.str_to_date("2023-03-19")), +] + +general_schema = StructType([StructField("KEY", StringType(), True), StructField("DATE", DateType(), True)]) + + +dep1_data = [ + ("E", DateManager.str_to_date("2023-03-05")), + ("F", DateManager.str_to_date("2023-03-10")), + ("G", DateManager.str_to_date("2023-03-15")), + ("H", DateManager.str_to_date("2023-03-25")), +] + +dep2_data = [ + ("J", DateManager.str_to_date("2022-11-01")), + ("K", DateManager.str_to_date("2022-12-01")), + ("L", DateManager.str_to_date("2023-01-01")), +] diff --git a/tests/runner/test_date_manager.py b/tests/runner/test_date_manager.py new file mode 100644 index 0000000..9088e0c --- /dev/null +++ b/tests/runner/test_date_manager.py @@ -0,0 +1,171 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from datetime import datetime + +import pytest + +from rialto.runner.config_loader import IntervalConfig, ScheduleConfig +from rialto.runner.date_manager import DateManager + + +def test_str_to_date(): + assert DateManager.str_to_date("2023-03-05") == datetime.strptime("2023-03-05", "%Y-%m-%d").date() + + +@pytest.mark.parametrize( + "units , value, res", + [("days", 7, "2023-02-26"), ("weeks", 3, "2023-02-12"), ("months", 5, "2022-10-05"), ("years", 2, "2021-03-5")], +) +def test_date_from(units, value, res): + rundate = DateManager.str_to_date("2023-03-05") + date_from = DateManager.date_subtract(run_date=rundate, units=units, value=value) + assert date_from == DateManager.str_to_date(res) + + +def test_date_from_bad(): + rundate = DateManager.str_to_date("2023-03-05") + with pytest.raises(ValueError) as exception: + DateManager.date_subtract(run_date=rundate, units="random", value=1) + assert str(exception.value) == "Unknown time unit random" + + +def test_all_dates(): + all_dates = DateManager.all_dates( + date_from=DateManager.str_to_date("2023-02-05"), + date_to=DateManager.str_to_date("2023-04-12"), + ) + assert len(all_dates) == 67 + assert all_dates[1] == DateManager.str_to_date("2023-02-06") + + +def test_all_dates_reversed(): + all_dates = DateManager.all_dates( + date_from=DateManager.str_to_date("2023-04-12"), + date_to=DateManager.str_to_date("2023-02-05"), + ) + assert len(all_dates) == 67 + assert all_dates[1] == DateManager.str_to_date("2023-02-06") + + +def test_run_dates_weekly(): + cfg = ScheduleConfig(frequency="weekly", day=5) + + run_dates = DateManager.run_dates( + date_from=DateManager.str_to_date("2023-02-05"), + date_to=DateManager.str_to_date("2023-04-07"), + schedule=cfg, + ) + + expected = [ + "2023-02-10", + "2023-02-17", + "2023-02-24", + "2023-03-03", + "2023-03-10", + "2023-03-17", + "2023-03-24", + "2023-03-31", + "2023-04-07", + ] + expected = [DateManager.str_to_date(d) for d in expected] + assert 
run_dates == expected + + +def test_run_dates_monthly(): + cfg = ScheduleConfig(frequency="monthly", day=5) + + run_dates = DateManager.run_dates( + date_from=DateManager.str_to_date("2022-08-05"), + date_to=DateManager.str_to_date("2023-04-07"), + schedule=cfg, + ) + + expected = [ + "2022-08-05", + "2022-09-05", + "2022-10-05", + "2022-11-05", + "2022-12-05", + "2023-01-05", + "2023-02-05", + "2023-03-05", + "2023-04-05", + ] + expected = [DateManager.str_to_date(d) for d in expected] + assert run_dates == expected + + +def test_run_dates_daily(): + cfg = ScheduleConfig(frequency="daily") + + run_dates = DateManager.run_dates( + date_from=DateManager.str_to_date("2023-03-28"), + date_to=DateManager.str_to_date("2023-04-03"), + schedule=cfg, + ) + + expected = [ + "2023-03-28", + "2023-03-29", + "2023-03-30", + "2023-03-31", + "2023-04-01", + "2023-04-02", + "2023-04-03", + ] + expected = [DateManager.str_to_date(d) for d in expected] + assert run_dates == expected + + +def test_run_dates_invalid(): + cfg = ScheduleConfig(frequency="random") + with pytest.raises(ValueError) as exception: + DateManager.run_dates( + date_from=DateManager.str_to_date("2023-03-28"), + date_to=DateManager.str_to_date("2023-04-03"), + schedule=cfg, + ) + assert str(exception.value) == "Unknown frequency random" + + +@pytest.mark.parametrize( + "shift, res", + [(7, "2023-02-26"), (3, "2023-03-02"), (-5, "2023-03-10"), (0, "2023-03-05")], +) +def test_to_info_date(shift, res): + cfg = ScheduleConfig(frequency="daily", info_date_shift=IntervalConfig(units="days", value=shift)) + base = DateManager.str_to_date("2023-03-05") + info = DateManager.to_info_date(base, cfg) + assert DateManager.str_to_date(res) == info + + +@pytest.mark.parametrize( + "unit, result", + [("days", "2023-03-02"), ("weeks", "2023-02-12"), ("months", "2022-12-05"), ("years", "2020-03-05")], +) +def test_info_date_shift_units(unit, result): + cfg = ScheduleConfig(frequency="daily", info_date_shift=IntervalConfig(units=unit, value=3)) + base = DateManager.str_to_date("2023-03-05") + info = DateManager.to_info_date(base, cfg) + assert DateManager.str_to_date(result) == info + + +def test_info_date_shift_combined(): + cfg = ScheduleConfig( + frequency="daily", + info_date_shift=[IntervalConfig(units="months", value=3), IntervalConfig(units="days", value=4)], + ) + base = DateManager.str_to_date("2023-03-05") + info = DateManager.to_info_date(base, cfg) + assert DateManager.str_to_date("2022-12-01") == info diff --git a/tests/runner/test_runner.py b/tests/runner/test_runner.py new file mode 100644 index 0000000..0459411 --- /dev/null +++ b/tests/runner/test_runner.py @@ -0,0 +1,360 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
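The date-manager tests above cover the scheduling helpers individually; combined, a typical call chain looks roughly like this (the schedule values are illustrative):

    from rialto.runner.config_loader import IntervalConfig, ScheduleConfig
    from rialto.runner.date_manager import DateManager

    schedule = ScheduleConfig(
        frequency="weekly",
        day=5,
        info_date_shift=IntervalConfig(units="days", value=3),
    )

    run_dates = DateManager.run_dates(
        date_from=DateManager.str_to_date("2023-02-05"),
        date_to=DateManager.str_to_date("2023-04-07"),
        schedule=schedule,
    )
    info_dates = [DateManager.to_info_date(d, schedule) for d in run_dates]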
+from collections import namedtuple +from datetime import datetime +from typing import Optional + +import pytest +from pyspark.sql import DataFrame + +from rialto.common.table_reader import DataReader +from rialto.jobs.configuration.config_holder import ConfigHolder +from rialto.runner.runner import DateManager, Runner +from rialto.runner.table import Table +from tests.runner.runner_resources import ( + dep1_data, + dep2_data, + general_schema, + simple_group_data, +) +from tests.runner.transformations.simple_group import SimpleGroup + + +class MockReader(DataReader): + def __init__(self, spark): + self.spark = spark + + def get_table( + self, + table: str, + info_date_from: Optional[datetime.date] = None, + info_date_to: Optional[datetime.date] = None, + date_column: str = None, + uppercase_columns: bool = False, + ) -> DataFrame: + if table == "catalog.schema.simple_group": + return self.spark.createDataFrame(simple_group_data, general_schema) + if table == "source.schema.dep1": + return self.spark.createDataFrame(dep1_data, general_schema) + if table == "source.schema.dep2": + return self.spark.createDataFrame(dep2_data, general_schema) + + def get_latest( + self, + table: str, + until: Optional[datetime.date] = None, + date_column: str = None, + uppercase_columns: bool = False, + ) -> DataFrame: + pass + + +def test_table_exists(spark, mocker, basic_runner): + mock = mocker.patch("pyspark.sql.Catalog.tableExists", return_value=True) + basic_runner._table_exists("abc") + mock.assert_called_once_with("abc") + + +def test_infer_column(spark, mocker, basic_runner): + column = namedtuple("catalog", ["name", "isPartition"]) + catalog = [column("a", True), column("b", False), column("c", False)] + + mock = mocker.patch("pyspark.sql.Catalog.listColumns", return_value=catalog) + partition = basic_runner._delta_partition("aaa") + assert partition == "a" + mock.assert_called_once_with("aaa") + + +def test_load_module(spark, basic_runner): + module = basic_runner._load_module(basic_runner.config.pipelines[0].module) + assert isinstance(module, SimpleGroup) + + +def test_generate(spark, mocker, basic_runner): + run = mocker.patch("tests.runner.transformations.simple_group.SimpleGroup.run") + group = SimpleGroup() + basic_runner._generate(group, DateManager.str_to_date("2023-01-31")) + run.assert_called_once_with( + reader=basic_runner.reader, + run_date=DateManager.str_to_date("2023-01-31"), + spark=spark, + metadata_manager=basic_runner.metadata, + dependencies=None, + ) + + +def test_generate_w_dep(spark, mocker, basic_runner): + run = mocker.patch("tests.runner.transformations.simple_group.SimpleGroup.run") + group = SimpleGroup() + basic_runner._generate(group, DateManager.str_to_date("2023-01-31"), basic_runner.config.pipelines[2].dependencies) + run.assert_called_once_with( + reader=basic_runner.reader, + run_date=DateManager.str_to_date("2023-01-31"), + spark=spark, + metadata_manager=basic_runner.metadata, + dependencies={ + "source1": basic_runner.config.pipelines[2].dependencies[0], + "source2": basic_runner.config.pipelines[2].dependencies[1], + }, + ) + + +def test_init_dates(spark): + runner = Runner( + spark, config_path="tests/runner/transformations/config.yaml", feature_metadata_schema="", run_date="2023-03-31" + ) + assert runner.date_from == DateManager.str_to_date("2023-01-31") + assert runner.date_until == DateManager.str_to_date("2023-03-31") + + runner = Runner( + spark, + config_path="tests/runner/transformations/config.yaml", + feature_metadata_schema="", + 
date_from="2023-03-01", + date_until="2023-03-31", + ) + assert runner.date_from == DateManager.str_to_date("2023-03-01") + assert runner.date_until == DateManager.str_to_date("2023-03-31") + + runner = Runner( + spark, + config_path="tests/runner/transformations/config2.yaml", + feature_metadata_schema="", + run_date="2023-03-31", + ) + assert runner.date_from == DateManager.str_to_date("2023-02-24") + assert runner.date_until == DateManager.str_to_date("2023-03-31") + + +def test_possible_run_dates(spark): + runner = Runner( + spark, + config_path="tests/runner/transformations/config.yaml", + feature_metadata_schema="", + date_from="2023-03-01", + date_until="2023-03-31", + ) + + dates = runner.get_possible_run_dates(runner.config.pipelines[0].schedule) + expected = ["2023-03-05", "2023-03-12", "2023-03-19", "2023-03-26"] + assert dates == [DateManager.str_to_date(d) for d in expected] + + +def test_info_dates(spark, basic_runner): + run = ["2023-02-05", "2023-02-12", "2023-02-19", "2023-02-26", "2023-03-05"] + run = [DateManager.str_to_date(d) for d in run] + info = basic_runner.get_info_dates(basic_runner.config.pipelines[0].schedule, run) + expected = ["2023-02-02", "2023-02-09", "2023-02-16", "2023-02-23", "2023-03-02"] + assert info == [DateManager.str_to_date(d) for d in expected] + + +def test_completion(spark, mocker, basic_runner): + mocker.patch("rialto.runner.runner.Runner._table_exists", return_value=True) + + basic_runner.reader = MockReader(spark) + + dates = ["2023-02-26", "2023-03-05", "2023-03-12", "2023-03-19", "2023-03-26"] + dates = [DateManager.str_to_date(d) for d in dates] + + comp = basic_runner._get_completion(Table(table_path="catalog.schema.simple_group", partition="DATE"), dates) + expected = [False, True, True, True, False] + assert comp == expected + + +def test_completion_rerun(spark, mocker, basic_runner): + mocker.patch("rialto.runner.runner.Runner._table_exists", return_value=True) + + runner = Runner( + spark, config_path="tests/runner/transformations/config.yaml", feature_metadata_schema="", run_date="2023-03-31" + ) + runner.reader = MockReader(spark) + + dates = ["2023-02-26", "2023-03-05", "2023-03-12", "2023-03-19", "2023-03-26"] + dates = [DateManager.str_to_date(d) for d in dates] + + comp = runner._get_completion(Table(table_path="catalog.schema.simple_group", partition="DATE"), dates) + expected = [False, True, True, True, False] + assert comp == expected + + +def test_check_dates_have_partition(spark, mocker): + mocker.patch("rialto.runner.runner.Runner._table_exists", return_value=True) + + runner = Runner( + spark, + config_path="tests/runner/transformations/config.yaml", + feature_metadata_schema="", + date_from="2023-03-01", + date_until="2023-03-31", + ) + runner.reader = MockReader(spark) + dates = ["2023-03-04", "2023-03-05", "2023-03-06"] + dates = [DateManager.str_to_date(d) for d in dates] + res = runner.check_dates_have_partition(Table(schema_path="source.schema", table="dep1", partition="DATE"), dates) + expected = [False, True, False] + assert res == expected + + +def test_check_dates_have_partition_no_table(spark, mocker): + mocker.patch("rialto.runner.runner.Runner._table_exists", return_value=False) + + runner = Runner( + spark, + config_path="tests/runner/transformations/config.yaml", + feature_metadata_schema="", + date_from="2023-03-01", + date_until="2023-03-31", + ) + dates = ["2023-03-04", "2023-03-05", "2023-03-06"] + dates = [DateManager.str_to_date(d) for d in dates] + res = 
runner.check_dates_have_partition(Table(schema_path="source.schema", table="dep66", partition="DATE"), dates) + expected = [False, False, False] + assert res == expected + + +@pytest.mark.parametrize( + "r_date, expected", + [("2023-02-26", False), ("2023-03-05", True)], +) +def test_check_dependencies(spark, mocker, r_date, expected): + mocker.patch("rialto.runner.runner.Runner._table_exists", return_value=True) + + runner = Runner( + spark, + config_path="tests/runner/transformations/config.yaml", + feature_metadata_schema="", + date_from="2023-03-01", + date_until="2023-03-31", + ) + runner.reader = MockReader(spark) + res = runner.check_dependencies(runner.config.pipelines[0], DateManager.str_to_date(r_date)) + assert res == expected + + +def test_check_no_dependencies(spark, mocker): + mocker.patch("rialto.runner.runner.Runner._table_exists", return_value=True) + + runner = Runner( + spark, + config_path="tests/runner/transformations/config.yaml", + feature_metadata_schema="", + date_from="2023-03-01", + date_until="2023-03-31", + ) + runner.reader = MockReader(spark) + res = runner.check_dependencies(runner.config.pipelines[1], DateManager.str_to_date("2023-03-05")) + assert res is True + + +def test_select_dates(spark, mocker): + mocker.patch("rialto.runner.runner.Runner._table_exists", return_value=True) + + runner = Runner( + spark, + config_path="tests/runner/transformations/config.yaml", + feature_metadata_schema="", + date_from="2023-03-01", + date_until="2023-03-31", + ) + runner.reader = MockReader(spark) + + r, i = runner._select_run_dates( + runner.config.pipelines[0], Table(table_path="catalog.schema.simple_group", partition="DATE") + ) + expected_run = ["2023-03-05", "2023-03-12", "2023-03-19", "2023-03-26"] + expected_run = [DateManager.str_to_date(d) for d in expected_run] + expected_info = ["2023-03-02", "2023-03-09", "2023-03-16", "2023-03-23"] + expected_info = [DateManager.str_to_date(d) for d in expected_info] + assert r == expected_run + assert i == expected_info + + +def test_select_dates_all_done(spark, mocker): + mocker.patch("rialto.runner.runner.Runner._table_exists", return_value=True) + + runner = Runner( + spark, + config_path="tests/runner/transformations/config.yaml", + feature_metadata_schema="", + date_from="2023-03-02", + date_until="2023-03-02", + ) + runner.reader = MockReader(spark) + + r, i = runner._select_run_dates( + runner.config.pipelines[0], Table(table_path="catalog.schema.simple_group", partition="DATE") + ) + expected_run = [] + expected_run = [DateManager.str_to_date(d) for d in expected_run] + expected_info = [] + expected_info = [DateManager.str_to_date(d) for d in expected_info] + assert r == expected_run + assert i == expected_info + + +def test_op_selected(spark, mocker): + mocker.patch("rialto.runner.tracker.Tracker.report") + run = mocker.patch("rialto.runner.runner.Runner._run_pipeline") + + runner = Runner( + spark, config_path="tests/runner/transformations/config.yaml", feature_metadata_schema="", op="SimpleGroup" + ) + + runner() + # assert_called_once() actually verifies the call; called_once() would silently create a child mock + run.assert_called_once() + + +def test_op_bad(spark, mocker): + mocker.patch("rialto.runner.tracker.Tracker.report") + mocker.patch("rialto.runner.runner.Runner._run_pipeline") + + runner = Runner( + spark, config_path="tests/runner/transformations/config.yaml", feature_metadata_schema="", op="BadOp" + ) + + with pytest.raises(ValueError) as exception: + runner() + assert str(exception.value) == "Unknown operation selected: BadOp" + + +def test_custom_config(spark, mocker): + cc_spy = 
mocker.spy(ConfigHolder, "set_custom_config") + custom_config = {"cc": 42} + + _ = Runner(spark, config_path="tests/runner/transformations/config.yaml", custom_job_config=custom_config) + + cc_spy.assert_called_once_with(cc=42) + + +def test_feature_store_config(spark, mocker): + fs_spy = mocker.spy(ConfigHolder, "set_feature_store_config") + + _ = Runner( + spark, + config_path="tests/runner/transformations/config.yaml", + feature_store_schema="schema", + feature_metadata_schema="metadata", + ) + + fs_spy.assert_called_once_with("schema", "metadata") + + +def test_no_configs(spark, mocker): + cc_spy = mocker.spy(ConfigHolder, "set_custom_config") + fs_spy = mocker.spy(ConfigHolder, "set_feature_store_config") + + _ = Runner(spark, config_path="tests/runner/transformations/config.yaml") + + cc_spy.assert_not_called() + fs_spy.assert_not_called() diff --git a/tests/runner/test_table.py b/tests/runner/test_table.py new file mode 100644 index 0000000..82e6fa6 --- /dev/null +++ b/tests/runner/test_table.py @@ -0,0 +1,28 @@ +from rialto.runner.table import Table + + +def test_table_basic_init(): + t = Table(catalog="cat", schema="sch", table="tab", schema_path=None, table_path=None, class_name=None) + + assert t.get_table_path() == "cat.sch.tab" + assert t.get_schema_path() == "cat.sch" + + +def test_table_classname_init(): + t = Table(catalog=None, schema=None, table=None, schema_path="cat.sch", table_path=None, class_name="ClaSs") + + assert t.get_table_path() == "cat.sch.cla_ss" + assert t.get_schema_path() == "cat.sch" + assert t.catalog == "cat" + assert t.schema == "sch" + assert t.table == "cla_ss" + + +def test_table_path_init(): + t = Table(catalog=None, schema=None, table=None, schema_path=None, table_path="cat.sch.tab", class_name=None) + + assert t.get_table_path() == "cat.sch.tab" + assert t.get_schema_path() == "cat.sch" + assert t.catalog == "cat" + assert t.schema == "sch" + assert t.table == "tab" diff --git a/tests/runner/transformations/__init__.py b/tests/runner/transformations/__init__.py new file mode 100644 index 0000000..eaa15cd --- /dev/null +++ b/tests/runner/transformations/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from tests.runner.transformations.simple_group import SimpleGroup # noqa diff --git a/tests/runner/transformations/config.yaml b/tests/runner/transformations/config.yaml new file mode 100644 index 0000000..2bfeaf1 --- /dev/null +++ b/tests/runner/transformations/config.yaml @@ -0,0 +1,82 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +general: + target_schema: catalog.schema + target_partition_column: "INFORMATION_DATE" + watched_period_units: "months" + watched_period_value: 2 + job: "run" # run/check + mail: + sender: test@testing.org + smtp: server.test + to: + - developer@testing.org + - developer2@testing.org + subject: test report +pipelines: + - name: SimpleGroup + module: + python_module: tests.runner.transformations + python_class: SimpleGroup + schedule: + frequency: weekly + day: 7 + info_date_shift: + value: 3 + units: days + dependencies: + - table: source.schema.dep1 + interval: + units: "days" + value: 1 + date_col: "DATE" + - table: source.schema.dep2 + interval: + units: "months" + value: 3 + date_col: "DATE" + - name: GroupNoDeps + module: + python_module: tests.runner.transformations + python_class: SimpleGroup + schedule: + frequency: weekly + day: 7 + info_date_shift: + value: 3 + units: days + - name: NamedDeps + module: + python_module: tests.runner.transformations + python_class: SimpleGroup + schedule: + frequency: weekly + day: 7 + info_date_shift: + value: 3 + units: days + dependencies: + - table: source.schema.dep1 + name: source1 + interval: + units: "days" + value: 1 + date_col: "DATE" + - table: source.schema.dep2 + name: source2 + interval: + units: "months" + value: 3 + date_col: "batch" diff --git a/tests/runner/transformations/config2.yaml b/tests/runner/transformations/config2.yaml new file mode 100644 index 0000000..a91894b --- /dev/null +++ b/tests/runner/transformations/config2.yaml @@ -0,0 +1,45 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +general: + target_schema: catalog.schema + target_partition_column: "INFORMATION_DATE" + watched_period_units: "weeks" + watched_period_value: 5 + job: "run" # run/check + mail: + sender: test@testing.org + smtp: server.test + to: + - developer@testing.org + subject: test report +pipelines: +- name: SimpleGroup + module: + python_module: transformations + python_class: SimpleGroup + schedule: + frequency: weekly + day: 7 + dependencies: + - table: source.schema.dep1 + interval: + units: "days" + value: 1 + date_col: "DATE" + - table: source.schema.dep2 + interval: + units: "months" + value: 1 + date_col: "DATE" diff --git a/tests/runner/transformations/simple_group.py b/tests/runner/transformations/simple_group.py new file mode 100644 index 0000000..fcda5c7 --- /dev/null +++ b/tests/runner/transformations/simple_group.py @@ -0,0 +1,34 @@ +# Copyright 2022 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from datetime import datetime +from typing import Dict + +from pyspark.sql import DataFrame, SparkSession +from pyspark.sql.types import StructType + +from rialto.common import TableReader +from rialto.metadata import MetadataManager +from rialto.runner import Transformation + + +class SimpleGroup(Transformation): + def run( + self, + reader: TableReader, + run_date: datetime.date, + spark: SparkSession = None, + metadata_manager: MetadataManager = None, + dependencies: Dict = None, + ) -> DataFrame: + return spark.createDataFrame([], StructType([])) From 101940836b3cf30155e3eecbd2cefd08c1d96c13 Mon Sep 17 00:00:00 2001 From: Marek Dobransky Date: Fri, 7 Jun 2024 13:06:12 +0200 Subject: [PATCH 2/2] Release 1.3.0 --- CHANGELOG.md | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e17c3e7..cfd48eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ All notable changes to this project will be documented in this file. -## [Unreleased] - yyyy-mm-dd +## 1.3.0 - 2024-06-07 ### Added diff --git a/README.md b/README.md index 6611ec2..3c8bf4a 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Rialto is a framework for building and deploying machine learning features in a The name Rialto is a reference to the Rialto Bridge in Venice, Italy. The Rialto Bridge was a major marketplace for goods and ideas in the Middle Ages. Rialto is intended to be a foundation of a similar marketplace for machine learning features, where users can find and share reusable features. -Sphinx-Generated autodocs pages available **[here](https://legendary-winner-93rlnzn.pages.github.io/)**. +Sphinx-Generated autodocs pages available **[here]()**. # Contents 1. [Installation](#install)