diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 43a9ab217413..0836a8913e26 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -50,3 +50,6 @@ d170c89167a96b702edc02b16dbf5984619d0e8f # enable TRYceratops linting; add noqa comments # https://github.com/great-expectations/great_expectations/pull/9684 2bbfb50a6458f09ef197ee1174666a4c4726a850 +# Add "FIXME CoP" comments to type ignore and noqa ignore lines +# https://github.com/great-expectations/great_expectations/pull/10817 +078c1611dd9c70fec6d7b6318f21a063ff6aa9b0 diff --git a/.github/teams.yml b/.github/teams.yml index 32c6c0117c37..31917d2c6f29 100644 --- a/.github/teams.yml +++ b/.github/teams.yml @@ -2,14 +2,14 @@ # NOTE - this should be kept in sync with the GX org's teams devrel: - - "@kyleaton" # Kyle Eaton - - "@rachhouse" # Rachel House - - "@r34ctor" # Nevin Tan - - "@molliemarie" # Mollie Marie Pettit - - "@klavavej" # Kristen Lavavej - "@JessSaavedra" # Jessica Saavedra - - "@deborahniesz" # Deborah Niesz - "@Quantisan" # Paul Lam + - "@deborahniesz" # Deborah Niesz + - "@klavavej" # Kristen Lavavej + - "@kyleaton" # Kyle Eaton + - "@molliemarie" # Mollie Marie Pettit + - "@r34ctor" # Nevin Tan + - "@rachhouse" # Rachel House core: - "@DrewHoo" # Drew Hoover @@ -19,8 +19,11 @@ core: - "@Super-Tanner" # Tanner Beam - "@TrangPham" # Thu Pham - "@abegong" # Abe Gong + - "@alena-hutchinson" # Alena Hutchinson - "@allensallinger" # Allen Sallinger + - "@annabarr" # Anna Barr - "@anthonyburdi" # Anthony Burdi + - "@ashmortar" # Aaron Ross - "@billdirks" # Bill Dirks - "@cdkini" # Chetan Kini - "@dctalbot" # David Talbot @@ -33,12 +36,10 @@ core: - "@lockettks" # Kim Mathieu - "@nicgrayson" # Nic Grayson - "@roblim" # Rob Lim + - "@shiplet" # Michael Shiplet - "@sujensen" # Susan Jensen - "@tyler-hoffman" # Tyler Hoffman - "@wookasz" # Łukasz Lempart - - "@annabarr" # Anna Barr - - "@alena-hutchinson" # Alena Hutchinson - - "@ashmortar" # Aaron Ross bot: - "@dependabot" diff 
--git a/.github/workflows/README.md b/.github/workflows/README.md index 8d69c54a92fa..a29324a8fb0a 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -6,6 +6,8 @@ - Responsible for keeping PR's up-to-date with `develop` (only works if "auto-merge" is turned on) * [CodeSee Architecture Diagrams](codesee-arch-diagram.yml) - Generates a visualization of proposed changes to the codebase through the use of https://www.codesee.io/ +* [DataSource Cleanup](data_source_cleanup.yml) + - Responsible for cleaning up stray schemas left behind from tests * [StaleBot](stale.yml) - Responsible for marking PR's and issues as `stale` * [PEP-273 Compatability](test-pep273-compatability.yml) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4d7fbd7d9a34..18ada396687d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -113,6 +113,8 @@ jobs: invoke type-check --ci --pretty --check-stub-sources - name: Marker-coverage-check run: invoke marker-coverage + - name: Check for linter ignores without comments + run: ./scripts/check_linter_ignores.sh docs-snippets: needs: [unit-tests, doc-checks, check-actor-permissions] @@ -197,6 +199,9 @@ jobs: # Authorize access to Google Cloud with a service account ./google-cloud-sdk/bin/gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS + - name: Install mssql odbc driver + run: ./scripts/install_mssql_odbc_driver.sh + - name: Set up Python uses: actions/setup-python@v5 with: @@ -326,8 +331,6 @@ jobs: run: python ci/checks/check_name_tag_snippets_referenced.py - name: public_api_report run: invoke public-api - - name: link_checker - run: python docs/checks/docs_link_checker.py -p docs/docusaurus/docs -r docs/docusaurus/docs -sr docs/docusaurus/static -s docs -sp static --skip-external docs-build: needs: [doc-checks, check-actor-permissions] @@ -399,6 +402,13 @@ jobs: GX_CLOUD_ORGANIZATION_ID: ${{secrets.MERCURY_ORGANIZATION_ID}} GX_CLOUD_ACCESS_TOKEN: 
${{secrets.MERCURY_ACCESS_TOKEN}} LD_SDK_KEY: ${{secrets.LD_SDK_KEY}} + GX_SCHEDULER_LAMBDA_ARN: ${{secrets.GX_SCHEDULER_LAMBDA_ARN}} + GX_SCHEDULER_EXECUTION_ROLE_ARN: ${{secrets.GX_SCHEDULER_EXECUTION_ROLE_ARN}} + GX_SCHEDULER_EVENTBRIDGE_DLQ_ARN: ${{secrets.GX_SCHEDULER_EVENTBRIDGE_DLQ_ARN}} + GX_SCHEDULER_EVENTBRIDGE_REGION: ${{secrets.GX_SCHEDULER_EVENTBRIDGE_REGION}} + LOCALSTACK_AWS_ENDPOINT_URL: ${{secrets.LOCALSTACK_AWS_ENDPOINT_URL}} + LOCALSTACK_AWS_SECRET_ACCESS_KEY: ${{secrets.LOCALSTACK_AWS_SECRET_ACCESS_KEY}} + LOCALSTACK_AWS_ACCESS_KEY_ID: ${{secrets.LOCALSTACK_AWS_ACCESS_KEY_ID}} PACT_DO_NOT_TRACK: true SNOWFLAKE_CI_ACCOUNT: ${{secrets.SNOWFLAKE_CI_ACCOUNT}} SNOWFLAKE_CI_USER_PASSWORD: ${{secrets.SNOWFLAKE_CI_USER_PASSWORD}} @@ -599,6 +609,9 @@ jobs: pip install $(grep -E '^(invoke)' reqs/requirements-dev-contrib.txt) invoke deps --gx-install -m '${{ matrix.markers }}' -r test + - name: Install mssql odbc driver + run: ./scripts/install_mssql_odbc_driver.sh + - name: Configure ECR AWS Credentials uses: aws-actions/configure-aws-credentials@v4 with: diff --git a/.github/workflows/data_source_cleanup.yml b/.github/workflows/data_source_cleanup.yml new file mode 100644 index 000000000000..61f30d0b76f8 --- /dev/null +++ b/.github/workflows/data_source_cleanup.yml @@ -0,0 +1,42 @@ +# Script(s) to clean up any loose data left behind from test runs. +# These can primarily happen if CI is stopped while tests are running +# for big query, etc. 
+ +name: Nightly Data Source Cleanup + +on: + workflow_dispatch: + schedule: + - cron: "0 6 * * *" + +jobs: + cleanup-big-query: + runs-on: ubuntu-latest + env: + # google + GE_TEST_GCP_CREDENTIALS: ${{secrets.GE_TEST_GCP_CREDENTIALS}} + GE_TEST_GCP_PROJECT: ${{secrets.GE_TEST_GCP_PROJECT}} + GE_TEST_BIGQUERY_DATASET: ${{secrets.GE_TEST_BIGQUERY_DATASET}} + GOOGLE_APPLICATION_CREDENTIALS: "gcp-credentials.json" + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: "pip" + cache-dependency-path: | + reqs/requirements-dev-test.txt + setup.py + - name: Create JSON file GCP + run: | + echo "$GE_TEST_GCP_CREDENTIALS" > gcp-credentials.json + - name: Install dependencies + run: | + pip install $(grep -E '^(invoke)' reqs/requirements-dev-contrib.txt) + invoke deps --gx-install -m bigquery + pip install -e . + - name: Run BigQuery cleanup script + run: | + python ./scripts/cleanup/cleanup_big_query.py diff --git a/.gitignore b/.gitignore index fa93f19d04d8..1dddfee02d60 100644 --- a/.gitignore +++ b/.gitignore @@ -157,3 +157,6 @@ tests/integration/cloud/rest_contracts/pacts # Local Netlify folder .netlify + +# mercury container volume +assets/docker/mercury/volume/ diff --git a/README.md b/README.md index f072b7819f6b..738773653213 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ To ensure the long-term quality of the GX Core codebase, we're not yet ready to | -------------------- | ------------------ | ----- | | CredentialStore | 🟢 Ready | | | BatchDefinition | 🟡 Partially ready | Formerly known as splitters | -| Action | 🔴 Not ready | | +| Action | 🟢 Ready | | | DataSource | 🔴 Not ready | Includes MetricProvider and ExecutionEngine | | DataContext | 🔴 Not ready | Also known as Configuration Stores | | DataAsset | 🔴 Not ready | | diff --git a/assets/docker/mercury/docker-compose.yml b/assets/docker/mercury/docker-compose.yml index 20f6d354236e..670e0b599f6e 100644 
--- a/assets/docker/mercury/docker-compose.yml +++ b/assets/docker/mercury/docker-compose.yml @@ -79,6 +79,8 @@ services: depends_on: db-provisioner: condition: service_completed_successfully + localstack: + condition: service_healthy mercury-service-api-v1: image: 258143015559.dkr.ecr.us-east-1.amazonaws.com/mercury/api:v1api-latest platform: linux/amd64 @@ -100,19 +102,26 @@ services: AUTH0_MERCURY_API_CLIENT_SECRET: ${AUTH0_MERCURY_API_CLIENT_SECRET} SENDGRID_API_KEY: fake_key LD_SDK_KEY: ${LD_SDK_KEY} + GX_SCHEDULER_LAMBDA_ARN: ${GX_SCHEDULER_LAMBDA_ARN} + GX_SCHEDULER_EXECUTION_ROLE_ARN: ${GX_SCHEDULER_EXECUTION_ROLE_ARN} + GX_SCHEDULER_EVENTBRIDGE_DLQ_ARN: ${GX_SCHEDULER_EVENTBRIDGE_DLQ_ARN} + GX_SCHEDULER_EVENTBRIDGE_REGION: ${GX_SCHEDULER_EVENTBRIDGE_REGION} + AWS_ENDPOINT_URL: ${LOCALSTACK_AWS_ENDPOINT_URL} + AWS_SECRET_ACCESS_KEY: ${LOCALSTACK_AWS_SECRET_ACCESS_KEY} + AWS_ACCESS_KEY_ID: ${LOCALSTACK_AWS_ACCESS_KEY_ID} depends_on: mercury-service-api: condition: service_started db-provisioner: condition: service_completed_successfully + localstack: + condition: service_healthy entrypoint: - /bin/bash - -c - | - sed -i 's/localhost/db/g' /app/services/ge_cloud/mercury/alembic.ini - cd /app/services/ge_cloud/mercury - alembic upgrade head cd /app/services/ge_cloud + ./scripts/localstack_init.sh gunicorn "mercury.api_v1:get_app()" -c ./fastapi-gunicorn.conf.py nginx: image: ${ECR_PULL_THROUGH_REPOSITORY_URL}library/nginx:latest @@ -124,3 +133,16 @@ services: depends_on: - mercury-service-api - mercury-service-api-v1 + localstack: + container_name: localstack + image: localstack/localstack + ports: + - "127.0.0.1:4566:4566" # LocalStack Gateway + - "127.0.0.1:4510-4559:4510-4559" # external services port range + environment: + # LocalStack configuration: https://docs.localstack.cloud/references/configuration/ + DEBUG: ${DEBUG:-0} + SERVICES: scheduler,lambda,sts,iam,ssm,events + volumes: + - "${LOCALSTACK_VOLUME_DIR:-./volume}:/var/lib/localstack" + - 
"/var/run/docker.sock:/var/run/docker.sock" diff --git a/assets/docker/mssql/README.md b/assets/docker/mssql/README.md index 0484f37b63fb..d12e6a765082 100644 --- a/assets/docker/mssql/README.md +++ b/assets/docker/mssql/README.md @@ -17,7 +17,7 @@ You should now be able to run the tests via `pytest --mssql` See: https://github.com/microsoft/mssql-docker/issues/668#issuecomment-1420259510 -3. Install the ODSBC 17 driver: +3. Install the ODBC 18 driver: https://learn.microsoft.com/en-us/sql/connect/odbc/linux-mac/install-microsoft-odbc-driver-sql-server-macos?view=sql-server-ver15 @@ -35,9 +35,3 @@ You should now be able to run the tests via `pytest --mssql` python -m pip install --force-reinstall --no-binary :all: pyodbc python -c "import pyodbc; print(pyodbc.version)" ``` - -2. If you are getting `Login timeout expired` when using localhost, try setting: - - ```sh - export GE_TEST_LOCAL_DB_HOSTNAME=127.0.0.1 - ``` diff --git a/assets/scripts/gx_cloud/experimental/onboarding_script.py b/assets/scripts/gx_cloud/experimental/onboarding_script.py deleted file mode 100644 index 508b768cb718..000000000000 --- a/assets/scripts/gx_cloud/experimental/onboarding_script.py +++ /dev/null @@ -1,115 +0,0 @@ -import pprint - -import great_expectations as gx -from great_expectations.checkpoint import Checkpoint -from great_expectations.core.expectation_suite import ExpectationSuite -from great_expectations.data_context import CloudDataContext -from great_expectations.datasource.fluent import BatchRequest, Datasource -from great_expectations.datasource.fluent.pandas_datasource import CSVAsset - -# Make sure GX_CLOUD_ACCESS_TOKEN and GX_CLOUD_ORGANIZATION_ID -# are set in your environment or config_variables.yml -# your organization_id is indicated on https://app.greatexpectations.io/tokens page - -# uncomment the next three lines to set them explicitly in this script -# import os -# os.environ["GX_CLOUD_ACCESS_TOKEN"] = "" -# os.environ["GX_CLOUD_ORGANIZATION_ID"] = "" - -# Create 
a GX Data Context -context: CloudDataContext = gx.get_context( - cloud_mode=True, -) - -# Provide Datasource name -datasource_name = None -assert datasource_name, "Please set datasource_name." - -# Get or add Datasource -datasource: Datasource = context.data_sources.add_or_update_pandas(datasource_name) - -# Provide an Asset name -asset_name = None -assert asset_name, "Please set asset_name." - -# Provide a path to data -path_to_data = None -# to use sample data uncomment next line -# path_to_data = "https://raw.githubusercontent.com/great-expectations/gx_tutorials/main/data/yellow_tripdata_sample_2019-01.csv" -assert path_to_data, "Please set path_to_data. This can be a local filepath or a remote URL." - -# Get or add Asset -try: - asset: CSVAsset = datasource.get_asset(asset_name=asset_name) -except LookupError: - asset: CSVAsset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data) - -# Build BatchRequest -batch_request: BatchRequest = asset.build_batch_request() - -print(f"\n{20*'='}\nDatasource Config\n{20*'='}\n") -pprint.pprint(datasource.dict()) - -# Provide an Expectation Suite name -expectation_suite_name = None -assert expectation_suite_name, "Please set expectation_suite_name." - -# Get or add Expectation Suite -expectation_suite: ExpectationSuite = context.suites.add( - ExpectationSuite(name=expectation_suite_name) -) -expectation_suite_ge_cloud_id: str = expectation_suite.ge_cloud_id - -# Add Expectations - -# Set a column name you want to test here -column_name = None -# Uncomment the next line for a column name from sample data -# column_name = "passenger_count" -assert column_name is not None, "Please set column_name." 
- -# Look up all expectations types here - https://greatexpectations.io/expectations/ -expectation_configuration = gx.core.ExpectationConfiguration( - **{ - "type": "expect_column_min_to_be_between", - "kwargs": {"column": column_name, "min_value": 0.1}, - "meta": {}, - } -) - -expectation_suite.add_expectation_configuration(expectation_configuration=expectation_configuration) - -# Save the Expectation Suite -expectation_suite.save() - -print(f"\n{20*'='}\nExpectation Suite\n{20*'='}\n") -pprint.pprint(expectation_suite) - -# Provide a Checkpoint name -checkpoint_name = None -assert checkpoint_name, "Please set checkpoint_name." - -checkpoint_config = { - "name": checkpoint_name, - "validations": [ - { - "expectation_suite_name": expectation_suite_name, - "expectation_suite_ge_cloud_id": expectation_suite.ge_cloud_id, - "batch_request": { - "datasource_name": datasource.name, - "data_asset_name": asset.name, - }, - } - ], -} - -checkpoint: Checkpoint = context.add_or_update_checkpoint(**checkpoint_config) - -print(f"\n{20*'='}\nCheckpoint Config\n{20*'='}\n") -pprint.pprint(checkpoint) - -# Run the Checkpoint: -result = checkpoint.run() - -print(f"\n{20*'='}\nValidation Result\n{20*'='}\n") -pprint.pprint(result) diff --git a/ci/azure-pipelines-sqlalchemy-compatibility.yml b/ci/azure-pipelines-sqlalchemy-compatibility.yml index dda083e8e6a4..007abf077690 100644 --- a/ci/azure-pipelines-sqlalchemy-compatibility.yml +++ b/ci/azure-pipelines-sqlalchemy-compatibility.yml @@ -191,8 +191,12 @@ stages: - bash: python -m pip install --upgrade pip displayName: "Update pip" + - script: ./scripts/install_mssql_odbc_driver.sh + displayName: "Install mssql odbc driver" + - script: | sqlcmd -U sa -P "ReallyStrongPwd1234%^&*" -Q "CREATE DATABASE test_ci;" -o create_db_output.txt + displayName: "Create mssql database" - script: | cp constraints-dev.txt constraints-dev-temp.txt diff --git a/docs/adr/0005-public-api-docstrings.md b/docs/adr/0005-public-api-docstrings.md new file 
mode 100644 index 000000000000..8154ae9a3d04 --- /dev/null +++ b/docs/adr/0005-public-api-docstrings.md @@ -0,0 +1,19 @@ +# 5. Docstrings for public API functions/classes + +Date: 2024-12-19 + +## Status + +Accepted + +## Context + +By marking an object as part of the public API, it automatically renders in our docs site. In order to provide a clear and informative experience for our end users, we should make sure we always include docstrings for public functions/classes. These docstrings should adhere to a consistent format such that they can be rendered in the same manner by our public API infrastructure. + +## Decision + +All objects decorated with `@public_api` will have docstrings. In order to be considered public, they must meet this criterion. + +## Consequences + +Marginal increase in developer burden but worthwhile due to a higher level of thought and care around what actually gets marked and rendered as part of our public API. diff --git a/docs/checks/docs_link_checker.py b/docs/checks/docs_link_checker.py deleted file mode 100644 index 4ec36cbbfc86..000000000000 --- a/docs/checks/docs_link_checker.py +++ /dev/null @@ -1,441 +0,0 @@ -#!/usr/bin/env python3 -"""A command-line tool used to check links in docusaurus markdown documentation - -To check all of our markdown documentation, from the repo root run: -python ./docs/checks/docs_link_checker.py -p docs -r docs -sr static -s docs -sp static --skip-external - -The above command: - - -p docs (also --path): The path to the markdown files you want to check. 
For example, if you wanted to check only the tutorial files, you could specify docs/tutorials - - -r docs (also --docs-root): The root of the docs folder, used to resolve absolute and docroot paths - - -sr static (also --static-root): The root of the static assets folder, used to resolve absolute paths for images - - -s docs (also --site-prefix): The site path prefix, used to resolve abosulte paths (ex: in http://blah/docs, it is the docs part) - - -sp static (also --static-prefix): The site static folder prefix, used to resolve abosulte image paths - - --skip-external: If present, external (http) links are not checked -""" - -from __future__ import annotations - -import logging -import pathlib -import re -import sys -from typing import List, Optional - -import click -import requests - -logger = logging.getLogger(__name__) -logger.addHandler(logging.StreamHandler()) -logger.setLevel(logging.INFO) - - -class LinkReport: - """Used to capture the details of a broken link - - Attributes: - link: The link that is broken. - file: The file in which the link is found. - message: A message describing the failure. 
- """ - - def __init__(self, link: str, file: pathlib.Path, message: str): - self.link = link - self.file = file - self.message = message - - def __str__(self): # type: ignore[explicit-override] # FIXME - return f"{self.message}: File: {self.file}, Link: {self.link}" - - -class LinkChecker: - """Checks image and file links in a set of markdown files.""" - - def __init__( # noqa: PLR0913 # too many arguments - self, - docs_path: pathlib.Path, - docs_root: pathlib.Path, - static_root: pathlib.Path, - site_prefix: str, - static_prefix: str, - skip_external: bool = False, - ): - """Initializes LinkChecker - - Args: - docs_path: The directory of markdown (.md) files whose links you want to check - docs_root: The root directory, used to resolve absolute and docroot paths - site_prefix: The top-level folder (ex: /docs) used to resolve absolute links to local files - static_prefix: The top-level static folder (ex: /static) used to resolve absolute image links to local files - skip_external: Whether or not to skip checking external (http..) links - """ - self._docs_path = docs_path - self._docs_root = docs_root - self._static_root = static_root - self._site_prefix = site_prefix.strip("/") - self._static_prefix = static_prefix.strip("/") - self._skip_external = skip_external - - markdown_link_regex = r"!?\[(.*)\]\((.*?)\)" # inline links, like [Description](link), images start with ! 
- self._markdown_link_pattern = re.compile(markdown_link_regex) - - external_link_regex = r"^https?:\/\/" # links that start with http or https - self._external_link_pattern = re.compile(external_link_regex) - - # with versioned docs, an absolute link may contain version information - version_info_regex = r"//" - self._version_info_pattern = re.compile(version_info_regex) - - # links that being with /{site_prefix}/(?:/(?P))?/(?P), may end with #abc - # ex: ^/docs(?:/(?P\d{1,2}\.\d{1,2}\.\d{1,2}))?/(?P[\w/-]+?)(?:#\S+)?$ - # /docs/0.15.50/cli#anchor - absolute_link_regex = ( - r"^/" - + site_prefix - + r"(?:/(?P\d{1,2}\.\d{1,2}\.\d{1,2}))?/(?P[\w/-]+?)(?:#\S+)?$" - ) - self._absolute_link_pattern = re.compile(absolute_link_regex) - - absolute_file_regex = rf"(?!(\/{site_prefix}\/))\/\S+\.mdx?(#[^'\"]+)?" - self._absolute_file_pattern = re.compile(absolute_file_regex) - - # docroot links start without a . or a slash - docroot_link_regex = r"^(?P\w[\.\w\/-]+\.md)(?:#\S+)?$" - self._docroot_link_pattern = re.compile(docroot_link_regex) - - # links starting a . 
or .., file ends with .md, may include an anchor with #abc - relative_link_regex = r"^(?P\.\.?[\.\w\/-]+\.md)(?:#\S+)?$" - self._relative_link_pattern = re.compile(relative_link_regex) - - absolute_image_regex = r"^\/(?P[\w\/-]+\.\w{3,4})$" - self._absolute_image_pattern = re.compile(absolute_image_regex) - - # ending with a 3-4 character suffix - relative_image_regex = r"^(?P\.\.?[\.\w\/-]+\.\w{3,4})$" - self._relative_image_pattern = re.compile(relative_image_regex) - - def _is_image_link(self, markdown_link: str) -> bool: - return markdown_link.startswith("!") - - def _is_doc_link(self, markdown_link: str) -> bool: - return not self._is_image_link(markdown_link) - - def _is_anchor_link(self, link: str) -> bool: - return link.startswith("#") - - def _check_external_link( - self, link: str, file: pathlib.Path - ) -> Optional[LinkReport]: - if self._skip_external: - return None - - logger.debug(f"Checking external link {link} in file {file}", link, file) - - try: - response = requests.get(link) - - if 400 <= response.status_code < 500: - logger.info( - f"External link {link} failed in file {file} with code {response.status_code}" - ) - return LinkReport( - link, - file, - f"External link returned status code: {response.status_code}", - ) - else: - logger.debug( - f"External link {link} successful in file {file}, response code: {response.status_code}", - ) - return None - except requests.exceptions.ConnectionError as err: - logger.info( - f"External link {link} in file {file} raised a connection error" - ) - return LinkReport( - link, file, f"External link raised a connection error {err.errno}" - ) - - def _get_absolute_path(self, path: pathlib.Path | str) -> pathlib.Path: - return self._docs_root.joinpath(path).resolve() - - def _get_absolute_static_path(self, path: pathlib.Path | str) -> pathlib.Path: - return self._static_root / path - - def _get_relative_path( - self, file: pathlib.Path, path: pathlib.Path | str - ) -> pathlib.Path: - # link should be relative 
to the location of the current file - return file.parent / path - - def _get_docroot_path(self, path: pathlib.Path | str) -> pathlib.Path: - return self._docs_path / path - - def _check_absolute_link( - self, - link: str, - file: pathlib.Path, - path: pathlib.Path | str, - version: Optional[str], - ) -> Optional[LinkReport]: - logger.debug(f"Checking absolute link {link} in file {file}") - - if version: - logger.debug(f"Skipping absolute link {link} due to version information") - return None - - # absolute links should point to files that exist (with the .md extension added) - md_file = pathlib.Path(path).resolve().with_suffix(".md") - logger.debug(f"Absolute link {link} resolved to path {md_file}") - - if not md_file.is_file(): - logger.info(f"Absolute link {link} in file {file} was not found") - return LinkReport(link, file, f"Linked file {md_file} not found") - else: - logger.debug(f"Absolute link {link} in file {file} found") - return None - - def _check_absolute_image( - self, link: str, file: pathlib.Path, path: pathlib.Path - ) -> Optional[LinkReport]: - logger.debug(f"Checking absolute image {link} in file {file}") - - image_file = self._get_absolute_static_path(path) - if not image_file.is_file(): - logger.info(f"Absolute image {link} in file {file} was not found") - return LinkReport(link, file, f"Image {image_file} not found") - else: - logger.debug(f"Absolute image {link} in file {file} found") - return None - - def _check_relative_link( - self, link: str, file: pathlib.Path, path: pathlib.Path - ) -> Optional[LinkReport]: - logger.debug(f"Checking relative link {link} in file {file}") - - md_file = self._get_relative_path(file, path) - logger.debug(f"Relative link {link} resolved to path {md_file}") - - if not md_file.is_file(): - logger.info(f"Relative link {link} in file {file} was not found") - return LinkReport(link, file, f"Linked file {md_file} not found") - else: - logger.debug(f"Relative link {link} in file{file} found") - return None - - def 
_check_relative_image( - self, link: str, file: pathlib.Path, path: pathlib.Path - ) -> Optional[LinkReport]: - logger.debug(f"Checking relative image {link} in file {file}") - - image_file = self._get_relative_path(file, path) - if not image_file.is_file(): - logger.info(f"Relative image {link} in file {file} was not found") - return LinkReport(link, file, f"Image {image_file} not found") - else: - logger.debug(f"Relative image {link} in file {file} found") - return None - - def _check_docroot_link( - self, link: str, file: pathlib.Path, path: pathlib.Path | str - ) -> Optional[LinkReport]: - logger.debug(f"Checking docroot link {link} in file {file}") - - md_file = self._get_docroot_path(path) - if not md_file.is_file(): - logger.info(f"Docroot link {link} in file {file} was not found") - return LinkReport(link, file, f"Linked file {md_file} not found") - else: - logger.debug(f"Docroot link {link} in file {file} found") - return None - - def _check_link( # noqa: PLR0912, C901 # too complex - self, match: re.Match, file: pathlib.Path - ) -> Optional[LinkReport]: - """Checks that a link is valid. Valid links are: - - Absolute links that begin with a forward slash and the specified site prefix (ex: /docs) with no suffix - - Absolute images with an image suffix - - Relative links that begin with either . or .. and have a .md suffix - - Relative images with an image suffix - - Docroot links that begin with a character (neither . 
or /) are relative to the doc root (ex: /docs) and have a .md suffix - - Absolute file paths for markdown files - - Args: - match: A positive match of a markdown link (ex: [...](...)) or image - file: The file where the match was found - - Returns: - A LinkReport if the link is broken, otherwise None - """ - link = match.group(2) - - # skip links that are anchor only (start with #) - if self._is_anchor_link(link): - return None - - if self._external_link_pattern.match(link): - result = self._check_external_link(link, file) - elif self._is_image_link(match.group(0)): - match = self._relative_image_pattern.match(link) # type: ignore[assignment] - if match: - result = self._check_relative_image(link, file, match.group("path")) - else: - match = self._absolute_image_pattern.match(link) - if match: - result = self._check_absolute_image(link, file, match.group("path")) - else: - result = LinkReport(link, file, "Invalid image link format") - else: - match = self._relative_link_pattern.match(link) # type: ignore[assignment] - if match: - result = self._check_relative_link(link, file, match.group("path")) - else: - match = self._absolute_link_pattern.match(link) - if match: - result = self._check_absolute_link( - link, file, match.group("path"), match.group("version") - ) - elif match := self._absolute_file_pattern.match(link): - # This could be more robust like the other checks, but the level of complexity will be high for versioned_docs, - # and we should be able to just set onBrokenMarkdownLinks: 'error' - result = None - else: - match = self._docroot_link_pattern.match(link) - if match: - result = self._check_docroot_link( - link, file, match.group("path") - ) - else: - result = LinkReport(link, file, "Invalid link format") - - return result - - def check_file(self, file: pathlib.Path) -> List[LinkReport]: - """Looks for all the links in a file and checks them. 
- - Returns: - A list of broken links, or an empty list if no links are broken - """ - with open(file) as f: - contents = f.read() - - matches = self._markdown_link_pattern.finditer(contents) - - result: List[LinkReport] = [] - - for match in matches: - report = self._check_link(match, file) - - if report: - result.append(report) - - # sometimes the description may contain a reference to an image - nested_match = self._markdown_link_pattern.match(match.group(1)) - if nested_match: - report = self._check_link(nested_match, file) - - if report: - result.append(report) - - return result - - -@click.command(help="Checks links and images in Docusaurus markdown files") -@click.option( - "--path", - "-p", - type=click.Path(exists=True, file_okay=True, path_type=pathlib.Path), - default=".", - help="Path to markdown file(s) to check", -) -@click.option( - "--docs-root", - "-r", - type=click.Path(exists=True, file_okay=False, path_type=pathlib.Path), - default=None, - help="Root to all docs for link checking", -) -@click.option( - "--static-root", - "-sr", - type=click.Path(exists=True, file_okay=False, path_type=pathlib.Path), - default="docs/docusaurus/static", - help="Root (static folder) to all images for link validating", -) -@click.option( - "--site-prefix", - "-s", - default="docs", - help="Top-most folder in the docs URL for resolving absolute paths", -) -@click.option( - "--static-prefix", - "-sp", - default="static", - help="Top-most folder in the site URL for resolving absolute image paths", -) -@click.option("--skip-external", is_flag=True) -def scan_docs_click( # noqa: PLR0913 - path: pathlib.Path, - docs_root: Optional[pathlib.Path], - static_root: pathlib.Path, - site_prefix: str, - static_prefix: str, - skip_external: bool, -) -> None: - code, message = scan_docs( - path, docs_root, static_root, site_prefix, static_prefix, skip_external - ) - click.echo(message) - sys.exit(code) - - -def scan_docs( # noqa: C901, PLR0913 - path: pathlib.Path, - docs_root: 
Optional[pathlib.Path], - static_root: pathlib.Path, - site_prefix: str, - static_prefix: str, - skip_external: bool, -) -> tuple[int, str]: - if not docs_root: - docs_root = path - elif not docs_root.is_dir(): - return 1, f"Docs root path: {docs_root} is not a directory" - - # prepare our return value - result: List[LinkReport] = list() - checker = LinkChecker( - path, docs_root, static_root, site_prefix, static_prefix, skip_external - ) - - if path.is_dir(): - # if the path is a directory, get all .md files within it - for file in path.rglob("*.md"): - report = checker.check_file(file) - if report: - result.extend(report) - elif path.is_file(): - # else we support checking one file at a time - result.extend(checker.check_file(path)) - else: - return 1, f"Docs path: {path} is not a directory or file" - - if result: - message: list[str] = [] - message.append("----------------------------------------------") - message.append("------------- Broken Link Report -------------") - message.append("----------------------------------------------") - for line in result: - message.append(str(line)) - - return 1, "\n".join(message) - else: - return 0, "No broken links found" - - -def main(): - scan_docs_click() - - -if __name__ == "__main__": - main() diff --git a/docs/docusaurus/docs/application_integration_support.md b/docs/docusaurus/docs/application_integration_support.md index 32b2189f80b2..2f5642dda81d 100644 --- a/docs/docusaurus/docs/application_integration_support.md +++ b/docs/docusaurus/docs/application_integration_support.md @@ -25,16 +25,17 @@ The following table defines the GX Cloud, GX Core, and Community Supported integ | Data Sources1 | Snowflake
Databricks (SQL)
PostgreSQL2 | Snowflake
Databricks (SQL)
PostgreSQL
SQLite
BigQuery
Spark
Pandas | MSSQL
MySQL
| | Configuration Stores3 | In-app | File system | None | | Data Doc Stores | In-app | File system | None | -| Actions | Email | Slack
Email
Microsoft Teams | None | -| Credential Stores | Environment variables | Environment variables
YAML4 | None | -| Orchestrator | Airflow 5 6 | Airflow 5 6 | None | +| Actions | Email | Slack
Email
Microsoft Teams
Custom4 | None | +| Credential Stores | Environment variables | Environment variables
YAML5 | None | +| Orchestrator | Airflow 6 7 | Airflow 6 7 | None | 1 We've also seen GX work with the following data sources in the past but we can't guarantee ongoing compatibility. These data sources include Clickhouse, Vertica, Dremio, Teradata, Athena, EMR Spark, AWS Glue, Microsoft Fabric, Trino, Pandas on (S3, GCS, Azure), Databricks (Spark), and Spark on (S3, GCS, Azure).
2 Support for BigQuery in GX Cloud will be available in a future release.
-3 This includes configuration storage for Expectations, Checkpoints, Validation Definitions, and Validation Result
-4 config_variables.yml
-5 Although only Airflow is supported, GX Cloud and GX Core should work with any orchestrator that executes Python code.
-6 Airflow version 2.9.0+ required
+3 This includes configuration storage for Expectations, Checkpoints, Validation Definitions, and Validation Results.
+4 We support the general workflow for creating custom Actions but cannot help troubleshoot the domain-specific logic within a custom Action.
+5 Use `config_variables.yml`.
+6 Although only Airflow is supported, GX Cloud and GX Core should work with any orchestrator that executes Python code.
+7 Airflow version 2.9.0+ required.
### GX components diff --git a/docs/docusaurus/docs/cloud/_try_gx_cloud.md b/docs/docusaurus/docs/cloud/_try_gx_cloud.md deleted file mode 100644 index b196678f06d4..000000000000 --- a/docs/docusaurus/docs/cloud/_try_gx_cloud.md +++ /dev/null @@ -1,208 +0,0 @@ ---- -sidebar_label: 'Try GX Cloud' -title: 'Try GX Cloud' -id: try_gx_cloud -description: Try GX Cloud features and functionality. -toc_min_heading_level: 2 -toc_max_heading_level: 2 ---- - -import TabItem from '@theme/TabItem'; -import Tabs from '@theme/Tabs'; - -If you're new to GX Cloud, start here to learn how you can quickly connect to your Data Assets and validate data. - -If you've tested GX Cloud features and functionality and discovered it's a great solution for your organization, see [Connect GX Cloud](./connect/connect_lp.md). - -## Prerequisites - -- You have a [GX Cloud account](https://greatexpectations.io/cloud). - -- You have a [Docker instance](https://docs.docker.com/get-docker/). - -- You've reviewed the prerequisites for the Data Asset you'll create. See [Create a Data Asset](#create-a-data-asset). - -## Self-hosted deployment - -To try GX Cloud, you use a [self-hosted deployment](/cloud/deploy/deployment_patterns.md) to run the GX Agent with Docker, connect the GX Agent to your target Data Sources, and use the GX Cloud web UI to define your Data Assets, create Expectations, and run Validations. A self-hosted deployment is recommended when you want to test GX Cloud features and functionality and it differs from the recommended [org-hosted deployment](/cloud/deploy/deployment_patterns.md), in which the GX Agent runs in your organization's deployment environment. - -## Get your user access token and copy your organization ID - -You'll need your user access token and organization ID to set your environment variables. Access tokens shouldn't be committed to version control software. - -1. In GX Cloud, click **Settings** > **Tokens**. - -2. 
In the **User access tokens** pane, click **Create user access token**. - -3. In the **Token name** field, enter a name for the token that will help you quickly identify it. - -4. Click **Create**. - -5. Copy and then paste the user access token into a temporary file. The token can't be retrieved after you close the dialog. - -6. Click **Close**. - -7. Copy the value in the **Organization ID** field into the temporary file with your user access token and then save the file. - - GX recommends deleting the temporary file after you set the environment variables. - -## Set the environment variables and deploy the GX Agent - -Environment variables securely store your GX Cloud access credentials. The GX Agent runs open source GX code in GX Cloud, and it allows you to securely access your data without connecting to it or interacting with it directly. To learn more about the GX Agent and deployment patterns, see [GX Cloud deployment patterns](/cloud/deploy/deployment_patterns.md). - -1. Start the Docker Engine. - -2. Run the following code to set the `GX_CLOUD_ACCESS_TOKEN` and `GX_CLOUD_ORGANIZATION_ID` environment variables, install GX Cloud and its dependencies, and start the GX Agent: - - ```bash title="Terminal input" - docker run --rm --pull=always -e GX_CLOUD_ACCESS_TOKEN="" -e GX_CLOUD_ORGANIZATION_ID="" greatexpectations/agent - ``` - Replace `user_access_token` and `organization_id` with the values you copied previously. - -3. In GX Cloud, confirm the GX Agent status is **Active Agent** and the icon is green. This indicates the GX Agent is running. If it isn't, repeat step 2 and confirm the `user_access_token` and `organization_id` values are correct. - - ![GX Agent status](/img/gx_agent_status.png) - -4. Optional. If you created a temporary file to record your user access token and Organization ID, delete it. - -5. Optional. Run `docker ps` or open Docker Desktop to confirm the agent is running. 
- - If you stop the GX Agent, close the terminal, and open a new terminal you'll need to set the environment variables again. - - To edit an environment variable, stop the GX Agent, edit the environment variable, save the change, and then restart the GX Agent. - -## Create a Data Asset - -Create a Data Asset to define the data you want GX Cloud to access. - - - - -Define the data you want GX Cloud to access within Snowflake. - -### Prerequisites - -- You have a Snowflake database, schema, and table. - -- You have a [Snowflake account](https://docs.snowflake.com/en/user-guide-admin) with USAGE privileges on the table, database, and schema you are validating, and you have SELECT privileges on the table you are validating. To improve data security, GX recommends using a separate Snowflake user service account to connect to GX Cloud. - -- You know your Snowflake password. - -### Create a Snowflake Data Asset - -Create a Data Asset to define the data you want GX Cloud to access within Snowflake. - -1. In GX Cloud, click **Data Assets** > **New Data Asset**. - -2. Click the **New Data Source** tab and then select **Snowflake**. - -3. Enter a meaningful name for the Data Asset in the **Data Source name** field. - -4. Optional. To use a connection string to connect to a Data Source, click the **Use connection string** selector, enter a connection string, and then move to step 6. The connection string format is: `snowflake://:@`. - -5. Complete the following fields: - - - **Account identifier**: Enter your Snowflake organization and account name separated by a hyphen (`oraganizationname-accountname`) or your account name and a legacy account locator separated by a period (`accountname.region`). The legacy account locator value must include the geographical region. For example, `us-east-1`. 
- - To locate your Snowflake organization name, account name, or legacy account locator values see [Finding the Organization and Account Name for an Account](https://docs.snowflake.com/en/user-guide/admin-account-identifier#finding-the-organization-and-account-name-for-an-account) or [Using an Account Locator as an Identifier](https://docs.snowflake.com/en/user-guide/admin-account-identifier#using-an-account-locator-as-an-identifier). - - - **Username**: Enter the username you use to access Snowflake. - - - **Password**: Enter a Snowflake password. To improve data security, GX recommends using a Snowflake service account to connect to GX Cloud. - - - **Database**: Enter the name of the Snowflake database where the data you want to validate is stored. In Snowsight, click **Data** > **Databases**. In the Snowflake Classic Console, click **Databases**. - - - **Schema**: Enter the name of the Snowflake schema (table) where the data you want to validate is stored. - - - **Warehouse**: Enter the name of your Snowflake database warehouse. In Snowsight, click **Admin** > **Warehouses**. In the Snowflake Classic Console, click **Warehouses**. - - - **Role**: Enter your Snowflake role. - -6. Click **Connect**. - -7. Complete the following fields: - - - **Table name**: Enter a name for the table you're creating in the Data Asset. - - - **Data Asset name**: Enter a name for the Data Asset. Data Asset names must be unique. If you use the same name for multiple Data Assets, each Data Asset must be associated with a unique Data Source. - -8. Select the **Complete Asset** tab to provide all Data Asset records to your Expectations and validations, or select the **Batches** tab to use subsets of Data Asset records for your Expectations and validations. 
If you selected the **Batches** tab, complete the following fields: - - - **Split Data Asset by** - Select **Year** to partition Data Asset records by year, select **Year - Month** to partition Data Asset records by year and month, or select **Year - Month - Day** to partition Data Asset records by year, month, and day. - - - **Column of datetime type** - Enter the name of the column containing the date and time data. - -9. Optional. Select **Add Data Asset** to add additional tables or queries and repeat steps 8 and 9. - -10. Click **Finish**. The Data Asset(s), a default empty Expectation Suite, and a default Checkpoint are created. - - - - -Define the data you want GX Cloud to access within PostgreSQL. - -### Prerequisites - -- You have a PostgreSQL database, schema, and table. - -- You have a [PostgreSQL instance](https://www.postgresql.org/download/). To improve data security, GX recommends using a separate user service account to connect to GX Cloud. - -### Create a PostgreSQL Data Asset - -1. In GX Cloud, click **Data Assets** > **New Data Asset**. - -2. Click the **New Data Source** tab and then select **PostgreSQL**. - -3. Enter a meaningful name for the Data Asset in the **Data Source name** field. - -4. Enter a connection string in the **Connection string** field. The connection string format is `postgresql+psycopg2//YourUserName:YourPassword@YourHostname:5432/YourDatabaseName`. - -5. Click **Connect**. - -6. Complete the following fields: - - - **Table name**: Enter a name for the table you're creating in the Data Asset. - - - **Data Asset name**: Enter a name for the Data Asset. Data Asset names must be unique. If you use the same name for multiple Data Assets, each Data Asset must be associated with a unique Data Source. - -7. Select the **Complete Asset** tab to provide all Data Asset records to your Expectations and validations, or select the **Batches** tab to use subsets of Data Asset records for your Expectations and validations. 
If you selected the **Batches** tab, complete the following fields: - - - **Split Data Asset by** - Select **Year** to partition Data Asset records by year, select **Year - Month** to partition Data Asset records by year and month, or select **Year - Month - Day** to partition Data Asset records by year, month, and day. - - - **Column of datetime type** - Enter the name of the column containing the date and time data. - -8. Optional. Select **Add Data Asset** to add additional tables or queries and repeat steps 8 and 9. - -9. Click **Finish**. The Data Asset(s), a default empty Expectation Suite, and a default Checkpoint are created. - - - - - -## Add an Expectation - -An Expectation is a verifiable assertion about your data. They make implicit assumptions about your data explicit. - -1. In the **Data Assets** list, click the Data Asset name. - -2. Click the **Expectations** tab. - -3. Click **New Expectation**. - -4. Select an Expectation type, enter the column name, and then complete the optional fields. To view descriptions of the available Expectation types, see [Available Expectations](./expectations/manage_expectations.md#available-expectations). - -5. Click **Save**. The Expectation is added to the default Expectation Suite. - -6. Optional. Repeat steps 3 to 5 to add additional Expectations. - -## Validate Expectations - -1. Click **Validate**. - -2. When the confirmation message appears, click **See results**, or click the **Validations** tab. diff --git a/docs/docusaurus/docs/cloud/alerts/manage_alerts.md b/docs/docusaurus/docs/cloud/alerts/manage_alerts.md index c2f9da835da2..7433a4a25c54 100644 --- a/docs/docusaurus/docs/cloud/alerts/manage_alerts.md +++ b/docs/docusaurus/docs/cloud/alerts/manage_alerts.md @@ -20,7 +20,7 @@ Every time a Data Asset fails a validation run, GX Cloud sends an email to all u 1. In GX Cloud, click **Data Assets**. 2. Click a Data Asset in the **Data Assets** list. -3. Click the **Expectations** tab and then **Alerts**. +3. 
Click **Alerts**. 4. Click the **toggle switch** to enable or disable email alerts for the Data Asset. If you disabled an alert, you will stop receiving emails for the Data Asset immediately. If you enabled an alert, you will begin receiving the emails as soon as the Data Asset’s next full validation run is complete. \ No newline at end of file diff --git a/docs/docusaurus/docs/cloud/connect/connect_airflow.md b/docs/docusaurus/docs/cloud/connect/connect_airflow.md index 6fdc9ae892f5..eb4511e10d07 100644 --- a/docs/docusaurus/docs/cloud/connect/connect_airflow.md +++ b/docs/docusaurus/docs/cloud/connect/connect_airflow.md @@ -5,7 +5,7 @@ id: connect_airflow description: Connect GX Cloud to an Airflow Orchestrator. --- -In this quickstart, you'll learn how to use GX Cloud with Apache Airflow. You'll create a simple DAG that runs a Checkpoint that you have already set up in GX Cloud, and then trigger it through a local installation of an Airflow server. +In this quickstart, you'll learn how to use GX Cloud with Apache Airflow. You'll create a basic DAG that runs a Checkpoint in GX Cloud, and then trigger it through a local installation of an Airflow server. Apache Airflow is an orchestration tool that allows you to schedule and monitor your data pipelines. For more information about Apache Airflow, see the [Apache Airflow documentation](https://airflow.apache.org/docs/apache-airflow/stable/index.html). @@ -15,9 +15,9 @@ Apache Airflow is an orchestration tool that allows you to schedule and monitor - You have installed Apache Airflow and initialized the database (__airflow db init__). -- You have [connected GX Cloud to a Data Asset on a Data Source](/cloud/data_assets/manage_data_assets.md). +- You have [connected GX Cloud to a Data Asset on a Data Source](/cloud/data_assets/manage_data_assets.md). (Note that this automatically creates the Checkpoint your DAG will run.) 
-- You have [created an Expectation Suite](/cloud/expectation_suites/manage_expectation_suites.md) and [added Expectations](/cloud/expectations/manage_expectations.md#add-an-expectation). +- You have [added Expectations](/cloud/expectations/manage_expectations.md#create-an-expectation). ## Run Airflow Standalone to create a fresh local Airflow environment @@ -76,8 +76,12 @@ Apache Airflow is an orchestration tool that allows you to schedule and monitor # You can also set GX_CLOUD_ACCESS_TOKEN and GX_CLOUD_ORGANIZATION_ID as environment variables GX_CLOUD_ACCESS_TOKEN = "" GX_CLOUD_ORGANIZATION_ID = "" - # Find the checkpoint name in the GX Cloud UI beside the Validate button - CHECKPOINT_NAME = "" + # Find the Checkpoint name in the GX Cloud UI. + # - Go to the "Validations" tab. + # - Next to the "Validate" button, click the code snippet icon. + # - Click "Generate snippet". + # - Copy the Checkpoint name from the code snippet and use it below. + CHECKPOINT_NAME = "my_data_asset 123ABC - Default Checkpoint" context = gx.get_context( mode="cloud", cloud_organization_id=GX_CLOUD_ACCESS_TOKEN, diff --git a/docs/docusaurus/docs/cloud/connect/connect_databrickssql.md b/docs/docusaurus/docs/cloud/connect/connect_databrickssql.md index f975fa559bc3..6e2743cac8d0 100644 --- a/docs/docusaurus/docs/cloud/connect/connect_databrickssql.md +++ b/docs/docusaurus/docs/cloud/connect/connect_databrickssql.md @@ -33,6 +33,8 @@ import Tabs from '@theme/Tabs'; 5. Select one or more tables to import as Data Assets. -6. Click **Add x Asset(s)**. +6. Decide if you want to **Generate Expectations that detect column changes in selected Data Assets**. -7. Add an Expectation. See [Add an Expectation](/cloud/expectations/manage_expectations.md#add-an-expectation). \ No newline at end of file +7. Click **Add x Asset(s)**. + +8. Add an Expectation. See [Add an Expectation](/cloud/expectations/manage_expectations.md#add-an-expectation). 
\ No newline at end of file diff --git a/docs/docusaurus/docs/cloud/connect/connect_lp.md b/docs/docusaurus/docs/cloud/connect/connect_lp.md index 6c00efbf5e68..dc8daaa30f4d 100644 --- a/docs/docusaurus/docs/cloud/connect/connect_lp.md +++ b/docs/docusaurus/docs/cloud/connect/connect_lp.md @@ -11,7 +11,7 @@ import LinkCard from '@site/src/components/LinkCard'; import OverviewCard from '@site/src/components/OverviewCard'; - Connect GX Cloud to your deployment environment. To connect to a Data Source not currently available in GX Cloud, see [Connect to data](/core/connect_to_data/connect_to_data.md) in the GX Core documentation. + Connect GX Cloud to your deployment environment. To connect to a Data Source not currently available in GX Cloud, use [GX Core](/core/connect_to_data/connect_to_data.md). @@ -20,4 +20,4 @@ import OverviewCard from '@site/src/components/OverviewCard'; - \ No newline at end of file + diff --git a/docs/docusaurus/docs/cloud/connect/connect_postgresql.md b/docs/docusaurus/docs/cloud/connect/connect_postgresql.md index a7ac41c580f2..e08f3e08d966 100644 --- a/docs/docusaurus/docs/cloud/connect/connect_postgresql.md +++ b/docs/docusaurus/docs/cloud/connect/connect_postgresql.md @@ -58,7 +58,9 @@ import Tabs from '@theme/Tabs'; 5. Select one or more tables to import as Data Assets. -6. Click **Add x Asset(s)**. +6. Decide if you want to **Generate Expectations that detect column changes in selected Data Assets**. -7. Add an Expectation. See [Add an Expectation](/cloud/expectations/manage_expectations.md#add-an-expectation). +7. Click **Add x Asset(s)**. + +8. Add an Expectation. See [Add an Expectation](/cloud/expectations/manage_expectations.md#add-an-expectation). 
diff --git a/docs/docusaurus/docs/cloud/connect/connect_python.md b/docs/docusaurus/docs/cloud/connect/connect_python.md index 5d7cf15512cc..b46401e503f0 100644 --- a/docs/docusaurus/docs/cloud/connect/connect_python.md +++ b/docs/docusaurus/docs/cloud/connect/connect_python.md @@ -60,7 +60,7 @@ Environment variables securely store your GX Cloud access credentials. export GX_CLOUD_ORGANIZATION_ID= ``` - :::note + :::note Note After you save your **GX_CLOUD_ACCESS_TOKEN** and **GX_CLOUD_ORGANIZATION_ID**, you can use Python scripts to access GX Cloud and complete other tasks. See the [GX Core guides](/core/introduction/introduction.mdx). ::: @@ -110,17 +110,12 @@ Environment variables securely store your GX Cloud access credentials. ## Validate data -1. Run the following Python code to define a Checkpoint and examine the data to determine if it matches the defined Expectations: - - ```python title="Python" name="tutorials/quickstart/quickstart.py create_checkpoint" - ``` - -2. Use the following command to return the Validation Results: +1. Run the following Python code to examine the data and determine if it matches the defined Expectations. This will return Validation Results: ```python title="Python" name="tutorials/quickstart/quickstart.py run_checkpoint" ``` -3. Run the following Python code to view an HTML representation of the Validation Results in the generated Data Docs: +2. 
Run the following Python code to view a JSON representation of the Validation Results in the generated Data Docs: ```python title="Python" name="tutorials/quickstart/quickstart.py view_results" ``` diff --git a/docs/docusaurus/docs/cloud/connect/connect_snowflake.md b/docs/docusaurus/docs/cloud/connect/connect_snowflake.md index 85f9e45102b6..0a4d6a30392e 100644 --- a/docs/docusaurus/docs/cloud/connect/connect_snowflake.md +++ b/docs/docusaurus/docs/cloud/connect/connect_snowflake.md @@ -15,6 +15,7 @@ import Tabs from '@theme/Tabs'; - You have a [Snowflake account](https://docs.snowflake.com/en/user-guide-admin) with USAGE privileges on the table, database, and schema you are validating, and you have SELECT privileges on the table you are validating. - Optional. To improve data security, GX recommends using a separate Snowflake user service account to connect to GX Cloud. + - Optional. To streamline automations and improve security, you can connect to Snowflake with key-pair authentication instead of a password. Note that this requires using GX Core in combination with GX Cloud. - Optional. You can use an existing Snowflake warehouse, but GX recommends creating a separate warehouse for GX Cloud to simplify cost management and optimize performance. @@ -58,8 +59,18 @@ Depending on your Snowflake permissions, you may need to ask an admin on your te ![Snowflake Run All](/img/run_all.png) + ## Connect to a Snowflake Data Source and add a Data Asset +:::tip To connect with key-pair authentication, use GX Core +To connect to a Snowflake Data Source using key-pair authentication instead of a password, do the following using GX Core: + +1. [Create a Cloud Data Context](/core/set_up_a_gx_environment/create_a_data_context.md?context_type=gx_cloud). +2. Pass your private key when you [create a Data Source](/core/connect_to_data/sql_data/sql_data.md) in the Cloud Data Context. 
+ +Then, you can use GX Cloud to [add a Data Asset](/cloud/data_assets/manage_data_assets.md#add-a-data-asset-from-an-existing-data-source) from that Data Source. +::: + 1. In GX Cloud, click **Data Assets** > **New Data Asset** > **New Data Source** > **Snowflake**. 2. Enter a meaningful name for the Data Source in the **Data Source name** field. @@ -96,7 +107,10 @@ Depending on your Snowflake permissions, you may need to ask an admin on your te 6. Select one or more tables to import as Data Assets. -7. Click **Add x Asset(s)**. +7. Decide if you want to **Generate Expectations that detect column changes in selected Data Assets**. + +8. Click **Add x Asset(s)**. + +9. Add an Expectation. See [Add an Expectation](/cloud/expectations/manage_expectations.md#add-an-expectation). -8. Add an Expectation. See [Add an Expectation](/cloud/expectations/manage_expectations.md#add-an-expectation). diff --git a/docs/docusaurus/docs/cloud/data_assets/manage_data_assets.md b/docs/docusaurus/docs/cloud/data_assets/manage_data_assets.md index 9e42155b1259..6ca1e14eac9e 100644 --- a/docs/docusaurus/docs/cloud/data_assets/manage_data_assets.md +++ b/docs/docusaurus/docs/cloud/data_assets/manage_data_assets.md @@ -28,19 +28,19 @@ Define the data you want GX Cloud to access. 3. Select one or more tables to import as Data Assets. -4. Click **Add x Asset(s)**. +4. Decide if you want to **Generate Expectations that detect column changes in selected Data Assets**. + +5. Click **Add x Asset(s)**. Then you can [add an Expectation](/cloud/expectations/manage_expectations.md#add-an-expectation) for your new Data Asset. ## View Data Asset metrics -Data Asset metrics provide you with insight into the data you can use for your data validations. +Data Asset metrics provide you with insight into the data you can use for your data validations. When you create a new Data Asset, schema data is automatically fetched. 1. 
In GX Cloud, click **Data Assets** and then select a Data Asset in the **Data Assets** list. -2. Click the **Overview** tab. - - When you select a new Data Asset, schema data is automatically fetched. +2. Click the **Metrics** tab. 3. Optional. Select one of the following options: diff --git a/docs/docusaurus/docs/cloud/deploy/deploy_gx_agent.md b/docs/docusaurus/docs/cloud/deploy/deploy_gx_agent.md index f09c4decb140..19510b158e0f 100644 --- a/docs/docusaurus/docs/cloud/deploy/deploy_gx_agent.md +++ b/docs/docusaurus/docs/cloud/deploy/deploy_gx_agent.md @@ -1,7 +1,7 @@ --- title: 'Deploy the GX Agent' id: deploy_gx_agent -description: Deploy the GX Agent to use GX Cloud features and functionality. +description: Deploy the GX Agent to connect to your Data Source within your own environment. toc_min_heading_level: 2 toc_max_heading_level: 2 --- @@ -12,12 +12,6 @@ import Tabs from '@theme/Tabs'; The GX Agent is used to run an [agent-enabled deployment](/cloud/deploy/deployment_patterns.md#agent-enabled-deployment) of GX Cloud. If you are running a fully-hosted or read-only deployment, you do not need to deploy the GX Agent. -:::info Enable the GX Agent - -The GX Agent, and agent-enabled deployments, are not available by default in GX Cloud. To enable the GX Agent for your GX Cloud organization, reach out to GX Support at support@greatexpectations.io. - -::: - The GX Agent serves as an intermediary between GX Cloud and your organization's data stores. GX Cloud does not connect directly to your data in an agent-enabled deployment, and all data access occurs within the GX Agent. GX Cloud sends jobs to the GX Agent, the GX Agent executes these jobs against your data, and then sends the job results to GX Cloud. A local deployment of the GX Agent will allow you to test GX Cloud setup or processes from a single machine before moving to a shared production deployment. 
Alternatively, you can run the GX Agent in your deployment environment and leverage GX Cloud while connecting to Data Sources using your organization's environment and infrastructure, for enhanced control and security. @@ -26,36 +20,42 @@ A local deployment of the GX Agent will allow you to test GX Cloud setup or proc ## Prerequisites - You have a [GX Cloud account](https://greatexpectations.io/cloud). -- You have reached out to GX Support at support@greatexpectations.io to request a GX Agent deployment. - You have a [Docker instance](https://docs.docker.com/get-docker/) or [kubectl](https://kubernetes.io/docs/tasks/tools/). -## Get your access token and organization ID - -You need your access token and organization ID to deploy the GX Agent. Access tokens shouldn't be committed to version control software. +## Enable the GX Agent -If you've used GX Cloud previously, you have your access token and organization ID, and you need to restart the GX Agent, see [Deploy the GX Agent](#deploy-the-gx-agent). +The GX Agent is not enabled by default in GX Cloud. To enable the GX Agent for your GX Cloud organization, request the Agent when adding a Data Source. The workflow depends on whether or not your organization has any Data Sources yet. - + -1. Sign in to GX Cloud. +1. Go to **Data Assets**. +2. Select a Data Source type. +3. Click **Request Agent**. -2. Complete the survey and then click **Continue to GX Cloud**. - -3. Copy and then paste the **Access token** and **Organization ID** values into a temporary file. You'll need them to deploy the GX Agent. + + -4. Click **Deploy the GX Agent** and [deploy the GX Agent](#deploy-the-gx-agent). +1. Go to **Data Assets**. +2. Select **New Data Asset**. +3. Select **New Data Source**. +4. Select a Data Source type. +5. Click **Request Agent**. - + + +We will send you a confirmation email when we fulfill your request. 
You can continue following the steps below to deploy the GX Agent while we work on enabling it for your organization. -Use the information provided here to view your organization ID or create a new access token. This can be helpful if you've forgotten your organization ID or access token, and you need to restart the GX Agent. +## Get your access token and organization ID + +You need your access token and organization ID to deploy the GX Agent. Access tokens shouldn't be committed to version control software. 1. In GX Cloud, click **Settings** > **Tokens**. @@ -75,8 +75,6 @@ Use the information provided here to view your organization ID or create a new a 8. [Deploy the GX Agent](#deploy-the-gx-agent). - - ## Deploy the GX Agent diff --git a/docs/docusaurus/docs/cloud/expectation_suites/manage_expectation_suites.md b/docs/docusaurus/docs/cloud/expectation_suites/manage_expectation_suites.md deleted file mode 100644 index 2e636e6fea98..000000000000 --- a/docs/docusaurus/docs/cloud/expectation_suites/manage_expectation_suites.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -sidebar_label: 'Manage Expectation Suites' -title: 'Manage Expectation Suites' -description: Create and manage Expectation Suites in GX Cloud. ---- - -Expectation Suites contain multiple Expectations for a single Data Asset. Like Expectations, they can help you better understand your data and help you improve data quality. A default Expectation Suite is created when you create a Data Asset. - - - -## Prerequisites - -- You have a [Data Asset](/cloud/data_assets/manage_data_assets.md). - -## Create an Expectation Suite - -If you have specific business requirements, or you want to examine specific data, you can create an empty Expectation Suite and then add Expectations individually. - -1. In GX Cloud, click **Data Assets** and select a Data Asset in the **Data Assets** list. - -2. Click the **Expectations** tab. - -3. Click **New Suite** in the **Expectation Suites** pane. - -4. 
Enter a name for the Expectation Suite in the **Expectation Suite name** field. - -5. Click **Create**. - -7. Add Expectations to the Expectation Suite. See [Create an Expectation](/cloud/expectations/manage_expectations.md#create-an-expectation). - -8. Optional. Run a Validation on the Expectation Suite. See [Run a Validation](/cloud/validations/manage_validations.md#run-a-validation). - -## Edit an Expectation Suite - -1. In GX Cloud, click **Data Assets**. - -2. Click the Expectations tab. - -3. Select an Expectation Suite in the Expectation Suites list. - -4. Click **Edit** on the schedule component for the Expectation Suite you want to edit. - -5. Edit the Expectation Suite name or validation schedule and then click **Save**. - -6. Optional. If the Expectation Suite name was changed, update the name in all code that included the previous Expectation Suite name. - -## Delete an Expectation Suite - -1. Click **Expectation Suites**. - -2. Click **Delete** for the Expectation Suite you want to delete. - -3. Click **Delete**. 
- -## Related documentation - -- [Manage Expectations](../expectations/manage_expectations.md) - -- [Manage Validations](../validations/manage_validations.md) \ No newline at end of file diff --git a/docs/docusaurus/docs/cloud/expectations/expectation_images/dynamic_parameters.png b/docs/docusaurus/docs/cloud/expectations/expectation_images/dynamic_parameters.png deleted file mode 100644 index 9fa923abba06..000000000000 Binary files a/docs/docusaurus/docs/cloud/expectations/expectation_images/dynamic_parameters.png and /dev/null differ diff --git a/docs/docusaurus/docs/cloud/expectations/manage_expectations.md b/docs/docusaurus/docs/cloud/expectations/manage_expectations.md index fdf4fd6c63af..af82e186d108 100644 --- a/docs/docusaurus/docs/cloud/expectations/manage_expectations.md +++ b/docs/docusaurus/docs/cloud/expectations/manage_expectations.md @@ -4,7 +4,7 @@ title: "Manage Expectations" description: Create and manage Expectations in GX Cloud. --- -An Expectation is a verifiable assertion about your data. They make implicit assumptions about your data explicit, and they provide a flexible, declarative language for describing expected behavior. They can help you better understand your data and help you improve data quality. An Expectation Suite contains multiple Expectations. +An Expectation is a verifiable assertion about your data. They make implicit assumptions about your data explicit, and they provide a flexible, declarative language for describing expected behavior. They can help you better understand your data and help you improve data quality. @@ -16,52 +16,53 @@ An Expectation is a verifiable assertion about your data. They make implicit ass The following table lists the available GX Cloud Expectations. -| Data Quality Issue | Expectation | Description | Dynamic Parameters? 
| -|--------------------|-----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------|--------------------| -| Cardinality | `expect_column_values_to_be_unique` | Expect each column value to be unique. | No | -| Cardinality | `expect_compound_columns_to_be_unique` | Expect the compound columns to be unique. | No | -| Cardinality | `expect_select_column_values_to_be_unique_within_record` | Expect the values for each record to be unique across the columns listed. Note that records can be duplicated. | No | -| Cardinality | `expect_column_proportion_of_unique_values_to_be_between` | Expect the proportion of unique values to be between a minimum value and a maximum value. | Yes | -| Cardinality | `expect_column_unique_value_count_to_be_between` | Expect the number of unique values to be between a minimum value and a maximum value. | Yes | -| Data Integrity | `expect_column_pair_values_to_be_equal` | Expect the values in column A to be the same as column B. | No | -| Data Integrity | `expect_multicolumn_sum_to_equal` | Expect that the sum of row values in a specified column list is the same for each row, and equal to a specified sum total. | No | -| Distribution | `expect_column_pair_values_A_to_be_greater_than_B` | Expect the values in column A to be greater than column B. | No | -| Distribution | `expect_column_values_to_be_between` | Expect the column entries to be between a minimum value and a maximum value. | No | -| Distribution | `expect_column_z_scores_to_be_less_than` | Expect the Z-scores of a column's values to be less than a given threshold. | No | -| Distribution | `expect_column_stdev_to_be_between` | Expect the column standard deviation to be between a minimum value and a maximum value. | Yes | -| Distribution | `expect_column_sum_to_be_between` | Expect the column sum to be between a minimum value and a maximum value. 
| Yes | -| Missingness | `expect_column_values_to_be_null` | Expect the column values to be null. | Coming soon | -| Missingness | `expect_column_values_to_not_be_null` | Expect the column values to not be null. | Coming soon | -| Numerical Data | `expect_column_max_to_be_between` | Expect the column maximum to be between a minimum and a maximum value. | Yes | -| Numerical Data | `expect_column_mean_to_be_between` | Expect the column mean to be between a minimum and a maximum value. | Yes | -| Numerical Data | `expect_column_median_to_be_between` | Expect the column median to be between a minimum and a maximum value. | Yes | -| Numerical Data | `expect_column_min_to_be_between` | Expect the column minimum to be between a minimum value and a maximum value. | Yes | -| Pattern matching | `expect_column_value_length_to_equal` | Expect the column entries to be strings with length equal to the provided value. | No | -| Pattern matching | `expect_column_value_length_to_be_between` | Expect the column entries to be strings with length between a minimum value and a maximum value. | No | -| Pattern matching | `expect_column_values_to_match_like_pattern` | Expect the column entries to be strings that match a given like pattern expression. | No | -| Pattern matching | `expect_column_values_to_match_like_pattern_list` | Expect the column entries to be strings that match any of a provided list of like pattern expressions. | No | -| Pattern matching | `expect_column_values_to_match_regex` | Expect the column entries to be strings that match a given regular expression. | No | -| Pattern matching | `expect_column_values_to_match_regex_list` | Expect the column entries to be strings that can be matched to either any of or all of a list of regular expressions. | No | -| Pattern matching | `expect_column_values_to_not_match_like_pattern` | Expect the column entries to be strings that do NOT match a given like pattern expression. 
| No | -| Pattern matching | `expect_column_values_to_not_match_like_pattern_list` | Expect the column entries to be strings that do NOT match any of a provided list of like pattern expressions. | No | -| Pattern matching | `expect_column_values_to_not_match_regex` | Expect the column entries to be strings that do NOT match a given regular expression. | No | -| Pattern matching | `expect_column_values_to_not_match_regex_list` | Expect the column entries to be strings that do not match any of a list of regular expressions. Matches can be anywhere in the string. | No | -| Schema | `expect_column_to_exist` | Checks for the existence of a specified column within a table. | No | -| Schema | `expect_column_values_to_be_in_type_list` | Expect a column to contain values from a specified type list. | No | -| Schema | `expect_column_values_to_be_of_type` | Expect a column to contain values of a specified data type. | No | -| Schema | `expect_table_column_count_to_be_between` | Expect the number of columns in a table to be between two values. | Yes | -| Schema | `expect_table_column_count_to_equal` | Expect the number of columns in a table to equal a value. | No | -| Schema | `expect_table_columns_to_match_ordered_list` | Expect the columns in a table to exactly match a specified list. | No | -| Schema | `expect_table_columns_to_match_set` | Expect the columns in a table to match an unordered set. | No | -| Sets | `expect_column_values_to_be_in_set` | Expect each column value to be in a given set. | No | -| Sets | `expect_column_values_to_not_be_in_set` | Expect column entries to not be in the set. | No | -| Sets | `expect_column_distinct_values_to_be_in_set` | Expect the set of distinct column values to be contained by a given set. | No | -| Sets | `expect_column_distinct_values_to_contain_set` | Expect the set of distinct column values to contain a given set. 
| No | -| Sets | `expect_column_distinct_values_to_equal_set` | Expect the set of distinct column values to equal a given set. | No | -| Sets | `expect_column_most_common_value_to_be_in_set` | Expect the most common value to be within the designated value set. | No | -| Volume | `expect_table_row_count_to_be_between` | Expect the number of rows to be between two values. | Yes | -| Volume | `expect_table_row_count_to_equal` | Expect the number of rows to equal a value. | No | -| Volume | `expect_table_row_count_to_equal_other_table` | Expect the number of rows to equal the number in another table within the same database. | No | +| Data quality issue | Expectation | Description | Dynamic Parameters? | +|------------------|----------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------|-------------------| +| Completeness | **column values to be null** | Expect the column values to be null. | Coming soon | +| Completeness | **column values to not be null** | Expect the column values to not be null. | Coming soon | +| Numeric | **column max to be between** | Expect the column maximum to be between a minimum and a maximum value. | Yes | +| Numeric | **column mean to be between** | Expect the column mean to be between a minimum and a maximum value. | Yes | +| Numeric | **column median to be between** | Expect the column median to be between a minimum and a maximum value. | Yes | +| Numeric | **column min to be between** | Expect the column minimum to be between a minimum value and a maximum value. | Yes | +| Numeric | **column pair values A to be greater than B** | Expect the values in column A to be greater than column B. | No | +| Numeric | **column stdev to be between** | Expect the column standard deviation to be between a minimum value and a maximum value. 
| Yes | +| Numeric | **column sum to be between** | Expect the column sum to be between a minimum value and a maximum value. | Yes | +| Numeric | **column values to be between** | Expect the column entries to be between a minimum value and a maximum value. | No | +| Numeric | **column z scores to be less than** | Expect the Z-scores of a column's values to be less than a given threshold. | No | +| Numeric | **multicolumn sum to equal** | Expect that the sum of row values in a specified column list is the same for each row, and equal to a specified sum total. | No | +| Numeric, Validity | **column most common value to be in set** | Expect the most common value to be within the designated value set. | No | +| Numeric, Validity | **column pair values to be equal** | Expect the values in column A to be the same as column B. | No | +| Numeric, Validity | **column values to be in set** | Expect each column value to be in a given set. | No | +| Numeric, Validity | **column values to not be in set** | Expect column entries to not be in the set. | No | +| Schema | **column to exist** | Checks for the existence of a specified column within a table. | No | +| Schema | **column values to be in type list** | Expect a column to contain values from a specified type list. | No | +| Schema | **column values to be of type** | Expect a column to contain values of a specified data type. | No | +| Schema | **table column count to be between** | Expect the number of columns in a table to be between two values. | Yes | +| Schema | **table column count to equal** | Expect the number of columns in a table to equal a value. | No | +| Schema | **table columns to match ordered list** | Expect the columns in a table to exactly match a specified list. | No | +| Schema | **table columns to match set** | Expect the columns in a table to match an unordered set. | No | +| Uniqueness | **column distinct values to be in set** | Expect the set of distinct column values to be contained by a given set. 
| No | +| Uniqueness | **column distinct values to contain set** | Expect the set of distinct column values to contain a given set. | No | +| Uniqueness | **column distinct values to equal set** | Expect the set of distinct column values to equal a given set. | No | +| Uniqueness | **column proportion of unique values to be between** | Expect the proportion of unique values to be between a minimum value and a maximum value. | Yes | +| Uniqueness | **column unique value count to be between** | Expect the number of unique values to be between a minimum value and a maximum value. | Yes | +| Uniqueness | **column values to be unique** | Expect each column value to be unique. | No | +| Uniqueness | **compound columns to be unique** | Expect the compound columns to be unique. | No | +| Uniqueness | **select column values to be unique within record** | Expect the values for each record to be unique across the columns listed. Note that records can be duplicated. | No | +| Validity | **column value lengths to be between** | Expect the column entries to be strings with length between a minimum value and a maximum value. | No | +| Validity | **column value lengths to equal** | Expect the column entries to be strings with length equal to the provided value. | No | +| Validity | **column values to match like pattern** | Expect the column entries to be strings that match a given like pattern expression. | No | +| Validity | **column values to match like pattern list** | Expect the column entries to be strings that match any of a provided list of like pattern expressions. | No | +| Validity | **column values to match regex** | Expect the column entries to be strings that match a given regular expression. | No | +| Validity | **column values to match regex list** | Expect the column entries to be strings that can be matched to either any of or all of a list of regular expressions. 
| No | +| Validity | **column values to not match like pattern** | Expect the column entries to be strings that do NOT match a given like pattern expression. | No | +| Validity | **column values to not match like pattern list** | Expect the column entries to be strings that do NOT match any of a provided list of like pattern expressions. | No | +| Validity | **column values to not match regex** | Expect the column entries to be strings that do NOT match a given regular expression. | No | +| Validity | **column values to not match regex list** | Expect the column entries to be strings that do not match any of a list of regular expressions. Matches can be anywhere in the string. | No | +| Volume | **table row count to be between** | Expect the number of rows to be between two values. | Yes | +| Volume | **table row count to equal** | Expect the number of rows to equal a value. | No | +| Volume | **table row count to equal other table** | Expect the number of rows to equal the number in another table within the same database. | No | + ## Custom SQL Expectations @@ -97,9 +98,6 @@ When you select your `n` run count, and: - There are `>n` runs, the Expectation will take the last `n` runs into account. - -![GX Cloud dynamic parameters](./expectation_images/dynamic_parameters.png) - ## Expectation condition The Expectation condition is an optional field that applies to any Expectation validating row-level data. This condition allows you to filter your data so that only a specific subset of your Batch is validated. Rows will be validated only when the condition is true. @@ -124,57 +122,48 @@ To clear the Expectation condition, click the clear button located on the right- 2. In the **Data Assets** list, click the Data Asset name. -3. Click the **Expectations** tab. - -4. Click **New Expectation**. - -5. Select an Expectation type. See [Available Expectations](#available-expectations). +3. Click **New Expectation**. -6. 
If you are adding your first expectation on this data asset, you may be able to select a time-based Batch interval for that asset. +4. Select a data quality issue to test for. - - A batch is a feature of the data asset, and allows you to validate your data incrementally. A batch interval can only be defined once per data asset; you cannot change it after setting it. +5. Select an Expectation type. - - In order to be able to select a batch interval, the data asset must have at least one DATE or DATETIME column. +6. Complete the mandatory and optional fields for the Expectation. A recurring [validation schedule](/cloud/schedules/manage_schedules.md) will be applied automatically to your Expectation. - - Select the **Entire table** tab to provide all Data Asset records to your Expectations and validations, or select the **Yearly**/**Monthly**/**Daily** tab to use subsets of Data Asset records for your Expectations and validations. +7. Click **Save** or click **Save & Add More** and then repeat steps 4 through 7 to add additional Expectations. - - Select **Yearly** to partition Data Asset records by year, select **Monthly** to partition Data Asset records by year and month, or select **Daily** to partition Data Asset records by year, month, and day. +8. Optional. Run a Validation. See [Run a Validation](/cloud/validations/manage_validations.md#run-a-validation). - - **Batch column** - Select a name column from a prefilled list of DATE and DATETIME columns containing the date and time data. - -7. Complete the mandatory and optional fields for the Expectation. A recurring validation schedule will be applied automatically to your Expectation, based on the settings of your Expectation Suite. +:::tip Automate rules for schema change detection +When you [create a new Data Asset](/cloud/data_assets/manage_data_assets.md#add-a-data-asset-from-an-existing-data-source), you can choose to automatically generate Expectations that detect column changes in that Data Asset. +::: -8. 
Click **Save** or click **Save & Add More** and then repeat steps 5 and 7 to add additional Expectations. -9. Optional. Run a Validation. See [Run a Validation](/cloud/validations/manage_validations.md#run-a-validation). +## Optional. Define a Batch -## Edit an Expectation +If your Data Asset has at least one DATE or DATETIME column, you can define a Batch to validate your data incrementally. 1. In GX Cloud, click **Data Assets**. 2. In the **Data Assets** list, click the Data Asset name. -3. Click the **Expectations** tab. - -4. Click **Edit Expectation** for the Expectation that you want to edit. +3. Click **Define batch**. -5. Edit the Expectation configuration. +4. Choose how to **Validate by**. Select the **Entire Asset** tab to provide all Data Asset records to your Expectations and validations, or select one of the **Year**/**Month**/**Day** tabs to use subsets of Data Asset records for your Expectations and validations. **Year** partitions Data Asset records by year, **Month** partitions Data Asset records by year and month, **Day** partitions Data Asset records by year, month, and day. -6. Click **Save**. +5. Select the **Batch column** that contains the DATE or DATETIME data to partition on. -## View Expectation history - -View the Expectation history to determine when an Expectation was changed and who made the change. +## Edit an Expectation -1. In GX Cloud, click **Expectation Suites**. +1. In GX Cloud, click **Data Assets**. -2. In the **Expectation Suites** list, click the Expectation Suite name. +2. In the **Data Assets** list, click the Data Asset name. -3. Click the **Change Log** tab. +3. Click **Edit Expectation** for the Expectation that you want to edit. -4. Optional. Select an Expectation in the **Columns** pane to view the change history for a specific Expectation. +4. Edit the Expectation configuration. - The date, time, and email address of the users who created, edited, or deleted the Expectation appears below the Expectation name. 
Strikethrough text indicates an Expectation was deleted. +5. Click **Save**. ## Delete an Expectation @@ -182,12 +171,30 @@ View the Expectation history to determine when an Expectation was changed and wh 2. In the **Data Assets** list, click the Data Asset name. -3. Click the **Expectations** tab. +3. Click **Delete Expectation** for the Expectation you want to delete. -4. Click **Delete Expectation** for the Expectation you want to delete. +4. Click **Yes, delete Expectation**. -5. Click **Yes, delete Expectation**. +## GX-managed vs. API-managed Expectations -## Related documentation +In GX Cloud, Expectations can be GX-managed or API-managed. +- GX-managed Expectations are created through the GX Cloud UI. +- API-managed Expectations are created with the API in a GX Cloud Data Context. -- [Manage Expectation Suites](../expectation_suites/manage_expectation_suites.md) +If you have both kinds of Expectations, they will be organized in separate tables on the **Expectations** tab as they have different capabilities in the Cloud UI. + +Here is a comparison of key characteristics of GX-managed and API-managed Expectations. 
+ +| Characteristic | GX-managed Expectation | API-managed Expectation | +|--------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Edit | [Edit parameters](/cloud/expectations/manage_expectations.md#edit-an-expectation) with the GX Cloud UI | [Edit parameters with the API](/reference/api/expectations/Expectation_class.mdx) or the GX Cloud UI | +| Batch | [Define a Batch](/cloud/expectations/manage_expectations.md#optional-define-a-batch) in the Cloud UI | Define a Batch with the API when connecting to [SQL](/core/connect_to_data/sql_data/sql_data.md#create-a-batch-definition), [filesystem](/core/connect_to_data/filesystem_data/filesystem_data.md#create-a-batch-definition), or [dataframe](/core/connect_to_data/dataframes/dataframes.md#create-a-batch-definition) data | +| Validate | [Run a Validation](/docs/cloud/validations/manage_validations.md#run-a-validation) through the Cloud UI or [run a Checkpoint](/core/trigger_actions_based_on_results/run_a_checkpoint.md) with the API | [Create a Validation Definition and run it](/core/run_validations/run_validations.md) with the API | +| Validation Results | [Access results in the Validations tab](/cloud/validations/manage_validations.md#view-validation-run-history) of the Cloud UI | [Access results with the API](/core/trigger_actions_based_on_results/choose_a_result_format/choose_a_result_format.md) or in the Validations tab of the Cloud UI | +| Schedule | Keep default schedule or [edit schedule](/cloud/schedules/manage_schedules.md) in the Cloud 
UI | Not supported, use an [orchestrator](/cloud/connect/connect_airflow.md) to control recurring validations | +| Expectation Suite | Automatically organized in a hidden default Expectation Suite | Manually grouped into [custom Expectation Suites](/core/define_expectations/organize_expectation_suites.md) via the API | +| Delete | [Delete Expectation](/cloud/expectations/manage_expectations.md#delete-an-expectation) with the Cloud UI | [Delete Expectation with the API](/reference/api/ExpectationSuite_class.mdx#great_expectations.ExpectationSuite.delete_expectation) or the Cloud UI | + +:::note Hidden resources for GX-managed Expectations +To support GX-managed Expectations, we create resources that you typically won't directly interact with. For example, we create a GX-managed Expectation Suite that we use to organize your Expectations. For some workflows, you may need to work with these hidden resources; for example, you may need to [find the name of an automatically created Checkpoint](/cloud/connect/connect_airflow.md#create-a-dag-file-for-your-gx-cloud-checkpoint). Typically, though, you can ignore the existence of these hidden resources. 
+::: diff --git a/docs/docusaurus/docs/cloud/gx_cloud_lp.md b/docs/docusaurus/docs/cloud/gx_cloud_lp.md index dee1909e6a6d..4a94a4afccd7 100644 --- a/docs/docusaurus/docs/cloud/gx_cloud_lp.md +++ b/docs/docusaurus/docs/cloud/gx_cloud_lp.md @@ -33,7 +33,6 @@ import OverviewCard from '@site/src/components/OverviewCard'; - diff --git a/docs/docusaurus/docs/cloud/overview/gx_cloud_overview.md b/docs/docusaurus/docs/cloud/overview/gx_cloud_overview.md index dab8beabe4b7..9afacd4ada33 100644 --- a/docs/docusaurus/docs/cloud/overview/gx_cloud_overview.md +++ b/docs/docusaurus/docs/cloud/overview/gx_cloud_overview.md @@ -46,13 +46,14 @@ There are a variety of GX Cloud features that support additional enhancements to * **Data Asset profiling.** GX Cloud introspects your data schema by default on Data Asset creation, and also offers one-click fetching of additional descriptive metrics including column type and statistical summaries. Data profiling results are used to suggest parameters for Expectations that you create. +* **Automate schema change detection.** GX Cloud can automatically generate Expectations that detect column changes. This option is available when [you create new Data Assets](/cloud/data_assets/manage_data_assets.md#add-a-data-asset-from-an-existing-data-source). + * **Schedule Validations.** GX Cloud enables you to schedule validations, so that you can test and assess your data on a regular cadence and monitor data quality over time. See [Manage schedules](/cloud/schedules/manage_schedules.md) for more detail. * **Alerting.** GX Cloud provides the ability to send alerts when validations fail, enabling your organization to remain proactively aware of the health of your Data Assets. See [Manage alerts](/cloud/alerts/manage_alerts.md) for more detail. - ## GX Cloud architecture GX Cloud architecture comprises a frontend web UI, storage for entity configuration and metadata, a backend application, and a Python client. 
@@ -65,4 +66,4 @@ GX Cloud architecture comprises a frontend web UI, storage for entity configurat * **GX Cloud backend application**. Contains the necessary logic and compute to connect to data and run queries. The specifics of how the GX Cloud backend connects to your data is described in [Deployment patterns](/cloud/deploy/deployment_patterns.md). -* **GX Core Python client**. Enables you to interact programmatically with the GX Cloud backend application. The [GX Core Python client](/core/introduction/introduction.mdx) can complement and extend your web UI-created workflows. \ No newline at end of file +* **GX Core Python client**. Enables you to interact programmatically with GX Cloud entities and workflows created via the API. The [GX Core Python client](/core/introduction/introduction.mdx) can complement and extend your web UI-created workflows. \ No newline at end of file diff --git a/docs/docusaurus/docs/cloud/overview/overview_images/gx_cloud_architecture.png b/docs/docusaurus/docs/cloud/overview/overview_images/gx_cloud_architecture.png index b0cd981077fe..34eddccd8524 100644 Binary files a/docs/docusaurus/docs/cloud/overview/overview_images/gx_cloud_architecture.png and b/docs/docusaurus/docs/cloud/overview/overview_images/gx_cloud_architecture.png differ diff --git a/docs/docusaurus/docs/cloud/overview/overview_images/gx_cloud_overview_images.drawio b/docs/docusaurus/docs/cloud/overview/overview_images/gx_cloud_overview_images.drawio index 671c643edd0b..50c109061c89 100644 --- a/docs/docusaurus/docs/cloud/overview/overview_images/gx_cloud_overview_images.drawio +++ b/docs/docusaurus/docs/cloud/overview/overview_images/gx_cloud_overview_images.drawio @@ -1,6 +1,6 @@ - + - + @@ -83,7 +83,7 @@ - + @@ -158,13 +158,25 @@ - + + + + + + + + + + + + + - + @@ -213,8 +225,8 @@ - - + + @@ -240,13 +252,13 @@ - + - + - + diff --git a/docs/docusaurus/docs/cloud/overview/overview_images/gx_cloud_workflow_enhanced.png 
b/docs/docusaurus/docs/cloud/overview/overview_images/gx_cloud_workflow_enhanced.png index 51071b65190d..11445f908666 100644 Binary files a/docs/docusaurus/docs/cloud/overview/overview_images/gx_cloud_workflow_enhanced.png and b/docs/docusaurus/docs/cloud/overview/overview_images/gx_cloud_workflow_enhanced.png differ diff --git a/docs/docusaurus/docs/cloud/schedules/manage_schedules.md b/docs/docusaurus/docs/cloud/schedules/manage_schedules.md index b8d258290d08..d89cdeadbcc6 100644 --- a/docs/docusaurus/docs/cloud/schedules/manage_schedules.md +++ b/docs/docusaurus/docs/cloud/schedules/manage_schedules.md @@ -4,49 +4,12 @@ title: 'Manage schedules' description: Create and manage schedules for Validations in GX Cloud. --- -Add schedules to your GX Cloud Expectation Suites to take advantage of automated data quality checks. Schedules are automatically enabled when you add your first Expectation to an Expectation Suite in the Cloud UI. +Use a schedule to automate data quality checks with GX-managed Expectations. When you add your first Expectation in the GX Cloud UI for a Data Asset, including when you choose to auto-generate Expectations to detect schema changes, we enable a default schedule for that Asset's GX-managed Expectations. By default, GX-managed Expectations are scheduled to run every 24 hours. The first run will be at the start of the next hour after you add your first Expectation in the Cloud UI. You can keep the default schedule, edit it, or disable it. -## Create a schedule for an existing Expectation Suite +:::note Schedules are for GX-managed Expectations only +To automate data quality checks for [API-managed Expectations](/cloud/expectations/manage_expectations.md#gx-managed-vs-api-managed-expectations), use an [orchestrator](/cloud/connect/connect_airflow.md). +::: -1. In GX Cloud, click **Data Assets**. - -2. In the **Data Assets** list, click the Data Asset name. - -3. 
Click the **Expectations** tab and then select the Expectation Suite you'd like to apply a schedule to. - -4. Ensure you have an Expectation in the Expectation Suite. Unpause the schedule for the Expectation Suite using the toggle in the Scheduling component. - -5. To edit the default schedule, click the edit icon in the Scheduling component. - - - **Frequency:** Select a frequency between 1 hour and 24 hours. - - - **Start time:** Select a local start time for your schedule. Your selected frequency overrides start time, in the case where start time is later than the next occurence of selected frequency. - -6. Click **Save**. - -## Create a schedule for a new Expectation Suite - -1. In GX Cloud, click **Data Assets**. - -2. In the **Data Assets** list, click the Data Asset name. - -3. Click the **Expectations** tab. - -4. Click **New Suite** in the **Expectation Suites** pane. - -5. Enter a name for the Expectation Suite in the **Expectation Suite name** field. - -6. Click **Create**. - -7. Add an Expectation to the Expectation Suite to unpause the schedule for the Expectation Suite. - -8. To edit the default schedule, click the edit icon in the Scheduling component. - - - **Frequency:** Select a frequency between 1 hour and 24 hours. - - - **Start time:** Select a local start time for your schedule. Your selected frequency overrides start time, in the case where start time is later than the next occurence of selected frequency. - -9. Click **Save**. ## Edit a schedule @@ -54,13 +17,9 @@ Add schedules to your GX Cloud Expectation Suites to take advantage of automated 2. In the **Data Assets** list, click the Data Asset name. -3. Click the **Expectations** tab and then select the Expectation Suite you'd like to apply a schedule to. +3. In the Scheduling component, click the **Edit Schedule** icon. -4. To edit the default schedule, click the edit icon in the Scheduling component. - - - **Frequency:** Select a frequency between 1 hour and 24 hours. 
- - **Start time:** Select a local start time for your schedule. Your selected frequency overrides start time, in the case where start time is later than the next occurence of selected frequency. +4. Change the **Frequency** and/or the **Start time** for the first run of the new schedule. 5. Click **Save**. @@ -70,6 +29,4 @@ Add schedules to your GX Cloud Expectation Suites to take advantage of automated 2. In the **Data Assets** list, click the Data Asset name. -3. Click the **Expectations** tab and then select the Expectation Suite you'd like to change. - -4. Pause the schedule for the Expectation Suite using the toggle in the Scheduling component. \ No newline at end of file +3. Pause the schedule using the toggle in the Scheduling component. diff --git a/docs/docusaurus/docs/cloud/validations/manage_validations.md b/docs/docusaurus/docs/cloud/validations/manage_validations.md index 8bd7d87edf29..271eac34f764 100644 --- a/docs/docusaurus/docs/cloud/validations/manage_validations.md +++ b/docs/docusaurus/docs/cloud/validations/manage_validations.md @@ -4,7 +4,11 @@ title: 'Manage Validations' description: Create and manage Validations in GX Cloud. --- -When you run a validation on an Expectation, a Checkpoint is added. The Checkpoint saves the Validation Results, runs the Actions you specify, and displays the Validation Results. +You can manually run a Validation through the GX Cloud UI. This is useful for exploring your data and fine-tuning your Expectations. To run recurring Validations, use a [schedule](/cloud/schedules/manage_schedules.md) or an [orchestrator](/cloud/connect/connect_airflow.md). + +:::tip Manual validations are for GX-managed Expectations only +To run a validation for an [API-managed Expectation](/cloud/expectations/manage_expectations.md#gx-managed-vs-api-managed-expectations), create and run a [Validation Definition](/core/run_validations/create_a_validation_definition.md). 
+::: @@ -18,39 +22,35 @@ When you run a validation on an Expectation, a Checkpoint is added. The Checkpoi 2. Click a Data Asset in the **Data Assets** list. -3. Click the **Expectations** tab and then select an Expectation Suite in the **Expectation Suites** list. - -4. Click **Validate**. +3. Click **Validate**. -5. When the confirmation message appears, click **See results**, or click the **Validations** tab and select the Validation in the **Batches & run history** pane. +4. When the confirmation message appears, click **See results**, or click the **Validations** tab and select the Validation in the **Batches & run history** pane. -6. Optional. Click **Share** to copy the URL for the Validation Results and share them with other users in your organization. +5. Optional. Click **Share** to copy the URL for the Validation Results and share them with other users in your organization. -## Run a Validation on a Data Asset containing partitions +## Run a Validation on a subset of a Data Asset -When you connect to a Data Asset, you can add a partition to create Expectations and run validations on subsets of Data Asset records. If you've added a partition, you can run a Validation on the latest Batch of data, or you can select a specific year, year and month, or year, month, and day period for the Validation. +If you've [defined a Batch](/cloud/expectations/manage_expectations.md#optional-define-a-batch), you can run a Validation on the latest Batch of data, or you can select a specific year, year and month, or year, month, and day period for the Validation. If a Batch is defined, Batch information appears on the Data Asset **Metrics** page and on the **Validations** page in the **Batches & run history** pane. -To make the identification of Data Assets containing partitions easier, partition information appears in the Data Asset **Overview** page and in the **Batches & run history** pane on the **Validations** page. 
+To run a Validation for a specific Batch, do the following: 1. In GX Cloud, click **Data Assets**. 2. Click a Data Asset in the **Data Assets** list. -3. Click the **Expectations** tab and then select an Expectation Suite in the **Expectation Suites** list. +3. Click **Validate**. -4. Click **Validate**. +4. Select one of the following options: -5. Select one of the following options: + - **Latest** - Run the Validation on the latest Batch of data. - - **Latest Batch** - Run the Validation on the latest Batch of data. + - **Custom** - Select the **year**, **month**, or **day** to run the Validation on a Batch of data for a specific period. - - **Custom Batch** - Enter the **Year**, **Month/Year**, or the **Year/Month/Day** value to run the Validation on a Batch of data for a specific period. +5. Click **Run**. -6. Click **Validate**. +6. When the confirmation message appears, click **See results**, or click the **Validations** tab and select the Validation in the **Batches & run history** pane. -7. When the confirmation message appears, click **See results**, or click the **Validations** tab and select the Validation in the **Batches & run history** pane. - -8. Optional. Click **Share** to copy the URL for the Validation Results and share them with other users in your organization. +7. Optional. Click **Share** to copy the URL for the Validation Results and share them with other users in your organization. ## View Validation run history @@ -60,22 +60,20 @@ To make the identification of Data Assets containing partitions easier, partitio 3. Click the **Validations** tab. -4. Select an Expectation Suite in the **Expectation Suites** list. - -5. On the **Validations** page, select one of the following options: +4. On the **Validations** page, select one of the following options: - To view only run validation failures, click **Failures Only**. - - To view the run history for specific Validation, select a Validation in the **Run History** pane. 
+ - To view the run history for a specific Validation, select a Validation in the **Batches & run history** pane. - To view the run history of all Validations, select **All Runs** to view a graph showing the Validation run history for all columns. - :::tip + :::tip Tip Items in run history with a calendar icon are run on a GX-managed schedule. ::: -6. Optional. Hover over a circle in the Validation timeline to view details about a specific Validation run, including the observed values. +5. Optional. Hover over a circle in the Validation timeline to view details about a specific Validation run, including the observed values. ![Validation timeline detail](/img/view_validation_timeline_detail.png) diff --git a/docs/docusaurus/docs/components/_data.jsx b/docs/docusaurus/docs/components/_data.jsx index 2a9c85423e25..7be5b8d1593e 100644 --- a/docs/docusaurus/docs/components/_data.jsx +++ b/docs/docusaurus/docs/components/_data.jsx @@ -1,5 +1,5 @@ export default { - release_version: 'great_expectations, version 1.2.5', + release_version: 'great_expectations, version 1.3.3', min_python: '3.9', max_python: '3.12' } diff --git a/docs/docusaurus/docs/components/examples_under_test.py b/docs/docusaurus/docs/components/examples_under_test.py index 2cec39d10270..5ca414ea2a05 100644 --- a/docs/docusaurus/docs/components/examples_under_test.py +++ b/docs/docusaurus/docs/components/examples_under_test.py @@ -439,6 +439,16 @@ # data_context_dir="", backend_dependencies=[], ), + # Create a custom Action + IntegrationTestFixture( + # To test, run: + # pytest --docs-tests -k "docs_example_create_a_custom_action" tests/integration/test_script_runner.py + name="docs_example_create_a_custom_action", + user_flow_script="docs/docusaurus/docs/core/trigger_actions_based_on_results/_examples/create_a_custom_action.py", + data_dir="docs/docusaurus/docs/components/_testing/test_data_sets/single_test_file", + # data_context_dir="", + backend_dependencies=[], + ), # Run a Checkpoint 
IntegrationTestFixture( # To test, run: diff --git a/docs/docusaurus/docs/components/setup/python_environment/_tip_python_or_python3_executable.md b/docs/docusaurus/docs/components/setup/python_environment/_tip_python_or_python3_executable.md index c760c559df3d..393cdfde03b0 100644 --- a/docs/docusaurus/docs/components/setup/python_environment/_tip_python_or_python3_executable.md +++ b/docs/docusaurus/docs/components/setup/python_environment/_tip_python_or_python3_executable.md @@ -1,4 +1,4 @@ -:::info executing python commands with `python` or `python3` +:::info Executing python commands with `python` or `python3` Depending on your installation and configuration of Python 3, you may find that executing Python commands from the terminal by calling `python` doesn't work as desired. If a command using `python` does not work, try using `python3`. diff --git a/docs/docusaurus/docs/components/warnings/_sql_alchemy2.md b/docs/docusaurus/docs/components/warnings/_sql_alchemy2.md index 070c7b1b564d..20b1cc062040 100644 --- a/docs/docusaurus/docs/components/warnings/_sql_alchemy2.md +++ b/docs/docusaurus/docs/components/warnings/_sql_alchemy2.md @@ -1,4 +1,4 @@ -:::caution +:::caution Caution As of this writing, Great Expectations is not compatible with SQLAlchemy version 2 or greater. We recommend using the latest non-version-2 release. 
diff --git a/docs/docusaurus/docs/core/configure_project_settings/access_secrets_managers/_aws_secrets_manager.md b/docs/docusaurus/docs/core/configure_project_settings/access_secrets_managers/_aws_secrets_manager.md index 205a0fef7829..c9c08840e43e 100644 --- a/docs/docusaurus/docs/core/configure_project_settings/access_secrets_managers/_aws_secrets_manager.md +++ b/docs/docusaurus/docs/core/configure_project_settings/access_secrets_managers/_aws_secrets_manager.md @@ -1,13 +1,13 @@ import GxData from '../../_core_components/_data.jsx' import PreReqFileDataContext from '../../_core_components/prerequisites/_file_data_context.md' -### Prerequisites +### Prerequisites {#prerequisites-aws} - An AWS Secrets Manager instance. See [AWS Secrets Manager](https://docs.aws.amazon.com/secretsmanager/latest/userguide/tutorials_basic.html). - The ability to install Python packages with `pip`. - . -### Procedure +### Procedure {#procedure-aws} 1. Set up AWS Secrets Manager support. diff --git a/docs/docusaurus/docs/core/configure_project_settings/access_secrets_managers/_azure_key_vault.md b/docs/docusaurus/docs/core/configure_project_settings/access_secrets_managers/_azure_key_vault.md index 8a7c75b5ab6c..332badc33e09 100644 --- a/docs/docusaurus/docs/core/configure_project_settings/access_secrets_managers/_azure_key_vault.md +++ b/docs/docusaurus/docs/core/configure_project_settings/access_secrets_managers/_azure_key_vault.md @@ -1,13 +1,13 @@ import GxData from '../../_core_components/_data.jsx' import PreReqFileDataContext from '../../_core_components/prerequisites/_file_data_context.md' -### Prerequisites +### Prerequisites {#prerequisites-azure} - An [Azure Key Vault instance with configured secrets](https://docs.microsoft.com/en-us/azure/key-vault/general/overview). - The ability to install Python packages with `pip`. - . -### Procedure +### Procedure {#procedure-azure} 1. Set up Azure Key Vault support. 
diff --git a/docs/docusaurus/docs/core/configure_project_settings/access_secrets_managers/_gcp_secret_manager.md b/docs/docusaurus/docs/core/configure_project_settings/access_secrets_managers/_gcp_secret_manager.md index e5796027c39f..cf9ffb5c8712 100644 --- a/docs/docusaurus/docs/core/configure_project_settings/access_secrets_managers/_gcp_secret_manager.md +++ b/docs/docusaurus/docs/core/configure_project_settings/access_secrets_managers/_gcp_secret_manager.md @@ -1,13 +1,13 @@ import GxData from '../../_core_components/_data.jsx' import PreReqFileDataContext from '../../_core_components/prerequisites/_file_data_context.md' -### Prerequisites +### Prerequisites {#prerequisites-gcp} - A [GCP Secret Manager instance with configured secrets](https://cloud.google.com/secret-manager/docs/quickstart). - The ability to install Python packages with `pip`. - . -### Procedure +### Procedure {#procedure-gcp} 1. Set up GCP Secret Manager support. diff --git a/docs/docusaurus/docs/core/configure_project_settings/toggle_analytics_events/_tab_context_variable.md b/docs/docusaurus/docs/core/configure_project_settings/toggle_analytics_events/_tab_context_variable.md index b2afed1cd3b3..2e56203a6d23 100644 --- a/docs/docusaurus/docs/core/configure_project_settings/toggle_analytics_events/_tab_context_variable.md +++ b/docs/docusaurus/docs/core/configure_project_settings/toggle_analytics_events/_tab_context_variable.md @@ -8,13 +8,13 @@ import PrereqFileDataContext from '../../_core_components/prerequisites/_file_da The Data Context variable `analytics_enabled` can be used to toggle the collection of analytics information. Because the analytics configuration is loaded when a Data Context is initialized this method is only suitable when working with a File Data Context. 
For other types of Data Context, use the [Environment Variable](/core/configure_project_settings/toggle_analytics_events/toggle_analytics_events.md?config_method=environment_variable#methods-for-toggling-analytics-collection) method for toggling analytics collection. -### Prerequisites +### Prerequisites {#prerequisites-context-variable} - . - . - . -### Procedure +### Procedure {#procedure-context-variable} diff --git a/docs/docusaurus/docs/core/configure_project_settings/toggle_analytics_events/_tab_environment_variable.md b/docs/docusaurus/docs/core/configure_project_settings/toggle_analytics_events/_tab_environment_variable.md index a42f3c627d1d..da12aec31b40 100644 --- a/docs/docusaurus/docs/core/configure_project_settings/toggle_analytics_events/_tab_environment_variable.md +++ b/docs/docusaurus/docs/core/configure_project_settings/toggle_analytics_events/_tab_environment_variable.md @@ -5,13 +5,13 @@ The environment variable `GX_ANALYTICS_ENABLED` can be used to toggle the collec `GX_ANALYTICS_ENABLED` will also work to toggle analytics collection when using a GX Cloud Data Context or a File Data Context. -### Prerequisites +### Prerequisites {#prerequisites-environment-variable} - - - Permissions necessary to set local Environment Variables. -### Procedure +### Procedure {#procedure-environment-variable} 1. Set the environment variable `GX_ANALYTICS_ENABLED`. 
diff --git a/docs/docusaurus/docs/core/configure_project_settings/toggle_analytics_events/toggle_analytics_events.md b/docs/docusaurus/docs/core/configure_project_settings/toggle_analytics_events/toggle_analytics_events.md index 57c0ef1b8836..1562dcd6d24f 100644 --- a/docs/docusaurus/docs/core/configure_project_settings/toggle_analytics_events/toggle_analytics_events.md +++ b/docs/docusaurus/docs/core/configure_project_settings/toggle_analytics_events/toggle_analytics_events.md @@ -12,6 +12,8 @@ import Tabs from '@theme/Tabs'; import TabEnvironmentVaribale from './_tab_environment_variable.md'; import TabContextVariable from './_tab_context_variable.md'; +To help us improve Great Expectations, we track analytics events by default. The data includes things like which GX features are used with what OS and Python version. While we hope you'll leave them on, you can disable analytics events tracking for a Data Context. + In order to determine if analytics should be enabled, GX Core checks two sources: - The environment variable `GX_ANALYTICS_ENABLED` diff --git a/docs/docusaurus/docs/core/connect_to_data/configure_credentials/_access_key_pair.md b/docs/docusaurus/docs/core/connect_to_data/configure_credentials/_access_key_pair.md new file mode 100644 index 000000000000..abf8506c691e --- /dev/null +++ b/docs/docusaurus/docs/core/connect_to_data/configure_credentials/_access_key_pair.md @@ -0,0 +1,24 @@ + +To use key-pair authentication for Snowflake, you will pass the private key as a connection argument with `kwargs` in addition to passing connection details with the `connection_string` parameter. Here's an example of how to access your private key in Python. 
+ +```python title="Python" +import pathlib + +from cryptography.hazmat.backends import default_backend +from cryptography.hazmat.primitives import serialization + +PRIVATE_KEY_FILE = pathlib.Path("path/to/my/rsa_key.p8").resolve(strict=True) + +p_key = serialization.load_pem_private_key( + PRIVATE_KEY_FILE.read_bytes(), + password=b"my_password", + backend=default_backend() + ) + +pkb = p_key.private_bytes( + encoding=serialization.Encoding.DER, + format=serialization.PrivateFormat.PKCS8, + encryption_algorithm=serialization.NoEncryption()) + +connect_args = {"private_key": pkb} +``` \ No newline at end of file diff --git a/docs/docusaurus/docs/core/connect_to_data/configure_credentials/_configure_credentials.md b/docs/docusaurus/docs/core/connect_to_data/configure_credentials/_configure_credentials.md index d48e1d9b596c..f694d4c3c1c0 100644 --- a/docs/docusaurus/docs/core/connect_to_data/configure_credentials/_configure_credentials.md +++ b/docs/docusaurus/docs/core/connect_to_data/configure_credentials/_configure_credentials.md @@ -5,7 +5,9 @@ import TabItem from '@theme/TabItem'; import ConnectionString from './_connection_string.md'; import EnvironmentVariables from './_environment_variables.md'; import ConfigYml from './_config_yml.md'; +import KeyPair from './_key_pair.md'; import AccessCredentials from './_access_credentials.md' +import AccessKeyPair from './_access_key_pair.md' @@ -25,11 +27,11 @@ GX Core also supports referencing credentials that have been stored in the AWS S -2. Store the credentials required for your connection string. +2. Store the credentials required for your connection. - GX supports the following methods of securely storing credentials. Chose one to implement for your connection string: + GX supports the following methods of securely storing credentials. Choose one to implement for your connection: - + @@ -39,11 +41,29 @@ GX Core also supports referencing credentials that have been stored in the AWS S + + + + 3. 
Access your credentials in Python strings. - + + + + + + + + + + + + + + + 4. Optional. Access credentials stored in a secret manager. diff --git a/docs/docusaurus/docs/core/connect_to_data/configure_credentials/_connection_string.md b/docs/docusaurus/docs/core/connect_to_data/configure_credentials/_connection_string.md index 42ce5948314f..5853ee18f515 100644 --- a/docs/docusaurus/docs/core/connect_to_data/configure_credentials/_connection_string.md +++ b/docs/docusaurus/docs/core/connect_to_data/configure_credentials/_connection_string.md @@ -1,4 +1,4 @@ -import ConnectionStringTable from './_connection_string_reference_table.md'; +import ConnectionStringTable from './_connection_string_reference_table.mdx'; Different types of SQL database have different formats for their connection string. In the following table, the text in `<>` corresponds to the values specific to your credentials and connection string. diff --git a/docs/docusaurus/docs/core/connect_to_data/configure_credentials/_connection_string_reference_table.md b/docs/docusaurus/docs/core/connect_to_data/configure_credentials/_connection_string_reference_table.mdx similarity index 88% rename from docs/docusaurus/docs/core/connect_to_data/configure_credentials/_connection_string_reference_table.md rename to docs/docusaurus/docs/core/connect_to_data/configure_credentials/_connection_string_reference_table.mdx index 0762c6519346..e6d8b974245a 100644 --- a/docs/docusaurus/docs/core/connect_to_data/configure_credentials/_connection_string_reference_table.md +++ b/docs/docusaurus/docs/core/connect_to_data/configure_credentials/_connection_string_reference_table.mdx @@ -3,7 +3,7 @@ |-----------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------| | PostgreSQL | `postgresql+psycopg2://:@:/` | | SQLite | `sqlite:///` | -| Snowflake | 
`snowflake://:@//?warehouse=&role=&application=great_expectations_oss` | +| Snowflake | `snowflake://:@//?warehouse=&role=&application=great_expectations_oss`
You have the option to connect to Snowflake with key-pair authentication instead of a password.| | Databricks SQL | `databricks://token:@:?http_path=&catalog=&schema=` | | BigQuery SQL | `bigquery:///?credentials_path=/path/to/your/credentials.json` | diff --git a/docs/docusaurus/docs/core/connect_to_data/configure_credentials/_key_pair.md b/docs/docusaurus/docs/core/connect_to_data/configure_credentials/_key_pair.md new file mode 100644 index 000000000000..02f48f71a747 --- /dev/null +++ b/docs/docusaurus/docs/core/connect_to_data/configure_credentials/_key_pair.md @@ -0,0 +1,3 @@ +If you're connecting to Snowflake, you can use key-pair authentication instead of a password. This improves security and can be helpful for automations. + +Follow Snowflake's docs to [configure and store the private and public keys](https://docs.snowflake.com/en/user-guide/key-pair-auth). \ No newline at end of file diff --git a/docs/docusaurus/docs/core/connect_to_data/dataframes/dataframes.md b/docs/docusaurus/docs/core/connect_to_data/dataframes/dataframes.md index f399a806e900..6f01625a0eba 100644 --- a/docs/docusaurus/docs/core/connect_to_data/dataframes/dataframes.md +++ b/docs/docusaurus/docs/core/connect_to_data/dataframes/dataframes.md @@ -20,14 +20,14 @@ A dataframe is a set of data that resides in-memory and is represented in your c Because the dataframes reside in memory you do not need to specify the location of the data when you create your Data Source. Instead, the type of Data Source you create depends on the type of dataframe containing your data. Great Expectations has methods for connecting to both pandas and Spark dataframes. -### Prerequisites +### Prerequisites {#prerequisites-data-source} - - - Optional. . - . These examples assume the variable `context` contains your Data Context. -### Procedure +### Procedure {#procedure-data-source} - @@ -115,7 +115,7 @@ A dataframe Data Asset is used to group your Validation Results. For instance, - . 
These examples assume the variable `context` contains your Data Context. - A [pandas or Spark dataframe Data Source](#create-a-data-source). -### Procedure +### Procedure {#procedure-data-asset} - @@ -179,7 +181,7 @@ This means that Batch Definitions for dataframe Data Assets don't work to subdiv - . These examples assume the variable `context` contains your Data Context. - A [pandas or Spark dataframe Data Asset](#create-a-data-asset). -### Procedure +### Procedure {#procedure-batch-definition} - @@ -244,7 +246,7 @@ Because dataframes exist in memory and cease to exist when a Python session ends - Data in a pandas or Spark dataframe. These examples assume the variable `dataframe` contains your pandas or Spark dataframe. - Optional. A Validation Definition. -### Procedure +### Procedure {#procedure-dataframes} 1. Define the Batch Parameter dictionary. diff --git a/docs/docusaurus/docs/core/connect_to_data/filesystem_data/_create_a_batch_definition/_create_a_batch_definition.md b/docs/docusaurus/docs/core/connect_to_data/filesystem_data/_create_a_batch_definition/_create_a_batch_definition.md index ce575eb13d86..5a8b41041a8a 100644 --- a/docs/docusaurus/docs/core/connect_to_data/filesystem_data/_create_a_batch_definition/_create_a_batch_definition.md +++ b/docs/docusaurus/docs/core/connect_to_data/filesystem_data/_create_a_batch_definition/_create_a_batch_definition.md @@ -3,8 +3,6 @@ import Tabs from '@theme/Tabs'; import TabFileDataAsset from './_tab-file_batch_definition.md'; import TabDirectoryDataAsset from './_tab-directory_batch_definition.md'; -A Batch Definition determines which records in a Data Asset are retrieved for Validation. Batch Definitions can be configured to either provide all of the records in a Data Asset, or to subdivide the Data Asset based on a date. 
- diff --git a/docs/docusaurus/docs/core/connect_to_data/filesystem_data/_create_a_batch_definition/_tab-directory_batch_definition.md b/docs/docusaurus/docs/core/connect_to_data/filesystem_data/_create_a_batch_definition/_tab-directory_batch_definition.md index afce657d13bf..d4d304382525 100644 --- a/docs/docusaurus/docs/core/connect_to_data/filesystem_data/_create_a_batch_definition/_tab-directory_batch_definition.md +++ b/docs/docusaurus/docs/core/connect_to_data/filesystem_data/_create_a_batch_definition/_tab-directory_batch_definition.md @@ -5,11 +5,11 @@ import PreReqDataContext from '../../../_core_components/prerequisites/_preconfi Batch Definitions for a Directory Data Asset can be configured to return all of the records for the files in the Data Asset, or to subdivide the Data Asset's records on the content of a Datetime field and only return the records that correspond to a specific year, month, or day. -### Prerequisites +### Prerequisites {#prerequisites-batch-definition-directory} - . The variable `context` is used for your Data Context in the following example code. - [A File Data Asset on a Filesystem Data Source](#create-a-data-asset). -### Procedure +### Procedure {#procedure-batch-definition-directory} . The variable `context` is used for your Data Context in the following example code. - [A File Data Asset on a Filesystem Data Source](#create-a-data-asset). 
diff --git a/docs/docusaurus/docs/core/connect_to_data/filesystem_data/_create_a_data_asset/_abs/_abs.md b/docs/docusaurus/docs/core/connect_to_data/filesystem_data/_create_a_data_asset/_abs/_abs.md index 34073512bac5..dac1f5ab2577 100644 --- a/docs/docusaurus/docs/core/connect_to_data/filesystem_data/_create_a_data_asset/_abs/_abs.md +++ b/docs/docusaurus/docs/core/connect_to_data/filesystem_data/_create_a_data_asset/_abs/_abs.md @@ -14,4 +14,4 @@ import TabDirectoryDataAsset from './_tab-directory_data_asset.md'; - \ No newline at end of file + diff --git a/docs/docusaurus/docs/core/connect_to_data/filesystem_data/_create_a_data_asset/_abs/_tab-directory_data_asset.md b/docs/docusaurus/docs/core/connect_to_data/filesystem_data/_create_a_data_asset/_abs/_tab-directory_data_asset.md index 4b02a297bba7..46d514bbb58b 100644 --- a/docs/docusaurus/docs/core/connect_to_data/filesystem_data/_create_a_data_asset/_abs/_tab-directory_data_asset.md +++ b/docs/docusaurus/docs/core/connect_to_data/filesystem_data/_create_a_data_asset/_abs/_tab-directory_data_asset.md @@ -6,13 +6,13 @@ import PrereqGxInstall from '../../../../_core_components/prerequisites/_gx_inst import PrereqDataContext from '../../../../_core_components/prerequisites/_preconfigured_data_context.md' import PrereqSparkFilesystemDataSource from '../../../../_core_components/prerequisites/_data_source_spark_filesystem.md' -### Prerequisites +### Prerequisites {#prerequisites-data-asset-directory-abs} - . - and [Spark dependencies](/core/set_up_a_gx_environment/install_additional_dependencies.md?dependencies=spark). - . - [A Filesystem Data Source configured to access data files in Azure Blob Storage](/core/connect_to_data/filesystem_data/filesystem_data.md?data_source_type=spark&environment=abs#create-a-data-source). -### Procedure +### Procedure {#procedure-data-asset-directory-abs} . - . - . - Access to data files in Azure Blob Storage. 
- A pandas or Spark [Filesystem Data Source configured for Azure Blob Storage data files](/core/connect_to_data/filesystem_data/filesystem_data.md?data_source_type=spark&environment=abs#create-a-data-source). -### Procedure +### Procedure {#procedure-data-asset-file-abs} . - and [Spark dependencies](/core/set_up_a_gx_environment/install_additional_dependencies.md?dependencies=spark). - . - [A Filesystem Data Source configured to access data files in Google Cloud Storage](/core/connect_to_data/filesystem_data/filesystem_data.md?data_source_type=spark&environment=gcs#create-a-data-source). -### Procedure +### Procedure {#procedure-data-asset-directory-gcs} . - . - . - Access to data files in Google Cloud Storage. - [A pandas](/core/connect_to_data/filesystem_data/filesystem_data.md?data_source_type=pandas&environment=gcs#create-a-data-source) or [Spark Filesystem Data Source configured for Google Cloud Storage data files](/core/connect_to_data/filesystem_data/filesystem_data.md?data_source_type=spark&environment=gcs#create-a-data-source). -### Procedure +### Procedure {#procedure-data-asset-file-gcs} . - . - . - [A Spark Filesystem Data Source configured to access data files in a local or networked folder hierarchy](/core/connect_to_data/filesystem_data/filesystem_data.md?data_source_type=spark&environment=filesystem#create-a-data-source). -### Procedure +### Procedure {#procedure-data-asset-directory-lon} . - . - . - Access to data files (such as `.csv` or `.parquet` files) in a local or networked folder hierarchy. - [A pandas](/core/connect_to_data/filesystem_data/filesystem_data.md?data_source_type=pandas&environment=filesystem#create-a-data-source) or [Spark Filesystem Data Source configured for local or networked data files](/core/connect_to_data/filesystem_data/filesystem_data.md?data_source_type=spark&environment=filesystem#create-a-data-source). -### Procedure +### Procedure {#procedure-data-asset-file-lon} . 
- and [Spark dependencies](/core/set_up_a_gx_environment/install_additional_dependencies.md?dependencies=spark). - . - [A Filesystem Data Source configured to access data files in S3](/core/connect_to_data/filesystem_data/filesystem_data.md?data_source_type=spark&environment=s3#create-a-data-source). -### Procedure +### Procedure {#procedure-data-asset-directory-s3} . - . - . - Access to data files in S3. - [A Filesystem Data Source configured to access data files in S3](/core/connect_to_data/filesystem_data/filesystem_data.md?data_source_type=spark&environment=s3#create-a-data-source). -### Procedure +### Procedure {#procedure-data-asset-file-s3} - - Optional. To create a Spark Filesystem Data Source you will also need to [install the Spark Python dependencies](/core/set_up_a_gx_environment/install_additional_dependencies.md?dependencies=spark). - - Access to data files in Azure Blob Storage. -### Procedure +### Procedure {#procedure-data-source-abs} - - Optional. To create a Spark Filesystem Data Source you will also need to [install the Spark Python dependencies](/core/set_up_a_gx_environment/install_additional_dependencies.md?dependencies=spark). - - Access to data files in Google Cloud Storage. -### Procedure +### Procedure {#procedure-data-source-gcs} - - Optional. To create a Spark Filesystem Data Source you will also need to [install the Spark Python dependencies](/core/set_up_a_gx_environment/install_additional_dependencies.md?dependencies=spark). @@ -18,7 +18,7 @@ import PandasDefault from './_pandas_default.md' ::: -### Procedure +### Procedure {#procedure-data-source-lon} - - Optional. To create a Spark Filesystem Data Source you will also need to [install the Spark Python dependencies](/core/set_up_a_gx_environment/install_additional_dependencies.md?dependencies=spark). - - Access to data files on a S3 bucket. 
-### Procedure +### Procedure {#procedure-data-source-s3} \ No newline at end of file diff --git a/docs/docusaurus/docs/core/connect_to_data/sql_data/_create_a_batch_definition/_create_a_batch_definition.md b/docs/docusaurus/docs/core/connect_to_data/sql_data/_create_a_batch_definition/_create_a_batch_definition.md index 0cd23b5a2a0c..81d1a8859980 100644 --- a/docs/docusaurus/docs/core/connect_to_data/sql_data/_create_a_batch_definition/_create_a_batch_definition.md +++ b/docs/docusaurus/docs/core/connect_to_data/sql_data/_create_a_batch_definition/_create_a_batch_definition.md @@ -3,11 +3,11 @@ import Tabs from '@theme/Tabs'; import GxData from '../../../_core_components/_data.jsx' import PreReqDataContext from '../../../_core_components/prerequisites/_preconfigured_data_context.md' -### Prerequisites +### Prerequisites {#prerequisites-batch-definition} - . The variable `context` is used for your Data Context in the following example code. - [A Data Asset on a SQL Data Source](#create-a-data-asset). -### Procedure +### Procedure {#procedure-batch-definition} . The variable `context` is used for your Data Context in the following example code. - . -### Procedure +### Procedure {#procedure-data-asset} . - - . - . -### Procedure +### Procedure {#procedure-data-source} \ No newline at end of file diff --git a/docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py b/docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py index a6dad57282c9..f6dca8a12570 100644 --- a/docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py +++ b/docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py @@ -35,32 +35,36 @@ def set_up_context_for_example(context): # import great_expectations as gx +# Define your custom SQL query. 
+# +my_query = """ + SELECT + * + FROM + {batch} + WHERE + passenger_count > 6 or passenger_count < 0 + """ +# -# Define a custom Expectation that uses SQL by subclassing UnexpectedRowsExpectation -# -# -# -class ExpectPassengerCountToBeLegal(gx.expectations.UnexpectedRowsExpectation): - # - unexpected_rows_query: str = ( - "SELECT * FROM {batch} WHERE passenger_count > 6 or passenger_count < 0" - ) - # - description: str = "There should be no more than **6** passengers." - +# Customize how the Expectation renders in Data Docs. +# +my_description = "There should be no more than **6** passengers." +# +# Create an Expectation using the UnexpectedRowsExpectation class and your parameters. +# +expect_passenger_count_to_be_legal = gx.expectations.UnexpectedRowsExpectation( + unexpected_rows_query=my_query, + description=my_description, +) # +# Test the Expectation. context = gx.get_context() # Hide this set_up_context_for_example(context) -# Instantiate the custom Expectation -# -expectation = ExpectPassengerCountToBeLegal() -# - -# Test the Expectation data_source_name = "my_sql_data_source" data_asset_name = "my_data_asset" batch_definition_name = "my_batch_definition" @@ -71,5 +75,5 @@ class ExpectPassengerCountToBeLegal(gx.expectations.UnexpectedRowsExpectation): .get_batch() ) -batch.validate(expectation) +batch.validate(expect_passenger_count_to_be_legal) # diff --git a/docs/docusaurus/docs/core/customize_expectations/define_a_custom_expectation_class.md b/docs/docusaurus/docs/core/customize_expectations/define_a_custom_expectation_class.md index 2a99c65651cb..7d487d76acfb 100644 --- a/docs/docusaurus/docs/core/customize_expectations/define_a_custom_expectation_class.md +++ b/docs/docusaurus/docs/core/customize_expectations/define_a_custom_expectation_class.md @@ -17,7 +17,7 @@ Advantages of subclassing an Expectation and providing customized attributes rat - More descriptive Expectation names can be provided that indicate the business logic behind the 
Expectation. - Customized text can be provided to describe the Expectation when Data Docs are generated from Validation Results. -

Prerequisites

+## Prerequisites - . - . diff --git a/docs/docusaurus/docs/core/customize_expectations/use_sql_to_define_a_custom_expectation.md b/docs/docusaurus/docs/core/customize_expectations/use_sql_to_define_a_custom_expectation.md index 91028741bb14..090d103eeefb 100644 --- a/docs/docusaurus/docs/core/customize_expectations/use_sql_to_define_a_custom_expectation.md +++ b/docs/docusaurus/docs/core/customize_expectations/use_sql_to_define_a_custom_expectation.md @@ -9,15 +9,13 @@ import PrereqGxInstalled from '../_core_components/prerequisites/_gx_installatio import PrereqPreconfiguredDataContext from '../_core_components/prerequisites/_preconfigured_data_context.md'; import PrereqPreconfiguredDataSourceAndAsset from '../_core_components/prerequisites/_data_source_and_asset_connected_to_data.md'; -Among the available Expectations, the `UnexpectedRowsExpectation` is designed to facilitate the execution of SQL or Spark-SQL queries as the core logic for an Expectation. By default, `UnexpectedRowsExpectation` considers validation successful when no rows are returned by the provided SQL query. - -Like any other Expectation, you can instantiate the `UnexpectedRowsExpectation` directly. You can also customize an `UnexpectedRowsExpectation` in essentially the same manner as you would [define a custom Expectation](/core/customize_expectations/define_a_custom_expectation_class.md), by subclassing `UnexpectedRowsExpectation` and providing customized default attributes and text for Data Docs. However, there are some caveats around the `UnexpectedRowsExpectation`'s `unexpected_rows_query` attribute that deserve further detail. +Among the available Expectations, the `UnexpectedRowsExpectation` is designed to facilitate the execution of SQL queries as the core logic for an Expectation. By default, `UnexpectedRowsExpectation` considers validation successful when no rows are returned by the provided SQL query. -

Prerequisites

+## Prerequisites - . - . @@ -37,38 +35,35 @@ Like any other Expectation, you can instantiate the `UnexpectedRowsExpectation` -1. Create a new Expectation class that inherits the `UnexpectedRowsExpectation` class. - - The class name `UnexpectedRowsExpectation` describes the functionality of the Expectation: it finds rows with unexpected values. When you create a customized Expectation class you can provide a class name that is more indicative of your specific use case. In this example, the customized subclass of `UnexpectedRowsExpectation` will be used to find invalid passenger counts in taxi trip data: - - ```python title="Python" name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - define a more descriptive name for an UnexpectedRowsExpectation" - ``` - -2. Override the Expectation's `unexpected_rows_query` attribute. +1. Determine your custom SQL query. - The `unexpected_rows_query` attribute is a SQL or Spark-SQL query that returns a selection of rows from the Batch of data being validated. By default, rows that are returned have failed the validation check. + The `UnexpectedRowsExpectation` class takes an `unexpected_rows_query` attribute, which is a SQL or Spark-SQL query that returns a selection of rows from the Batch of data being validated. By default, rows that are returned have failed the validation check. - The `unexpected_rows_query` should be written in standard SQL or Spark-SQL syntax, except that it can also contain the special `{batch}` named query. When the Expectation is evaluated, the `{batch}` keyword will be replaced with the Batch of data that is configured for your Data Asset. + The custom SQL query should be written in the SQL dialect your database uses, except that it can also contain the special `{batch}` named query. When the Expectation is evaluated, the `{batch}` keyword will be replaced with the Batch of data that is configured for your Data Asset. 
- In this example, `unexpected_rows_query` will select any rows where the passenger count is greater than `6` or less than `0`. These rows will fail validation for this Expectation: + In this example, the custom query will select any rows where the passenger count is greater than `6` or less than `0`: - ```python title="Python" name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - define the query for an UnexpectedRowsExpectation" + ```python title="Python" name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - define query" ``` -3. Customize the rendering of the new Expectation when displayed in Data Docs. +2. Customize how the Expectation renders in Data Docs. - As with other Expectations, the `description` attribute contains the text describing the customized Expectation when your results are rendered into Data Docs. It can be set when an Expectation class is defined or edited as an attribute of an Expectation instance. You can format the `description` string with Markdown syntax: + As with other Expectations, the `description` attribute contains the text describing the Expectation when your results are rendered into Data Docs. You can format the `description` string with Markdown syntax: - ```python title="Python" name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - define a custom UnexpectedRowsExpectation" + ```python title="Python" name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - define description" ``` -4. Use the customized subclass as an Expectation. - - Once the customized Expectation subclass has been defined, instances of it can be created, added to Expectation Suites, and validated just like any other Expectation class: +3. Create a new Expectation using the `UnexpectedRowsExpectation` class and your parameters. 
+ + The class name `UnexpectedRowsExpectation` describes the functionality of the Expectation: it finds rows with unexpected values. When you create your Expectation, you can use a name that is more indicative of your specific use case. In this example, the customized Expectation will be used to find invalid passenger counts in taxi trip data: - ```python title="Python" name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - instantiate the custom SQL Expectation" + ```python title="Python" name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - create Expectation" ``` +4. Use your custom SQL Expectation. + + Now that you've created a custom SQL Expectation, you can [add it to an Expectation Suite](/core/define_expectations/organize_expectation_suites.md) and [validate it](/docs/core/run_validations/run_a_validation_definition.md) like any other Expectation. + diff --git a/docs/docusaurus/docs/core/define_expectations/_retrieve_a_batch_of_test_data/_from_a_batch_definition.md b/docs/docusaurus/docs/core/define_expectations/_retrieve_a_batch_of_test_data/_from_a_batch_definition.md index 0bbe4ee99e91..f5b49e3453c4 100644 --- a/docs/docusaurus/docs/core/define_expectations/_retrieve_a_batch_of_test_data/_from_a_batch_definition.md +++ b/docs/docusaurus/docs/core/define_expectations/_retrieve_a_batch_of_test_data/_from_a_batch_definition.md @@ -8,14 +8,14 @@ import PrereqDataSourceAndAssetConnectedToData from '../../_core_components/prer Batch Definitions both organize a Data Asset's records into Batches and provide a method for retrieving those records. Any Batch Definition can be used to retrieve a Batch of records for use in testing Expectations or data exploration. -## Prerequisites +## Prerequisites {#prerequisites-batch-definition} - . - . - . These examples assume the variable `context` contains your Data Context. - . 
-### Procedure +### Procedure {#procedure-batch-definition} . - . - . These examples assume the variable `context` contains your Data Context. - Data in a file format supported by pandas, such as `.csv` or `.parquet`. -### Procedure +### Procedure {#procedure-pandas-default} Prerequisites +## Prerequisites {#prerequisites-create-expectation} - . - . -### Procedure +### Procedure {#procedure-create-expectation} Prerequisites +:::note For API-managed Expectations only +If you use GX Cloud and GX Core together, note that only [API-managed Expectations](/cloud/expectations/manage_expectations.md#gx-managed-vs-api-managed-expectations) can be grouped into custom Expectation Suites. +::: + +## Prerequisites {#prerequisites-expectation-suites} - . - . - Recommended. . - Recommended. . -### Procedure +### Procedure {#procedure-expectation-suites} Prerequisites +## Prerequisites {#prerequisites-test-expectation} - . - . @@ -21,7 +21,7 @@ Data can be validated against individual Expectations. This workflow is general - [A Batch of sample data](/core/define_expectations/retrieve_a_batch_of_test_data.md). This guide assumes the variable `batch` contains your sample data. - . This guide assumes the variable `expectation` contains the Expectation to be tested. -### Procedure +### Procedure {#procedure-test-expectation} \d{4})-01.csv", ) ) @@ -39,6 +40,8 @@ def set_up_context_for_example(context): ) +df = "fake_data_frame" + # EXAMPLE SCRIPT STARTS HERE: # import great_expectations as gx @@ -53,9 +56,17 @@ def set_up_context_for_example(context): validation_definition = context.validation_definitions.get(validation_definition_name) # +# Define Batch parameters +# Accepted keys are determined by the BatchDefinition used to instantiate this ValidationDefinition. 
+# +batch_parameters_dataframe = {"dataframe": df} +batch_parameters_daily = {"year": "2020", "month": "1", "day": "17"} +batch_parameters_yearly = {"year": "2019"} +# + # Run the Validation Definition # -validation_results = validation_definition.run() +validation_results = validation_definition.run(batch_parameters=batch_parameters_yearly) # # Review the Validation Results diff --git a/docs/docusaurus/docs/core/run_validations/create_a_validation_definition.md b/docs/docusaurus/docs/core/run_validations/create_a_validation_definition.md index 9c82ea21bfe5..b745dc49b9c7 100644 --- a/docs/docusaurus/docs/core/run_validations/create_a_validation_definition.md +++ b/docs/docusaurus/docs/core/run_validations/create_a_validation_definition.md @@ -16,7 +16,7 @@ import StepRequestADataContext from '../_core_components/common_steps/_request_a A Validation Definition is a fixed reference that links a Batch of data to an Expectation Suite. It can be run by itself to validate the referenced data against the associated Expectations for testing or data exploration. Multiple Validation Definitions can also be provided to a Checkpoint which, when run, executes Actions based on the Validation Results for each provided Validation Definition. -

Prerequisites

+## Prerequisites - . - . diff --git a/docs/docusaurus/docs/core/run_validations/run_a_validation_definition.md b/docs/docusaurus/docs/core/run_validations/run_a_validation_definition.md index abb0da659270..20526f171939 100644 --- a/docs/docusaurus/docs/core/run_validations/run_a_validation_definition.md +++ b/docs/docusaurus/docs/core/run_validations/run_a_validation_definition.md @@ -10,8 +10,7 @@ import PrereqPreconfiguredDataContext from '../_core_components/prerequisites/_p import PrereqValidationDefinition from '../_core_components/prerequisites/_validation_definition.md'; - -

Prerequisites

+## Prerequisites - . - . @@ -35,10 +34,17 @@ import PrereqValidationDefinition from '../_core_components/prerequisites/_valid If you have created a new Validation Definition you can use the object returned by your Data Context's `.validation_definitions.add(...)` method. Alternatively, you can retrieve a previously configured Validation Definition by updating the variable `validation_definition_name` in the following code and executing it: - ```python title="Python name="docs/docusaurus/docs/core/run_validations/_examples/run_a_validation_definition.py - retrieve a Validation Definition" + ```python title="Python" name="docs/docusaurus/docs/core/run_validations/_examples/run_a_validation_definition.py - retrieve a Validation Definition" + ``` + +2. Define a Batch of data to validate + + The [Batch parameters accepted by a Validation Definition](/docs/reference/api/ValidationDefinition_class#great_expectations.ValidationDefinition.run) are determined by the Batch Definition used to instantiate it. + + ```python title="Python" name="docs/docusaurus/docs/core/run_validations/_examples/run_a_validation_definition.py - define batch parameters" ``` -2. Execute the Validation Definition's `run()` method: +3. Execute the Validation Definition's `run()` method: ```python title="Python" name="docs/docusaurus/docs/core/run_validations/_examples/run_a_validation_definition.py - run a Validation Definition" ``` @@ -51,23 +57,13 @@ import PrereqValidationDefinition from '../_core_components/prerequisites/_valid ::: -3. Review the Validation Results: +4. Review the Validation Results: ```python title="Python" name="docs/docusaurus/docs/core/run_validations/_examples/run_a_validation_definition.py - review Validation Results" ``` When you print the returned Validation Result object you will receive a JSON representation of the results.
By default this will include a `"results"` list that includes each Expectation in your Validation Definition's Expectation Suite, whether the Expectation was successfully met or failed to pass, and some summarized information explaining why the Expectation succeeded or failed. - :::tip Result presentation - - When using a [GX Cloud Data Context](/core/set_up_a_gx_environment/create_a_data_context.md?context_type=gx_cloud), you can view the Validation Results in the GX Cloud UI by following the url provided with: - - ```python title="Python" - print(validation_results.result_url) - ``` - - ::: -
@@ -77,4 +73,4 @@ import PrereqValidationDefinition from '../_core_components/prerequisites/_valid -
\ No newline at end of file +
diff --git a/docs/docusaurus/docs/core/set_up_a_gx_environment/_create_a_data_context/_cloud_data_context.md b/docs/docusaurus/docs/core/set_up_a_gx_environment/_create_a_data_context/_cloud_data_context.md index cb60a6d5e040..9811bd24714d 100644 --- a/docs/docusaurus/docs/core/set_up_a_gx_environment/_create_a_data_context/_cloud_data_context.md +++ b/docs/docusaurus/docs/core/set_up_a_gx_environment/_create_a_data_context/_cloud_data_context.md @@ -4,7 +4,7 @@ import TabItem from '@theme/TabItem' import PrereqPythonInstallation from '../../_core_components/prerequisites/_python_installation.md' import PrereqGxInstallation from '../../_core_components/prerequisites/_gx_installation.md' -## Prerequisites +## Prerequisites {#prerequisites-cloud-data-context} - - diff --git a/docs/docusaurus/docs/core/set_up_a_gx_environment/_create_a_data_context/_ephemeral_data_context.md b/docs/docusaurus/docs/core/set_up_a_gx_environment/_create_a_data_context/_ephemeral_data_context.md index 4dcea518509c..7d2042624764 100644 --- a/docs/docusaurus/docs/core/set_up_a_gx_environment/_create_a_data_context/_ephemeral_data_context.md +++ b/docs/docusaurus/docs/core/set_up_a_gx_environment/_create_a_data_context/_ephemeral_data_context.md @@ -4,7 +4,7 @@ import TabItem from '@theme/TabItem' import PrereqPythonInstallation from '../../_core_components/prerequisites/_python_installation.md' import PrereqGxInstallation from '../../_core_components/prerequisites/_gx_installation.md' -## Prerequisites +## Prerequisites {#prerequisites-ephemeral} - - diff --git a/docs/docusaurus/docs/core/set_up_a_gx_environment/_create_a_data_context/_file_data_context.md b/docs/docusaurus/docs/core/set_up_a_gx_environment/_create_a_data_context/_file_data_context.md index b2446f273875..cec3318a6748 100644 --- a/docs/docusaurus/docs/core/set_up_a_gx_environment/_create_a_data_context/_file_data_context.md +++ b/docs/docusaurus/docs/core/set_up_a_gx_environment/_create_a_data_context/_file_data_context.md 
@@ -4,7 +4,7 @@ import TabItem from '@theme/TabItem' import PrereqPythonInstallation from '../../_core_components/prerequisites/_python_installation.md' import PrereqGxInstallation from '../../_core_components/prerequisites/_gx_installation.md' -## Prerequisites +## Prerequisites {#prerequisites-file-data-context} - - diff --git a/docs/docusaurus/docs/core/set_up_a_gx_environment/_create_a_data_context/_quick_start.md b/docs/docusaurus/docs/core/set_up_a_gx_environment/_create_a_data_context/_quick_start.md index 78f6c616ae82..925ab7bf293d 100644 --- a/docs/docusaurus/docs/core/set_up_a_gx_environment/_create_a_data_context/_quick_start.md +++ b/docs/docusaurus/docs/core/set_up_a_gx_environment/_create_a_data_context/_quick_start.md @@ -4,7 +4,7 @@ import TabItem from '@theme/TabItem' import PrereqPythonInstallation from '../../_core_components/prerequisites/_python_installation.md' import PrereqGxInstallation from '../../_core_components/prerequisites/_gx_installation.md' -## Prerequisites +## Prerequisites {#prerequisites-quick-start} - - diff --git a/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_additional_dependencies/_amazon_s3.md b/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_additional_dependencies/_amazon_s3.md index 5e21e6ccd7b8..02c477b3e0cd 100644 --- a/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_additional_dependencies/_amazon_s3.md +++ b/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_additional_dependencies/_amazon_s3.md @@ -5,14 +5,14 @@ import InfoUsingAVirtualEnvironment from '../../_core_components/admonitions/_if GX Core uses the Python library `boto3` to access objects stored in Amazon S3 buckets, but you must configure your Amazon S3 account and credentials through AWS and the AWS command line interface (CLI). -## Prerequisites +## Prerequisites {#prerequisites-amazon} - The AWS CLI. 
See [Installing or updating the latest version of the AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html). - AWS credentials. See [Configuring the AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html). - - -## Installation +## Installation {#installation-amazon} Python interacts with AWS through the `boto3` library. GX Core uses the library in the background when working with AWS. Although you won't use `boto3` directly, must install it in your Python environment. @@ -32,7 +32,7 @@ To set up `boto3` with AWS, and use `boto3` within Python, see the [Boto3 docume python -m pip install boto3 ``` - :::tip + :::tip Tip If the `python -m pip install boto3` does not work, try: @@ -56,7 +56,7 @@ To set up `boto3` with AWS, and use `boto3` within Python, see the [Boto3 docume Run the following terminal command to install the optional dependencies required by GX Core to work with AWS S3: - :::info + :::info Info ::: diff --git a/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_additional_dependencies/_azure_blob_storage.md b/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_additional_dependencies/_azure_blob_storage.md index 951d1b81a135..0cf1d7dd3d7b 100644 --- a/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_additional_dependencies/_azure_blob_storage.md +++ b/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_additional_dependencies/_azure_blob_storage.md @@ -5,20 +5,20 @@ import InfoUsingAVirtualEnvironment from '../../_core_components/admonitions/_if Azure Blob Storage stores unstructured data on the Microsoft cloud data storage platform. To validate Azure Blob Storage data with GX Core you install additional Python libraries and define a connection string. -## Prerequisites +## Prerequisites {#prerequisites-azure} - An [Azure Storage account](https://docs.microsoft.com/en-us/azure/storage). 
- [Azure storage account access keys](https://docs.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage?tabs=azure-portal). - - -## Installation +## Installation {#installation-azure} 1. Install the Python dependencies for Azure Blob Storage support. Run the following code to install GX Core with the additional Python libraries needed to work with Azure Blob Storage: - :::info + :::info Info ::: @@ -34,7 +34,7 @@ Azure Blob Storage stores unstructured data on the Microsoft cloud data storage export AZURE_STORAGE_CONNECTION_STRING="DefaultEndpointsProtocol=https;EndpointSuffix=core.windows.net;AccountName=;AccountKey=" ``` - :::info + :::info Info You can manage your credentials for all environments and Data Sources by storing them as environment variables. To do this, enter `export ENV_VARIABLE_NAME=env_var_value` in the terminal or add the equivalent command to your `~/.bashrc` file. diff --git a/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_additional_dependencies/_google_cloud_platform.md b/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_additional_dependencies/_google_cloud_platform.md index c32c0e5d106e..06b2e0c07da2 100644 --- a/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_additional_dependencies/_google_cloud_platform.md +++ b/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_additional_dependencies/_google_cloud_platform.md @@ -5,7 +5,7 @@ import InfoUsingAVirtualEnvironment from '../../_core_components/admonitions/_if To validate Google Cloud Platform (GCP) data with GX Core, you create your GX Python environment, configure your GCP credentials, and install GX Core locally with the additional dependencies to support GCP. -## Prerequisites +## Prerequisites {#prerequisites-cloud} - A [GCP service account](https://cloud.google.com/iam/docs/service-account-overview) with permissions to access GCP resources and storage Objects. - The `GOOGLE_APPLICATION_CREDENTIALS` environment variable is set. 
See the Google documentation [Set up Application Default Credentials](https://cloud.google.com/docs/authentication/provide-credentials-adc). @@ -13,7 +13,7 @@ To validate Google Cloud Platform (GCP) data with GX Core, you create your GX Py - - -## Installation +## Installation {#installation-cloud} 1. Ensure your GCP credentials are correctly configured. This process includes: @@ -27,7 +27,7 @@ To validate Google Cloud Platform (GCP) data with GX Core, you create your GX Py Run the following terminal command to install GX Core with the additional dependencies for GCP support: - :::info + :::info Info ::: diff --git a/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_additional_dependencies/_spark.md b/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_additional_dependencies/_spark.md index e14249209e6c..5a234f650a0e 100644 --- a/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_additional_dependencies/_spark.md +++ b/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_additional_dependencies/_spark.md @@ -5,12 +5,12 @@ import InfoUsingAVirtualEnvironment from '../../_core_components/admonitions/_if To validate data while using Spark to read from dataframes or file formats such as `.csv` and `.parquet` with GX Core, you create your GX Python environment, install GX Core locally, and then configure the necessary dependencies. -## Prerequisites +## Prerequisites {#prerequisites-spark} - - -## Installation +## Installation {#installation-spark} 1. Optional. Activate your virtual environment. 
diff --git a/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_additional_dependencies/_sql.md b/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_additional_dependencies/_sql.md index c05b91295215..842847e6c614 100644 --- a/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_additional_dependencies/_sql.md +++ b/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_additional_dependencies/_sql.md @@ -6,12 +6,12 @@ import SqlDialectInstallationCommands from './_sql_dialect_installation_commands To validate data stored on SQL databases with GX Core, you create your GX Python environment, install GX Core locally, and then configure the necessary dependencies. -## Prerequisites +## Prerequisites {#prerequisites-sql} - - -## Installation +## Installation {#installation-sql} 1. Run the pip command to install the dependencies for your data's SQL dialect. @@ -34,7 +34,7 @@ To validate data stored on SQL databases with GX Core, you create your GX Python export MY_DB_CONNECTION_STRING= ``` - :::info + :::info Info You can manage your credentials for all environments and Data Sources by storing them as environment variables. To do this, enter `export ENV_VARIABLE_NAME=env_var_value` in the terminal or add the equivalent command to your `~/.bashrc` file. diff --git a/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_gx/_databricks_installation.md b/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_gx/_databricks_installation.md index 240daedb26a7..98a1f9c5dfb9 100644 --- a/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_gx/_databricks_installation.md +++ b/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_gx/_databricks_installation.md @@ -6,12 +6,12 @@ To avoid configuring external resources, you'll use the [Databricks File System DBFS is a distributed file system mounted in a Databricks workspace and available on Databricks clusters. 
Files on DBFS can be written and read as if they were on a local filesystem by adding the /dbfs/ prefix to the path. It also persists in object storage, so you won’t lose data after terminating a cluster. See the Databricks documentation for best practices, including mounting object stores. -### Additional prerequisites +### Additional prerequisites {#additional-prerequisites-databricks} - A complete Databricks setup, including a running Databricks cluster with an attached notebook - Access to [DBFS](https://docs.databricks.com/dbfs/index.html) -### Installation and setup +### Installation and setup {#installation-setup-databricks} 1. Run the following command in your notebook to install GX as a notebook-scoped library: diff --git a/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_gx/_emr_spark_installation.md b/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_gx/_emr_spark_installation.md index 2a40ec6c86ac..c6f597c57240 100644 --- a/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_gx/_emr_spark_installation.md +++ b/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_gx/_emr_spark_installation.md @@ -1,11 +1,11 @@ Use the information provided here to install GX on an EMR Spark cluster and instantiate a Data Context without a full configuration directory. -### Additional prerequisites +### Additional prerequisites {#additional-prerequisites-spark} - An EMR Spark cluster. - Access to the EMR Spark notebook. -### Installation and setup +### Installation and setup {#installation-setup-spark} 1. 
To install GX on your EMR Spark cluster copy this code snippet into a cell in your EMR Spark notebook and then run it: diff --git a/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_gx/_gx_cloud_installation.md b/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_gx/_gx_cloud_installation.md index 166a00df75a4..2b163ae83306 100644 --- a/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_gx/_gx_cloud_installation.md +++ b/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_gx/_gx_cloud_installation.md @@ -3,7 +3,7 @@ import GxData from '../../_core_components/_data.jsx' GX Cloud provides a web interface for using GX to validate your data without creating and running complex Python code. However, GX Core can connect to a GX Cloud account if you want to customize or automate your workflows through Python scripts. -### Installation and setup +### Installation and setup {#installation-setup-cloud} To deploy a GX Agent, which serves as an intermediary between GX Cloud's interface and your organization's data stores, see [Connect GX Cloud](/cloud/connect/connect_lp.md). The GX Agent serves all GX Cloud users within your organization. If a GX Agent has already been deployed for your organization, you can use the GX Cloud online application without further installation or setup. diff --git a/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_gx/_local_installation.md b/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_gx/_local_installation.md index b003f68d2108..ee59866f6632 100644 --- a/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_gx/_local_installation.md +++ b/docs/docusaurus/docs/core/set_up_a_gx_environment/_install_gx/_local_installation.md @@ -3,7 +3,7 @@ import GxData from '../../_core_components/_data.jsx' GX Core is a Python library and as such can be used with a local Python installation to access the functionality of GX through Python scripts. 
-### Installation and setup +### Installation and setup {#installation-setup-local} 1. Optional. Activate your virtual environment. diff --git a/docs/docusaurus/docs/core/trigger_actions_based_on_results/_examples/create_a_custom_action.py b/docs/docusaurus/docs/core/trigger_actions_based_on_results/_examples/create_a_custom_action.py new file mode 100644 index 000000000000..b7c8df3468c9 --- /dev/null +++ b/docs/docusaurus/docs/core/trigger_actions_based_on_results/_examples/create_a_custom_action.py @@ -0,0 +1,53 @@ +""" +This is an example script for how to create a custom Action. + +To test, run: +pytest --docs-tests -k "docs_example_create_a_custom_action" tests/integration/test_script_runner.py +""" + +# EXAMPLE SCRIPT STARTS HERE: + +# + +from typing import Literal + +from typing_extensions import override + +from great_expectations.checkpoint import ( + ActionContext, + CheckpointResult, + ValidationAction, +) + + +# 1. Extend the `ValidationAction` class. +# +class MyCustomAction(ValidationAction): + # + + # 2. Set the `type` attribute to a unique string that identifies the Action. + # + type: Literal["my_custom_action"] = "my_custom_action" + # + + # 3. Override the `run()` method to perform the desired task. + # + @override + def run( + self, + checkpoint_result: CheckpointResult, + action_context: ActionContext, # Contains results from prior Actions in the same Checkpoint run. + ) -> dict: + # Domain-specific logic + self._do_my_custom_action(checkpoint_result) + # Return information about the Action + return {"some": "info"} + + def _do_my_custom_action(self, checkpoint_result: CheckpointResult): + # Perform custom logic based on the validation results. + ... 
+ + # + + +# diff --git a/docs/docusaurus/docs/core/trigger_actions_based_on_results/choose_a_result_format/_result_output_reference_table.md b/docs/docusaurus/docs/core/trigger_actions_based_on_results/choose_a_result_format/_result_output_reference_table.md index 3a42bb9d739a..ce24b5c0246d 100644 --- a/docs/docusaurus/docs/core/trigger_actions_based_on_results/choose_a_result_format/_result_output_reference_table.md +++ b/docs/docusaurus/docs/core/trigger_actions_based_on_results/choose_a_result_format/_result_output_reference_table.md @@ -12,6 +12,6 @@ The following table lists the fields that can be found in the `result` dictionar | partial_unexpected_list | A partial list of values that violate the Expectation. (Up to 20 values by default.) | | partial_unexpected_index_list | A partial list the unexpected values in the column, as defined by the columns in `unexpected_index_column_names`. (Up to 20 indecies by default.) | | partial_unexpected_counts | A partial list of values and counts, showing the number of times each of the unexpected values occur. (Up to 20 unexpected value/count pairs by default.) | -| unexpected_index_list | A list of the indices of the unexpected values in the column, as defined by the columns in `unexpected_index_column_names`. | -| unexpected_index_query | A query that can be used to retrieve all unexpected values (SQL and Spark), or the full list of unexpected indices (Pandas). | +| unexpected_index_list | A list of the indices of the unexpected values in the column, as defined by the columns in `unexpected_index_column_names`. This only applies to Expectations that have a yes/no answer for each row. | +| unexpected_index_query | A query that can be used to retrieve all unexpected values (SQL and Spark), or the full list of unexpected indices (Pandas). This only applies to Expectations that have a yes/no answer for each row. | | unexpected_list | A list of up to 200 values that violate the Expectation. 
| diff --git a/docs/docusaurus/docs/core/trigger_actions_based_on_results/choose_a_result_format/choose_a_result_format.md b/docs/docusaurus/docs/core/trigger_actions_based_on_results/choose_a_result_format/choose_a_result_format.md index 98d1b25c92e8..7eae769c6189 100644 --- a/docs/docusaurus/docs/core/trigger_actions_based_on_results/choose_a_result_format/choose_a_result_format.md +++ b/docs/docusaurus/docs/core/trigger_actions_based_on_results/choose_a_result_format/choose_a_result_format.md @@ -27,7 +27,7 @@ import AdditionalSettingsComplete from './_additional_settings/_COMPLETE.md'; When you validate data with GX Core you can set the level of detail returned in your Validation Results by specifying a value for the optional `result_format` parameter. These settings will be applied to the results returned by each validated Expectation. -Typical use cases customizing Result Format settings include summarizing values that cause Expectations to fail durring data exploration, retrieving failed rows to facilitate cleaning data, or excluding excess Validation Result data in published Data Docs. +Typical use cases for customizing Result Format settings include summarizing values that cause Expectations to fail during data exploration, retrieving failed rows to facilitate cleaning data, or excluding excess Validation Result data in published Data Docs. ## Define a Result Format configuration @@ -92,11 +92,11 @@ The `result_format` parameter takes in a dictionary of configuration settings.
-3. Apply the Result Format to a Checkpoint or Validation Definition. +3. Apply the Result Format to a Checkpoint, Validation Definition, or Batch. You can define a persisting Result Format configuration by passing it in as the `result_format` parameter when a Checkpoint is created. The Result Format will be applied every time the Checkpoint is run. For more information on creating a Checkpoint see [Create a Checkpoint with Actions](/core/trigger_actions_based_on_results/create_a_checkpoint_with_actions.md). - You can also pass a `result_format` configuration in to a Validation Definition's `.run(...)` method to apply the Result Format configuration to the Validation Results that are returned. This `result_format` configuration does not persist with the Validation Definition and will only apply to the current execution of the `.run(...)` method. For more information on running a Validation Definition outside of a Checkpoint see [Run a Validation Definition](/core/run_validations/run_a_validation_definition.md). + You can also pass a `result_format` configuration at runtime to the `.run(...)` method of a Validation Definition or to the `.validate(...)` method of a Batch. This `result_format` configuration does not persist with the Validation Definition or Batch and will apply to only the current execution of the `.run(...)` or `.validate(...)` method. For more information see [Run a Validation Definition](/core/run_validations/run_a_validation_definition.md) or [Test an Expectation](/docs/core/define_expectations/test_an_expectation.md). 
## Validation Results reference tables diff --git a/docs/docusaurus/docs/core/trigger_actions_based_on_results/create_a_checkpoint_with_actions.md b/docs/docusaurus/docs/core/trigger_actions_based_on_results/create_a_checkpoint_with_actions.md index 726c7481984d..0dc2542da783 100644 --- a/docs/docusaurus/docs/core/trigger_actions_based_on_results/create_a_checkpoint_with_actions.md +++ b/docs/docusaurus/docs/core/trigger_actions_based_on_results/create_a_checkpoint_with_actions.md @@ -11,14 +11,14 @@ import PrereqValidationDefinition from '../_core_components/prerequisites/_valid A Checkpoint executes one or more Validation Definitions and then performs a set of Actions based on the Validation Results each Validation Definition returns. -

Prerequisites

+## Prerequisites - . - . - . In this guide the variable `context` is assumed to contain your Data Context. - . -### Procedure +## Procedure . +- . + +## Procedure + + + + + +1. Create a new custom Action class that inherits the `ValidationAction` class. + + ```python title="Python" name="docs/docusaurus/docs/core/trigger_actions_based_on_results/_examples/create_a_custom_action.py - extend class" + ``` + +2. Set a unique name for `type`. + + ```python title="Python" name="docs/docusaurus/docs/core/trigger_actions_based_on_results/_examples/create_a_custom_action.py - set type" + ``` + +3. Override the `run()` method with the logic for the Action. + + ```python title="Python" name="docs/docusaurus/docs/core/trigger_actions_based_on_results/_examples/create_a_custom_action.py - override run" + ``` + + + + + +```python title="Python" name="docs/docusaurus/docs/core/trigger_actions_based_on_results/_examples/create_a_custom_action.py - full code example" +``` + + + + + +Now you can use your custom Action like you would any built-in Action. [Create a Checkpoint with Actions](/core/trigger_actions_based_on_results/create_a_checkpoint_with_actions.md) to start automating responses to Validation Results. \ No newline at end of file diff --git a/docs/docusaurus/docs/core/trigger_actions_based_on_results/run_a_checkpoint.md b/docs/docusaurus/docs/core/trigger_actions_based_on_results/run_a_checkpoint.md index 0c5efc77b800..ed11424fdc74 100644 --- a/docs/docusaurus/docs/core/trigger_actions_based_on_results/run_a_checkpoint.md +++ b/docs/docusaurus/docs/core/trigger_actions_based_on_results/run_a_checkpoint.md @@ -13,7 +13,8 @@ Running a Checkpoint will cause it to validate all of its Validation Definitions At runtime, a Checkpoint can take in a `batch_parameters` dictionary that selects the Batch to validate from each Validation Definition. 
A Checkpoint will also accept an `expectation_parameters` dictionary that provides values for the parameters of any Expectations that have been configured to accept parameters at runtime. -

Prerequisites

+## Prerequisites + - . - . - . @@ -21,6 +22,10 @@ At runtime, a Checkpoint can take in a `batch_parameters` dictionary that select ### Procedure +:::tip Generate a code snippet to validate GX-managed Expectations +If you want to use the API to run a validation for [GX-managed Expectations](/cloud/expectations/manage_expectations.md#gx-managed-vs-api-managed-expectations) in a GX Cloud deployment, you can use the GX Cloud UI to generate the necessary code. For the Data Asset of interest, go to the **Validations** tab, click the code snippet icon next to the **Validate** button, and then click **Generate snippet**. +::: + diff --git a/docs/docusaurus/docs/core/trigger_actions_based_on_results/trigger_actions_based_on_results.md b/docs/docusaurus/docs/core/trigger_actions_based_on_results/trigger_actions_based_on_results.md index f1f20287bd6d..a912e083d601 100644 --- a/docs/docusaurus/docs/core/trigger_actions_based_on_results/trigger_actions_based_on_results.md +++ b/docs/docusaurus/docs/core/trigger_actions_based_on_results/trigger_actions_based_on_results.md @@ -23,6 +23,14 @@ import OverviewCard from '@site/src/components/OverviewCard'; to="/core/trigger_actions_based_on_results/create_a_checkpoint_with_actions" icon="/img/expectation_icon.svg" /> + + - Learn how to use GX to address key data quality issues. + Learn how to use GX to address key data quality scenarios. A comprehensive data quality strategy relies on a multidimensional approach to achieving and maintaining high-quality data. GX enables you to define and validate data quality checks across a variety of data quality and observability dimensions. 
diff --git a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/freshness.md b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/freshness.md index f0fdd9ec6496..ce6c1efd12dd 100644 --- a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/freshness.md +++ b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/freshness.md @@ -17,7 +17,7 @@ This article assumes basic familiarity with GX components and workflows. If you' ## Data preview -The examples presented in this article use a synthetic sensor reading dataset; a sample of this dataset is shown below. The data is available a public Postgres database table, as described in the examples, and is also available in [CSV format](https://raw.githubusercontent.com/great-expectations/great_expectations/develop/tests/test_sets/learn_data_quality_use_cases/freshness_sensor_readings.csv). +The examples presented in this article use a synthetic sensor reading dataset; a sample of this dataset is shown below. The data is available in a public Postgres database table, as described in the examples, and is also available in [CSV format](https://raw.githubusercontent.com/great-expectations/great_expectations/develop/tests/test_sets/learn_data_quality_use_cases/freshness_sensor_readings.csv). | reading_id | sensor_id | temperature_k | reading_ts | created_at | |:-----------|:----------|:--------------|:--------------------|:--------------------| @@ -53,7 +53,7 @@ This Expectation can be used on a timestamp column to expect that the maximum, o ### Expect column minimum to be between -Checking for data freshness may also center on validating how old the data is. This Expectation can be used on a timestamp column to validate the the minimum, or oldest, timestamp in a column is within an expected range of time. +Checking for data freshness may also center on validating how old the data is. 
This Expectation can be used on a timestamp column to validate the minimum, or oldest, timestamp in a column is within an expected range of time. **Example**: Validate that the oldest data is no older than a certain time. @@ -79,7 +79,7 @@ The examples in this section showcase how to use available features in GX Cloud ### Create a freshness custom SQL Expectation using GX Cloud -**Goal**: Create a custom SQL Expectations in GX Cloud to validate data freshness and schedule data validation to run hourly. +**Goal**: Create custom SQL Expectations in GX Cloud to validate data freshness and schedule data validation to run hourly. Use the GX Cloud UI to walk through the following steps: @@ -105,7 +105,7 @@ Use the GX Cloud UI to walk through the following steps: where extract(epoch from (age(current_timestamp, most_recent_reading))) > 5*60 ``` -5. Edit the active Expectation Suite Validation schedule to modify the frequency of recurring data validation. Select a **Frequency** of *Every 1 hour* to run recurring freshness checks each hour. +5. Edit the active Validation schedule to modify the frequency of recurring data validation. Select a **Frequency** of *Every 1 hour* to run recurring freshness checks each hour. 6. Inspect the Validation Results on the `freshness_sensor_readings` Data Asset once validation has run. @@ -113,7 +113,7 @@ Use the GX Cloud UI to walk through the following steps: * The freshness Expectation that sensor readings are available in the database no more than 10 minutes after initial capture passes, as this condition holds true for all sensor readings accumulated in the database. -* The freshness Expectation that new sensor readings have arrived in the database within the last 5 minutes fails, as the sample data represents readings from a past point in time and reading are not being continually refreshed in the Postgres database. 
However, if sensor readings were to be arriving regularly, this Expectation would be able validate whether or not fresh data was arriving in the required time frame. +* The freshness Expectation that new sensor readings have arrived in the database within the last 5 minutes fails, as the sample data represents readings from a past point in time and readings are not being continually refreshed in the Postgres database. However, if sensor readings were to be arriving regularly, this Expectation would be able to validate whether or not fresh data was arriving in the required time frame. **GX solution**: GX enables dynamic data freshness validation, relative to the current point in time, through the use of custom SQL Expectations. Though this example showcased use of a custom SQL Expectation in GX Cloud, this feature is also available in GX Core. @@ -151,7 +151,7 @@ Effective data freshness validation requires an understanding of appropriate thr 5. **Ensure regulatory compliance**: Research industry-specific freshness and timeliness regulations and verify that thresholds meet mandatory requirements. -For example, consider a case of validating freshness for sales transaction data generated by a hypothetical retail store. The business impact of stale data is high—it can lead to stockouts and lost revenue. The source systems (in-store point of sales terminals and an an e-commerce platform) provide near real-time data. The data pipeline has moderate complexity with several transformation steps. There are clear daily and weekly sales cycles, with lower volumes sold overnight and on Sundays. +For example, consider a case of validating freshness for sales transaction data generated by a hypothetical retail store. The business impact of stale data is high—it can lead to stockouts and lost revenue. The source systems (in-store point of sales terminals and an e-commerce platform) provide near real-time data. 
The data pipeline has moderate complexity with several transformation steps. There are clear daily and weekly sales cycles, with lower volumes sold overnight and on Sundays. Balancing these factors, appropriate freshness thresholds for this retail sales data could be: - During peak hours: The latest transaction timestamp occurred within the last 30 minutes. diff --git a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/integrity.md b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/integrity.md index 63c8930e7b45..4b2be4c0a08e 100644 --- a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/integrity.md +++ b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/integrity.md @@ -155,7 +155,7 @@ Use the GX Cloud UI to implement the following steps: where extract(epoch from (age(received_ts, sent_ts))) > 45 ``` -5. Validate the `integrity_transfers` Data Asset once again, now with two Expectations in the Expectation Suite. +5. Validate the `integrity_transfers` Data Asset once again, now with two Expectations. 6. Review the Validation Results. The increased specificity of the custom SQL Expectation enables you to see that one of the transfer transaction records has failed to meet the business logic-informed definition of integrity. @@ -227,7 +227,7 @@ Run the following GX Core workflow. ## Scenarios -The following scenarios provide insight into how Great Expectations can help enforce data integrity across various domains. By combining built-in Expectations with custom SQL checks, organizations can create comprehensive validation frameworks that catch inconsistencies, prevent data quality issues, and ensure compliance with business rules and regulations. +The following scenarios provide insight into how Great Expectations can help enforce data integrity across various domains. 
By combining built-in Expectations with custom SQL checks, organizations can create comprehensive validation frameworks that catch inconsistencies, prevent data quality problems, and ensure compliance with business rules and regulations. ### Cross-system financial reconciliation diff --git a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/missingness.md b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/missingness.md index 7c721f50035b..e0dedd80bb61 100644 --- a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/missingness.md +++ b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/missingness.md @@ -5,7 +5,7 @@ title: 'Manage missing data with GX' Missing data, also known as missingness, poses a significant challenge in data quality management. Missing data occurs when expected information is absent from a dataset, often appearing as `NULL` values in databases or manifesting differently across various systems. Effectively managing this issue is crucial for maintaining data integrity and reliability since unaddressed missing data can lead to disrupted calculations, skewed analyses, and compromised data analytics tasks. -Great Expectations (GX) offers a robust solution for addressing missing data through a comprehensive suite of Expectations that allow users to define and enforce data quality rules. By integrating GX into your data pipelines, you can establish robust validation processes that catch issues early, ensuring your dataset remains clean, consistent, and ready for accurate reporting, predictive modeling, and other advanced analytics applications. +Great Expectations (GX) offers a robust solution for addressing missing data through a comprehensive collection of Expectations that allow users to define and enforce data quality rules. 
By integrating GX into your data pipelines, you can establish robust validation processes that catch issues early, ensuring your dataset remains clean, consistent, and ready for accurate reporting, predictive modeling, and other advanced analytics applications. In this guide, you will learn how to leverage GX to effectively handle missing data. This includes applying Expectations and making informed decisions about managing various types of missingness. By following these steps, you can ensure your datasets maintain high quality, thus enabling more accurate and reliable data-driven insights. @@ -26,9 +26,7 @@ In this dataset, you'll notice missing data in the `type` and `transfer_date` co ## Key missingness Expectations -GX provides a suite of missingness-focused Expectations to manage missing data in your datasets. These Expectations can be created using the GX Cloud UI or the GX Core Python library. - -![Add a missingness Expectation in GX Cloud](./missingness_resources/gx_cloud_missingness_expectations_add.gif) +GX provides missingness-focused Expectations to manage missing data in your datasets. These Expectations can be created using the GX Cloud UI or the GX Core Python library. ### Expect Column Values To Be Null @@ -64,9 +62,7 @@ Ensures that values within a specific column are not `NULL`. The examples given in this section provide insight into how and when to apply missingness Expectations to identify different varieties of missing data. The focus of this guidance is on the specifics of the Expectations, rather than the overall workflow, which can be implemented using either GX Cloud or GX Core. -[GX Cloud](/cloud/overview/gx_cloud_overview.md) provides a visual interface to create and run workflows for managing missing data. The GX Cloud workflow to handle data missingness is intuitive and straightforward: create a Data Asset, define Expectations for missing values, run a Validation, and review Validation Results. 
- -![Validate missingness Expectations in GX Cloud](./missingness_resources/gx_cloud_missingness_expectations_validate.gif) +[GX Cloud](/cloud/overview/gx_cloud_overview.md) provides a visual interface to create and run workflows for managing missing data. The GX Cloud workflow to handle data missingness is: create a Data Asset, define Expectations for missing values, run a Validation, and review Validation Results. [GX Core](/core/introduction/gx_overview.md) can be used to complement and extend the capabilities of GX Cloud to programmatically implement custom workflows for handling missing data. diff --git a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/missingness_resources/gx_cloud_missingness_expectations_add.gif b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/missingness_resources/gx_cloud_missingness_expectations_add.gif deleted file mode 100644 index d31d61b3c9f5..000000000000 Binary files a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/missingness_resources/gx_cloud_missingness_expectations_add.gif and /dev/null differ diff --git a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/missingness_resources/gx_cloud_missingness_expectations_validate.gif b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/missingness_resources/gx_cloud_missingness_expectations_validate.gif deleted file mode 100644 index 2d890b1436bf..000000000000 Binary files a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/missingness_resources/gx_cloud_missingness_expectations_validate.gif and /dev/null differ diff --git a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/schema.md b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/schema.md index c9c6f6d39f2a..c0887270e140 100644 --- a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/schema.md +++ b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/schema.md @@ -17,11 +17,7 @@ established rules, such as verifying column names, 
data types, and the presence Schema changes, whether planned or unexpected, can significantly impact data integrity and the performance of data-dependent systems. -Great Expectations (GX) provides a powerful suite of schema-focused Expectations that allow you -to define and enforce the structural integrity of your datasets. These tools enable you to establish -robust schema validation within your data pipelines, helping to catch and address schema-related -issues before they propagate through your data ecosystem. This guide will walk you through -leveraging these Expectations to implement effective schema validation in your data workflows. +Great Expectations (GX) provides schema-focused Expectations that allow you to define and enforce the structural integrity of your datasets. You can automatically generate some of these Expectations when you add a new Data Asset in GX Cloud. These tools enable you to establish robust schema validation within your data pipelines, helping to catch and address schema-related issues before they propagate through your data ecosystem. This guide will walk you through leveraging these Expectations to implement effective schema validation in your data workflows. ## Prerequisite knowledge This article assumes basic familiarity with GX components and workflows. See the [GX Overview](/core/introduction/gx_overview.md) for additional content on GX fundamentals. @@ -41,11 +37,9 @@ You can [access this dataset](https://raw.githubusercontent.com/great-expectatio ## Key schema Expectations -GX offers a collection of Expectations for schema validation, all of which can be added to an Expectation Suite directly from the GX Cloud UI or using the GX Core Python library. +GX offers a collection of Expectations for schema validation, all of which can be added directly from the GX Cloud UI or using the GX Core Python library. Some of them can be automatically generated when you add a new Data Asset using GX Cloud. 
-![Add a schema Expectation in GX Cloud](./schema_resources/gx_cloud_schema_expectations_add.gif) - -The schema Expectations provide straightforward, practical solutions for common validation scenarios and can also be used to satisfy more nuanced validation needs. +The schema Expectations provide basic practical solutions for common validation scenarios and can also be used to satisfy more nuanced validation needs. ### Column-level Expectations @@ -144,6 +138,10 @@ flexibility where column presence is more critical than their sequence. ```python title="Python" name="docs/docusaurus/docs/reference/learn/data_quality_use_cases/schema_resources/schema_expectations.py ExpectTableColumnsToMatchSet" ``` +:::tip Automate this rule +When you [create a new Data Asset](/cloud/data_assets/manage_data_assets.md#add-a-data-asset-from-an-existing-data-source), you can choose to automatically generate this Expectation to test that columns don't diverge from the initial set over time. +::: + View `ExpectTableColumnsToMatchSet` in the [Expectation Gallery](https://greatexpectations.io/expectations/expect_table_columns_to_match_set). @@ -175,9 +173,7 @@ Gallery](https://greatexpectations.io/expectations/expect_table_column_count_to_ ## Examples -**GX Cloud** provides a visual interface to create and run schema validation workflows. The GX Cloud workflow for validating data schema is to create a Data Asset, define an Expectation Suite, run a Checkpoint, and review Validation Results. - -![Validate schema Expectations in GX Cloud](./schema_resources/gx_cloud_schema_expectations_validate.gif) +**GX Cloud** provides a visual interface to create and run schema validation workflows. The GX Cloud workflow for validating data schemas is to create Data Assets with automatically generated Expectations, manually define additional Expectations, run Validations, and review Validation Results. 
**GX Core** can be used to complement and extend the capabilities of GX Cloud to programmatically implement custom validation workflows. The examples provided in this section feature use cases that leverage GX Core to achieve schema validation. diff --git a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/schema_resources/gx_cloud_schema_expectations_add.gif b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/schema_resources/gx_cloud_schema_expectations_add.gif deleted file mode 100644 index 72e5329ec6be..000000000000 Binary files a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/schema_resources/gx_cloud_schema_expectations_add.gif and /dev/null differ diff --git a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/schema_resources/gx_cloud_schema_expectations_validate.gif b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/schema_resources/gx_cloud_schema_expectations_validate.gif deleted file mode 100644 index 4606ad2b6366..000000000000 Binary files a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/schema_resources/gx_cloud_schema_expectations_validate.gif and /dev/null differ diff --git a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/schema_resources/schema_strict_and_relaxed.py b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/schema_resources/schema_strict_and_relaxed.py index e1d8f3669ff6..49fcf65e4a39 100644 --- a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/schema_resources/schema_strict_and_relaxed.py +++ b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/schema_resources/schema_strict_and_relaxed.py @@ -63,7 +63,7 @@ ) strict_suite.add_expectation( - gxe.ExpectColumnValuesToBeOfType(column="transfer_amount", type_="DOUBLE_PRECISION") + gxe.ExpectColumnValuesToBeOfType(column="transfer_amount", type_="DOUBLE PRECISION") ) strict_results = batch.validate(strict_suite) @@ -85,7 +85,7 @@ relaxed_suite.add_expectation( 
gxe.ExpectColumnValuesToBeInTypeList( - column="transfer_amount", type_list=["DOUBLE_PRECISION", "STRING"] + column="transfer_amount", type_list=["DOUBLE PRECISION", "STRING"] ) ) diff --git a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/schema_resources/schema_validation_over_time.py b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/schema_resources/schema_validation_over_time.py index 3e031139e0c5..e1942e7d6741 100644 --- a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/schema_resources/schema_validation_over_time.py +++ b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/schema_resources/schema_validation_over_time.py @@ -69,7 +69,8 @@ def add_column_to_transfers_table() -> None: "recipient_fullname", "transfer_amount", "transfer_date", - ] + ], + exact_match=False, ) ) suite.add_expectation(gxe.ExpectTableColumnCountToEqual(value=5)) diff --git a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/uniqueness.md b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/uniqueness.md index 0660c32dd6e6..ff99a34fb8e1 100644 --- a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/uniqueness.md +++ b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/uniqueness.md @@ -8,7 +8,7 @@ import TabItem from '@theme/TabItem'; Data uniqueness is a fundamental aspect of data quality that ensures distinct values are present only once where expected in a dataset. Uniqueness constraints are often applied to columns that serve as primary keys, composite keys, or other unique identifiers. Validating uniqueness is critical for maintaining data integrity, preventing duplication, and enabling accurate analysis. -Failing to validate uniqueness can lead to various data quality issues: +Failing to validate uniqueness can lead to various data quality problems: * Duplicates can skew analytics, leading to incorrect conclusions and flawed decision-making. 
For example, duplicate transactions could overstate revenue. * Non-unique identifiers can cause data corruption when merging or joining datasets, resulting in lost data or mismatched records. @@ -74,7 +74,7 @@ This Expectation validates that the combination of values across multiple column This Expectation validates that the proportion of unique values in a column is between a specified minimum and maximum value. It is useful for ensuring a certain level of uniqueness in a column without requiring full uniqueness. -**Example**: Validate that least 90% of all customer `email_address` values are unique. +**Example**: Validate that at least 90% of all customer `email_address` values are unique. ```python title="Python" name="docs/docusaurus/docs/reference/learn/data_quality_use_cases/uniqueness_resources/uniqueness_expectations.py ExpectColumnProportionOfUniqueValuesToBeBetween" ``` @@ -154,7 +154,7 @@ Use the GX Cloud UI to walk through the following steps: * Column List: `last_name`, `phone_number` * Ignore Row If: Any value is missing -4. Validate the `uniqueness_customers` Data Asset with the newly create Expectations. +4. Validate the `uniqueness_customers` Data Asset with the newly created Expectations. 5. Review the Validation Results. Under **Batches & run history**, select the individual Validation run (not **All Runs**) to view the sample unexpected values that were identified for failing Expectations. diff --git a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume.md b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume.md index bab3a7b96f40..c6365ca8df4e 100644 --- a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume.md +++ b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume.md @@ -28,9 +28,7 @@ This dataset represents daily financial transactions. In a real-world scenario, ## Key volume Expectations -GX provides several Expectations specifically designed for managing data volume. 
These can be added to an Expectation Suite via the GX Cloud UI or using the GX Core Python library. - -![Add a volume Expectation in GX Cloud](./volume_resources/gx_cloud_volume_expectations_add.gif) +GX provides several Expectations specifically designed for managing data volume. These can be added via the GX Cloud UI or using the GX Core Python library. ### Expect Table Row Count To Be Between @@ -120,15 +118,17 @@ Use the GX Cloud UI to walk through the following steps. 2. Profile the Data Asset. 3. Add an **Expect table row count to be between** Expectation to the freshly created Data Asset. -4. Populate the Expectation: - * Define a **Daily** batch interval for the Expectation, using `transfer_ts` as the **Batch column**. - * Provide a **Min Value** of `1` and a **Max Value** of `5`. +4. Populate the Expectation with a **Min Value** of `1` and a **Max Value** of `5`. 5. Save the Expectation. -6. Click the **Validate** button and define which batch to validate. - * **Latest Batch** validates data for the most recent batch found in the Data Asset. - * **Custom Batch** validates data for the batch provided. -7. Click **Validate**. -8. Review Validation Results. +6. Click **Define batch**. +7. For **Validate by**, select **Day**. +8. Set the **Batch column** to `transfer_ts`. +9. Click **Save**. +10. Click the **Validate** button and define which batch to validate. + * **Latest** validates data for the most recent batch found in the Data Asset. + * **Custom** validates data for the batch provided. +11. Click **Validate**. +12. Review Validation Results. 
diff --git a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume_resources/gx_cloud_volume_expectations_add.gif b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume_resources/gx_cloud_volume_expectations_add.gif deleted file mode 100644 index 8686d2ae5f42..000000000000 Binary files a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume_resources/gx_cloud_volume_expectations_add.gif and /dev/null differ diff --git a/docs/docusaurus/docs/reference/learn/integrations/airflow_pipeline_tutorial.md b/docs/docusaurus/docs/reference/learn/integrations/data_pipeline_tutorial.md similarity index 58% rename from docs/docusaurus/docs/reference/learn/integrations/airflow_pipeline_tutorial.md rename to docs/docusaurus/docs/reference/learn/integrations/data_pipeline_tutorial.md index 7a16ed4035c9..a54d71512d29 100644 --- a/docs/docusaurus/docs/reference/learn/integrations/airflow_pipeline_tutorial.md +++ b/docs/docusaurus/docs/reference/learn/integrations/data_pipeline_tutorial.md @@ -1,18 +1,20 @@ --- -sidebar_label: 'Integrate GX with Airflow' -title: 'Integrate GX with Airflow' +sidebar_label: 'Integrate GX in a data pipeline' +title: 'Integrate GX in a data pipeline' --- import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; import Admonition from '@theme/Admonition'; -import AirflowIcon from "../../../../static/img/airflow_icon.svg"; +import GitHubMark from "../../../../static/img/github-mark-small.svg"; -This tutorial provides working, hands-on examples of how to use GX to add data validation within an Airflow pipeline. The tutorial is hosted on GitHub. +This tutorial provides working, hands-on examples of how to use GX to add data validation within a data pipeline. The tutorial is hosted on GitHub. -The tutorial provides a functioning deployment of GX, Airflow, JupyterLab, and Postgres that can be run locally using Docker compose. 
Educational content is provided as JupyterLab notebooks that can be interactively run and modified to learn and explore how GX integrates with Airflow. +The tutorial provides a functioning deployment of GX, Airflow, JupyterLab, and Postgres that can be run locally using Docker compose. Educational content is provided as JupyterLab notebooks that can be interactively run and modified to learn and explore how GX integrates with a data pipeline. -}> +The tutorial environment uses Airflow as the orchestrator and Postgres as the Data Source, but the examples can be adapted to any other [supported Data Source and orchestrator](/docs/application_integration_support). + +}> Click the link below to access the `tutorial-gx-in-the-data-pipeline` repo. diff --git a/docs/docusaurus/docs/reference/learn/integrations/dbt_tutorial.md b/docs/docusaurus/docs/reference/learn/integrations/dbt_tutorial.md index d62d93b6b5b5..8c92858940b0 100644 --- a/docs/docusaurus/docs/reference/learn/integrations/dbt_tutorial.md +++ b/docs/docusaurus/docs/reference/learn/integrations/dbt_tutorial.md @@ -19,7 +19,7 @@ Finally, to interact with our data, you will be using pgAdmin to query/view it ( ## Prerequisites -:::note +:::note Note In addition to the prerequisites below, you are recommended to use an IDE like VSCode to organize your project. ::: @@ -124,7 +124,7 @@ docker compose up -d --build When you run this for the first time, it may take several minutes to download and install the required libraries. -:::note +:::note Note You will also notice that one of the services `airflow-init` exits after running, but that is expected because it’s only used to initialize the airflow service. ::: @@ -167,7 +167,7 @@ Open the dbt Docker container’s terminal via running the following command in docker exec -it dbt bash -l ``` -:::note +:::note Note Here you are using the `docker exec` command to open an interactive terminal inside the container using bash as the shell.
The -l flag is important here because it tells the shell to be a login shell, which automatically sources the .bashrc file in the container, which in turn exports the dbt environment variables. See the dbt.Dockerfile for more info. ::: @@ -229,7 +229,7 @@ from great_expectations.checkpoint import UpdateDataDocsAction context = gx.get_context(mode="file") ``` -:::note +:::note Note You can either press the b button on your keyboard or press the “Insert a cell below” button in any cell to create a new cell. ::: @@ -399,7 +399,7 @@ Run the DAG by navigating to **Actions** and clicking the **play** button. Then ![Trigger DAG](./dbt_tutorial/trigger_dag.png) -:::note +:::note Note If you see an error saying “Task exited with return code Negsignal.SIGKILL” then it usually means that Airflow doesn’t have enough resources to run. Airflow recommends 4GB memory. Make sure your Docker resources are set appropriately (Docker Desktop > settings > Resources.) ::: @@ -414,5 +414,5 @@ Refresh the Data Docs page to see new results from the DAG run: ## Conclusion You have learned to build a pipeline using PostgreSQL, dbt, GX and Airflow. This tutorial covers a basic implementation for scheduling and running a data pipeline with open source tools. You can explore other possibilities with GX by connecting to your own Data Sources or exploring the other use cases in this section. See the [Expectations Gallery](https://greatexpectations.io/expectations/) to learn about all the Expectations you can run on your data. -If you are ready to take next steps and collaborate with your team using GX, check out [GX Cloud](https://greatexpectations.io/cloud), where you can use our online platform to run GX in a no-code environment and share results. 
+If you are ready to take next steps and collaborate with your team using GX, check out [GX Cloud](https://greatexpectations.io/cloud), where you can use our online platform to run GX in a no-code environment, automate rules, schedule validations, and share results. diff --git a/docs/docusaurus/docs/reference/learn/integrations/integrations_lp.md b/docs/docusaurus/docs/reference/learn/integrations/integrations_lp.md index 93b8c4c9b0a4..af5639e74348 100644 --- a/docs/docusaurus/docs/reference/learn/integrations/integrations_lp.md +++ b/docs/docusaurus/docs/reference/learn/integrations/integrations_lp.md @@ -20,7 +20,7 @@ Great Expectations can be integrated with a variety of orchestrators and data pi - + diff --git a/docs/docusaurus/docs/reference/learn/migration_guide.md b/docs/docusaurus/docs/reference/learn/migration_guide.md index 2301aeb059b5..7e63bf6bd457 100644 --- a/docs/docusaurus/docs/reference/learn/migration_guide.md +++ b/docs/docusaurus/docs/reference/learn/migration_guide.md @@ -291,7 +291,7 @@ In `0.X`, a Data Source represents where the data lives and the execution engine **id**: This is a new field and is an arbitrary UUID. If migrating you can pick any unique UUID. -:::note +:::note Note We no longer support arbitrary batching regexes. Batches must be defined by one of our temporal batch definitions which are yearly, monthly, or daily. 
::: @@ -673,7 +673,7 @@ A few configurations are **NO LONGER SUPPORTED**: ```python title="Python" # Create datasource connection_string = "snowflake://:@//?warehouse=&role=" - snowflake_ds = context.sources.add_snowflake(name="snowflake_ds", connection_string=connection_string) + snowflake_ds = context.data_sources.add_snowflake(name="snowflake_ds", connection_string=connection_string) # Create table asset and batch definitions table_asset = snowflake_ds.add_table_asset(name="taxi_data", table_name="TAXI_DATA_ALL_SAMPLES") diff --git a/docs/docusaurus/docs/reference/learn/reference_overview.md b/docs/docusaurus/docs/reference/learn/reference_overview.md index 46250e3504f1..1e81a002de37 100644 --- a/docs/docusaurus/docs/reference/learn/reference_overview.md +++ b/docs/docusaurus/docs/reference/learn/reference_overview.md @@ -23,7 +23,7 @@ import OverviewCard from '@site/src/components/OverviewCard'; - + diff --git a/docs/docusaurus/docs/reference/learn/usage_statistics.md b/docs/docusaurus/docs/reference/learn/usage_statistics.md deleted file mode 100644 index 8a512609f9c3..000000000000 --- a/docs/docusaurus/docs/reference/learn/usage_statistics.md +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: Usage statistics ---- - -To help us improve Great Expectations, by default we track event data when certain Data Context-enabled commands are run. Our [blog post from April 2020](https://greatexpectations.io/blog/anonymized-usage-statistics) explains a little bit more about what we want to capture with usage statistics and why! The usage statistics include things like the OS and python version, and which GX features are used. You can see the exact schemas for all of our messages [here](https://github.com/great-expectations/great_expectations/blob/develop/great_expectations/core/usage_statistics/schemas.py). 
- -While we hope you'll leave them on, you can easily disable usage statistics for a Data Context by adding the following to your data context configuration: - -```yaml title="YAML" - anonymous_usage_statistics: - data_context_id: - enabled: false -``` - -You can also disable usage statistics system-wide by setting the `GE_USAGE_STATS` environment variable to `FALSE` or adding the following code block to a file called `great_expectations.conf` located in `/etc/` or `~/.great_expectations`: - -```ini title="great_expectations.conf" - [anonymous_usage_statistics] - enabled=FALSE -``` - -As always, please reach out [on Slack](https://greatexpectations.io/slack) if you have any questions or comments. diff --git a/docs/docusaurus/docusaurus.config.js b/docs/docusaurus/docusaurus.config.js index 47c5306fdd25..e303ea0cad1a 100644 --- a/docs/docusaurus/docusaurus.config.js +++ b/docs/docusaurus/docusaurus.config.js @@ -34,6 +34,10 @@ module.exports = { async: true, defer: true, }, + { + src: '/scripts/web-tracking.js', + async: true + }, { src: 'https://fast.wistia.net/assets/external/E-v1.js', async: true @@ -44,7 +48,7 @@ module.exports = { announcementBar: { id: 'cta_bar', content: - `[New webinar] Top data quality tactics to boost trust and improve collaboration. 
Register today!`, + 'Get started with GX Cloud by joining our bi-weekly hands on workshop.', backgroundColor: '#141432', textColor: '#fff', isCloseable: false, @@ -52,15 +56,15 @@ module.exports = { gxCard: { title: 'What is GX Cloud?', description: - 'GX Cloud is a fully-managed SaaS solution that simplifies deployment, scaling, and collaboration and lets you focus on data validation.', + 'GX Cloud is a fully-managed SaaS solution that simplifies deployment, scaling, and collaboration—so you can focus on data validation.', buttons: { primary: { - label: 'Try GX Cloud', - href: 'https://greatexpectations.io/cloud', + label: 'Request a demo', + href: 'https://greatexpectations.io/demo', }, secondary: { - label: 'GX Cloud overview', - href: '/docs/cloud/overview/gx_cloud_overview', + label: 'Why GX Cloud?', + href: 'https://greatexpectations.io/why-gx-cloud', }, }, }, @@ -299,7 +303,7 @@ module.exports = { lastVersion: 'current', versions: { current: { - label: '1.2.5', + label: '1.3.3', }, ['0.18']: { label: '0.18.21', diff --git a/docs/docusaurus/sidebars.js b/docs/docusaurus/sidebars.js index d7e0d2a7b5bc..370fd002f4bf 100644 --- a/docs/docusaurus/sidebars.js +++ b/docs/docusaurus/sidebars.js @@ -108,6 +108,7 @@ module.exports = { link: { type: 'doc', id: 'core/trigger_actions_based_on_results/trigger_actions_based_on_results' }, items: [ { type: 'doc', id: 'core/trigger_actions_based_on_results/create_a_checkpoint_with_actions' }, + { type: 'doc', id: 'core/trigger_actions_based_on_results/create_a_custom_action' }, { type: 'doc', id: 'core/trigger_actions_based_on_results/choose_a_result_format/choose_a_result_format' }, { type: 'doc', id: 'core/trigger_actions_based_on_results/run_a_checkpoint' }, ] @@ -263,40 +264,23 @@ module.exports = { }, { type: 'link', - label: 'Edit an Expectation', - href: '/docs/cloud/expectations/manage_expectations#edit-an-expectation', + label: 'Optional. 
Define a Batch', + href: '/docs/cloud/expectations/manage_expectations#optional-define-a-batch', }, { type: 'link', - label: 'View Expectation history', - href: '/docs/cloud/expectations/manage_expectations#view-expectation-history', + label: 'Edit an Expectation', + href: '/docs/cloud/expectations/manage_expectations#edit-an-expectation', }, { type: 'link', label: 'Delete an Expectation', href: '/docs/cloud/expectations/manage_expectations#delete-an-expectation', }, - ] - }, - { - type: 'category', - label: 'Manage Expectation Suites', - link: { type: 'doc', id: 'cloud/expectation_suites/manage_expectation_suites' }, - items: [ - { - type: 'link', - label: 'Create an Expectation Suite ', - href: '/docs/cloud/expectation_suites/manage_expectation_suites#create-an-expectation-suite', - }, { type: 'link', - label: 'Edit an Expectation Suite name', - href: '/docs/cloud/expectation_suites/manage_expectation_suites#edit-an-expectation-suite-name', - }, - { - type: 'link', - label: 'Delete an Expectation Suite', - href: '/docs/cloud/expectation_suites/manage_expectation_suites#delete-an-expectation-suite', + label: 'GX-managed vs. 
API-managed Expectations', + href: '/docs/cloud/expectations/manage_expectations#gx-managed-vs-api-managed-expectations', }, ] }, @@ -312,8 +296,8 @@ module.exports = { }, { type: 'link', - label: 'Run a Validation on a Data Asset containing partitions', - href: '/docs/cloud/validations/manage_validations#run-a-validation-on-a-data-asset-containing-partitions', + label: 'Run a Validation on a subset of a Data Asset', + href: '/docs/cloud/validations/manage_validations#run-a-validation-on-a-subset-of-a-data-asset', }, { type: 'link', @@ -327,11 +311,6 @@ module.exports = { label: 'Manage schedules', link: { type: 'doc', id: 'cloud/schedules/manage_schedules' }, items: [ - { - type: 'link', - label: 'Add a schedule', - href: '/docs/cloud/schedules/manage_schedules#create-a-schedule-for-an-existing-expectation-suite', - }, { type: 'link', label: 'Edit a schedule', @@ -447,11 +426,10 @@ module.exports = { label: 'Integration tutorials', link: { type: 'doc', id: 'reference/learn/integrations/integrations_lp' }, items: [ - 'reference/learn/integrations/airflow_pipeline_tutorial', + 'reference/learn/integrations/data_pipeline_tutorial', 'reference/learn/integrations/dbt_tutorial', ] }, - 'reference/learn/usage_statistics', 'reference/learn/glossary' ], } diff --git a/docs/docusaurus/src/css/alerts.scss b/docs/docusaurus/src/css/alerts.scss index f2ef751a0910..b7a89673675a 100644 --- a/docs/docusaurus/src/css/alerts.scss +++ b/docs/docusaurus/src/css/alerts.scss @@ -31,6 +31,10 @@ border-left-width: var(--ifm-alert-border-left-width); box-shadow: none; + [class^="admonitionHeading"] { + text-transform: none; + } + div:first-child { font-size: var(--p-font-size); diff --git a/docs/docusaurus/src/css/api_docs/api_docs.scss b/docs/docusaurus/src/css/api_docs/api_docs.scss index 516a113afa32..a09af7787e8c 100644 --- a/docs/docusaurus/src/css/api_docs/api_docs.scss +++ b/docs/docusaurus/src/css/api_docs/api_docs.scss @@ -56,4 +56,9 @@ color: var(--ifm-font-color-base); } } + + 
h2 { + color: var(--ifm-font-color-base); + font-weight: var(--ifm-font-weight-bold); + } } diff --git a/docs/docusaurus/src/css/custom.scss b/docs/docusaurus/src/css/custom.scss index e349087679e0..9878c50f87e6 100755 --- a/docs/docusaurus/src/css/custom.scss +++ b/docs/docusaurus/src/css/custom.scss @@ -143,6 +143,10 @@ h3 { padding: 0 var(--ifm-pre-padding); } +a code { + vertical-align: top; +} + /*footer*/ .footer__items { diff --git a/docs/docusaurus/static/_redirects b/docs/docusaurus/static/_redirects index 63ba4bbcd6b3..ca0d96c27f09 100644 --- a/docs/docusaurus/static/_redirects +++ b/docs/docusaurus/static/_redirects @@ -54,7 +54,7 @@ /en/latest/expectation_glossary.html https://docs.greatexpectations.io/docs/reference/learn/glossary /en/latest/reference/supporting_resources.html https://docs.greatexpectations.io/docs/core/introduction/community_resources/ /en/latest/reference/spare_parts/* https://docs.greatexpectations.io/docs/core/introduction/ -/docs/reference/usage_statistics https://docs.greatexpectations.io/docs/reference/learn/usage_statistics +/docs/reference/usage_statistics /docs/core/configure_project_settings/toggle_analytics_events/ /en/latest/autoapi/great_expectations/render/renderer/page_renderer/index.html https://docs.greatexpectations.io/docs/reference/api/render/renderer/renderer/Renderer_class /en/latest/autoapi/great_expectations/expectations/* https://greatexpectations.io/expectations /en/latest/getting_started* https://docs.greatexpectations.io/docs/core/introduction/ @@ -143,7 +143,8 @@ # Redirects for renamed reference docs -/docs/reference/anonymous_usage_statistics /docs/reference/usage_statistics +/docs/reference/anonymous_usage_statistics /docs/core/configure_project_settings/toggle_analytics_events/ +/docs/reference/learn/integrations/airflow_pipeline_tutorial /docs/reference/learn/integrations/data_pipeline_tutorial # Redirect from Getting Started Tutorial to Quickstart @@ -300,6 +301,7 @@ 
docs/guides/setup/configuring_data_docs/how_to_host_and_share_data_docs_on_azure /docs/reference/expectations/implemented_expectations/ https://greatexpectations.io/expectations.html /docs/reference/customize_your_deployment /docs/core/configure_project_settings/ /docs/guides/expectations/advanced/how_to_create_a_new_expectation_suite_using_rule_based_profilers /docs/guides/expectations/expectations_lp +/docs/reference/learn/usage_statistics /docs/core/configure_project_settings/toggle_analytics_events/ # Redirects for removed index pages and new landing pages @@ -402,6 +404,8 @@ https://docs.greatexpectations.io/docs/reference/api/profile/user_configurable_p /docs/reference/learn/conceptual_guides/* /docs/0.18/reference/learn/conceptual_guides/:splat /docs/reference/api/* /docs/0.18/reference/api/:splat +/docs/cloud/expectation_suites/manage_expectation_suites /docs/cloud/expectations/manage_expectations + ## Redirects for old versioned docs /docs/0.15.50/guides/connecting_to_your_data/database/bigquery https://docs.greatexpectations.io/docs/core/connect_to_data/ diff --git a/docs/docusaurus/static/img/github-mark-small.svg b/docs/docusaurus/static/img/github-mark-small.svg new file mode 100644 index 000000000000..224ad0a59b6c --- /dev/null +++ b/docs/docusaurus/static/img/github-mark-small.svg @@ -0,0 +1,12 @@ + + + + + + + + \ No newline at end of file diff --git a/docs/docusaurus/static/img/view_validation_timeline_detail.png b/docs/docusaurus/static/img/view_validation_timeline_detail.png index e5d86066c112..54dd3721bd20 100644 Binary files a/docs/docusaurus/static/img/view_validation_timeline_detail.png and b/docs/docusaurus/static/img/view_validation_timeline_detail.png differ diff --git a/docs/docusaurus/static/scripts/web-tracking.js b/docs/docusaurus/static/scripts/web-tracking.js new file mode 100644 index 000000000000..db821e8e14f6 --- /dev/null +++ b/docs/docusaurus/static/scripts/web-tracking.js @@ -0,0 +1,18 @@ +(function() { + if (typeof window === 
'undefined') return; + if (typeof window.signals !== 'undefined') return; + var script = document.createElement('script'); + script.src = 'https://cdn.cr-relay.com/v1/site/fa55f78e-0306-4363-88ae-e92ab04d95c6/signals.js'; + script.async = true; + window.signals = Object.assign( + [], + ['page', 'identify', 'form'].reduce(function (acc, method){ + acc[method] = function () { + signals.push([method, arguments]); + return signals; + }; + return acc; + }, {}) + ); + document.head.appendChild(script); +})(); \ No newline at end of file diff --git a/docs/docusaurus/versioned_docs/version-0.18/cloud/connect/connect_python.md b/docs/docusaurus/versioned_docs/version-0.18/cloud/connect/connect_python.md index 477be3067752..0e2f9c8d62e1 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/cloud/connect/connect_python.md +++ b/docs/docusaurus/versioned_docs/version-0.18/cloud/connect/connect_python.md @@ -64,7 +64,7 @@ Environment variables securely store your GX Cloud access credentials. export GX_CLOUD_ORGANIZATION_ID= ``` - :::note + :::note Note After you save your **GX_CLOUD_ACCESS_TOKEN** and **GX_CLOUD_ORGANIZATION_ID**, you can use Python scripts to access GX Cloud and complete other tasks. See the [GX OSS guides](../../oss/oss.md). ::: diff --git a/docs/docusaurus/versioned_docs/version-0.18/cloud/deploy_gx_agent.md b/docs/docusaurus/versioned_docs/version-0.18/cloud/deploy_gx_agent.md index f5410e2cc016..51a6312ee254 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/cloud/deploy_gx_agent.md +++ b/docs/docusaurus/versioned_docs/version-0.18/cloud/deploy_gx_agent.md @@ -9,7 +9,7 @@ toc_max_heading_level: 2 import TabItem from '@theme/TabItem'; import Tabs from '@theme/Tabs'; -:::info +:::info Info For access to the GX Agent, reach out to Support at support@greatexpectations.io.
diff --git a/docs/docusaurus/versioned_docs/version-0.18/cloud/validations/manage_validations.md b/docs/docusaurus/versioned_docs/version-0.18/cloud/validations/manage_validations.md index c6a224ebbe4d..58c887533a22 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/cloud/validations/manage_validations.md +++ b/docs/docusaurus/versioned_docs/version-0.18/cloud/validations/manage_validations.md @@ -70,7 +70,7 @@ To make the identification of Data Assets containing partitions easier, partitio - To view the run history of all Validations, select **All Runs** to view a graph showing the Validation run history for all columns. -:::tip +:::tip Tip Items in run history with a calendar icon are run on a GX-managed schedule. diff --git a/docs/docusaurus/versioned_docs/version-0.18/components/setup/python_environment/_tip_python_or_python3_executable.md b/docs/docusaurus/versioned_docs/version-0.18/components/setup/python_environment/_tip_python_or_python3_executable.md index c760c559df3d..393cdfde03b0 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/components/setup/python_environment/_tip_python_or_python3_executable.md +++ b/docs/docusaurus/versioned_docs/version-0.18/components/setup/python_environment/_tip_python_or_python3_executable.md @@ -1,4 +1,4 @@ -:::info executing python commands with `python` or `python3` +:::info Executing python commands with `python` or `python3` Depending on your installation and configuration of Python 3, you may find that executing Python commands from the terminal by calling `python` doesn't work as desired. If a command using `python` does not work, try using `python3`. 
diff --git a/docs/docusaurus/versioned_docs/version-0.18/components/warnings/_sql_alchemy2.md b/docs/docusaurus/versioned_docs/version-0.18/components/warnings/_sql_alchemy2.md index 070c7b1b564d..20b1cc062040 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/components/warnings/_sql_alchemy2.md +++ b/docs/docusaurus/versioned_docs/version-0.18/components/warnings/_sql_alchemy2.md @@ -1,4 +1,4 @@ -:::caution +:::caution Caution As of this writing, Great Expectations is not compatible with SQLAlchemy version 2 or greater. We recommend using the latest non-version-2 release. diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/deployment_patterns/how_to_use_great_expectations_with_airflow.md b/docs/docusaurus/versioned_docs/version-0.18/oss/deployment_patterns/how_to_use_great_expectations_with_airflow.md index 4b1cbe3c3812..f2f9819867f5 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/deployment_patterns/how_to_use_great_expectations_with_airflow.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/deployment_patterns/how_to_use_great_expectations_with_airflow.md @@ -8,7 +8,7 @@ sidebar_custom_props: { icon: 'img/integrations/airflow_icon.png' } Airflow is a data orchestration tool for creating and maintaining data pipelines through DAGs written in Python. DAGs complete work through operators, which are templates that encapsulate a specific type of work. -:::info +:::info Info Consult Astronomer's [Orchestrate Great Expectations with Airflow](https://docs.astronomer.io/learn/airflow-great-expectations) guide for more information on how to set up and configure the `GreatExpectationsOperator` in an Airflow DAG. 
diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/deployment_patterns/how_to_use_great_expectations_with_prefect.md b/docs/docusaurus/versioned_docs/version-0.18/oss/deployment_patterns/how_to_use_great_expectations_with_prefect.md index 26ca25ae8314..8e4709f7d70f 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/deployment_patterns/how_to_use_great_expectations_with_prefect.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/deployment_patterns/how_to_use_great_expectations_with_prefect.md @@ -9,7 +9,7 @@ sidebar_custom_props: { icon: 'img/integrations/prefect_icon.svg' } Prefect can be used with Great Expectations validations so that you can be confident about the state of your data. With a [Prefect deployment](https://docs.prefect.io/latest/concepts/deployments/), you can productionize your workflow and run data quality checks in reaction to the arrival of new data or on a schedule. -:::info +:::info Info Consult Prefect's [Great Expectations + Prefect](https://prefecthq.github.io/prefect-great-expectations/) documentation for more information on using GX with Prefect. diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/deployment_patterns/how_to_use_gx_with_aws/components/_checkpoint_create_and_run.md b/docs/docusaurus/versioned_docs/version-0.18/oss/deployment_patterns/how_to_use_gx_with_aws/components/_checkpoint_create_and_run.md index d08f7a3b2e02..a6970d9913a4 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/deployment_patterns/how_to_use_gx_with_aws/components/_checkpoint_create_and_run.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/deployment_patterns/how_to_use_gx_with_aws/components/_checkpoint_create_and_run.md @@ -4,7 +4,7 @@ To validate and run post-validation Checkpoints can be preconfigured with a Batch Request and Expectation Suite, or they can take them in as parameters at runtime. 
They can also execute numerous Actions based on the Validation Results that are returned when Checkpoint runs. -:::tip +:::tip Tip To preconfigure a Checkpoint with a Batch Request and Expectation Suite, see [Manage Checkpoints](/oss/guides/validation/checkpoints/checkpoint_lp.md) diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/deployment_patterns/how_to_use_gx_with_aws/components/_data_docs_build_and_view.md b/docs/docusaurus/versioned_docs/version-0.18/oss/deployment_patterns/how_to_use_gx_with_aws/components/_data_docs_build_and_view.md index 7777393dfbc7..3523a1209f86 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/deployment_patterns/how_to_use_gx_with_aws/components/_data_docs_build_and_view.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/deployment_patterns/how_to_use_gx_with_aws/components/_data_docs_build_and_view.md @@ -2,7 +2,7 @@ import TechnicalTag from '../../../../reference/learn/term_tags/_tag.mdx'; The Checkpoint contains `UpdateDataDocsAction` which renders the from the generated Validation Results. The Data Docs store contains a new entry for the rendered Validation Result. -:::tip +:::tip Tip For more information on Actions that Checkpoints can perform and how to add them, see [Configure Actions](/oss/guides/validation/validation_actions/actions_lp.md). 
diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/cloud/s3/components_pandas/_test_your_new_datasource.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/cloud/s3/components_pandas/_test_your_new_datasource.mdx index 8e0ea5f79664..127c18857847 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/cloud/s3/components_pandas/_test_your_new_datasource.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/cloud/s3/components_pandas/_test_your_new_datasource.mdx @@ -15,7 +15,7 @@ Verify your new by loading Add the S3 path to your CSV in the `path` key under `runtime_parameters` in your `BatchRequest`. -:::tip +:::tip Tip The path you will want to use is your S3 URI, not the URL. ::: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/cloud/s3/components_spark/_test_your_new_datasource.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/cloud/s3/components_spark/_test_your_new_datasource.md index e80e647d4331..8f069a3fd2d9 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/cloud/s3/components_spark/_test_your_new_datasource.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/cloud/s3/components_spark/_test_your_new_datasource.md @@ -15,7 +15,7 @@ Verify your new by loading Add the S3 path to your CSV in the `path` key under `runtime_parameters` in your `RuntimeBatchRequest`. -:::tip +:::tip Tip The path you will want to use is your S3 URI, not the URL. 
::: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_caution_runtime_batch_identifier_values.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_caution_runtime_batch_identifier_values.mdx index 7a2a60f0fdfe..e24cbff0ab81 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_caution_runtime_batch_identifier_values.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_caution_runtime_batch_identifier_values.mdx @@ -1,4 +1,4 @@ -:::caution +:::caution Caution We stated above that the names that you use for your `batch_identifiers` in a Runtime Data Connector's configuration can be completely arbitrary, and will be used as keys for the `batch_identifiers` dictionary in future Batch Requests. diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_part_batch_spec_passthrough.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_part_batch_spec_passthrough.mdx index be58c4c5d513..8a257fdcf65f 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_part_batch_spec_passthrough.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_part_batch_spec_passthrough.mdx @@ -27,7 +27,7 @@ The second key to include is `inferSchema`. Again, the value should be either ` ```python title="Python" name="populate inferSchema for reader_options as True" ``` -:::caution +:::caution Caution - `inferSchema` will read datetime columns in as text columns. 
::: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_section_add_your_new_datasource_to_your_data_context.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_section_add_your_new_datasource_to_your_data_context.mdx index 9317cbb08682..333638706839 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_section_add_your_new_datasource_to_your_data_context.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_section_add_your_new_datasource_to_your_data_context.mdx @@ -3,13 +3,13 @@ Now that you have verified that you have a valid configuration you can add your ```python title="Python" name="add your datasource to your data_context" ``` -:::caution +:::caution Caution If the value of `datasource_config["name"]` corresponds to a Data Source that is already defined in your Data Context, then using the above command will overwrite the existing Data Source. 
::: -:::tip +:::tip Tip If you want to ensure that you only add a Data Source when it won't overwrite an existing one, you can use the following code instead: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_tip_configured_data_connector_overview.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_tip_configured_data_connector_overview.mdx index 5716a8e83139..b15b7dfe6c2c 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_tip_configured_data_connector_overview.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_tip_configured_data_connector_overview.mdx @@ -1,4 +1,4 @@ -:::tip +:::tip Tip A `ConfiguredAssetDataConnector` enables the most fine-tuning, allowing you to easily work with multiple Batches. It also requires an explicit listing of each Data Asset you connect to and how Batches are defined within that Data Asset, which makes it very clear what Data Assets are being provided when you reference it in Batch Requests or Checkpoints.
diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_tip_custom_data_connector_module_name.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_tip_custom_data_connector_module_name.mdx index b8cf22afe3e2..162a3a6f2fba 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_tip_custom_data_connector_module_name.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_tip_custom_data_connector_module_name.mdx @@ -1,4 +1,4 @@ -:::tip +:::tip Tip Because we are using one of Great Expectations' built-in Data Connectors, an entry for `module_name` along with a default value will be provided when this Data Connector is initialized. diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_tip_inferred_data_connector_overview.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_tip_inferred_data_connector_overview.mdx index abba71236fd6..b3961f6f6b28 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_tip_inferred_data_connector_overview.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_tip_inferred_data_connector_overview.mdx @@ -1,4 +1,4 @@ -:::tip +:::tip Tip The `InferredDataConnector` is ideal for: - quickly setting up a Data Source and getting access to data diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_tip_more_info_on_regex.mdx
b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_tip_more_info_on_regex.mdx index 65c4e00a858f..72c6297888a2 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_tip_more_info_on_regex.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_tip_more_info_on_regex.mdx @@ -1,4 +1,4 @@ -:::tip +:::tip Tip For more information on the special characters and mechanics of matching and grouping strings with regular expressions, please see [the Python documentation on the `re` module](https://docs.python.org/3/library/re.html). diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_tip_using_a_configured_connector_like_an_inferred_connector.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_tip_using_a_configured_connector_like_an_inferred_connector.mdx index 6117aa7c0833..cc28c8ea6be3 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_tip_using_a_configured_connector_like_an_inferred_connector.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/components/_tip_using_a_configured_connector_like_an_inferred_connector.mdx @@ -1,4 +1,4 @@ -:::tip +:::tip Tip A `ConfiguredAssetDataConnector` can also be configured to functionally duplicate the ability of an `InferredAssetDataConnector` to grant access to all files in a folder based on a regex match by defining a Data Asset that uses the same `pattern` as you would use in an `InferredAssetDataConnector`'s `default_regex`. 
This will cause the Data Asset in question to have one Batch for each matched file, just as the `InferredAssetDataConnector` would have one Data Asset for each file. Therefore, the only difference would be in how you accessed the data in question. diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/filesystem_components/_config_for_assets_configured_single.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/filesystem_components/_config_for_assets_configured_single.mdx index c5aa34a4d47f..e10196f584ab 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/filesystem_components/_config_for_assets_configured_single.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/filesystem_components/_config_for_assets_configured_single.mdx @@ -25,7 +25,7 @@ Since you want this Data Asset to only match the file `yellow_tripdata_sample_20 ```python title="Python" name="pattern_for_single_batch_configured_data_asset_configuration" ``` -:::note +:::note Note The pattern we defined contains a regex group, even though we logically don't need a group to identify the desired Batch in a Data Asset that can only return one Batch. This is because Great Expectations currently does not permit `pattern` to be defined without also having `group_names` defined. Thus, in the example above you are creating a group that corresponds to `01` so that there is a valid group to associate a `group_names` entry with. 
::: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/filesystem_components/_config_for_assets_inferred_multi.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/filesystem_components/_config_for_assets_inferred_multi.mdx index 6f3c1dd7ad4e..f9567789d50c 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/filesystem_components/_config_for_assets_inferred_multi.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/filesystem_components/_config_for_assets_inferred_multi.mdx @@ -29,7 +29,7 @@ Looking back at our sample files, this regex will result in the `InferredAssetFi - `02` - `03` -:::note +:::note Note Any characters that are not included in a group when you define your regex will still be checked for when determining if a file name "matches" the regular expression. However, those characters will not be included in any of the Batch Identifiers, which is why the `-` and `.csv` portions of the filenames are not found in either the `data_asset_name` or `month` values. 
diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/filesystem_components/_config_for_assets_inferred_single.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/filesystem_components/_config_for_assets_inferred_single.mdx index ad7be78d2cf6..56f4fb6e9afe 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/filesystem_components/_config_for_assets_inferred_single.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/filesystem_components/_config_for_assets_inferred_single.mdx @@ -26,7 +26,7 @@ Looking back at our sample files, this regex will result in the `InferredAssetFi - `yellow_tripdata_sample_2020-02` - `yellow_tripdata_sample_2020-03` -:::note +:::note Note Since we did not include `.csv` in the first group of the regex we defined, the `.csv` portion of the filename will be dropped from the value that is recognized as a valid `data_asset_name`. 
diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/filesystem_components/_note_explicit_configured_assets_multi.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/filesystem_components/_note_explicit_configured_assets_multi.mdx index 633815fc5cf7..c07aea1dae18 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/filesystem_components/_note_explicit_configured_assets_multi.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/filesystem_components/_note_explicit_configured_assets_multi.mdx @@ -1,4 +1,4 @@ -:::tip +:::tip Tip Remember that when you are working with a Configured Asset Data Connector you need to explicitly define each of your Data Assets. So, if you want to add additional Data Assets, go ahead and repeat the process of defining an entry in your configuration's `assets` dictionary to do so. 
diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/filesystem_components/_note_explicit_configured_assets_single.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/filesystem_components/_note_explicit_configured_assets_single.mdx index 437e2efe0673..58f8013095d4 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/filesystem_components/_note_explicit_configured_assets_single.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/filesystem_components/_note_explicit_configured_assets_single.mdx @@ -1,4 +1,4 @@ -:::note +:::note Note Because Configured Data Assets require that you explicitly define each Data Asset they provide access to, you will have to add `assets` entries for February and March if you also want to access `yellow_tripdata_sample_2020-02.csv` and `yellow_tripdata_sample_2020-03.csv` in the same way. 
diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/sql_components/_section_add_the_execution_engine_to_your_datasource_configuration.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/sql_components/_section_add_the_execution_engine_to_your_datasource_configuration.mdx index 15b64ae0475c..57f130372a7b 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/sql_components/_section_add_the_execution_engine_to_your_datasource_configuration.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/sql_components/_section_add_the_execution_engine_to_your_datasource_configuration.mdx @@ -12,7 +12,7 @@ Additionally, your `execution_engine` dictionary will require a values for eithe ```python title="Python" name="sql datasource define CONNECTION_STRING" ``` -:::tip +:::tip Tip Your connection string will vary depending on the type of SQL database you are connecting to. For more information on how to configure your connection string, please see [the appropriate guide for connecting to a specific Database](../#database). 
::: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/sql_components/_tab_data_connector_example_configurations_configured.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/sql_components/_tab_data_connector_example_configurations_configured.mdx index f7f5a7b18e6f..033c96e7693d 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/sql_components/_tab_data_connector_example_configurations_configured.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/sql_components/_tab_data_connector_example_configurations_configured.mdx @@ -37,7 +37,7 @@ The key/value pairs that used for splitting a Data Asset into Batches are: -:::tip +:::tip Tip For more information on the available splitting methods, please see the [Splitting methods subsection under Additional notes](#splitting-methods) at the end of this guide. @@ -51,7 +51,7 @@ The key/value pairs that are used for sampling data from a Data Asset are: -:::tip +:::tip Tip Although this guide will not use sampling in its examples, there is a list of the available sampling methods in [the Sampling methods subsection of the Additional notes section](#sampling-methods) at the end of this guide. @@ -63,7 +63,7 @@ Finally, there is an optional key that can be defined to alter the default behav -:::tip +:::tip Tip You will find a list of the valid keys for the `introspection_directives` dictionary and their corresponding values in the [Introspection directives subsection of the Additional notes](#introspection-directives) at the end of this guide. 
@@ -71,7 +71,7 @@ You will find a list of the valid keys for the `introspection_directives` dictio #### A note on optional Data Connector keys -:::note +:::note Note These key/value pairs can also be defined in the configurations for individual Data Assets, which will be shown later in this guide. If these values are defined both in the Data Connector dictionary and in a Data Asset dictionary, the definition in the Data Asset will take precedence. diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/sql_components/_tab_data_connector_example_configurations_inferred.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/sql_components/_tab_data_connector_example_configurations_inferred.mdx index 1e92d2d47ea8..f9cf1c68d69b 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/sql_components/_tab_data_connector_example_configurations_inferred.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/sql_components/_tab_data_connector_example_configurations_inferred.mdx @@ -35,7 +35,7 @@ There is an optional key that can be defined for your Data Connector to alter th -:::tip +:::tip Tip You will find a list of the valid keys for the `introspection_directives` dictionary and their corresponding values in the [Introspection directives subsection of the Additional notes](#introspection-directives) at the end of this guide. 
diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/sql_components/_table_sampling_methods.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/sql_components/_table_sampling_methods.mdx index 5285476f9563..713bc9757409 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/sql_components/_table_sampling_methods.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/sql_components/_table_sampling_methods.mdx @@ -1,5 +1,5 @@ -:::note NOTES ON SAMPLING METHODS +:::note Notes on sampling methods - The names of `sampling_method` values can be specified with or without a preceding underscore. diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/sql_components/_table_splitting_methods.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/sql_components/_table_splitting_methods.mdx index 5e1aeb96b2cb..3be14072fccd 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/sql_components/_table_splitting_methods.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/datasource_configuration/sql_components/_table_splitting_methods.mdx @@ -1,5 +1,5 @@ -:::note NOTES ON SPLITTER METHODS +:::note Notes on splitter methods - The names of `splitter_method` values can be specified with or without a preceding underscore. 
diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/fluent/database/connect_sql_source_data.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/fluent/database/connect_sql_source_data.md index a7b6479dfdd8..53ea177614bf 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/fluent/database/connect_sql_source_data.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/fluent/database/connect_sql_source_data.md @@ -411,7 +411,7 @@ Connect GX to a BigQuery SQL database to access Data Assets. ### Create a BigQuery Data Source. -:::note +:::note Note Tables that are created by BigQuery queries are automatically set to expire after one day. diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/fluent/filesystem/connect_filesystem_source_data.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/fluent/filesystem/connect_filesystem_source_data.md index 082c9b4bcfef..09feb8478a1d 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/fluent/filesystem/connect_filesystem_source_data.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/connecting_to_your_data/fluent/filesystem/connect_filesystem_source_data.md @@ -470,7 +470,7 @@ Run the following Python code to read the data in individual files directly into Modify the following code to connect to your . If you don't have data available for testing, you can use the [NYC taxi data](https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page). The NYC taxi data is open source, and it is updated every month. An individual record in the data corresponds to one taxi trip. -:::caution +:::caution Caution Do not include sensitive information such as credentials in the configuration when you connect to your Data Source. This information appears as plain text in the database. 
If you must include credentials or a full connection string, GX recommends using a [config variables file](https://docs.greatexpectations.io/docs/oss/guides/setup/configuring_data_contexts/how_to_configure_credentials/). ::: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/advanced/how_to_create_expectations_that_span_multiple_batches_using_evaluation_parameters.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/advanced/how_to_create_expectations_that_span_multiple_batches_using_evaluation_parameters.md index 36d19e2dfec5..229b0b8565d4 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/advanced/how_to_create_expectations_that_span_multiple_batches_using_evaluation_parameters.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/advanced/how_to_create_expectations_that_span_multiple_batches_using_evaluation_parameters.md @@ -60,7 +60,7 @@ When executed in the notebook, this Expectation will generate a Validation Resul ```python title="Python" name="docs/docusaurus/docs/oss/guides/expectations/advanced/how_to_create_expectations_that_span_multiple_batches_using_evaluation_parameters.py expected_validation_result" ``` -:::warning +:::warning Warning Your URN must be exactly correct in order to work in production. Unfortunately, successful execution at this stage does not guarantee that the URN is specified correctly and that the intended parameters will be available when executed later. 
diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_add_support_for_the_auto_initializing_framework_to_a_custom_expectation.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_add_support_for_the_auto_initializing_framework_to_a_custom_expectation.md index 26298d23c2c2..8d8da7410137 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_add_support_for_the_auto_initializing_framework_to_a_custom_expectation.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_add_support_for_the_auto_initializing_framework_to_a_custom_expectation.md @@ -46,7 +46,7 @@ The `DomainBuilder` configuration requires a `class_name` and `module_name`. In - `TableDomainBuilder`: This `DomainBuilder` outputs table `Domains`, which is required by `Expectations` that act on tables, like (`expect_table_row_count_to_equal`, or `expect_table_columns_to_match_set`). - `MapMetricColumnDomainBuilder`: This `DomainBuilder` allows you to choose columns based on Map Metrics, which give a yes/no answer for individual values or rows. - `CategoricalColumnDomainBuilder`: This `DomainBuilder` allows you to choose columns based on their cardinality (number of unique values). - :::note + :::note Note `CategoricalColumnDomainBuilder` will take in various `cardinality_limit_mode` values for cardinality. For a full listing of valid modes, along with the associated values, please refer to [the `CardinalityLimitMode` enum in the source code on our GitHub](https://github.com/great-expectations/great_expectations/blob/develop/great_expectations/experimental/rule_based_profiler/helpers/cardinality_checker.py). 
::: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_batch_expectations.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_batch_expectations.md index f13c0c703770..bb84fd69a7da 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_batch_expectations.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_batch_expectations.md @@ -157,7 +157,7 @@ Completeness checklist for ExpectBatchColumnsToBeUnique: ... ``` -:::note +:::note Note For more information on tests and example cases,
see our guide on [creating example cases for a Custom Expectation](../features_custom_expectations/how_to_add_example_cases_for_an_expectation.md). ::: @@ -168,7 +168,7 @@ This is the stage where you implement the actual business logic for your Expecta To do so, you'll need to implement a function within a class, and link it to your Expectation. By the time your Expectation is complete, your Metric will have functions for all three Execution Engines (Pandas, Spark, and SQLAlchemy) supported by Great Expectations. For now, we're only going to define one. -:::note +:::note Note Metrics answer questions about your data posed by your Expectation,
and allow your Expectation to judge whether your data meets ***your*** expectations. ::: @@ -177,7 +177,7 @@ Your Metric function will have the `@metric_value` decorator, with the appropria ```python title="Python" name="docs/docusaurus/docs/oss/guides/expectations/creating_custom_expectations/expect_batch_columns_to_be_unique.py pandas" ``` -:::note +:::note Note The `@metric_value` decorator allows us to explicitly structure queries and directly access our compute domain. While this can result in extra roundtrips to your database in some situations, it allows for advanced functionality and customization of your Custom Expectations. ::: @@ -271,7 +271,7 @@ black ruff --fix ``` -:::info +:::info Info If desired, you can automate this to happen at commit time. See our [guidance on linting](../../../contributing/style_guides/code_style.md#linting) for more on this process. ::: @@ -304,7 +304,7 @@ would become This is particularly important because ***we*** want to make sure that ***you*** get credit for all your hard work! -:::note +:::note Note For more information on our code standards and contribution, see our guide on [Levels of Maturity](/oss/contributing/contributing_maturity.md#expectation-contributions) for Expectations. 
To view the full script used in this page, see it on GitHub: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_column_aggregate_expectations.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_column_aggregate_expectations.md index 1f55972c15ea..e6d3b5a033cf 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_column_aggregate_expectations.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_column_aggregate_expectations.md @@ -161,7 +161,7 @@ Completeness checklist for ExpectColumnValuesToBeBetweenCustom: Passes all linting checks ``` -:::note +:::note Note For more information on tests and example cases,
see our guide on [creating example cases for a Custom Expectation](../features_custom_expectations/how_to_add_example_cases_for_an_expectation.md). ::: @@ -172,7 +172,7 @@ This is the stage where you implement the actual business logic for your Expecta To do so, you'll need to implement a function within a Metric class, and link it to your Expectation. By the time your Expectation is complete, your Metric will have functions for all three (Pandas, Spark, and SQLAlchemy) supported by Great Expectations. For now, we're only going to define one. -:::note +:::note Note Metrics answer questions about your data posed by your Expectation,
and allow your Expectation to judge whether your data meets ***your*** expectations. ::: @@ -272,7 +272,7 @@ black ruff --fix ``` -:::info +:::info Info If desired, you can automate this to happen at commit time. See our [guidance on linting](../../../contributing/style_guides/code_style.md#linting) for more on this process. ::: @@ -306,7 +306,7 @@ would become This is particularly important because ***we*** want to make sure that ***you*** get credit for all your hard work! -:::note +:::note Note For more information on our code standards and contribution, see our guide on [Levels of Maturity](/oss/contributing/contributing_maturity.md#expectation-contributions) for Expectations. To view the full script used in this page, see it on GitHub: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_column_map_expectations.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_column_map_expectations.md index d1f7b21ff02f..7c4c2e13e0a4 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_column_map_expectations.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_column_map_expectations.md @@ -156,7 +156,7 @@ Completeness checklist for ExpectColumnValuesToEqualThree: Passes all linting checks ``` -:::note +:::note Note For more information on tests and example cases,
see our guide on [how to create example cases for a Custom Expectation](../features_custom_expectations/how_to_add_example_cases_for_an_expectation.md). ::: @@ -168,7 +168,7 @@ This is the stage where you implement the actual business logic for your Expecta To do so, you'll need to implement a function within a Metric, and link it to your Expectation. By the time your Expectation is complete, your Metric will have functions for all three (Pandas, Spark, & SQLAlchemy) supported by Great Expectations. For now, we're only going to define one. -:::note +:::note Note Metrics answer questions about your data posed by your Expectation,
and allow your Expectation to judge whether your data meets ***your*** expectations. ::: @@ -247,7 +247,7 @@ black ruff --fix ``` -:::info +:::info Info If desired, you can automate this to happen at commit time. See our [guidance on linting](../../../contributing/style_guides/code_style.md#linting) for more on this process. ::: @@ -265,7 +265,7 @@ Completeness checklist for ExpectColumnValuesToEqualThree: ... ``` -:::note +:::note Note If you've already built a [Custom Column Aggregate Expectation](./how_to_create_custom_column_aggregate_expectations.md), you may notice that we didn't implement a `_validate` method here. While we have to explicitly create this functionality for Column Aggregate Expectations, Column Map Expectations come with that functionality built in; no extra `_validate` needed! @@ -287,7 +287,7 @@ would become This is particularly important because ***we*** want to make sure that ***you*** get credit for all your hard work! -:::note +:::note Note For more information on our code standards and contribution, see our guide on [Levels of Maturity](/oss/contributing/contributing_maturity.md#expectation-contributions) for Expectations. To view the full script used in this page, see it on GitHub: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_column_pair_map_expectations.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_column_pair_map_expectations.md index 4869012b3ee5..c68c1b3d4587 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_column_pair_map_expectations.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_column_pair_map_expectations.md @@ -156,7 +156,7 @@ Completeness checklist for ExpectColumnPairValuesToHaveADifferenceOfThree: ... 
``` -:::note +:::note Note For more information on tests and example cases,
see our guide on [how to create example cases for a Custom Expectation](../features_custom_expectations/how_to_add_example_cases_for_an_expectation.md). ::: @@ -168,7 +168,7 @@ This is the stage where you implement the actual business logic for your Expecta To do so, you'll need to implement a function within a Metric, and link it to your Expectation. By the time your Expectation is complete, your Metric will have functions for all three (Pandas, Spark, & SQLAlchemy) supported by Great Expectations. For now, we're only going to define one. -:::note +:::note Note Metrics answer questions about your data posed by your Expectation,
and allow your Expectation to judge whether your data meets ***your*** expectations. ::: @@ -247,7 +247,7 @@ black ruff --fix ``` -:::info +:::info Info If desired, you can automate this to happen at commit time. See our [guidance on linting](../../../contributing/style_guides/code_style.md#linting) for more on this process. ::: @@ -265,7 +265,7 @@ Completeness checklist for ExpectColumnPairValuesToHaveADifferenceOfThree: ... ``` -:::note +:::note Note If you've already built a [Custom Column Aggregate Expectation](./how_to_create_custom_column_aggregate_expectations.md), you may notice that we didn't implement a `_validate` method here. While we have to explicitly create this functionality for Column Aggregate Expectations, Column Map Expectations come with that functionality built in; no extra `_validate` needed! @@ -287,7 +287,7 @@ would become This is particularly important because ***we*** want to make sure that ***you*** get credit for all your hard work! -:::note +:::note Note For more information on our code standards and contribution, see our guide on [Levels of Maturity](/oss/contributing/contributing_maturity.md#expectation-contributions) for Expectations. 
To view the full script used in this page, see it on GitHub: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_multicolumn_map_expectations.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_multicolumn_map_expectations.md index 27e02a1a21d6..0c39dc98c538 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_multicolumn_map_expectations.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_multicolumn_map_expectations.md @@ -155,7 +155,7 @@ Completeness checklist for ExpectMulticolumnValuesToBeMultiplesOfThree: ... ``` -:::note +:::note Note For more information on tests and example cases,
see our guide on [how to create example cases for a Custom Expectation](../features_custom_expectations/how_to_add_example_cases_for_an_expectation.md). ::: @@ -167,7 +167,7 @@ This is the stage where you implement the actual business logic for your Expecta To do so, you'll need to implement a function within a Metric, and link it to your Expectation. By the time your Expectation is complete, your Metric will have functions for all three (Pandas, Spark, & SQLAlchemy) supported by Great Expectations. For now, we're only going to define one. -:::note +:::note Note Metrics answer questions about your data posed by your Expectation,
and allow your Expectation to judge whether your data meets ***your*** expectations. ::: @@ -246,7 +246,7 @@ black ruff --fix ``` -:::info +:::info Info If desired, you can automate this to happen at commit time. See our [guidance on linting](../../../contributing/style_guides/code_style.md#linting) for more on this process. ::: @@ -264,7 +264,7 @@ Completeness checklist for ExpectMulticolumnValuesToBeMultiplesOfThree: ... ``` -:::note +:::note Note If you've already built a [Custom Column Aggregate Expectation](./how_to_create_custom_column_aggregate_expectations.md), you may notice that we didn't implement a `_validate` method here. While we have to explicitly create this functionality for Column Aggregate Expectations, Multicolumn Map Expectations come with that functionality built in; no extra `_validate` needed! @@ -286,7 +286,7 @@ would become This is particularly important because ***we*** want to make sure that ***you*** get credit for all your hard work! -:::note +:::note Note For more information on our code standards and contribution, see our guide on [Levels of Maturity](/oss/contributing/contributing_maturity.md#expectation-contributions) for Expectations. 
To view the full script used in this page, see it on GitHub: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_parameterized_expectations.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_parameterized_expectations.md index d7afda7ea207..a29724754936 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_parameterized_expectations.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_parameterized_expectations.md @@ -30,7 +30,7 @@ Notice that we do not need to set `default_kwarg_values` for all kwargs: it is s ````python title="Python" name="docs/docusaurus/docs/oss/guides/expectations/creating_custom_expectations/test_expect_column_mean_to_be_positive.py ExpectColumnMeanToBePositive_class_def" ```` -:::info +:::info Info We could also explicitly override our parent methods to modify the behavior of our new Expectation, for example by updating the configuration validation to require the values we set as defaults not be altered. ```python title="Python" name="docs/docusaurus/docs/oss/guides/expectations/creating_custom_expectations/test_expect_column_mean_to_be_positive.py validate_config" @@ -55,6 +55,6 @@ This is particularly important because ***we*** want to make sure that ***you*** Additionally, you will need to implement some basic examples and test cases before your contribution can be accepted. For guidance on examples and testing, see our [guide on implementing examples and test cases](../features_custom_expectations/how_to_add_example_cases_for_an_expectation.md). 
-:::note +:::note Note For more information on our code standards and contribution, see our guide on [Levels of Maturity](/oss/contributing/contributing_maturity.md#expectation-contributions) for Expectations. ::: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_query_expectations.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_query_expectations.md index 63acb3faa4ca..70318e1b9906 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_query_expectations.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_query_expectations.md @@ -45,7 +45,7 @@ Your Expectation will have two versions of the same name: a `CamelCaseName` and - `ExpectQueriedTableRowCountToBe` - `expect_queried_table_row_count_to_be` -:::info +:::info Info For more on Expectation naming conventions, see the [Expectations section](../../../contributing/style_guides/code_style.md#expectations) of the Code Style Guide. ::: @@ -193,7 +193,7 @@ Completeness checklist for ExpectQueriedTableRowCountToBe: ... ``` -:::note +:::note Note For more information on tests and example cases, see [how to create example cases for a Custom Expectation](../features_custom_expectations/how_to_add_example_cases_for_an_expectation.md). ::: @@ -213,7 +213,7 @@ Becomes something like this: ```python title="Python" name="docs/docusaurus/docs/oss/guides/expectations/creating_custom_expectations/expect_queried_table_row_count_to_be.py query" ``` -:::warning +:::warning Warning As noted above, `QueryExpectations` support parameterization of your . We *strongly* recommend making use of that parameterization as above, by querying against `{active_batch}`. 
@@ -225,7 +225,7 @@ Metrics for `QueryExpectations` are a thin wrapper, allowing you to execute that Great Expectations provides a small number of simple, ready-to-use `query.*` Metrics that can plug into your Custom Expectation, or serve as a basis for your own custom Metrics. -:::note +:::note Note Query Metric functions have the `@metric_value` decorator, with the appropriate `engine`. The `@metric_value` decorator allows us to explicitly structure queries and directly access our compute domain. @@ -296,7 +296,7 @@ black ruff --fix ``` -:::info +:::info Info If desired, you can automate this to happen at commit time. See our [guidance on linting](../../../contributing/style_guides/code_style.md#linting) for more on this process. ::: @@ -323,7 +323,7 @@ Your Expectation will have two versions of the same name: a `CamelCaseName` and - `ExpectQueriedColumnValueFrequencyToMeetThreshold` - `expect_queried_column_value_frequency_to_meet_threshold` -:::info +:::info Info For more on Expectation naming conventions, see the [Expectations section](../../../contributing/style_guides/code_style.md#expectations) of the Code Style Guide. ::: @@ -470,7 +470,7 @@ Completeness checklist for ExpectQueriedColumnValueFrequencyToMeetThreshold: ... ``` -:::note +:::note Note For more information on tests and example cases, see our guide on [how to create example cases for a Custom Expectation](./how_to_use_custom_expectations.md). ::: @@ -490,7 +490,7 @@ Becomes something like this: ```python title="Python" name="docs/docusaurus/docs/oss/guides/expectations/creating_custom_expectations/expect_queried_column_value_frequency_to_meet_threshold.py query" ``` -:::warning +:::warning Warning As noted above, `QueryExpectations` support parameterization of your , and can support parameterization of a column name. 
While parameterizing a column name with `{col}` is optional and supports flexibility in your Custom Expectations, @@ -503,7 +503,7 @@ Metrics for `QueryExpectations` are a thin wrapper, allowing you to execute that Great Expectations provides a small number of simple, ready-to-use `query.*` Metrics that can plug into your Custom Expectation, or serve as a basis for your own custom Metrics. -:::note +:::note Note Query Metric functions have the `@metric_value` decorator, with the appropriate `engine`. The `@metric_value` decorator allows us to explicitly structure queries and directly access our compute domain. @@ -576,7 +576,7 @@ black ruff --fix ``` -:::info +:::info Info If desired, you can automate this to happen at commit time. See our [guidance on linting](../../../contributing/style_guides/code_style.md#linting) for more on this process. ::: @@ -622,7 +622,7 @@ would become This is particularly important because ***we*** want to make sure that ***you*** get credit for all your hard work! -:::note +:::note Note For more information on our code standards and contribution, see our guide on [Levels of Maturity](/oss/contributing/contributing_maturity.md#expectation-contributions) for Expectations. 
To view the full scripts used in this page, see them on GitHub: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_regex_based_column_map_expectations.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_regex_based_column_map_expectations.md index 1d0e1cb2dea4..6624e062a2e7 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_regex_based_column_map_expectations.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_regex_based_column_map_expectations.md @@ -159,7 +159,7 @@ Completeness checklist for ExpectColumnValuesToOnlyContainVowels: Passes all linting checks ``` -:::note +:::note Note For more information on tests and example cases,
see our guide on [how to create example cases for a Custom Expectation](../features_custom_expectations/how_to_add_example_cases_for_an_expectation.md). ::: @@ -226,7 +226,7 @@ black ruff --fix ``` -:::info +:::info Info If desired, you can automate this to happen at commit time. See our [guidance on linting](../../../contributing/style_guides/code_style.md#linting) for more on this process. ::: @@ -244,7 +244,7 @@ Completeness checklist for ExpectColumnValuesToOnlyContainVowels: ... ``` -:::note +:::note Note If you've already built a [Custom Expectation](../custom_expectations_lp.md) of a different type, you may notice that we didn't explicitly implement a `_validate` method or Metric class here. While we have to explicitly create these for other types of Custom Expectations, the `RegexBasedColumnMapExpectation` class handles Metric creation and result validation implicitly; no extra work needed! @@ -266,7 +266,7 @@ would become This is particularly important because ***we*** want to make sure that ***you*** get credit for all your hard work! -:::note +:::note Note For more information on our code standards and contribution, see our guide on [Levels of Maturity](/oss/contributing/contributing_maturity.md#expectation-contributions) for Expectations. 
To view the full script used in this page, see it on GitHub: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_set_based_column_map_expectations.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_set_based_column_map_expectations.md index 82bafb7dbaeb..9d90cd85d85c 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_set_based_column_map_expectations.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/creating_custom_expectations/how_to_create_custom_set_based_column_map_expectations.md @@ -160,7 +160,7 @@ Completeness checklist for ExpectColumnValuesToBeInSolfegeScaleSet: Passes all linting checks ``` -:::note +:::note Note For more information on tests and example cases,
see our guide on [how to create example cases for a Custom Expectation](../features_custom_expectations/how_to_add_example_cases_for_an_expectation.md). ::: @@ -227,7 +227,7 @@ black ruff --fix ``` -:::info +:::info Info If desired, you can automate this to happen at commit time. See our [guidance on linting](../../../contributing/style_guides/code_style.md#linting) for more on this process. ::: @@ -245,7 +245,7 @@ Completeness checklist for ExpectColumnValuesToBeInSolfegeScaleSet: ... ``` -:::note +:::note Note If you've already built a [Custom Expectation](../custom_expectations_lp.md) of a different type, you may notice that we didn't explicitly implement a `_validate` method or Metric class here. While we have to explicitly create these for other types of Custom Expectations, the `SetBasedColumnMapExpectation` class handles Metric creation and result validation implicitly; no extra work needed! @@ -267,7 +267,7 @@ would become This is particularly important because ***we*** want to make sure that ***you*** get credit for all your hard work! -:::note +:::note Note For more information on our code standards and contribution, see our guide on [Levels of Maturity](/oss/contributing/contributing_maturity.md#expectation-contributions) for Expectations. 
To view the full script used in this page, see it on GitHub: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/data_assistants/how_to_create_an_expectation_suite_with_the_missingness_data_assistant.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/data_assistants/how_to_create_an_expectation_suite_with_the_missingness_data_assistant.md index c128482a69f0..4faf17d669ba 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/data_assistants/how_to_create_an_expectation_suite_with_the_missingness_data_assistant.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/data_assistants/how_to_create_an_expectation_suite_with_the_missingness_data_assistant.md @@ -5,7 +5,7 @@ title: Create an Expectation Suite with the Missingness Data Assistant import Prerequisites from '../../../../components/_prerequisites.jsx' import TechnicalTag from '../../../../reference/learn/term_tags/_tag.mdx'; -:::caution +:::caution Caution Missingness Data Assistant functionality is [Experimental](/oss/contributing/contributing_maturity.md). @@ -39,7 +39,7 @@ This is the `Validator` configuration: ```python title="Python" name="docs/docusaurus/docs/oss/guides/expectations/data_assistants/how_to_create_an_expectation_suite_with_the_missingness_data_assistant.py validator" ``` -:::caution +:::caution Caution The Missingness Data Assistant runs multiple queries against your `Data Source`. Data Assistant performance can vary significantly depending on the number of Batches, the number of records per Batch, and network latency. If Data Assistant runtimes are too long, use a subset of your data when defining your `Data Source` and `Validator`. ::: @@ -59,7 +59,7 @@ To run a Data Assistant, you can call the `run(...)` method for the assistant. T In this example, `context` is your Data Context instance. - :::note + :::note Note The example code uses the default `estimation` parameter (`"exact"`). 
If you consider your data to be valid, and want to produce Expectations with ranges that are identical to the data in the `Validator`, you don't need to alter the example code. @@ -67,7 +67,7 @@ To run a Data Assistant, you can call the `run(...)` method for the assistant. T To identify potential outliers in your `BatchRequest` data, pass `estimation="flag_outliers"` to the `run(...)` method. ::: - :::note + :::note Note The Missingness Data Assistant `run(...)` method can accept other parameters in addition to `exclude_column_names` such as `include_column_names`, `include_column_name_suffixes`, and `cardinality_limit_mode`. To view the available parameters, see [this information](https://github.com/great-expectations/great_expectations/blob/develop/great_expectations/rule_based_profiler/data_assistant/column_value_missing_data_assistant.py#L44). ::: @@ -95,7 +95,7 @@ You can check the `"success"` key of the Checkpoint's results to verify that you ![Plot Metrics](/docs/oss/images/data_assistant_plot_metrics.png) - :::note + :::note Note Hover over a data point to view more information about the Batch and its calculated Metric value. ::: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/features_custom_expectations/how_to_add_example_cases_for_an_expectation.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/features_custom_expectations/how_to_add_example_cases_for_an_expectation.md index 727da7288cd7..461a7116ef6d 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/features_custom_expectations/how_to_add_example_cases_for_an_expectation.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/features_custom_expectations/how_to_add_example_cases_for_an_expectation.md @@ -108,7 +108,7 @@ Sometimes you need to specify the precise type of the columns for each backend. 
}, ``` -:::info +:::info Info While Pandas is fairly flexible in typing, Spark and many SQL dialects are much more strict. You may find you wish to use data that is incompatible with a given backend, or write different individual tests for different backends. @@ -140,7 +140,7 @@ If you are interested in contributing your Custom Expectation back to Great Expe ```python title="Python" name="docs/docusaurus/docs/snippets/expect_column_max_to_be_between_custom.py examples" ``` -:::note +:::note Note The optional `only_for` and `suppress_test_for` keys can be specified at the top-level (next to `data` and `tests`) or within specific tests (next to `title`, and so on). Allowed backends include: "bigquery", "mssql", "mysql", "pandas", "postgresql", "redshift", "snowflake", "spark", "sqlite", "trino" @@ -185,7 +185,7 @@ If you're interested in having your contribution accepted at a Beta level, these For full acceptance into the Great Expectations codebase at a Production level, we require a more robust test suite. If you believe your Custom Expectation is otherwise ready for contribution at a Production level, please submit a [Pull Request](https://github.com/great-expectations/great_expectations/pulls), and we will work with you to ensure adequate testing. -:::note +:::note Note For more information on our code standards and contribution, see our guide on [Levels of Maturity](/oss/contributing/contributing_maturity.md#expectation-contributions) for Expectations. 
To view the full script used in this page, see it on GitHub: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/features_custom_expectations/how_to_add_input_validation_for_an_expectation.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/features_custom_expectations/how_to_add_input_validation_for_an_expectation.md index 871b796ef5dc..dd0d6c4af289 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/features_custom_expectations/how_to_add_input_validation_for_an_expectation.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/features_custom_expectations/how_to_add_input_validation_for_an_expectation.md @@ -109,7 +109,7 @@ The method implemented in this guide is an optional feature for Experimental Exp If you would like to contribute your Custom Expectation to the Great Expectations codebase, please submit a [Pull Request](https://github.com/great-expectations/great_expectations/pulls). -:::note +:::note Note For more information on our code standards and contribution, see our guide on [Levels of Maturity](/oss/contributing/contributing_maturity.md#expectation-contributions) for Expectations. 
To view the full script used in this page, see it on GitHub: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/features_custom_expectations/how_to_add_spark_support_for_an_expectation.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/features_custom_expectations/how_to_add_spark_support_for_an_expectation.md index 5314da7c9a7c..2e83ce521169 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/features_custom_expectations/how_to_add_spark_support_for_an_expectation.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/features_custom_expectations/how_to_add_spark_support_for_an_expectation.md @@ -32,7 +32,7 @@ Within the `examples` defined inside your Expectation class, the optional `only_ ```python title="Python" name="docs/docusaurus/docs/snippets/expect_column_max_to_be_between_custom.py examples" ``` -:::note +:::note Note The optional `only_for` and `suppress_test_for` keys may be specified at the top-level (next to `data` and `tests`) or within specific tests (next to `title`, etc). Allowed backends include: "bigquery", "mssql", "mysql", "pandas", "postgresql", "redshift", "snowflake", "spark", "sqlite", "trino" @@ -136,7 +136,7 @@ This allows us to build and return a query to be executed, providing the result ```python title="Python" name="docs/docusaurus/docs/snippets/expect_column_values_to_equal_three.py spark_query" ``` -:::note +:::note Note Because in Spark we are implementing the window function directly, we have to return the *unexpected* condition: `False` when `column == 3`, otherwise `True`. ::: @@ -176,7 +176,7 @@ If you're interested in having your contribution accepted at a Beta level, your For full acceptance into the Great Expectations codebase at a Production level, we require that your Custom Expectation meets our code standards, including test coverage and style. 
If you believe your Custom Expectation is otherwise ready for contribution at a Production level, please submit a [Pull Request](https://github.com/great-expectations/great_expectations/pulls), and we will work with you to ensure your Custom Expectation meets these standards. -:::note +:::note Note For more information on our code standards and contribution, see our guide on [Levels of Maturity](/oss/contributing/contributing_maturity.md#expectation-contributions) for Expectations. To view the full scripts used in this page, see them on GitHub: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/features_custom_expectations/how_to_add_sqlalchemy_support_for_an_expectation.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/features_custom_expectations/how_to_add_sqlalchemy_support_for_an_expectation.md index 92c319802cde..7f6d908628a1 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/features_custom_expectations/how_to_add_sqlalchemy_support_for_an_expectation.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/expectations/features_custom_expectations/how_to_add_sqlalchemy_support_for_an_expectation.md @@ -34,7 +34,7 @@ Within the `examples` defined inside your Expectation class, the optional `only_ ```python title="Python" name="docs/docusaurus/docs/snippets/expect_column_max_to_be_between_custom.py examples" ``` -:::note +:::note Note The optional `only_for` and `suppress_test_for` keys can be specified at the top-level (next to `data` and `tests`) or within specific tests (next to `title`, and so on). 
Allowed backends include: "bigquery", "mssql", "mysql", "pandas", "postgresql", "redshift", "snowflake", "spark", "sqlite", "trino" @@ -172,7 +172,7 @@ If you're interested in having your contribution accepted at a Beta level, your For full acceptance into the Great Expectations codebase at a Production level, we require that your Custom Expectation meets our code standards, test coverage and style. If you believe your Custom Expectation is otherwise ready for contribution at a Production level, please submit a [Pull Request](https://github.com/great-expectations/great_expectations/pulls), and we will work with you to ensure your Custom Expectation meets these standards. -:::note +:::note Note For more information on our code standards and contribution, see our guide on [Levels of Maturity](/oss/contributing/contributing_maturity.md#expectation-contributions) for Expectations. To view the full scripts used in this page, see them on GitHub: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_data_contexts/how_to_configure_credentials.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_data_contexts/how_to_configure_credentials.md index 2304e55dae85..1b0176a0483c 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_data_contexts/how_to_configure_credentials.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_data_contexts/how_to_configure_credentials.md @@ -52,7 +52,7 @@ If using a YAML file, save desired credentials or config values to ``great_expec ```yaml title="YAML" name="docs/docusaurus/docs/oss/guides/setup/configuring_data_contexts/how_to_configure_credentials.py config_variables_yaml" ``` -:::note +:::note Note - If you wish to store values that include the dollar sign character ``$``, please escape them using a backslash ``\`` so substitution is not attempted. 
For example in the above example for Postgres credentials you could set ``password: pa\$sword`` if your password is ``pa$sword``. Say that 5 times fast, and also please choose a more secure password! - You can also have multiple substitutions for the same item, e.g. ``database_string: ${USER}:${PASSWORD}@${HOST}:${PORT}/${DATABASE}`` @@ -90,7 +90,7 @@ Configure your Great Expectations project to substitute variables from the AWS S - An AWS Secrets Manager instance. See [AWS Secrets Manager](https://docs.aws.amazon.com/secretsmanager/latest/userguide/tutorials_basic.html). -:::warning +:::warning Warning Secrets store substitution uses the configurations from your ``config_variables.yml`` file **after** all other types of substitution are applied from environment variables. @@ -111,7 +111,7 @@ pip install 'great_expectations[aws_secrets]' In order to substitute your value by a secret in AWS Secrets Manager, you need to provide an arn of the secret like this one: ``secret|arn:aws:secretsmanager:123456789012:secret:my_secret-1zAyu6`` -:::note +:::note Note The last 7 characters of the arn are automatically generated by AWS and are not mandatory to retrieve the secret, thus ``secret|arn:aws:secretsmanager:region-name-1:123456789012:secret:my_secret`` will retrieve the same secret. @@ -166,7 +166,7 @@ Configure your Great Expectations project to substitute variables from the GCP S - Configured a secret manager and secrets in the cloud with [GCP Secret Manager](https://cloud.google.com/secret-manager/docs/quickstart) -:::warning +:::warning Warning Secrets store substitution uses the configurations from your ``config_variables.yml`` project config **after** substitutions are applied from environment variables. 
@@ -236,7 +236,7 @@ Configure your Great Expectations project to substitute variables from the Azure - [Set up a working deployment of Great Expectations](/oss/guides/setup/setup_overview.md) - Configured a secret manager and secrets in the cloud with [Azure Key Vault](https://docs.microsoft.com/en-us/azure/key-vault/general/overview) -:::warning +:::warning Warning Secrets store substitution uses the configurations from your ``config_variables.yml`` file **after** all other types of substitution are applied from environment variables. diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_data_docs/components_how_to_host_and_share_data_docs_on_amazon_s3/_configure_your_bucket_policy_to_enable_appropriate_access.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_data_docs/components_how_to_host_and_share_data_docs_on_amazon_s3/_configure_your_bucket_policy_to_enable_appropriate_access.mdx index e73a7f810d08..78e489178700 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_data_docs/components_how_to_host_and_share_data_docs_on_amazon_s3/_configure_your_bucket_policy_to_enable_appropriate_access.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_data_docs/components_how_to_host_and_share_data_docs_on_amazon_s3/_configure_your_bucket_policy_to_enable_appropriate_access.mdx @@ -1,7 +1,7 @@ The example policy below **enforces IP-based access**. Modify the bucket name and IP addresses for your environment. After you have customized the example policy to suit your situation, name the file ``ip-policy.json`` and save it in your local directory. -:::caution +:::caution Caution Your policy should limit access to authorized users. Data Docs sites can include sensitive information and should **not** be publicly accessible. @@ -32,12 +32,12 @@ Your policy should limit access to authorized users. 
Data Docs sites can include } ``` -:::tip +:::tip Tip Because Data Docs include multiple generated pages, it is important to include the `arn:aws:s3:::{your_data_docs_site}/*` path in the `Resource` list along with the `arn:aws:s3:::{your_data_docs_site}` path that permits access to your Data Docs' front page. ::: -:::info REMINDER +:::info Reminder Amazon Web Service's S3 buckets are a third party utility. For more information about configuring AWS S3 bucket policies, see [Using bucket policies](https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucket-policies.html). \ No newline at end of file diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_data_docs/host_and_share_data_docs.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_data_docs/host_and_share_data_docs.md index 907f32bc86cd..4e69f94151a6 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_data_docs/host_and_share_data_docs.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_data_docs/host_and_share_data_docs.md @@ -129,7 +129,7 @@ To review additional options for configuring the ``config_variables.yml`` file o 2. Optional. Replace the default ``local_site`` to maintain a single Azure Data Docs site. -:::note +:::note Note Since the container is named ``$web``, setting ``container: $web`` in ``great_expectations.yml`` would cause GX to unsuccessfully try to find the ``web`` variable in ``config_variables.yml``. Use an escape char ``\`` before the ``$`` so the ``substitute_config_variable`` can locate the ``$web`` container. 
::: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_metadata_stores/components_how_to_configure_a_validation_result_store_in_amazon_s3/_update_your_configuration_file_to_include_a_new_store_for_validation_results_on_s.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_metadata_stores/components_how_to_configure_a_validation_result_store_in_amazon_s3/_update_your_configuration_file_to_include_a_new_store_for_validation_results_on_s.mdx index a1ccc0f0b38d..b30d0947f3e8 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_metadata_stores/components_how_to_configure_a_validation_result_store_in_amazon_s3/_update_your_configuration_file_to_include_a_new_store_for_validation_results_on_s.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_metadata_stores/components_how_to_configure_a_validation_result_store_in_amazon_s3/_update_your_configuration_file_to_include_a_new_store_for_validation_results_on_s.mdx @@ -53,6 +53,6 @@ store_backend: assume_role_duration: session_duration_in_seconds ``` -:::caution +:::caution Caution If you are also storing in S3 [How to configure an Expectation store to use Amazon S3](../configure_expectation_stores.md), or DataDocs in S3 [How to host and share Data Docs](../../configuring_data_docs/host_and_share_data_docs.md), then make sure the ``prefix`` values are disjoint and one is not a substring of the other. 
::: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_metadata_stores/components_how_to_configure_an_expectation_store_in_amazon_s3/_update_your_configuration_file_to_include_a_new_store_for_expectations_on_s.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_metadata_stores/components_how_to_configure_an_expectation_store_in_amazon_s3/_update_your_configuration_file_to_include_a_new_store_for_expectations_on_s.mdx index d8f92e4961d7..f1eec6ec4240 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_metadata_stores/components_how_to_configure_an_expectation_store_in_amazon_s3/_update_your_configuration_file_to_include_a_new_store_for_expectations_on_s.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_metadata_stores/components_how_to_configure_an_expectation_store_in_amazon_s3/_update_your_configuration_file_to_include_a_new_store_for_expectations_on_s.mdx @@ -54,6 +54,6 @@ store_backend: assume_role_duration: session_duration_in_seconds ``` -:::caution +:::caution Caution If you're storing [Validations in S3](../../configuring_metadata_stores/configure_result_stores.md) or [DataDocs in S3](../../configuring_data_docs/host_and_share_data_docs.md), make sure that the ``prefix`` values are disjoint and one is not a substring of the other. 
::: \ No newline at end of file diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_metadata_stores/configure_expectation_stores.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_metadata_stores/configure_expectation_stores.md index 7e448956964e..4fdebe79f181 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_metadata_stores/configure_expectation_stores.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_metadata_stores/configure_expectation_stores.md @@ -118,7 +118,7 @@ stores: connection_string: ${AZURE_STORAGE_CONNECTION_STRING} ``` -:::note +:::note Note If the container for [hosting and sharing Data Docs on Azure Blob Storage](../../setup/configuring_data_docs/host_and_share_data_docs.md) is named ``$web``, use ``container: \$web`` to allow access to the ``$web``container. ::: @@ -211,7 +211,7 @@ In the following example, `expectations_store_name` is set to ``expectations_GCS ```yaml title="YAML" name="docs/docusaurus/docs/oss/guides/setup/configuring_metadata_stores/how_to_configure_an_expectation_store_in_gcs.py configured_expectations_store_yaml" ``` -:::warning +:::warning Warning If you are also storing [Validations in GCS](./configure_result_stores.md) or [DataDocs in GCS](../configuring_data_docs/host_and_share_data_docs.md), make sure that the ``prefix`` values are disjoint and one is not a substring of the other. 
::: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_metadata_stores/configure_result_stores.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_metadata_stores/configure_result_stores.md index 975f80abda8e..d23312f314d6 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_metadata_stores/configure_result_stores.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/configuring_metadata_stores/configure_result_stores.md @@ -19,7 +19,7 @@ import Tabs from '@theme/Tabs'; A Validation Results Store is a connector that is used to store and retrieve information about objects generated when data is Validated against an Expectation. By default, Validation Results are stored in JSON format in the ``uncommitted/validations/`` subdirectory of your ``gx/`` folder. Use the information provided here to configure a store for your Validation Results. -:::caution +:::caution Caution Validation Results can include sensitive or regulated data that should not be committed to a source control system. @@ -119,7 +119,7 @@ stores: connection_string: ${AZURE_STORAGE_CONNECTION_STRING} ``` -:::note +:::note Note If the container for [hosting and sharing Data Docs on Azure Blob Storage](../../setup/configuring_data_docs/host_and_share_data_docs.md) is named ``$web``, use ``container: \$web`` to allow access to the ``$web``container. 
::: @@ -192,7 +192,7 @@ In the following example, `validations_store_name` is set to ``validations_GCS_s ```yaml title="YAML" name="docs/docusaurus/docs/oss/guides/setup/configuring_metadata_stores/how_to_configure_a_validation_result_store_in_gcs.py configured_validations_store_yaml" ``` -:::warning +:::warning Warning If you are also storing [Expectations in GCS](../configuring_metadata_stores/configure_expectation_stores.md) or [DataDocs in GCS](../configuring_data_docs/host_and_share_data_docs.md), make sure that the ``prefix`` values are disjoint and one is not a substring of the other. ::: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/installation/components_local/_create_an_venv_with_pip.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/installation/components_local/_create_an_venv_with_pip.mdx index 6ea57352d72f..471e2508e445 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/installation/components_local/_create_an_venv_with_pip.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/installation/components_local/_create_an_venv_with_pip.mdx @@ -20,6 +20,6 @@ Run the following code to activate the virtual environment: source my_venv/bin/activate ``` -:::tip +:::tip Tip To change the name of your virtual environment, replace `my_venv` in the example code. 
::: \ No newline at end of file diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/installation/components_local/_preface.mdx b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/installation/components_local/_preface.mdx index 450d7e1486ff..11391657e3e4 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/installation/components_local/_preface.mdx +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/setup/installation/components_local/_preface.mdx @@ -14,7 +14,7 @@ import PrereqPython from '../../../../../components/prerequisites/_python_versio - -:::note +:::note Note - Great Expectations is developed and tested on macOS and Linux Ubuntu. The installation on Windows may differ from the following procedure. If you have questions or encounter issues, post your comments on the [Great Expectations Slack channel](https://greatexpectationstalk.slack.com/join/shared_invite/zt-sugx45gn-SFe_ucDBbfi0FZC0mRNm_A#/shared-invite/email). - If you're using a Mac M1, see [Installing Great Expectations on a Mac M1](https://greatexpectations.io/blog/m-one-mac-instructions). 
diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/validation/advanced/how_to_deploy_a_scheduled_checkpoint_with_cron.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/validation/advanced/how_to_deploy_a_scheduled_checkpoint_with_cron.md index 0cac471fcd2c..492618f53271 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/validation/advanced/how_to_deploy_a_scheduled_checkpoint_with_cron.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/validation/advanced/how_to_deploy_a_scheduled_checkpoint_with_cron.md @@ -43,7 +43,7 @@ To run the Checkpoint ``my_checkpoint`` every morning at 0300, add the following 0 3 * * * /full/path/to/your/environment/bin/great_expectations checkpoint run ratings --directory /full/path/to/my_project/gx/ ``` -:::note +:::note Note - The five fields at the start of your cron schedule correspond to the minute, hour, day of the month, month, and day of the week. - It is critical that you use full paths to both the ``great_expectations`` executable in your project's environment and the full path to the project's ``gx/`` directory. 
::: diff --git a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/validation/checkpoints/how_to_validate_multiple_batches_within_single_checkpoint.md b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/validation/checkpoints/how_to_validate_multiple_batches_within_single_checkpoint.md index 23b6dbe34bc6..2929b1e1ec44 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/oss/guides/validation/checkpoints/how_to_validate_multiple_batches_within_single_checkpoint.md +++ b/docs/docusaurus/versioned_docs/version-0.18/oss/guides/validation/checkpoints/how_to_validate_multiple_batches_within_single_checkpoint.md @@ -25,7 +25,7 @@ The following Python code creates a Batch Request that includes every available ```python title="Python" name="docs/docusaurus/docs/oss/guides/validation/checkpoints/how_to_validate_multiple_batches_within_single_checkpoint.py build_a_batch_request_with_multiple_batches" ``` -:::tip +:::tip Tip A Batch Request can only retrieve multiple Batches from a Data Asset that has been configured to include more than the default single Batch. When working with a Filesystem Data Source and organizing Batches, the `batching_regex` argument determines the inclusion of multiple Batches into a single Data Asset, with each file that matches the `batching_regex` resulting in a single Batch. diff --git a/docs/docusaurus/versioned_docs/version-0.18/reference/learn/conceptual_guides/metricproviders.md b/docs/docusaurus/versioned_docs/version-0.18/reference/learn/conceptual_guides/metricproviders.md index 3572ffac1f52..fea4f044c7ba 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/reference/learn/conceptual_guides/metricproviders.md +++ b/docs/docusaurus/versioned_docs/version-0.18/reference/learn/conceptual_guides/metricproviders.md @@ -64,7 +64,7 @@ Sometimes, the MetricProvider class is created directly from the Expectation cla The API for MetricProvider classes is unusual. 
MetricProvider classes are never intended to be instantiated, and they don’t have inputs or outputs in the normal sense of method arguments and return values. Instead, the inputs for MetricProvider classes are methods for calculating the Metric on different backend applications. Each method must be decorated with an appropriate decorator. On `new`, the MetricProvider class registers the decorated methods as part of the Metrics registry so that they can be invoked to calculate Metrics. The registered methods are the only output from MetricProviders. -:::note +:::note Note Decorators invoked on `new` can make maintainability challenging. GX intends to address this shortcoming in future releases. ::: diff --git a/docs/docusaurus/versioned_docs/version-0.18/reference/learn/expectations/conditional_expectations.md b/docs/docusaurus/versioned_docs/version-0.18/reference/learn/expectations/conditional_expectations.md index 50d7fecb5850..0178c98c800d 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/reference/learn/expectations/conditional_expectations.md +++ b/docs/docusaurus/versioned_docs/version-0.18/reference/learn/expectations/conditional_expectations.md @@ -2,7 +2,7 @@ title: Conditional Expectations --- -:::note +:::note Note Conditional Expectations are **experimental**, and they are available for Pandas, Spark, and SQLAlchemy backends. ::: @@ -10,7 +10,7 @@ You can create an Expectation for an entire dataset, or for a subset of the data Great Expectations lets you express Conditional Expectations with a `row_condition` argument that can be passed to all Dataset Expectations. The `row_condition` argument should be a boolean expression string. In addition, you must provide the `condition_parser` argument which defines the syntax of conditions. When implementing conditional Expectations with Pandas, this argument must be set to `"pandas"`. 
When implementing conditional Expectations with Spark or SQLAlchemy, this argument must be set to `"great_expectations__experimental__"`. -:::note +:::note Note In Pandas the `row_condition` value is passed to `pandas.DataFrame.query()` before Expectation Validation. See [pandas.DataFrame.query](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.query.html). In Spark and SQLAlchemy, the `row_condition` value is parsed as a data filter or a query before Expectation Validation. @@ -48,7 +48,7 @@ This returns: } ``` -:::note +:::note Note To get a Validator object, see [How to create Expectations interactively in Python](/oss/guides/expectations/how_to_create_and_edit_expectations_with_instant_feedback_from_a_sample_batch_of_data.md). ::: diff --git a/docs/docusaurus/versioned_docs/version-0.18/reference/learn/expectations/result_format.md b/docs/docusaurus/versioned_docs/version-0.18/reference/learn/expectations/result_format.md index 735d949cabff..30cf41bc26b9 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/reference/learn/expectations/result_format.md +++ b/docs/docusaurus/versioned_docs/version-0.18/reference/learn/expectations/result_format.md @@ -23,7 +23,7 @@ When using a dictionary, `result_format` can include the following keys: - `include_unexpected_rows`: When running validations, this returns the entire row for each unexpected value in dictionary form. When using `include_unexpected_rows`, you must explicitly specify `result_format` and `result_format` must be more verbose than `BOOLEAN_ONLY`. -:::note +:::note Note `include_unexpected_rows` returns EVERY row for each unexpected value. In large tables, this could result in an unmanageable amount of data. ::: @@ -49,7 +49,7 @@ Your Checkpoint configuration is defined below the `runtime_configuration` key. The results are stored in the Validation Result after running the Checkpoint. 
-:::note +:::note Note The `unexpected_index_list`, as represented by primary key (PK) columns, is rendered in Data Docs when `COMPLETE` is selected. diff --git a/docs/docusaurus/versioned_docs/version-0.18/reference/learn/migration_guide.md b/docs/docusaurus/versioned_docs/version-0.18/reference/learn/migration_guide.md new file mode 100644 index 000000000000..7e63bf6bd457 --- /dev/null +++ b/docs/docusaurus/versioned_docs/version-0.18/reference/learn/migration_guide.md @@ -0,0 +1,2145 @@ +--- +id: migration_guide +title: "GX V0 to V1 Migration Guide" +--- +import TabItem from '@theme/TabItem'; +import Tabs from '@theme/Tabs'; + +## Overview +This guide for migrating your Great Expectations V0 configurations to V1 covers all the Great Expectations domain objects found in V0 and shows how they map to their equivalent V1 domain objects. + +### GX Cloud Context Users +If you are a GX cloud user, you are able to immediately try out GX V1! Cloud will do the translation of your configurations for you. Your `context = gx.get_context()` call will return updated configurations. You can inspect your configuration objects by calling `all()` on the appropriate domain namespace. For example, `context.data_sources.all()` will list all of your datasources that have been automatically translated to V1. If there are incompatible configurations, they will be filtered out of this list. You can retrieve them by using a GX `>=0.18.19` Python client. If you need to translate any of these missing configurations to `1.0` you can look at the various **API** sections below the domain object you are interested in to see a comparison of the V0 and V1 API calls and determine what you need to do to translate the configuration. + +### GX File Context +Below in each section you will see a side-by-side comparison of the configuration files for each domain object along with a description of how they have changed and what features have been removed and added. 
You can use this as a basis for translating your configuration objects from V0 to V1. + +## Domain objects + +### Expectation Suites and Expectations +In GX `0.X` and in GX `1.0`, every Expectation Suite has its own configuration file and the path to them in the Great Expectations project directory is: + +`gx/expectations/.json` + +#### Configuration file differences + +Here is a side-by-side comparison of a suite called `suite_for_yellow_tripdata`: + + + + ```json title="JSON" + { + "expectation_suite_name": "suite_for_yellow_tripdata", + "data_asset_type": "CSVAsset", + "evaluation_parameters": { + "parameter_name": "value" + }, + "expectations": [ + { + "expectation_type": "expect_column_values_to_be_between", + "kwargs": { + "column": "passenger_count", + "max_value": 4, + "min_value": 0 + }, + "meta": {} + }, + { + "expectation_type": "expect_column_values_to_be_in_set", + "kwargs": { + "column": "VendorID", + "value_set": [ + 1, + 2, + 3, + 4 + ] + }, + "meta": {} + } + ], + "ge_cloud_id": null, + "meta": { + "foo": "bar", + "great_expectations_version": "0.18.19" + } + } + ``` + + + ```json title="JSON" + { + "name": "suite_for_yellow_tripdata", + "suite_parameters": { + "parameter_name": "value" + }, + "expectations": [ + { + "type": "expect_column_values_to_be_between", + "kwargs": { + "column": "passenger_count", + "max_value": 4.0, + "min_value": 0.0 + }, + "meta": {}, + "id": "24dc475c-38a3-4234-ab47-b13d0f233242" + }, + { + "type": "expect_column_values_to_be_in_set", + "kwargs": { + "column": "VendorID", + "value_set": [ + 1, + 2, + 3, + 4 + ] + }, + "meta": {}, + "id": "d8b3b4e9-296f-4dd5-bd29-aac6a00cba1c" + } + ], + "id": "77373d6f-3561-4d62-b150-96c36dccbe55", + "meta": { + "foo": "bar", + "great_expectations_version": "1.0.0" + }, + "notes": "This is a new field." + } + ``` + + + +**expectation_suite_name**: This is now called name and has the name of the suite. + +**data_asset_type**: This has been removed. 
Expectation suites can be associated with any asset type. + +**evaluation_parameters**: This has been renamed to suite_parameters. The contents are unchanged. + +**expectations**: This is a list of expectations. The expectation keys have changed as follows: + +> **expectation_type**: This has been changed to type. + +> **kwargs**: This is unchanged + +> **meta**: This is a dictionary that a user can populate with whatever metadata they would like. The notes key that Great Expectations Cloud used has been pulled out into a top-level key. + +> **id**: This new field introduced in 1.0 can be any arbitrary, unique UUID. When migrating, generate and add a UUID. + +> **notes (new field)**: This new top-level field replaces meta.notes. This is consumed by Great Expectations Cloud to display user notes on the Cloud UI. + +**ge_cloud_id**: This is now id. This is now a required field. Migrators can generate a unique, arbitrary UUID and add it. + +**meta**: The format is unchanged. + +**notes**: This is new in 1.0 and is an arbitrary string. + +#### Expectation Suite API Calls + +The suites above were created with the following API calls. This example demonstrates how to create an equivalent suite to your V0 suite in V1.
+ + + + ```python title="Python" + suite = context.add_expectation_suite( + expectation_suite_name="suite_for_yellow_tripdata", + meta={"foo": "bar", "notes": "Here are some suite notes."}, + evaluation_parameters={"parameter_name": "value"}, + data_asset_type="CSVAsset", # V1 no longer supports this argument, expectations are type independent + ) + validator = context.get_validator(batch_request=asset.build_batch_request(), expectation_suite_name="suite_for_yellow_tripdata") + validator.expect_column_values_to_be_between(column="passenger_count", min_value=0, max_value=4) + validator.expect_column_values_to_be_in_set(column="VendorID", value_set=[1,2,3,4]) + validator.save_expectation_suite(discard_failed_expectations=False) + ``` + + + ```python title="Python" + suite = context.suites.add( + gx.ExpectationSuite( + name="suite_for_yellow_tripdata", + meta={"foo": "bar"}, + suite_parameters={"parameter_name": "value"}, + notes="Here are some suite notes.", + id="77373d6f-3561-4d62-b150-96c36dccbe55", + ) + ) + suite.add_expectation(gxe.ExpectColumnValuesToBeBetween(column="passenger_count", min_value=0, max_value=4)) + suite.add_expectation(gxe.ExpectColumnValuesToBeInSet(column="VendorID", value_set=[1,2,3,4])) + ``` + + + +### Data Sources and Data Assets +Data Source configurations are stored in the YAML file `gx/great_expectations.yml`, in the top-level block whose key is `fluent_datasources.` + +We’ll walk through examples of different Data Source configurations in V0 and V1 so you can see how to translate between the two. + +#### Pandas API + +##### Pandas Filesystem Data +Here is a side-by-side comparison of a Data Source called `pandas_fs_ds` with 4 assets called: `yearly_taxi_data`, `monthly_taxi_data`, `daily_taxi_data`, and `arbitrary_taxi_data`. 
+ + + + ```yaml title="YAML" + fluent_datasources: + pandas_fs_ds: + type: pandas_filesystem + assets: + yearly_taxi_data: + type: csv + batching_regex: sampled_yellow_tripdata_(?P\d{4})\.csv + monthly_taxi_data: + type: csv + batching_regex: sampled_yellow_tripdata_(?P\d{4})-(?P\d{2})\.csv + daily_taxi_data: + type: csv + batching_regex: sampled_yellow_tripdata_(?P\d{4})-(?P\d{2})-(?P\d{2})\.csv + arbitrary_taxi_data: + type: csv + batching_regex: sampled_yellow_tripdata_(?P\w+)\.csv + base_directory: data + ``` + + + ```yaml title="YAML" + fluent_datasources: + pandas_fs_ds: + type: pandas_filesystem + id: 2ea309bf-bb5f-421b-ab6b-ea1cc9e70c8e + assets: + taxi_data: + type: csv + id: 34b98eca-790f-4504-ab4b-b65bc128b5ee + batch_definitions: + yearly_batches: + id: a04f8071-33d9-4834-b667-e3d8c2ca70aa + partitioner: + regex: sampled_yellow_tripdata_(?P\d{4})\.csv + sort_ascending: true + monthly_batches: + id: f07aa73d-bf56-438e-9dc2-0d05fb7d32a1 + partitioner: + regex: sampled_yellow_tripdata_(?P\d{4})-(?P\d{2})\.csv + sort_ascending: true + param_names: + - year + - month + daily_batches: + id: 37b4b2eb-4b37-46c6-b51c-f2d21ba0e6d6 + partitioner: + regex: sampled_yellow_tripdata_(?P\d{4})-(?P\d{2})-(?P\d{2})\.csv + sort_ascending: true + param_names: + - year + - month + - day + base_directory: data + ``` + + + +In `0.X`, a Data Source represents where the data lives and the execution engine (e.g. reading data from the local filesystem using pandas) and a Data Asset represents the data file format and how the data should be partitioned (e.g. a parameterized regex which matches file names). In `1.0` the Data Source has the same meaning. However, the Data Asset now only represents the data file format and there is a new concept, *Batch Definition*, which represents how the data is partitioned. This manifests as an extra layer in the YAML asset block. + +**pandas_fs_ds (example)**: The keys below `fluent_datasources` are the names of the Data Source. 
This is unchanged. + +**type**: The type of Data Source. This is unchanged. + +**assets**: A list of the Data Assets. Each key is an asset name in both V0 and V1. The asset value is different. In V0 the nested keys are: + +> **type**: This is unchanged + +> **batching_regex**: This has been replaced with batch_definitions. The format for batch_definitions follows. You will notice that the regex now lives in the partitioner's regex field. The batch_definition configuration format is: + +>> **yearly_batches (example Batch Definition name)**: These keys are the names of the batch definitions. + +>> **id**: This is an arbitrary UUID and can be chosen to be any unique UUID. + +>> **partitioner**: This is a key with information about how the batch is defined + +>>> **regex**: This is the regex previously living on the asset keyed by batching_regex + +>>> **sort_ascending**: A boolean. `true` if the batch order is increasing in time, `false` if the ordering is decreasing in time. Previously in V0 one could specify an order_by field on the asset which could sort the different date components in different orders (e.g., year could be sorted increasing in time while month could be sorted decreasing in time). This is no longer supported. + +>>> **param_names**: This is a list of the parameter names which will be identical to the named matches from the `regex`. That is, the items will be `year`, `month`, or `day`. If this list would only contain year it can be excluded from the configuration file. + +> **id**: This is a new field and is an arbitrary UUID. If migrating you can pick any unique UUID. + +**base_directory**: The path to the data files. This is unchanged. + +**id**: This is a new field and is an arbitrary UUID. If migrating you can pick any unique UUID. + +:::note Note +We no longer support arbitrary batching regexes. Batches must be defined by one of our temporal batch definitions which are yearly, monthly, or daily.
+::: + +##### Pandas Filesystem Creation via API + + + + ```python title="Python" + # Pandas Filesystem Data Source + datasource = context.sources.add_pandas_filesystem(name="pd_fs_ds", base_directory="data") + + # Data Assets + yearly = datasource.add_csv_asset(name="yearly_taxi_data", batching_regex=r"sampled_yellow_tripdata_(?P\d{4})\.csv") + monthly = datasource.add_csv_asset(name="monthly_taxi_data", batching_regex=r"sampled_yellow_tripdata_(?P\d{4})-(?P\d{2})\.csv") + daily = datasource.add_csv_asset(name="daily_taxi_data", batching_regex=r"sampled_yellow_tripdata_(?P\d{4})-(?P\d{2})-(?P\d{2})\.csv") + arbitrary = datasource.add_csv_asset(name="arbitrary_taxi_data", batching_regex=r"sampled_yellow_tripdata_(?P\w+)\.csv") + ``` + + + ```python title="Python" + # Pandas Filesystem Data Source + data_source = context.data_sources.add_pandas_filesystem(name="pd_fs_ds", base_directory="data") + + # CSV Data Asset + file_csv_asset = data_source.add_csv_asset(name="taxi_data") + + # Batch Definitions + yearly = file_csv_asset.add_batch_definition_yearly(name="yearly_batches", regex=r"sampled_yellow_tripdata_(?P\d{4})\.csv") + monthly = file_csv_asset.add_batch_definition_monthly(name="monthly_batches", regex=r"sampled_yellow_tripdata_(?P\d{4})-(?P\d{2})\.csv") + daily = file_csv_asset.add_batch_definition_daily(name="daily_batches", regex=r"sampled_yellow_tripdata_(?P\d{4})-(?P\d{2})-(?P\d{2})\.csv") + ``` + + + +##### Pandas Dataframe + + + ```yaml title="YAML" + fluent_datasources: + pd_df_ds: + type: pandas + assets: + taxi_dataframe_asset: + type: dataframe + batch_metadata: {} + ``` + + + ```yaml title="YAML" + fluent_datasources: + pd_df_ds: + type: pandas + assets: + taxi_dataframe_asset: + type: dataframe + batch_metadata: {} + batch_definitions: + taxi_dataframe_batch_def: + id: bf0de640-7791-4654-86b0-5f737319e993 + partitioner: + id: 352b392d-f0a5-4c7c-911f-fd68903599e0 + id: 4e0a4b9c-efc2-40e8-8114-6a45ac697554 + ``` + + + +In both `V0` and `V1` a pandas 
Data Source reads in data from a pandas dataframe. In `V1` there is a concept of a *Batch Definition* that is used to partition data into batches. For a pandas dataframe the only *Batch Definition* currently available is the whole dataframe *Batch Definition*. + +**pd_df_ds (example)**: The keys below fluent_datasources are the names of the Data Sources. This is unchanged. + +**assets**: A list of the Data Assets. Each key is an asset name in both V0 and V1. The asset value is different. + +> **type**: The type of Data Asset. This is unchanged. + +> **batch_metadata**: Arbitrary key/value pairs used to annotate the Data Asset. In V1 this is unchanged, it still describes the asset. + +> **batch_definitions**: This is new in V1. There is only 1 option here. The key is the name of the Batch Definition. It has 2 fields: + +>> **id**: An arbitrary UUID. Migrators can assign any unique UUID. + +>> **partitioner**: This is left empty as we only allow the whole dataframe + +> **id**: In V1, the asset has a unique ID. Migrators can assign any unique UUID. + +**id**: In V1, the Data Source has a unique ID. Migrators can assign any unique UUID. + +##### Pandas Dataframe Creation via API + + + ```python title="Python" + dataframe_ds = context.sources.add_pandas(name="pd_df_ds") + dataframe_asset = dataframe_ds.add_dataframe_asset(name="taxi_dataframe_asset") + ``` + + + ```python title="Python" + dataframe_ds = context.data_sources.add_pandas(name="pd_df_ds") + dataframe_asset = dataframe_ds.add_dataframe_asset(name="taxi_dataframe_asset") + dataframe_bd = dataframe_asset.add_batch_definition_whole_dataframe(name="taxi_dataframe_batch_def") + ``` + + + +#### Snowflake API +Here is a side-by-side comparison of both a `V0` Snowflake table and query Data Asset to their equivalents in `V1`. We walk through all the currently supported V1 Batch Definitions: yearly, monthly, daily, and whole table.
+ + + + ```yaml title="YAML" + fluent_datasources: + snowflake_ds: + type: snowflake + assets: + yearly_taxi_data: + type: table + order_by: + - key: year + reverse: false + batch_metadata: {} + splitter: + column_name: pickup_datetime + method_name: split_on_year + table_name: TAXI_DATA_ALL_SAMPLES + schema_name: + monthly_taxi_data: + type: table + order_by: + - key: year + reverse: true + - key: month + reverse: true + batch_metadata: {} + splitter: + column_name: pickup_datetime + method_name: split_on_year_and_month + table_name: TAXI_DATA_ALL_SAMPLES + schema_name: + daily_taxi_data: + type: table + order_by: + - key: year + reverse: false + - key: month + reverse: false + - key: day + reverse: false + batch_metadata: {} + splitter: + column_name: pickup_datetime + method_name: split_on_year_and_month_and_day + table_name: TAXI_DATA_ALL_SAMPLES + schema_name: + all_taxi_data: + type: table + order_by: [] + batch_metadata: {} + table_name: TAXI_DATA_ALL_SAMPLES + schema_name: + query_yearly: + type: query + order_by: + - key: year + reverse: false + batch_metadata: {} + splitter: + column_name: pickup_datetime + method_name: split_on_year + query: select * from TAXI_DATA_ALL_SAMPLES + query_monthly: + type: query + order_by: + - key: year + reverse: true + - key: month + reverse: true + batch_metadata: {} + splitter: + column_name: pickup_datetime + method_name: split_on_year_and_month + query: select * from TAXI_DATA_ALL_SAMPLES + query_daily: + type: query + order_by: + - key: year + reverse: false + - key: month + reverse: false + - key: day + reverse: false + batch_metadata: {} + splitter: + column_name: pickup_datetime + method_name: split_on_year_and_month_and_day + query: select * from TAXI_DATA_ALL_SAMPLES + whole_query: + type: query + order_by: [] + batch_metadata: {} + query: select * from TAXI_DATA_ALL_SAMPLES + connection_string: + snowflake://:@//?warehouse=&role= + ``` + + + ```yaml title="YAML" + fluent_datasources: + snowflake_ds: + type: 
snowflake + id: f4ac98d6-dccf-4373-b5f3-ac90ed60b139 + assets: + taxi_data: + type: table + id: ad9e8ece-0c14-45bc-bcdd-ef2e40922df4 + batch_metadata: {} + batch_definitions: + table_yearly: + id: 75a41bce-da84-425f-a3d3-92acd5c5f7f8 + partitioner: + column_name: PICKUP_DATETIME + sort_ascending: true + method_name: partition_on_year + table_monthly: + id: 67ec396a-e7ca-499d-8cb7-84a803d976af + partitioner: + column_name: PICKUP_DATETIME + sort_ascending: false + method_name: partition_on_year_and_month + table_daily: + id: 7d410bd4-ca6d-464d-b82d-3b070e6fd229 + partitioner: + column_name: PICKUP_DATETIME + sort_ascending: true + method_name: partition_on_year_and_month_and_day + whole_table: + id: bd88cdd9-a5f4-4bdf-bbf3-e43827996dd0 + partitioner: + table_name: TAXI_DATA_ALL_SAMPLES + schema_name: public + query_data: + type: query + id: 44b0eccc-54f2-46e1-a6f9-3558662d4f8a + batch_metadata: {} + batch_definitions: + query_yearly: + id: 7f3909d4-912f-44aa-8140-7ab4e7b13f4e + partitioner: + column_name: PICKUP_DATETIME + sort_ascending: true + method_name: partition_on_year + query_monthly: + id: d0c347fc-03e5-4880-a8e8-1eff04432c2f + partitioner: + column_name: PICKUP_DATETIME + sort_ascending: false + method_name: partition_on_year_and_month + query_daily: + id: 1f6701bd-b470-4ddb-a001-4cc6167ab4d0 + partitioner: + column_name: PICKUP_DATETIME + sort_ascending: true + method_name: partition_on_year_and_month_and_day + whole_query: + id: 4817cf80-1727-4aad-b31a-5552efeea441 + partitioner: + query: SELECT * FROM TAXI_DATA_ALL_SAMPLES + connection_string: + snowflake://:@//?warehouse=&role= + ``` + + + +In `0.X`, a Data Source represents where the data is persisted and the execution engine (e.g. the Snowflake database) and a Data Asset represents the data and how the data should be partitioned (e.g. by a datetime column). In `1.0` the Data Source has the same meaning. 
However, the Data Asset now represents only the data and there is a new concept, the batch definition, which represents how the data is partitioned. This manifests as an extra layer in the YAML asset block. + +A few configurations are **NO LONGER SUPPORTED**: + +- In V1, we currently only allow batching by time (e.g. year, month, day). In V0 one could split the data into batches in lots of ways. For example a table could be split by a value in a column or a file regex could contain arbitrary match expressions. We consider non-time based splitting to represent different conceptual assets that may happen to reside in the same table. For those, one should compute views or use a query asset. + +- In V0, one could set the sorting order independently for the year, month, and day dimensions. That is, one could sort ascending by year, but then descending by month and day. In V1 we only allow sorting of all the batches in ascending or descending order. For example, one can no longer sort year and month in opposite orders. + +**snowflake_ds (example)**: The keys under fluent_datasources are the names of the datasources. + +**type**: The type of Data Source. This is unchanged. + +**assets**: The keys to Data Assets are the names of the assets. In this example yearly_taxi_data is the name of a V0 asset. In V1, the asset is called taxi_data. + +> **type**: The type of asset (table or query). This is unchanged. + +> **order_by**: This is no longer a key. The information has moved inside the V1 batch definitions under the partitioner. + +> **splitter**: This is no longer a key; it has been replaced by batch_definitions. The format for batch definitions is: + +>> **table_yearly (an example)**: The name of the Batch Definition is the key to each configuration. + +>>> **id**: Migrators can assign any unique UUID. + +>>> **partitioner**: Contains the batching and sorting information. This has no value for a “whole table” partitioner.
+ +>>>> **column_name**: The column on which to split the data. This must be a DATETIME field. + +>>>> **sort_ascending**: A boolean. `true` sorts the batches in increasing time order (most recent batch last), while `false` sorts them in decreasing time order (most recent batch first). + +>>>> **method_name**: A string indicating the batching resolution. The options are: partition_on_year, partition_on_year_and_month, partition_on_year_and_month_and_day. + +> **batch_metadata**: This is unchanged. + +> **table_name (TableAsset only)**: The name of the table that holds data for this asset. This is unchanged. + +> **schema_name (TableAsset only)**: The name of the schema to which the table belongs. In V1 this is now a required field. + +> **query (QueryAsset only)**: The query to be run to generate the data for this asset. This is unchanged. + +> **id (New in V1)**: This is a new field in V1 and is a random UUID. Migrators can assign any unique UUID. + +**id (New in V1)**: An id for the Data Source. Migrators can assign any unique UUID. + +##### Snowflake Creation via API + + + ```python title="Python" + # Create datasource + connection_string = "snowflake://:@//?warehouse=&role=" + snowflake_ds = context.sources.add_snowflake(name="snowflake_ds", connection_string=connection_string) + + # Create table assets + yearly_snowflake_asset = snowflake_ds.add_table_asset(name="yearly_taxi_data", table_name="TAXI_DATA_ALL_SAMPLES", order_by=["+year"]) + yearly_snowflake_asset.add_splitter_year(column_name="pickup_datetime") + monthly_snowflake_asset = snowflake_ds.add_table_asset(name="monthly_taxi_data", table_name="TAXI_DATA_ALL_SAMPLES", order_by=["-year", "-month"]) + monthly_snowflake_asset.add_splitter_year_and_month(column_name="pickup_datetime") + daily_snowflake_asset = snowflake_ds.add_table_asset(name="daily_taxi_data", table_name="TAXI_DATA_ALL_SAMPLES", order_by=["+year", "+month", "+day"]) + daily_snowflake_asset.add_splitter_year_and_month_and_day(column_name="pickup_datetime") + whole_table_snowflake_asset =
snowflake_ds.add_table_asset(name="all_taxi_data", table_name="TAXI_DATA_ALL_SAMPLES") + + # Create query assets + yearly_query_asset = snowflake_ds.add_query_asset(name="query_yearly", query="select * from TAXI_DATA_ALL_SAMPLES", order_by=["+year"]) + yearly_query_asset.add_splitter_year(column_name="pickup_datetime") + monthly_query_asset = snowflake_ds.add_query_asset(name="query_monthly", query="select * from TAXI_DATA_ALL_SAMPLES", order_by=["-year", "-month"]) + monthly_query_asset.add_splitter_year_and_month(column_name="pickup_datetime") + daily_query_asset = snowflake_ds.add_query_asset(name="query_daily", query="select * from TAXI_DATA_ALL_SAMPLES", order_by=["+year", "+month", "+day"]) + daily_query_asset.add_splitter_year_and_month_and_day(column_name="pickup_datetime") + query_whole_table_asset = snowflake_ds.add_query_asset(name="whole_query", query="select * from TAXI_DATA_ALL_SAMPLES") + ``` + + + ```python title="Python" + # Create datasource + connection_string = "snowflake://:@//?warehouse=&role=" + snowflake_ds = context.data_sources.add_snowflake(name="snowflake_ds", connection_string=connection_string) + + # Create table asset and batch definitions + table_asset = snowflake_ds.add_table_asset(name="taxi_data", table_name="TAXI_DATA_ALL_SAMPLES") + table_yearly = table_asset.add_batch_definition_yearly(name="table_yearly", column="PICKUP_DATETIME", sort_ascending=True) + table_monthly = table_asset.add_batch_definition_monthly(name="table_monthly", column="PICKUP_DATETIME", sort_ascending=False) + table_daily = table_asset.add_batch_definition_daily(name="table_daily", column="PICKUP_DATETIME", sort_ascending=True) + whole_table = table_asset.add_batch_definition_whole_table(name="whole_table") + + # Create query asset and batch definitions + query_asset = snowflake_ds.add_query_asset(name="query_data", query="SELECT * FROM TAXI_DATA_ALL_SAMPLES") + query_yearly = query_asset.add_batch_definition_yearly(name="query_yearly", 
column="PICKUP_DATETIME", sort_ascending=True) + query_monthly = query_asset.add_batch_definition_monthly(name="query_monthly", column="PICKUP_DATETIME", sort_ascending=False) + query_daily = query_asset.add_batch_definition_daily(name="query_daily", column="PICKUP_DATETIME", sort_ascending=True) + query_whole_table = query_asset.add_batch_definition_whole_table(name="whole_query") + ``` + + + +#### Postgres API +The postgres Data Source/Asset migration from `V0` to `V1` is almost identical to the Snowflake one in terms of fields. All the fields are identical and how to migrate them from `V0` to `V1` is identical so please refer to the Snowflake section for a description. The differences in values are: +- The **type** field value is `postgres` instead of `snowflake`. +- We are NOT requiring schemas in V1 for postgres table assets. + +Here is an example `great_expectations.yml` `fluent_datasources` block and creation of this datasource and asset via the API. + +The provided connection string is a sample dataset GX maintains. 
+ + + + ```yaml title="YAML" + fluent_datasources: + postgres_ds: + type: postgres + assets: + yearly_taxi_data: + type: table + order_by: + - key: year + reverse: false + batch_metadata: {} + splitter: + column_name: pickup + method_name: split_on_year + table_name: nyc_taxi_data + schema_name: + monthly_taxi_data: + type: table + order_by: + - key: year + reverse: true + - key: month + reverse: true + batch_metadata: {} + splitter: + column_name: pickup + method_name: split_on_year_and_month + table_name: nyc_taxi_data + schema_name: + daily_taxi_data: + type: table + order_by: + - key: year + reverse: false + - key: month + reverse: false + - key: day + reverse: false + batch_metadata: {} + splitter: + column_name: pickup + method_name: split_on_year_and_month_and_day + table_name: nyc_taxi_data + schema_name: + all_taxi_data: + type: table + order_by: [] + batch_metadata: {} + table_name: nyc_taxi_data + schema_name: + query_yearly: + type: query + order_by: + - key: year + reverse: false + batch_metadata: {} + splitter: + column_name: pickup + method_name: split_on_year + query: select * from nyc_taxi_data + query_monthly: + type: query + order_by: + - key: year + reverse: true + - key: month + reverse: true + batch_metadata: {} + splitter: + column_name: pickup + method_name: split_on_year_and_month + query: select * from nyc_taxi_data + query_daily: + type: query + order_by: + - key: year + reverse: false + - key: month + reverse: false + - key: day + reverse: false + batch_metadata: {} + splitter: + column_name: pickup + method_name: split_on_year_and_month_and_day + query: select * from nyc_taxi_data + whole_query: + type: query + order_by: [] + batch_metadata: {} + query: select * from nyc_taxi_data + connection_string: postgresql+psycopg2://try_gx:try_gx@postgres.workshops.greatexpectations.io/gx_example_db + ``` + + + ```yaml title="YAML" + fluent_datasources: + postgres_ds: + type: postgres + id: cc4984f4-dbad-4488-8b0a-47ec47fc294c + assets: + 
taxi_data: + type: table + id: cb140e3c-d33f-4920-9bfc-2a23de990283 + batch_metadata: {} + batch_definitions: + table_yearly: + id: 23e9d1c7-d22e-44f3-b1fa-eb0db1df4ce8 + partitioner: + column_name: pickup + sort_ascending: true + method_name: partition_on_year + table_monthly: + id: be939a11-a257-4f9a-83c8-8efd1b25d9c9 + partitioner: + column_name: pickup + sort_ascending: false + method_name: partition_on_year_and_month + table_daily: + id: 80fb4af2-2ab2-4a09-a05d-849835677c45 + partitioner: + column_name: pickup + sort_ascending: true + method_name: partition_on_year_and_month_and_day + whole_table: + id: 09674cda-573c-400b-9a64-10dcdaecb60b + partitioner: + table_name: nyc_taxi_data + schema_name: + query_data: + type: query + id: 9ad6b38b-2337-4f51-bae2-31afb212c5f2 + batch_metadata: {} + batch_definitions: + query_yearly: + id: 56455714-0622-46b0-857f-60d964e1d004 + partitioner: + column_name: pickup + sort_ascending: true + method_name: partition_on_year + query_monthly: + id: e96513f1-12b8-419d-a1b9-4aacedfd396d + partitioner: + column_name: pickup + sort_ascending: false + method_name: partition_on_year_and_month + query_daily: + id: 996a2813-6eff-4c8a-88c6-5ca9ab60e275 + partitioner: + column_name: pickup + sort_ascending: true + method_name: partition_on_year_and_month_and_day + whole_query: + id: f947cbc4-3d3b-4f92-bee0-4186fdac2b61 + partitioner: + query: SELECT * FROM nyc_taxi_data + connection_string: postgresql+psycopg2://try_gx:try_gx@postgres.workshops.greatexpectations.io/gx_example_db + ``` + + + +##### Postgresql Creation via API + + + ```python title="Python" + # Creating a datasource + connection_string = "postgresql+psycopg2://try_gx:try_gx@postgres.workshops.greatexpectations.io/gx_example_db" + ds = context.sources.add_postgres(name="postgres_ds", connection_string=connection_string) + + # Creating table assets + yearly_asset = ds.add_table_asset(name="yearly_taxi_data", table_name="nyc_taxi_data", order_by=["+year"]) + 
yearly_asset.add_splitter_year(column_name="pickup") + monthly_asset = ds.add_table_asset(name="monthly_taxi_data", table_name="nyc_taxi_data", order_by=["-year", "-month"]) + monthly_asset.add_splitter_year_and_month(column_name="pickup") + daily_asset = ds.add_table_asset(name="daily_taxi_data", table_name="nyc_taxi_data", order_by=["+year", "+month", "+day"]) + daily_asset.add_splitter_year_and_month_and_day(column_name="pickup") + whole_table_asset = ds.add_table_asset(name="all_taxi_data", table_name="nyc_taxi_data") + + # Creating query Assets + yearly_query_asset = ds.add_query_asset(name="query_yearly", query="select * from nyc_taxi_data", order_by=["+year"]) + yearly_query_asset.add_splitter_year(column_name="pickup") + monthly_query_asset = ds.add_query_asset(name="query_monthly", query="select * from nyc_taxi_data", order_by=["-year", "-month"]) + monthly_query_asset.add_splitter_year_and_month(column_name="pickup") + daily_query_asset = ds.add_query_asset(name="query_daily", query="select * from nyc_taxi_data", order_by=["+year", "+month", "+day"]) + daily_query_asset.add_splitter_year_and_month_and_day(column_name="pickup") + query_whole_table_asset = ds.add_query_asset(name="whole_query", query="select * from nyc_taxi_data") + ``` + + + ```python title="Python" + # Creating a datasource + connection_string = "postgresql+psycopg2://try_gx:try_gx@postgres.workshops.greatexpectations.io/gx_example_db" + ds = context.data_sources.add_postgres(name="postgres_ds", connection_string=connection_string) + + # Creating a table asset and batch definitions + table_asset = ds.add_table_asset(name="taxi_data", table_name="nyc_taxi_data") + table_yearly = table_asset.add_batch_definition_yearly(name="table_yearly", column="pickup", sort_ascending=True) + table_monthly = table_asset.add_batch_definition_monthly(name="table_monthly", column="pickup", sort_ascending=False) + table_daily = table_asset.add_batch_definition_daily(name="table_daily", column="pickup", 
sort_ascending=True) + whole_table = table_asset.add_batch_definition_whole_table(name="whole_table") + + # Creating a query asset and batch definitions + query_asset = ds.add_query_asset(name="query_data", query="SELECT * FROM nyc_taxi_data") + query_yearly = query_asset.add_batch_definition_yearly(name="query_yearly", column="pickup", sort_ascending=True) + query_monthly = query_asset.add_batch_definition_monthly(name="query_monthly", column="pickup", sort_ascending=False) + query_daily = query_asset.add_batch_definition_daily(name="query_daily", column="pickup", sort_ascending=True) + query_whole_table = query_asset.add_batch_definition_whole_table(name="whole_query") + ``` + + + +#### Spark API +##### Spark Filesystem +This is almost identical to the pandas filesystem and we only present a daily and a yearly asset conversion here. + + + + ```yaml title="YAML" + fluent_datasources: + spark_fs: + type: spark_filesystem + assets: + yearly_taxi_data: + type: csv + batching_regex: sampled_yellow_tripdata_(?P\d{4})\.csv + daily_taxi_data: + type: csv + batching_regex: sampled_yellow_tripdata_(?P\d{4})-(?P\d{2})-(?P\d{2})\.csv + spark_config: + spark.executor.memory: 4g + persist: true + base_directory: data + ``` + + + ```yaml title="YAML" + fluent_datasources: + spark_fs: + type: spark_filesystem + id: 62a7c671-8f2a-468c-be53-a82576d7b436 + assets: + taxi_data: + type: csv + id: 78d5ccc2-1697-490f-886a-c9672d5548c6 + batch_definitions: + yearly_batches: + id: 4a0ff04f-a9fe-4c36-b680-0b1c61f4e0c2 + partitioner: + regex: sampled_yellow_tripdata_(?P\d{4})\.csv + sort_ascending: true + daily_batches: + id: b2e056fe-6f1d-4fdc-ab69-75d3a19f1a44 + partitioner: + regex: sampled_yellow_tripdata_(?P\d{4})-(?P\d{2})-(?P\d{2})\.csv + sort_ascending: true + param_names: + - year + - month + - day + spark_config: + spark.executor.memory: 4g + persist: true + base_directory: data + ``` + + + +##### Spark Filesystem API + + + ```python title="Python" + import great_expectations as 
gx + context = gx.get_context(mode="file") + + datasource = context.sources.add_spark_filesystem(name="spark_fs", base_directory="data", spark_config={"spark.executor.memory": "4g"}, persist=True) + yearly = datasource.add_csv_asset(name="yearly_taxi_data", batching_regex=r"sampled_yellow_tripdata_(?P<year>\d{4})\.csv") + daily = datasource.add_csv_asset(name="daily_taxi_data", batching_regex=r"sampled_yellow_tripdata_(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})\.csv") + ``` + + + ```python title="Python" + import great_expectations as gx + context = gx.get_context(mode="file") + + data_source = context.data_sources.add_spark_filesystem(name="spark_fs", base_directory="data", spark_config={"spark.executor.memory": "4g"}, persist=True) + asset = data_source.add_csv_asset(name="taxi_data") + yearly = asset.add_batch_definition_yearly(name="yearly_batches", regex=r"sampled_yellow_tripdata_(?P<year>\d{4})\.csv") + daily = asset.add_batch_definition_daily(name="daily_batches", regex=r"sampled_yellow_tripdata_(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})\.csv") + ``` + + + +##### Spark Dataframe +Here is a side-by-side comparison of the Spark dataframe data source configuration. 
+ + + + ```yaml title="YAML" + fluent_datasources: + spark_ds: + type: spark + assets: + taxi_dataframe_asset: + type: dataframe + batch_metadata: {} + spark_config: + spark.executor.memory: 4g + force_reuse_spark_context: true + persist: true + ``` + + + ```yaml title="YAML" + fluent_datasources: + spark_ds: + type: spark + id: 134de28d-bfdc-4980-aa2e-4f59788afef3 + assets: + taxi_dataframe_asset: + type: dataframe + id: 4110d2ff-5711-47df-a4be-eaefc2a638b4 + batch_metadata: {} + batch_definitions: + taxi_dataframe_batch_def: + id: 76738b8b-28ab-4857-aa98-f0ff80c8f137 + partitioner: + spark_config: + spark.executor.memory: 4g + force_reuse_spark_context: true + persist: true + ``` + + + +##### Spark dataframe API + + + ```python title="Python" + import great_expectations as gx + context = gx.get_context(mode="file") + + dataframe_ds = context.sources.add_spark(name="spark_ds", spark_config={"spark.executor.memory": "4g"}, force_reuse_spark_context=True, persist=True) + dataframe_asset = dataframe_ds.add_dataframe_asset(name="taxi_dataframe_asset") + ``` + + + ```python title="Python" + import great_expectations as gx + context = gx.get_context(mode="file") + + dataframe_ds = context.data_sources.add_spark(name="spark_ds", spark_config={"spark.executor.memory": "4g"}, force_reuse_spark_context=True, persist=True) + dataframe_asset = dataframe_ds.add_dataframe_asset(name="taxi_dataframe_asset") + dataframe_bd = dataframe_asset.add_batch_definition_whole_dataframe(name="taxi_dataframe_batch_def") + ``` + + + +#### Spark Directory Asset +Spark directory assets are different from our other dataframe Data Assets. These assets pull all the files from a directory into a single dataframe. Then, like for SQL Data Sources, one specifies a column when adding the Batch Definition. This column will be used to split the dataframe into batches. + +For this example all the data files live in directory `data/data2/` relative to our project directory. 
+ +In V0, we split the data based on an exact string. In V1, our batch definitions are all based on datetime (e.g. batches are by day, month, or year). + + + + ```yaml title="YAML" + fluent_datasources: + spark: + type: spark_filesystem + assets: + spark_asset: + type: directory_csv + header: true + data_directory: data2 + spark_config: + spark.executor.memory: 4g + persist: true + base_directory: data + ``` + + + ```yaml title="YAML" + fluent_datasources: + spark: + type: spark_filesystem + id: a35e995d-dd60-45e4-90f0-061d2bda6544 + assets: + spark_asset: + type: directory_csv + id: 9454840d-f064-4129-b8ff-38cfbb71af99 + batch_definitions: + monthly: + id: 853d02de-54b1-45a7-a4e2-b9f8a8ca0a33 + partitioner: + column_name: tpep_pickup_datetime + method_name: partition_on_year_and_month + header: true + data_directory: data2 + spark_config: + spark.executor.memory: 4g + persist: true + base_directory: data + ``` + + + +The configuration for `0.X` is different because in GX `0.X` we only allow splitting the data into batches by exact string match and we require users to fully specify the batch request options (called batch parameters in GX `1.0`). Not all of the Spark-specific configuration options are shown here. They are supported in the same way in GX `0.X` and GX `1.0`. 
+ +##### Spark directory asset API + + + ```python title="Python" + import great_expectations as gx + + context = gx.get_context(mode="file") + + ds = context.sources.add_spark_filesystem(name="spark", base_directory="data", spark_config={"spark.executor.memory": "4g"}, persist=True) + + asset = ds.add_directory_csv_asset(name="spark_asset", data_directory="data2", header=True) + # This must really be a year-month date column instead of a datetime column for splitting by month in GX 0.X + asset.add_splitter_column_value(column_name="tpep_pickup_datetime") + # There is no sorting added because in GX 0.X, one has to specify all parameters so sorting is a no-op + + ``` + + + ```python title="Python" + import great_expectations as gx + + context = gx.get_context(mode="file") + ds = context.data_sources.add_spark_filesystem(name="spark", base_directory="data", spark_config={"spark.executor.memory": "4g"}, persist=True) + asset = ds.add_directory_csv_asset(name="spark_asset", data_directory="data2", header=True) + bd = asset.add_batch_definition_monthly(name="monthly", column="tpep_pickup_datetime") + + b = bd.get_batch() + b.head(fetch_all=True) + + ``` + + + +### Checkpoints +In V0, there were multiple equivalent ways to configure the exact same `Checkpoint`. This is because a `Checkpoint` object contained a `validations` parameter which was a list of the validations the `Checkpoint` would run. Each item in this list took all the arguments necessary for a validation such as the Expectation Suite, the Batch Request, the actions, etc. However, all these same arguments are also present on the `Checkpoint` initializer. Usually, if an argument was present in the validation, that would be used, but if any argument was not present in a validation, GX would fall back to the argument defined on the `Checkpoint` itself. We’d call these default values the “top-level values”. 
In addition, if the `validations` argument was an empty list or `None`, GX would infer the `Checkpoint` had 1 validation and create one using only “top-level values”. In this case, we’d call this validation a “top-level validation”. This fallback led to some confusing behavior, especially since it wasn’t consistently implemented. + +In V1, we have removed all top-level arguments so every validation must be fully specified in the `validation_definitions` argument which is the analog to the old `validations` argument. We’ve also promoted the Validation Definition to its own domain object since it encapsulates the unit of validation. Checkpoints are groupings of Validation Definitions packaged with actions that may be taken after a validation is run. With this in mind, the V0 checkpoint configuration has been broken into 2 files, a Validation Definition configuration file and a checkpoint configuration file. + +We walk through 4 cases of V0 configuration files: + +Case 1: An empty validations argument so only a top-level validation exists. + +Case 2: No top-level validations so all values come from the validations argument. + +Case 3: A validation with values specified both in the validation and on the top level. + +Case 4: A validation with values specified on the top level that is overridden in the validation. + +We hope that this gives enough breadth over the possible ways to convert a Checkpoint that a migrator will have a helpful example. If there are missing cases that you’d like to see appear, please reach out. + +#### Case 1: Empty Validations Argument +The V0 configuration lives in `gx/checkpoints/<CHECKPOINT_NAME>.yml`. In V1, the configuration is JSON and lives in 2 files: `gx/checkpoints/<CHECKPOINT_NAME>` and `gx/validation_definitions/<VALIDATION_DEFINITION_NAME>`. 
+ + + + ```yaml title="YAML" + name: my_checkpoint + config_version: 1.0 + template_name: + module_name: great_expectations.checkpoint + class_name: Checkpoint + run_name_template: + expectation_suite_name: my_suite + batch_request: + datasource_name: pd_fs_ds + data_asset_name: monthly_taxi_data + action_list: + - name: store_validation_result + action: + class_name: StoreValidationResultAction + - name: store_evaluation_params + action: + class_name: StoreEvaluationParametersAction + - name: update_data_docs + action: + class_name: UpdateDataDocsAction + - name: my_email_action + action: + class_name: EmailAction + notify_on: all + use_tls: true + use_ssl: false + renderer: + module_name: great_expectations.render.renderer.email_renderer + class_name: EmailRenderer + smtp_address: smtp.myserver.com + smtp_port: 587 + sender_login: sender@myserver.com + sender_password: XXXXXXXXXX + sender_alias: alias@myserver.com + receiver_emails: receiver@myserver.com + evaluation_parameters: {} + runtime_configuration: {} + validations: [] + profilers: [] + ge_cloud_id: + expectation_suite_ge_cloud_id: + ``` + + + **gx/validation_definitions/my_validation_definition** + ```json title="JSON" + { + "data": { + "asset": { + "id": "ae696e27-fb6a-45fb-a2a0-bf1b8627c07e", + "name": "taxi_data" + }, + "batch_definition": { + "id": "9b396884-ef73-47f5-b8f7-c2fc1306589b", + "name": "monthly_batches" + }, + "datasource": { + "id": "934fd0e2-4c34-4e88-be1a-6b56ed69d614", + "name": "pd_fs_ds" + } + }, + "id": "cbd6552b-12d4-4b9f-92d5-1223eb6730d8", + "name": "my_validation_definition", + "suite": { + "id": "a71b700d-867a-46be-b5f2-6b9402dcc925", + "name": "my_suite" + } + } + ``` + + **gx/checkpoints/my_checkpoint** + ```json title="JSON" + { + "actions": [ + { + "name": "update_data_docs", + "site_names": [], + "type": "update_data_docs" + }, + { + "name": "my_email_action", + "notify_on": "all", + "notify_with": null, + "receiver_emails": "receiver@myserver.com", + "renderer": { + 
"class_name": "EmailRenderer", + "module_name": "great_expectations.render.renderer.email_renderer" + }, + "sender_alias": "alias@myserver.com", + "sender_login": "sender@myserver.com", + "sender_password": "XXXXXXXXXX", + "smtp_address": "smtp.myserver.com", + "smtp_port": "587", + "type": "email", + "use_ssl": false, + "use_tls": true + } + ], + "id": "ff7a0cd3-6b64-463a-baa0-4b5b4d7512b5", + "name": "my_checkpoint", + "result_format": "SUMMARY", + "validation_definitions": [ + { + "id": "cbd6552b-12d4-4b9f-92d5-1223eb6730d8", + "name": "my_validation_definition" + } + ] + } + ``` + + + +We provide a mapping from the V0 fields to the V1 fields along with any new V1 fields. + +**name**: This gets mapped to the name field in the V1 Checkpoint configuration file. + +**config_version**: This is no longer a parameter. + +**template_name**: This is no longer a supported feature. If you need to migrate this over, you should find the template values and set them explicitly in the new Checkpoint. + +**module_name**: This is no longer necessary and is inferred so is no longer a supported parameter. + +**class_name**: This is no longer necessary and is inferred so is no longer a supported parameter. + +**run_name_template**: This is no longer a supported feature. + +**expectation_suite_name**: This is now found in the validation definition configuration in **suite.name**. + +**batch_request**: There is no longer a batch request concept in V1. The Data Source and Data Asset are now found in the Validation Definition configuration **data** field. The data field has 3 keys: **asset**, **batch_definition**, and **datasource**. The value is a dictionary with the keys: + +> **name**: The name of the asset/batch_definition/datasource found in great_expectations.yml. + +> **id**: The id for the asset/batch_definition/datasource found in great_expectations.yml. 
+ +**action_list**: This is now mapped to the checkpoint configuration's **actions** key, which is a list of dictionaries where each dictionary configures one action. The name for an action in a V0 action list maps to the V1 action dictionary name key. A few things to note: +- V1 has no default actions. +- The `store_validation_result` is no longer an action since validation results are always stored and this is built into running a checkpoint (and running a validation definition directly). +- The `store_evaluation_params` action no longer exists since runtime parameters must now be passed in at runtime so we don’t store defaults anywhere. +- The `update_data_docs` action is no longer automatically added and must be explicitly added. Its configuration is a list of **site_names**. If you’ve configured these in V0, you can move them over directly and they have the same values. There is a new field called **type**, which all actions have, that is a unique literal string for a particular action. For this action, **type** should be set to “update_data_docs”. + +**evaluation_parameters**: This is no longer supported at the checkpoint level. In V0 one could also configure evaluation_parameters in the expectation suite parameters. One can still do that there (now called suite_parameters, see the Expectation Suites and Expectations section) and using that Expectation Suite will enable these parameters for checkpoints using that suite. + +**runtime_configuration**: The runtime configuration supported by V1 is result format. There is now an explicit result_format key in the checkpoint configuration whose value is one of the following strings: SUMMARY, COMPLETE, BASIC, BOOLEAN_ONLY. + +**validations**: This is now the checkpoint configuration field **validation_definitions** which is a list of dictionaries where each item in the list corresponds to a validation definition. 
There are 2 keys in the Validation Definition dictionary: + +> **id**: This must match the top-level id field in the validation_definitions configuration file that corresponds to this validation definition. + +> **name**: This must match the top-level name field in the validation_definitions configuration file that corresponds to this validation definition. + +> There are now restrictions on which validations can be grouped together in a checkpoint. Each Validation Definition in a Checkpoint must take the same batch parameters at runtime. So if you grouped multiple validations together in V0 whose batches are parameterized differently (e.g. one uses a “whole table” batch definition and another uses a “daily” batch definition) they will have to be split up into multiple checkpoints. + +**profilers**: This feature has been removed in V1. Some form of profilers will be re-introduced in V1 at a later date. + +**ge_cloud_id**: This should be empty for file-based configurations and has been removed in V1. + +**expectation_suite_ge_cloud_id**: This should be empty for file-based configurations and has been removed in V1. 
+ +##### Case 1: API calls + + + ```python title="Python" + import great_expectations as gx + from great_expectations.checkpoint import EmailAction + + context = gx.get_context(mode="file") + + + datasource = context.sources.add_pandas_filesystem(name="pd_fs_ds", base_directory="data") + monthly = datasource.add_csv_asset(name="monthly_taxi_data", batching_regex=r"sampled_yellow_tripdata_(?P\d{4})-(?P\d{2})\.csv") + + suite = context.add_expectation_suite( + expectation_suite_name="my_suite", + data_asset_type="CSVAsset", + ) + + validator = context.get_validator(batch_request=monthly.build_batch_request(), expectation_suite_name="my_suite") + validator.expect_column_values_to_be_between(column="passenger_count", min_value=0, max_value=10) + validator.save_expectation_suite(discard_failed_expectations=False) + + + batch_request = monthly.build_batch_request() # options={"year": "2019", "month": "01"}) + + email_action_config = { + "name": "my_email_action", + "action": { + "class_name": "EmailAction", + "notify_on": "all", + "use_tls": True, + "use_ssl": False, + "renderer": { + "module_name": "great_expectations.render.renderer.email_renderer", + "class_name": "EmailRenderer" + }, + "smtp_address": "smtp.myserver.com", + "smtp_port": 587, + "sender_login": "sender@myserver.com", + "sender_password": "XXXXXXXXXX", + "sender_alias": "alias@myserver.com", + "receiver_emails": "receiver@myserver.com", + } + } + + action_list = [ + {'name': 'store_validation_result', + 'action': {'class_name': 'StoreValidationResultAction'}}, + {'name': 'store_evaluation_params', + 'action': {'class_name': 'StoreEvaluationParametersAction'}}, + {'name': 'update_data_docs', + 'action': {'class_name': 'UpdateDataDocsAction'}}, + email_action_config + ] + + checkpoint_config = { + "name": "my_checkpoint", + "config_version": 1.0, + "class_name": "Checkpoint", + "module_name": "great_expectations.checkpoint", + "expectation_suite_name": "my_suite", + "batch_request": batch_request, + 
"action_list": action_list, + } + + checkpoint = context.add_checkpoint(**checkpoint_config) + result = context.run_checkpoint("my_checkpoint") + ``` + + + ```python title="Python" + import great_expectations as gx + import great_expectations.expectations as gxe + + context = gx.get_context(mode="file") + data_source = context.data_sources.add_pandas_filesystem(name="pd_fs_ds", base_directory="./data") + file_csv_asset = data_source.add_csv_asset(name="taxi_data") + monthly = file_csv_asset.add_batch_definition_monthly(name="monthly_batches", regex=r"sampled_yellow_tripdata_(?P\d{4})-(?P\d{2})\.csv") + + suite = context.suites.add(gx.ExpectationSuite(name="my_suite")) + suite.add_expectation(gxe.ExpectColumnValuesToBeBetween(column="passenger_count", min_value=0, max_value=10)) + + validation_definition = context.validation_definitions.add( + gx.ValidationDefinition(data=monthly, suite=suite, name="my_validation_definition") + ) + checkpoint = context.checkpoints.add( + gx.Checkpoint( + name="my_checkpoint", + validation_definitions=[validation_definition], + actions=[ + gx.checkpoint.UpdateDataDocsAction(name="update_data_docs"), + gx.checkpoint.EmailAction( + name="my_email_action", + notify_on="all", + use_tls=True, + use_ssl=False, + smtp_address="smtp.myserver.com", + smtp_port=587, + sender_login="sender@myserver.com", + sender_password="XXXXXXXXXX", + sender_alias="alias@myserver.com", + receiver_emails="receiver@myserver.com", + ), + ], + ) + ) + result = checkpoint.run() + ``` + + + +##### Case 2: No top-level arguments + +We only show the V0 configuration and code samples here because the V1 configuration and code is identical to case 1. + +One unique thing to notice is that while in the API code snippet below all actions are defined in the validation argument, you will see in the configuration file that the actions get split up and some end up being defined on the top level and some appear on the validation. 
All actions will get run when the checkpoint is run, which is inconsistent with the normal “overriding” behavior for values defined in the validation. + + + + ```yaml title="YAML" + name: my_checkpoint + config_version: 1.0 + template_name: + module_name: great_expectations.checkpoint + class_name: Checkpoint + run_name_template: + expectation_suite_name: + batch_request: {} + action_list: + - name: store_validation_result + action: + class_name: StoreValidationResultAction + - name: store_evaluation_params + action: + class_name: StoreEvaluationParametersAction + - name: update_data_docs + action: + class_name: UpdateDataDocsAction + evaluation_parameters: {} + runtime_configuration: {} + validations: + - action_list: + - name: store_validation_result + action: + class_name: StoreValidationResultAction + - name: store_evaluation_params + action: + class_name: StoreEvaluationParametersAction + - name: update_data_docs + action: + class_name: UpdateDataDocsAction + - name: my_email_action + action: + class_name: EmailAction + notify_on: all + use_tls: true + use_ssl: false + renderer: + module_name: great_expectations.render.renderer.email_renderer + class_name: EmailRenderer + smtp_address: smtp.myserver.com + smtp_port: 587 + sender_login: sender@myserver.com + sender_password: XXXXXXXXXX + sender_alias: alias@myserver.com + receiver_emails: receiver@myserver.com + batch_request: + datasource_name: pd_fs_ds + data_asset_name: monthly_taxi_data + options: {} + batch_slice: + expectation_suite_name: my_suite + profilers: [] + ge_cloud_id: + expectation_suite_ge_cloud_id: + ``` + + + +##### Case 2: API calls + + + + ```python title="Python" + import great_expectations as gx + + context = gx.get_context(mode="file") + + datasource = context.sources.add_pandas_filesystem(name="pd_fs_ds", base_directory="data") + monthly = datasource.add_csv_asset(name="monthly_taxi_data", batching_regex=r"sampled_yellow_tripdata_(?P\d{4})-(?P\d{2})\.csv") + + suite = 
context.add_expectation_suite( + expectation_suite_name="my_suite", + data_asset_type="CSVAsset", + ) + validator = context.get_validator(batch_request=monthly.build_batch_request(), expectation_suite_name="my_suite") + validator.expect_column_values_to_be_between(column="passenger_count", min_value=0, max_value=10) + validator.save_expectation_suite(discard_failed_expectations=False) + + batch_request = monthly.build_batch_request() + + email_action_config = { + "name": "my_email_action", + "action": { + "class_name": "EmailAction", + "notify_on": "all", + "use_tls": True, + "use_ssl": False, + "renderer": { + "module_name": "great_expectations.render.renderer.email_renderer", + "class_name": "EmailRenderer" + }, + "smtp_address": "smtp.myserver.com", + "smtp_port": 587, + "sender_login": "sender@myserver.com", + "sender_password": "XXXXXXXXXX", + "sender_alias": "alias@myserver.com", + "receiver_emails": "receiver@myserver.com", + } + } + + action_list = [ + {'name': 'store_validation_result', + 'action': {'class_name': 'StoreValidationResultAction'}}, + {'name': 'store_evaluation_params', + 'action': {'class_name': 'StoreEvaluationParametersAction'}}, + {'name': 'update_data_docs', + 'action': {'class_name': 'UpdateDataDocsAction'}}, + email_action_config + ] + + checkpoint_config = { + "name": "my_checkpoint", + "config_version": 1.0, + "class_name": "Checkpoint", + "module_name": "great_expectations.checkpoint", + "validations": [ + { + "expectation_suite_name": "my_suite", + "batch_request": batch_request, + "action_list": action_list, + } + ], + } + + checkpoint = context.add_checkpoint(**checkpoint_config) + result_case_2 = context.run_checkpoint("my_checkpoint") + ``` + + + +##### Case 3: Combined top level and validation configuration +We only show the V0 configuration and code samples here because the V1 configuration and code is identical to case 1. 
+ + + + ```yaml title="YAML" + name: top_level_and_validation_checkpoint + config_version: 1.0 + template_name: + module_name: great_expectations.checkpoint + class_name: Checkpoint + run_name_template: + expectation_suite_name: my_suite + batch_request: {} + action_list: + - name: store_validation_result + action: + class_name: StoreValidationResultAction + - name: store_evaluation_params + action: + class_name: StoreEvaluationParametersAction + - name: update_data_docs + action: + class_name: UpdateDataDocsAction + - name: my_email_action + action: + class_name: EmailAction + notify_on: all + use_tls: true + use_ssl: false + renderer: + module_name: great_expectations.render.renderer.email_renderer + class_name: EmailRenderer + smtp_address: smtp.myserver.com + smtp_port: 587 + sender_login: sender@myserver.com + sender_password: XXXXXXXXXX + sender_alias: alias@myserver.com + receiver_emails: receiver@myserver.com + evaluation_parameters: {} + runtime_configuration: {} + validations: + - batch_request: + datasource_name: pd_fs_ds + data_asset_name: monthly_taxi_data + options: {} + batch_slice: + profilers: [] + ge_cloud_id: + expectation_suite_ge_cloud_id: + ``` + + + +##### Case 3: API calls + + + + ```python title="Python" + import great_expectations as gx + + context = gx.get_context(mode="file") + + datasource = context.sources.add_pandas_filesystem(name="pd_fs_ds", base_directory="data") + monthly = datasource.add_csv_asset(name="monthly_taxi_data", batching_regex=r"sampled_yellow_tripdata_(?P\d{4})-(?P\d{2})\.csv") + + suite = context.add_expectation_suite( + expectation_suite_name="my_suite", + data_asset_type="CSVAsset", + ) + validator = context.get_validator(batch_request=monthly.build_batch_request(), expectation_suite_name="my_suite") + validator.expect_column_values_to_be_between(column="passenger_count", min_value=0, max_value=10) + validator.save_expectation_suite(discard_failed_expectations=False) + + batch_request = 
monthly.build_batch_request() + + email_action_config = { + "name": "my_email_action", + "action": { + "class_name": "EmailAction", + "notify_on": "all", + "use_tls": True, + "use_ssl": False, + "renderer": { + "module_name": "great_expectations.render.renderer.email_renderer", + "class_name": "EmailRenderer" + }, + "smtp_address": "smtp.myserver.com", + "smtp_port": 587, + "sender_login": "sender@myserver.com", + "sender_password": "XXXXXXXXXX", + "sender_alias": "alias@myserver.com", + "receiver_emails": "receiver@myserver.com", + } + } + + action_list = [ + {'name': 'store_validation_result', + 'action': {'class_name': 'StoreValidationResultAction'}}, + {'name': 'store_evaluation_params', + 'action': {'class_name': 'StoreEvaluationParametersAction'}}, + {'name': 'update_data_docs', + 'action': {'class_name': 'UpdateDataDocsAction'}}, + email_action_config + ] + + checkpoint_config = { + "name": "top_level_and_validation_checkpoint", + "config_version": 1.0, + "class_name": "Checkpoint", + "module_name": "great_expectations.checkpoint", + "expectation_suite_name": "my_suite", + "action_list": action_list, + "validations": [ + { + "batch_request": batch_request, + } + ], + } + + context.add_checkpoint(**checkpoint_config) + result = context.run_checkpoint("top_level_and_validation_checkpoint") + ``` + + + +##### Case 4: Combined top level with validation configuration override +We only show the V0 configuration and code samples here because the V1 configuration and code is identical to case 1. 
+ + + + ```yaml title="YAML" + name: top_level_and_validation_override_checkpoint + config_version: 1.0 + template_name: + module_name: great_expectations.checkpoint + class_name: Checkpoint + run_name_template: + expectation_suite_name: my_suite + batch_request: {} + action_list: + - name: store_validation_result + action: + class_name: StoreValidationResultAction + - name: store_evaluation_params + action: + class_name: StoreEvaluationParametersAction + - name: update_data_docs + action: + class_name: UpdateDataDocsAction + - name: my_email_action + action: + class_name: EmailAction + notify_on: all + use_tls: true + use_ssl: false + renderer: + module_name: great_expectations.render.renderer.email_renderer + class_name: EmailRenderer + smtp_address: smtp.myserver.com + smtp_port: 587 + sender_login: sender@myserver.com + sender_password: XXXXXXXXXX + sender_alias: alias@myserver.com + receiver_emails: receiver@myserver.com + evaluation_parameters: {} + runtime_configuration: {} + validations: + - batch_request: + datasource_name: pd_fs_ds + data_asset_name: monthly_taxi_data + options: {} + batch_slice: + expectation_suite_name: my_other_suite + profilers: [] + ge_cloud_id: + expectation_suite_ge_cloud_id: + ``` + + + +##### Case 4: API calls + + + ```python title="Python" + import great_expectations as gx + + context = gx.get_context(mode="file") + + datasource = context.sources.add_pandas_filesystem(name="pd_fs_ds", base_directory="data") + monthly = datasource.add_csv_asset(name="monthly_taxi_data", batching_regex=r"sampled_yellow_tripdata_(?P\d{4})-(?P\d{2})\.csv") + + suite = context.add_expectation_suite( + expectation_suite_name="my_suite", + data_asset_type="CSVAsset", + ) + validator = context.get_validator(batch_request=monthly.build_batch_request(), expectation_suite_name="my_suite") + validator.expect_column_values_to_be_between(column="passenger_count", min_value=0, max_value=10) + validator.save_expectation_suite(discard_failed_expectations=False) 
+ + other_suite = context.add_expectation_suite( + expectation_suite_name="my_other_suite", + data_asset_type="CSVAsset", + ) + validator = context.get_validator(batch_request=monthly.build_batch_request(), expectation_suite_name="my_other_suite") + validator.expect_column_values_to_be_between(column="passenger_count", min_value=0, max_value=4) + validator.save_expectation_suite(discard_failed_expectations=False) + + batch_request = monthly.build_batch_request() + + email_action_config = { + "name": "my_email_action", + "action": { + "class_name": "EmailAction", + "notify_on": "all", + "use_tls": True, + "use_ssl": False, + "renderer": { + "module_name": "great_expectations.render.renderer.email_renderer", + "class_name": "EmailRenderer" + }, + "smtp_address": "smtp.myserver.com", + "smtp_port": 587, + "sender_login": "sender@myserver.com", + "sender_password": "XXXXXXXXXX", + "sender_alias": "alias@myserver.com", + "receiver_emails": "receiver@myserver.com", + } + } + + action_list = [ + {'name': 'store_validation_result', + 'action': {'class_name': 'StoreValidationResultAction'}}, + {'name': 'store_evaluation_params', + 'action': {'class_name': 'StoreEvaluationParametersAction'}}, + {'name': 'update_data_docs', + 'action': {'class_name': 'UpdateDataDocsAction'}}, + email_action_config + ] + + checkpoint_config = { + "name": "top_level_and_validation_override_checkpoint", + "config_version": 1.0, + "class_name": "Checkpoint", + "module_name": "great_expectations.checkpoint", + "expectation_suite_name": "my_suite", + "action_list": action_list, + "validations": [ + { + "expectation_suite_name": "my_other_suite", + "batch_request": batch_request, + } + ], + } + + context.add_checkpoint(**checkpoint_config) + result = context.run_checkpoint("top_level_and_validation_override_checkpoint") + ``` + + + +### Data Context Variables +The Data Context variables will be automatically converted for GX Cloud users when switching from V0 to V1. 
For file context users, we will show the difference in the yaml so you can translate the configuration block in `great_expectations.yml`. + + + + ```yaml title="YAML" + config_version: 3.0 + config_variables_file_path: uncommitted/config_variables.yml + plugins_directory: plugins/ + stores: + expectations_store: + class_name: ExpectationsStore + store_backend: + class_name: TupleFilesystemStoreBackend + base_directory: expectations/ + validations_store: + class_name: ValidationsStore + store_backend: + class_name: TupleFilesystemStoreBackend + base_directory: uncommitted/validations/ + evaluation_parameter_store: + class_name: EvaluationParameterStore + checkpoint_store: + class_name: CheckpointStore + store_backend: + class_name: TupleFilesystemStoreBackend + suppress_store_backend_id: true + base_directory: checkpoints/ + profiler_store: + class_name: ProfilerStore + store_backend: + class_name: TupleFilesystemStoreBackend + suppress_store_backend_id: true + base_directory: profilers/ + expectations_store_name: expectations_store + validations_store_name: validations_store + evaluation_parameter_store_name: evaluation_parameter_store + checkpoint_store_name: checkpoint_store + data_docs_sites: + local_site: + class_name: SiteBuilder + show_how_to_buttons: true + store_backend: + class_name: TupleFilesystemStoreBackend + base_directory: uncommitted/data_docs/local_site/ + site_index_builder: + class_name: DefaultSiteIndexBuilder + anonymous_usage_statistics: + data_context_id: a7441dab-9db7-4043-a3e7-011cdab54cfb + enabled: false + usage_statistics_url: https://qa.stats.greatexpectations.io/great_expectations/v1/usage_statistics + fluent_datasources: + spark_fs: + type: spark_filesystem + assets: + directory_csv_asset: + type: directory_csv + data_directory: data + spark_config: + spark.executor.memory: 4g + persist: true + base_directory: data + notebooks: + include_rendered_content: + globally: false + expectation_suite: false + expectation_validation_result: 
false + ``` + + + ```yaml title="YAML" + config_version: 4.0 + config_variables_file_path: uncommitted/config_variables.yml + plugins_directory: plugins/ + stores: + expectations_store: + class_name: ExpectationsStore + store_backend: + class_name: TupleFilesystemStoreBackend + base_directory: expectations/ + validation_results_store: + class_name: ValidationResultsStore + store_backend: + class_name: TupleFilesystemStoreBackend + base_directory: uncommitted/validations/ + checkpoint_store: + class_name: CheckpointStore + store_backend: + class_name: TupleFilesystemStoreBackend + suppress_store_backend_id: true + base_directory: checkpoints/ + validation_definition_store: + class_name: ValidationDefinitionStore + store_backend: + class_name: TupleFilesystemStoreBackend + base_directory: validation_definitions/ + expectations_store_name: expectations_store + validation_results_store_name: validation_results_store + checkpoint_store_name: checkpoint_store + data_docs_sites: + local_site: + class_name: SiteBuilder + show_how_to_buttons: true + store_backend: + class_name: TupleFilesystemStoreBackend + base_directory: uncommitted/data_docs/local_site/ + site_index_builder: + class_name: DefaultSiteIndexBuilder + analytics_enabled: true + fluent_datasources: + spark_ds: + type: spark + id: 134de28d-bfdc-4980-aa2e-4f59788afef3 + assets: + taxi_dataframe_asset: + type: dataframe + id: 4110d2ff-5711-47df-a4be-eaefc2a638b4 + batch_metadata: {} + batch_definitions: + taxi_dataframe_batch_def: + id: 76738b8b-28ab-4857-aa98-f0ff80c8f137 + partitioner: + spark_config: + spark.executor.memory: 4g + force_reuse_spark_context: true + persist: true + data_context_id: 12bc94a0-8ac3-4e97-bf90-03cd3d92f8c4 + ``` + + + +**config_version**: For V1 this should be set to 4.0 + +**config_variables_file_path**: This is unchanged. + +**plugins_directory**: This is unchanged. + +**stores**: This is a dictionary of store names to configuration. In V0 the keys names were configurable. 
In V1, there is a fixed set of keys. These V1 keys are: + +> **expectations_store**: The configuration of the Expectations store. The value here is unchanged from the value that was stored with the key that was configured in the top-level variable **expectations_store_name**. + +> **validation_results_store**: The configuration of the validation results store. The value here is slightly changed from the value that was stored with the key that was configured in the top-level variable validations_store_name. The only change is that the class name ValidationsStore is now ValidationResultsStore. + +> **checkpoint_store**: This key and value are unchanged between V0 and V1. + +> **validation_definition_store**: Validation definitions are a new concept in V1. For file-based contexts, you can use this example V1 configuration directly. You can update the base_directory if you need to change the path where the configuration for validation definitions gets stored. + +**expectations_store_name**: While still present, this must now always be set to “expectations_store”. + +**validations_store_name**: This key is now named “validation_results_store_name”. Its value must be “validation_results_store”. + +**evaluation_parameter_store_name**: This key has been removed. One can no longer store evaluation_parameters since they are now a runtime concept called expectation_parameters. If you want to set a default value of an expectation parameter, you should do that in code where you run the validation. + +**checkpoint_store_name**: This parameter name is unchanged. The value must be “checkpoint_store”. + +**data_docs_sites**: This key and its value are unchanged in V1. + +**anonymous_usage_statistics**: + +> **enabled**: This value is now the top-level key analytics_enabled. + +> **data_context_id**: This value is now the top-level key data_context_id. + +> **usage_statistics_url**: This field is no longer configurable. 
+ +**fluent_datasources**: While this appears in the great_expectations.yml file, it is not a data context variable. Please see the “Data Sources and Data Assets” portion of this doc for instructions on migrating this from V0 to V1. + +**notebooks**: This is no longer supported and does not appear in V1’s configuration. + +**include_rendered_content**: This only mattered for GX Cloud users and no longer appears in this configuration file. + +##### New V1 Fields + +**data_context_id**: If previously one had the field **anonymous_usage_statistics.data_context_id** set, one should use that value here. Otherwise, this can be set to a unique, arbitrary UUID. + +##### Store Backends +In previous versions of GX, we supported a number of configurable backend stores, including ones that persisted to databases, S3, Google Cloud Platform, and Azure. V1 drops support for these; file contexts only use `TupleFilesystemStoreBackend` and cloud contexts only use cloud stores. A number of GX users have a need for persisting their configurations, or subsets of their configurations, outside of their filesystem and either cannot or would prefer not to use cloud contexts. While GX no longer supports the tooling for these persistence models directly, users may use external libraries/services to handle this, e.g. copying their config to S3 via boto3. + +#### V1 API +In V1, the configuration for all data context variables can be changed via the Python API. For a data context named `context`, you can view a variable via `context.variables.<variable_name>` and update it via: + +```python title="Python" +context.variables.<variable_name> 
= new_value +context.variables.save() + +# At this time you need to reload the context to have it take effect +context = gx.get_context() +``` \ No newline at end of file diff --git a/docs/docusaurus/versioned_docs/version-0.18/reference/learn/terms/batch.md b/docs/docusaurus/versioned_docs/version-0.18/reference/learn/terms/batch.md index 83291636e7d4..902573e1455a 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/reference/learn/terms/batch.md +++ b/docs/docusaurus/versioned_docs/version-0.18/reference/learn/terms/batch.md @@ -45,7 +45,7 @@ The `BatchRequest` object is the primary API used to construct Batches. You cons - For more information, see [our documentation on Batch Requests](./batch_request.md). -:::note +:::note Note Instantiating a Batch does not necessarily “fetch” the data by immediately running a query or pulling data into memory. Instead, think of a Batch as a wrapper that includes the information that you will need to fetch the right data when it’s time to Validate. diff --git a/docs/docusaurus/versioned_docs/version-0.18/reference/learn/terms/data_docs_store.md b/docs/docusaurus/versioned_docs/version-0.18/reference/learn/terms/data_docs_store.md index 8c72b768cc9a..e2c531afb4e3 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/reference/learn/terms/data_docs_store.md +++ b/docs/docusaurus/versioned_docs/version-0.18/reference/learn/terms/data_docs_store.md @@ -30,7 +30,7 @@ Your Data Context and other objects that interact with Data Docs will access you In the Validate Data step, including `UpdateDataDocsAction` in the `action_list` of a Checkpoint will cause your Data Docs to be updated with the Checkpoint's Validation Results; this process will use your Data Docs Stores behind the scenes. -:::note +:::note Note - To ensure that the Validation Results are included in the updated Data Docs, `UpdateDataDocsAction` should be present *after* `StoreValidationResultAction` in the Checkpoint's `action_list`. 
::: diff --git a/docs/docusaurus/versioned_docs/version-0.18/reference/learn/terms/evaluation_parameter.md b/docs/docusaurus/versioned_docs/version-0.18/reference/learn/terms/evaluation_parameter.md index 2917c1c2e658..8886ab37bb6d 100644 --- a/docs/docusaurus/versioned_docs/version-0.18/reference/learn/terms/evaluation_parameter.md +++ b/docs/docusaurus/versioned_docs/version-0.18/reference/learn/terms/evaluation_parameter.md @@ -99,7 +99,7 @@ Evaluation Parameters are defined by expressions that are evaluated at run time - Temporal values, such as "now" or "timedelta." - Complex values, such as lists. -:::note +:::note Note Although complex values like lists can be used as the value of an Evaluation Parameter, you cannot currently combine complex values with arithmetic expressions. ::: @@ -150,7 +150,7 @@ validator.expect_column_values_to_be_in_set( This Expectation will fail (the NYC taxi data allows for four types of payments), and now we are aware that what we thought we knew about the `payment_type` column wasn't accurate, and that now we need to research what those other two payment types are! -:::note +:::note Note - You cannot currently combine complex values with arithmetic expressions. 
::: diff --git a/docs/docusaurus/versioned_sidebars/version-0.18-sidebars.json b/docs/docusaurus/versioned_sidebars/version-0.18-sidebars.json index 0108f5fb7a5e..2af5bdf05dfe 100644 --- a/docs/docusaurus/versioned_sidebars/version-0.18-sidebars.json +++ b/docs/docusaurus/versioned_sidebars/version-0.18-sidebars.json @@ -533,6 +533,7 @@ } ], "learn": [ + "reference/learn/migration_guide", "reference/learn/conceptual_guides/expectation_classes", "reference/learn/conceptual_guides/metricproviders", "reference/learn/usage_statistics", diff --git a/docs/sphinx_api_docs_source/build_sphinx_api_docs.py b/docs/sphinx_api_docs_source/build_sphinx_api_docs.py index 2e4074e66f95..455b2fb73fb7 100644 --- a/docs/sphinx_api_docs_source/build_sphinx_api_docs.py +++ b/docs/sphinx_api_docs_source/build_sphinx_api_docs.py @@ -39,7 +39,10 @@ def my_task( Definition, get_shortest_dotted_path, ) -from docs.sphinx_api_docs_source.utils import apply_markdown_adjustments +from docs.sphinx_api_docs_source.utils import ( + apply_markdown_adjustments, + apply_structure_changes, +) logger = logging.getLogger(__name__) logger.addHandler(logging.StreamHandler()) @@ -220,6 +223,7 @@ def _parse_and_process_html_to_mdx( # noqa: C901 title_str = title_str.replace("#", "") apply_markdown_adjustments(soup, html_file_path, html_file_contents) + apply_structure_changes(soup, html_file_path, html_file_contents) sidebar_entry = self._get_sidebar_entry(html_file_path=html_file_path) @@ -517,7 +521,7 @@ def _create_class_md_stub(self, definition: Definition) -> str: .. 
autoclass:: {dotted_import} :members: :inherited-members: - + :member-order: groupwise ``` """ diff --git a/docs/sphinx_api_docs_source/include_exclude_definition.py b/docs/sphinx_api_docs_source/include_exclude_definition.py index 5985f6bed08f..77ebaae92600 100644 --- a/docs/sphinx_api_docs_source/include_exclude_definition.py +++ b/docs/sphinx_api_docs_source/include_exclude_definition.py @@ -31,3 +31,5 @@ def __post_init__(self): raise ValueError( # noqa: TRY003 "You must provide at least a filepath or filepath and name." ) + if self.filepath and not self.filepath.exists(): + raise FileNotFoundError(f"File {self.filepath} does not exist.") # noqa: TRY003 diff --git a/docs/sphinx_api_docs_source/printable_definition.py b/docs/sphinx_api_docs_source/printable_definition.py new file mode 100644 index 000000000000..866fdee071f3 --- /dev/null +++ b/docs/sphinx_api_docs_source/printable_definition.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +import pathlib +from dataclasses import dataclass + + +@dataclass(frozen=True) +class PrintableDefinition: + file: pathlib.Path + name: str + + def __post_init__(self): + if not self.file.exists(): + raise FileNotFoundError(f"File {self.file} does not exist.") # noqa: TRY003 + + def __str__(self) -> str: # type: ignore[explicit-override] + return f"File: {self.file} Name: {self.name}" + + def __lt__(self, other) -> bool: + return str(self) < str(other) diff --git a/docs/sphinx_api_docs_source/public_api_excludes.py b/docs/sphinx_api_docs_source/public_api_excludes.py index a3d58d3c497d..feba54b8fd41 100644 --- a/docs/sphinx_api_docs_source/public_api_excludes.py +++ b/docs/sphinx_api_docs_source/public_api_excludes.py @@ -13,20 +13,6 @@ ) DEFAULT_EXCLUDES: list[IncludeExcludeDefinition] = [ - IncludeExcludeDefinition( - reason="We now use get_context(), this method only exists for backward compatibility.", - name="DataContext", - filepath=pathlib.Path( - "great_expectations/data_context/data_context/data_context.py" 
- ), - ), - IncludeExcludeDefinition( - reason="We now use get_context(), this method only exists for backward compatibility.", - name="BaseDataContext", - filepath=pathlib.Path( - "great_expectations/data_context/data_context/base_data_context.py" - ), - ), IncludeExcludeDefinition( reason="Fluent is not part of the public API", filepath=pathlib.Path("great_expectations/datasource/fluent/interfaces.py"), @@ -52,85 +38,11 @@ reason="Exclude code from __init__.py", filepath=pathlib.Path("great_expectations/types/__init__.py"), ), - IncludeExcludeDefinition( - reason="Exclude code from v2 API", - filepath=pathlib.Path( - "great_expectations/datasource/batch_kwargs_generator/batch_kwargs_generator.py" - ), - ), - IncludeExcludeDefinition( - reason="Exclude code from v2 API", - filepath=pathlib.Path( - "great_expectations/datasource/batch_kwargs_generator/databricks_batch_kwargs_generator.py" - ), - ), - IncludeExcludeDefinition( - reason="Exclude code from v2 API", - filepath=pathlib.Path( - "great_expectations/datasource/batch_kwargs_generator/glob_reader_batch_kwargs_generator.py" - ), - ), - IncludeExcludeDefinition( - reason="Exclude code from v2 API", - filepath=pathlib.Path( - "great_expectations/datasource/batch_kwargs_generator/manual_batch_kwargs_generator.py" - ), - ), - IncludeExcludeDefinition( - reason="Exclude code from v2 API", - filepath=pathlib.Path( - "great_expectations/datasource/batch_kwargs_generator/query_batch_kwargs_generator.py" - ), - ), - IncludeExcludeDefinition( - reason="Exclude code from v2 API", - filepath=pathlib.Path( - "great_expectations/datasource/batch_kwargs_generator/s3_batch_kwargs_generator.py" - ), - ), - IncludeExcludeDefinition( - reason="Exclude code from v2 API", - filepath=pathlib.Path( - "great_expectations/datasource/batch_kwargs_generator/s3_subdir_reader_batch_kwargs_generator.py" - ), - ), - IncludeExcludeDefinition( - reason="Exclude code from v2 API", - filepath=pathlib.Path( - 
"great_expectations/datasource/batch_kwargs_generator/subdir_reader_batch_kwargs_generator.py" - ), - ), - IncludeExcludeDefinition( - reason="Exclude code from v2 API", - filepath=pathlib.Path( - "great_expectations/datasource/batch_kwargs_generator/table_batch_kwargs_generator.py" - ), - ), - IncludeExcludeDefinition( - reason="ValidationActions are now run from Checkpoints: https://docs.greatexpectations.io/docs/guides/miscellaneous/migration_guide#manually-migrate-v2-checkpoints-to-v3-checkpoints", - name="run", - filepath=pathlib.Path("great_expectations/checkpoint/actions.py"), - ), - IncludeExcludeDefinition( - reason="CLI internal methods should not be part of the public API", - filepath=pathlib.Path("great_expectations/cli/datasource.py"), - ), - IncludeExcludeDefinition( - reason="CLI internal methods should not be part of the public API", - filepath=pathlib.Path("great_expectations/cli/toolkit.py"), - ), IncludeExcludeDefinition( reason="False match for from datasource_configuration_test_utilities import is_subset", name="is_subset", filepath=pathlib.Path("great_expectations/core/domain.py"), ), - IncludeExcludeDefinition( - reason="Already captured in the Data Context", - name="test_yaml_config", - filepath=pathlib.Path( - "great_expectations/data_context/config_validator/yaml_config_validator.py" - ), - ), IncludeExcludeDefinition( reason="False match for validator.get_metric()", name="get_metric", @@ -138,28 +50,6 @@ "great_expectations/core/expectation_validation_result.py" ), ), - IncludeExcludeDefinition( - reason="False match for context.suites.get()", - name="get_expectation_suite", - filepath=pathlib.Path("great_expectations/data_asset/data_asset.py"), - ), - IncludeExcludeDefinition( - reason="False match for context.save_expectation_suite() and validator.save_expectation_suite()", - name="save_expectation_suite", - filepath=pathlib.Path("great_expectations/data_asset/data_asset.py"), - ), - IncludeExcludeDefinition( - reason="False match for 
validator.validate()", - name="validate", - filepath=pathlib.Path("great_expectations/data_asset/data_asset.py"), - ), - IncludeExcludeDefinition( - reason="False match for validator.validate()", - name="validate", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/batch_filter.py" - ), - ), IncludeExcludeDefinition( reason="Captured in AbstractDataContext", name="add_checkpoint", @@ -335,85 +225,6 @@ name="file_relative_path", filepath=pathlib.Path("great_expectations/data_context/util.py"), ), - IncludeExcludeDefinition( - reason="Exclude code from v2 API", - name="expect_column_values_to_be_between", - filepath=pathlib.Path("great_expectations/dataset/dataset.py"), - ), - IncludeExcludeDefinition( - reason="Exclude code from v2 API", - name="expect_column_values_to_not_be_null", - filepath=pathlib.Path("great_expectations/dataset/dataset.py"), - ), - IncludeExcludeDefinition( - reason="Exclude code from v2 API", - name="expect_table_row_count_to_be_between", - filepath=pathlib.Path("great_expectations/dataset/dataset.py"), - ), - IncludeExcludeDefinition( - reason="Exclude code from v2 API", - name="expect_column_values_to_be_between", - filepath=pathlib.Path("great_expectations/dataset/pandas_dataset.py"), - ), - IncludeExcludeDefinition( - reason="Exclude code from v2 API", - name="expect_column_values_to_not_be_null", - filepath=pathlib.Path("great_expectations/dataset/pandas_dataset.py"), - ), - IncludeExcludeDefinition( - reason="Exclude code from v2 API", - name="expect_column_values_to_be_between", - filepath=pathlib.Path("great_expectations/dataset/sparkdf_dataset.py"), - ), - IncludeExcludeDefinition( - reason="Exclude code from v2 API", - name="expect_column_values_to_not_be_null", - filepath=pathlib.Path("great_expectations/dataset/sparkdf_dataset.py"), - ), - IncludeExcludeDefinition( - reason="Exclude code from v2 API", - name="head", - filepath=pathlib.Path("great_expectations/dataset/sparkdf_dataset.py"), - ), - 
IncludeExcludeDefinition( - reason="Exclude code from v2 API", - name="expect_column_values_to_be_between", - filepath=pathlib.Path("great_expectations/dataset/sqlalchemy_dataset.py"), - ), - IncludeExcludeDefinition( - reason="Exclude code from v2 API", - name="expect_column_values_to_not_be_null", - filepath=pathlib.Path("great_expectations/dataset/sqlalchemy_dataset.py"), - ), - IncludeExcludeDefinition( - reason="Exclude code from v2 API", - name="head", - filepath=pathlib.Path("great_expectations/dataset/sqlalchemy_dataset.py"), - ), - IncludeExcludeDefinition( - reason="self_check is mentioned but in the docs we currently recommend using test_yaml_config which uses self_check under the hood. E.g. https://docs.greatexpectations.io/docs/guides/setup/configuring_data_contexts/how_to_configure_datacontext_components_using_test_yaml_config/#steps", - name="self_check", - filepath=pathlib.Path("great_expectations/checkpoint/checkpoint.py"), - ), - IncludeExcludeDefinition( - reason="self_check is mentioned but in the docs we currently recommend using test_yaml_config which uses self_check under the hood. E.g. https://docs.greatexpectations.io/docs/guides/setup/configuring_data_contexts/how_to_configure_datacontext_components_using_test_yaml_config/#steps", - name="self_check", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/data_connector.py" - ), - ), - IncludeExcludeDefinition( - reason="self_check is mentioned but in the docs we currently recommend using test_yaml_config which uses self_check under the hood. E.g. 
https://docs.greatexpectations.io/docs/guides/setup/configuring_data_contexts/how_to_configure_datacontext_components_using_test_yaml_config/#steps", - name="self_check", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/runtime_data_connector.py" - ), - ), - IncludeExcludeDefinition( - reason="self_check is mentioned but in the docs we currently recommend using test_yaml_config which uses self_check under the hood. E.g. https://docs.greatexpectations.io/docs/guides/setup/configuring_data_contexts/how_to_configure_datacontext_components_using_test_yaml_config/#steps", - name="self_check", - filepath=pathlib.Path("great_expectations/datasource/new_datasource.py"), - ), IncludeExcludeDefinition( reason="False match for dict `.update()` method.", name="update", @@ -522,31 +333,6 @@ name="IDDict", filepath=pathlib.Path("great_expectations/core/id_dict.py"), ), - IncludeExcludeDefinition( - reason="v2 API", - name="expect_column_mean_to_be_between", - filepath=pathlib.Path("great_expectations/dataset/dataset.py"), - ), - IncludeExcludeDefinition( - reason="v2 API", - name="expect_column_values_to_be_in_set", - filepath=pathlib.Path("great_expectations/dataset/dataset.py"), - ), - IncludeExcludeDefinition( - reason="v2 API", - name="expect_column_values_to_be_in_set", - filepath=pathlib.Path("great_expectations/dataset/pandas_dataset.py"), - ), - IncludeExcludeDefinition( - reason="v2 API", - name="expect_column_values_to_be_in_set", - filepath=pathlib.Path("great_expectations/dataset/sparkdf_dataset.py"), - ), - IncludeExcludeDefinition( - reason="v2 API", - name="expect_column_values_to_be_in_set", - filepath=pathlib.Path("great_expectations/dataset/sqlalchemy_dataset.py"), - ), IncludeExcludeDefinition( reason="to_json_dict is an internal helper method", name="to_json_dict", @@ -642,17 +428,6 @@ name="to_json_dict", filepath=pathlib.Path("great_expectations/validator/exception_info.py"), ), - IncludeExcludeDefinition( - reason="False match for 
DataAssistant.run()", - name="run", - filepath=pathlib.Path( - "great_expectations/experimental/rule_based_profiler/data_assistant/data_assistant_runner.py" - ), - ), - IncludeExcludeDefinition( - reason="Deprecated v2 api Dataset is not included in the public API", - filepath=pathlib.Path("great_expectations/dataset/dataset.py"), - ), IncludeExcludeDefinition( reason="Validate method on custom type not included in the public API", name="validate", @@ -665,13 +440,6 @@ name="columns", filepath=pathlib.Path("great_expectations/datasource/fluent/sql_datasource.py"), ), - IncludeExcludeDefinition( - reason='The "columns()" property in this module is not included in the public API', - name="columns", - filepath=pathlib.Path( - "great_expectations/datasource/fluent/spark_generic_partitioners.py" - ), - ), IncludeExcludeDefinition( reason="The add method shares a name with a public API method", name="add", @@ -703,14 +471,7 @@ IncludeExcludeDefinition( reason="Internal protocols are not included in the public API.", name="add_dataframe_asset", - filepath=pathlib.Path("great_expectations/core/datasource_dict.py"), - ), - IncludeExcludeDefinition( - reason="This method shares a name with a public API method.", - name="get_validator", - filepath=pathlib.Path( - "great_expectations/experimental/metric_repository/column_descriptive_metrics_metric_retriever.py" - ), + filepath=pathlib.Path("great_expectations/datasource/datasource_dict.py"), ), IncludeExcludeDefinition( reason="Not yet part of the public API", @@ -722,11 +483,6 @@ name="ResultFormat", filepath=pathlib.Path("great_expectations/core/result_format.py"), ), - IncludeExcludeDefinition( - reason="Not yet part of the public API, under active development", - name="BatchDefinition", - filepath=pathlib.Path("great_expectations/core/batch_config.py"), - ), IncludeExcludeDefinition( reason="This method shares a name with a public API method.", name="add_expectation", @@ -741,16 +497,6 @@ 
"great_expectations/data_context/store/expectations_store.py" ), ), - IncludeExcludeDefinition( - reason="This method shares a name with a public API method.", - name="build_batch_request", - filepath=pathlib.Path("great_expectations/core/batch_config.py"), - ), - IncludeExcludeDefinition( - reason="This method shares a name with a public API method.", - name="save", - filepath=pathlib.Path("great_expectations/core/batch_config.py"), - ), IncludeExcludeDefinition( reason="This method shares a name with a public API method.", name="delete", @@ -801,13 +547,6 @@ name="get_or_create_spark_session", filepath=pathlib.Path("great_expectations/core/util.py"), ), - IncludeExcludeDefinition( - reason="This method does not need to be accessed by users, and will eventually be removed from docs.", - name="get_batch_parameters_keys", - filepath=pathlib.Path( - "great_expectations/datasource/fluent/file_path_data_asset.py" - ), - ), IncludeExcludeDefinition( reason="This method does not need to be accessed by users, and will eventually be removed from docs.", name="get_batch_parameters_keys", diff --git a/docs/sphinx_api_docs_source/public_api_includes.py b/docs/sphinx_api_docs_source/public_api_includes.py index dee89e1c6e9f..209581ebb4c1 100644 --- a/docs/sphinx_api_docs_source/public_api_includes.py +++ b/docs/sphinx_api_docs_source/public_api_includes.py @@ -54,144 +54,6 @@ name="UpdateDataDocsAction", filepath=pathlib.Path("great_expectations/checkpoint/actions.py"), ), - IncludeExcludeDefinition( - reason="Validation Actions are used within Checkpoints but are part of our Public API and can be overridden via plugins.", - name="CloudNotificationAction", - filepath=pathlib.Path("great_expectations/checkpoint/actions.py"), - ), - IncludeExcludeDefinition( - reason="DataConnectors are part of the public API", - name="InferredAssetFilePathDataConnector", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/inferred_asset_file_path_data_connector.py" - ), - 
), - IncludeExcludeDefinition( - reason="DataConnectors are part of the public API", - name="InferredAssetDBFSDataConnector", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/inferred_asset_dbfs_data_connector.py" - ), - ), - IncludeExcludeDefinition( - reason="DataConnectors are part of the public API", - name="InferredAssetGCSDataConnector", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/inferred_asset_gcs_data_connector.py" - ), - ), - IncludeExcludeDefinition( - reason="DataConnectors are part of the public API", - name="ConfiguredAssetDBFSDataConnector", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/configured_asset_dbfs_data_connector.py" - ), - ), - IncludeExcludeDefinition( - reason="DataConnectors are part of the public API", - name="FilePathDataConnector", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/file_path_data_connector.py" - ), - ), - IncludeExcludeDefinition( - reason="DataConnectors are part of the public API", - name="InferredAssetAzureDataConnector", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/inferred_asset_azure_data_connector.py" - ), - ), - IncludeExcludeDefinition( - reason="DataConnectors are part of the public API", - name="RuntimeDataConnector", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/runtime_data_connector.py" - ), - ), - IncludeExcludeDefinition( - reason="DataConnectors are part of the public API", - name="ConfiguredAssetAWSGlueDataCatalogDataConnector", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/configured_asset_aws_glue_data_catalog_data_connector.py" - ), - ), - IncludeExcludeDefinition( - reason="DataConnectors are part of the public API", - name="InferredAssetS3DataConnector", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/inferred_asset_s3_data_connector.py" - ), - ), - IncludeExcludeDefinition( - 
reason="DataConnectors are part of the public API", - name="ConfiguredAssetFilesystemDataConnector", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/configured_asset_filesystem_data_connector.py" - ), - ), - IncludeExcludeDefinition( - reason="DataConnectors are part of the public API", - name="ConfiguredAssetS3DataConnector", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/configured_asset_s3_data_connector.py" - ), - ), - IncludeExcludeDefinition( - reason="DataConnectors are part of the public API", - name="ConfiguredAssetAzureDataConnector", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/configured_asset_azure_data_connector.py" - ), - ), - IncludeExcludeDefinition( - reason="DataConnectors are part of the public API", - name="InferredAssetSqlDataConnector", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/inferred_asset_sql_data_connector.py" - ), - ), - IncludeExcludeDefinition( - reason="DataConnectors are part of the public API", - name="ConfiguredAssetSqlDataConnector", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/configured_asset_sql_data_connector.py" - ), - ), - IncludeExcludeDefinition( - reason="DataConnectors are part of the public API", - name="DataConnector", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/data_connector.py" - ), - ), - IncludeExcludeDefinition( - reason="DataConnectors are part of the public API", - name="ConfiguredAssetFilePathDataConnector", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/configured_asset_file_path_data_connector.py" - ), - ), - IncludeExcludeDefinition( - reason="DataConnectors are part of the public API", - name="InferredAssetAWSGlueDataCatalogDataConnector", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/inferred_asset_aws_glue_data_catalog_data_connector.py" - ), - ), - IncludeExcludeDefinition( - 
reason="DataConnectors are part of the public API", - name="InferredAssetFilesystemDataConnector", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/inferred_asset_filesystem_data_connector.py" - ), - ), - IncludeExcludeDefinition( - reason="DataConnectors are part of the public API", - name="ConfiguredAssetGCSDataConnector", - filepath=pathlib.Path( - "great_expectations/datasource/data_connector/configured_asset_gcs_data_connector.py" - ), - ), IncludeExcludeDefinition( reason="Data Context types are part of the public API", name="EphemeralDataContext", @@ -213,13 +75,6 @@ "great_expectations/data_context/data_context/cloud_data_context.py" ), ), - IncludeExcludeDefinition( - reason="Map metric providers are part of the public API", - name="MapMetricProvider", - filepath=pathlib.Path( - "great_expectations/expectations/metrics/map_metric_provider.py" - ), - ), IncludeExcludeDefinition( reason="Map metric providers are part of the public API", name="MetricProvider", @@ -227,11 +82,4 @@ "great_expectations/expectations/metrics/metric_provider.py" ), ), - IncludeExcludeDefinition( - reason="Checkpoint CRUD is part of the public API", - name="delete_checkpoint", - filepath=pathlib.Path( - "great_expectations/data_context/data_context/abstract_data_context.py" - ), - ), ] diff --git a/docs/sphinx_api_docs_source/public_api_missing_threshold.py b/docs/sphinx_api_docs_source/public_api_missing_threshold.py index faaa1a76017e..d1c61827131f 100644 --- a/docs/sphinx_api_docs_source/public_api_missing_threshold.py +++ b/docs/sphinx_api_docs_source/public_api_missing_threshold.py @@ -4,165 +4,780 @@ adding an exclude directive to docs/sphinx_api_docs_source/public_api_excludes.py """ +import pathlib + +from docs.sphinx_api_docs_source.printable_definition import PrintableDefinition + ITEMS_IGNORED_FROM_PUBLIC_API = [ - "File: great_expectations/_docs_decorators.py Name: add", - "File: great_expectations/checkpoint/actions.py Name: _run", - "File: 
great_expectations/checkpoint/actions.py Name: run", - "File: great_expectations/checkpoint/actions.py Name: update", - "File: great_expectations/checkpoint/checkpoint.py Name: describe_dict", - "File: great_expectations/compatibility/not_imported.py Name: is_version_greater_or_equal", - "File: great_expectations/compatibility/typing_extensions.py Name: override", - "File: great_expectations/core/batch.py Name: head", - "File: great_expectations/core/batch_definition.py Name: build_batch_request", - "File: great_expectations/core/expectation_diagnostics/expectation_doctor.py Name: print_diagnostic_checklist", - "File: great_expectations/core/expectation_diagnostics/expectation_doctor.py Name: run_diagnostics", - "File: great_expectations/core/expectation_suite.py Name: add_expectation_configuration", - "File: great_expectations/core/expectation_suite.py Name: remove_expectation", - "File: great_expectations/core/expectation_validation_result.py Name: describe_dict", - "File: great_expectations/core/factory/factory.py Name: add", - "File: great_expectations/core/factory/factory.py Name: all", - "File: great_expectations/core/factory/factory.py Name: get", - "File: great_expectations/core/metric_domain_types.py Name: MetricDomainTypes", - "File: great_expectations/core/metric_function_types.py Name: MetricPartialFunctionTypes", - "File: great_expectations/core/partitioners.py Name: ColumnPartitionerMonthly", - "File: great_expectations/core/partitioners.py Name: PartitionerColumnValue", - "File: great_expectations/core/yaml_handler.py Name: YAMLHandler", - "File: great_expectations/core/yaml_handler.py Name: dump", - "File: great_expectations/core/yaml_handler.py Name: load", - "File: great_expectations/data_context/data_context/abstract_data_context.py Name: add_store", - "File: great_expectations/data_context/data_context/abstract_data_context.py Name: delete_datasource", - "File: great_expectations/data_context/data_context/abstract_data_context.py Name: 
get_docs_sites_urls", - "File: great_expectations/data_context/data_context/abstract_data_context.py Name: get_validator", - "File: great_expectations/data_context/data_context/abstract_data_context.py Name: list_datasources", - "File: great_expectations/data_context/data_context/abstract_data_context.py Name: open_data_docs", - "File: great_expectations/data_context/data_context/context_factory.py Name: build_data_docs", - "File: great_expectations/data_context/data_context/context_factory.py Name: get_context", - "File: great_expectations/data_context/data_context/context_factory.py Name: get_docs_sites_urls", - "File: great_expectations/data_context/data_context/context_factory.py Name: get_validator", - "File: great_expectations/data_context/data_context_variables.py Name: save", - "File: great_expectations/data_context/store/_store_backend.py Name: add", - "File: great_expectations/data_context/store/_store_backend.py Name: update", - "File: great_expectations/data_context/store/checkpoint_store.py Name: CheckpointStore", - "File: great_expectations/data_context/store/database_store_backend.py Name: DatabaseStoreBackend", - "File: great_expectations/data_context/store/expectations_store.py Name: ExpectationsStore", - "File: great_expectations/data_context/store/metric_store.py Name: MetricStore", - "File: great_expectations/data_context/store/query_store.py Name: SqlAlchemyQueryStore", - "File: great_expectations/data_context/store/store.py Name: add", - "File: great_expectations/data_context/store/store.py Name: update", - "File: great_expectations/data_context/store/tuple_store_backend.py Name: TupleAzureBlobStoreBackend", - "File: great_expectations/data_context/store/tuple_store_backend.py Name: TupleFilesystemStoreBackend", - "File: great_expectations/data_context/store/tuple_store_backend.py Name: TupleGCSStoreBackend", - "File: great_expectations/data_context/store/tuple_store_backend.py Name: TupleS3StoreBackend", - "File: 
great_expectations/data_context/store/validation_definition_store.py Name: ValidationDefinitionStore", - "File: great_expectations/data_context/store/validation_results_store.py Name: ValidationResultsStore", - "File: great_expectations/data_context/types/base.py Name: update", - "File: great_expectations/data_context/types/resource_identifiers.py Name: GXCloudIdentifier", - "File: great_expectations/datasource/datasource_dict.py Name: add_dataframe_asset", - "File: great_expectations/datasource/fluent/config.py Name: yaml", - "File: great_expectations/datasource/fluent/config_str.py Name: ConfigStr", - "File: great_expectations/datasource/fluent/data_asset/path/dataframe_partitioners.py Name: columns", - "File: great_expectations/datasource/fluent/data_asset/path/directory_asset.py Name: build_batch_request", - "File: great_expectations/datasource/fluent/data_asset/path/directory_asset.py Name: get_batch_parameters_keys", - "File: great_expectations/datasource/fluent/data_asset/path/file_asset.py Name: build_batch_request", - "File: great_expectations/datasource/fluent/data_asset/path/file_asset.py Name: get_batch_parameters_keys", - "File: great_expectations/datasource/fluent/data_asset/path/path_data_asset.py Name: get_batch", - "File: great_expectations/datasource/fluent/data_asset/path/path_data_asset.py Name: get_batch_parameters_keys", - "File: great_expectations/datasource/fluent/data_connector/batch_filter.py Name: validate", - "File: great_expectations/datasource/fluent/fabric.py Name: build_batch_request", - "File: great_expectations/datasource/fluent/fabric.py Name: get_batch", - "File: great_expectations/datasource/fluent/fluent_base_model.py Name: yaml", - "File: great_expectations/datasource/fluent/invalid_datasource.py Name: build_batch_request", - "File: great_expectations/datasource/fluent/invalid_datasource.py Name: get_asset", - "File: great_expectations/datasource/fluent/invalid_datasource.py Name: get_batch", - "File: 
great_expectations/datasource/fluent/invalid_datasource.py Name: get_batch_parameters_keys", - "File: great_expectations/datasource/fluent/pandas_datasource.py Name: build_batch_request", - "File: great_expectations/datasource/fluent/pandas_datasource.py Name: get_batch", - "File: great_expectations/datasource/fluent/sources.py Name: delete_datasource", - "File: great_expectations/datasource/fluent/spark_datasource.py Name: build_batch_request", - "File: great_expectations/datasource/fluent/spark_datasource.py Name: get_batch", - "File: great_expectations/datasource/fluent/sql_datasource.py Name: build_batch_request", - "File: great_expectations/datasource/fluent/sql_datasource.py Name: get_batch", - "File: great_expectations/exceptions/exceptions.py Name: DataContextError", - "File: great_expectations/exceptions/exceptions.py Name: InvalidExpectationConfigurationError", - "File: great_expectations/execution_engine/execution_engine.py Name: ExecutionEngine", - "File: great_expectations/execution_engine/execution_engine.py Name: get_compute_domain", - "File: great_expectations/execution_engine/pandas_execution_engine.py Name: PandasExecutionEngine", - "File: great_expectations/execution_engine/pandas_execution_engine.py Name: get_compute_domain", - "File: great_expectations/execution_engine/sparkdf_execution_engine.py Name: SparkDFExecutionEngine", - "File: great_expectations/execution_engine/sparkdf_execution_engine.py Name: get_compute_domain", - "File: great_expectations/execution_engine/sqlalchemy_execution_engine.py Name: SqlAlchemyExecutionEngine", - "File: great_expectations/execution_engine/sqlalchemy_execution_engine.py Name: execute_query", - "File: great_expectations/execution_engine/sqlalchemy_execution_engine.py Name: get_compute_domain", - "File: great_expectations/expectations/core/expect_column_max_to_be_between.py Name: ExpectColumnMaxToBeBetween", - "File: great_expectations/expectations/core/expect_column_to_exist.py Name: ExpectColumnToExist", - 
"File: great_expectations/expectations/core/expect_column_values_to_be_in_type_list.py Name: ExpectColumnValuesToBeInTypeList", - "File: great_expectations/expectations/core/expect_column_values_to_be_null.py Name: ExpectColumnValuesToBeNull", - "File: great_expectations/expectations/core/expect_column_values_to_be_of_type.py Name: ExpectColumnValuesToBeOfType", - "File: great_expectations/expectations/core/expect_table_column_count_to_be_between.py Name: ExpectTableColumnCountToBeBetween", - "File: great_expectations/expectations/core/expect_table_column_count_to_equal.py Name: ExpectTableColumnCountToEqual", - "File: great_expectations/expectations/core/expect_table_columns_to_match_ordered_list.py Name: ExpectTableColumnsToMatchOrderedList", - "File: great_expectations/expectations/core/expect_table_columns_to_match_set.py Name: ExpectTableColumnsToMatchSet", - "File: great_expectations/expectations/core/expect_table_row_count_to_be_between.py Name: ExpectTableRowCountToBeBetween", - "File: great_expectations/expectations/core/expect_table_row_count_to_equal.py Name: ExpectTableRowCountToEqual", - "File: great_expectations/expectations/core/expect_table_row_count_to_equal_other_table.py Name: ExpectTableRowCountToEqualOtherTable", - "File: great_expectations/expectations/core/unexpected_rows_expectation.py Name: UnexpectedRowsExpectation", - "File: great_expectations/expectations/expectation.py Name: ColumnAggregateExpectation", - "File: great_expectations/expectations/expectation.py Name: ColumnMapExpectation", - "File: great_expectations/expectations/expectation.py Name: UnexpectedRowsExpectation", - "File: great_expectations/expectations/expectation.py Name: render_suite_parameter_string", - "File: great_expectations/expectations/expectation.py Name: validate_configuration", - "File: great_expectations/expectations/expectation_configuration.py Name: ExpectationConfiguration", - "File: great_expectations/expectations/expectation_configuration.py Name: 
to_domain_obj", - "File: great_expectations/expectations/expectation_configuration.py Name: type", - "File: great_expectations/expectations/metrics/column_aggregate_metric_provider.py Name: ColumnAggregateMetricProvider", - "File: great_expectations/expectations/metrics/column_aggregate_metric_provider.py Name: column_aggregate_partial", - "File: great_expectations/expectations/metrics/column_aggregate_metric_provider.py Name: column_aggregate_value", - "File: great_expectations/expectations/metrics/map_metric_provider/column_condition_partial.py Name: column_condition_partial", - "File: great_expectations/expectations/metrics/map_metric_provider/column_map_metric_provider.py Name: ColumnMapMetricProvider", - "File: great_expectations/expectations/metrics/metric_provider.py Name: MetricProvider", - "File: great_expectations/expectations/metrics/metric_provider.py Name: metric_partial", - "File: great_expectations/expectations/metrics/metric_provider.py Name: metric_value", - "File: great_expectations/expectations/regex_based_column_map_expectation.py Name: validate_configuration", - "File: great_expectations/expectations/set_based_column_map_expectation.py Name: validate_configuration", - "File: great_expectations/experimental/metric_repository/metric_retriever.py Name: get_validator", - "File: great_expectations/experimental/rule_based_profiler/helpers/util.py Name: build_batch_request", - "File: great_expectations/experimental/rule_based_profiler/rule_based_profiler.py Name: run", - "File: great_expectations/render/components.py Name: CollapseContent", - "File: great_expectations/render/components.py Name: RenderedStringTemplateContent", - "File: great_expectations/render/components.py Name: RenderedTableContent", - "File: great_expectations/render/components.py Name: validate", - "File: great_expectations/render/renderer/email_renderer.py Name: EmailRenderer", - "File: great_expectations/render/renderer/microsoft_teams_renderer.py Name: MicrosoftTeamsRenderer", 
- "File: great_expectations/render/renderer/opsgenie_renderer.py Name: OpsgenieRenderer", - "File: great_expectations/render/renderer/renderer.py Name: renderer", - "File: great_expectations/render/renderer/site_builder.py Name: DefaultSiteIndexBuilder", - "File: great_expectations/render/renderer/site_builder.py Name: SiteBuilder", - "File: great_expectations/render/renderer/slack_renderer.py Name: SlackRenderer", - "File: great_expectations/render/util.py Name: handle_strict_min_max", - "File: great_expectations/render/util.py Name: num_to_str", - "File: great_expectations/render/util.py Name: parse_row_condition_string_pandas_engine", - "File: great_expectations/render/util.py Name: substitute_none_for_missing", - "File: great_expectations/validator/metric_configuration.py Name: MetricConfiguration", - "File: great_expectations/validator/metrics_calculator.py Name: columns", - "File: great_expectations/validator/validation_graph.py Name: resolve", - "File: great_expectations/validator/validator.py Name: columns", - "File: great_expectations/validator/validator.py Name: head", - "File: great_expectations/validator/validator.py Name: remove_expectation", - "File: great_expectations/validator/validator.py Name: save_expectation_suite", - "File: great_expectations/validator/validator.py Name: validate", - # Expectations referenced in the Learn data quality use cases: - "File: great_expectations/expectations/core/expect_column_kl_divergence_to_be_less_than.py Name: ExpectColumnKLDivergenceToBeLessThan", - "File: great_expectations/expectations/core/expect_column_mean_to_be_between.py Name: ExpectColumnMeanToBeBetween", - "File: great_expectations/expectations/core/expect_column_median_to_be_between.py Name: ExpectColumnMedianToBeBetween", - "File: great_expectations/expectations/core/expect_column_quantile_values_to_be_between.py Name: ExpectColumnQuantileValuesToBeBetween", - "File: great_expectations/expectations/core/expect_column_value_z_scores_to_be_less_than.py 
Name: ExpectColumnValueZScoresToBeLessThan", - "File: great_expectations/expectations/core/expect_column_pair_values_a_to_be_greater_than_b.py Name: ExpectColumnPairValuesAToBeGreaterThanB", - "File: great_expectations/expectations/core/expect_column_pair_values_to_be_equal.py Name: ExpectColumnPairValuesToBeEqual", - "File: great_expectations/expectations/core/expect_multicolumn_sum_to_equal.py Name: ExpectMulticolumnSumToEqual", - "File: great_expectations/expectations/core/expect_column_proportion_of_unique_values_to_be_between.py Name: ExpectColumnProportionOfUniqueValuesToBeBetween", - "File: great_expectations/expectations/core/expect_column_unique_value_count_to_be_between.py Name: ExpectColumnUniqueValueCountToBeBetween", - "File: great_expectations/expectations/core/expect_column_values_to_be_unique.py Name: ExpectColumnValuesToBeUnique", - "File: great_expectations/expectations/core/expect_compound_columns_to_be_unique.py Name: ExpectCompoundColumnsToBeUnique", - "File: great_expectations/expectations/core/expect_select_column_values_to_be_unique_within_record.py Name: ExpectSelectColumnValuesToBeUniqueWithinRecord", - "File: great_expectations/expectations/core/expect_column_min_to_be_between.py Name: ExpectColumnMinToBeBetween", + PrintableDefinition( + file=pathlib.Path("great_expectations/_docs_decorators.py"), name="add" + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/checkpoint/actions.py"), name="get" + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/checkpoint/actions.py"), name="_run" + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/checkpoint/actions.py"), name="run" + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/checkpoint/actions.py"), name="update" + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/checkpoint/checkpoint.py"), + name="describe_dict", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/compatibility/not_imported.py"), 
+ name="is_version_greater_or_equal", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/compatibility/typing_extensions.py"), + name="override", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/core/batch.py"), name="head" + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/core/batch_definition.py"), + name="build_batch_request", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/core/expectation_diagnostics/expectation_doctor.py" + ), + name="print_diagnostic_checklist", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/core/expectation_diagnostics/expectation_doctor.py" + ), + name="run_diagnostics", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/core/expectation_suite.py"), + name="add_expectation_configuration", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/core/expectation_suite.py"), + name="remove_expectation", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/core/expectation_validation_result.py"), + name="describe_dict", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/core/factory/factory.py"), name="add" + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/core/factory/factory.py"), name="all" + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/core/factory/factory.py"), name="get" + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/core/metric_domain_types.py"), + name="MetricDomainTypes", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/core/metric_function_types.py"), + name="MetricPartialFunctionTypes", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/core/partitioners.py"), + name="ColumnPartitionerMonthly", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/core/partitioners.py"), + name="PartitionerColumnValue", + ), + PrintableDefinition( + 
file=pathlib.Path("great_expectations/core/yaml_handler.py"), name="YAMLHandler" + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/core/yaml_handler.py"), name="dump" + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/core/yaml_handler.py"), name="load" + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/data_context/data_context/abstract_data_context.py" + ), + name="add_store", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/data_context/data_context/abstract_data_context.py" + ), + name="delete_datasource", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/data_context/data_context/abstract_data_context.py" + ), + name="get_docs_sites_urls", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/data_context/data_context/abstract_data_context.py" + ), + name="get_validator", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/data_context/data_context/abstract_data_context.py" + ), + name="list_datasources", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/data_context/data_context/abstract_data_context.py" + ), + name="open_data_docs", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/data_context/data_context/context_factory.py" + ), + name="build_data_docs", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/data_context/data_context/context_factory.py" + ), + name="get_context", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/data_context/data_context/context_factory.py" + ), + name="get_docs_sites_urls", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/data_context/data_context/context_factory.py" + ), + name="get_validator", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/data_context/data_context_variables.py"), + name="save", + ), + PrintableDefinition( + 
file=pathlib.Path("great_expectations/data_context/store/_store_backend.py"), + name="add", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/data_context/store/_store_backend.py"), + name="update", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/data_context/store/checkpoint_store.py"), + name="CheckpointStore", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/data_context/store/database_store_backend.py" + ), + name="DatabaseStoreBackend", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/data_context/store/expectations_store.py" + ), + name="ExpectationsStore", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/data_context/store/metric_store.py"), + name="MetricStore", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/data_context/store/query_store.py"), + name="SqlAlchemyQueryStore", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/data_context/store/store.py"), name="add" + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/data_context/store/store.py"), + name="update", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/data_context/store/tuple_store_backend.py" + ), + name="TupleAzureBlobStoreBackend", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/data_context/store/tuple_store_backend.py" + ), + name="TupleFilesystemStoreBackend", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/data_context/store/tuple_store_backend.py" + ), + name="TupleGCSStoreBackend", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/data_context/store/tuple_store_backend.py" + ), + name="TupleS3StoreBackend", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/data_context/store/validation_definition_store.py" + ), + name="ValidationDefinitionStore", + ), + PrintableDefinition( + file=pathlib.Path( + 
"great_expectations/data_context/store/validation_results_store.py" + ), + name="ValidationResultsStore", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/data_context/types/base.py"), + name="update", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/data_context/types/resource_identifiers.py" + ), + name="GXCloudIdentifier", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/datasource/fluent/config.py"), name="yaml" + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/datasource/fluent/config_str.py"), + name="ConfigStr", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/datasource/fluent/data_asset/path/dataframe_partitioners.py" + ), + name="columns", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/datasource/fluent/data_asset/path/directory_asset.py" + ), + name="build_batch_request", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/datasource/fluent/data_asset/path/directory_asset.py" + ), + name="get_batch_parameters_keys", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/datasource/fluent/data_asset/path/file_asset.py" + ), + name="build_batch_request", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/datasource/fluent/data_asset/path/file_asset.py" + ), + name="get_batch_parameters_keys", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/datasource/fluent/data_asset/path/path_data_asset.py" + ), + name="get_batch", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/datasource/fluent/data_asset/path/path_data_asset.py" + ), + name="get_batch_parameters_keys", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/datasource/fluent/data_connector/batch_filter.py" + ), + name="validate", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/datasource/fluent/fabric.py"), + name="build_batch_request", + ), + 
PrintableDefinition( + file=pathlib.Path("great_expectations/datasource/fluent/fabric.py"), + name="get_batch", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/datasource/fluent/fluent_base_model.py"), + name="yaml", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/datasource/fluent/invalid_datasource.py"), + name="build_batch_request", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/datasource/fluent/invalid_datasource.py"), + name="get_asset", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/datasource/fluent/invalid_datasource.py"), + name="get_batch", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/datasource/fluent/invalid_datasource.py"), + name="get_batch_parameters_keys", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/datasource/fluent/pandas_datasource.py"), + name="build_batch_request", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/datasource/fluent/pandas_datasource.py"), + name="get_batch", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/datasource/fluent/sources.py"), + name="delete_datasource", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/datasource/fluent/spark_datasource.py"), + name="build_batch_request", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/datasource/fluent/spark_datasource.py"), + name="get_batch", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/datasource/fluent/sql_datasource.py"), + name="build_batch_request", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/datasource/fluent/sql_datasource.py"), + name="get_batch", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/exceptions/exceptions.py"), + name="DataContextError", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/exceptions/exceptions.py"), + name="InvalidExpectationConfigurationError", + ), + 
PrintableDefinition( + file=pathlib.Path("great_expectations/execution_engine/execution_engine.py"), + name="ExecutionEngine", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/execution_engine/execution_engine.py"), + name="get_compute_domain", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/execution_engine/pandas_execution_engine.py" + ), + name="PandasExecutionEngine", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/execution_engine/pandas_execution_engine.py" + ), + name="get_compute_domain", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/execution_engine/sparkdf_execution_engine.py" + ), + name="SparkDFExecutionEngine", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/execution_engine/sparkdf_execution_engine.py" + ), + name="get_compute_domain", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/execution_engine/sqlalchemy_execution_engine.py" + ), + name="SqlAlchemyExecutionEngine", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/execution_engine/sqlalchemy_execution_engine.py" + ), + name="execute_query", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/execution_engine/sqlalchemy_execution_engine.py" + ), + name="get_compute_domain", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_column_kl_divergence_to_be_less_than.py" + ), + name="ExpectColumnKLDivergenceToBeLessThan", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_column_max_to_be_between.py" + ), + name="ExpectColumnMaxToBeBetween", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_column_mean_to_be_between.py" + ), + name="ExpectColumnMeanToBeBetween", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_column_median_to_be_between.py" + ), + 
name="ExpectColumnMedianToBeBetween", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_column_min_to_be_between.py" + ), + name="ExpectColumnMinToBeBetween", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_column_pair_values_a_to_be_greater_than_b.py" + ), + name="ExpectColumnPairValuesAToBeGreaterThanB", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_column_pair_values_to_be_equal.py" + ), + name="ExpectColumnPairValuesToBeEqual", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_column_proportion_of_unique_values_to_be_between.py" + ), + name="ExpectColumnProportionOfUniqueValuesToBeBetween", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_column_quantile_values_to_be_between.py" + ), + name="ExpectColumnQuantileValuesToBeBetween", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_column_to_exist.py" + ), + name="ExpectColumnToExist", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_column_unique_value_count_to_be_between.py" + ), + name="ExpectColumnUniqueValueCountToBeBetween", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_column_value_z_scores_to_be_less_than.py" + ), + name="ExpectColumnValueZScoresToBeLessThan", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_column_values_to_be_in_type_list.py" + ), + name="ExpectColumnValuesToBeInTypeList", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_column_values_to_be_null.py" + ), + name="ExpectColumnValuesToBeNull", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_column_values_to_be_of_type.py" + ), 
+ name="ExpectColumnValuesToBeOfType", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_column_values_to_be_unique.py" + ), + name="ExpectColumnValuesToBeUnique", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_compound_columns_to_be_unique.py" + ), + name="ExpectCompoundColumnsToBeUnique", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_multicolumn_sum_to_equal.py" + ), + name="ExpectMulticolumnSumToEqual", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_select_column_values_to_be_unique_within_record.py" + ), + name="ExpectSelectColumnValuesToBeUniqueWithinRecord", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_table_column_count_to_be_between.py" + ), + name="ExpectTableColumnCountToBeBetween", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_table_column_count_to_equal.py" + ), + name="ExpectTableColumnCountToEqual", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_table_columns_to_match_ordered_list.py" + ), + name="ExpectTableColumnsToMatchOrderedList", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_table_columns_to_match_set.py" + ), + name="ExpectTableColumnsToMatchSet", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_table_row_count_to_be_between.py" + ), + name="ExpectTableRowCountToBeBetween", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_table_row_count_to_equal.py" + ), + name="ExpectTableRowCountToEqual", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/expect_table_row_count_to_equal_other_table.py" + ), + 
name="ExpectTableRowCountToEqualOtherTable", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/core/unexpected_rows_expectation.py" + ), + name="UnexpectedRowsExpectation", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/expectations/expectation.py"), + name="ColumnAggregateExpectation", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/expectations/expectation.py"), + name="ColumnMapExpectation", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/expectations/expectation.py"), + name="UnexpectedRowsExpectation", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/expectations/expectation.py"), + name="render_suite_parameter_string", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/expectations/expectation.py"), + name="validate_configuration", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/expectation_configuration.py" + ), + name="ExpectationConfiguration", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/expectation_configuration.py" + ), + name="to_domain_obj", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/expectation_configuration.py" + ), + name="type", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/metrics/column_aggregate_metric_provider.py" + ), + name="ColumnAggregateMetricProvider", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/metrics/column_aggregate_metric_provider.py" + ), + name="column_aggregate_partial", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/metrics/column_aggregate_metric_provider.py" + ), + name="column_aggregate_value", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/metrics/map_metric_provider/column_condition_partial.py" + ), + 
name="column_condition_partial", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/metrics/map_metric_provider/column_map_metric_provider.py" + ), + name="ColumnMapMetricProvider", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/expectations/metrics/metric_provider.py"), + name="MetricProvider", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/expectations/metrics/metric_provider.py"), + name="metric_partial", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/expectations/metrics/metric_provider.py"), + name="metric_value", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/regex_based_column_map_expectation.py" + ), + name="validate_configuration", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/expectations/set_based_column_map_expectation.py" + ), + name="validate_configuration", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/experimental/metric_repository/metric_retriever.py" + ), + name="get_validator", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/experimental/rule_based_profiler/helpers/util.py" + ), + name="build_batch_request", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/experimental/rule_based_profiler/rule_based_profiler.py" + ), + name="run", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/render/components.py"), + name="CollapseContent", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/render/components.py"), + name="RenderedStringTemplateContent", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/render/components.py"), + name="RenderedTableContent", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/render/components.py"), name="validate" + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/render/renderer/email_renderer.py"), + 
name="EmailRenderer", + ), + PrintableDefinition( + file=pathlib.Path( + "great_expectations/render/renderer/microsoft_teams_renderer.py" + ), + name="MicrosoftTeamsRenderer", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/render/renderer/opsgenie_renderer.py"), + name="OpsgenieRenderer", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/render/renderer/renderer.py"), + name="renderer", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/render/renderer/site_builder.py"), + name="DefaultSiteIndexBuilder", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/render/renderer/site_builder.py"), + name="SiteBuilder", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/render/renderer/slack_renderer.py"), + name="SlackRenderer", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/render/util.py"), + name="handle_strict_min_max", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/render/util.py"), name="num_to_str" + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/render/util.py"), + name="parse_row_condition_string_pandas_engine", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/render/util.py"), + name="substitute_none_for_missing", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/validator/metric_configuration.py"), + name="MetricConfiguration", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/validator/metrics_calculator.py"), + name="columns", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/validator/validation_graph.py"), + name="resolve", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/validator/validator.py"), name="columns" + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/validator/validator.py"), name="head" + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/validator/validator.py"), + 
name="remove_expectation", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/validator/validator.py"), + name="save_expectation_suite", + ), + PrintableDefinition( + file=pathlib.Path("great_expectations/validator/validator.py"), name="validate" + ), ] diff --git a/docs/sphinx_api_docs_source/public_api_report.py b/docs/sphinx_api_docs_source/public_api_report.py index 9e7c040f29f5..5e9ba4bbb347 100755 --- a/docs/sphinx_api_docs_source/public_api_report.py +++ b/docs/sphinx_api_docs_source/public_api_report.py @@ -66,6 +66,7 @@ public_api_includes, public_api_missing_threshold, ) +from docs.sphinx_api_docs_source.printable_definition import PrintableDefinition if TYPE_CHECKING: from docs.sphinx_api_docs_source.include_exclude_definition import ( @@ -790,11 +791,11 @@ def write_printable_definitions_to_file( """ printable_definitions = self.generate_printable_definitions() with open(filepath, "w") as f: - f.write("\n".join(printable_definitions)) + f.write("\n".join([str(d) for d in printable_definitions])) def generate_printable_definitions( self, - ) -> List[str]: + ) -> List[PrintableDefinition]: """Generate a printable (human readable) definition. 
Returns: @@ -803,30 +804,30 @@ def generate_printable_definitions( sorted_definitions_list = sorted( list(self.definitions), key=operator.attrgetter("filepath", "name") ) - sorted_definitions_strings: List[str] = [] + sorted_printable_definitions: List[PrintableDefinition] = [] for definition in sorted_definitions_list: if definition.filepath.is_absolute(): - filepath = str(definition.filepath.relative_to(self.repo_root)) + filepath = definition.filepath.relative_to(self.repo_root) else: - filepath = str(definition.filepath) - sorted_definitions_strings.append( - f"File: {filepath} Name: {definition.name}" - ) + filepath = definition.filepath - sorted_definitions_strings_no_dupes = self._deduplicate_strings( - sorted_definitions_strings - ) + printable_definition = PrintableDefinition( + file=filepath, name=definition.name + ) + sorted_printable_definitions.append(printable_definition) - return sorted_definitions_strings_no_dupes + return self._deduplicate_definitions(sorted_printable_definitions) - def _deduplicate_strings(self, strings: List[str]) -> List[str]: + def _deduplicate_definitions( + self, printable_definitions: List[PrintableDefinition] + ) -> List[PrintableDefinition]: """Deduplicate a list of strings, keeping order intact.""" seen = set() no_duplicates = [] - for s in strings: - if s not in seen: - no_duplicates.append(s) - seen.add(s) + for definition in printable_definitions: + if definition not in seen: + no_duplicates.append(definition) + seen.add(definition) return no_duplicates @@ -934,14 +935,14 @@ def generate_public_api_report(write_to_file: bool = False) -> None: f"Items are missing from the public API: {len(undocumented_and_unignored)}" ) for item in sorted(undocumented_and_unignored): - logger.error(" - " + item) + logger.error(" - " + str(item)) has_errors = True if documented_and_ignored: logger.error( f"Items that should be removed from public_api_missing_threshold.ITEMS_IGNORED_FROM_PUBLIC_API: {len(documented_and_ignored)}" ) for 
item in sorted(documented_and_ignored): - logger.error(" - " + item) + logger.error(" - " + str(item)) has_errors = True if has_errors: diff --git a/docs/sphinx_api_docs_source/utils.py b/docs/sphinx_api_docs_source/utils.py index c3598265839e..8ea3b7942ca1 100644 --- a/docs/sphinx_api_docs_source/utils.py +++ b/docs/sphinx_api_docs_source/utils.py @@ -35,3 +35,30 @@ def apply_markdown_adjustments(soup, html_file_path, html_file_contents): # noq if lengthChildren > 0: item.insert(0, "\r\n") item.append("\r\n") + + +def apply_structure_changes(soup, html_file_path, html_file_contents): + # Add h2 title to Methods section + methods = soup.select(".py.method") + if methods: + add_section_title(soup, methods, "Methods") + + # Add h2 title to Properties section + properties = soup.select(".py.property") + if properties: + add_section_title(soup, properties, "Properties") + + +def add_section_title(soup, items, title): + wrapper_div = soup.new_tag("div") + title_h2 = soup.new_tag("h2") + title_h2.string = title + parent = items[0].parent + + for item in items: + wrapper_div.append(item.extract()) + + wrapper_div.insert(0, "\r\n") + wrapper_div.insert(1, title_h2) + wrapper_div.insert(2, "\r\n") + parent.insert_after(wrapper_div) diff --git a/great_expectations/_docs_decorators.py b/great_expectations/_docs_decorators.py index e5575d5ce507..fa1d1e8fa62e 100644 --- a/great_expectations/_docs_decorators.py +++ b/great_expectations/_docs_decorators.py @@ -40,6 +40,7 @@ class _PublicApiIntrospector: # Only used for testing _class_registry: dict[str, set[str]] = defaultdict(set) + _docstring_violations: set[str] = set() # This is a special key that is used to indicate that a class definition # is being added to the registry. 
@@ -49,8 +50,14 @@ class _PublicApiIntrospector: def class_registry(self) -> dict[str, set[str]]: return self._class_registry + @property + def docstring_violations(self) -> set[str]: + return self._docstring_violations + def add(self, func: F) -> None: + self._add_to_docstring_violations(func) self._add_to_class_registry(func) + try: # We use an if statement instead of a ternary to work around # mypy's inability to type narrow inside a ternary. @@ -73,6 +80,11 @@ def add(self, func: F) -> None: logger.exception(f"Could not add this function to the public API list: {func}") raise + def _add_to_docstring_violations(self, func: F) -> None: + name = f"{func.__module__}.{func.__qualname__}" + if not func.__doc__ and name.startswith("great_expectations"): + self._docstring_violations.add(name) + def _add_to_class_registry(self, func: F) -> None: if isinstance(func, type): self._add_class_definition_to_registry(func) @@ -135,7 +147,7 @@ def my_method(some_argument): This tag is added at import time. """ public_api_introspector.add(func) - existing_docstring = func.__doc__ if func.__doc__ else "" + existing_docstring = func.__doc__ or "" func.__doc__ = WHITELISTED_TAG + existing_docstring return func @@ -235,7 +247,7 @@ class MyClass: argument_name: Name of the argument to associate with the deprecation note. version: Version number when the method was deprecated. message: Optional deprecation message. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP text = f".. 
deprecated:: {version}" "\n" f" {message}" @@ -314,7 +326,7 @@ def _add_text_to_function_docstring_after_summary(func: F, text: str) -> F: split_docstring = existing_docstring.split("\n", 1) docstring = "" - if len(split_docstring) == 2: # noqa: PLR2004 + if len(split_docstring) == 2: # noqa: PLR2004 # FIXME CoP short_description, docstring = split_docstring docstring = f"{short_description.strip()}\n" "\n" f"{text}\n" "\n" f"{dedent(docstring)}" elif len(split_docstring) == 1: @@ -372,7 +384,7 @@ def _add_text_below_string_docstring_argument(docstring: str, argument_name: str arg_list = list(param.arg_name for param in parsed_docstring.params) if argument_name not in arg_list: - raise ValueError(f"Please specify an existing argument, you specified {argument_name}.") # noqa: TRY003 + raise ValueError(f"Please specify an existing argument, you specified {argument_name}.") # noqa: TRY003 # FIXME CoP for param in parsed_docstring.params: if param.arg_name == argument_name: diff --git a/great_expectations/_version.py b/great_expectations/_version.py index 22619f5f2afb..a166b3f30dde 100644 --- a/great_expectations/_version.py +++ b/great_expectations/_version.py @@ -116,7 +116,7 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): rootdirs = [] for i in range(3): - dirname = os.path.basename(root) # noqa: PTH119 + dirname = os.path.basename(root) # noqa: PTH119 # FIXME CoP if dirname.startswith(parentdir_prefix): return { "version": dirname[len(parentdir_prefix) :], @@ -127,15 +127,15 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): } else: rootdirs.append(root) - root = os.path.dirname(root) # up a level # noqa: PTH120 + root = os.path.dirname(root) # up a level # noqa: PTH120 # FIXME CoP if verbose: print(f"Tried directories {rootdirs!s} but none started with prefix {parentdir_prefix}") - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") # noqa: TRY003 + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") # 
noqa: TRY003 # FIXME CoP @register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): # noqa: C901 - too complex +def git_get_keywords(versionfile_abs): # noqa: C901 # too complex """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, @@ -164,10 +164,10 @@ def git_get_keywords(versionfile_abs): # noqa: C901 - too complex @register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): # noqa: C901 - too complex +def git_versions_from_keywords(keywords, tag_prefix, verbose): # noqa: C901 # too complex """Get version information from git keywords.""" if not keywords: - raise NotThisMethod("no keywords at all, weird") # noqa: TRY003 + raise NotThisMethod("no keywords at all, weird") # noqa: TRY003 # FIXME CoP date = keywords.get("date") if date is not None: # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant @@ -181,7 +181,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # noqa: C901 - t if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") # noqa: TRY003 + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") # noqa: TRY003 # FIXME CoP refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. @@ -226,7 +226,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # noqa: C901 - t @register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs( # noqa: C901 - 11 +def git_pieces_from_vcs( # noqa: C901 # 11 tag_prefix, root, verbose, run_command=run_command ): """Get version from 'git describe' in the root of the source tree. 
@@ -243,7 +243,7 @@ def git_pieces_from_vcs( # noqa: C901 - 11 if rc != 0: if verbose: print(f"Directory {root} not under git control") - raise NotThisMethod("'git rev-parse --git-dir' returned error") # noqa: TRY003 + raise NotThisMethod("'git rev-parse --git-dir' returned error") # noqa: TRY003 # FIXME CoP # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) @@ -262,11 +262,11 @@ def git_pieces_from_vcs( # noqa: C901 - 11 ) # --long was added in git-1.5.5 if describe_out is None: - raise NotThisMethod("'git describe' failed") # noqa: TRY003 + raise NotThisMethod("'git describe' failed") # noqa: TRY003 # FIXME CoP describe_out = describe_out.strip() full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: - raise NotThisMethod("'git rev-parse' failed") # noqa: TRY003 + raise NotThisMethod("'git rev-parse' failed") # noqa: TRY003 # FIXME CoP full_out = full_out.strip() pieces = {} @@ -343,12 +343,12 @@ def render_pep440(pieces): rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) # noqa: UP031 + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) # noqa: UP031 # FIXME CoP if pieces["dirty"]: rendered += ".dirty" else: # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) # noqa: UP031 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) # noqa: UP031 # FIXME CoP if pieces["dirty"]: rendered += ".dirty" return rendered @@ -363,10 +363,10 @@ def render_pep440_pre(pieces): if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: - rendered += ".post.dev%d" % pieces["distance"] # noqa: UP031 + rendered += ".post.dev%d" % pieces["distance"] # noqa: UP031 # FIXME CoP else: # exception #1 - rendered = "0.post.dev%d" % pieces["distance"] # noqa: UP031 + rendered = 
"0.post.dev%d" % pieces["distance"] # noqa: UP031 # FIXME CoP return rendered @@ -383,14 +383,14 @@ def render_pep440_post(pieces): if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] # noqa: UP031 + rendered += ".post%d" % pieces["distance"] # noqa: UP031 # FIXME CoP if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += f"g{pieces['short']}" else: # exception #1 - rendered = "0.post%d" % pieces["distance"] # noqa: UP031 + rendered = "0.post%d" % pieces["distance"] # noqa: UP031 # FIXME CoP if pieces["dirty"]: rendered += ".dev0" rendered += f"+g{pieces['short']}" @@ -408,12 +408,12 @@ def render_pep440_old(pieces): if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] # noqa: UP031 + rendered += ".post%d" % pieces["distance"] # noqa: UP031 # FIXME CoP if pieces["dirty"]: rendered += ".dev0" else: # exception #1 - rendered = "0.post%d" % pieces["distance"] # noqa: UP031 + rendered = "0.post%d" % pieces["distance"] # noqa: UP031 # FIXME CoP if pieces["dirty"]: rendered += ".dev0" return rendered @@ -430,7 +430,7 @@ def render_git_describe(pieces): if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) # noqa: UP031 + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) # noqa: UP031 # FIXME CoP else: # exception #1 rendered = pieces["short"] @@ -450,7 +450,7 @@ def render_git_describe_long(pieces): """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) # noqa: UP031 + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) # noqa: UP031 # FIXME CoP else: # exception #1 rendered = pieces["short"] @@ -459,7 +459,7 @@ def render_git_describe_long(pieces): return rendered -def 
render(pieces, style): # noqa: C901 - too complex +def render(pieces, style): # noqa: C901 # too complex """Render the given version pieces into the requested style.""" if pieces["error"]: return { @@ -486,7 +486,7 @@ def render(pieces, style): # noqa: C901 - too complex elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: - raise ValueError(f"unknown style '{style}'") # noqa: TRY003 + raise ValueError(f"unknown style '{style}'") # noqa: TRY003 # FIXME CoP return { "version": rendered, @@ -518,7 +518,7 @@ def get_versions(): # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. for _ in cfg.versionfile_source.split("/"): - root = os.path.dirname(root) # noqa: PTH120 + root = os.path.dirname(root) # noqa: PTH120 # FIXME CoP except NameError: return { "version": "0+unknown", diff --git a/great_expectations/analytics/base_event.py b/great_expectations/analytics/base_event.py index b2e9bdb1ae22..e370c72b0cf9 100644 --- a/great_expectations/analytics/base_event.py +++ b/great_expectations/analytics/base_event.py @@ -15,7 +15,7 @@ class Action: Attributes: name: A description of what happened. For example (.) 
"validation_result.saved" or "token.deleted" - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP name: str @@ -50,6 +50,10 @@ def oss_id(self) -> UUID | None: def user_id(self) -> UUID | None: return get_config().user_id + @property + def user_agent_str(self) -> str | None: + return get_config().user_agent_str + @property def distinct_id(self) -> UUID | None: """The distinct_id is the primary key for identifying @@ -63,7 +67,7 @@ def distinct_id(self) -> UUID | None: def __post_init__(self): allowed_actions = self.get_allowed_actions() if allowed_actions is not None and self.action not in self.get_allowed_actions(): - raise ValueError(f"Action [{self.action}] must be one of {self.get_allowed_actions()}") # noqa: TRY003 + raise ValueError(f"Action [{self.action}] must be one of {self.get_allowed_actions()}") # noqa: TRY003 # FIXME CoP @classmethod def get_allowed_actions(cls): @@ -75,6 +79,7 @@ def properties(self) -> dict: "oss_id": self.oss_id, "gx_version": gx_version, "service": "gx-core", + "user_agent_str": self.user_agent_str, } if self.user_id is not None: props.update({"user_id": self.user_id, "organization_id": self.organization_id}) diff --git a/great_expectations/analytics/client.py b/great_expectations/analytics/client.py index d2f1825e5eb3..8850517147ae 100644 --- a/great_expectations/analytics/client.py +++ b/great_expectations/analytics/client.py @@ -38,13 +38,14 @@ def submit(event: Event) -> None: pass -def init( # noqa: PLR0913 +def init( # noqa: PLR0913 # FIXME CoP enable: bool, user_id: Optional[UUID] = None, data_context_id: Optional[UUID] = None, organization_id: Optional[UUID] = None, oss_id: Optional[UUID] = None, cloud_mode: bool = False, + user_agent_str: Optional[str] = None, ): """Initializes the analytics platform client.""" conf = {} @@ -56,7 +57,13 @@ def init( # noqa: PLR0913 conf["organization_id"] = organization_id if oss_id: conf["oss_id"] = oss_id - update_config(config=Config(cloud_mode=cloud_mode, **conf)) + update_config( + 
config=Config( + cloud_mode=cloud_mode, + user_agent_str=user_agent_str, + **conf, + ) + ) enable = enable and not _in_gx_ci() posthog.disabled = not enable diff --git a/great_expectations/analytics/config.py b/great_expectations/analytics/config.py index 023f26e29d39..ed3690a2e3c2 100644 --- a/great_expectations/analytics/config.py +++ b/great_expectations/analytics/config.py @@ -16,6 +16,7 @@ class _EnvConfig(BaseSettings): gx_posthog_debug: bool = False gx_posthog_host: HttpUrl = "https://posthog.greatexpectations.io" # type: ignore[assignment] # default will be coerced gx_posthog_project_api_key: str = "phc_ph6ugZ1zq94dli0r1xgFg19fk2bb1EdDoLn9NZnCvRs" + gx_user_agent_str: Optional[str] = None @property def posthog_enabled(self) -> Optional[bool]: @@ -40,6 +41,7 @@ class Config(GenericModel): data_context_id: Optional[UUID] = None oss_id: Optional[UUID] = None cloud_mode: bool = False + user_agent_str: Optional[str] = None ENV_CONFIG = _EnvConfig() @@ -51,5 +53,5 @@ def get_config() -> Config: def update_config(config: Config): - global _CONFIG # noqa: PLW0603 + global _CONFIG # noqa: PLW0603 # FIXME CoP _CONFIG = config diff --git a/great_expectations/checkpoint/__init__.py b/great_expectations/checkpoint/__init__.py index abad335e1aef..abf1ae0e5ad9 100644 --- a/great_expectations/checkpoint/__init__.py +++ b/great_expectations/checkpoint/__init__.py @@ -9,7 +9,7 @@ UpdateDataDocsAction, ValidationAction, ) -from .checkpoint import Checkpoint +from .checkpoint import ActionContext, Checkpoint, CheckpointResult for _module_name, _package_name in [ (".actions", "great_expectations.checkpoint"), diff --git a/great_expectations/checkpoint/actions.py b/great_expectations/checkpoint/actions.py index 0ff1321a29a8..a076bf83503d 100644 --- a/great_expectations/checkpoint/actions.py +++ b/great_expectations/checkpoint/actions.py @@ -2,7 +2,7 @@ An action is a way to take an arbitrary method and make it configurable and runnable within a Data Context. 
The only requirement from an action is for it to have a take_action method. -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP from __future__ import annotations @@ -23,7 +23,7 @@ ) import requests -from typing_extensions import Annotated +from typing_extensions import dataclass_transform from great_expectations._docs_decorators import public_api from great_expectations.analytics.client import submit as submit_event @@ -35,6 +35,7 @@ BaseModel, Extra, Field, + ModelMetaclass, root_validator, validator, ) @@ -50,6 +51,10 @@ from great_expectations.data_context.util import instantiate_class_from_config from great_expectations.datasource.fluent.config_str import ConfigStr from great_expectations.exceptions import ClassInstantiationError +from great_expectations.exceptions.exceptions import ( + ValidationActionAlreadyRegisteredError, + ValidationActionRegistryRetrievalError, +) from great_expectations.render.renderer import ( EmailRenderer, MicrosoftTeamsRenderer, @@ -57,7 +62,7 @@ SlackRenderer, ) from great_expectations.render.renderer.renderer import Renderer -from great_expectations.util import convert_to_json_serializable # noqa: TID251 +from great_expectations.util import convert_to_json_serializable # noqa: TID251 # FIXME CoP if TYPE_CHECKING: from great_expectations.checkpoint.checkpoint import CheckpointResult @@ -83,10 +88,11 @@ def _build_renderer(config: dict) -> Renderer: return renderer +@public_api class ActionContext: """ - Shared context for all actions in a checkpoint run. - Note that order matters in the action list, as the context is updated with each action's result. + Shared context for all Actions in a Checkpoint run. + Note that order matters in the Action list, as the context is updated with each Action's result. 
""" def __init__(self) -> None: @@ -99,18 +105,99 @@ def data(self) -> list[tuple[ValidationAction, dict]]: def update(self, action: ValidationAction, action_result: dict) -> None: self._data.append((action, action_result)) + @public_api def filter_results(self, class_: Type[ValidationAction]) -> list[dict]: + """ + Filter the results of the actions in the context by class. + + Args: + class_: The class to filter by. + + Returns: + A list of action results. + """ return [action_result for action, action_result in self._data if isinstance(action, class_)] +class ValidationActionRegistry: + """ + Registers ValidationActions to enable deserialization based on their configuration. + + Uses the `type` key from the action configuration to determine which registered class + to instantiate. + """ + + def __init__(self): + self._registered_actions: dict[str, Type[ValidationAction]] = {} + + def register(self, action_type: str, action_class: Type[ValidationAction]) -> None: + """ + Register a ValidationAction class with the registry. + + Args: + action_type: The type of the action to register. + action_class: The ValidationAction class to register. + + Raises: + ValidationActionAlreadyRegisteredError: If the action type is already registered. + """ + if action_type in self._registered_actions: + raise ValidationActionAlreadyRegisteredError(action_type) + + self._registered_actions[action_type] = action_class + + def get(self, action_type: str | None) -> Type[ValidationAction]: + """ + Return a ValidationAction class based on its type. + Used when instantiating actions from a checkpoint configuration. + + Args: + action_type: The 'type' key from the action configuration. + + Returns: + The ValidationAction class corresponding to the configuration. + + Raises: + ValidationActionRegistryRetrievalError: If the action type is not registered. 
+ """ + if action_type not in self._registered_actions: + raise ValidationActionRegistryRetrievalError(action_type) + + return self._registered_actions[action_type] + + +_VALIDATION_ACTION_REGISTRY = ValidationActionRegistry() + + +@dataclass_transform(kw_only_default=True, field_specifiers=(Field,)) # Enables type hinting +class MetaValidationAction(ModelMetaclass): + """MetaValidationAction registers ValidationAction as they are defined, adding them to + the registry. + + Any class inheriting from ValidationAction will be registered based on the value of the + "type" class attribute. + """ + + def __new__(cls, clsname, bases, attrs): + newclass = super().__new__(cls, clsname, bases, attrs) + + action_type = newclass.__fields__.get("type") + if action_type and action_type.default: # Excludes base classes + _VALIDATION_ACTION_REGISTRY.register( + action_type=action_type.default, action_class=newclass + ) + + return newclass + + @public_api -class ValidationAction(BaseModel): +class ValidationAction(BaseModel, metaclass=MetaValidationAction): """ - ValidationActions define a set of steps to be run after a validation result is produced. + Actions define a set of steps to run after a Validation Result is produced. Subclass `ValidationAction` to create a [custom Action](/docs/core/trigger_actions_based_on_results/create_a_custom_action). Through a Checkpoint, one can orchestrate the validation of data and configure notifications, data documentation updates, - and other actions to take place after the validation result is produced. - """ # noqa: E501 + and other actions to take place after the Validation Result is produced. + """ # noqa: E501 # FIXME CoP class Config: extra = Extra.forbid @@ -125,9 +212,20 @@ class Config: def _using_cloud_context(self) -> bool: return project_manager.is_using_cloud() + @public_api def run( self, checkpoint_result: CheckpointResult, action_context: ActionContext | None = None ) -> dict: + """ + Run the action. 
+ + Args: + checkpoint_result: The result of the checkpoint run. + action_context: The context in which the action is run. + + Returns: + A dictionary containing the result of the action. + """ raise NotImplementedError def _get_data_docs_pages_from_prior_action( @@ -222,7 +320,7 @@ class SlackNotificationAction(DataDocsAction): notify_on: Specifies validation status that triggers notification. One of "all", "failure", "success". notify_with: List of DataDocs site names to display in Slack messages. Defaults to all. show_failed_expectations: Shows a list of failed expectation types. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP type: Literal["slack"] = "slack" @@ -239,7 +337,7 @@ def _validate_renderer(cls, renderer: dict | SlackRenderer) -> SlackRenderer: if isinstance(renderer, dict): _renderer = _build_renderer(config=renderer) if not isinstance(_renderer, SlackRenderer): - raise ValueError( # noqa: TRY003, TRY004 + raise ValueError( # noqa: TRY003, TRY004 # FIXME CoP "renderer must be a SlackRenderer or a valid configuration for one." ) renderer = _renderer @@ -256,7 +354,7 @@ def _root_validate_slack_params(cls, values: dict) -> dict: else: assert slack_token and slack_channel except AssertionError: - raise ValueError("Please provide either slack_webhook or slack_token and slack_channel") # noqa: TRY003 + raise ValueError("Please provide either slack_webhook or slack_token and slack_channel") # noqa: TRY003 # FIXME CoP return values @@ -355,7 +453,7 @@ def _send_slack_notification(self, payload: dict) -> dict: headers = {"Authorization": f"Bearer {slack_token}"} if not url: - raise ValueError("No Slack webhook URL provided.") # noqa: TRY003 + raise ValueError("No Slack webhook URL provided.") # noqa: TRY003 # FIXME CoP try: response = session.post(url=url, headers=headers, json=payload) @@ -391,7 +489,7 @@ class PagerdutyAlertAction(ValidationAction): routing_key: The 32 character Integration Key for an integration on a service or on a global ruleset. 
notify_on: Specifies validation status that triggers notification. One of "all", "failure", "success". severity: The PagerDuty severity levels determine the level of urgency. One of "critical", "error", "warning", or "info". - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP type: Literal["pagerduty"] = "pagerduty" @@ -442,7 +540,7 @@ class MicrosoftTeamsNotificationAction(ValidationAction): Args: teams_webhook: Incoming Microsoft Teams webhook to which to send notifications. notify_on: Specifies validation status that triggers notification. One of "all", "failure", "success". - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP type: Literal["microsoft"] = "microsoft" @@ -455,7 +553,7 @@ def _validate_renderer(cls, renderer: dict | MicrosoftTeamsRenderer) -> Microsof if isinstance(renderer, dict): _renderer = _build_renderer(config=renderer) if not isinstance(_renderer, MicrosoftTeamsRenderer): - raise ValueError( # noqa: TRY003, TRY004 + raise ValueError( # noqa: TRY003, TRY004 # FIXME CoP "renderer must be a MicrosoftTeamsRenderer or a valid configuration for one." ) renderer = _renderer @@ -489,7 +587,7 @@ def run(self, checkpoint_result: CheckpointResult, action_context: ActionContext def _send_microsoft_teams_notifications(self, payload: dict) -> str | None: webhook = self._substitute_config_str_if_needed(self.teams_webhook) if not webhook: # Necessary to appease mypy; this is guaranteed. - raise ValueError("No Microsoft Teams webhook URL provided.") # noqa: TRY003 + raise ValueError("No Microsoft Teams webhook URL provided.") # noqa: TRY003 # FIXME CoP session = requests.Session() try: @@ -528,7 +626,7 @@ class OpsgenieAlertAction(ValidationAction): priority: Specifies the priority of the alert (P1 - P5). notify_on: Specifies validation status that triggers notification. One of "all", "failure", "success". 
tags: Tags to include in the alert - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP type: Literal["opsgenie"] = "opsgenie" @@ -544,7 +642,7 @@ def _validate_renderer(cls, renderer: dict | OpsgenieRenderer) -> OpsgenieRender if isinstance(renderer, dict): _renderer = _build_renderer(config=renderer) if not isinstance(_renderer, OpsgenieRenderer): - raise ValueError( # noqa: TRY003, TRY004 + raise ValueError( # noqa: TRY003, TRY004 # FIXME CoP "renderer must be a OpsgenieRenderer or a valid configuration for one." ) renderer = _renderer @@ -657,7 +755,7 @@ class EmailAction(ValidationAction): use_ssl: Optional. Use of SSL to send the email (using either TLS or SSL is highly recommended). notify_on: "Specifies validation status that triggers notification. One of "all", "failure", "success". notify_with: Optional list of DataDocs site names to display in Slack messages. Defaults to all. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP type: Literal["email"] = "email" @@ -678,7 +776,7 @@ def _validate_renderer(cls, renderer: dict | EmailRenderer) -> EmailRenderer: if isinstance(renderer, dict): _renderer = _build_renderer(config=renderer) if not isinstance(_renderer, EmailRenderer): - raise ValueError( # noqa: TRY003, TRY004 + raise ValueError( # noqa: TRY003, TRY004 # FIXME CoP "renderer must be a EmailRenderer or a valid configuration for one." 
) renderer = _renderer @@ -738,7 +836,7 @@ def run( # sending payload back as dictionary return {"email_result": email_result} - def _send_email( # noqa: C901 + def _send_email( # noqa: C901 # FIXME CoP self, title, html, @@ -777,11 +875,11 @@ def _send_email( # noqa: C901 mailserver.sendmail(sender_alias, receiver_emails_list, msg.as_string()) mailserver.quit() except smtplib.SMTPConnectError: - logger.error(f"Failed to connect to the SMTP server at address: {smtp_address}") # noqa: TRY400 + logger.error(f"Failed to connect to the SMTP server at address: {smtp_address}") # noqa: TRY400 # FIXME CoP except smtplib.SMTPAuthenticationError: - logger.error(f"Failed to authenticate to the SMTP server at address: {smtp_address}") # noqa: TRY400 + logger.error(f"Failed to authenticate to the SMTP server at address: {smtp_address}") # noqa: TRY400 # FIXME CoP except Exception as e: - logger.error(str(e)) # noqa: TRY400 + logger.error(str(e)) # noqa: TRY400 # FIXME CoP else: return "success" @@ -811,7 +909,7 @@ class UpdateDataDocsAction(DataDocsAction): Args: site_names: Optional. A list of the names of sites to update. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP type: Literal["update_data_docs"] = "update_data_docs" @@ -854,7 +952,7 @@ def _run( if validation_result_suite is None: logger.warning( - f"No validation_result_suite was passed to {type(self).__name__} action. Skipping action." # noqa: E501 + f"No validation_result_suite was passed to {type(self).__name__} action. Skipping action." 
# noqa: E501 # FIXME CoP ) return @@ -862,13 +960,13 @@ def _run( validation_result_suite_identifier, (ValidationResultIdentifier, GXCloudIdentifier), ): - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP "validation_result_id must be of type ValidationResultIdentifier or" f" GeCloudIdentifier, not {type(validation_result_suite_identifier)}" ) # TODO Update for RenderedDataDocs - # build_data_docs will return the index page for the validation results, but we want to return the url for the validation result using the code below # noqa: E501 + # build_data_docs will return the index page for the validation results, but we want to return the url for the validation result using the code below # noqa: E501 # FIXME CoP self._build_data_docs( site_names=self.site_names, resource_identifiers=[ @@ -910,7 +1008,7 @@ class SNSNotificationAction(ValidationAction): Args: sns_topic_arn: The SNS Arn to publish messages to. sns_subject: Optional. The SNS Message Subject - defaults to expectation_suite_identifier.name. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP type: Literal["sns"] = "sns" @@ -959,7 +1057,7 @@ def _send_sns_notification(self, sns_subject: str, validation_results: str, **kw response = sns.publish(**message_dict) except sns.exceptions.InvalidParameterException: error_msg = f"Received invalid for message: {validation_results}" - logger.error(error_msg) # noqa: TRY400 + logger.error(error_msg) # noqa: TRY400 # FIXME CoP return error_msg else: return ( @@ -998,7 +1096,7 @@ def send_results(self, payload) -> requests.Response: return requests.post(self.url, headers=headers, data=payload) except Exception as e: print(f"Exception when sending data to API - {e}") - raise e # noqa: TRY201 + raise e # noqa: TRY201 # FIXME CoP @staticmethod def create_payload(data_asset_name, suite_name, validation_results_serializable) -> dict: @@ -1007,14 +1105,3 @@ def create_payload(data_asset_name, suite_name, validation_results_serializable) "data_asset_name": data_asset_name, "validation_results": validation_results_serializable, } - - -CheckpointAction = Annotated[ - Union[ - EmailAction, - MicrosoftTeamsNotificationAction, - SlackNotificationAction, - UpdateDataDocsAction, - ], - Field(discriminator="type"), -] diff --git a/great_expectations/checkpoint/checkpoint.py b/great_expectations/checkpoint/checkpoint.py index 68ccb08bddc1..902ebcdba1ff 100644 --- a/great_expectations/checkpoint/checkpoint.py +++ b/great_expectations/checkpoint/checkpoint.py @@ -21,9 +21,10 @@ from great_expectations.analytics import submit as submit_analytics_event from great_expectations.analytics.events import CheckpointRanEvent from great_expectations.checkpoint.actions import ( + _VALIDATION_ACTION_REGISTRY, ActionContext, - CheckpointAction, UpdateDataDocsAction, + ValidationAction, ) from great_expectations.compatibility.pydantic import ( BaseModel, @@ -81,11 +82,11 @@ class Checkpoint(BaseModel): result_format: The format in which to return the results of the validation definitions. 
Default is ResultFormat.SUMMARY. id: An optional unique identifier for the checkpoint. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP name: str validation_definitions: List[ValidationDefinition] - actions: List[CheckpointAction] = Field(default_factory=list) + actions: List[ValidationAction] = Field(default_factory=list) result_format: ResultFormatUnion = DEFAULT_RESULT_FORMAT id: Union[str, None] = None @@ -121,7 +122,7 @@ class Config: "result_format": "SUMMARY", "id": "b758816-64c8-46cb-8f7e-03c12cea1d67" } - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP extra = Extra.forbid arbitrary_types_allowed = ( @@ -132,8 +133,25 @@ class Config: Renderer: lambda r: r.serialize(), } + @validator("actions", pre=True) + @classmethod + def validate_actions( + cls, action_list: list[ValidationAction] | list[dict] + ) -> list[ValidationAction]: + validated_actions: list[ValidationAction] = [] + for action in action_list: + if isinstance(action, ValidationAction): + validated_actions.append(action) + else: + action_type: str | None = action.get("type") + action_cls = _VALIDATION_ACTION_REGISTRY.get(action_type) + validated_action = action_cls(**action) + validated_actions.append(validated_action) + + return validated_actions + @override - def json( # noqa: PLR0913 + def json( # noqa: PLR0913 # FIXME CoP self, *, include: AbstractSet[int | str] | Mapping[int | str, Any] | None = None, @@ -170,7 +188,7 @@ def json( # noqa: PLR0913 return json.dumps(data_with_validation_definitions, **dumps_kwargs) @override - def dict( # noqa: PLR0913 + def dict( # noqa: PLR0913 # FIXME CoP self, *, include: AbstractSet[int | str] | Mapping[int | str, Any] | None = None, @@ -260,10 +278,10 @@ def _deserialize_identifier_bundles_to_validation_definitions( try: validation_definition = store.get(key=key) except (KeyError, gx_exceptions.InvalidKeyError): - raise ValueError(f"Unable to retrieve validation definition {id_bundle} from store") # noqa: TRY003 + raise ValueError(f"Unable to retrieve 
validation definition {id_bundle} from store") # noqa: TRY003 # FIXME CoP if not validation_definition: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "ValidationDefinitionStore did not retrieve a validation definition" ) validation_definitions.append(validation_definition) @@ -277,6 +295,24 @@ def run( expectation_parameters: SuiteParameterDict | None = None, run_id: RunIdentifier | None = None, ) -> CheckpointResult: + """ + Runs the Checkpoint's underlying Validation Definitions and Actions. + + Args: + batch_parameters: Parameters to be used when loading the Batch. + expectation_parameters: Parameters to be used when validating the Batch. + run_id: An optional unique identifier for the run. + + Returns: + A CheckpointResult object containing the results of the run. + + Raises: + CheckpointRunWithoutValidationDefinitionError: If the Checkpoint is run without any + Validation Definitions. + CheckpointNotAddedError: If the Checkpoint has not been added to the Store. + CheckpointNotFreshError: If the Checkpoint has been modified since it was last added + to the Store. + """ if not self.validation_definitions: raise CheckpointRunWithoutValidationDefinitionError() @@ -376,15 +412,15 @@ def _run_actions( ) action_context.update(action=action, action_result=action_result) - def _sort_actions(self) -> List[CheckpointAction]: + def _sort_actions(self) -> List[ValidationAction]: """ UpdateDataDocsActions are prioritized to run first, followed by all other actions. This is due to the fact that certain actions reference data docs sites, which must be updated first. 
""" - priority_actions: List[CheckpointAction] = [] - secondary_actions: List[CheckpointAction] = [] + priority_actions: List[ValidationAction] = [] + secondary_actions: List[ValidationAction] = [] for action in self.actions: if isinstance(action, UpdateDataDocsAction): priority_actions.append(action) @@ -420,6 +456,7 @@ def is_fresh(self) -> CheckpointFreshnessDiagnostics: @public_api def save(self) -> None: + """Save the current state of this Checkpoint.""" store = project_manager.get_checkpoints_store() key = store.get_key(name=self.name, id=self.id) @@ -439,6 +476,13 @@ def _add_to_store(self) -> None: @public_api class CheckpointResult(BaseModel): + """ + The result of running a Checkpoint. + + Contains information about Expectation successes and failures from running + each Validation Definition in the Checkpoint. + """ + run_id: RunIdentifier run_results: Dict[ValidationResultIdentifier, ExpectationSuiteValidationResult] checkpoint_config: Checkpoint @@ -452,7 +496,7 @@ class Config: def _root_validate_result(cls, values: dict) -> dict: run_results = values["run_results"] if len(run_results) == 0: - raise ValueError("CheckpointResult must contain at least one run result") # noqa: TRY003 + raise ValueError("CheckpointResult must contain at least one run result") # noqa: TRY003 # FIXME CoP if values["success"] is None: values["success"] = all(result.success for result in run_results.values()) diff --git a/great_expectations/compatibility/aws.py b/great_expectations/compatibility/aws.py index 63d7f8cea4f8..478109f034ae 100644 --- a/great_expectations/compatibility/aws.py +++ b/great_expectations/compatibility/aws.py @@ -9,7 +9,7 @@ "AWS Redshift connection component is not installed, please 'pip install sqlalchemy_redshift'" ) ATHENA_NOT_IMPORTED = NotImported( - "AWS Athena connection component is not installed, please 'pip install pyathena[SQLAlchemy]>=2.0.0,<3'" # noqa: E501 + "AWS Athena connection component is not installed, please 'pip install 
pyathena[SQLAlchemy]>=2.0.0,<3'" # noqa: E501 # FIXME CoP ) try: @@ -43,7 +43,7 @@ redshiftdialect = REDSHIFT_NOT_IMPORTED try: - import pyathena # type: ignore[import-not-found] + import pyathena # type: ignore[import-not-found] # FIXME CoP except ImportError: pyathena = ATHENA_NOT_IMPORTED @@ -53,6 +53,8 @@ sqlalchemy_athena = ATHENA_NOT_IMPORTED try: - from pyathena.sqlalchemy_athena import types as athenatypes # type: ignore[import-not-found] + from pyathena.sqlalchemy_athena import ( # type: ignore[import-not-found] # FIXME CoP + types as athenatypes, + ) except (ImportError, AttributeError): athenatypes = ATHENA_NOT_IMPORTED diff --git a/great_expectations/compatibility/azure.py b/great_expectations/compatibility/azure.py index 7c1400c48dd6..57e73971e3fd 100644 --- a/great_expectations/compatibility/azure.py +++ b/great_expectations/compatibility/azure.py @@ -3,7 +3,7 @@ from great_expectations.compatibility.not_imported import NotImported AZURE_BLOB_STORAGE_NOT_IMPORTED = NotImported( - "azure blob storage components are not installed, please 'pip install azure-storage-blob azure-identity azure-keyvault-secrets'" # noqa: E501 + "azure blob storage components are not installed, please 'pip install azure-storage-blob azure-identity azure-keyvault-secrets'" # noqa: E501 # FIXME CoP ) try: diff --git a/great_expectations/compatibility/bigquery.py b/great_expectations/compatibility/bigquery.py index 6d44299f1238..ed4b4e3ec57a 100644 --- a/great_expectations/compatibility/bigquery.py +++ b/great_expectations/compatibility/bigquery.py @@ -39,6 +39,8 @@ try: from sqlalchemy_bigquery import GEOGRAPHY + + BIGQUERY_GEO_SUPPORT = True except (ImportError, AttributeError): GEOGRAPHY = SQLALCHEMY_BIGQUERY_NOT_IMPORTED diff --git a/great_expectations/compatibility/databricks.py b/great_expectations/compatibility/databricks.py index ad0d5072bf5a..c66d3d7710e6 100644 --- a/great_expectations/compatibility/databricks.py +++ b/great_expectations/compatibility/databricks.py @@ 
-1,10 +1,48 @@ +from __future__ import annotations + from great_expectations.compatibility.not_imported import NotImported DATABRICKS_CONNECT_NOT_IMPORTED = NotImported( "databricks-connect is not installed, please 'pip install databricks-connect'" ) +# The following types are modeled after the following documentation that is part +# of the databricks package. +# tldr: SQLAlchemy application should (mostly) "just work" with Databricks, +# other than the exceptions below +# https://github.com/databricks/databricks-sql-python/blob/main/src/databricks/sqlalchemy/README.sqlalchemy.md + +try: + from databricks.sqlalchemy._types import ( + TIMESTAMP_NTZ as TIMESTAMP_NTZ, # noqa: PLC0414, RUF100 # FIXME CoP + ) +except (ImportError, AttributeError): + TIMESTAMP_NTZ = DATABRICKS_CONNECT_NOT_IMPORTED # type: ignore[misc, assignment] # FIXME CoP + +try: + from databricks.sqlalchemy._types import ( + DatabricksStringType as STRING, # noqa: PLC0414, RUF100 # FIXME CoP + ) +except (ImportError, AttributeError): + STRING = DATABRICKS_CONNECT_NOT_IMPORTED # type: ignore[misc, assignment] # FIXME CoP + try: - from databricks import connect -except ImportError: - connect = DATABRICKS_CONNECT_NOT_IMPORTED + from databricks.sqlalchemy._types import ( + TIMESTAMP as TIMESTAMP, # noqa: PLC0414, RUF100 # FIXME CoP + ) +except (ImportError, AttributeError): + TIMESTAMP = DATABRICKS_CONNECT_NOT_IMPORTED # type: ignore[misc, assignment] # FIXME CoP + +try: + from databricks.sqlalchemy._types import TINYINT as TINYINT # noqa: PLC0414, RUF100 # FIXME CoP +except (ImportError, AttributeError): + TINYINT = DATABRICKS_CONNECT_NOT_IMPORTED # type: ignore[misc, assignment] # FIXME CoP + + +class DATABRICKS_TYPES: + """Namespace for Databricks dialect types""" + + TIMESTAMP_NTZ = TIMESTAMP_NTZ + STRING = STRING + TINYINT = TINYINT + TIMESTAMP = TIMESTAMP diff --git a/great_expectations/compatibility/docstring_parser.py b/great_expectations/compatibility/docstring_parser.py index 
6601a52db21e..74cc25787dac 100644 --- a/great_expectations/compatibility/docstring_parser.py +++ b/great_expectations/compatibility/docstring_parser.py @@ -13,9 +13,9 @@ try: import docstring_parser except ImportError: - docstring_parser = DOCSTRING_PARSER_NOT_IMPORTED # type: ignore[assignment] + docstring_parser = DOCSTRING_PARSER_NOT_IMPORTED # type: ignore[assignment] # FIXME CoP try: from docstring_parser import DocstringStyle except ImportError: - DocstringStyle = DOCSTRING_PARSER_NOT_IMPORTED # type: ignore[assignment,misc] + DocstringStyle = DOCSTRING_PARSER_NOT_IMPORTED # type: ignore[assignment,misc] # FIXME CoP diff --git a/great_expectations/compatibility/google.py b/great_expectations/compatibility/google.py index 03eeccea689e..361e82d4ff8f 100644 --- a/great_expectations/compatibility/google.py +++ b/great_expectations/compatibility/google.py @@ -5,31 +5,31 @@ from great_expectations.compatibility.not_imported import NotImported GOOGLE_CLOUD_STORAGE_NOT_IMPORTED = NotImported( - "google cloud storage components are not installed, please 'pip install google-cloud-storage google-cloud-secret-manager'" # noqa: E501 + "google cloud storage components are not installed, please 'pip install google-cloud-storage google-cloud-secret-manager'" # noqa: E501 # FIXME CoP ) with warnings.catch_warnings(): # DeprecationWarning: pkg_resources is deprecated as an API warnings.simplefilter(action="ignore", category=DeprecationWarning) try: - from google.cloud import secretmanager # type: ignore[attr-defined] + from google.cloud import secretmanager # type: ignore[attr-defined] # FIXME CoP except (ImportError, AttributeError): secretmanager = GOOGLE_CLOUD_STORAGE_NOT_IMPORTED try: from google.api_core.exceptions import GoogleAPIError except (ImportError, AttributeError): - GoogleAPIError = GOOGLE_CLOUD_STORAGE_NOT_IMPORTED # type: ignore[assignment,misc] + GoogleAPIError = GOOGLE_CLOUD_STORAGE_NOT_IMPORTED # type: ignore[assignment,misc] # FIXME CoP try: from 
google.auth.exceptions import DefaultCredentialsError except (ImportError, AttributeError): - DefaultCredentialsError = GOOGLE_CLOUD_STORAGE_NOT_IMPORTED # type: ignore[assignment,misc] + DefaultCredentialsError = GOOGLE_CLOUD_STORAGE_NOT_IMPORTED # type: ignore[assignment,misc] # FIXME CoP try: from google.cloud.exceptions import NotFound except (ImportError, AttributeError): - NotFound = GOOGLE_CLOUD_STORAGE_NOT_IMPORTED # type: ignore[assignment,misc] + NotFound = GOOGLE_CLOUD_STORAGE_NOT_IMPORTED # type: ignore[assignment,misc] # FIXME CoP try: from google.cloud import storage @@ -39,7 +39,7 @@ try: from google.cloud import bigquery as python_bigquery except (ImportError, AttributeError): - python_bigquery = GOOGLE_CLOUD_STORAGE_NOT_IMPORTED # type: ignore[assignment] + python_bigquery = GOOGLE_CLOUD_STORAGE_NOT_IMPORTED # type: ignore[assignment] # FIXME CoP try: from google.cloud.storage import Client except (ImportError, AttributeError): @@ -48,9 +48,9 @@ try: from google.oauth2 import service_account except (ImportError, AttributeError): - service_account = GOOGLE_CLOUD_STORAGE_NOT_IMPORTED # type: ignore[assignment] + service_account = GOOGLE_CLOUD_STORAGE_NOT_IMPORTED # type: ignore[assignment] # FIXME CoP try: from google.oauth2.service_account import Credentials except (ImportError, AttributeError): - Credentials = GOOGLE_CLOUD_STORAGE_NOT_IMPORTED # type: ignore[assignment,misc] + Credentials = GOOGLE_CLOUD_STORAGE_NOT_IMPORTED # type: ignore[assignment,misc] # FIXME CoP diff --git a/great_expectations/compatibility/numpy.py b/great_expectations/compatibility/numpy.py index 367666aec33e..87f96762c4e9 100644 --- a/great_expectations/compatibility/numpy.py +++ b/great_expectations/compatibility/numpy.py @@ -22,14 +22,14 @@ def numpy_quantile( """ quantile: npt.NDArray if version.parse(np.__version__) >= version.parse("1.22.0"): - quantile = np.quantile( # type: ignore[call-overload] + quantile = np.quantile( # type: ignore[call-overload] # FIXME CoP a=a, 
q=q, axis=axis, method=method, ) else: - quantile = np.quantile( # type: ignore[call-overload] + quantile = np.quantile( # type: ignore[call-overload] # FIXME CoP a=a, q=q, axis=axis, diff --git a/great_expectations/compatibility/pandas_compatibility.py b/great_expectations/compatibility/pandas_compatibility.py index b6f7c3db5809..7bbe70aef3d1 100644 --- a/great_expectations/compatibility/pandas_compatibility.py +++ b/great_expectations/compatibility/pandas_compatibility.py @@ -9,7 +9,7 @@ ) -def execute_pandas_to_datetime( # noqa: PLR0913 +def execute_pandas_to_datetime( # noqa: PLR0913 # FIXME CoP arg, errors: Literal["raise", "coerce", "ignore"] = "raise", dayfirst: bool = False, @@ -43,14 +43,14 @@ def execute_pandas_to_datetime( # noqa: PLR0913 Returns: Datetime converted output. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if is_version_less_than(pd.__version__, "2.0.0"): return pd.to_datetime( arg=arg, - errors=errors, # type: ignore[arg-type] + errors=errors, # type: ignore[arg-type] # FIXME CoP dayfirst=dayfirst, yearfirst=yearfirst, - utc=utc, # type: ignore[arg-type] + utc=utc, # type: ignore[arg-type] # FIXME CoP format=format, exact=exact, unit=unit, @@ -58,7 +58,7 @@ def execute_pandas_to_datetime( # noqa: PLR0913 origin=origin, cache=cache, ) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP # pandas is 2.0.0 or greater if format is None: format = "mixed" @@ -66,10 +66,10 @@ def execute_pandas_to_datetime( # noqa: PLR0913 # infer_datetime_format is deprecated as of 2.0.0 return pd.to_datetime( arg=arg, - errors=errors, # type: ignore[arg-type] + errors=errors, # type: ignore[arg-type] # FIXME CoP dayfirst=dayfirst, yearfirst=yearfirst, - utc=utc, # type: ignore[arg-type] + utc=utc, # type: ignore[arg-type] # FIXME CoP format=format, unit=unit, origin=origin, @@ -78,10 +78,10 @@ def execute_pandas_to_datetime( # noqa: PLR0913 else: return pd.to_datetime( arg=arg, - errors=errors, # type: ignore[arg-type] + errors=errors, # type: 
ignore[arg-type] # FIXME CoP dayfirst=dayfirst, yearfirst=yearfirst, - utc=utc, # type: ignore[arg-type] + utc=utc, # type: ignore[arg-type] # FIXME CoP format=format, exact=exact, unit=unit, diff --git a/great_expectations/compatibility/postgresql.py b/great_expectations/compatibility/postgresql.py new file mode 100644 index 000000000000..006e17ccd868 --- /dev/null +++ b/great_expectations/compatibility/postgresql.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +from great_expectations.compatibility.not_imported import NotImported + +POSTGRESQL_NOT_IMPORTED = NotImported( + "postgresql connection components are not installed, please 'pip install psycopg2'" +) + +try: + import psycopg2 # noqa: F401 # FIXME CoP + import sqlalchemy.dialects.postgresql as postgresqltypes +except ImportError: + postgresqltypes = POSTGRESQL_NOT_IMPORTED # type: ignore[assignment] # FIXME CoP + +try: + from sqlalchemy.dialects.postgresql import TEXT +except (ImportError, AttributeError): + TEXT = POSTGRESQL_NOT_IMPORTED # type: ignore[misc, assignment] # FIXME CoP + +try: + from sqlalchemy.dialects.postgresql import CHAR +except (ImportError, AttributeError): + CHAR = POSTGRESQL_NOT_IMPORTED # type: ignore[misc, assignment] # FIXME CoP + +try: + from sqlalchemy.dialects.postgresql import INTEGER +except (ImportError, AttributeError): + INTEGER = POSTGRESQL_NOT_IMPORTED # type: ignore[misc, assignment] # FIXME CoP + +try: + from sqlalchemy.dialects.postgresql import SMALLINT +except (ImportError, AttributeError): + SMALLINT = POSTGRESQL_NOT_IMPORTED # type: ignore[misc, assignment] # FIXME CoP + +try: + from sqlalchemy.dialects.postgresql import BIGINT +except (ImportError, AttributeError): + BIGINT = POSTGRESQL_NOT_IMPORTED # type: ignore[misc, assignment] # FIXME CoP + +try: + from sqlalchemy.dialects.postgresql import TIMESTAMP +except (ImportError, AttributeError): + TIMESTAMP = POSTGRESQL_NOT_IMPORTED # type: ignore[misc, assignment] # FIXME CoP + +try: + from 
sqlalchemy.dialects.postgresql import DATE +except (ImportError, AttributeError): + DATE = POSTGRESQL_NOT_IMPORTED # type: ignore[misc, assignment] # FIXME CoP + +try: + from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION +except (ImportError, AttributeError): + DOUBLE_PRECISION = POSTGRESQL_NOT_IMPORTED # type: ignore[misc, assignment] # FIXME CoP + +try: + from sqlalchemy.dialects.postgresql import BOOLEAN +except (ImportError, AttributeError): + BOOLEAN = POSTGRESQL_NOT_IMPORTED # type: ignore[misc, assignment] # FIXME CoP + +try: + from sqlalchemy.dialects.postgresql import NUMERIC +except (ImportError, AttributeError): + NUMERIC = POSTGRESQL_NOT_IMPORTED # type: ignore[misc, assignment] # FIXME CoP + + +class POSTGRESQL_TYPES: + """Namespace for PostgreSQL dialect types.""" + + TEXT = TEXT + CHAR = CHAR + INTEGER = INTEGER + SMALLINT = SMALLINT + BIGINT = BIGINT + TIMESTAMP = TIMESTAMP + DATE = DATE + DOUBLE_PRECISION = DOUBLE_PRECISION + BOOLEAN = BOOLEAN + NUMERIC = NUMERIC diff --git a/great_expectations/compatibility/py4j.py b/great_expectations/compatibility/py4j.py index 4e8dbb6c9b62..2882c572b8a9 100644 --- a/great_expectations/compatibility/py4j.py +++ b/great_expectations/compatibility/py4j.py @@ -3,6 +3,6 @@ PY4J_NOT_IMPORTED = NotImported("py4j is not installed, please 'pip install py4j'") try: - from py4j import protocol # type: ignore[import-untyped] + from py4j import protocol # type: ignore[import-untyped] # FIXME CoP except ImportError: protocol = PY4J_NOT_IMPORTED diff --git a/great_expectations/compatibility/pydantic.py b/great_expectations/compatibility/pydantic.py index 4a54f21c6e22..ba9d1f96cfbe 100644 --- a/great_expectations/compatibility/pydantic.py +++ b/great_expectations/compatibility/pydantic.py @@ -8,7 +8,7 @@ if is_version_greater_or_equal(version=pydantic.VERSION, compare_version="2.0.0"): # TODO: don't use star imports - from pydantic.v1 import * # noqa: F403 + from pydantic.v1 import * # noqa: F403 # FIXME CoP from 
pydantic.v1 import ( AnyUrl, BaseSettings, @@ -29,8 +29,8 @@ else: # TODO: don't use star imports - from pydantic import * # type: ignore[assignment,no-redef] # noqa: F403 - from pydantic import ( # type: ignore[no-redef] + from pydantic import * # type: ignore[assignment,no-redef] # noqa: F403 # FIXME CoP + from pydantic import ( # type: ignore[no-redef] # FIXME CoP AnyUrl, BaseSettings, HttpUrl, @@ -45,8 +45,8 @@ schema, typing, ) - from pydantic.generics import GenericModel # type: ignore[no-redef] - from pydantic.main import ModelMetaclass # type: ignore[no-redef] + from pydantic.generics import GenericModel # type: ignore[no-redef] # FIXME CoP + from pydantic.main import ModelMetaclass # type: ignore[no-redef] # FIXME CoP __all__ = [ "AnyUrl", diff --git a/great_expectations/compatibility/pyspark.py b/great_expectations/compatibility/pyspark.py index 0d25017897dc..099c6dd76a3b 100644 --- a/great_expectations/compatibility/pyspark.py +++ b/great_expectations/compatibility/pyspark.py @@ -7,84 +7,84 @@ SPARK_NOT_IMPORTED = NotImported("pyspark is not installed, please 'pip install pyspark'") with warnings.catch_warnings(): - # DeprecationWarning: typing.io is deprecated, import directly from typing instead. typing.io will be removed in Python 3.12. # noqa: E501 + # DeprecationWarning: typing.io is deprecated, import directly from typing instead. typing.io will be removed in Python 3.12. 
# noqa: E501 # FIXME CoP warnings.simplefilter(action="ignore", category=DeprecationWarning) try: import pyspark except ImportError: - pyspark = SPARK_NOT_IMPORTED # type: ignore[assignment] + pyspark = SPARK_NOT_IMPORTED # type: ignore[assignment] # FIXME CoP try: from pyspark.sql import functions except (ImportError, AttributeError): - functions = SPARK_NOT_IMPORTED # type: ignore[assignment] + functions = SPARK_NOT_IMPORTED # type: ignore[assignment] # FIXME CoP try: from pyspark.sql import types except (ImportError, AttributeError): - types = SPARK_NOT_IMPORTED # type: ignore[assignment] + types = SPARK_NOT_IMPORTED # type: ignore[assignment] # FIXME CoP try: from pyspark import SparkContext except ImportError: - SparkContext = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] + SparkContext = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] # FIXME CoP try: from pyspark.ml.feature import Bucketizer except (ImportError, AttributeError): - Bucketizer = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] + Bucketizer = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] # FIXME CoP try: from pyspark.sql import Column except (ImportError, AttributeError): - Column = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] + Column = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] # FIXME CoP try: from pyspark.sql.connect.dataframe import DataFrame as ConnectDataFrame except (ImportError, AttributeError): - ConnectDataFrame = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] + ConnectDataFrame = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] # FIXME CoP try: from pyspark.sql import DataFrame except (ImportError, AttributeError): - DataFrame = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] + DataFrame = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] # FIXME CoP try: from pyspark.sql import Row except (ImportError, AttributeError): - Row = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] + Row = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] # FIXME 
CoP try: from pyspark.sql import SparkSession except (ImportError, AttributeError): - SparkSession = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] + SparkSession = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] # FIXME CoP try: from pyspark.sql.connect.session import SparkSession as SparkConnectSession except (ImportError, AttributeError): - SparkConnectSession = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] + SparkConnectSession = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] # FIXME CoP try: from pyspark.sql import SQLContext except (ImportError, AttributeError): - SQLContext = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] + SQLContext = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] # FIXME CoP try: from pyspark.sql import Window except (ImportError, AttributeError): - Window = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] + Window = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] # FIXME CoP try: from pyspark.sql.readwriter import DataFrameReader except (ImportError, AttributeError): - DataFrameReader = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] + DataFrameReader = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] # FIXME CoP try: from pyspark.sql.utils import AnalysisException except (ImportError, AttributeError): - AnalysisException = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] + AnalysisException = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] # FIXME CoP try: from pyspark.errors import PySparkAttributeError except (ImportError, AttributeError): - PySparkAttributeError = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] + PySparkAttributeError = SPARK_NOT_IMPORTED # type: ignore[assignment,misc] # FIXME CoP diff --git a/great_expectations/compatibility/snowflake.py b/great_expectations/compatibility/snowflake.py index 64168ebbbf36..565086d42f77 100644 --- a/great_expectations/compatibility/snowflake.py +++ b/great_expectations/compatibility/snowflake.py @@ -5,7 +5,7 @@ from 
great_expectations.compatibility.not_imported import NotImported SNOWFLAKE_NOT_IMPORTED = NotImported( - "snowflake connection components are not installed, please 'pip install snowflake-sqlalchemy snowflake-connector-python'" # noqa: E501 + "snowflake connection components are not installed, please 'pip install snowflake-sqlalchemy snowflake-connector-python'" # noqa: E501 # FIXME CoP ) try: diff --git a/great_expectations/compatibility/sqlalchemy.py b/great_expectations/compatibility/sqlalchemy.py index decd902435f8..f53fade7c3f3 100644 --- a/great_expectations/compatibility/sqlalchemy.py +++ b/great_expectations/compatibility/sqlalchemy.py @@ -10,119 +10,119 @@ try: import sqlalchemy except ImportError: - sqlalchemy = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] + sqlalchemy = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] # FIXME CoP try: from sqlalchemy.sql.selectable import Subquery except (ImportError, AttributeError): - Subquery = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + Subquery = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy import engine except ImportError: - engine = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] + engine = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] # FIXME CoP try: from sqlalchemy import dialects except ImportError: - dialects = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] + dialects = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] # FIXME CoP try: from sqlalchemy import inspect except ImportError: - inspect = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] + inspect = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] # FIXME CoP try: from sqlalchemy.dialects import sqlite except (ImportError, AttributeError): - sqlite = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] + sqlite = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] # FIXME CoP try: from sqlalchemy.dialects import registry except (ImportError, AttributeError): - registry = 
SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] + registry = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] # FIXME CoP try: from sqlalchemy.engine import Dialect except (ImportError, AttributeError): - Dialect = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + Dialect = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.engine import Inspector except (ImportError, AttributeError): - Inspector = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + Inspector = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.engine import reflection except (ImportError, AttributeError): - reflection = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] + reflection = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] # FIXME CoP try: from sqlalchemy.engine import Connection except (ImportError, AttributeError): - Connection = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + Connection = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.engine import Engine except (ImportError, AttributeError): - Engine = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + Engine = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.engine import Row except (ImportError, AttributeError): - Row = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + Row = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.engine.row import RowProxy except (ImportError, AttributeError): - RowProxy = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + RowProxy = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: - from sqlalchemy.engine.row import LegacyRow # type: ignore[attr-defined] + from sqlalchemy.engine.row import LegacyRow # type: ignore[attr-defined] # FIXME CoP except (ImportError, AttributeError): LegacyRow = SQLALCHEMY_NOT_IMPORTED try: from 
sqlalchemy.engine.default import DefaultDialect except (ImportError, AttributeError): - DefaultDialect = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + DefaultDialect = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.engine import url from sqlalchemy.engine.url import URL except (ImportError, AttributeError): - url = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] - URL = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + url = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] # FIXME CoP + URL = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.exc import DatabaseError except (ImportError, AttributeError): - DatabaseError = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + DatabaseError = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.exc import IntegrityError except (ImportError, AttributeError): - IntegrityError = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + IntegrityError = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.exc import NoSuchTableError except (ImportError, AttributeError): - NoSuchTableError = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + NoSuchTableError = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.exc import OperationalError except (ImportError, AttributeError): - OperationalError = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + OperationalError = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.exc import ProgrammingError except (ImportError, AttributeError): - ProgrammingError = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + ProgrammingError = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.exc import SQLAlchemyError except (ImportError, AttributeError): - SQLAlchemyError = 
SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + SQLAlchemyError = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.orm import declarative_base @@ -132,140 +132,142 @@ try: from sqlalchemy.sql import functions except (ImportError, AttributeError): - functions = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] + functions = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] # FIXME CoP try: from sqlalchemy.sql import Insert except (ImportError, AttributeError): - Insert = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + Insert = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.sql.elements import literal except (ImportError, AttributeError): - literal = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] + literal = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] # FIXME CoP try: from sqlalchemy.sql.elements import TextClause except (ImportError, AttributeError): - TextClause = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + TextClause = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.sql.elements import quoted_name except (ImportError, AttributeError): - quoted_name = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + quoted_name = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.sql.elements import _anonymous_label except (ImportError, AttributeError): - _anonymous_label = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + _anonymous_label = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.sql.elements import ColumnElement except (ImportError, AttributeError): - ColumnElement = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + ColumnElement = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.sql.expression import Cast except (ImportError, AttributeError): - Cast = 
SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + Cast = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.sql.expression import ColumnOperators except (ImportError, AttributeError): - ColumnOperators = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + ColumnOperators = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.sql.expression import CTE except (ImportError, AttributeError): - CTE = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + CTE = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.sql.expression import BinaryExpression except (ImportError, AttributeError): - BinaryExpression = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + BinaryExpression = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.sql.expression import BooleanClauseList except (ImportError, AttributeError): - BooleanClauseList = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + BooleanClauseList = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.sql.expression import ColumnClause except (ImportError, AttributeError): - ColumnClause = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + ColumnClause = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.sql.expression import Label except (ImportError, AttributeError): - Label = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + Label = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.sql.expression import Select except (ImportError, AttributeError): - Select = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + Select = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.sql import Selectable except (ImportError, AttributeError): - Selectable = SQLALCHEMY_NOT_IMPORTED # type: 
ignore[misc,assignment] + Selectable = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.sql.expression import TableClause except (ImportError, AttributeError): - TableClause = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + TableClause = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.sql.expression import TextualSelect except (ImportError, AttributeError): - TextualSelect = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + TextualSelect = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.sql.expression import WithinGroup except (ImportError, AttributeError): - WithinGroup = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + WithinGroup = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.sql.operators import custom_op except (ImportError, AttributeError): - custom_op = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + custom_op = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: - from sqlalchemy.engine.cursor import LegacyCursorResult # type: ignore[attr-defined] + from sqlalchemy.engine.cursor import ( # type: ignore[attr-defined] # FIXME CoP + LegacyCursorResult, + ) except (ImportError, AttributeError): LegacyCursorResult = SQLALCHEMY_NOT_IMPORTED try: from sqlalchemy.engine.cursor import CursorResult except (ImportError, AttributeError): - CursorResult = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + CursorResult = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.pool import StaticPool except (ImportError, AttributeError): - StaticPool = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + StaticPool = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy import Table except (ImportError, AttributeError): - Table = SQLALCHEMY_NOT_IMPORTED # type: 
ignore[misc,assignment] + Table = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy import Column except (ImportError, AttributeError): - Column = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + Column = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy import MetaData except (ImportError, AttributeError): - MetaData = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + MetaData = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy import create_engine except (ImportError, AttributeError): - create_engine = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] + create_engine = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] # FIXME CoP try: @@ -281,9 +283,9 @@ try: from sqlalchemy.sql.type_api import TypeEngine except (ImportError, AttributeError): - TypeEngine = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] + TypeEngine = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] # FIXME CoP try: from sqlalchemy.sql import sqltypes except (ImportError, AttributeError): - sqltypes = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] + sqltypes = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] # FIXME CoP diff --git a/great_expectations/compatibility/sqlalchemy_compatibility_wrappers.py b/great_expectations/compatibility/sqlalchemy_compatibility_wrappers.py index 12c5080d4577..453179dd6535 100644 --- a/great_expectations/compatibility/sqlalchemy_compatibility_wrappers.py +++ b/great_expectations/compatibility/sqlalchemy_compatibility_wrappers.py @@ -13,7 +13,7 @@ logger = logging.getLogger(__name__) -def read_sql_table_as_df( # noqa: PLR0913 +def read_sql_table_as_df( # noqa: PLR0913 # FIXME CoP table_name, con, dialect: str, @@ -48,7 +48,7 @@ def read_sql_table_as_df( # noqa: PLR0913 chunksize: If specified, returns an iterator where `chunksize` is the number of rows to include in each chunk. 
dialect: we need to handle `sqlite` differently, so dialect is now optionally passed in. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if is_version_less_than(pd.__version__, "2.0.0"): with warnings.catch_warnings(): warnings.filterwarnings(action="ignore", category=DeprecationWarning) @@ -77,7 +77,7 @@ def read_sql_table_as_df( # noqa: PLR0913 ) -def _read_sql_table_as_df( # noqa: PLR0913 +def _read_sql_table_as_df( # noqa: PLR0913 # FIXME CoP table_name, con, dialect: str, @@ -112,17 +112,17 @@ def _read_sql_table_as_df( # noqa: PLR0913 chunksize: If specified, returns an iterator where `chunksize` is the number of rows to include in each chunk. dialect: we need to handle `sqlite` differently, so dialect is now optionally passed in. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if dialect == GXSqlDialect.TRINO: return pd.read_sql_table( table_name=table_name, con=con, schema=schema, - index_col=index_col, # type: ignore[arg-type] + index_col=index_col, # type: ignore[arg-type] # FIXME CoP coerce_float=coerce_float, parse_dates=parse_dates, columns=columns, - chunksize=chunksize, # type: ignore[arg-type] + chunksize=chunksize, # type: ignore[arg-type] # FIXME CoP ) else: sql_str: str @@ -133,14 +133,14 @@ def _read_sql_table_as_df( # noqa: PLR0913 return pd.read_sql_query( sql=sql_str, con=con, - index_col=index_col, # type: ignore[arg-type] + index_col=index_col, # type: ignore[arg-type] # FIXME CoP coerce_float=coerce_float, parse_dates=parse_dates, - chunksize=chunksize, # type: ignore[arg-type] + chunksize=chunksize, # type: ignore[arg-type] # FIXME CoP ) -def add_dataframe_to_db( # noqa: PLR0913 +def add_dataframe_to_db( # noqa: PLR0913 # FIXME CoP df: pd.DataFrame, name: str, con, @@ -187,7 +187,7 @@ def add_dataframe_to_db( # noqa: PLR0913 * None : Uses standard SQL ``INSERT`` clause (one per row). * 'multi': Pass multiple values in a single ``INSERT`` clause. * callable with signature ``(pd_table, conn, keys, data_iter)``. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if sqlalchemy.sqlalchemy and is_version_less_than(sqlalchemy.sqlalchemy.__version__, "2.0.0"): with warnings.catch_warnings(): # Note that RemovedIn20Warning is the warning class that we see from sqlalchemy @@ -198,22 +198,22 @@ def add_dataframe_to_db( # noqa: PLR0913 name=name, con=con, schema=schema, - if_exists=if_exists, # type: ignore[arg-type] + if_exists=if_exists, # type: ignore[arg-type] # FIXME CoP index=index, index_label=index_label, chunksize=chunksize, dtype=dtype, - method=method, # type: ignore[arg-type] + method=method, # type: ignore[arg-type] # FIXME CoP ) else: df.to_sql( name=name, con=con, schema=schema, - if_exists=if_exists, # type: ignore[arg-type] + if_exists=if_exists, # type: ignore[arg-type] # FIXME CoP index=index, index_label=index_label, chunksize=chunksize, dtype=dtype, - method=method, # type: ignore[arg-type] + method=method, # type: ignore[arg-type] # FIXME CoP ) diff --git a/great_expectations/compatibility/typing_extensions.py b/great_expectations/compatibility/typing_extensions.py index b1d928ac5b3c..68bd6e87e222 100644 --- a/great_expectations/compatibility/typing_extensions.py +++ b/great_expectations/compatibility/typing_extensions.py @@ -6,7 +6,7 @@ # default to the typing_extensions version if available as it contains bug fixes & improvements from typing_extensions import Annotated except ImportError: - from typing import Annotated # type: ignore[assignment] + from typing import Annotated # type: ignore[assignment] # FIXME CoP try: from typing_extensions import override diff --git a/great_expectations/core/batch.py b/great_expectations/core/batch.py index cb9e09c74ff7..e94dd4efe6f6 100644 --- a/great_expectations/core/batch.py +++ b/great_expectations/core/batch.py @@ -24,7 +24,7 @@ from great_expectations.exceptions import InvalidBatchIdError from great_expectations.types import DictDot, SerializableDictDot, safe_deep_copy from great_expectations.util import ( - 
convert_to_json_serializable, # noqa: TID251 + convert_to_json_serializable, # noqa: TID251 # FIXME CoP deep_filter_properties_iterable, load_class, ) @@ -105,7 +105,7 @@ class LegacyBatchDefinition(SerializableDictDot): BatchDefinition """ - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, datasource_name: str, data_connector_name: str, @@ -168,31 +168,31 @@ def _validate_batch_definition( batch_identifiers: IDDict, ) -> None: if datasource_name is None: - raise ValueError("A valid datasource must be specified.") # noqa: TRY003 + raise ValueError("A valid datasource must be specified.") # noqa: TRY003 # FIXME CoP if datasource_name and not isinstance(datasource_name, str): - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"""The type of an datasource name must be a string (Python "str"). The type given is "{type(datasource_name)!s}", which is illegal. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) if data_connector_name is None: - raise ValueError("A valid data_connector must be specified.") # noqa: TRY003 + raise ValueError("A valid data_connector must be specified.") # noqa: TRY003 # FIXME CoP if data_connector_name and not isinstance(data_connector_name, str): - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"""The type of a data_connector name must be a string (Python "str"). The type given is "{type(data_connector_name)!s}", which is illegal. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) if data_asset_name is None: - raise ValueError("A valid data_asset_name must be specified.") # noqa: TRY003 + raise ValueError("A valid data_asset_name must be specified.") # noqa: TRY003 # FIXME CoP if data_asset_name and not isinstance(data_asset_name, str): - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"""The type of a data_asset name must be a string (Python "str"). The type given is "{type(data_asset_name)!s}", which is illegal. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) if batch_identifiers and not isinstance(batch_identifiers, IDDict): - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"""The type of batch_identifiers must be an IDDict object. The type given is \ "{type(batch_identifiers)!s}", which is illegal. """ @@ -266,9 +266,9 @@ class BatchRequestBase(SerializableDictDot): for the internal protocol use, whereby NULL values are allowed as per the internal needs. The BatchRequest class extends BatchRequestBase and adds to it strong validation (described above plus additional attribute validation) so as to formally validate user specified fields. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, datasource_name: str, data_connector_name: str, @@ -373,9 +373,9 @@ def to_json_dict(self) -> dict[str, JSONValues]: A JSON-serializable dict representation of this BatchRequestBase. """ # TODO: 2/4/2022 - # This implementation of "SerializableDictDot.to_json_dict() occurs frequently and should ideally serve as the # noqa: E501 - # reference implementation in the "SerializableDictDot" class itself. However, the circular import dependencies, # noqa: E501 - # due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules # noqa: E501 + # This implementation of "SerializableDictDot.to_json_dict() occurs frequently and should ideally serve as the # noqa: E501 # FIXME CoP + # reference implementation in the "SerializableDictDot" class itself. However, the circular import dependencies, # noqa: E501 # FIXME CoP + # due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules # noqa: E501 # FIXME CoP # make this refactoring infeasible at the present time. 
# if batch_data appears in BatchRequest, temporarily replace it with @@ -384,7 +384,7 @@ def to_json_dict(self) -> dict[str, JSONValues]: serializeable_dict: dict if batch_request_contains_batch_data(batch_request=self): if self.runtime_parameters is None: - raise ValueError("BatchRequestBase missing runtime_parameters during serialization") # noqa: TRY003 + raise ValueError("BatchRequestBase missing runtime_parameters during serialization") # noqa: TRY003 # FIXME CoP batch_data: BatchRequestBase | dict = self.runtime_parameters["batch_data"] self.runtime_parameters["batch_data"] = str(type(batch_data)) @@ -424,7 +424,7 @@ def __repr__(self) -> str: implementation in the "SerializableDictDot" class. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP json_dict: dict = self.to_json_dict() deep_filter_properties_iterable( properties=json_dict, @@ -440,7 +440,7 @@ def __str__(self) -> str: implementation in the "SerializableDictDot" class. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return self.__repr__() @staticmethod @@ -453,35 +453,35 @@ def _validate_init_parameters( ) -> None: # TODO test and check all logic in this validator! if not (datasource_name and isinstance(datasource_name, str)): - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"""The type of an datasource name must be a string (Python "str"). The type given is "{type(datasource_name)!s}", which is illegal. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) if not (data_connector_name and isinstance(data_connector_name, str)): - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"""The type of data_connector name must be a string (Python "str"). The type given is "{type(data_connector_name)!s}", which is illegal. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) if not (data_asset_name and isinstance(data_asset_name, str)): - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"""The type of data_asset name must be a string (Python "str"). The type given is "{type(data_asset_name)!s}", which is illegal. """ ) # TODO Abe 20201015: Switch this to DataConnectorQuery. if data_connector_query and not isinstance(data_connector_query, dict): - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"""The type of data_connector_query must be a dict object. The type given is "{type(data_connector_query)!s}", which is illegal. """ ) if limit and not isinstance(limit, int): - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"""The type of limit must be an integer (Python "int"). The type given is "{type(limit)!s}", which is illegal. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) @@ -534,7 +534,7 @@ class BatchRequest(BatchRequestBase): "batch_spec_passthrough", } - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, datasource_name: str, data_connector_name: str, @@ -609,7 +609,7 @@ class RuntimeBatchRequest(BatchRequestBase): "batch_spec_passthrough", } - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, datasource_name: str, data_connector_name: str, @@ -646,46 +646,46 @@ def _validate_runtime_batch_request_specific_init_parameters( """ We must have both or neither of runtime_parameters and batch_identifiers (but not either one of them). 
This is strict equivalence ("if-and-only") condition ("exclusive NOR"); otherwise, ("exclusive OR") means error. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if (not runtime_parameters and batch_identifiers) or ( runtime_parameters and not batch_identifiers ): - raise ValueError( # noqa: TRY003 - "It must be that either both runtime_parameters and batch_identifiers are present, or both are missing" # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "It must be that either both runtime_parameters and batch_identifiers are present, or both are missing" # noqa: E501 # FIXME CoP ) # if there is a value, make sure it is a dict if runtime_parameters and not (isinstance(runtime_parameters, dict)): - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"""The runtime_parameters must be a non-empty dict object. - The type given is "{type(runtime_parameters)!s}", which is an illegal type or an empty dictionary.""" # noqa: E501 + The type given is "{type(runtime_parameters)!s}", which is an illegal type or an empty dictionary.""" # noqa: E501 # FIXME CoP ) # if there is a value, make sure it is a dict if batch_identifiers and not isinstance(batch_identifiers, dict): - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"""The type for batch_identifiers must be a dict object, with keys being identifiers defined in the - data connector configuration. The type given is "{type(batch_identifiers)!s}", which is illegal.""" # noqa: E501 + data connector configuration. The type given is "{type(batch_identifiers)!s}", which is illegal.""" # noqa: E501 # FIXME CoP ) if batch_spec_passthrough and not (isinstance(batch_spec_passthrough, dict)): - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"""The type for batch_spec_passthrough must be a dict object. The type given is \ "{type(batch_spec_passthrough)!s}", which is illegal. 
""" ) -# TODO: The following class is to support the backward compatibility with the legacy design. # noqa: E501 +# TODO: The following class is to support the backward compatibility with the legacy design. # noqa: E501 # FIXME CoP class BatchMarkers(BatchKwargs): """A BatchMarkers is a special type of BatchKwargs (so that it has a batch_fingerprint) but it generally does NOT require specific keys and instead captures information about the OUTPUT of a datasource's fetch - process, such as the timestamp at which a query was executed.""" # noqa: E501 + process, such as the timestamp at which a query was executed.""" # noqa: E501 # FIXME CoP def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) if "ge_load_time" not in self: - raise InvalidBatchIdError("BatchMarkers requires a ge_load_time") # noqa: TRY003 + raise InvalidBatchIdError("BatchMarkers requires a ge_load_time") # noqa: TRY003 # FIXME CoP @property def ge_load_time(self): @@ -708,8 +708,8 @@ def head(self, *args, **kwargs) -> pd.DataFrame: # TODO: This module needs to be cleaned up. # We have Batch used for the legacy design, and we also need Batch for the new design. -# However, right now, the Batch from the legacy design is imported into execution engines of the new design. # noqa: E501 -# As a result, we have multiple, inconsistent versions of BatchMarkers, extending legacy/new classes. # noqa: E501 +# However, right now, the Batch from the legacy design is imported into execution engines of the new design. # noqa: E501 # FIXME CoP +# As a result, we have multiple, inconsistent versions of BatchMarkers, extending legacy/new classes. # noqa: E501 # FIXME CoP # TODO: See also "great_expectations/datasource/types/batch_spec.py". @deprecated_argument(argument_name="data_context", version="0.14.0") @deprecated_argument(argument_name="datasource_name", version="0.14.0") @@ -742,7 +742,7 @@ class Batch(SerializableDictDot): Batch instance created. 
""" - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, data: BatchDataType | None = None, batch_request: BatchRequestBase | dict | None = None, @@ -762,7 +762,7 @@ def __init__( # noqa: PLR0913 self._batch_request = batch_request if batch_definition is None: - batch_definition = IDDict() # type: ignore[assignment] + batch_definition = IDDict() # type: ignore[assignment] # FIXME CoP self._batch_definition = batch_definition @@ -900,9 +900,9 @@ def head(self, n_rows: int = 5, fetch_all: bool = False) -> pd.DataFrame: Returns: A Pandas DataFrame """ - self._data.execution_engine.batch_manager.load_batch_list(batch_list=[self]) # type: ignore[union-attr] + self._data.execution_engine.batch_manager.load_batch_list(batch_list=[self]) # type: ignore[union-attr] # FIXME CoP metrics_calculator = _get_metrics_calculator_class()( - execution_engine=self._data.execution_engine, # type: ignore[union-attr, arg-type] + execution_engine=self._data.execution_engine, # type: ignore[union-attr, arg-type] # FIXME CoP show_progress_bars=True, ) table_head_df: pd.DataFrame = metrics_calculator.head( @@ -1007,7 +1007,7 @@ def get_batch_request_as_dict( return batch_request # type: ignore[return-value] # FDS BatchRequest is missing data_connector (other fields) -def _get_block_batch_request( # noqa: C901, PLR0913 +def _get_block_batch_request( # noqa: C901, PLR0913 # FIXME CoP *, datasource_name: str | None = None, data_connector_name: str | None = None, @@ -1032,7 +1032,7 @@ def _get_block_batch_request( # noqa: C901, PLR0913 """ if data_connector_query is None: if batch_filter_parameters is not None and batch_identifiers is not None: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP 'Must provide either "batch_filter_parameters" or "batch_identifiers", not both.' 
) @@ -1094,7 +1094,7 @@ def _get_block_batch_request( # noqa: C901, PLR0913 return BatchRequest(**batch_request_as_dict) -def _get_runtime_batch_request( # noqa: PLR0913 +def _get_runtime_batch_request( # noqa: PLR0913 # FIXME CoP *, datasource_name: str, data_connector_name: str, @@ -1113,7 +1113,7 @@ def _get_runtime_batch_request( # noqa: PLR0913 ): # one of these must be specified for runtime batch requests # parameter checking if len([arg for arg in [batch_data, query, path] if arg is not None]) > 1: - raise ValueError("Must provide only one of batch_data, query, or path.") # noqa: TRY003 + raise ValueError("Must provide only one of batch_data, query, or path.") # noqa: TRY003 # FIXME CoP if runtime_parameters and any( [ @@ -1122,8 +1122,8 @@ def _get_runtime_batch_request( # noqa: PLR0913 path and "path" in runtime_parameters, ] ): - raise ValueError( # noqa: TRY003 - "If batch_data, query, or path arguments are provided, the same keys cannot appear in the " # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "If batch_data, query, or path arguments are provided, the same keys cannot appear in the " # noqa: E501 # FIXME CoP "runtime_parameters argument." 
) @@ -1167,7 +1167,7 @@ def _get_runtime_batch_request( # noqa: PLR0913 return None -def get_batch_request_from_acceptable_arguments( # noqa: PLR0913 +def get_batch_request_from_acceptable_arguments( # noqa: PLR0913 # FIXME CoP datasource_name: str | None = None, data_connector_name: str | None = None, data_asset_name: str | None = None, @@ -1226,7 +1226,7 @@ def get_batch_request_from_acceptable_arguments( # noqa: PLR0913 Returns: (BatchRequest, RuntimeBatchRequest or FluentBatchRequest) The formal BatchRequest, RuntimeBatchRequest or FluentBatchRequest object - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # block-style batch-request args, includes arguments for both runtime and basic batch requests block_config_args = { @@ -1251,10 +1251,10 @@ def block_style_args() -> list[str]: """Returns a list of the block-config batch request arguments""" return [k for k, v in block_config_args.items() if v] - # ensure that the first parameter is datasource_name, which should be a str. This check prevents users # noqa: E501 + # ensure that the first parameter is datasource_name, which should be a str. This check prevents users # noqa: E501 # FIXME CoP # from passing in batch_request as an unnamed parameter. 
if datasource_name and not isinstance(datasource_name, str): - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"the first parameter, datasource_name, must be a str, not {type(datasource_name)}" ) @@ -1264,7 +1264,7 @@ def block_style_args() -> list[str]: batch_request, (BatchRequest, RuntimeBatchRequest, _get_fluent_batch_request_class()), ): - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP "batch_request must be a BatchRequest, RuntimeBatchRequest, or a " f"fluent BatchRequest object, not {type(batch_request)}" ) @@ -1292,8 +1292,8 @@ def block_style_args() -> list[str]: if (datasource_name and data_asset_name) and not data_connector_name: block_args = block_style_args() if block_args: - raise ValueError( # noqa: TRY003 - f"Arguments: {', '.join(block_args)} are not supported for Fluent Batch Requests. Block-config Requests require a data connector name" # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + f"Arguments: {', '.join(block_args)} are not supported for Fluent Batch Requests. Block-config Requests require a data connector name" # noqa: E501 # FIXME CoP ) result = _get_fluent_batch_request_class()( diff --git a/great_expectations/core/batch_definition.py b/great_expectations/core/batch_definition.py index 3618377ca573..48a921603c9f 100644 --- a/great_expectations/core/batch_definition.py +++ b/great_expectations/core/batch_definition.py @@ -51,6 +51,9 @@ class BatchDefinition(pydantic.GenericModel, Generic[PartitionerT]): @property @public_api def data_asset(self) -> DataAsset[Any, PartitionerT]: + """ + The parent DataAsset for this Batch Definition. + """ return self._data_asset def set_data_asset(self, data_asset: DataAsset[Any, PartitionerT]) -> None: @@ -66,6 +69,15 @@ def build_batch_request( partitioner=self.partitioner, ) + @public_api + def save(self) -> None: + """ + Save the batch definition to the underlying data context. 
+ """ + project_datasources = project_manager.get_datasources() + data_source = self.data_asset.datasource + project_datasources.set_datasource(name=data_source.name, ds=data_source) + @public_api def get_batch(self, batch_parameters: Optional[BatchParameters] = None) -> Batch: """ diff --git a/great_expectations/core/batch_manager.py b/great_expectations/core/batch_manager.py index 20bb8ff923a4..51509df65cb1 100644 --- a/great_expectations/core/batch_manager.py +++ b/great_expectations/core/batch_manager.py @@ -31,7 +31,7 @@ def __init__( Args: execution_engine: The ExecutionEngine to be used to access cache of loaded Batch objects. batch_list: List of Batch objects available from external source (default is None). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP self._execution_engine: ExecutionEngine = execution_engine self._active_batch_id: Optional[str] = None @@ -64,7 +64,7 @@ def active_batch_data_id(self) -> Optional[str]: some reason), then if there is exactly and unambiguously one loaded "BatchData" object, then it will play the role of the "active_batch_data_id", which is needed to compute a metric (by the particular ExecutionEngine). However, if there is more than one, then "active_batch_data_id" becomes ambiguous, and thus "None" is returned. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if self._active_batch_data_id is not None: return self._active_batch_data_id @@ -83,7 +83,7 @@ def active_batch_data(self) -> Optional[BatchDataUnion]: @property def batch_cache(self) -> Dict[str, AnyBatch]: - """Getter for ordered dictionary (cache) of "Batch" objects in use (with batch_id as key).""" # noqa: E501 + """Getter for ordered dictionary (cache) of "Batch" objects in use (with batch_id as key).""" # noqa: E501 # FIXME CoP return self._batch_cache @property @@ -96,7 +96,7 @@ def active_batch_id(self) -> Optional[str]: situations, no new "BatchData" objects have been loaded; however, a new "Validator" object was instantiated with the list of "Batch" objects, each of whose BatchData has already been loaded (and cached). Since BatchData IDs are from the same name space as Batch IDs, this helps avoid unnecessary loading of data from different backends. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP active_batch_data_id: Optional[str] = self.active_batch_data_id if active_batch_data_id != self._active_batch_id: logger.warning("ID of active Batch and ID of active loaded BatchData differ.") @@ -147,18 +147,18 @@ def load_batch_list(self, batch_list: Sequence[AnyBatch]) -> None: try: assert isinstance( batch, (Batch, _get_fluent_batch_class()) - ), "Batch objects provided to BatchManager must be formal Great Expectations Batch typed objects." # noqa: E501 + ), "Batch objects provided to BatchManager must be formal Great Expectations Batch typed objects." 
# noqa: E501 # FIXME CoP except AssertionError as e: - logger.error(str(e)) # noqa: TRY400 + logger.error(str(e)) # noqa: TRY400 # FIXME CoP self._execution_engine.load_batch_data( batch_id=batch.id, - batch_data=batch.data, # type: ignore[arg-type] + batch_data=batch.data, # type: ignore[arg-type] # FIXME CoP ) self._batch_cache[batch.id] = batch - # We set the active_batch_id in each iteration of the loop to keep in sync with the active_batch_data_id # noqa: E501 - # that has been loaded. Hence, the final active_batch_id will be that of the final BatchData loaded. # noqa: E501 + # We set the active_batch_id in each iteration of the loop to keep in sync with the active_batch_data_id # noqa: E501 # FIXME CoP + # that has been loaded. Hence, the final active_batch_id will be that of the final BatchData loaded. # noqa: E501 # FIXME CoP self._active_batch_id = batch.id def save_batch_data(self, batch_id: str, batch_data: BatchDataUnion) -> None: diff --git a/great_expectations/core/batch_spec.py b/great_expectations/core/batch_spec.py index acc7bffc6503..ba56133cc92e 100644 --- a/great_expectations/core/batch_spec.py +++ b/great_expectations/core/batch_spec.py @@ -20,19 +20,19 @@ # TODO: This module needs to be cleaned up. # We have Batch used for the legacy design, and we also need Batch for the new design. -# However, right now, the Batch from the legacy design is imported into execution engines of the new design. # noqa: E501 -# As a result, we have multiple, inconsistent versions of BatchMarkers, extending legacy/new classes. # noqa: E501 +# However, right now, the Batch from the legacy design is imported into execution engines of the new design. # noqa: E501 # FIXME CoP +# As a result, we have multiple, inconsistent versions of BatchMarkers, extending legacy/new classes. # noqa: E501 # FIXME CoP # TODO: See also "great_expectations/core/batch.py". # TODO: The following class is part of the new design. 
class BatchMarkers(BatchSpec): """A BatchMarkers is a special type of BatchSpec (so that it has a batch_fingerprint) but it generally does NOT require specific keys and instead captures information about the OUTPUT of a datasource's fetch - process, such as the timestamp at which a query was executed.""" # noqa: E501 + process, such as the timestamp at which a query was executed.""" # noqa: E501 # FIXME CoP def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) if "ge_load_time" not in self: - raise InvalidBatchIdError("BatchMarkers requires a ge_load_time") # noqa: TRY003 + raise InvalidBatchIdError("BatchMarkers requires a ge_load_time") # noqa: TRY003 # FIXME CoP @property def ge_load_time(self): @@ -90,15 +90,15 @@ def __init__( kwargs["reader_options"] = reader_options super().__init__(*args, **kwargs) if "path" not in self: - raise InvalidBatchSpecError("PathBatchSpec requires a path element") # noqa: TRY003 + raise InvalidBatchSpecError("PathBatchSpec requires a path element") # noqa: TRY003 # FIXME CoP @property def path(self) -> str: - return self.get("path") # type: ignore[return-value] + return self.get("path") # type: ignore[return-value] # FIXME CoP @property def reader_method(self) -> str: - return self.get("reader_method") # type: ignore[return-value] + return self.get("reader_method") # type: ignore[return-value] # FIXME CoP @property def reader_options(self) -> dict: @@ -143,8 +143,8 @@ def get_reader_function(self) -> Callable[..., pd.DataFrame]: try: return getattr(fabric, self.reader_method) except AttributeError: - raise AttributeError( # noqa: TRY003 - f"FabricBatchSpec reader_method {self.reader_method} not found in sempy.fabric module" # noqa: E501 + raise AttributeError( # noqa: TRY003 # FIXME CoP + f"FabricBatchSpec reader_method {self.reader_method} not found in sempy.fabric module" # noqa: E501 # FIXME CoP ) @@ -181,7 +181,7 @@ def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) if 
self.batch_data is None: - raise InvalidBatchSpecError("RuntimeDataBatchSpec batch_data cannot be None") # noqa: TRY003 + raise InvalidBatchSpecError("RuntimeDataBatchSpec batch_data cannot be None") # noqa: TRY003 # FIXME CoP @property def batch_data(self): @@ -197,7 +197,7 @@ def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) if self.query is None: - raise InvalidBatchSpecError("RuntimeQueryBatchSpec query cannot be None") # noqa: TRY003 + raise InvalidBatchSpecError("RuntimeQueryBatchSpec query cannot be None") # noqa: TRY003 # FIXME CoP @property def query(self): @@ -212,9 +212,9 @@ class GlueDataCatalogBatchSpec(BatchSpec): def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) if "database_name" not in self: - raise InvalidBatchSpecError("GlueDataCatalogBatchSpec requires a database_name") # noqa: TRY003 + raise InvalidBatchSpecError("GlueDataCatalogBatchSpec requires a database_name") # noqa: TRY003 # FIXME CoP if "table_name" not in self: - raise InvalidBatchSpecError("GlueDataCatalogBatchSpec requires a table_name") # noqa: TRY003 + raise InvalidBatchSpecError("GlueDataCatalogBatchSpec requires a table_name") # noqa: TRY003 # FIXME CoP @property def reader_method(self) -> str: diff --git a/great_expectations/core/config_peer.py b/great_expectations/core/config_peer.py index b91d58c1d548..741b1fb67d1a 100644 --- a/great_expectations/core/config_peer.py +++ b/great_expectations/core/config_peer.py @@ -37,7 +37,7 @@ class ConfigPeer(ABC): configuration classes themselves. Hence, as part of this change, ConfigPeer will cease being the superclass of business objects (such as BaseDataContext, BaseCheckpoint, and BaseRuleBasedProfiler). Instead, every persistable business object will contain a reference to its corresponding peer class, supporting the ConfigPeer interfaces. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP @property @abstractmethod @@ -68,7 +68,7 @@ def get_config( elif mode == ConfigOutputModes.JSON_DICT: config_kwargs = config.to_json_dict() else: - raise ValueError(f'Unknown mode {mode} in "BaseCheckpoint.get_config()".') # noqa: TRY003 + raise ValueError(f'Unknown mode {mode} in "BaseCheckpoint.get_config()".') # noqa: TRY003 # FIXME CoP kwargs["inplace"] = True filter_properties_dict( diff --git a/great_expectations/core/config_provider.py b/great_expectations/core/config_provider.py index 1bd89a1e0303..ca0dc1ed7d77 100644 --- a/great_expectations/core/config_provider.py +++ b/great_expectations/core/config_provider.py @@ -9,7 +9,7 @@ from great_expectations.compatibility.typing_extensions import override from great_expectations.core.config_substitutor import _ConfigurationSubstitutor from great_expectations.core.yaml_handler import YAMLHandler -from great_expectations.data_context.types.base import GXCloudConfig # noqa: TCH001 +from great_expectations.data_context.types.base import GXCloudConfig # noqa: TCH001 # FIXME CoP yaml = YAMLHandler() @@ -37,7 +37,7 @@ def substitute_config(self, config: Any, config_values: Optional[Dict[str, str]] Returns: The input config object with any $VARIABLES replaced with their corresponding config values. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if config_values is None: config_values = self.get_values() return self._substitutor.substitute_all_config_variables(config, config_values) @@ -70,7 +70,7 @@ def register_provider(self, provider: _AbstractConfigurationProvider) -> None: """ type_ = type(provider) if type_ in self._providers: - raise ValueError(f"Provider of type {type_} has already been registered!") # noqa: TRY003 + raise ValueError(f"Provider of type {type_} has already been registered!") # noqa: TRY003 # FIXME CoP self._providers[type_] = provider def get_provider( @@ -147,16 +147,16 @@ def __init__( def get_values(self) -> Dict[str, str]: env_vars = dict(os.environ) # noqa: TID251 # os.environ allowed in config files try: - # If the user specifies the config variable path with an environment variable, we want to substitute it # noqa: E501 - defined_path: str = self._substitutor.substitute_config_variable( # type: ignore[assignment] + # If the user specifies the config variable path with an environment variable, we want to substitute it # noqa: E501 # FIXME CoP + defined_path: str = self._substitutor.substitute_config_variable( # type: ignore[assignment] # FIXME CoP self._config_variables_file_path, env_vars ) - if not os.path.isabs(defined_path): # noqa: PTH117 + if not os.path.isabs(defined_path): # noqa: PTH117 # FIXME CoP root_directory: str = self._root_directory or os.curdir else: root_directory = "" - var_path = os.path.join(root_directory, defined_path) # noqa: PTH118 + var_path = os.path.join(root_directory, defined_path) # noqa: PTH118 # FIXME CoP with open(var_path) as config_variables_file: contents = config_variables_file.read() diff --git a/great_expectations/core/config_substitutor.py b/great_expectations/core/config_substitutor.py index edce5f77735b..b6b680eeb771 100644 --- a/great_expectations/core/config_substitutor.py +++ b/great_expectations/core/config_substitutor.py @@ -28,7 +28,7 @@ class _ConfigurationSubstitutor: should 
be defined herein. """ - AWS_PATTERN = r"^secret\|arn:aws:secretsmanager:([a-z\-0-9]+):([0-9]{12}):secret:([a-zA-Z0-9\/_\+=\.@\-]+)" # noqa: E501 + AWS_PATTERN = r"^secret\|arn:aws:secretsmanager:([a-z\-0-9]+):([0-9]{12}):secret:([a-zA-Z0-9\/_\+=\.@\-]+)" # noqa: E501 # FIXME CoP AWS_SSM_PATTERN = ( r"^secret\|arn:aws:ssm:([a-z\-0-9]+):([0-9]{12}):parameter\/([a-zA-Z0-9\/_\+=\.@\-]+)" ) @@ -39,7 +39,7 @@ class _ConfigurationSubstitutor: ) def __init__(self) -> None: - # Using the @lru_cache decorator on method calls can create memory leaks - an attr is preferred here. # noqa: E501 + # Using the @lru_cache decorator on method calls can create memory leaks - an attr is preferred here. # noqa: E501 # FIXME CoP # Ref: https://stackoverflow.com/a/68550238 self._secret_store_cache = lru_cache(maxsize=None)(self._substitute_value_from_secret_store) @@ -105,7 +105,7 @@ def substitute_config_variable( is not desired. :return: a string with values substituted, or the same object if template_str is not a string. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if template_str is None: return template_str @@ -127,10 +127,10 @@ def substitute_config_variable( return config_variable_value template_str = template_str.replace(m.group(), config_variable_value) else: - raise gx_exceptions.MissingConfigVariableError( # noqa: TRY003 + raise gx_exceptions.MissingConfigVariableError( # noqa: TRY003 # FIXME CoP f"""\n\nUnable to find a match for config substitution variable: `{config_variable_name}`. Please add this missing variable to your `uncommitted/config_variables.yml` file or your environment variables. 
- See https://docs.greatexpectations.io/docs/core/configure_project_settings/configure_credentials""", # noqa: E501 + See https://docs.greatexpectations.io/docs/core/configure_project_settings/configure_credentials""", # noqa: E501 # FIXME CoP missing_config_variable=config_variable_name, ) @@ -162,7 +162,7 @@ def _substitute_value_from_secret_store(self, value: str) -> str: :return: a string with the value substituted by the secret from the secret store, or the same object if value is not a string. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if isinstance(value, str): if re.match(self.AWS_PATTERN, value): return self._substitute_value_from_aws_secrets_manager(value) @@ -198,21 +198,21 @@ def _substitute_value_from_aws_secrets_manager(self, value: str) -> str: :return: a string with the value substituted by the secret from the AWS Secrets Manager store :raises: ImportError, ValueError - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP regex = re.compile( rf"{self.AWS_PATTERN}(?:\:([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}))?(?:\|([^\|]+))?$" ) if not aws.boto3: logger.error( - "boto3 is not installed, please install great_expectations with aws_secrets extra > " # noqa: E501 + "boto3 is not installed, please install great_expectations with aws_secrets extra > " # noqa: E501 # FIXME CoP "pip install great_expectations[aws_secrets]" ) - raise ImportError("Could not import boto3") # noqa: TRY003 + raise ImportError("Could not import boto3") # noqa: TRY003 # FIXME CoP matches = regex.match(value) if not matches: - raise ValueError(f"Could not match the value with regex {regex}") # noqa: TRY003 + raise ValueError(f"Could not match the value with regex {regex}") # noqa: TRY003 # FIXME CoP region_name = matches.group(1) secret_name = matches.group(3) @@ -230,7 +230,7 @@ def _substitute_value_from_aws_secrets_manager(self, value: str) -> str: else: secret_response = client.get_secret_value(SecretId=secret_name) # Decrypts secret using the associated 
KMS CMK. - # Depending on whether the secret is a string or binary, one of these fields will be populated. # noqa: E501 + # Depending on whether the secret is a string or binary, one of these fields will be populated. # noqa: E501 # FIXME CoP if "SecretString" in secret_response: secret = secret_response["SecretString"] else: @@ -263,21 +263,21 @@ def _substitute_value_from_aws_ssm(self, value: str) -> str: :return: a string with the value substituted by the secret from the AWS Secrets Manager store :raises: ImportError, ValueError - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP regex = re.compile( rf"{self.AWS_SSM_PATTERN}(?:\:([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}))?(?:\|([^\|]+))?$" ) if not aws.boto3: logger.error( - "boto3 is not installed, please install great_expectations with aws_secrets extra > " # noqa: E501 + "boto3 is not installed, please install great_expectations with aws_secrets extra > " # noqa: E501 # FIXME CoP "pip install great_expectations[aws_secrets]" ) - raise ImportError("Could not import boto3") # noqa: TRY003 + raise ImportError("Could not import boto3") # noqa: TRY003 # FIXME CoP matches = regex.match(value) if not matches: - raise ValueError(f"Could not match the value with regex {regex}") # noqa: TRY003 + raise ValueError(f"Could not match the value with regex {regex}") # noqa: TRY003 # FIXME CoP region_name = matches.group(1) secret_name = matches.group(3) @@ -296,7 +296,7 @@ def _substitute_value_from_aws_ssm(self, value: str) -> str: else: secret_response = client.get_parameter(Name=secret_name, WithDecryption=True) # Decrypts secret using the associated KMS CMK. - # Depending on whether the secret is a string or binary, one of these fields will be populated. # noqa: E501 + # Depending on whether the secret is a string or binary, one of these fields will be populated. 
# noqa: E501 # FIXME CoP secret = secret_response["Parameter"]["Value"] if secret_key: @@ -324,20 +324,20 @@ def _substitute_value_from_gcp_secret_manager(self, value: str) -> str: :return: a string with the value substituted by the secret from the GCP Secret Manager store :raises: ImportError, ValueError - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP regex = re.compile(rf"{self.GCP_PATTERN}(?:\/versions\/([a-z0-9]+))?(?:\|([^\|]+))?$") if not google.secretmanager: logger.error( - "secretmanager is not installed, please install great_expectations with gcp extra > " # noqa: E501 + "secretmanager is not installed, please install great_expectations with gcp extra > " # noqa: E501 # FIXME CoP "pip install great_expectations[gcp]" ) - raise ImportError("Could not import secretmanager from google.cloud") # noqa: TRY003 + raise ImportError("Could not import secretmanager from google.cloud") # noqa: TRY003 # FIXME CoP client = google.secretmanager.SecretManagerServiceClient() matches = regex.match(value) if not matches: - raise ValueError(f"Could not match the value with regex {regex}") # noqa: TRY003 + raise ValueError(f"Could not match the value with regex {regex}") # noqa: TRY003 # FIXME CoP project_id = matches.group(1) secret_id = matches.group(2) @@ -377,18 +377,18 @@ def _substitute_value_from_azure_keyvault(self, value: str) -> str: :return: a string with the value substituted by the secret from the Azure Key Vault store :raises: ImportError, ValueError - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP regex = re.compile(rf"{self.AZURE_PATTERN}(?:\/([a-f0-9]{32}))?(?:\|([^\|]+))?$") if not azure.SecretClient: # type: ignore[truthy-function] # False if NotImported logger.error( - "SecretClient is not installed, please install great_expectations with azure_secrets extra > " # noqa: E501 + "SecretClient is not installed, please install great_expectations with azure_secrets extra > " # noqa: E501 # FIXME CoP "pip install great_expectations[azure_secrets]" ) - raise 
ImportError("Could not import SecretClient from azure.keyvault.secrets") # noqa: TRY003 + raise ImportError("Could not import SecretClient from azure.keyvault.secrets") # noqa: TRY003 # FIXME CoP matches = regex.match(value) if not matches: - raise ValueError(f"Could not match the value with regex {regex}") # noqa: TRY003 + raise ValueError(f"Could not match the value with regex {regex}") # noqa: TRY003 # FIXME CoP keyvault_uri = matches.group(1) secret_name = matches.group(2) diff --git a/great_expectations/core/data_context_key.py b/great_expectations/core/data_context_key.py index 60ac1778a3b9..6b19ef37442e 100644 --- a/great_expectations/core/data_context_key.py +++ b/great_expectations/core/data_context_key.py @@ -11,7 +11,7 @@ class DataContextKey(metaclass=ABCMeta): A DataContextKey is designed to support clear naming with multiple representations including a hashable version making it suitable for use as the key in a dictionary. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP @abstractmethod def to_tuple(self) -> tuple: diff --git a/great_expectations/core/domain.py b/great_expectations/core/domain.py index bf198157cf63..56fdac220ba4 100644 --- a/great_expectations/core/domain.py +++ b/great_expectations/core/domain.py @@ -10,7 +10,7 @@ from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.types import SerializableDictDot, SerializableDotDict from great_expectations.util import ( - convert_to_json_serializable, # noqa: TID251 + convert_to_json_serializable, # noqa: TID251 # FIXME CoP deep_filter_properties_iterable, is_candidate_subset_of_target, ) @@ -57,7 +57,7 @@ def to_json_dict(self) -> dict: class Domain(SerializableDotDict): # Adding an explicit constructor to highlight the specific properties that will be used. 
- def __init__( # noqa: C901 - too complex + def __init__( # noqa: C901 # too complex self, domain_type: Union[str, MetricDomainTypes], domain_kwargs: Optional[Union[Dict[str, Any], DomainKwargs]] = None, @@ -68,16 +68,16 @@ def __init__( # noqa: C901 - too complex try: domain_type = MetricDomainTypes(domain_type.lower()) except (TypeError, KeyError) as e: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f""" {e}: Cannot instantiate Domain (domain_type "{domain_type!s}" of type \ "{type(domain_type)!s}" is not supported). """ ) elif not isinstance(domain_type, MetricDomainTypes): - raise ValueError( # noqa: TRY003, TRY004 + raise ValueError( # noqa: TRY003, TRY004 # FIXME CoP f"""Cannot instantiate Domain (domain_type "{domain_type!s}" of type "{type(domain_type)!s}" is \ not supported). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) if domain_kwargs is None: @@ -114,11 +114,11 @@ def __init__( # noqa: C901 - too complex break if not is_consistent: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""Cannot instantiate Domain (domain_type "{domain_type!s}" of type \ "{type(domain_type)!s}" -- key "{semantic_domain_key}", detected in "{INFERRED_SEMANTIC_TYPE_KEY}" dictionary, does \ not exist as value of appropriate key in "domain_kwargs" dictionary. 
-""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) super().__init__( @@ -155,20 +155,20 @@ def __ne__(self, other): return not self.__eq__(other=other) @override - def __hash__(self) -> int: # type: ignore[override] + def __hash__(self) -> int: # type: ignore[override] # FIXME CoP """Overrides the default implementation""" _result_hash: int = hash(self.id) return _result_hash def is_superset(self, other: Domain) -> bool: - """Determines if other "Domain" object (provided as argument) is contained within this "Domain" object.""" # noqa: E501 + """Determines if other "Domain" object (provided as argument) is contained within this "Domain" object.""" # noqa: E501 # FIXME CoP if other is None: return True return other.is_subset(other=self) def is_subset(self, other: Domain) -> bool: - """Determines if this "Domain" object is contained within other "Domain" object (provided as argument).""" # noqa: E501 + """Determines if this "Domain" object is contained within other "Domain" object (provided as argument).""" # noqa: E501 # FIXME CoP if other is None: return False @@ -177,7 +177,7 @@ def is_subset(self, other: Domain) -> bool: return is_candidate_subset_of_target(candidate=this_json_dict, target=other_json_dict) - # Adding this property for convenience (also, in the future, arguments may not be all set to their default values). # noqa: E501 + # Adding this property for convenience (also, in the future, arguments may not be all set to their default values). 
# noqa: E501 # FIXME CoP @property def id(self) -> str: return IDDict(self.to_json_dict()).to_id() @@ -193,7 +193,7 @@ def to_json_dict(self) -> dict: if key == INFERRED_SEMANTIC_TYPE_KEY: column_name: str semantic_type: Union[str, SemanticDomainTypes] - value = { # noqa: PLW2901 + value = { # noqa: PLW2901 # FIXME CoP column_name: SemanticDomainTypes(semantic_type.lower()).value if isinstance(semantic_type, str) else semantic_type.value @@ -219,7 +219,7 @@ def deep_convert_properties_iterable_to_domain_kwargs( if isinstance(source, dict): return _deep_convert_properties_iterable_to_domain_kwargs(source=DomainKwargs(source)) - # Must allow for non-dictionary source types, since their internal nested structures may contain dictionaries. # noqa: E501 + # Must allow for non-dictionary source types, since their internal nested structures may contain dictionaries. # noqa: E501 # FIXME CoP if isinstance(source, (list, set, tuple)): data_type: type = type(source) diff --git a/great_expectations/core/expectation_diagnostics/expectation_diagnostics.py b/great_expectations/core/expectation_diagnostics/expectation_diagnostics.py index 79400cdea232..d1b949b9dc01 100644 --- a/great_expectations/core/expectation_diagnostics/expectation_diagnostics.py +++ b/great_expectations/core/expectation_diagnostics/expectation_diagnostics.py @@ -8,7 +8,7 @@ from great_expectations.compatibility.typing_extensions import override from great_expectations.core.expectation_diagnostics.expectation_test_data_cases import ( - ExpectationTestDataCases, # noqa: TCH001 + ExpectationTestDataCases, # noqa: TCH001 # FIXME CoP ) from great_expectations.core.expectation_diagnostics.supporting_types import ( AugmentedLibraryMetadata, @@ -28,7 +28,7 @@ ExpectationConfiguration, ) from great_expectations.types import SerializableDictDot -from great_expectations.util import convert_to_json_serializable # noqa: TID251 +from great_expectations.util import convert_to_json_serializable # noqa: TID251 # FIXME CoP 
@dataclass(frozen=True) @@ -40,7 +40,7 @@ class ExpectationDiagnostics(SerializableDictDot): 1. `ExpectationDiagnostics.to_dict()` creates the JSON object that populates the Gallery. 2. `ExpectationDiagnostics.generate_checklist()` creates CLI-type string output to assist with development. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # This object is taken directly from the Expectation class, without modification examples: List[ExpectationTestDataCases] @@ -109,7 +109,7 @@ def _check_docstring( ) -> ExpectationDiagnosticCheckMessage: """Check whether the Expectation has an informative docstring""" - message = 'Has a docstring, including a one-line short description that begins with "Expect" and ends with a period' # noqa: E501 + message = 'Has a docstring, including a one-line short description that begins with "Expect" and ends with a period' # noqa: E501 # FIXME CoP if "short_description" in description: short_description = description["short_description"] else: @@ -148,7 +148,7 @@ def _check_example_cases( examples: List[ExpectationTestDataCases], tests: List[ExpectationTestDiagnostics], ) -> ExpectationDiagnosticCheckMessage: - """Check whether this Expectation has at least one positive and negative example case (and all test cases return the expected output)""" # noqa: E501 + """Check whether this Expectation has at least one positive and negative example case (and all test cases return the expected output)""" # noqa: E501 # FIXME CoP message = "Has at least one positive and negative example case, and all test cases pass" ( @@ -169,7 +169,7 @@ def _check_example_cases( def _check_core_logic_for_at_least_one_execution_engine( backend_test_result_counts: List[ExpectationBackendTestResultCounts], ) -> ExpectationDiagnosticCheckMessage: - """Check whether core logic for this Expectation exists and passes tests on at least one Execution Engine""" # noqa: E501 + """Check whether core logic for this Expectation exists and passes tests on at least one Execution 
Engine""" # noqa: E501 # FIXME CoP sub_messages: List[ExpectationDiagnosticCheckMessageDict] = [] passed = False @@ -185,7 +185,7 @@ def _check_core_logic_for_at_least_one_execution_engine( for result in all_passing: sub_messages.append( { - "message": f"All {result.num_passed} tests for {result.backend} are passing", # noqa: E501 + "message": f"All {result.num_passed} tests for {result.backend} are passing", # noqa: E501 # FIXME CoP "passed": True, } ) @@ -233,7 +233,7 @@ def _get_backends_from_test_results( def _check_core_logic_for_all_applicable_execution_engines( backend_test_result_counts: List[ExpectationBackendTestResultCounts], ) -> ExpectationDiagnosticCheckMessage: - """Check whether core logic for this Expectation exists and passes tests on all applicable Execution Engines""" # noqa: E501 + """Check whether core logic for this Expectation exists and passes tests on all applicable Execution Engines""" # noqa: E501 # FIXME CoP sub_messages: list[ExpectationDiagnosticCheckMessageDict] = [] passed = False @@ -265,13 +265,13 @@ def _check_core_logic_for_all_applicable_execution_engines( for result in some_failing: sub_messages.append( { - "message": f"Only {result.num_passed} / {result.num_passed + result.num_failed} tests for {result.backend} are passing", # noqa: E501 + "message": f"Only {result.num_passed} / {result.num_passed + result.num_failed} tests for {result.backend} are passing", # noqa: E501 # FIXME CoP "passed": False, } ) sub_messages.append( { - "message": f" - Failing: {', '.join(result.failing_names)}", # type: ignore[arg-type] + "message": f" - Failing: {', '.join(result.failing_names)}", # type: ignore[arg-type] # FIXME CoP "passed": False, } ) @@ -294,7 +294,7 @@ def _check_core_logic_for_all_applicable_execution_engines( def _count_positive_and_negative_example_cases( examples: List[ExpectationTestDataCases], ) -> Tuple[int, int]: - """Scans examples and returns a 2-ple with the numbers of cases with success == True and success == 
False""" # noqa: E501 + """Scans examples and returns a 2-ple with the numbers of cases with success == True and success == False""" # noqa: E501 # FIXME CoP positive_cases: int = 0 negative_cases: int = 0 @@ -329,7 +329,7 @@ def _convert_checks_into_output_message( maturity_level: str, maturity_messages: ExpectationDiagnosticMaturityMessages, ) -> str: - """Converts a list of checks into an output string (potentially nested), with ✔ to indicate checks that passed.""" # noqa: E501 + """Converts a list of checks into an output string (potentially nested), with ✔ to indicate checks that passed.""" # noqa: E501 # FIXME CoP output_message = f"Completeness checklist for {class_name} ({maturity_level}):" @@ -433,7 +433,7 @@ def _check_renderer_methods( if all_renderer_types & renderer_types == all_renderer_types: passed = True return ExpectationDiagnosticCheckMessage( - # message="Has all four statement Renderers: question, descriptive, prescriptive, diagnostic", # noqa: E501 + # message="Has all four statement Renderers: question, descriptive, prescriptive, diagnostic", # noqa: E501 # FIXME CoP message="Has both statement Renderers: prescriptive and diagnostic", passed=passed, ) @@ -454,6 +454,6 @@ def _check_manual_code_review( ) -> ExpectationDiagnosticCheckMessage: """Check library_metadata to see if a manual code review has been performed""" return ExpectationDiagnosticCheckMessage( - message="Has passed a manual review by a code owner for code standards and style guides", # noqa: E501 + message="Has passed a manual review by a code owner for code standards and style guides", # noqa: E501 # FIXME CoP passed=library_metadata.manually_reviewed_code, # type: ignore[union-attr] # could be ExpectationDescriptionDiagnostics ) diff --git a/great_expectations/core/expectation_diagnostics/expectation_doctor.py b/great_expectations/core/expectation_diagnostics/expectation_doctor.py index ea177bd9638d..7f38a7cbf1b7 100644 --- 
a/great_expectations/core/expectation_diagnostics/expectation_doctor.py +++ b/great_expectations/core/expectation_diagnostics/expectation_doctor.py @@ -107,7 +107,7 @@ def print_diagnostic_checklist( return checklist - def run_diagnostics( # noqa: PLR0913 + def run_diagnostics( # noqa: PLR0913 # FIXME CoP self, raise_exceptions_for_backends: bool = False, ignore_suppress: bool = False, @@ -118,15 +118,15 @@ def run_diagnostics( # noqa: PLR0913 context: Optional[AbstractDataContext] = None, ) -> ExpectationDiagnostics: if debug_logger is not None: - _debug = lambda x: debug_logger.debug( # noqa: E731 + _debug = lambda x: debug_logger.debug( # noqa: E731 # FIXME CoP f"(run_diagnostics) {x}" ) - _error = lambda x: debug_logger.error( # noqa: E731 + _error = lambda x: debug_logger.error( # noqa: E731 # FIXME CoP f"(run_diagnostics) {x}" ) else: - _debug = lambda x: x # noqa: E731 - _error = lambda x: x # noqa: E731 + _debug = lambda x: x # noqa: E731 # FIXME CoP + _error = lambda x: x # noqa: E731 # FIXME CoP library_metadata: AugmentedLibraryMetadata = self._get_augmented_library_metadata() examples: List[ExpectationTestDataCases] = self._get_examples( @@ -135,7 +135,7 @@ def run_diagnostics( # noqa: PLR0913 gallery_examples: List[ExpectationTestDataCases] = [] for example in examples: _tests_to_include = [test for test in example.tests if test.include_in_gallery] - example = copy.deepcopy(example) # noqa: PLW2901 + example = copy.deepcopy(example) # noqa: PLW2901 # FIXME CoP if _tests_to_include: example.tests = _tests_to_include gallery_examples.append(example) @@ -149,7 +149,7 @@ def run_diagnostics( # noqa: PLR0913 ) if not _expectation_config: _error( - f"Was NOT able to get Expectation configuration for {self._expectation.expectation_type}. " # noqa: E501 + f"Was NOT able to get Expectation configuration for {self._expectation.expectation_type}. " # noqa: E501 # FIXME CoP "Is there at least one sample test where 'success' is True?" 
) metric_diagnostics_list: List[ExpectationMetricDiagnostics] = ( @@ -170,7 +170,7 @@ def run_diagnostics( # noqa: PLR0913 if i is True ] _debug( - f"Implemented engines for {self._expectation.expectation_type}: {', '.join(engines_implemented)}" # noqa: E501 + f"Implemented engines for {self._expectation.expectation_type}: {', '.join(engines_implemented)}" # noqa: E501 # FIXME CoP ) _debug("Getting test results") @@ -193,7 +193,7 @@ def run_diagnostics( # noqa: PLR0913 renderers: List[ExpectationRendererDiagnostics] = self._get_renderer_diagnostics( expectation_type=description_diagnostics.snake_name, test_diagnostics=test_results, - registered_renderers=_registered_renderers, # type: ignore[arg-type] + registered_renderers=_registered_renderers, # type: ignore[arg-type] # FIXME CoP ) maturity_checklist: ExpectationDiagnosticMaturityMessages = self._get_maturity_checklist( @@ -249,7 +249,7 @@ def run_diagnostics( # noqa: PLR0913 ) def _get_augmented_library_metadata(self): - """Introspect the Expectation's library_metadata object (if it exists), and augment it with additional information.""" # noqa: E501 + """Introspect the Expectation's library_metadata object (if it exists), and augment it with additional information.""" # noqa: E501 # FIXME CoP augmented_library_metadata = { "maturity": Maturity.CONCEPT_ONLY, @@ -378,7 +378,7 @@ def _get_examples_from_json(self): results = data["datasets"] return results - def _get_examples( # noqa: C901 - too complex + def _get_examples( # noqa: C901 # too complex self, return_only_gallery_examples: bool = True ) -> List[ExpectationTestDataCases]: """ @@ -388,14 +388,14 @@ def _get_examples( # noqa: C901 - too complex :param return_only_gallery_examples: if True, include only test examples where `include_in_gallery` is true :return: list of examples or [], if no examples exist - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # Currently, only community contrib expectations have an examples attribute all_examples: List[dict] = 
self._expectation.examples or self._get_examples_from_json() included_examples = [] for i, example in enumerate(all_examples, 1): included_test_cases = [] - # As of commit 7766bb5caa4e0 on 1/28/22, only_for does not need to be applied to individual tests # noqa: E501 + # As of commit 7766bb5caa4e0 on 1/28/22, only_for does not need to be applied to individual tests # noqa: E501 # FIXME CoP # See: # - https://github.com/great-expectations/great_expectations/blob/7766bb5caa4e0e5b22fa3b3a5e1f2ac18922fdeb/tests/test_definitions/column_map_expectations/expect_column_values_to_be_unique.json#L174 # - https://github.com/great-expectations/great_expectations/pull/4073 @@ -403,8 +403,8 @@ def _get_examples( # noqa: C901 - too complex top_level_suppress_test_for = example.get("suppress_test_for") for test in example["tests"]: if ( - test.get("include_in_gallery") == True # noqa: E712 - or return_only_gallery_examples == False # noqa: E712 + test.get("include_in_gallery") == True # noqa: E712 # FIXME CoP + or return_only_gallery_examples == False # noqa: E712 # FIXME CoP ): copied_test = copy.deepcopy(test) if top_level_only_for: @@ -420,7 +420,7 @@ def _get_examples( # noqa: C901 - too complex included_test_cases.append(ExpectationLegacyTestCaseAdapter(**copied_test)) # If at least one ExpectationTestCase from the ExpectationTestDataCases was selected, - # then keep a copy of the ExpectationTestDataCases including data and the selected ExpectationTestCases. # noqa: E501 + # then keep a copy of the ExpectationTestDataCases including data and the selected ExpectationTestCases. 
# noqa: E501 # FIXME CoP if len(included_test_cases) > 0: copied_example = copy.deepcopy(example) copied_example["tests"] = included_test_cases @@ -472,7 +472,7 @@ def _get_description_diagnostics(self) -> ExpectationDescriptionDiagnostics: } ) - def _get_expectation_configuration_from_examples( # noqa: C901 - too complex + def _get_expectation_configuration_from_examples( # noqa: C901 # too complex self, examples: List[ExpectationTestDataCases], ) -> Optional[ExpectationConfiguration]: @@ -509,7 +509,7 @@ def _get_execution_engine_diagnostics( """Check to see which execution_engines are fully supported for this Expectation. In order for a given execution engine to count, *every* metric must have support on that execution engines. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if not execution_engine_names: execution_engine_names = [ "PandasExecutionEngine", @@ -533,7 +533,7 @@ def _get_execution_engine_diagnostics( except KeyError: # https://github.com/great-expectations/great_expectations/blob/abd8f68a162eaf9c33839d2c412d8ba84f5d725b/great_expectations/expectations/core/expect_table_row_count_to_equal_other_table.py#L174-L181 # expect_table_row_count_to_equal_other_table does tricky things and replaces - # registered metric "table.row_count" with "table.row_count.self" and "table.row_count.other" # noqa: E501 + # registered metric "table.row_count" with "table.row_count.self" and "table.row_count.other" # noqa: E501 # FIXME CoP if "table.row_count" in metric_diagnostics.name: continue @@ -547,8 +547,8 @@ def _get_metric_diagnostics_list( ) -> List[ExpectationMetricDiagnostics]: """Check to see which Metrics are upstream validation_dependencies for this Expectation.""" - # NOTE: Abe 20210102: Strictly speaking, identifying upstream metrics shouldn't need to rely on an expectation config. # noqa: E501 - # There's probably some part of get_validation_dependencies that can be factored out to remove the dependency. 
# noqa: E501 + # NOTE: Abe 20210102: Strictly speaking, identifying upstream metrics shouldn't need to rely on an expectation config. # noqa: E501 # FIXME CoP + # There's probably some part of get_validation_dependencies that can be factored out to remove the dependency. # noqa: E501 # FIXME CoP if not expectation_config: return [] @@ -569,7 +569,7 @@ def _get_metric_diagnostics_list( return metric_diagnostics_list @classmethod - def _get_test_results( # noqa: PLR0913 + def _get_test_results( # noqa: PLR0913 # FIXME CoP cls, expectation_type: str, test_data_cases: List[ExpectationTestDataCases], @@ -584,15 +584,15 @@ def _get_test_results( # noqa: PLR0913 """Generate test results. This is an internal method for run_diagnostics.""" if debug_logger is not None: - _debug = lambda x: debug_logger.debug( # noqa: E731 + _debug = lambda x: debug_logger.debug( # noqa: E731 # FIXME CoP f"(_get_test_results) {x}" ) - _error = lambda x: debug_logger.error( # noqa: E731 + _error = lambda x: debug_logger.error( # noqa: E731 # FIXME CoP f"(_get_test_results) {x}" ) else: - _debug = lambda x: x # noqa: E731 - _error = lambda x: x # noqa: E731 + _debug = lambda x: x # noqa: E731 # FIXME CoP + _error = lambda x: x # noqa: E731 # FIXME CoP _debug("Starting") test_results = [] @@ -650,7 +650,7 @@ def _get_test_results( # noqa: PLR0913 _duration = _end - _start backend_test_times[exp_test["backend"]].append(_duration) _debug( - f"Took {_duration} seconds to evaluate_json_test_v3_api for {exp_combined_test_name}" # noqa: E501 + f"Took {_duration} seconds to evaluate_json_test_v3_api for {exp_combined_test_name}" # noqa: E501 # FIXME CoP ) if error_message is None: _debug(f"PASSED {exp_combined_test_name}") @@ -669,7 +669,7 @@ def _get_test_results( # noqa: PLR0913 if validation_result: # The ExpectationTestDiagnostics instance will error when calling it's to_dict() - # method (AttributeError: 'ExpectationConfiguration' object has no attribute 'raw_kwargs') # noqa: E501 + # method 
(AttributeError: 'ExpectationConfiguration' object has no attribute 'raw_kwargs') # noqa: E501 # FIXME CoP validation_result.expectation_config.raw_kwargs = ( validation_result.expectation_config._raw_kwargs ) @@ -687,7 +687,7 @@ def _get_test_results( # noqa: PLR0913 for backend_name, test_times in sorted(backend_test_times.items()): _debug( - f"Took {sum(test_times)} seconds to run {len(test_times)} tests {backend_name}--{expectation_type}" # noqa: E501 + f"Took {sum(test_times)} seconds to run {len(test_times)} tests {backend_name}--{expectation_type}" # noqa: E501 # FIXME CoP ) return test_results @@ -701,7 +701,7 @@ def _get_renderer_diagnostics( List[Union[str, LegacyRendererType, LegacyDiagnosticRendererType]] ] = None, ) -> List[ExpectationRendererDiagnostics]: - """Generate Renderer diagnostics for this Expectation, based primarily on a list of ExpectationTestDiagnostics.""" # noqa: E501 + """Generate Renderer diagnostics for this Expectation, based primarily on a list of ExpectationTestDiagnostics.""" # noqa: E501 # FIXME CoP if not standard_renderers: standard_renderers = [ @@ -716,14 +716,14 @@ def _get_renderer_diagnostics( supported_renderers = self._get_registered_renderers( expectation_type=expectation_type, - registered_renderers=registered_renderers, # type: ignore[arg-type] + registered_renderers=registered_renderers, # type: ignore[arg-type] # FIXME CoP ) renderer_diagnostic_list = [] for renderer_name in set(standard_renderers).union(set(supported_renderers)): samples = [] if renderer_name in supported_renderers: - _, renderer = registered_renderers[expectation_type][renderer_name] # type: ignore[call-overload] + _, renderer = registered_renderers[expectation_type][renderer_name] # type: ignore[call-overload] # FIXME CoP for test_diagnostic in test_diagnostics: test_title = test_diagnostic["test_title"] @@ -779,7 +779,7 @@ def _get_registered_renderers( supported_renderers.sort() return supported_renderers - def _get_rendered_result_as_string( 
# noqa: C901, PLR0912 + def _get_rendered_result_as_string( # noqa: C901, PLR0912 # FIXME CoP self, rendered_result ) -> str: """Convenience method to get rendered results as strings.""" @@ -826,8 +826,8 @@ def _get_rendered_result_as_string( # noqa: C901, PLR0912 result = "" else: - raise TypeError( # noqa: TRY003 - f"Expectation._get_rendered_result_as_string can't render type {type(rendered_result)} as a string." # noqa: E501 + raise TypeError( # noqa: TRY003 # FIXME CoP + f"Expectation._get_rendered_result_as_string can't render type {type(rendered_result)} as a string." # noqa: E501 # FIXME CoP ) if "inf" in result: diff --git a/great_expectations/core/expectation_diagnostics/expectation_test_data_cases.py b/great_expectations/core/expectation_diagnostics/expectation_test_data_cases.py index c72535aeadeb..4335fd5a13d4 100644 --- a/great_expectations/core/expectation_diagnostics/expectation_test_data_cases.py +++ b/great_expectations/core/expectation_diagnostics/expectation_test_data_cases.py @@ -68,7 +68,7 @@ class ExpectationLegacyTestCaseAdapter(ExpectationTestCase): * Legacy test cases used "in" (a python reserved word). This has been changed to "input". * To maintain parallelism, we've also made the corresponding change from "out" to "output". * To avoid any ambiguity, ExpectationLegacyTestCaseAdapter only accepts keyword arguments. Positional arguments are not allowed. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def __init__( self, diff --git a/great_expectations/core/expectation_diagnostics/supporting_types.py b/great_expectations/core/expectation_diagnostics/supporting_types.py index fd1e0b990be8..3f34348a3d7a 100644 --- a/great_expectations/core/expectation_diagnostics/supporting_types.py +++ b/great_expectations/core/expectation_diagnostics/supporting_types.py @@ -9,7 +9,7 @@ from typing_extensions import TypedDict from great_expectations.core.expectation_validation_result import ( - ExpectationValidationResult, # noqa: TCH001 + ExpectationValidationResult, # noqa: TCH001 # FIXME CoP ) from great_expectations.types import SerializableDictDot @@ -25,7 +25,7 @@ class Maturity(str, Enum): @dataclass class AugmentedLibraryMetadata(SerializableDictDot): - """An augmented version of the Expectation.library_metadata object, used within ExpectationDiagnostics""" # noqa: E501 + """An augmented version of the Expectation.library_metadata object, used within ExpectationDiagnostics""" # noqa: E501 # FIXME CoP maturity: Maturity tags: List[str] @@ -44,7 +44,7 @@ class AugmentedLibraryMetadata(SerializableDictDot): @classmethod def from_legacy_dict(cls, dict): - """This method is a temporary adapter to allow typing of legacy library_metadata objects, without needing to immediately clean up every object.""" # noqa: E501 + """This method is a temporary adapter to allow typing of legacy library_metadata objects, without needing to immediately clean up every object.""" # noqa: E501 # FIXME CoP temp_dict = {} for k, v in dict.items(): # Ignore parameters that don't match the type definition @@ -52,7 +52,7 @@ def from_legacy_dict(cls, dict): temp_dict[k] = v else: logging.warning( - f"WARNING: Got extra parameter: {k} while instantiating AugmentedLibraryMetadata." # noqa: E501 + f"WARNING: Got extra parameter: {k} while instantiating AugmentedLibraryMetadata." # noqa: E501 # FIXME CoP "This parameter will be ignored." 
"You probably need to clean up a library_metadata object." ) @@ -71,7 +71,7 @@ def from_legacy_dict(cls, dict): @dataclass class ExpectationDescriptionDiagnostics(SerializableDictDot): - """Captures basic descriptive info about an Expectation. Used within the ExpectationDiagnostic object.""" # noqa: E501 + """Captures basic descriptive info about an Expectation. Used within the ExpectationDiagnostic object.""" # noqa: E501 # FIXME CoP camel_name: str snake_name: str @@ -81,7 +81,7 @@ class ExpectationDescriptionDiagnostics(SerializableDictDot): @dataclass class RendererTestDiagnostics(SerializableDictDot): - """Captures information from executing Renderer test cases. Used within the ExpectationRendererDiagnostics object.""" # noqa: E501 + """Captures information from executing Renderer test cases. Used within the ExpectationRendererDiagnostics object.""" # noqa: E501 # FIXME CoP test_title: str rendered_successfully: bool @@ -92,7 +92,7 @@ class RendererTestDiagnostics(SerializableDictDot): @dataclass class ExpectationRendererDiagnostics(SerializableDictDot): - """Captures information about a specific Renderer within an Expectation. Used within the ExpectationDiagnostic object.""" # noqa: E501 + """Captures information about a specific Renderer within an Expectation. Used within the ExpectationDiagnostic object.""" # noqa: E501 # FIXME CoP name: str is_supported: bool @@ -102,7 +102,7 @@ class ExpectationRendererDiagnostics(SerializableDictDot): @dataclass class ExpectationMetricDiagnostics(SerializableDictDot): - """Captures information about a specific Metric dependency for an Expectation. Used within the ExpectationDiagnostic object.""" # noqa: E501 + """Captures information about a specific Metric dependency for an Expectation. 
Used within the ExpectationDiagnostic object.""" # noqa: E501 # FIXME CoP name: str has_question_renderer: bool @@ -110,7 +110,7 @@ class ExpectationMetricDiagnostics(SerializableDictDot): @dataclass class ExpectationExecutionEngineDiagnostics(SerializableDictDot): - """Captures which of the three Execution Engines are supported by an Expectation. Used within the ExpectationDiagnostic object.""" # noqa: E501 + """Captures which of the three Execution Engines are supported by an Expectation. Used within the ExpectationDiagnostic object.""" # noqa: E501 # FIXME CoP PandasExecutionEngine: bool SqlAlchemyExecutionEngine: bool @@ -127,7 +127,7 @@ class ExpectationErrorDiagnostics(SerializableDictDot): @dataclass class ExpectationTestDiagnostics(SerializableDictDot): - """Captures information from executing Expectation test cases. Used within the ExpectationDiagnostic object.""" # noqa: E501 + """Captures information from executing Expectation test cases. Used within the ExpectationDiagnostic object.""" # noqa: E501 # FIXME CoP test_title: str backend: str @@ -166,7 +166,7 @@ class ExpectationDiagnosticCheckMessage(SerializableDictDot): @dataclass class ExpectationDiagnosticMaturityMessages(SerializableDictDot): - """A holder for ExpectationDiagnosticCheckMessages, grouping them by maturity level. Used within the ExpectationDiagnostic object.""" # noqa: E501 + """A holder for ExpectationDiagnosticCheckMessages, grouping them by maturity level. 
Used within the ExpectationDiagnostic object.""" # noqa: E501 # FIXME CoP experimental: List[ExpectationDiagnosticCheckMessage] beta: List[ExpectationDiagnosticCheckMessage] diff --git a/great_expectations/core/expectation_suite.py b/great_expectations/core/expectation_suite.py index c58b6e328123..747175db99b6 100644 --- a/great_expectations/core/expectation_suite.py +++ b/great_expectations/core/expectation_suite.py @@ -45,8 +45,8 @@ from great_expectations.exceptions.exceptions import InvalidKeyError from great_expectations.types import SerializableDictDot from great_expectations.util import ( - convert_to_json_serializable, # noqa: TID251 - ensure_json_serializable, # noqa: TID251 + convert_to_json_serializable, # noqa: TID251 # FIXME CoP + ensure_json_serializable, # noqa: TID251 # FIXME CoP ) if TYPE_CHECKING: @@ -74,7 +74,7 @@ class ExpectationSuite(SerializableDictDot): id: Great Expectations Cloud id for this Expectation Suite. """ - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, name: Optional[str] = None, expectations: Optional[Sequence[Union[dict, ExpectationConfiguration, Expectation]]] = None, @@ -84,7 +84,7 @@ def __init__( # noqa: PLR0913 id: Optional[str] = None, ) -> None: if not name or not isinstance(name, str): - raise ValueError("name must be provided as a non-empty string") # noqa: TRY003 + raise ValueError("name must be provided as a non-empty string") # noqa: TRY003 # FIXME CoP self.name = name self.id = id @@ -126,7 +126,7 @@ def suite_parameter_options(self) -> tuple[str, ...]: Returns: tuple[str, ...]: The keys of the suite parameters used by all Expectations of this suite at runtime. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP output: set[str] = set() for expectation in self.expectations: output.update(expectation.suite_parameter_options) @@ -136,7 +136,7 @@ def suite_parameter_options(self) -> tuple[str, ...]: def add_expectation(self, expectation: _TExpectation) -> _TExpectation: """Add an Expectation to the collection.""" if expectation.id: - raise RuntimeError( # noqa: TRY003 + raise RuntimeError( # noqa: TRY003 # FIXME CoP "Cannot add Expectation because it already belongs to an ExpectationSuite. " "If you want to update an existing Expectation, please call Expectation.save(). " "If you are copying this Expectation to a new ExpectationSuite, please copy " @@ -169,9 +169,12 @@ def _expectations_are_equalish(expectation_a: Expectation, expectation_b: Expect that are not relevant for uniqueness in the suite. """ exclude_params = {"id", "rendered_content", "notes", "meta"} - return expectation_a.dict(exclude=exclude_params) == expectation_b.dict( + # pydantic model.dict() excludes ClassVars, so we compare Expectation type explicitly + types_are_equal = expectation_a.expectation_type == expectation_b.expectation_type + attributes_are_equal = expectation_a.dict(exclude=exclude_params) == expectation_b.dict( exclude=exclude_params ) + return types_are_equal and attributes_are_equal def _submit_expectation_created_event(self, expectation: Expectation) -> None: if expectation.__module__.startswith("great_expectations."): @@ -198,7 +201,7 @@ def _process_expectation( Raises: ValueError: If expectation_like is of type Expectation and expectation_like.id is not None. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP from great_expectations.expectations.expectation import Expectation from great_expectations.expectations.expectation_configuration import ( ExpectationConfiguration, @@ -206,9 +209,9 @@ def _process_expectation( if isinstance(expectation_like, Expectation): if expectation_like.id: - raise ValueError( # noqa: TRY003 - "Expectations in parameter `expectations` must not belong to another ExpectationSuite. " # noqa: E501 - "Instead, please use copies of Expectations, by calling `copy.copy(expectation)`." # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "Expectations in parameter `expectations` must not belong to another ExpectationSuite. " # noqa: E501 # FIXME CoP + "Instead, please use copies of Expectations, by calling `copy.copy(expectation)`." # noqa: E501 # FIXME CoP ) expectation_like.register_save_callback(save_callback=self._save_expectation) return expectation_like @@ -219,8 +222,8 @@ def _process_expectation( expectation_configuration=ExpectationConfiguration(**expectation_like) ) else: - raise TypeError( # noqa: TRY003 - f"Expected Expectation, ExpectationConfiguration, or dict, but received type {type(expectation_like)}." # noqa: E501 + raise TypeError( # noqa: TRY003 # FIXME CoP + f"Expected Expectation, ExpectationConfiguration, or dict, but received type {type(expectation_like)}." 
# noqa: E501 # FIXME CoP ) @public_api @@ -239,7 +242,7 @@ def delete_expectation(self, expectation: Expectation) -> Expectation: if not self._expectations_are_equalish(exp, expectation) ] if len(remaining_expectations) != len(self.expectations) - 1: - raise KeyError("No matching expectation was found.") # noqa: TRY003 + raise KeyError("No matching expectation was found.") # noqa: TRY003 # FIXME CoP self.expectations = remaining_expectations @@ -251,7 +254,7 @@ def delete_expectation(self, expectation: Expectation) -> Expectation: # rollback this change # expectation suite is set-like so order of expectations doesn't matter self.expectations.append(expectation) - raise exc # noqa: TRY201 + raise exc # noqa: TRY201 # FIXME CoP submit_event( event=ExpectationSuiteExpectationDeletedEvent( @@ -265,7 +268,7 @@ def delete_expectation(self, expectation: Expectation) -> Expectation: @public_api def save(self) -> None: """Save this ExpectationSuite.""" - # TODO: Need to emit an event from here - we've opted out of an ExpectationSuiteUpdated event for now # noqa: E501 + # TODO: Need to emit an event from here - we've opted out of an ExpectationSuiteUpdated event for now # noqa: E501 # FIXME CoP if self._include_rendered_content: self.render() key = self._store.get_key(name=self.name, id=self.id) @@ -332,7 +335,7 @@ def expectation_configurations(self) -> list[ExpectationConfiguration]: @expectation_configurations.setter def expectation_configurations(self, value): - raise AttributeError( # noqa: TRY003 + raise AttributeError( # noqa: TRY003 # FIXME CoP "Cannot set ExpectationSuite.expectation_configurations. " "Please use ExpectationSuite.expectations instead." ) @@ -383,7 +386,7 @@ def to_json_dict(self) -> Dict[str, JSONValues]: A JSON-serializable dict representation of this ExpectationSuite. 
""" myself = expectationSuiteSchema.dump(self) - # NOTE - JPC - 20191031: migrate to expectation-specific schemas that subclass result with properly-typed # noqa: E501 + # NOTE - JPC - 20191031: migrate to expectation-specific schemas that subclass result with properly-typed # noqa: E501 # FIXME CoP # schemas to get serialization all-the-way down via dump expectation_configurations = [exp.configuration for exp in self.expectations] myself["expectations"] = convert_to_json_serializable(expectation_configurations) @@ -418,18 +421,18 @@ def remove_expectation( Raises: TypeError: Must provide either expectation_configuration or id. ValueError: No match or multiple matches found (and remove_multiple_matches=False). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP expectation_configurations = [exp.configuration for exp in self.expectations] if expectation_configuration is None and id is None: - raise TypeError("Must provide either expectation_configuration or id") # noqa: TRY003 + raise TypeError("Must provide either expectation_configuration or id") # noqa: TRY003 # FIXME CoP found_expectation_indexes = self._find_expectation_indexes( expectation_configuration=expectation_configuration, match_type=match_type, - id=id, # type: ignore[arg-type] + id=id, # type: ignore[arg-type] # FIXME CoP ) if len(found_expectation_indexes) < 1: - raise ValueError("No matching expectation was found.") # noqa: TRY003 + raise ValueError("No matching expectation was found.") # noqa: TRY003 # FIXME CoP elif len(found_expectation_indexes) > 1: if remove_multiple_matches: @@ -441,8 +444,8 @@ def remove_expectation( ] return removed_expectations else: - raise ValueError( # noqa: TRY003 - "More than one matching expectation was found. Specify more precise matching criteria," # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "More than one matching expectation was found. 
Specify more precise matching criteria," # noqa: E501 # FIXME CoP "or set remove_multiple_matches=True" ) @@ -478,18 +481,18 @@ def _find_expectation_indexes( Raises: InvalidExpectationConfigurationError - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP from great_expectations.expectations.expectation_configuration import ( ExpectationConfiguration, ) if expectation_configuration is None and id is None: - raise TypeError("Must provide either expectation_configuration or id") # noqa: TRY003 + raise TypeError("Must provide either expectation_configuration or id") # noqa: TRY003 # FIXME CoP if expectation_configuration and not isinstance( expectation_configuration, ExpectationConfiguration ): - raise gx_exceptions.InvalidExpectationConfigurationError( # noqa: TRY003 + raise gx_exceptions.InvalidExpectationConfigurationError( # noqa: TRY003 # FIXME CoP "Ensure that expectation configuration is valid." ) @@ -498,9 +501,9 @@ def _find_expectation_indexes( if id is not None: if expectation.id == id: match_indexes.append(idx) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if expectation.configuration.isEquivalentTo( - other=expectation_configuration, # type: ignore[arg-type] + other=expectation_configuration, # type: ignore[arg-type] # FIXME CoP match_type=match_type, ): match_indexes.append(idx) @@ -529,23 +532,23 @@ def _add_expectation( Raises: More than one match One match if overwrite_existing = False - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP found_expectation_indexes = self._find_expectation_indexes( expectation_configuration=expectation_configuration, match_type=match_type ) if len(found_expectation_indexes) > 1: - raise ValueError( # noqa: TRY003 - "More than one matching expectation was found. Please be more specific with your search " # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "More than one matching expectation was found. 
Please be more specific with your search " # noqa: E501 # FIXME CoP "criteria" ) elif len(found_expectation_indexes) == 1: - # Currently, we completely replace the expectation_configuration, but we could potentially use patch_expectation # noqa: E501 + # Currently, we completely replace the expectation_configuration, but we could potentially use patch_expectation # noqa: E501 # FIXME CoP # to update instead. We need to consider how to handle meta in that situation. # patch_expectation = jsonpatch.make_patch(self.expectations[found_expectation_index] \ # .kwargs, expectation_configuration.kwargs) - # patch_expectation.apply(self.expectations[found_expectation_index].kwargs, in_place=True) # noqa: E501 + # patch_expectation.apply(self.expectations[found_expectation_index].kwargs, in_place=True) # noqa: E501 # FIXME CoP if overwrite_existing: # if existing Expectation has a id, add it back to the new Expectation Configuration existing_expectation_id = self.expectations[found_expectation_indexes[0]].id @@ -556,8 +559,8 @@ def _add_expectation( expectation_configuration=expectation_configuration ) else: - raise gx_exceptions.DataContextError( # noqa: TRY003 - "A matching ExpectationConfiguration already exists. If you would like to overwrite this " # noqa: E501 + raise gx_exceptions.DataContextError( # noqa: TRY003 # FIXME CoP + "A matching ExpectationConfiguration already exists. 
If you would like to overwrite this " # noqa: E501 # FIXME CoP "ExpectationConfiguration, set overwrite_existing=True" ) else: @@ -589,7 +592,7 @@ def add_expectation_configurations( Raises: More than one match One match if overwrite_existing = False - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP expectation_configuration: ExpectationConfiguration expectation_configurations_attempted_to_be_added: List[ExpectationConfiguration] = [ self.add_expectation_configuration( @@ -623,8 +626,8 @@ def add_expectation_configuration( ValueError: More than one match DataContextError: One match if overwrite_existing = False - # noqa: DAR402 - """ # noqa: E501 + # noqa: DAR402 # FIXME CoP + """ # noqa: E501 # FIXME CoP self._build_expectation(expectation_configuration) return self._add_expectation( expectation_configuration=expectation_configuration, @@ -643,7 +646,7 @@ def _build_expectation( gx_exceptions.ExpectationNotFoundError, gx_exceptions.InvalidExpectationConfigurationError, ) as e: - raise gx_exceptions.InvalidExpectationConfigurationError( # noqa: TRY003 + raise gx_exceptions.InvalidExpectationConfigurationError( # noqa: TRY003 # FIXME CoP f"Could not add expectation; provided configuration is not valid: {e.message}" ) from e @@ -651,7 +654,7 @@ def render(self) -> None: """ Renders content using the atomic prescriptive renderer for each expectation configuration associated with this ExpectationSuite to ExpectationConfiguration.rendered_content. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP for expectation in self.expectations: expectation.render() @@ -674,13 +677,13 @@ class ExpectationSuiteSchema(Schema): meta = fields.Dict() notes = fields.Raw(required=False, allow_none=True) - # NOTE: 20191107 - JPC - we may want to remove clean_empty and update tests to require the other fields; # noqa: E501 + # NOTE: 20191107 - JPC - we may want to remove clean_empty and update tests to require the other fields; # noqa: E501 # FIXME CoP # doing so could also allow us not to have to make a copy of data in the pre_dump method. # noinspection PyMethodMayBeStatic def clean_empty(self, data: _TExpectationSuite) -> _TExpectationSuite: if isinstance(data, ExpectationSuite): # We are hitting this TypeVar narrowing mypy bug: https://github.com/python/mypy/issues/10817 - data = self._clean_empty_suite(data) # type: ignore[assignment] + data = self._clean_empty_suite(data) # type: ignore[assignment] # FIXME CoP elif isinstance(data, dict): data = self._clean_empty_dict(data) return data diff --git a/great_expectations/core/expectation_validation_result.py b/great_expectations/core/expectation_validation_result.py index de8c15b62752..e809d77c5ed3 100644 --- a/great_expectations/core/expectation_validation_result.py +++ b/great_expectations/core/expectation_validation_result.py @@ -10,14 +10,14 @@ import great_expectations.exceptions as gx_exceptions from great_expectations._docs_decorators import public_api -from great_expectations.alias_types import JSONValues # noqa: TCH001 +from great_expectations.alias_types import JSONValues # noqa: TCH001 # FIXME CoP from great_expectations.compatibility.typing_extensions import override -from great_expectations.core.batch import ( # noqa: TCH001 +from great_expectations.core.batch import ( # noqa: TCH001 # FIXME CoP BatchMarkers, LegacyBatchDefinition, ) -from great_expectations.core.id_dict import BatchSpec # noqa: TCH001 -from great_expectations.core.run_identifier import 
RunIdentifier # noqa: TCH001 +from great_expectations.core.id_dict import BatchSpec # noqa: TCH001 # FIXME CoP +from great_expectations.core.run_identifier import RunIdentifier # noqa: TCH001 # FIXME CoP from great_expectations.data_context.util import instantiate_class_from_config from great_expectations.exceptions import ClassInstantiationError from great_expectations.render import ( @@ -27,8 +27,8 @@ ) from great_expectations.types import SerializableDictDot from great_expectations.util import ( - convert_to_json_serializable, # noqa: TID251 - ensure_json_serializable, # noqa: TID251 + convert_to_json_serializable, # noqa: TID251 # FIXME CoP + ensure_json_serializable, # noqa: TID251 # FIXME CoP ) if TYPE_CHECKING: @@ -82,7 +82,7 @@ class ExpectationValidationResult(SerializableDictDot): InvalidCacheValueError: Raised if the result does not pass validation. """ - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, success: Optional[bool] = None, expectation_config: Optional[ExpectationConfiguration] = None, @@ -114,7 +114,7 @@ def __init__( # noqa: PLR0913 self.rendered_content = rendered_content def __eq__(self, other): # type: ignore[explicit-override] # FIXME - """ExpectationValidationResult equality ignores instance identity, relying only on properties.""" # noqa: E501 + """ExpectationValidationResult equality ignores instance identity, relying only on properties.""" # noqa: E501 # FIXME CoP # NOTE: JPC - 20200213 - need to spend some time thinking about whether we want to # consistently allow dict as a comparison alternative in situations like these... # if isinstance(other, dict): @@ -144,8 +144,8 @@ def __eq__(self, other): # type: ignore[explicit-override] # FIXME other=other.expectation_config, match_type="success" ) ), - # Result is a dictionary allowed to have nested dictionaries that are still of complex types (e.g. # noqa: E501 - # numpy) consequently, series' comparison can persist. 
Wrapping in all() ensures comparison is # noqa: E501 + # Result is a dictionary allowed to have nested dictionaries that are still of complex types (e.g. # noqa: E501 # FIXME CoP + # numpy) consequently, series' comparison can persist. Wrapping in all() ensures comparison is # noqa: E501 # FIXME CoP # handled appropriately. not (self.result or other.result) or contents_equal, self.meta == other.meta, @@ -157,7 +157,7 @@ def __eq__(self, other): # type: ignore[explicit-override] # FIXME return False def __ne__(self, other): # type: ignore[explicit-override] # FIXME - # Negated implementation of '__eq__'. TODO the method should be deleted when it will coincide with __eq__. # noqa: E501 + # Negated implementation of '__eq__'. TODO the method should be deleted when it will coincide with __eq__. # noqa: E501 # FIXME CoP # return not self == other if not isinstance(other, self.__class__): # Delegate comparison to the other instance's __ne__. @@ -171,7 +171,7 @@ def __ne__(self, other): # type: ignore[explicit-override] # FIXME self.expectation_config is not None and not self.expectation_config.isEquivalentTo(other.expectation_config) ), - # TODO should it be wrapped in all()/any()? Since it is the only difference to __eq__: # noqa: E501 + # TODO should it be wrapped in all()/any()? Since it is the only difference to __eq__: # noqa: E501 # FIXME CoP (self.result is None and other.result is not None) or (self.result != other.result), self.meta != other.meta, @@ -188,7 +188,7 @@ def __repr__(self) -> str: # TODO: 5/9/2022 This implementation is non-ideal (it was agreed to employ it for development expediency). A better approach would consist of "__str__()" calling "__repr__()", while all output options are handled through state variables. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP json_dict: dict = self.to_json_dict() return json.dumps(json_dict, indent=2) @@ -198,7 +198,7 @@ def __str__(self) -> str: # TODO: 5/9/2022 This implementation is non-ideal (it was agreed to employ it for development expediency). A better approach would consist of "__str__()" calling "__repr__()", while all output options are handled through state variables. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return json.dumps(self.to_json_dict(), indent=2) def render(self) -> None: @@ -245,16 +245,16 @@ def validate_result_dict(result): if result.get("unexpected_count") and result["unexpected_count"] < 0: return False if result.get("unexpected_percent") and ( - result["unexpected_percent"] < 0 or result["unexpected_percent"] > 100 # noqa: PLR2004 + result["unexpected_percent"] < 0 or result["unexpected_percent"] > 100 # noqa: PLR2004 # FIXME CoP ): return False if result.get("missing_percent") and ( - result["missing_percent"] < 0 or result["missing_percent"] > 100 # noqa: PLR2004 + result["missing_percent"] < 0 or result["missing_percent"] > 100 # noqa: PLR2004 # FIXME CoP ): return False if result.get("unexpected_percent_nonmissing") and ( result["unexpected_percent_nonmissing"] < 0 - or result["unexpected_percent_nonmissing"] > 100 # noqa: PLR2004 + or result["unexpected_percent_nonmissing"] > 100 # noqa: PLR2004 # FIXME CoP ): return False return not (result.get("missing_count") and result["missing_count"] < 0) @@ -268,7 +268,7 @@ def to_json_dict(self) -> dict[str, JSONValues]: A JSON-serializable dict representation of this ExpectationValidationResult. 
""" myself = expectationValidationResultSchema.dump(self) - # NOTE - JPC - 20191031: migrate to expectation-specific schemas that subclass result with properly-typed # noqa: E501 + # NOTE - JPC - 20191031: migrate to expectation-specific schemas that subclass result with properly-typed # noqa: E501 # FIXME CoP # schemas to get serialization all-the-way down via dump if "expectation_config" in myself: myself["expectation_config"] = convert_to_json_serializable( @@ -284,9 +284,9 @@ def to_json_dict(self) -> dict[str, JSONValues]: myself["rendered_content"] = convert_to_json_serializable(myself["rendered_content"]) return myself - def get_metric(self, metric_name, **kwargs): # noqa: C901 - too complex + def get_metric(self, metric_name, **kwargs): # noqa: C901 # too complex if not self.expectation_config: - raise gx_exceptions.UnavailableMetricError( # noqa: TRY003 + raise gx_exceptions.UnavailableMetricError( # noqa: TRY003 # FIXME CoP "No ExpectationConfig found in this ExpectationValidationResult. Unable to " "return a metric." ) @@ -303,29 +303,29 @@ def get_metric(self, metric_name, **kwargs): # noqa: C901 - too complex metric_kwargs_id or "None", curr_metric_kwargs or "None" ) ) - if len(metric_name_parts) < 2: # noqa: PLR2004 - raise gx_exceptions.UnavailableMetricError( # noqa: TRY003 + if len(metric_name_parts) < 2: # noqa: PLR2004 # FIXME CoP + raise gx_exceptions.UnavailableMetricError( # noqa: TRY003 # FIXME CoP "Expectation-defined metrics must include a requested metric." ) - elif len(metric_name_parts) == 2: # noqa: PLR2004 + elif len(metric_name_parts) == 2: # noqa: PLR2004 # FIXME CoP if metric_name_parts[1] == "success": return self.success else: - raise gx_exceptions.UnavailableMetricError( # noqa: TRY003 + raise gx_exceptions.UnavailableMetricError( # noqa: TRY003 # FIXME CoP "Metric name must have more than two parts for keys other than " "success." 
) elif metric_name_parts[1] == "result": try: - if len(metric_name_parts) == 3: # noqa: PLR2004 + if len(metric_name_parts) == 3: # noqa: PLR2004 # FIXME CoP return self.result.get(metric_name_parts[2]) elif metric_name_parts[2] == "details": return self.result["details"].get(metric_name_parts[3]) except KeyError: - raise gx_exceptions.UnavailableMetricError( # noqa: TRY003 + raise gx_exceptions.UnavailableMetricError( # noqa: TRY003 # FIXME CoP f"Unable to get metric {metric_name} -- KeyError in " "ExpectationValidationResult." ) - raise gx_exceptions.UnavailableMetricError(f"Unrecognized metric name {metric_name}") # noqa: TRY003 + raise gx_exceptions.UnavailableMetricError(f"Unrecognized metric name {metric_name}") # noqa: TRY003 # FIXME CoP def describe_dict(self) -> dict: if self.expectation_config: @@ -342,7 +342,7 @@ def describe_dict(self) -> dict: } if self.exception_info.get("raised_exception"): describe_dict["exception_info"] = self.exception_info - return describe_dict + return convert_to_json_serializable(describe_dict) @public_api def describe(self) -> str: @@ -353,7 +353,7 @@ def describe(self) -> str: class ExpectationValidationResultSchema(Schema): success = fields.Bool(required=False, allow_none=True) expectation_config = fields.Nested( - lambda: "ExpectationConfigurationSchema", # type: ignore[arg-type,return-value] + lambda: "ExpectationConfigurationSchema", # type: ignore[arg-type,return-value] # FIXME CoP required=False, allow_none=True, ) @@ -379,7 +379,7 @@ def convert_result_to_serializable(self, data, **kwargs): @post_dump def clean_null_attrs(self, data: dict, **kwargs: dict) -> dict: """Removes the attributes in ExpectationValidationResultSchema.REMOVE_KEYS_IF_NONE during serialization if - their values are None.""" # noqa: E501 + their values are None.""" # noqa: E501 # FIXME CoP from great_expectations.expectations.expectation_configuration import ( ExpectationConfigurationSchema, ) @@ -468,9 +468,9 @@ class 
ExpectationSuiteValidationResult(SerializableDictDot): meta: Instance of ExpectationSuiteValidationResult, a Dict of meta values, or None. batch_id: A unique identifier for the batch of data that was validated. result_url: A URL where the results are stored. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, success: bool, results: list[ExpectationValidationResult], @@ -502,7 +502,7 @@ def asset_name(self) -> str | None: return None def __eq__(self, other): # type: ignore[explicit-override] # FIXME - """ExpectationSuiteValidationResult equality ignores instance identity, relying only on properties.""" # noqa: E501 + """ExpectationSuiteValidationResult equality ignores instance identity, relying only on properties.""" # noqa: E501 # FIXME CoP if not isinstance(other, self.__class__): # Delegate comparison to the other instance's __eq__. return NotImplemented @@ -531,7 +531,7 @@ def to_json_dict(self): A JSON-serializable dict representation of this ExpectationSuiteValidationResult. 
""" myself = deepcopy(self) - # NOTE - JPC - 20191031: migrate to expectation-specific schemas that subclass result with properly-typed # noqa: E501 + # NOTE - JPC - 20191031: migrate to expectation-specific schemas that subclass result with properly-typed # noqa: E501 # FIXME CoP # schemas to get serialization all-the-way down via dump myself["suite_parameters"] = convert_to_json_serializable(myself["suite_parameters"]) myself["statistics"] = convert_to_json_serializable(myself["statistics"]) @@ -540,17 +540,17 @@ def to_json_dict(self): myself = expectationSuiteValidationResultSchema.dump(myself) return myself - def get_metric(self, metric_name, **kwargs): # noqa: C901 - too complex + def get_metric(self, metric_name, **kwargs): # noqa: C901 # too complex metric_name_parts = metric_name.split(".") metric_kwargs_id = get_metric_kwargs_id(metric_kwargs=kwargs) metric_value = None # Expose overall statistics if metric_name_parts[0] == "statistics": - if len(metric_name_parts) == 2: # noqa: PLR2004 + if len(metric_name_parts) == 2: # noqa: PLR2004 # FIXME CoP return self.statistics.get(metric_name_parts[1]) else: - raise gx_exceptions.UnavailableMetricError(f"Unrecognized metric {metric_name}") # noqa: TRY003 + raise gx_exceptions.UnavailableMetricError(f"Unrecognized metric {metric_name}") # noqa: TRY003 # FIXME CoP # Expose expectation-defined metrics elif metric_name_parts[0].lower().startswith("expect_"): @@ -569,7 +569,7 @@ def get_metric(self, metric_name, **kwargs): # noqa: C901 - too complex self._metrics[(metric_name, metric_kwargs_id)] = metric_value return metric_value - raise gx_exceptions.UnavailableMetricError( # noqa: TRY003 + raise gx_exceptions.UnavailableMetricError( # noqa: TRY003 # FIXME CoP f"Metric {metric_name} with metric_kwargs_id {metric_kwargs_id} is not available." 
) @@ -604,12 +604,14 @@ def get_failed_validation_results( ) def describe_dict(self) -> dict: - return { - "success": self.success, - "statistics": self.statistics, - "expectations": [expectation.describe_dict() for expectation in self.results], - "result_url": self.result_url, - } + return convert_to_json_serializable( + { + "success": self.success, + "statistics": self.statistics, + "expectations": [expectation.describe_dict() for expectation in self.results], + "result_url": self.result_url, + } + ) @public_api def describe(self) -> str: diff --git a/great_expectations/core/factory/checkpoint_factory.py b/great_expectations/core/factory/checkpoint_factory.py index c0cf83e5455e..76dedf7565c7 100644 --- a/great_expectations/core/factory/checkpoint_factory.py +++ b/great_expectations/core/factory/checkpoint_factory.py @@ -15,6 +15,7 @@ from great_expectations.exceptions import DataContextError if TYPE_CHECKING: + from great_expectations import ValidationDefinition from great_expectations.core.data_context_key import StringKey from great_expectations.data_context.store.checkpoint_store import ( CheckpointStore, @@ -24,6 +25,10 @@ @public_api class CheckpointFactory(Factory[Checkpoint]): + """ + Responsible for basic CRUD operations on a Data Context's Checkpoints. + """ + def __init__(self, store: CheckpointStore): self._store = store @@ -40,7 +45,7 @@ def add(self, checkpoint: Checkpoint) -> Checkpoint: """ key = self._store.get_key(name=checkpoint.name, id=None) if self._store.has_key(key=key): - raise DataContextError( # noqa: TRY003 + raise DataContextError( # noqa: TRY003 # FIXME CoP f"Cannot add Checkpoint with name {checkpoint.name} because it already exists." ) @@ -83,7 +88,7 @@ def delete(self, name: str) -> None: try: checkpoint = self.get(name=name) except DataContextError as e: - raise DataContextError( # noqa: TRY003 + raise DataContextError( # noqa: TRY003 # FIXME CoP f"Cannot delete Checkpoint with name {name} because it cannot be found." 
) from e @@ -109,7 +114,7 @@ def get(self, name: str) -> Checkpoint: """ key = self._store.get_key(name=name, id=None) if not self._store.has_key(key=key): - raise DataContextError(f"Checkpoint with name {name} was not found.") # noqa: TRY003 + raise DataContextError(f"Checkpoint with name {name} was not found.") # noqa: TRY003 # FIXME CoP return self._get(key=key) @@ -122,6 +127,53 @@ def all(self) -> Iterable[Checkpoint]: def _get(self, key: GXCloudIdentifier | StringKey) -> Checkpoint: checkpoint = self._store.get(key=key) if not isinstance(checkpoint, Checkpoint): - raise ValueError(f"Object with key {key} was found, but it is not a Checkpoint.") # noqa: TRY003, TRY004 + raise ValueError(f"Object with key {key} was found, but it is not a Checkpoint.") # noqa: TRY003, TRY004 # FIXME CoP + + return checkpoint + + @public_api + @override + def add_or_update(self, checkpoint: Checkpoint) -> Checkpoint: + """Add or update a Checkpoint by name. + + If a Checkpoint with the same name exists, overwrite it, otherwise + create a new Checkpoint. 
+ Args: + checkpoint: Checkpoint to add or update + """ + + try: + existing_checkpoint = self.get(name=checkpoint.name) + except DataContextError: + # checkpoint doesn't exist yet, so add it + self._add_or_update_validation_definitions( + validation_definitions=checkpoint.validation_definitions, + existing_validation_definitions=[], + ) + return self.add(checkpoint=checkpoint) + + # update checkpoint + checkpoint.id = existing_checkpoint.id + self._add_or_update_validation_definitions( + validation_definitions=checkpoint.validation_definitions, + existing_validation_definitions=existing_checkpoint.validation_definitions, + ) + checkpoint.save() return checkpoint + + def _add_or_update_validation_definitions( + self, + validation_definitions: list[ValidationDefinition], + existing_validation_definitions: list[ValidationDefinition], + ): + from great_expectations.data_context import project_manager + + val_def_ids_by_name = { + val_def.name: val_def.id for val_def in existing_validation_definitions + } + val_def_factory = project_manager.get_validation_definitions_factory() + for val_def in validation_definitions: + if val_def.name in val_def_ids_by_name: + val_def.id = val_def_ids_by_name[val_def.name] + val_def_factory.add_or_update(validation=val_def) diff --git a/great_expectations/core/factory/factory.py b/great_expectations/core/factory/factory.py index 4ecae45fe063..fdf316b72bfe 100644 --- a/great_expectations/core/factory/factory.py +++ b/great_expectations/core/factory/factory.py @@ -24,3 +24,7 @@ def get(self, name: str) -> T: @abstractmethod def all(self) -> Iterable[T]: pass + + @abstractmethod + def add_or_update(self, obj: T) -> T: + pass diff --git a/great_expectations/core/factory/suite_factory.py b/great_expectations/core/factory/suite_factory.py index e17e666b3bc0..c88026b96c59 100644 --- a/great_expectations/core/factory/suite_factory.py +++ b/great_expectations/core/factory/suite_factory.py @@ -21,6 +21,10 @@ @public_api class 
SuiteFactory(Factory[ExpectationSuite]): + """ + Responsible for basic CRUD operations on a Data Context's ExpectationSuites. + """ + def __init__(self, store: ExpectationsStore): self._store = store @@ -33,7 +37,7 @@ def _include_rendered_content(self) -> bool: def add(self, suite: ExpectationSuite) -> ExpectationSuite: """Add an ExpectationSuite to the collection. - Parameters: + Args: suite: ExpectationSuite to add Raises: @@ -41,7 +45,7 @@ def add(self, suite: ExpectationSuite) -> ExpectationSuite: """ key = self._store.get_key(name=suite.name, id=None) if self._store.has_key(key=key): - raise DataContextError( # noqa: TRY003 + raise DataContextError( # noqa: TRY003 # FIXME CoP f"Cannot add ExpectationSuite with name {suite.name} because it already exists." ) self._store.add(key=key, value=suite) @@ -62,7 +66,7 @@ def add(self, suite: ExpectationSuite) -> ExpectationSuite: def delete(self, name: str) -> None: """Delete an ExpectationSuite from the collection. - Parameters: + Args: name: The name of the ExpectationSuite to delete Raises: @@ -71,7 +75,7 @@ def delete(self, name: str) -> None: try: suite = self.get(name=name) except DataContextError as e: - raise DataContextError( # noqa: TRY003 + raise DataContextError( # noqa: TRY003 # FIXME CoP f"Cannot delete ExpectationSuite with name {name} because it cannot be found." ) from e @@ -89,7 +93,7 @@ def delete(self, name: str) -> None: def get(self, name: str) -> ExpectationSuite: """Get an ExpectationSuite from the collection by name. 
- Parameters: + Args: name: Name of ExpectationSuite to get Raises: @@ -98,7 +102,7 @@ def get(self, name: str) -> ExpectationSuite: key = self._store.get_key(name=name, id=None) if not self._store.has_key(key=key): - raise DataContextError(f"ExpectationSuite with name {name} was not found.") # noqa: TRY003 + raise DataContextError(f"ExpectationSuite with name {name} was not found.") # noqa: TRY003 # FIXME CoP suite_dict = self._store.get(key=key) return self._store.deserialize_suite_dict(suite_dict) @@ -125,3 +129,34 @@ def all(self) -> Iterable[ExpectationSuite]: self._store.submit_all_deserialization_event(e) raise return deserializable_suites + + @public_api + @override + def add_or_update(self, suite: ExpectationSuite) -> ExpectationSuite: + """Add or update an ExpectationSuite by name. + + If an ExpectationSuite with the same name exists, overwrite it, otherwise + create a new ExpectationSuite. On update, Expectations in the Suite which + match a previously existing Expectation maintain a stable ID, and + Expectations which have changed receive a new ID. 
+ + Args: + suite: ExpectationSuite to add or update + """ + try: + existing_suite = self.get(name=suite.name) + except DataContextError: + return self.add(suite=suite) + + # add IDs to expectations that haven't changed + existing_expectations = existing_suite.expectations + for expectation in suite.expectations: + try: + index = existing_expectations.index(expectation) + expectation.id = existing_expectations[index].id + except ValueError: + pass # expectation is new or updated + + suite.id = existing_suite.id + suite.save() + return suite diff --git a/great_expectations/core/factory/validation_definition_factory.py b/great_expectations/core/factory/validation_definition_factory.py index 681d7d06ddd1..20693e726a53 100644 --- a/great_expectations/core/factory/validation_definition_factory.py +++ b/great_expectations/core/factory/validation_definition_factory.py @@ -11,6 +11,7 @@ from great_expectations.compatibility.typing_extensions import override from great_expectations.core.factory.factory import Factory from great_expectations.core.validation_definition import ValidationDefinition +from great_expectations.data_context.data_context.context_factory import project_manager from great_expectations.exceptions.exceptions import DataContextError if TYPE_CHECKING: @@ -21,6 +22,10 @@ @public_api class ValidationDefinitionFactory(Factory[ValidationDefinition]): + """ + Responsible for basic CRUD operations on a Data Context's ValidationDefinitions. + """ + def __init__(self, store: ValidationDefinitionStore) -> None: self._store = store @@ -37,8 +42,8 @@ def add(self, validation: ValidationDefinition) -> ValidationDefinition: """ key = self._store.get_key(name=validation.name, id=None) if self._store.has_key(key=key): - raise DataContextError( # noqa: TRY003 - f"Cannot add ValidationDefinition with name {validation.name} because it already exists." 
# noqa: E501 + raise DataContextError( # noqa: TRY003 # FIXME CoP + f"Cannot add ValidationDefinition with name {validation.name} because it already exists." # noqa: E501 # FIXME CoP ) self._store.add(key=key, value=validation) @@ -64,7 +69,7 @@ def delete(self, name: str) -> None: try: validation_definition = self.get(name=name) except DataContextError as e: - raise DataContextError( # noqa: TRY003 + raise DataContextError( # noqa: TRY003 # FIXME CoP f"Cannot delete ValidationDefinition with name {name} because it cannot be found." ) from e @@ -90,7 +95,7 @@ def get(self, name: str) -> ValidationDefinition: """ key = self._store.get_key(name=name, id=None) if not self._store.has_key(key=key): - raise DataContextError(f"ValidationDefinition with name {name} was not found.") # noqa: TRY003 + raise DataContextError(f"ValidationDefinition with name {name} was not found.") # noqa: TRY003 # FIXME CoP return cast(ValidationDefinition, self._store.get(key=key)) @@ -99,3 +104,33 @@ def get(self, name: str) -> ValidationDefinition: def all(self) -> Iterable[ValidationDefinition]: """Get all ValidationDefinitions.""" return self._store.get_all() + + @public_api + @override + def add_or_update(self, validation: ValidationDefinition) -> ValidationDefinition: + """Add or update an ValidationDefinition by name. + + If an ValidationDefinition with the same name exists, overwrite it, otherwise + create a new ValidationDefinition. 
+ + Args: + validation: ValidationDefinition to add or update + """ + # Always add or update underlying suite to avoid freshness issues + suite_factory = project_manager.get_suite_factory() + validation.suite = suite_factory.add_or_update(suite=validation.suite) + + try: + existing_validation = self.get(name=validation.name) + existing_batch_definition = existing_validation.data + except DataContextError: + return self.add(validation=validation) + + batch_definition = validation.data + batch_definition.id = existing_batch_definition.id + batch_definition.save() + + validation.id = existing_validation.id + validation.save() + + return validation diff --git a/great_expectations/core/http.py b/great_expectations/core/http.py index 22540569c97d..a377a92141bb 100644 --- a/great_expectations/core/http.py +++ b/great_expectations/core/http.py @@ -34,7 +34,7 @@ def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) @override - def send(self, request: requests.PreparedRequest, **kwargs) -> requests.Response: # type: ignore[override] + def send(self, request: requests.PreparedRequest, **kwargs) -> requests.Response: # type: ignore[override] # FIXME CoP kwargs["timeout"] = kwargs.get("timeout", self.timeout) return super().send(request, **kwargs) diff --git a/great_expectations/core/id_dict.py b/great_expectations/core/id_dict.py index e5f7e078428d..cf68fc4e676f 100644 --- a/great_expectations/core/id_dict.py +++ b/great_expectations/core/id_dict.py @@ -5,7 +5,7 @@ from typing import Any, Set, TypeVar, Union from great_expectations.compatibility.typing_extensions import override -from great_expectations.util import convert_to_json_serializable # noqa: TID251 +from great_expectations.util import convert_to_json_serializable # noqa: TID251 # FIXME CoP T = TypeVar("T") @@ -29,7 +29,7 @@ def to_id(self, id_keys=None, id_ignore_keys=None): return hashlib.md5(json.dumps(_id_dict, sort_keys=True).encode("utf-8")).hexdigest() @override - def __hash__(self) -> 
int: # type: ignore[override] + def __hash__(self) -> int: # type: ignore[override] # FIXME CoP """Overrides the default implementation""" _result_hash: int = hash(self.to_id()) return _result_hash @@ -41,7 +41,7 @@ def deep_convert_properties_iterable_to_id_dict( if isinstance(source, dict): return _deep_convert_properties_iterable_to_id_dict(source=IDDict(source)) - # Must allow for non-dictionary source types, since their internal nested structures may contain dictionaries. # noqa: E501 + # Must allow for non-dictionary source types, since their internal nested structures may contain dictionaries. # noqa: E501 # FIXME CoP if isinstance(source, (list, set, tuple)): data_type: type = type(source) diff --git a/great_expectations/core/metric_domain_types.py b/great_expectations/core/metric_domain_types.py index 84ced7774a62..22ef9685ee5d 100644 --- a/great_expectations/core/metric_domain_types.py +++ b/great_expectations/core/metric_domain_types.py @@ -11,7 +11,7 @@ class MetricDomainTypes(enum.Enum): A wide variety of "Domain" types can be defined with applicable metrics associated with their respective "Domain" types. The "Domain" types currently in use (`TABLE`, `COLUMN`, `COLUMN_PAIR`, and `MULTICOLUMN`) are declared here. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP TABLE = "table" COLUMN = "column" diff --git a/great_expectations/core/metric_function_types.py b/great_expectations/core/metric_function_types.py index dbaeb1c41452..bd6e30681f78 100644 --- a/great_expectations/core/metric_function_types.py +++ b/great_expectations/core/metric_function_types.py @@ -22,7 +22,7 @@ class MetricFunctionTypes(enum.Enum): specified windowing operation over "Domain" values. - `AGGREGATE_VALUE` (never used and deprecated) -- metric implementation function returns the result of applying a specified aggregation operation to every "Domain" value. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP VALUE = "value" @@ -54,7 +54,7 @@ class MetricPartialFunctionTypes(enum.Enum): specified aggregated quantity. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP MAP_FN = "map_fn" # pertains to "PandasExecutionEngine" MAP_SERIES = "map_series" # pertains to "PandasExecutionEngine" @@ -63,7 +63,7 @@ class MetricPartialFunctionTypes(enum.Enum): "map_condition_fn" # pertains to "SqlAlchemyExecutionEngine" and "SparkDFExecutionEngine" ) MAP_CONDITION_SERIES = "map_condition_series" # pertains to "PandasExecutionEngine" - WINDOW_CONDITION_FN = "window_condition_fn" # pertains to "SqlAlchemyExecutionEngine" and "SparkDFExecutionEngine" # noqa: E501 + WINDOW_CONDITION_FN = "window_condition_fn" # pertains to "SqlAlchemyExecutionEngine" and "SparkDFExecutionEngine" # noqa: E501 # FIXME CoP AGGREGATE_FN = ( "aggregate_fn" # pertains to "SqlAlchemyExecutionEngine" and "SparkDFExecutionEngine" ) @@ -74,7 +74,7 @@ def metric_suffix(self) -> str: Returns: (str) designated metric name suffix - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if self.name in [ "MAP_FN", "MAP_SERIES", @@ -96,7 +96,7 @@ def metric_suffix(self) -> str: class MetricPartialFunctionTypeSuffixes(enum.Enum): - """Enum type, whose members specify available suffixes for metrics representing partial functions.""" # noqa: E501 + """Enum type, whose members specify available suffixes for metrics representing partial functions.""" # noqa: E501 # FIXME CoP MAP = "map" CONDITION = "condition" @@ -104,7 +104,7 @@ class MetricPartialFunctionTypeSuffixes(enum.Enum): class SummarizationMetricNameSuffixes(enum.Enum): - """Enum type, whose members specify suffixes for metrics used for summarizing Expectation validation results.""" # noqa: E501 + """Enum type, whose members specify suffixes for metrics used for summarizing Expectation validation results.""" # noqa: E501 # FIXME CoP FILTERED_ROW_COUNT = "filtered_row_count" UNEXPECTED_COUNT = "unexpected_count" diff --git 
a/great_expectations/core/run_identifier.py b/great_expectations/core/run_identifier.py index e985cb97cfb8..3cc693df07e2 100644 --- a/great_expectations/core/run_identifier.py +++ b/great_expectations/core/run_identifier.py @@ -10,7 +10,7 @@ from marshmallow import Schema, fields, post_load, pre_dump from great_expectations._docs_decorators import public_api -from great_expectations.alias_types import JSONValues # noqa: TCH001 +from great_expectations.alias_types import JSONValues # noqa: TCH001 # FIXME CoP from great_expectations.compatibility.typing_extensions import override from great_expectations.core.data_context_key import DataContextKey @@ -48,7 +48,7 @@ def __init__( if not run_time: try: - run_time = parse(run_name) # type: ignore[arg-type] + run_time = parse(run_name) # type: ignore[arg-type] # FIXME CoP except (ValueError, TypeError): run_time = None diff --git a/great_expectations/core/serializer.py b/great_expectations/core/serializer.py index 796593884467..ce1d4c392136 100644 --- a/great_expectations/core/serializer.py +++ b/great_expectations/core/serializer.py @@ -13,12 +13,12 @@ config = ModelConfig(...) serializer = DictConfigSerializer(schema=modelConfigSchema) serialized_value = serializer.serialize(config) -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP import abc from typing import TYPE_CHECKING -from great_expectations.util import convert_to_json_serializable # noqa: TID251 +from great_expectations.util import convert_to_json_serializable # noqa: TID251 # FIXME CoP if TYPE_CHECKING: from marshmallow import Schema @@ -30,7 +30,7 @@ class AbstractConfigSerializer(abc.ABC): """Serializer interface. Note: When mypy coverage is enhanced further, this Abstract class can be replaced with a Protocol. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def __init__(self, schema: Schema) -> None: """ diff --git a/great_expectations/core/suite_parameters.py b/great_expectations/core/suite_parameters.py index ead7ca380189..1809d08bbaf3 100644 --- a/great_expectations/core/suite_parameters.py +++ b/great_expectations/core/suite_parameters.py @@ -27,7 +27,7 @@ ) from great_expectations.exceptions import SuiteParameterError -from great_expectations.util import convert_to_json_serializable # noqa: TID251 +from great_expectations.util import convert_to_json_serializable # noqa: TID251 # FIXME CoP if TYPE_CHECKING: from typing_extensions import TypeAlias, TypeGuard @@ -77,7 +77,7 @@ class SuiteParameterParser: expr :: term [ addop term ]* The parser is modified from: https://github.com/pyparsing/pyparsing/blob/master/examples/fourFn.py - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # map operator symbols to corresponding arithmetic operations opn = { @@ -153,7 +153,7 @@ def get_parser(self): keyval = dictOf(key.setParseAction(self.push_first), value) kwarglist = delimitedList(keyval) - # add parse action that replaces the function identifier with a (name, number of args, has_fn_kwargs) tuple # noqa: E501 + # add parse action that replaces the function identifier with a (name, number of args, has_fn_kwargs) tuple # noqa: E501 # FIXME CoP # 20211009 - JPC - Note that it's important that we consider kwarglist # first as part of disabling backtracking for the function's arguments fn_call = (variable + lpar + rpar).setParseAction( @@ -174,7 +174,7 @@ def get_parser(self): ) ).setParseAction(self.push_unary_minus) - # by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left # noqa: E501 + # by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left # noqa: E501 # FIXME CoP # exponents, instead of left-to-right that is, 2^3^2 = 2^(3^2), not (2^3)^2. 
factor = Forward() factor <<= atom + (expop + factor).setParseAction(self.push_first)[...] @@ -183,7 +183,7 @@ def get_parser(self): self._parser = expr return self._parser - def evaluate_stack(self, s): # noqa: C901, PLR0911, PLR0912 + def evaluate_stack(self, s): # noqa: C901, PLR0911, PLR0912 # FIXME CoP op, num_args, has_fn_kwargs = s.pop(), 0, False if isinstance(op, tuple): op, num_args, has_fn_kwargs = op @@ -211,8 +211,8 @@ def evaluate_stack(self, s): # noqa: C901, PLR0911, PLR0912 args = reversed([self.evaluate_stack(s) for _ in range(num_args)]) return self.fn[op](*args) else: - # Require that the *entire* expression evaluates to number or datetime UNLESS there is *exactly one* # noqa: E501 - # expression to substitute (see cases where len(parse_results) == 1 in the parse_suite_parameter # noqa: E501 + # Require that the *entire* expression evaluates to number or datetime UNLESS there is *exactly one* # noqa: E501 # FIXME CoP + # expression to substitute (see cases where len(parse_results) == 1 in the parse_suite_parameter # noqa: E501 # FIXME CoP # method). evaluated: Union[int, float, datetime.datetime] try: @@ -230,7 +230,7 @@ def evaluate_stack(self, s): # noqa: C901, PLR0911, PLR0912 logger.info("Suite parameter operand successfully parsed as datetime.") except ValueError as e: logger.info("Parsing suite parameter operand as datetime failed.") - raise e # noqa: TRY201 + raise e # noqa: TRY201 # FIXME CoP return evaluated @@ -243,7 +243,7 @@ def build_suite_parameters( """Build a dictionary of parameters to evaluate, using the provided suite_parameters, AND mutate expectation_args by removing any parameter values passed in as temporary values during exploratory work. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP suite_args = copy.deepcopy(expectation_args) substituted_parameters = {} @@ -251,7 +251,7 @@ def build_suite_parameters( # specified parameters. 
for key, value in suite_args.items(): if isinstance(value, dict) and "$PARAMETER" in value: - # We do not even need to search for a value if we are not going to do interactive evaluation # noqa: E501 + # We do not even need to search for a value if we are not going to do interactive evaluation # noqa: E501 # FIXME CoP if not interactive_evaluation: continue @@ -281,7 +281,7 @@ def build_suite_parameters( EXPR = SuiteParameterParser() -def parse_suite_parameter( # noqa: C901 +def parse_suite_parameter( # noqa: C901 # FIXME CoP parameter_expression: str, suite_parameters: Optional[Dict[str, Any]] = None, data_context: Optional[AbstractDataContext] = None, @@ -298,7 +298,7 @@ def parse_suite_parameter( # noqa: C901 obtain integer values when needed for certain expectations (e.g. expect_column_value_length_to_be_between). Valid variables must begin with an alphabetic character and may contain alphanumeric characters plus '_' and '$'. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if suite_parameters is None: suite_parameters = {} @@ -306,20 +306,20 @@ def parse_suite_parameter( # noqa: C901 if _is_single_function_no_args(parse_results): # Necessary to catch `now()` (which only needs to be evaluated with `expr.exprStack`) - # NOTE: 20211122 - Chetan - Any future built-ins that are zero arity functions will match this behavior # noqa: E501 + # NOTE: 20211122 - Chetan - Any future built-ins that are zero arity functions will match this behavior # noqa: E501 # FIXME CoP pass elif len(parse_results) == 1 and parse_results[0] not in suite_parameters: - # In this special case there were no operations to find, so only one value, but we don't have something to # noqa: E501 + # In this special case there were no operations to find, so only one value, but we don't have something to # noqa: E501 # FIXME CoP # substitute for that value - raise SuiteParameterError( # noqa: TRY003 + raise SuiteParameterError( # noqa: TRY003 # FIXME CoP f"No value found for $PARAMETER 
{parse_results[0]!s}" ) elif len(parse_results) == 1: - # In this case, we *do* have a substitution for a single type. We treat this specially because in this # noqa: E501 - # case, we allow complex type substitutions (i.e. do not coerce to string as part of parsing) # noqa: E501 - # NOTE: 20201023 - JPC - to support MetricDefinition as an suite parameter type, we need to handle that # noqa: E501 + # In this case, we *do* have a substitution for a single type. We treat this specially because in this # noqa: E501 # FIXME CoP + # case, we allow complex type substitutions (i.e. do not coerce to string as part of parsing) # noqa: E501 # FIXME CoP + # NOTE: 20201023 - JPC - to support MetricDefinition as an suite parameter type, we need to handle that # noqa: E501 # FIXME CoP # case here; is the suite parameter provided here in fact a metric definition? return suite_parameters[parse_results[0]] @@ -331,7 +331,7 @@ def parse_suite_parameter( # noqa: C901 else: err_str, err_line, err_col = parse_results[-1] - raise SuiteParameterError( # noqa: TRY003 + raise SuiteParameterError( # noqa: TRY003 # FIXME CoP f"Parse Failure: {err_str}\nStatement: {err_line}\nColumn: {err_col}" ) @@ -342,7 +342,7 @@ def parse_suite_parameter( # noqa: C901 exception_traceback = traceback.format_exc() exception_message = f'{type(e).__name__}: "{e!s}". Traceback: "{exception_traceback}".' 
logger.debug(exception_message, e, exc_info=True) - raise SuiteParameterError( # noqa: TRY003 + raise SuiteParameterError( # noqa: TRY003 # FIXME CoP f"Error while evaluating suite parameter expression: {e!s}" ) from e @@ -374,7 +374,7 @@ def _is_single_function_no_args(parse_results: Union[ParseResults, list]) -> boo ) -def _deduplicate_suite_parameter_dependencies(dependencies: dict) -> dict: # noqa: C901 - too complex +def _deduplicate_suite_parameter_dependencies(dependencies: dict) -> dict: # noqa: C901 # too complex deduplicated: dict = {} for suite_name, required_metrics in dependencies.items(): deduplicated[suite_name] = [] diff --git a/great_expectations/core/util.py b/great_expectations/core/util.py index a261485b5612..81f4f683bbca 100644 --- a/great_expectations/core/util.py +++ b/great_expectations/core/util.py @@ -32,8 +32,8 @@ if not LegacyRow: LegacyRow = SQLALCHEMY_NOT_IMPORTED -if not Row: # type: ignore[truthy-function] - Row = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc] +if not Row: # type: ignore[truthy-function] # FIXME CoP + Row = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc] # FIXME CoP SCHEMAS = { "api_np": { @@ -99,7 +99,7 @@ def nested_update( def in_jupyter_notebook(): try: - from IPython import get_ipython # type: ignore[import-not-found] + from IPython import get_ipython # type: ignore[import-not-found] # FIXME CoP shell = get_ipython().__class__.__name__ if shell == "ZMQInteractiveShell": @@ -137,9 +137,9 @@ def substitute_all_strftime_format_strings( """ This utility function will iterate over input data and for all strings, replace any strftime format elements using either the provided datetime_obj or the current datetime - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - datetime_obj = datetime_obj or datetime.datetime.now() # noqa: DTZ005 + datetime_obj = datetime_obj or datetime.datetime.now() # noqa: DTZ005 # FIXME CoP if isinstance(data, (dict, OrderedDict)): return { k: substitute_all_strftime_format_strings(v, 
datetime_obj=datetime_obj) @@ -159,22 +159,22 @@ def parse_string_to_datetime( datetime_string: str, datetime_format_string: Optional[str] = None ) -> datetime.datetime: if not isinstance(datetime_string, str): - raise gx_exceptions.SorterError( # noqa: TRY003 + raise gx_exceptions.SorterError( # noqa: TRY003 # FIXME CoP f"""Source "datetime_string" must have string type (actual type is "{type(datetime_string)!s}"). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) if not datetime_format_string: return dateutil.parser.parse(timestr=datetime_string) if datetime_format_string and not isinstance(datetime_format_string, str): - raise gx_exceptions.SorterError( # noqa: TRY003 + raise gx_exceptions.SorterError( # noqa: TRY003 # FIXME CoP f"""DateTime parsing formatter "datetime_format_string" must have string type (actual type is "{type(datetime_format_string)!s}"). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) - return datetime.datetime.strptime( # noqa: DTZ007 + return datetime.datetime.strptime( # noqa: DTZ007 # FIXME CoP datetime_string, datetime_format_string ) @@ -218,7 +218,7 @@ def __init__(self, url: str) -> None: search = re.search(AzureUrl.AZURE_BLOB_STORAGE_HTTPS_URL_REGEX_PATTERN, url) assert ( search is not None - ), "The provided URL does not adhere to the format specified by the Azure SDK (.blob.core.windows.net//)" # noqa: E501 + ), "The provided URL does not adhere to the format specified by the Azure SDK (.blob.core.windows.net//)" # noqa: E501 # FIXME CoP self._protocol = search.group(1) self._account_name = search.group(2) self._container = search.group(3) @@ -353,7 +353,7 @@ def convert_to_file_semantics_version(path: str) -> str: if re.search("^/dbfs", path): return path - raise ValueError("Path should start with either /dbfs or dbfs:") # noqa: TRY003 + raise ValueError("Path should start with either /dbfs or dbfs:") # noqa: TRY003 # FIXME CoP @staticmethod def convert_to_protocol_version(path: str) -> str: @@ -372,7 +372,7 @@ def 
convert_to_protocol_version(path: str) -> str: return path - raise ValueError("Path should start with either /dbfs or dbfs:") # noqa: TRY003 + raise ValueError("Path should start with either /dbfs or dbfs:") # noqa: TRY003 # FIXME CoP def sniff_s3_compression(s3_url: S3Url) -> Union[str, None]: @@ -388,8 +388,8 @@ def get_or_create_spark_application( # deprecated-v1.0.0 warnings.warn( - "Utility method get_or_create_spark_application() is deprecated and will be removed in v1.0.0. " # noqa: E501 - "Please pass your spark_config to the relevant Spark Datasource, or create your Spark Session outside of GX.", # noqa: E501 + "Utility method get_or_create_spark_application() is deprecated and will be removed in v1.0.0. " # noqa: E501 # FIXME CoP + "Please pass your spark_config to the relevant Spark Datasource, or create your Spark Session outside of GX.", # noqa: E501 # FIXME CoP category=DeprecationWarning, ) if force_reuse_spark_context is not None: @@ -397,8 +397,8 @@ def get_or_create_spark_application( warnings.warn( "force_reuse_spark_context is deprecated and will be removed in version 1.0. " "In environments that allow it, the existing Spark context will be reused, adding the " - "spark_config options that have been passed. If the Spark context cannot be updated with " # noqa: E501 - "the spark_config, the context will be stopped and restarted with the new spark_config.", # noqa: E501 + "spark_config options that have been passed. 
If the Spark context cannot be updated with " # noqa: E501 # FIXME CoP + "the spark_config, the context will be stopped and restarted with the new spark_config.", # noqa: E501 # FIXME CoP category=DeprecationWarning, ) return SparkDFExecutionEngine.get_or_create_spark_session( @@ -416,18 +416,18 @@ def get_or_create_spark_session( Returns: SparkSession - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP from great_expectations.execution_engine import SparkDFExecutionEngine # deprecated-v1.0.0 warnings.warn( "Utility method get_or_create_spark_session() is deprecated and will be removed in v1.0.0. " - "Please pass your spark_config to the relevant Spark Datasource, or create your Spark Session outside of GX.", # noqa: E501 + "Please pass your spark_config to the relevant Spark Datasource, or create your Spark Session outside of GX.", # noqa: E501 # FIXME CoP category=DeprecationWarning, ) return SparkDFExecutionEngine.get_or_create_spark_session( - spark_config=spark_config or {}, # type: ignore[arg-type] + spark_config=spark_config or {}, # type: ignore[arg-type] # FIXME CoP ) @@ -438,7 +438,7 @@ def get_sql_dialect_floating_point_infinity_value(schema: str, negative: bool = return -np.inf else: return np.inf - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if negative: return res["NegativeInfinity"] else: diff --git a/great_expectations/core/validation_definition.py b/great_expectations/core/validation_definition.py index 8dd944386683..d1f4f167c800 100644 --- a/great_expectations/core/validation_definition.py +++ b/great_expectations/core/validation_definition.py @@ -101,7 +101,7 @@ class Config: }, "id": "20dna816-64c8-46cb-8f7e-03c12cea1d67" } - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP json_encoders = { ExpectationSuite: lambda e: e.identifier_bundle(), BatchDefinition: lambda b: b.identifier_bundle(), @@ -115,11 +115,17 @@ class Config: @property @public_api def batch_definition(self) -> BatchDefinition: + """ + The Batch Definition to validate. 
+ """ return self.data @property @public_api def asset(self) -> DataAsset: + """ + The parent Data Asset of the Batch Definition. + """ return self.data.data_asset @property @@ -162,33 +168,33 @@ def is_fresh(self) -> ValidationDefinitionFreshnessDiagnostics: @validator("suite", pre=True) def _validate_suite(cls, v: dict | ExpectationSuite): - # Input will be a dict of identifiers if being deserialized or a suite object if being constructed by a user. # noqa: E501 + # Input will be a dict of identifiers if being deserialized or a suite object if being constructed by a user. # noqa: E501 # FIXME CoP if isinstance(v, dict): return cls._decode_suite(v) elif isinstance(v, ExpectationSuite): return v - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "Suite must be a dictionary (if being deserialized) or an ExpectationSuite object." ) @validator("data", pre=True) def _validate_data(cls, v: dict | BatchDefinition): - # Input will be a dict of identifiers if being deserialized or a rich type if being constructed by a user. # noqa: E501 + # Input will be a dict of identifiers if being deserialized or a rich type if being constructed by a user. # noqa: E501 # FIXME CoP if isinstance(v, dict): return cls._decode_data(v) elif isinstance(v, BatchDefinition): return v - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "Data must be a dictionary (if being deserialized) or a BatchDefinition object." ) @classmethod def _decode_suite(cls, suite_dict: dict) -> ExpectationSuite: - # Take in raw JSON, ensure it contains appropriate identifiers, and use them to retrieve the actual suite. # noqa: E501 + # Take in raw JSON, ensure it contains appropriate identifiers, and use them to retrieve the actual suite. 
# noqa: E501 # FIXME CoP try: suite_identifiers = _IdentifierBundle.parse_obj(suite_dict) except ValidationError as e: - raise ValueError("Serialized suite did not contain expected identifiers") from e # noqa: TRY003 + raise ValueError("Serialized suite did not contain expected identifiers") from e # noqa: TRY003 # FIXME CoP name = suite_identifiers.name id = suite_identifiers.id @@ -199,7 +205,7 @@ def _decode_suite(cls, suite_dict: dict) -> ExpectationSuite: try: config: dict = expectation_store.get(key) except gx_exceptions.InvalidKeyError as e: - raise ValueError(f"Could not find suite with name: {name} and id: {id}") from e # noqa: TRY003 + raise ValueError(f"Could not find suite with name: {name} and id: {id}") from e # noqa: TRY003 # FIXME CoP suite = ExpectationSuite(**config) if suite._include_rendered_content: @@ -208,11 +214,11 @@ def _decode_suite(cls, suite_dict: dict) -> ExpectationSuite: @classmethod def _decode_data(cls, data_dict: dict) -> BatchDefinition: - # Take in raw JSON, ensure it contains appropriate identifiers, and use them to retrieve the actual data. # noqa: E501 + # Take in raw JSON, ensure it contains appropriate identifiers, and use them to retrieve the actual data. 
# noqa: E501 # FIXME CoP try: data_identifiers = _EncodedValidationData.parse_obj(data_dict) except ValidationError as e: - raise ValueError("Serialized data did not contain expected identifiers") from e # noqa: TRY003 + raise ValueError("Serialized data did not contain expected identifiers") from e # noqa: TRY003 # FIXME CoP ds_name = data_identifiers.datasource.name asset_name = data_identifiers.asset.name @@ -222,20 +228,20 @@ def _decode_data(cls, data_dict: dict) -> BatchDefinition: try: ds = datasource_dict[ds_name] except KeyError as e: - raise ValueError(f"Could not find datasource named '{ds_name}'.") from e # noqa: TRY003 + raise ValueError(f"Could not find datasource named '{ds_name}'.") from e # noqa: TRY003 # FIXME CoP try: asset = ds.get_asset(asset_name) except LookupError as e: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"Could not find asset named '{asset_name}' within '{ds_name}' datasource." ) from e try: batch_definition = asset.get_batch_definition(batch_definition_name) except KeyError as e: - raise ValueError( # noqa: TRY003 - f"Could not find batch definition named '{batch_definition_name}' within '{asset_name}' asset and '{ds_name}' datasource." # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + f"Could not find batch definition named '{batch_definition_name}' within '{asset_name}' asset and '{ds_name}' datasource." 
# noqa: E501 # FIXME CoP ) from e return batch_definition @@ -275,7 +281,7 @@ def run( """ diagnostics = self.is_fresh() if not diagnostics.success: - # The validation definition itself is not added but all children are - we can add it for the user # noqa: E501 + # The validation definition itself is not added but all children are - we can add it for the user # noqa: E501 # FIXME CoP if not diagnostics.parent_added and diagnostics.children_added: self._add_to_store() else: @@ -365,6 +371,7 @@ def identifier_bundle(self) -> _IdentifierBundle: @public_api def save(self) -> None: + """Save the current state of this ValidationDefinition.""" store = project_manager.get_validation_definition_store() key = store.get_key(name=self.name, id=self.id) diff --git a/great_expectations/core/yaml_handler.py b/great_expectations/core/yaml_handler.py index 512259c2cf57..fdc92f464814 100644 --- a/great_expectations/core/yaml_handler.py +++ b/great_expectations/core/yaml_handler.py @@ -5,7 +5,7 @@ from ruamel.yaml import YAML -from great_expectations.alias_types import JSONValues # noqa: TCH001 +from great_expectations.alias_types import JSONValues # noqa: TCH001 # FIXME CoP class YAMLHandler: @@ -60,7 +60,7 @@ def load(self, stream: io.TextIOWrapper | str) -> dict[str, JSONValues]: Returns: The deserialized dictionary form of the input stream. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return self._handler.load(stream=stream) def dump( @@ -90,9 +90,9 @@ def dump( Returns: If no stream argument is provided, the str that results from ``_handler.dump()``. Otherwise, None as the ``_handler.dump()`` works in place and will exercise the handler accordingly. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if stream: - return self._dump(data=data, stream=stream, **kwargs) # type: ignore[func-returns-value] + return self._dump(data=data, stream=stream, **kwargs) # type: ignore[func-returns-value] # FIXME CoP return self._dump_and_return_value(data=data, **kwargs) def _dump(self, data: dict, stream, **kwargs) -> None: diff --git a/great_expectations/data_context/data_context/abstract_data_context.py b/great_expectations/data_context/data_context/abstract_data_context.py index 1cd57b3e6a9b..686a1205b25a 100644 --- a/great_expectations/data_context/data_context/abstract_data_context.py +++ b/great_expectations/data_context/data_context/abstract_data_context.py @@ -29,6 +29,7 @@ import great_expectations as gx import great_expectations.exceptions as gx_exceptions from great_expectations._docs_decorators import ( + deprecated_method_or_class, new_argument, new_method_or_class, public_api, @@ -175,9 +176,9 @@ class AbstractDataContext(ConfigPeer, ABC): - ✅ - ✅ - ✅ - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - # NOTE: These can become a property like ExpectationsStore.__name__ or placed in a separate # noqa: E501 + # NOTE: These can become a property like ExpectationsStore.__name__ or placed in a separate # noqa: E501 # FIXME CoP # test_yml_config module so AbstractDataContext is not so cluttered. _ROOT_CONF_DIR = pathlib.Path.home() / ".great_expectations" _ROOT_CONF_FILE = _ROOT_CONF_DIR / "great_expectations.conf" @@ -189,18 +190,24 @@ class AbstractDataContext(ConfigPeer, ABC): # instance attribute type annotations fluent_config: GxConfig - def __init__(self, runtime_environment: Optional[dict] = None) -> None: + def __init__( + self, + runtime_environment: Optional[dict] = None, + user_agent_str: Optional[str] = None, + ) -> None: """ Constructor for AbstractDataContext. 
Will handle instantiation logic that is common to all DataContext objects Args: runtime_environment (dict): a dictionary of config variables that override those set in config_variables.yml and the environment - """ # noqa: E501 + user_agent_str (str | None): UserAgent string to be used in analytics events + """ # noqa: E501 # FIXME CoP if runtime_environment is None: runtime_environment = {} self.runtime_environment = runtime_environment + self._user_agent_str = user_agent_str self._config_provider = self._init_config_provider() self._config_variables = self._load_config_variables() @@ -210,7 +217,7 @@ def __init__(self, runtime_environment: Optional[dict] = None) -> None: self.fluent_config = self._load_fluent_config(self._config_provider) # Init plugin support - if self.plugins_directory is not None and os.path.exists( # noqa: PTH110 + if self.plugins_directory is not None and os.path.exists( # noqa: PTH110 # FIXME CoP self.plugins_directory ): sys.path.append(self.plugins_directory) @@ -224,7 +231,7 @@ def __init__(self, runtime_environment: Optional[dict] = None) -> None: self._stores: dict = {} self._init_primary_stores(self.project_config_with_variables_substituted.stores) - # The DatasourceStore is inherent to all DataContexts but is not an explicit part of the project config. # noqa: E501 + # The DatasourceStore is inherent to all DataContexts but is not an explicit part of the project config. # noqa: E501 # FIXME CoP # As such, it must be instantiated separately. 
self._datasource_store = self._init_datasource_store() self._init_datasources() @@ -269,6 +276,7 @@ def _init_analytics(self) -> None: data_context_id=self._data_context_id, organization_id=None, oss_id=self._get_oss_id(), + user_agent_str=self._user_agent_str, ) def _determine_analytics_enabled(self) -> bool: @@ -301,7 +309,7 @@ def _register_providers(self, config_provider: _ConfigurationProvider) -> None: - Config variables - Environment variables - Runtime environment - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP config_variables_file_path = self._project_config.config_variables_file_path if config_variables_file_path: config_provider.register_provider( @@ -331,7 +339,7 @@ def _save_project_config(self) -> None: - FileDataContext : Filesystem. - CloudDataContext : Cloud endpoint - Ephemeral : not saved, and logging message outputted - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return self.variables.save() @public_api @@ -346,6 +354,15 @@ def enable_analytics(self, enable: Optional[bool]) -> None: self._init_analytics() self.variables.save() + def set_user_agent_str(self, user_agent_str: Optional[str]) -> None: + """ + Set the user agent string for this DataContext. + + This method is used by GX internally for analytics tracking. + """ + self._user_agent_str = user_agent_str + self._init_analytics() + @public_api def update_project_config( self, project_config: DataContextConfig | Mapping @@ -401,7 +418,7 @@ def root_directory(self) -> Optional[str]: # TODO: This should be a `pathlib.Pa """The root directory for configuration objects in the data context; the location in which ``great_expectations.yml`` is located. """ - # NOTE: Why does this exist in AbstractDataContext? CloudDataContext and # noqa: E501 + # NOTE: Why does this exist in AbstractDataContext? CloudDataContext and # noqa: E501 # FIXME CoP # FileDataContext both use it. 
Determine whether this should stay here or in child classes return getattr(self, "_context_root_directory", None) @@ -412,7 +429,7 @@ def project_config_with_variables_substituted(self) -> DataContextConfig: @property def plugins_directory(self) -> Optional[str]: """The directory in which custom plugin modules should be placed.""" - # NOTE: Why does this exist in AbstractDataContext? CloudDataContext and # noqa: E501 + # NOTE: Why does this exist in AbstractDataContext? CloudDataContext and # noqa: E501 # FIXME CoP # FileDataContext both use it. Determine whether this should stay here or in child classes return self._normalize_absolute_or_relative_path(self.variables.plugins_directory) @@ -428,8 +445,11 @@ def datasource_store(self) -> DatasourceStore: @property @public_api def suites(self) -> SuiteFactory: + """ + Responsible for basic CRUD operations on a context's ExpectationSuites. + """ if not self._suites: - raise gx_exceptions.DataContextError( # noqa: TRY003 + raise gx_exceptions.DataContextError( # noqa: TRY003 # FIXME CoP "DataContext requires a configured ExpectationsStore to persist ExpectationSuites." ) return self._suites @@ -437,8 +457,11 @@ def suites(self) -> SuiteFactory: @property @public_api def checkpoints(self) -> CheckpointFactory: + """ + Responsible for basic CRUD operations on a context's Checkpoints. + """ if not self._checkpoints: - raise gx_exceptions.DataContextError( # noqa: TRY003 + raise gx_exceptions.DataContextError( # noqa: TRY003 # FIXME CoP "DataContext requires a configured CheckpointStore to persist Checkpoints." ) return self._checkpoints @@ -446,8 +469,11 @@ def checkpoints(self) -> CheckpointFactory: @property @public_api def validation_definitions(self) -> ValidationDefinitionFactory: + """ + Responsible for basic CRUD operations on a context's ValidationDefinitions. 
+ """ if not self._validation_definitions: - raise gx_exceptions.DataContextError( # noqa: TRY003 + raise gx_exceptions.DataContextError( # noqa: TRY003 # FIXME CoP "DataContext requires a configured ValidationDefinitionStore to persist " "Validations." ) @@ -507,7 +533,7 @@ def checkpoint_store_name(self) -> Optional[str]: return name if CheckpointStore.default_checkpoints_exist( - directory_path=self.root_directory # type: ignore[arg-type] + directory_path=self.root_directory # type: ignore[arg-type] # FIXME CoP ): return DataContextConfigDefaults.DEFAULT_CHECKPOINT_STORE_NAME.value @@ -531,6 +557,9 @@ def checkpoint_store(self) -> CheckpointStore: @property @public_api def data_sources(self) -> DataSourceManager: + """ + Responsible for basic CRUD operations on a context's DataSources. + """ return self._data_sources @property @@ -546,14 +575,14 @@ def _add_fluent_datasource( datasource_name = kwargs.get("name", "") if not datasource_name: - raise gx_exceptions.DataContextError( # noqa: TRY003 + raise gx_exceptions.DataContextError( # noqa: TRY003 # FIXME CoP "Can not write the fluent datasource, because no name was provided." ) # We currently don't allow one to overwrite a datasource with this internal method if datasource_name in self.data_sources.all(): - raise gx_exceptions.DataContextError( # noqa: TRY003 - f"Can not write the fluent datasource {datasource_name} because a datasource of that " # noqa: E501 + raise gx_exceptions.DataContextError( # noqa: TRY003 # FIXME CoP + f"Can not write the fluent datasource {datasource_name} because a datasource of that " # noqa: E501 # FIXME CoP "name already exists in the data context." ) @@ -580,7 +609,7 @@ def _update_fluent_datasource( datasource_name = kwargs.get("name", "") if not datasource_name: - raise gx_exceptions.DataContextError( # noqa: TRY003 + raise gx_exceptions.DataContextError( # noqa: TRY003 # FIXME CoP "Can not write the fluent datasource, because no name was provided." 
) @@ -606,7 +635,7 @@ def _delete_fluent_datasource(self, name: str, _call_store: bool = True) -> None """ _call_store = False allows for local deletes without deleting the persisted storage datasource. This should generally be avoided. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP self.fluent_config.pop_datasource(name, None) datasource = self.data_sources.all().get(name) if datasource: @@ -698,10 +727,10 @@ def _validate_add_datasource_args( error_message += " (but not both)" raise TypeError(error_message) - # "type" is only used in FDS so we check for its existence (equivalent for block-style would be "class_name" and "module_name") # noqa: E501 + # "type" is only used in FDS so we check for its existence (equivalent for block-style would be "class_name" and "module_name") # noqa: E501 # FIXME CoP if "type" in kwargs: - raise TypeError( # noqa: TRY003 - "Creation of fluent-datasources with individual arguments is not supported and should be done through the `context.sources` API." # noqa: E501 + raise TypeError( # noqa: TRY003 # FIXME CoP + "Creation of fluent-datasources with individual arguments is not supported and should be done through the `context.data_sources` API." # noqa: E501 # FIXME CoP ) def _add_datasource( @@ -717,7 +746,7 @@ def _add_datasource( datasource=datasource, ) else: - raise DataContextError("Datasource is not a FluentDatasource") # noqa: TRY003 + raise DataContextError("Datasource is not a FluentDatasource") # noqa: TRY003 # FIXME CoP return datasource def update_datasource( @@ -761,6 +790,7 @@ def add_or_update_datasource( ... @new_method_or_class(version="0.15.48") + @deprecated_method_or_class(version="1.3.0") def add_or_update_datasource( self, name: str | None = None, @@ -777,7 +807,13 @@ def add_or_update_datasource( Returns: The Datasource added or updated by the input `kwargs`. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP + # deprecated-v1.3.0 + warnings.warn( + "add_or_update_datasource() from the DataContext is deprecated and will be removed " + "in a future version of GX. Please use `context.data_sources.add_or_update` instead.", + category=DeprecationWarning, + ) self._validate_add_datasource_args(name=name, datasource=datasource) return_datasource: FluentDatasource @@ -791,7 +827,7 @@ def add_or_update_datasource( return_datasource = self.data_sources.all()[name] else: if datasource is None: - raise ValueError("Either datasource or kwargs are required") # noqa: TRY003 + raise ValueError("Either datasource or kwargs are required") # noqa: TRY003 # FIXME CoP if datasource.name in self.data_sources.all(): self._update_fluent_datasource(datasource=datasource) else: @@ -802,7 +838,7 @@ def add_or_update_datasource( def get_site_names(self) -> List[str]: """Get a list of configured site names.""" - return list(self.variables.data_docs_sites.keys()) # type: ignore[union-attr] + return list(self.variables.data_docs_sites.keys()) # type: ignore[union-attr] # FIXME CoP def get_config_with_variables_substituted( self, config: Optional[DataContextConfig] = None @@ -812,7 +848,7 @@ def get_config_with_variables_substituted( in order of precedence: gx_cloud_config (for Data Contexts in GX Cloud mode), runtime_environment, environment variables, config_variables, or gx_cloud_config_variable_defaults (allows certain variables to be optional in GX Cloud mode). 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if not config: config = self._project_config return DataContextConfig(**self.config_provider.substitute_config(config)) @@ -823,12 +859,12 @@ def list_stores(self) -> List[Store]: for ( name, value, - ) in self.variables.stores.items(): # type: ignore[union-attr] + ) in self.variables.stores.items(): # type: ignore[union-attr] # FIXME CoP store_config = copy.deepcopy(value) store_config["name"] = name masked_config = PasswordMasker.sanitize_config(store_config) stores.append(masked_config) - return stores # type: ignore[return-value] + return stores # type: ignore[return-value] # FIXME CoP def list_active_stores(self) -> List[Store]: """ @@ -836,21 +872,21 @@ def list_active_stores(self) -> List[Store]: expectations_store_name, validation_results_store_name, checkpoint_store_name - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP active_store_names: List[str] = [ - self.expectations_store_name, # type: ignore[list-item] - self.validation_results_store_name, # type: ignore[list-item] + self.expectations_store_name, # type: ignore[list-item] # FIXME CoP + self.validation_results_store_name, # type: ignore[list-item] # FIXME CoP ] try: - active_store_names.append(self.checkpoint_store_name) # type: ignore[arg-type] + active_store_names.append(self.checkpoint_store_name) # type: ignore[arg-type] # FIXME CoP except (AttributeError, gx_exceptions.InvalidTopLevelConfigKeyError): logger.info("Checkpoint store is not configured; omitting it from active stores") return [ store for store in self.list_stores() - if store.get("name") in active_store_names # type: ignore[arg-type,operator] + if store.get("name") in active_store_names # type: ignore[arg-type,operator] # FIXME CoP ] def get_datasource(self, name: str = "default") -> FluentDatasource: @@ -908,7 +944,7 @@ def add_data_docs_site(self, site_name: str, site_config: DataDocsSiteConfigType """ if self.config.data_docs_sites is not None: if site_name in 
self.config.data_docs_sites: - raise gx_exceptions.InvalidKeyError( # noqa: TRY003 + raise gx_exceptions.InvalidKeyError( # noqa: TRY003 # FIXME CoP f"Data Docs Site `{site_name}` already exists in the Data Context." ) @@ -944,7 +980,7 @@ def update_data_docs_site( """ if self.config.data_docs_sites is not None: if site_name not in self.config.data_docs_sites: - raise gx_exceptions.InvalidKeyError( # noqa: TRY003 + raise gx_exceptions.InvalidKeyError( # noqa: TRY003 # FIXME CoP f"Data Docs Site `{site_name}` does not already exist in the Data Context." ) @@ -963,7 +999,7 @@ def delete_data_docs_site(self, site_name: str): """ if self.config.data_docs_sites is not None: if site_name not in self.config.data_docs_sites: - raise gx_exceptions.InvalidKeyError( # noqa: TRY003 + raise gx_exceptions.InvalidKeyError( # noqa: TRY003 # FIXME CoP f"Data Docs Site `{site_name}` does not already exist in the Data Context." ) @@ -983,7 +1019,7 @@ def delete_store(self, name: str) -> None: StoreConfigurationError if the target Store is not found. """ if name not in self.config.stores and name not in self._stores: - raise gx_exceptions.StoreConfigurationError( # noqa: TRY003 + raise gx_exceptions.StoreConfigurationError( # noqa: TRY003 # FIXME CoP f'Attempted to delete a store named: "{name}". It is not a configured store.' 
) @@ -1016,13 +1052,13 @@ def delete_datasource(self, name: Optional[str]) -> None: """ if not name: - raise ValueError("Datasource names must be a datasource name") # noqa: TRY003 + raise ValueError("Datasource names must be a datasource name") # noqa: TRY003 # FIXME CoP self._delete_fluent_datasource(name) self._save_project_config() - def get_validator( # noqa: PLR0913 + def get_validator( # noqa: PLR0913 # FIXME CoP self, datasource_name: Optional[str] = None, data_connector_name: Optional[str] = None, @@ -1104,7 +1140,7 @@ def get_validator( # noqa: PLR0913 ValueError: If more than one exclusive parameter is specified (ex: specifing more than one of `batch_data`, `query` or `path`), or if the `ExpectationSuite` cannot be created or retrieved using either the provided name or identifier - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP expectation_suite = self._get_expectation_suite_from_inputs( expectation_suite=expectation_suite, expectation_suite_name=expectation_suite_name, @@ -1141,7 +1177,7 @@ def get_validator( # noqa: PLR0913 batch_list=batch_list, ) - def _get_batch_list_from_inputs( # noqa: PLR0913 + def _get_batch_list_from_inputs( # noqa: PLR0913 # FIXME CoP self, datasource_name: str | None, data_connector_name: str | None, @@ -1179,8 +1215,8 @@ def _get_batch_list_from_inputs( # noqa: PLR0913 ) > 1 ): - raise ValueError( # noqa: TRY003 - "No more than one of batch, batch_list, batch_request, or batch_request_list can be specified" # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "No more than one of batch, batch_list, batch_request, or batch_request_list can be specified" # noqa: E501 # FIXME CoP ) if batch_list: @@ -1193,7 +1229,7 @@ def _get_batch_list_from_inputs( # noqa: PLR0913 if not batch_request_list: # batch_request could actually be None here since we do explicit None checks in the # sum check above while here we do a truthy check. 
- batch_request_list = [batch_request] # type: ignore[list-item] + batch_request_list = [batch_request] # type: ignore[list-item] # FIXME CoP for batch_req in batch_request_list: computed_batch_list.append( self.get_last_batch( @@ -1243,7 +1279,7 @@ def _get_expectation_suite_from_inputs( Raises: ValueError if the inputs are not valid - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if ( sum( bool(x) @@ -1256,7 +1292,7 @@ def _get_expectation_suite_from_inputs( ) > 1 ): - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "No more than one of expectation_suite_name, " f"{'expectation_suite_id, ' if expectation_suite_id else ''}" "expectation_suite, or create_expectation_suite_with_name can be specified" @@ -1294,13 +1330,13 @@ def get_validator_using_batch_list( """ if len(batch_list) == 0: - raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 + raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 # FIXME CoP """Validator could not be created because BatchRequest returned an empty batch_list. Please check your parameters and try again.""" ) - # We get a single batch_definition so we can get the execution_engine here. All batches will share the same one # noqa: E501 - # So the batch itself doesn't matter. But we use -1 because that will be the latest batch loaded. # noqa: E501 + # We get a single batch_definition so we can get the execution_engine here. All batches will share the same one # noqa: E501 # FIXME CoP + # So the batch itself doesn't matter. But we use -1 because that will be the latest batch loaded. 
# noqa: E501 # FIXME CoP execution_engine: ExecutionEngine batch = batch_list[-1] assert isinstance(batch, FluentBatch) @@ -1316,7 +1352,7 @@ def get_validator_using_batch_list( return validator - def get_last_batch( # noqa: PLR0913 + def get_last_batch( # noqa: PLR0913 # FIXME CoP self, datasource_name: Optional[str] = None, data_connector_name: Optional[str] = None, @@ -1388,7 +1424,7 @@ def get_last_batch( # noqa: PLR0913 ValueError: If more than one exclusive parameter is specified (ex: specifing more than one of `batch_data`, `query` or `path`) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return self._get_last_batch( datasource_name=datasource_name, data_connector_name=data_connector_name, @@ -1413,7 +1449,7 @@ def get_last_batch( # noqa: PLR0913 **kwargs, ) - def _get_last_batch( # noqa: PLR0913 + def _get_last_batch( # noqa: PLR0913 # FIXME CoP self, datasource_name: Optional[str] = None, data_connector_name: Optional[str] = None, @@ -1478,8 +1514,8 @@ def _validate_datasource_names(self, datasource_names: list[str] | str | None) - elif isinstance(datasource_names, str): datasource_names = [datasource_names] elif not isinstance(datasource_names, list): - raise ValueError( # noqa: TRY003 - "Datasource names must be a datasource name, list of datasource names or None (to list all datasources)" # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "Datasource names must be a datasource name, list of datasource names or None (to list all datasources)" # noqa: E501 # FIXME CoP ) return datasource_names @@ -1500,7 +1536,7 @@ def get_available_data_asset_names( Raises: ValueError: `datasource_names` is not None, a string, or list of strings. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP fluent_data_asset_names: dict[str, BlockConfigDataAssetNames | FluentDataAssetNames] = {} datasource_names = self._validate_datasource_names(datasource_names) @@ -1519,8 +1555,8 @@ def get_available_data_asset_names( fluent_data_asset_names[datasource_names[0]] = sorted(datasource.get_asset_names()) else: - raise ValueError( # noqa: TRY003 - "If providing batch kwargs generator, you must either specify one for each datasource or only " # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "If providing batch kwargs generator, you must either specify one for each datasource or only " # noqa: E501 # FIXME CoP "one datasource." ) else: # generator_names is None @@ -1554,7 +1590,7 @@ def build_batch_kwargs( Returns: BatchKwargs - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP datasource_obj = self.data_sources.get(datasource) batch_kwargs = datasource_obj.build_batch_kwargs( batch_kwargs_generator=batch_kwargs_generator, @@ -1647,7 +1683,7 @@ def get_docs_sites_urls( # Filter out sites that are not in site_names sites = ( - {k: v for k, v in unfiltered_sites.items() if k in site_names} # type: ignore[union-attr] + {k: v for k, v in unfiltered_sites.items() if k in site_names} # type: ignore[union-attr] # FIXME CoP if site_names else unfiltered_sites ) @@ -1659,7 +1695,7 @@ def get_docs_sites_urls( if site_name: if site_name not in sites: - raise gx_exceptions.DataContextError( # noqa: TRY003 + raise gx_exceptions.DataContextError( # noqa: TRY003 # FIXME CoP f"Could not find site named {site_name}. 
Please check your configurations" ) site = sites[site_name] @@ -1709,14 +1745,14 @@ def clean_data_docs(self, site_name=None) -> bool: """ data_docs_sites = self.variables.data_docs_sites if not data_docs_sites: - raise gx_exceptions.DataContextError( # noqa: TRY003 - "No data docs sites were found on this DataContext, therefore no sites will be cleaned.", # noqa: E501 + raise gx_exceptions.DataContextError( # noqa: TRY003 # FIXME CoP + "No data docs sites were found on this DataContext, therefore no sites will be cleaned.", # noqa: E501 # FIXME CoP ) data_docs_site_names = list(data_docs_sites.keys()) if site_name: if site_name not in data_docs_site_names: - raise gx_exceptions.DataContextError( # noqa: TRY003 + raise gx_exceptions.DataContextError( # noqa: TRY003 # FIXME CoP f"The specified site name `{site_name}` does not exist in this project." ) return self._clean_data_docs_site(site_name) @@ -1764,10 +1800,10 @@ def _get_global_config_value( assert (conf_file_section and conf_file_option) or ( not conf_file_section and not conf_file_option ), "Must pass both 'conf_file_section' and 'conf_file_option' or neither." 
- if environment_variable and os.environ.get( # noqa: TID251 + if environment_variable and os.environ.get( # noqa: TID251 # FIXME CoP environment_variable, "" ): - return os.environ.get(environment_variable) # noqa: TID251 + return os.environ.get(environment_variable) # noqa: TID251 # FIXME CoP if conf_file_section and conf_file_option: for config_path in AbstractDataContext.GLOBAL_CONFIG_PATHS: config: configparser.ConfigParser = configparser.ConfigParser() @@ -1780,7 +1816,7 @@ def _get_global_config_value( return None @staticmethod - def _get_metric_configuration_tuples( # noqa: C901 + def _get_metric_configuration_tuples( # noqa: C901 # FIXME CoP metric_configuration: Union[str, dict], base_kwargs: Optional[dict] = None ) -> List[Tuple[str, Union[dict, Any]]]: if base_kwargs is None: @@ -1792,7 +1828,7 @@ def _get_metric_configuration_tuples( # noqa: C901 metric_configurations_list = [] for kwarg_name in metric_configuration: if not isinstance(metric_configuration[kwarg_name], dict): - raise gx_exceptions.DataContextError( # noqa: TRY003 + raise gx_exceptions.DataContextError( # noqa: TRY003 # FIXME CoP "Invalid metric_configuration: each key must contain a " "dictionary." ) if ( @@ -1800,12 +1836,12 @@ def _get_metric_configuration_tuples( # noqa: C901 ): # this special case allows a hash of multiple kwargs for metric_kwargs_id in metric_configuration[kwarg_name]: if base_kwargs != {}: - raise gx_exceptions.DataContextError( # noqa: TRY003 + raise gx_exceptions.DataContextError( # noqa: TRY003 # FIXME CoP "Invalid metric_configuration: when specifying " "metric_kwargs_id, no other keys or values may be defined." ) if not isinstance(metric_configuration[kwarg_name][metric_kwargs_id], list): - raise gx_exceptions.DataContextError( # noqa: TRY003 + raise gx_exceptions.DataContextError( # noqa: TRY003 # FIXME CoP "Invalid metric_configuration: each value must contain a " "list." 
) metric_configurations_list += [ @@ -1816,7 +1852,7 @@ def _get_metric_configuration_tuples( # noqa: C901 for kwarg_value in metric_configuration[kwarg_name]: base_kwargs.update({kwarg_name: kwarg_value}) if not isinstance(metric_configuration[kwarg_name][kwarg_value], list): - raise gx_exceptions.DataContextError( # noqa: TRY003 + raise gx_exceptions.DataContextError( # noqa: TRY003 # FIXME CoP "Invalid metric_configuration: each value must contain a " "list." ) for nested_configuration in metric_configuration[kwarg_name][kwarg_value]: @@ -1844,11 +1880,11 @@ def get_or_create_data_context_config( Raises: ValidationError if the input config does not adhere to the required shape of a DataContextConfig. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if isinstance(project_config, DataContextConfig): return project_config - # Roundtrip through schema validation to remove any illegal fields add/or restore any missing fields. # noqa: E501 + # Roundtrip through schema validation to remove any illegal fields add/or restore any missing fields. 
# noqa: E501 # FIXME CoP project_config_dict = dataContextConfigSchema.dump(project_config) project_config_dict = dataContextConfigSchema.load(project_config_dict) context_config: DataContextConfig = DataContextConfig(**project_config_dict) @@ -1866,10 +1902,10 @@ def _normalize_absolute_or_relative_path(self, path: Optional[str]) -> Optional[ """ if path is None: return None - if os.path.isabs(path): # noqa: PTH117 + if os.path.isabs(path): # noqa: PTH117 # FIXME CoP return path else: - return os.path.join(self.root_directory, path) # type: ignore[arg-type] # noqa: PTH118 + return os.path.join(self.root_directory, path) # type: ignore[arg-type] # noqa: PTH118 # FIXME CoP def _load_config_variables(self) -> Dict: config_var_provider = self.config_provider.get_provider( @@ -1892,7 +1928,7 @@ def _build_store_from_config(self, name: str, config: dict | StoreConfigTypedDic # Set suppress_store_backend_id = True if store is inactive and has a store_backend. if ( - name not in [store["name"] for store in self.list_active_stores()] # type: ignore[index] + name not in [store["name"] for store in self.list_active_stores()] # type: ignore[index] # FIXME CoP and config.get("store_backend") is not None ): config["store_backend"].update({"suppress_store_backend_id": True}) @@ -1950,14 +1986,14 @@ def _init_datasource_store(self) -> DatasourceStore: Please note that the DatasourceStore lacks the same extensibility that other analagous Stores do; a default implementation is provided based on the user's environment but is not customizable. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP raise NotImplementedError def _update_config_variables(self) -> None: """Updates config_variables cache by re-calling _load_config_variables(). 
Necessary after running methods that modify config AND could contain config_variables for credentials (example is add_datasource()) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP self._config_variables = self._load_config_variables() @classmethod @@ -1967,7 +2003,7 @@ def _get_oss_id(cls) -> uuid.UUID | None: If no such value is present, a new UUID is generated and written to disk for subsequent usage. If there is an error when reading from / writing to disk, we default to a NoneType. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP config = configparser.ConfigParser() if not cls._ROOT_CONF_FILE.exists(): @@ -2054,7 +2090,7 @@ def _construct_data_context_id(self) -> uuid.UUID | None: # Choose the id of the currently-configured expectations store, if it is a persistent store expectations_store = self.stores[self.expectations_store_name] if isinstance(expectations_store.store_backend, TupleStoreBackend): - # suppress_warnings since a warning will already have been issued during the store creation # noqa: E501 + # suppress_warnings since a warning will already have been issued during the store creation # noqa: E501 # FIXME CoP # if there was an invalid store config return expectations_store.store_backend_id_warnings_suppressed @@ -2062,7 +2098,7 @@ def _construct_data_context_id(self) -> uuid.UUID | None: else: return self.variables.data_context_id - def get_validation_result( # noqa: C901 + def get_validation_result( # noqa: C901 # FIXME CoP self, expectation_suite_name, run_id=None, @@ -2081,7 +2117,7 @@ def get_validation_result( # noqa: C901 Returns: validation_result - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if validation_results_store_name is None: validation_results_store_name = self.validation_results_store_name selected_store = self.stores[validation_results_store_name] @@ -2090,7 +2126,7 @@ def get_validation_result( # noqa: C901 # Get most recent run id # NOTE : This method requires a (potentially very inefficient) list_keys call. 
# It should probably move to live in an appropriate Store class, - # but when we do so, that Store will need to function as more than just a key-value Store. # noqa: E501 + # but when we do so, that Store will need to function as more than just a key-value Store. # noqa: E501 # FIXME CoP key_list = selected_store.list_keys() filtered_key_list = [] for key in key_list: @@ -2159,13 +2195,13 @@ def _store_metrics(self, requested_metrics, validation_results, target_store_nam ) for expectation_suite_dependency, metrics_list in requested_metrics.items(): - if (expectation_suite_dependency != "*") and ( # noqa: PLR1714 + if (expectation_suite_dependency != "*") and ( # noqa: PLR1714 # FIXME CoP expectation_suite_dependency != expectation_suite_name ): continue if not isinstance(metrics_list, list): - raise gx_exceptions.DataContextError( # noqa: TRY003 + raise gx_exceptions.DataContextError( # noqa: TRY003 # FIXME CoP "Invalid requested_metrics configuration: metrics requested for " "each expectation suite must be a list." ) @@ -2192,7 +2228,7 @@ def _store_metrics(self, requested_metrics, validation_results, target_store_nam except gx_exceptions.UnavailableMetricError: # This will happen frequently in larger pipelines logger.debug( - f"metric {metric_name} was requested by another expectation suite but is not available in " # noqa: E501 + f"metric {metric_name} was requested by another expectation suite but is not available in " # noqa: E501 # FIXME CoP "this validation result." ) @@ -2224,14 +2260,14 @@ def build_data_docs( URLs of the sites that *would* be built, but it does not build these sites. build_index: a flag if False, skips building the index page - Returns: A dictionary with the names of the updated data documentation sites as keys and the location info of their index.html files as values Raises: ClassInstantiationError: Site config in your Data Context config is not valid. 
- """ # noqa: E501 + + """ # noqa: E501 # FIXME CoP return self._build_data_docs( site_names=site_names, resource_identifiers=resource_identifiers, @@ -2323,7 +2359,7 @@ def view_validation_result(self, result: CheckpointResult) -> None: def _view_validation_result(self, result: CheckpointResult) -> None: validation_result_identifier = tuple(result.run_results.keys())[0] - self.open_data_docs(resource_identifier=validation_result_identifier) # type: ignore[arg-type] + self.open_data_docs(resource_identifier=validation_result_identifier) # type: ignore[arg-type] # FIXME CoP def escape_all_config_variables( self, @@ -2382,7 +2418,7 @@ def save_config_variable( Returns: None - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP config_variables = self.config_variables value = self.escape_all_config_variables( value, @@ -2390,24 +2426,24 @@ def save_config_variable( skip_if_substitution_variable=skip_if_substitution_variable, ) config_variables[name] = value - # Required to call _variables instead of variables property because we don't want to trigger substitutions # noqa: E501 + # Required to call _variables instead of variables property because we don't want to trigger substitutions # noqa: E501 # FIXME CoP config = self._variables.config config_variables_filepath = config.config_variables_file_path if not config_variables_filepath: - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 - "'config_variables_file_path' property is not found in config - setting it is required to use this feature" # noqa: E501 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP + "'config_variables_file_path' property is not found in config - setting it is required to use this feature" # noqa: E501 # FIXME CoP ) - config_variables_filepath = os.path.join( # noqa: PTH118 - self.root_directory, # type: ignore[arg-type] + config_variables_filepath = os.path.join( # noqa: PTH118 # FIXME CoP + self.root_directory, # type: ignore[arg-type] # FIXME CoP config_variables_filepath, 
) - os.makedirs( # noqa: PTH103 - os.path.dirname(config_variables_filepath), # noqa: PTH120 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.dirname(config_variables_filepath), # noqa: PTH120 # FIXME CoP exist_ok=True, ) - if not os.path.isfile(config_variables_filepath): # noqa: PTH113 + if not os.path.isfile(config_variables_filepath): # noqa: PTH113 # FIXME CoP logger.info(f"Creating new substitution_variables file at {config_variables_filepath}") with open(config_variables_filepath, "w") as template: template.write(CONFIG_VARIABLES_TEMPLATE) @@ -2418,7 +2454,7 @@ def save_config_variable( def _load_fluent_config(self, config_provider: _ConfigurationProvider) -> GxConfig: """Called at beginning of DataContext __init__ after config_providers init.""" logger.debug( - f"{self.__class__.__name__} has not implemented `_load_fluent_config()` returning empty `GxConfig`" # noqa: E501 + f"{self.__class__.__name__} has not implemented `_load_fluent_config()` returning empty `GxConfig`" # noqa: E501 # FIXME CoP ) return GxConfig(fluent_datasources=[]) diff --git a/great_expectations/data_context/data_context/cloud_data_context.py b/great_expectations/data_context/data_context/cloud_data_context.py index 041237806909..85a4cbfc5d0d 100644 --- a/great_expectations/data_context/data_context/cloud_data_context.py +++ b/great_expectations/data_context/data_context/cloud_data_context.py @@ -84,9 +84,9 @@ def __init__(self): @public_api class CloudDataContext(SerializableDataContext): - """Subclass of AbstractDataContext that contains functionality necessary to work in a GX Cloud-backed environment.""" # noqa: E501 + """Subclass of AbstractDataContext that contains functionality necessary to work in a GX Cloud-backed environment.""" # noqa: E501 # FIXME CoP - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, project_config: Optional[Union[DataContextConfig, Mapping]] = None, context_root_dir: Optional[PathStr] = None, @@ -95,6 +95,7 @@ def 
__init__( # noqa: PLR0913 cloud_base_url: Optional[str] = None, cloud_access_token: Optional[str] = None, cloud_organization_id: Optional[str] = None, + user_agent_str: Optional[str] = None, ) -> None: """ CloudDataContext constructor @@ -104,7 +105,7 @@ def __init__( # noqa: PLR0913 runtime_environment (dict): a dictionary of config variables that override both those set in config_variables.yml and the environment cloud_config (GXCloudConfig): GXCloudConfig corresponding to current CloudDataContext - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP self._check_if_latest_version() self._cloud_config = self.get_cloud_config( cloud_base_url=cloud_base_url, @@ -117,13 +118,14 @@ def __init__( # noqa: PLR0913 ) self._project_config = self._init_project_config(project_config) - # The DataAssetStore is relevant only for CloudDataContexts and is not an explicit part of the project config. # noqa: E501 + # The DataAssetStore is relevant only for CloudDataContexts and is not an explicit part of the project config. # noqa: E501 # FIXME CoP # As such, it must be instantiated separately. self._data_asset_store = self._init_data_asset_store() super().__init__( context_root_dir=self._context_root_directory, runtime_environment=runtime_environment, + user_agent_str=user_agent_str, ) def _check_if_latest_version(self) -> None: @@ -140,6 +142,7 @@ def _init_analytics(self) -> None: organization_id=uuid.UUID(organization_id) if organization_id else None, oss_id=self._get_oss_id(), cloud_mode=True, + user_agent_str=self._user_agent_str, ) def _get_cloud_user_id(self) -> uuid.UUID | None: @@ -203,7 +206,7 @@ def is_cloud_config_available( Returns: bool: Is all the information needed to build a cloud_config is available? 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP cloud_config_dict = cls._get_cloud_config_dict( cloud_base_url=cloud_base_url, cloud_access_token=cloud_access_token, @@ -221,13 +224,13 @@ def determine_context_root_directory( context_root_dir=context_root_dir, project_root_dir=project_root_dir ) if context_root_dir is None: - context_root_dir = os.getcwd() # noqa: PTH109 + context_root_dir = os.getcwd() # noqa: PTH109 # FIXME CoP logger.debug( f'context_root_dir was not provided - defaulting to current working directory "' f'{context_root_dir}".' ) - return os.path.abspath( # noqa: PTH100 - os.path.expanduser(context_root_dir) # noqa: PTH111 + return os.path.abspath( # noqa: PTH100 # FIXME CoP + os.path.expanduser(context_root_dir) # noqa: PTH111 # FIXME CoP ) @classmethod @@ -243,7 +246,7 @@ def retrieve_data_context_config_from_cloud( over the wire. :return: the configuration object retrieved from the Cloud API - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP response = cls._request_cloud_backend( cloud_config=cloud_config, resource="data_context_configuration" ) @@ -344,7 +347,7 @@ def _request_cloud_backend(cls, cloud_config: GXCloudConfig, resource: str) -> R try: response.raise_for_status() except HTTPError: - raise gx_exceptions.GXCloudError( # noqa: TRY003 + raise gx_exceptions.GXCloudError( # noqa: TRY003 # FIXME CoP f"Bad request made to GX Cloud; {response.text}", response=response ) @@ -377,7 +380,7 @@ def get_cloud_config( Raises: GXCloudError if a GX Cloud variable is missing - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP cloud_config_dict = cls._get_cloud_config_dict( cloud_base_url=cloud_base_url, cloud_access_token=cloud_access_token, @@ -391,8 +394,8 @@ def get_cloud_config( if len(missing_keys) > 0: missing_keys_str = [f'"{key}"' for key in missing_keys] global_config_path_str = [f'"{path}"' for path in super().GLOBAL_CONFIG_PATHS] - raise DataContextError( # noqa: TRY003 - f"{(', ').join(missing_keys_str)} arg(s) required for ge_cloud_mode 
but neither provided nor found in " # noqa: E501 + raise DataContextError( # noqa: TRY003 # FIXME CoP + f"{(', ').join(missing_keys_str)} arg(s) required for ge_cloud_mode but neither provided nor found in " # noqa: E501 # FIXME CoP f"environment or in global configs ({(', ').join(global_config_path_str)})." ) @@ -442,7 +445,7 @@ def _get_cloud_config_dict( @override def _init_datasources(self) -> None: # Note that Cloud does NOT populate self._datasources with existing objects on init. - # Objects are retrieved only when requested and are NOT cached (this differs in ephemeral/file-backed contexts). # noqa: E501 + # Objects are retrieved only when requested and are NOT cached (this differs in ephemeral/file-backed contexts). # noqa: E501 # FIXME CoP self._datasources = DatasourceDict( context=self, datasource_store=self._datasource_store, @@ -510,7 +513,7 @@ def _include_rendered_content(self) -> bool: @override def _init_variables(self) -> CloudDataContextVariables: ge_cloud_base_url: str = self.ge_cloud_config.base_url - ge_cloud_organization_id: str = self.ge_cloud_config.organization_id # type: ignore[assignment] + ge_cloud_organization_id: str = self.ge_cloud_config.organization_id # type: ignore[assignment] # FIXME CoP ge_cloud_access_token: str = self.ge_cloud_config.access_token variables = CloudDataContextVariables( @@ -529,7 +532,7 @@ def _construct_data_context_id(self) -> uuid.UUID | None: If not, it should choose the id stored in DataContextConfig. Returns: UUID to use as the data_context_id - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP org_id = self.ge_cloud_config.organization_id if org_id: return uuid.UUID(org_id) @@ -544,7 +547,7 @@ def get_config_with_variables_substituted( in order of precedence: cloud_config (for Data Contexts in GX Cloud mode), runtime_environment, environment variables, config_variables, or ge_cloud_config_variable_defaults (allows certain variables to be optional in GX Cloud mode). 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if not config: config = self.config @@ -567,10 +570,10 @@ def get_config_with_variables_substituted( ) logger.info( "Config variables were not found in environment or global config (" - f"{self.GLOBAL_CONFIG_PATHS}). Using default values instead. {missing_config_var_repr} ;" # noqa: E501 + f"{self.GLOBAL_CONFIG_PATHS}). Using default values instead. {missing_config_var_repr} ;" # noqa: E501 # FIXME CoP " If you would like to " "use a different value, please specify it in an environment variable or in a " - "great_expectations.conf file located at one of the above paths, in a section named " # noqa: E501 + "great_expectations.conf file located at one of the above paths, in a section named " # noqa: E501 # FIXME CoP '"ge_cloud_config".' ) @@ -619,7 +622,7 @@ def _save_project_config(self) -> None: Explicitly override base class implementation to retain legacy behavior. """ logger.debug( - "CloudDataContext._save_project_config() was called. Base class impl was override to be no-op to retain " # noqa: E501 + "CloudDataContext._save_project_config() was called. Base class impl was override to be no-op to retain " # noqa: E501 # FIXME CoP "legacy behavior." 
) diff --git a/great_expectations/data_context/data_context/context_factory.py b/great_expectations/data_context/data_context/context_factory.py index 568a828fb56a..21e6108b5dfd 100644 --- a/great_expectations/data_context/data_context/context_factory.py +++ b/great_expectations/data_context/data_context/context_factory.py @@ -29,6 +29,8 @@ from great_expectations.alias_types import PathStr from great_expectations.core.config_provider import _ConfigurationProvider + from great_expectations.core.factory import ValidationDefinitionFactory + from great_expectations.core.factory.suite_factory import SuiteFactory from great_expectations.data_context import ( AbstractDataContext, CloudDataContext, @@ -59,7 +61,7 @@ class ProjectManager: def __init__(self): self.__project = None - def get_project( # noqa: PLR0913 + def get_project( # noqa: PLR0913 # FIXME CoP self, project_config: DataContextConfig | Mapping | None = None, context_root_dir: PathStr | None = None, @@ -69,6 +71,7 @@ def get_project( # noqa: PLR0913 cloud_access_token: str | None = None, cloud_organization_id: str | None = None, cloud_mode: bool | None = None, + user_agent_str: str | None = None, mode: ContextModes | None = None, ) -> AbstractDataContext: self.__project = self._build_context( @@ -80,6 +83,7 @@ def get_project( # noqa: PLR0913 cloud_access_token=cloud_access_token, cloud_organization_id=cloud_organization_id, cloud_mode=cloud_mode, + user_agent_str=user_agent_str, mode=mode, ) return self.__project @@ -105,12 +109,18 @@ def get_validation_results_store(self) -> ValidationResultsStore: def get_validation_definition_store(self) -> ValidationDefinitionStore: return self._project.validation_definition_store + def get_validation_definitions_factory(self) -> ValidationDefinitionFactory: + return self._project.validation_definitions + def get_datasources(self) -> DatasourceDict: return self._project.data_sources.all() def get_validator(self, batch_request: BatchRequest) -> Validator: return 
self._project.get_validator(batch_request=batch_request) + def get_suite_factory(self) -> SuiteFactory: + return self._project.suites + def is_using_cloud(self) -> bool: from great_expectations.data_context import CloudDataContext @@ -147,7 +157,7 @@ def get_docs_sites_urls( def get_config_provider(self) -> _ConfigurationProvider: return self._project.config_provider - def _build_context( # noqa: PLR0913 + def _build_context( # noqa: PLR0913 # FIXME CoP self, project_config: DataContextConfig | Mapping | None = None, context_root_dir: PathStr | None = None, @@ -157,6 +167,7 @@ def _build_context( # noqa: PLR0913 cloud_access_token: str | None = None, cloud_organization_id: str | None = None, cloud_mode: bool | None = None, + user_agent_str: str | None = None, mode: ContextModes | None = None, ) -> AbstractDataContext: project_config = self._prepare_project_config(project_config) @@ -165,12 +176,14 @@ def _build_context( # noqa: PLR0913 "ephemeral": dict( project_config=project_config, runtime_environment=runtime_environment, + user_agent_str=user_agent_str, ), "file": dict( project_config=project_config, context_root_dir=context_root_dir, project_root_dir=project_root_dir or Path.cwd(), runtime_environment=runtime_environment, + user_agent_str=user_agent_str, ), "cloud": dict( project_config=project_config, @@ -180,6 +193,7 @@ def _build_context( # noqa: PLR0913 cloud_base_url=cloud_base_url, cloud_access_token=cloud_access_token, cloud_organization_id=cloud_organization_id, + user_agent_str=user_agent_str, ), None: dict( project_config=project_config, @@ -189,13 +203,14 @@ def _build_context( # noqa: PLR0913 cloud_base_url=cloud_base_url, cloud_access_token=cloud_access_token, cloud_organization_id=cloud_organization_id, + user_agent_str=user_agent_str, cloud_mode=cloud_mode, ), } try: kwargs = param_lookup[mode] except KeyError: - raise ValueError(f"Unknown mode {mode}. 
Please choose one of: ephemeral, file, cloud.") # noqa: TRY003 + raise ValueError(f"Unknown mode {mode}. Please choose one of: ephemeral, file, cloud.") # noqa: TRY003 # FIXME CoP from great_expectations.data_context.data_context import ( AbstractDataContext, @@ -214,7 +229,7 @@ def _build_context( # noqa: PLR0913 "ephemeral": EphemeralDataContext, "file": FileDataContext, "cloud": CloudDataContext, - None: AbstractDataContext, # type: ignore[type-abstract] + None: AbstractDataContext, # type: ignore[type-abstract] # FIXME CoP } context_fn_map: dict[ContextModes | None, Callable] = { @@ -229,15 +244,15 @@ def _build_context( # noqa: PLR0913 expected_type = expected_ctx_types[mode] if not isinstance(context, expected_type): - # example I want an ephemeral context but the presence of a GX_CLOUD env var gives me a cloud context # noqa: E501 + # example I want an ephemeral context but the presence of a GX_CLOUD env var gives me a cloud context # noqa: E501 # FIXME CoP # this kind of thing should not be possible but there may be some edge cases - raise ValueError( # noqa: TRY003, TRY004 - f"Provided mode {mode} returned context of type {type(context).__name__} instead of {expected_type.__name__}; please check your input arguments." # noqa: E501 + raise ValueError( # noqa: TRY003, TRY004 # FIXME CoP + f"Provided mode {mode} returned context of type {type(context).__name__} instead of {expected_type.__name__}; please check your input arguments." 
# noqa: E501 # FIXME CoP ) return context - def _get_default_context( # noqa: PLR0913 + def _get_default_context( # noqa: PLR0913 # FIXME CoP self, project_config: DataContextConfig | None = None, context_root_dir: PathStr | None = None, @@ -246,6 +261,7 @@ def _get_default_context( # noqa: PLR0913 cloud_base_url: str | None = None, cloud_access_token: str | None = None, cloud_organization_id: str | None = None, + user_agent_str: str | None = None, cloud_mode: bool | None = None, ) -> AbstractDataContext: """Infer which type of DataContext a user wants based on available parameters.""" @@ -259,6 +275,7 @@ def _get_default_context( # noqa: PLR0913 cloud_base_url=cloud_base_url, cloud_access_token=cloud_access_token, cloud_organization_id=cloud_organization_id, + user_agent_str=user_agent_str, ) if cloud_context: @@ -270,6 +287,7 @@ def _get_default_context( # noqa: PLR0913 context_root_dir=context_root_dir, project_root_dir=project_root_dir, runtime_environment=runtime_environment, + user_agent_str=user_agent_str, ) if file_context: return file_context @@ -278,6 +296,7 @@ def _get_default_context( # noqa: PLR0913 return self._get_ephemeral_context( project_config=project_config, runtime_environment=runtime_environment, + user_agent_str=user_agent_str, ) def _prepare_project_config( @@ -296,7 +315,7 @@ def _prepare_project_config( return project_config - def _get_cloud_context( # noqa: PLR0913 + def _get_cloud_context( # noqa: PLR0913 # FIXME CoP self, project_config: DataContextConfig | Mapping | None = None, context_root_dir: PathStr | None = None, @@ -305,6 +324,7 @@ def _get_cloud_context( # noqa: PLR0913 cloud_base_url: str | None = None, cloud_access_token: str | None = None, cloud_organization_id: str | None = None, + user_agent_str: str | None = None, cloud_mode: bool | None = None, ) -> CloudDataContext | None: from great_expectations.data_context.data_context import CloudDataContext @@ -325,11 +345,12 @@ def _get_cloud_context( # noqa: PLR0913 
cloud_base_url=cloud_base_url, cloud_access_token=cloud_access_token, cloud_organization_id=cloud_organization_id, + user_agent_str=user_agent_str, ) if cloud_mode and not config_available: - raise GXCloudConfigurationError( # noqa: TRY003 - "GX Cloud Mode enabled, but missing env vars: GX_CLOUD_ORGANIZATION_ID, GX_CLOUD_ACCESS_TOKEN" # noqa: E501 + raise GXCloudConfigurationError( # noqa: TRY003 # FIXME CoP + "GX Cloud Mode enabled, but missing env vars: GX_CLOUD_ORGANIZATION_ID, GX_CLOUD_ACCESS_TOKEN" # noqa: E501 # FIXME CoP ) return None @@ -340,6 +361,7 @@ def _get_file_context( context_root_dir: PathStr | None = None, project_root_dir: PathStr | None = None, runtime_environment: dict | None = None, + user_agent_str: str | None = None, ) -> FileDataContext | None: from great_expectations.data_context.data_context import FileDataContext @@ -349,6 +371,7 @@ def _get_file_context( context_root_dir=context_root_dir, project_root_dir=project_root_dir, runtime_environment=runtime_environment, + user_agent_str=user_agent_str, ) except gx_exceptions.ConfigNotFoundError: logger.info("Could not find local file-backed GX project") @@ -358,6 +381,7 @@ def _get_ephemeral_context( self, project_config: DataContextConfig | None = None, runtime_environment: dict | None = None, + user_agent_str: str | None = None, ) -> EphemeralDataContext: from great_expectations.data_context.data_context import EphemeralDataContext from great_expectations.data_context.types.base import ( @@ -373,6 +397,7 @@ def _get_ephemeral_context( return EphemeralDataContext( project_config=project_config, runtime_environment=runtime_environment, + user_agent_str=user_agent_str, ) @@ -390,6 +415,7 @@ def get_context( cloud_access_token: None = ..., cloud_organization_id: None = ..., cloud_mode: Literal[False] | None = ..., + user_agent_str: str | None = ..., mode: Literal["ephemeral"] = ..., ) -> EphemeralDataContext: ... 
@@ -397,13 +423,14 @@ def get_context( @overload def get_context( project_config: DataContextConfig | Mapping | None = ..., - context_root_dir: PathStr = ..., # If context_root_dir is provided, project_root_dir shouldn't be # noqa: E501 + context_root_dir: PathStr = ..., # If context_root_dir is provided, project_root_dir shouldn't be # noqa: E501 # FIXME CoP project_root_dir: None = ..., runtime_environment: dict | None = ..., cloud_base_url: None = ..., cloud_access_token: None = ..., cloud_organization_id: None = ..., cloud_mode: Literal[False] | None = ..., + user_agent_str: str | None = ..., ) -> FileDataContext: ... @@ -411,12 +438,13 @@ def get_context( def get_context( project_config: DataContextConfig | Mapping | None = ..., context_root_dir: None = ..., - project_root_dir: PathStr = ..., # If project_root_dir is provided, context_root_dir shouldn't be # noqa: E501 + project_root_dir: PathStr = ..., # If project_root_dir is provided, context_root_dir shouldn't be # noqa: E501 # FIXME CoP runtime_environment: dict | None = ..., cloud_base_url: None = ..., cloud_access_token: None = ..., cloud_organization_id: None = ..., cloud_mode: Literal[False] | None = ..., + user_agent_str: str | None = ..., mode: Literal["file"] | None = ..., ) -> FileDataContext: ... @@ -431,6 +459,7 @@ def get_context( cloud_access_token: str | None = ..., cloud_organization_id: str | None = ..., cloud_mode: Literal[True] = ..., + user_agent_str: str | None = ..., mode: Literal["cloud"] | None = ..., ) -> CloudDataContext: ... @@ -445,12 +474,13 @@ def get_context( cloud_access_token: str | None = ..., cloud_organization_id: str | None = ..., cloud_mode: bool | None = ..., + user_agent_str: str | None = ..., mode: None = ..., ) -> EphemeralDataContext | FileDataContext | CloudDataContext: ... 
@public_api -def get_context( # noqa: PLR0913 +def get_context( # noqa: PLR0913 # FIXME CoP project_config: DataContextConfig | Mapping | None = None, context_root_dir: PathStr | None = None, project_root_dir: PathStr | None = None, @@ -459,6 +489,7 @@ def get_context( # noqa: PLR0913 cloud_access_token: str | None = None, cloud_organization_id: str | None = None, cloud_mode: bool | None = None, + user_agent_str: str | None = None, mode: ContextModes | None = None, ) -> AbstractDataContext: """Method to return the appropriate Data Context depending on parameters and environment. @@ -521,6 +552,7 @@ def get_context( # noqa: PLR0913 cloud_organization_id: org_id for GX Cloud account. cloud_mode: whether to run GX in Cloud mode (default is None). If None, cloud mode is assumed if cloud credentials are set up. Set to False to override. + user_agent_str: Optional string, should be of format / mode: which mode to use. One of: ephemeral, file, cloud. Note: if mode is specified, cloud_mode is ignored. @@ -531,7 +563,7 @@ def get_context( # noqa: PLR0913 Raises: GXCloudConfigurationError: Cloud mode enabled, but missing configuration. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return project_manager.get_project( project_config=project_config, context_root_dir=context_root_dir, @@ -541,6 +573,7 @@ def get_context( # noqa: PLR0913 cloud_access_token=cloud_access_token, cloud_organization_id=cloud_organization_id, cloud_mode=cloud_mode, + user_agent_str=user_agent_str, mode=mode, ) diff --git a/great_expectations/data_context/data_context/ephemeral_data_context.py b/great_expectations/data_context/data_context/ephemeral_data_context.py index 46aedc2db0ef..f2727903d85c 100644 --- a/great_expectations/data_context/data_context/ephemeral_data_context.py +++ b/great_expectations/data_context/data_context/ephemeral_data_context.py @@ -27,12 +27,13 @@ @public_api class EphemeralDataContext(AbstractDataContext): - """Subclass of AbstractDataContext that uses runtime values to generate a temporary or in-memory DataContext.""" # noqa: E501 + """Subclass of AbstractDataContext that uses runtime values to generate a temporary or in-memory DataContext.""" # noqa: E501 # FIXME CoP def __init__( self, project_config: Union[DataContextConfig, Mapping], runtime_environment: Optional[dict] = None, + user_agent_str: str | None = None, ) -> None: """EphemeralDataContext constructor @@ -42,7 +43,7 @@ def __init__( """ self._project_config = self._init_project_config(project_config) - super().__init__(runtime_environment=runtime_environment) + super().__init__(runtime_environment=runtime_environment, user_agent_str=user_agent_str) @override def _init_project_config( diff --git a/great_expectations/data_context/data_context/file_data_context.py b/great_expectations/data_context/data_context/file_data_context.py index fbae32bb4273..94c41d7b3ff2 100644 --- a/great_expectations/data_context/data_context/file_data_context.py +++ b/great_expectations/data_context/data_context/file_data_context.py @@ -35,7 +35,7 @@ @public_api class FileDataContext(SerializableDataContext): - """Subclass of AbstractDataContext that 
contains functionality necessary to work in a filesystem-backed environment.""" # noqa: E501 + """Subclass of AbstractDataContext that contains functionality necessary to work in a filesystem-backed environment.""" # noqa: E501 # FIXME CoP def __init__( self, @@ -43,6 +43,7 @@ def __init__( context_root_dir: Optional[PathStr] = None, project_root_dir: Optional[PathStr] = None, runtime_environment: Optional[dict] = None, + user_agent_str: Optional[str] = None, ) -> None: """FileDataContext constructor @@ -52,7 +53,7 @@ def __init__( searches for the file based on conventions for project subdirectories. runtime_environment (Optional[dict]): a dictionary of config variables that override both those set in config_variables.yml and the environment - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP self._context_root_directory = self._init_context_root_directory( context_root_dir=context_root_dir, project_root_dir=project_root_dir, @@ -63,6 +64,7 @@ def __init__( super().__init__( context_root_dir=self._context_root_directory, runtime_environment=runtime_environment, + user_agent_str=user_agent_str, ) def _init_context_root_directory( @@ -123,7 +125,7 @@ def _init_datasource_store(self) -> DatasourceStore: "root_directory": self.root_directory, "data_context": self, # By passing this value in our runtime_environment, - # we ensure that the same exact context (memory address and all) is supplied to the Store backend # noqa: E501 + # we ensure that the same exact context (memory address and all) is supplied to the Store backend # noqa: E501 # FIXME CoP } datasource_store = DatasourceStore( @@ -161,7 +163,7 @@ def _save_project_config(self) -> None: if fluent_datasources: self.fluent_config.update_datasources(datasources=fluent_datasources) logger.info( - f"Saving {len(self.fluent_config.datasources)} Fluent Datasources to {config_filepath}" # noqa: E501 + f"Saving {len(self.fluent_config.datasources)} Fluent Datasources to {config_filepath}" # noqa: E501 # FIXME CoP ) 
fluent_json_dict: dict[str, JSONValues] = self.fluent_config._json_dict() fluent_json_dict = ( @@ -186,12 +188,12 @@ def _load_file_backed_project_config( config_commented_map_from_yaml = yaml.load(data) except DuplicateKeyError: - raise gx_exceptions.InvalidConfigurationYamlError( # noqa: TRY003 + raise gx_exceptions.InvalidConfigurationYamlError( # noqa: TRY003 # FIXME CoP "Error: duplicate key found in project YAML file." ) except YAMLError as err: - raise gx_exceptions.InvalidConfigurationYamlError( # noqa: TRY003 - f"Your configuration file is not a valid yml file likely due to a yml syntax error:\n\n{err}" # noqa: E501 + raise gx_exceptions.InvalidConfigurationYamlError( # noqa: TRY003 # FIXME CoP + f"Your configuration file is not a valid yml file likely due to a yml syntax error:\n\n{err}" # noqa: E501 # FIXME CoP ) except OSError: raise gx_exceptions.ConfigNotFoundError() @@ -200,7 +202,7 @@ def _load_file_backed_project_config( return DataContextConfig.from_commented_map( commented_map=config_commented_map_from_yaml ) - except gx_exceptions.InvalidDataContextConfigError: # noqa: TRY203 + except gx_exceptions.InvalidDataContextConfigError: # noqa: TRY203 # FIXME CoP # Just to be explicit about what we intended to catch raise diff --git a/great_expectations/data_context/data_context/serializable_data_context.py b/great_expectations/data_context/data_context/serializable_data_context.py index 7398ceff91e6..5346bd63f537 100644 --- a/great_expectations/data_context/data_context/serializable_data_context.py +++ b/great_expectations/data_context/data_context/serializable_data_context.py @@ -58,12 +58,16 @@ def __init__( self, context_root_dir: PathStr, runtime_environment: Optional[dict] = None, + user_agent_str: Optional[str] = None, ) -> None: if isinstance(context_root_dir, pathlib.Path): # TODO: (kilo59) 122022 should be saving and passing around `pathlib.Path` not str context_root_dir = str(context_root_dir) self._context_root_directory = context_root_dir - 
super().__init__(runtime_environment=runtime_environment) + super().__init__( + runtime_environment=runtime_environment, + user_agent_str=user_agent_str, + ) def _init_datasource_store(self): # type: ignore[explicit-override] # FIXME raise NotImplementedError # Required by parent ABC but this class is never instantiated @@ -93,8 +97,8 @@ def _resolve_context_root_dir_and_project_root_dir( cls, context_root_dir: PathStr | None, project_root_dir: PathStr | None ) -> PathStr | None: if project_root_dir and context_root_dir: - raise TypeError( # noqa: TRY003 - "'project_root_dir' and 'context_root_dir' are conflicting args; please only provide one" # noqa: E501 + raise TypeError( # noqa: TRY003 # FIXME CoP + "'project_root_dir' and 'context_root_dir' are conflicting args; please only provide one" # noqa: E501 # FIXME CoP ) if project_root_dir: @@ -130,7 +134,7 @@ def _create( Returns: DataContext - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP gx_dir = cls._scaffold( project_root_dir=project_root_dir, ) @@ -218,7 +222,7 @@ def _scaffold_directories(cls, base_dir: pathlib.Path) -> None: try: cls._scaffold_gitignore(base_dir) except Exception as e: - raise gx_exceptions.GitIgnoreScaffoldingError( # noqa: TRY003 + raise gx_exceptions.GitIgnoreScaffoldingError( # noqa: TRY003 # FIXME CoP f"Could not create .gitignore in {base_dir} because of an error: {e}" ) @@ -282,19 +286,19 @@ def find_context_root_dir(cls) -> str: yml_path = None gx_home_environment = os.getenv("GX_HOME") if gx_home_environment: - gx_home_environment = os.path.expanduser( # noqa: PTH111 + gx_home_environment = os.path.expanduser( # noqa: PTH111 # FIXME CoP gx_home_environment ) - if os.path.isdir( # noqa: PTH112 + if os.path.isdir( # noqa: PTH112 # FIXME CoP gx_home_environment - ) and os.path.isfile( # noqa: PTH113 - os.path.join(gx_home_environment, cls.GX_YML) # noqa: PTH118 + ) and os.path.isfile( # noqa: PTH113 # FIXME CoP + os.path.join(gx_home_environment, cls.GX_YML) # noqa: PTH118 # FIXME 
CoP ): result = gx_home_environment else: yml_path = cls._find_context_yml_file() if yml_path: - result = os.path.dirname(yml_path) # noqa: PTH120 + result = os.path.dirname(yml_path) # noqa: PTH120 # FIXME CoP if result is None: raise gx_exceptions.ConfigNotFoundError() @@ -317,25 +321,25 @@ def get_ge_config_version(cls, context_root_dir: Optional[PathStr] = None) -> Op @classmethod def set_ge_config_version( cls, - config_version: Union[int, float], # noqa: PYI041 + config_version: Union[int, float], # noqa: PYI041 # FIXME CoP context_root_dir: Optional[str] = None, validate_config_version: bool = True, ) -> bool: if not isinstance(config_version, (int, float)): - raise gx_exceptions.UnsupportedConfigVersionError( # noqa: TRY003 + raise gx_exceptions.UnsupportedConfigVersionError( # noqa: TRY003 # FIXME CoP "The argument `config_version` must be a number.", ) if validate_config_version: if config_version < MINIMUM_SUPPORTED_CONFIG_VERSION: - raise gx_exceptions.UnsupportedConfigVersionError( # noqa: TRY003 + raise gx_exceptions.UnsupportedConfigVersionError( # noqa: TRY003 # FIXME CoP f"""Invalid config version ({config_version})\n - The version number must be at least {MINIMUM_SUPPORTED_CONFIG_VERSION}""" # noqa: E501 + The version number must be at least {MINIMUM_SUPPORTED_CONFIG_VERSION}""" # noqa: E501 # FIXME CoP ) elif config_version > CURRENT_GX_CONFIG_VERSION: - raise gx_exceptions.UnsupportedConfigVersionError( # noqa: TRY003 + raise gx_exceptions.UnsupportedConfigVersionError( # noqa: TRY003 # FIXME CoP f"""Invalid config version ({config_version}).\n - The maximum valid version is {CURRENT_GX_CONFIG_VERSION}.""" # noqa: E501 + The maximum valid version is {CURRENT_GX_CONFIG_VERSION}.""" # noqa: E501 # FIXME CoP ) yml_path = cls._find_context_yml_file(search_start_dir=context_root_dir) diff --git a/great_expectations/data_context/data_context_variables.py b/great_expectations/data_context/data_context_variables.py index 9820d59f32a6..9384554e15b5 
100644 --- a/great_expectations/data_context/data_context_variables.py +++ b/great_expectations/data_context/data_context_variables.py @@ -74,7 +74,7 @@ class DataContextVariables(ABC): config: A reference to the DataContextConfig to perform CRUD on. config_provider: Responsible for determining config values and substituting them in GET calls. _store: An instance of a DataContextStore with the appropriate backend to persist config changes. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP config: DataContextConfig config_provider: _ConfigurationProvider @@ -120,7 +120,7 @@ def save(self) -> Any: """ Persist any changes made to variables utilizing the configured Store. """ - key: ConfigurationIdentifier = self.get_key() # type: ignore[assignment] + key: ConfigurationIdentifier = self.get_key() # type: ignore[assignment] # FIXME CoP return self.store.set(key=key, value=self.config) @property @@ -253,14 +253,14 @@ class FileDataContextVariables(DataContextVariables): def __post_init__(self) -> None: # Chetan - 20220607 - Although the above argument is not truly optional, we are - # required to use default values because the parent class defines arguments with default values # noqa: E501 + # required to use default values because the parent class defines arguments with default values # noqa: E501 # FIXME CoP # ("Fields without default values cannot appear after fields with default values"). # # Python 3.10 resolves this issue around dataclass inheritance using `kw_only=True` (https://docs.python.org/3/library/dataclasses.html) # This should be modified once our lowest supported version is 3.10. 
if self.data_context is None: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"A reference to a data context is required for {self.__class__.__name__}" ) @@ -274,7 +274,7 @@ def _init_store(self) -> DataContextStore: ) # Chetan - 20230222 - `instantiate_class_from_config` used in the Store constructor - # causes a runtime error with InlineStoreBackend due to attempting to deepcopy a DataContext. # noqa: E501 + # causes a runtime error with InlineStoreBackend due to attempting to deepcopy a DataContext. # noqa: E501 # FIXME CoP # # This should be resolved by moving the specific logic required from the context to a class # and injecting that object instead of the entire context. @@ -316,7 +316,7 @@ def _fluent_objects_stash( try: if config_fluent_datasources_stash: logger.info( - f"Stashing `FluentDatasource` during {type(self).__name__}.save() - {len(config_fluent_datasources_stash)} stashed" # noqa: E501 + f"Stashing `FluentDatasource` during {type(self).__name__}.save() - {len(config_fluent_datasources_stash)} stashed" # noqa: E501 # FIXME CoP ) for fluent_datasource_name in config_fluent_datasources_stash: self.data_context.data_sources.all().pop(fluent_datasource_name) @@ -342,7 +342,7 @@ class CloudDataContextVariables(DataContextVariables): def __post_init__(self) -> None: # Chetan - 20220607 - Although the above arguments are not truly optional, we are - # required to use default values because the parent class defines arguments with default values # noqa: E501 + # required to use default values because the parent class defines arguments with default values # noqa: E501 # FIXME CoP # ("Fields without default values cannot appear after fields with default values"). 
# # Python 3.10 resolves this issue around dataclass inheritance using `kw_only=True` (https://docs.python.org/3/library/dataclasses.html) @@ -356,8 +356,8 @@ def __post_init__(self) -> None: self.ge_cloud_access_token, ) ): - raise ValueError( # noqa: TRY003 - f"All of the following attributes are required for{ self.__class__.__name__}:\n self.ge_cloud_base_url\n self.ge_cloud_organization_id\n self.ge_cloud_access_token" # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + f"All of the following attributes are required for{ self.__class__.__name__}:\n self.ge_cloud_base_url\n self.ge_cloud_organization_id\n self.ge_cloud_access_token" # noqa: E501 # FIXME CoP ) @override @@ -391,7 +391,7 @@ def _init_store(self) -> DataContextStore: def get_key(self) -> GXCloudIdentifier: """ Generates a GX Cloud-specific key for use with Stores. See parent "DataContextVariables.get_key" for more details. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP from great_expectations.data_context.cloud_constants import GXCloudRESTResource key = GXCloudIdentifier(resource_type=GXCloudRESTResource.DATA_CONTEXT_VARIABLES) diff --git a/great_expectations/data_context/migrator/file_migrator.py b/great_expectations/data_context/migrator/file_migrator.py index 724db0fda5ef..957953b0ba32 100644 --- a/great_expectations/data_context/migrator/file_migrator.py +++ b/great_expectations/data_context/migrator/file_migrator.py @@ -93,7 +93,7 @@ def _migrate_primary_stores(self, target_stores: dict[str, Store]) -> None: ) else: logger.warning( - f"Could not migrate the contents of store {name}; only default named stores are migrated" # noqa: E501 + f"Could not migrate the contents of store {name}; only default named stores are migrated" # noqa: E501 # FIXME CoP ) def _migrate_datasource_store(self, target_store: DatasourceStore) -> None: diff --git a/great_expectations/data_context/store/_store_backend.py b/great_expectations/data_context/store/_store_backend.py index 
5a5f8880512e..303f1fc8cf4f 100644 --- a/great_expectations/data_context/store/_store_backend.py +++ b/great_expectations/data_context/store/_store_backend.py @@ -43,7 +43,7 @@ def __init__( suppress_store_backend_id: skip construction of a StoreBackend.store_backend_id manually_initialize_store_backend_id: UUID as a string to use if the store_backend_id is not already set store_name: store name given in the DataContextConfig (via either in-code or yaml configuration) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP self._fixed_length_key = fixed_length_key self._suppress_store_backend_id = suppress_store_backend_id self._manually_initialize_store_backend_id: str = ( @@ -76,7 +76,7 @@ def _construct_store_backend_id(self, suppress_warning: bool = False) -> Optiona if self._suppress_store_backend_id: if not suppress_warning: logger.warning( - f"You are attempting to access the store_backend_id of a store or store_backend named {self.store_name} that has been explicitly suppressed." # noqa: E501 + f"You are attempting to access the store_backend_id of a store or store_backend named {self.store_name} that has been explicitly suppressed." # noqa: E501 # FIXME CoP ) return None try: @@ -103,11 +103,11 @@ def _construct_store_backend_id(self, suppress_warning: bool = False) -> Optiona except Exception as e: if not suppress_warning: logger.warning( - f"Invalid store configuration: Please check the configuration of your {self.__class__.__name__} named {self.store_name}. Exception was: \n {e}" # noqa: E501 + f"Invalid store configuration: Please check the configuration of your {self.__class__.__name__} named {self.store_name}. 
Exception was: \n {e}" # noqa: E501 # FIXME CoP ) return uuid.UUID(self.STORE_BACKEND_INVALID_CONFIGURATION_ID) - # NOTE: AJB20201130 This store_backend_id and store_backend_id_warnings_suppressed was implemented to remove multiple warnings in DataContext.__init__ but this can be done more cleanly by more carefully going through initialization order in DataContext # noqa: E501 + # NOTE: AJB20201130 This store_backend_id and store_backend_id_warnings_suppressed was implemented to remove multiple warnings in DataContext.__init__ but this can be done more cleanly by more carefully going through initialization order in DataContext # noqa: E501 # FIXME CoP @property def store_backend_id(self): return self._construct_store_backend_id(suppress_warning=False) @@ -132,7 +132,7 @@ def set(self, key, value, **kwargs): return self._set(key, value, **kwargs) except ValueError as e: logger.debug(str(e)) - raise StoreBackendError("ValueError while calling _set on store backend.") # noqa: TRY003 + raise StoreBackendError("ValueError while calling _set on store backend.") # noqa: TRY003 # FIXME CoP def add(self, key, value, **kwargs): """ @@ -142,7 +142,7 @@ def add(self, key, value, **kwargs): def _add(self, key, value, **kwargs): if self.has_key(key): - raise StoreBackendError(f"Store already has the following key: {key}.") # noqa: TRY003 + raise StoreBackendError(f"Store already has the following key: {key}.") # noqa: TRY003 # FIXME CoP return self.set(key=key, value=value, **kwargs) def update(self, key, value, **kwargs): @@ -153,7 +153,7 @@ def update(self, key, value, **kwargs): def _update(self, key, value, **kwargs): if not self.has_key(key): - raise StoreBackendError( # noqa: TRY003 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP f"Store does not have a value associated the following key: {key}." 
) return self.set(key=key, value=value, **kwargs) @@ -187,7 +187,7 @@ def _url_path_escape_special_characters(path: str) -> str: def get_url_for_key(self, key, protocol=None) -> str: raise StoreError( - "Store backend of type {:s} does not have an implementation of get_url_for_key".format( # noqa: UP032 + "Store backend of type {:s} does not have an implementation of get_url_for_key".format( # noqa: UP032 # FIXME CoP type(self).__name__ ) ) @@ -197,14 +197,14 @@ def _validate_key(self, key) -> None: for key_element in key: if not isinstance(key_element, str): raise TypeError( - "Elements within tuples passed as keys to {} must be instances of {}, not {}".format( # noqa: E501 UP032 + "Elements within tuples passed as keys to {} must be instances of {}, not {}".format( # noqa: E501, UP032 # FIXME CoP self.__class__.__name__, str, type(key_element), ) ) else: - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"Keys in {self.__class__.__name__} must be instances of {tuple}, not {type(key)}" ) diff --git a/great_expectations/data_context/store/checkpoint_store.py b/great_expectations/data_context/store/checkpoint_store.py index b0b9d421dd5c..c4e3ea9f2a61 100644 --- a/great_expectations/data_context/store/checkpoint_store.py +++ b/great_expectations/data_context/store/checkpoint_store.py @@ -78,7 +78,7 @@ def _convert_raw_json_to_object_dict(data: dict) -> dict: @override def serialize(self, value): - # In order to enable the custom json_encoders in Checkpoint, we need to set `models_as_dict` off # noqa: E501 + # In order to enable the custom json_encoders in Checkpoint, we need to set `models_as_dict` off # noqa: E501 # FIXME CoP # Ref: https://docs.pydantic.dev/1.10/usage/exporting_models/#serialising-self-reference-or-other-models data = value.json(models_as_dict=False, indent=2, sort_keys=True, exclude_none=True) @@ -107,15 +107,15 @@ def _update(self, key: DataContextKey, value: Checkpoint, **kwargs): super()._update(key=key, 
value=value, **kwargs) except gx_exceptions.StoreBackendError as e: name = key.to_tuple()[0] - raise ValueError(f"Could not update Checkpoint '{name}'") from e # noqa: TRY003 + raise ValueError(f"Could not update Checkpoint '{name}'") from e # noqa: TRY003 # FIXME CoP @staticmethod def default_checkpoints_exist(directory_path: str) -> bool: if not directory_path: return False - checkpoints_directory_path: str = os.path.join( # noqa: PTH118 + checkpoints_directory_path: str = os.path.join( # noqa: PTH118 # FIXME CoP directory_path, DataContextConfigDefaults.DEFAULT_CHECKPOINT_STORE_BASE_DIRECTORY_RELATIVE_NAME.value, ) - return os.path.isdir(checkpoints_directory_path) # noqa: PTH112 + return os.path.isdir(checkpoints_directory_path) # noqa: PTH112 # FIXME CoP diff --git a/great_expectations/data_context/store/configuration_store.py b/great_expectations/data_context/store/configuration_store.py index 67486ebfff5c..cb0d901a3110 100644 --- a/great_expectations/data_context/store/configuration_store.py +++ b/great_expectations/data_context/store/configuration_store.py @@ -36,7 +36,7 @@ class ConfigurationStore(Store): """ Configuration Store provides a way to store any Marshmallow Schema compatible Configuration (using the YAML format). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP _key_class = ConfigurationIdentifier @@ -50,8 +50,8 @@ def __init__( runtime_environment: Optional[dict] = None, ) -> None: if not issubclass(self._configuration_class, BaseYamlConfig): - raise gx_exceptions.DataContextError( # noqa: TRY003 - "Invalid configuration: A configuration_class needs to inherit from the BaseYamlConfig class." # noqa: E501 + raise gx_exceptions.DataContextError( # noqa: TRY003 # FIXME CoP + "Invalid configuration: A configuration_class needs to inherit from the BaseYamlConfig class." 
# noqa: E501 # FIXME CoP ) if store_backend is not None: @@ -73,8 +73,8 @@ def __init__( store_name=store_name, ) - # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 - # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 + # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 # FIXME CoP + # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 # FIXME CoP self._config = { "store_name": store_name, "store_backend": store_backend, @@ -104,7 +104,7 @@ def deserialize(self, value): # type: ignore[explicit-override] # FIXME # Just to be explicit about what we intended to catch raise except marshmallow.ValidationError as e: - raise gx_exceptions.InvalidBaseYamlConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidBaseYamlConfigError( # noqa: TRY003 # FIXME CoP f"Deserialized configuration failed validation: {e}" ) @@ -134,6 +134,6 @@ def get_key( resource_name=name, ) else: - key = ConfigurationIdentifier(configuration_key=name) # type: ignore[arg-type] + key = ConfigurationIdentifier(configuration_key=name) # type: ignore[arg-type] # FIXME CoP return key diff --git a/great_expectations/data_context/store/data_asset_store.py b/great_expectations/data_context/store/data_asset_store.py index e0bef8d005cc..67344616e3e2 100644 --- a/great_expectations/data_context/store/data_asset_store.py +++ b/great_expectations/data_context/store/data_asset_store.py @@ -59,11 +59,11 @@ def __init__( super().__init__( store_backend=store_backend, runtime_environment=runtime_environment, - store_name=store_name, # type: ignore[arg-type] + store_name=store_name, # type: ignore[arg-type] # FIXME CoP ) - # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 - # 
out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 + # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 # FIXME CoP + # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 # FIXME CoP self._config = { "store_backend": store_backend, "runtime_environment": runtime_environment, @@ -95,13 +95,13 @@ def deserialize(self, value: dict) -> FluentDataAsset: type_ = value.get("type") data_asset_model = DataSourceManager.type_lookup.get(type_) if not data_asset_model: - raise LookupError(f"Unknown DataAsset 'type': '{type_}'") # noqa: TRY003 + raise LookupError(f"Unknown DataAsset 'type': '{type_}'") # noqa: TRY003 # FIXME CoP return data_asset_model(**value) @override @staticmethod def gx_cloud_response_json_to_object_dict( - response_json: CloudResponsePayloadTD, # type: ignore[override] + response_json: CloudResponsePayloadTD, # type: ignore[override] # FIXME CoP ) -> dict: """ This method takes full json response from GX cloud and outputs a dict appropriate for @@ -112,7 +112,7 @@ def gx_cloud_response_json_to_object_dict( if isinstance(data, list): if len(data) > 1: # TODO: handle larger arrays of DataAssets - raise TypeError(f"GX Cloud returned {len(data)} DataAssets but expected 1") # noqa: TRY003 + raise TypeError(f"GX Cloud returned {len(data)} DataAssets but expected 1") # noqa: TRY003 # FIXME CoP data = data[0] data_asset_id: str = data["id"] data_asset_config_dict: dict = data["attributes"]["data_asset_config"] diff --git a/great_expectations/data_context/store/database_store_backend.py b/great_expectations/data_context/store/database_store_backend.py index 20916c6c2133..cbd09b3865ff 100644 --- a/great_expectations/data_context/store/database_store_backend.py +++ b/great_expectations/data_context/store/database_store_backend.py @@ -27,7 +27,7 @@ class 
DatabaseStoreBackend(StoreBackend): - def __init__( # noqa: C901, PLR0912, PLR0913 + def __init__( # noqa: C901, PLR0912, PLR0913 # FIXME CoP self, table_name, key_columns, @@ -48,12 +48,12 @@ def __init__( # noqa: C901, PLR0912, PLR0913 store_name=store_name, ) if not sa: - raise gx_exceptions.DataContextError( # noqa: TRY003 + raise gx_exceptions.DataContextError( # noqa: TRY003 # FIXME CoP "ModuleNotFoundError: No module named 'sqlalchemy'" ) if not self.fixed_length_key: - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP "DatabaseStoreBackend requires use of a fixed-length-key" ) @@ -65,7 +65,7 @@ def __init__( # noqa: C901, PLR0912, PLR0913 if engine is not None: if credentials is not None: logger.warning( - "Both credentials and engine were provided during initialization of SqlAlchemyExecutionEngine. " # noqa: E501 + "Both credentials and engine were provided during initialization of SqlAlchemyExecutionEngine. " # noqa: E501 # FIXME CoP "Ignoring credentials." ) self.engine = engine @@ -78,8 +78,8 @@ def __init__( # noqa: C901, PLR0912, PLR0913 self.drivername = parsed_url.drivername self.engine = sa.create_engine(url, **kwargs) else: - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 - "Credentials, url, connection_string, or an engine are required for a DatabaseStoreBackend." # noqa: E501 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP + "Credentials, url, connection_string, or an engine are required for a DatabaseStoreBackend." 
# noqa: E501 # FIXME CoP ) meta = sa.MetaData(schema=self._schema_name) @@ -88,17 +88,17 @@ def __init__( # noqa: C901, PLR0912, PLR0913 cols = [] for column_ in key_columns: if column_ == "value": - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP "'value' cannot be used as a key_element name" ) cols.append(sa.Column(column_, sa.String, primary_key=True)) cols.append(sa.Column("value", sa.String)) try: table = sa.Table(table_name, meta, autoload_with=self.engine) - # We do a "light" check: if the columns' names match, we will proceed, otherwise, create the table # noqa: E501 + # We do a "light" check: if the columns' names match, we will proceed, otherwise, create the table # noqa: E501 # FIXME CoP if {str(col.name).lower() for col in table.columns} != (set(key_columns) | {"value"}): - raise gx_exceptions.StoreBackendError( # noqa: TRY003 - f"Unable to use table {table_name}: it exists, but does not have the expected schema." # noqa: E501 + raise gx_exceptions.StoreBackendError( # noqa: TRY003 # FIXME CoP + f"Unable to use table {table_name}: it exists, but does not have the expected schema." # noqa: E501 # FIXME CoP ) except sqlalchemy.NoSuchTableError: table = sa.Table(table_name, meta, *cols) @@ -110,16 +110,16 @@ def __init__( # noqa: C901, PLR0912, PLR0913 ) meta.create_all(self.engine) except SQLAlchemyError as e: - raise gx_exceptions.StoreBackendError( # noqa: TRY003 - f"Unable to connect to table {table_name} because of an error. It is possible your table needs to be migrated to a new schema. SqlAlchemyError: {e!s}" # noqa: E501 + raise gx_exceptions.StoreBackendError( # noqa: TRY003 # FIXME CoP + f"Unable to connect to table {table_name} because of an error. It is possible your table needs to be migrated to a new schema. 
SqlAlchemyError: {e!s}" # noqa: E501 # FIXME CoP ) self._table = table # Initialize with store_backend_id self._store_backend_id = None self._store_backend_id = self.store_backend_id - # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 - # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 + # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 # FIXME CoP + # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 # FIXME CoP self._config = { "table_name": table_name, "key_columns": key_columns, @@ -145,7 +145,7 @@ def store_backend_id(self) -> str: Ephemeral store_backend_id for database_store_backend until there is a place to store metadata Returns: store_backend_id which is a UUID(version=4) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if not self._store_backend_id: store_id = ( @@ -156,11 +156,11 @@ def store_backend_id(self) -> str: self._store_backend_id = f"{self.STORE_BACKEND_ID_PREFIX}{store_id}" return self._store_backend_id.replace(self.STORE_BACKEND_ID_PREFIX, "") - def _build_engine(self, credentials, **kwargs) -> "sa.engine.Engine": # noqa: UP037 + def _build_engine(self, credentials, **kwargs) -> "sa.engine.Engine": # noqa: UP037 # FIXME CoP """ Using a set of given credentials, constructs an Execution Engine , connecting to a database using a URL or a private key path. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # Update credentials with anything passed during connection time drivername = credentials.pop("drivername") create_engine_kwargs = kwargs @@ -193,7 +193,7 @@ def _get_sqlalchemy_key_pair_auth_url(drivername: str, credentials: dict) -> Tup Returns: a tuple consisting of a url with the serialized key-pair authentication, and a dictionary of engine kwargs. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives import serialization @@ -214,7 +214,7 @@ def _get_sqlalchemy_key_pair_auth_url(drivername: str, credentials: dict) -> Tup message="Decryption of key failed, was the passphrase incorrect?", ) from e else: - raise e # noqa: TRY201 + raise e # noqa: TRY201 # FIXME CoP pkb = p_key.private_bytes( encoding=serialization.Encoding.DER, format=serialization.PrivateFormat.PKCS8, @@ -247,7 +247,7 @@ def _get(self, key): # type: ignore[explicit-override] # FIXME return row except (IndexError, SQLAlchemyError) as e: logger.debug(f"Error fetching value: {e!s}") - raise gx_exceptions.StoreError(f"Unable to fetch value for key: {key!s}") # noqa: TRY003 + raise gx_exceptions.StoreError(f"Unable to fetch value for key: {key!s}") # noqa: TRY003 # FIXME CoP @override def _get_all(self) -> list[Any]: @@ -266,9 +266,9 @@ def _set(self, key, value, allow_update=True, **kwargs) -> None: .values(**cols) ) else: - ins = self._table.insert().values(**cols) # type: ignore[assignment] + ins = self._table.insert().values(**cols) # type: ignore[assignment] # FIXME CoP else: - ins = self._table.insert().values(**cols) # type: ignore[assignment] + ins = self._table.insert().values(**cols) # type: ignore[assignment] # FIXME CoP try: with self.engine.begin() as connection: @@ -277,12 +277,12 @@ def _set(self, key, value, allow_update=True, **kwargs) -> None: if self._get(key) == value: logger.info(f"Key {key!s} already exists with the same value.") else: - raise gx_exceptions.StoreBackendError( # noqa: TRY003 + raise gx_exceptions.StoreBackendError( # noqa: TRY003 # FIXME CoP f"Integrity error {e!s} while trying to store key" ) @override - def _move(self) -> None: # type: ignore[override] + def _move(self) -> None: # type: ignore[override] # FIXME CoP raise NotImplementedError @override @@ -356,7 +356,7 @@ def remove_key(self, key): # type: 
ignore[explicit-override] # FIXME with self.engine.begin() as connection: return connection.execute(delete_statement) except SQLAlchemyError as e: - raise gx_exceptions.StoreBackendError( # noqa: TRY003 + raise gx_exceptions.StoreBackendError( # noqa: TRY003 # FIXME CoP f"Unable to delete key: got sqlalchemy error {e!s}" ) diff --git a/great_expectations/data_context/store/datasource_store.py b/great_expectations/data_context/store/datasource_store.py index b4061bfd2652..300933a48b6e 100644 --- a/great_expectations/data_context/store/datasource_store.py +++ b/great_expectations/data_context/store/datasource_store.py @@ -59,11 +59,11 @@ def __init__( super().__init__( store_backend=store_backend, runtime_environment=runtime_environment, - store_name=store_name, # type: ignore[arg-type] + store_name=store_name, # type: ignore[arg-type] # FIXME CoP ) - # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 - # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 + # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 # FIXME CoP + # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 # FIXME CoP self._config = { "store_backend": store_backend, "runtime_environment": runtime_environment, @@ -105,13 +105,13 @@ def deserialize(self, value: dict | FluentDatasource) -> FluentDatasource: """ See parent 'Store.deserialize()' for more information """ - # When using the InlineStoreBackend, objects are already converted to their respective config types. # noqa: E501 + # When using the InlineStoreBackend, objects are already converted to their respective config types. 
# noqa: E501 # FIXME CoP if isinstance(value, FluentDatasource): return value else: type_: str | None = value["type"] if not type_: - raise ValueError("Datasource type is missing") # noqa: TRY003 + raise ValueError("Datasource type is missing") # noqa: TRY003 # FIXME CoP try: datasource_model = DataSourceManager.type_lookup[type_] return datasource_model(**value) @@ -128,7 +128,7 @@ def deserialize(self, value: dict | FluentDatasource) -> FluentDatasource: @classmethod def gx_cloud_response_json_to_object_dict( cls, - response_json: CloudResponsePayloadTD, # type: ignore[override] + response_json: CloudResponsePayloadTD, # type: ignore[override] # FIXME CoP ) -> dict: """ This method takes full json response from GX cloud and outputs a dict appropriate for @@ -137,16 +137,16 @@ def gx_cloud_response_json_to_object_dict( data = response_json["data"] if isinstance(data, list): if len(data) > 1: - # Larger arrays of datasources should be handled by `gx_cloud_response_json_to_object_collection` # noqa: E501 - raise TypeError(f"GX Cloud returned {len(data)} Datasources but expected 1") # noqa: TRY003 + # Larger arrays of datasources should be handled by `gx_cloud_response_json_to_object_collection` # noqa: E501 # FIXME CoP + raise TypeError(f"GX Cloud returned {len(data)} Datasources but expected 1") # noqa: TRY003 # FIXME CoP data = data[0] return DatasourceStore._convert_raw_json_to_object_dict(data) @override @staticmethod - def _convert_raw_json_to_object_dict(data: DataPayload) -> dict: # type: ignore[override] - return data # type: ignore[return-value] + def _convert_raw_json_to_object_dict(data: DataPayload) -> dict: # type: ignore[override] # FIXME CoP + return data # type: ignore[return-value] # FIXME CoP def retrieve_by_name(self, name: str) -> FluentDatasource: """Retrieves a Datasource persisted in the store by it's given name. 
@@ -165,11 +165,11 @@ def retrieve_by_name(self, name: str) -> FluentDatasource: self.store_backend.build_key(name=name) ) if not self.has_key(datasource_key): - raise ValueError( # noqa: TRY003 - f"Unable to load datasource `{name}` -- no configuration found or invalid configuration." # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + f"Unable to load datasource `{name}` -- no configuration found or invalid configuration." # noqa: E501 # FIXME CoP ) - datasource_config: FluentDatasource = copy.deepcopy(self.get(datasource_key)) # type: ignore[arg-type] + datasource_config: FluentDatasource = copy.deepcopy(self.get(datasource_key)) # type: ignore[arg-type] # FIXME CoP datasource_config.name = name return datasource_config @@ -183,7 +183,7 @@ def delete(self, datasource_config: FluentDatasource) -> None: self.remove_key(self._build_key_from_config(datasource_config)) @override - def _build_key_from_config( # type: ignore[override] + def _build_key_from_config( # type: ignore[override] # FIXME CoP self, datasource_config: FluentDatasource ) -> Union[GXCloudIdentifier, DataContextVariableKey]: id_: str | None = ( @@ -198,7 +198,7 @@ def get_fluent_datasource_by_name(self, name: str) -> FluentDatasource: ) datasource = self.get(key) if not isinstance(datasource, FluentDatasource): - raise ValueError("Datasource is not a FluentDatasource") # noqa: TRY003, TRY004 + raise ValueError("Datasource is not a FluentDatasource") # noqa: TRY003, TRY004 # FIXME CoP return datasource @override @@ -227,14 +227,14 @@ def _persist_datasource( # values that may have been added to the config by the StoreBackend (i.e. 
object ids) ref: Optional[Union[bool, GXCloudResourceRef]] = super().set(key=key, value=config) if ref and isinstance(ref, GXCloudResourceRef): - key.id = ref.id # type: ignore[attr-defined] + key.id = ref.id # type: ignore[attr-defined] # FIXME CoP - return_value: FluentDatasource = self.get(key) # type: ignore[assignment] + return_value: FluentDatasource = self.get(key) # type: ignore[assignment] # FIXME CoP if not return_value.name and isinstance(key, DataContextVariableKey): - # Setting the name in the config is currently needed to handle adding the name to v2 datasource # noqa: E501 + # Setting the name in the config is currently needed to handle adding the name to v2 datasource # noqa: E501 # FIXME CoP # configs and can be refactored (e.g. into `get()`) if not key.resource_name: - raise ValueError("Missing resource name") # noqa: TRY003 + raise ValueError("Missing resource name") # noqa: TRY003 # FIXME CoP return_value.name = key.resource_name return return_value diff --git a/great_expectations/data_context/store/expectations_store.py b/great_expectations/data_context/store/expectations_store.py index c659f5ae3a7e..f01ed4c45c42 100644 --- a/great_expectations/data_context/store/expectations_store.py +++ b/great_expectations/data_context/store/expectations_store.py @@ -41,6 +41,7 @@ class Config: rendered_content: List[dict] = pydantic.Field(default_factory=list) kwargs: dict meta: Union[dict, None] + description: Union[str, None] expectation_context: Union[dict, None] @@ -95,8 +96,8 @@ def __init__( store_name=store_name, ) - # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 - # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. 
# noqa: E501 + # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 # FIXME CoP + # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 # FIXME CoP self._config = { "store_backend": store_backend, "runtime_environment": runtime_environment, @@ -119,7 +120,7 @@ def gx_cloud_response_json_to_object_dict(cls, response_json: dict) -> dict: if len(response_json["data"]) == 1: suite_data = response_json["data"][0] else: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "More than one Expectation Suite was found with the expectation_suite_name." ) else: @@ -159,17 +160,17 @@ def add_expectation(self, suite: ExpectationSuite, expectation: _TExpectation) - if len(new_ids) > 1: # edge case: suite has been changed remotely, and one or more new expectations # have been added. Since the store doesn't return the updated object, - # we have no reliable way to know which new ID belongs to this expectation, # noqa: E501 + # we have no reliable way to know which new ID belongs to this expectation, # noqa: E501 # FIXME CoP # so we raise an exception and ask the user to refresh their suite. # The Expectation should have been successfully added to the suite. - raise RuntimeError( # noqa: TRY003 - "Expectation was added, however this ExpectationSuite is out of sync with the Cloud backend. " # noqa: E501 - f'Please fetch the latest state of this suite by calling `context.suites.get(name="{suite.name}")`.' # noqa: E501 + raise RuntimeError( # noqa: TRY003 # FIXME CoP + "Expectation was added, however this ExpectationSuite is out of sync with the Cloud backend. " # noqa: E501 # FIXME CoP + f'Please fetch the latest state of this suite by calling `context.suites.get(name="{suite.name}")`.' 
# noqa: E501 # FIXME CoP ) elif len(new_ids) == 0: - # edge case: this is an unexpected state - if the cloud backend failed to add the expectation, # noqa: E501 + # edge case: this is an unexpected state - if the cloud backend failed to add the expectation, # noqa: E501 # FIXME CoP # it should have already raised an exception. - raise RuntimeError("Unknown error occurred and Expectation was not added.") # noqa: TRY003 + raise RuntimeError("Unknown error occurred and Expectation was not added.") # noqa: TRY003 # FIXME CoP else: new_id = new_ids[0] expectation.id = new_id @@ -179,7 +180,7 @@ def update_expectation(self, suite: ExpectationSuite, expectation: Expectation) suite_identifier, fetched_suite = self._refresh_suite(suite) if expectation.id not in {exp.id for exp in fetched_suite.expectations}: - raise KeyError("Cannot update Expectation because it was not found.") # noqa: TRY003 + raise KeyError("Cannot update Expectation because it was not found.") # noqa: TRY003 # FIXME CoP for i, old_expectation in enumerate(fetched_suite.expectations): if old_expectation.id == expectation.id: @@ -196,7 +197,7 @@ def delete_expectation(self, suite: ExpectationSuite, expectation: Expectation) suite_identifier, suite = self._refresh_suite(suite) if expectation.id not in {exp.id for exp in suite.expectations}: - raise KeyError("Cannot delete Expectation because it was not found.") # noqa: TRY003 + raise KeyError("Cannot delete Expectation because it was not found.") # noqa: TRY003 # FIXME CoP for i, old_expectation in enumerate(suite.expectations): if old_expectation.id == expectation.id: @@ -234,7 +235,7 @@ def _add(self, key, value, **kwargs): # type: ignore[explicit-override] # FIXME ) return result except gx_exceptions.StoreBackendError as exc: - raise gx_exceptions.ExpectationSuiteError( # noqa: TRY003 + raise gx_exceptions.ExpectationSuiteError( # noqa: TRY003 # FIXME CoP f"An error occurred while trying to save ExpectationSuite: {exc.message}" ) from exc @@ -286,7 
+287,7 @@ def _add_ids_on_update(self, suite: ExpectationSuite) -> ExpectationSuite: # enforce that every ID in this suite is unique expectation_ids = [exp.id for exp in suite.expectations if exp.id] if len(expectation_ids) != len(set(expectation_ids)): - raise RuntimeError("Expectation IDs must be unique within a suite.") # noqa: TRY003 + raise RuntimeError("Expectation IDs must be unique within a suite.") # noqa: TRY003 # FIXME CoP for expectation in suite.expectations: if not expectation.id: @@ -309,14 +310,14 @@ def _add_cloud_ids_to_local_suite_and_expectations( @override def get(self, key) -> dict: - return super().get(key) # type: ignore[return-value] + return super().get(key) # type: ignore[return-value] # FIXME CoP @override - def _validate_key( # type: ignore[override] + def _validate_key( # type: ignore[override] # FIXME CoP self, key: ExpectationSuiteIdentifier | GXCloudIdentifier ) -> None: if isinstance(key, GXCloudIdentifier) and not key.id and not key.resource_name: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "GXCloudIdentifier for ExpectationsStore must contain either " "an id or a resource_name, but neither are present." 
) @@ -335,7 +336,7 @@ def deserialize(self, value): # type: ignore[explicit-override] # FIXME elif isinstance(value, str): return self._expectationSuiteSchema.loads(value) else: - raise TypeError(f"Cannot deserialize value of unknown type: {type(value)}") # noqa: TRY003 + raise TypeError(f"Cannot deserialize value of unknown type: {type(value)}") # noqa: TRY003 # FIXME CoP def deserialize_suite_dict(self, suite_dict: dict) -> ExpectationSuite: suite = ExpectationSuite(**suite_dict) diff --git a/great_expectations/data_context/store/gx_cloud_store_backend.py b/great_expectations/data_context/store/gx_cloud_store_backend.py index d96567f970a5..4b24721d8b56 100644 --- a/great_expectations/data_context/store/gx_cloud_store_backend.py +++ b/great_expectations/data_context/store/gx_cloud_store_backend.py @@ -91,7 +91,7 @@ class GXCloudStoreBackend(StoreBackend, metaclass=ABCMeta): } RESOURCE_PLURALITY_LOOKUP_DICT: bidict = bidict( # type: ignore[misc] # Keywords must be str - **{ # type: ignore[arg-type] + **{ # type: ignore[arg-type] # FIXME CoP GXCloudRESTResource.CHECKPOINT: "checkpoints", GXCloudRESTResource.DATASOURCE: "datasources", GXCloudRESTResource.DATA_ASSET: "data_assets", @@ -119,7 +119,7 @@ class GXCloudStoreBackend(StoreBackend, metaclass=ABCMeta): # with a fallback default of EndpointVersion.V0. _ENDPOINT_VERSION_LOOKUP[value] = _ENDPOINT_VERSION_LOOKUP.get(key, EndpointVersion.V0) - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, ge_cloud_credentials: Dict, ge_cloud_base_url: str = CLOUD_DEFAULT_BASE_URL, @@ -166,8 +166,8 @@ def __init__( # noqa: PLR0913 # https://docs.python.org/3.11/library/weakref.html#weakref.finalize self._finalizer = weakref.finalize(self, close_session, self._session) - # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 - # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. 
# noqa: E501 + # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 # FIXME CoP + # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 # FIXME CoP self._config = { "ge_cloud_base_url": ge_cloud_base_url, "ge_cloud_resource_name": ge_cloud_resource_name, @@ -182,14 +182,14 @@ def __init__( # noqa: PLR0913 filter_properties_dict(properties=self._config, inplace=True) @override - def _get( # type: ignore[override] + def _get( # type: ignore[override] # FIXME CoP self, key: Tuple[GXCloudRESTResource, str | None, str | None] ) -> dict: url = self.get_url_for_key(key=key) # if name is included in the key, add as a param params: dict | None - if len(key) > 2 and key[2]: # noqa: PLR2004 + if len(key) > 2 and key[2]: # noqa: PLR2004 # FIXME CoP params = {"name": key[2]} url = url.rstrip("/") else: @@ -201,14 +201,14 @@ def _get( # type: ignore[override] # We need to validate that even if we have a 200, there are contents to support existence response_has_data = bool(payload.get("data")) if not response_has_data: - raise StoreBackendError( # noqa: TRY003 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP "Unable to get object in GX Cloud Store Backend: Object does not exist." 
) return payload @override - def _get_all(self) -> dict: # type: ignore[override] + def _get_all(self) -> dict: # type: ignore[override] # FIXME CoP url = self.construct_versioned_url( base_url=self.ge_cloud_base_url, organization_id=self.ge_cloud_credentials["organization_id"], @@ -228,30 +228,30 @@ def _send_get_request_to_api(self, url: str, params: dict | None = None) -> dict response_json: dict = response.json() return response_json except json.JSONDecodeError as jsonError: - logger.debug( # noqa: PLE1205 + logger.debug( # noqa: PLE1205 # FIXME CoP "Failed to parse GX Cloud Response into JSON", str(response.text), # type: ignore[possibly-undefined] # will be present for json error str(jsonError), ) - raise StoreBackendError( # noqa: TRY003 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP f"Unable to get object in GX Cloud Store Backend: {jsonError}" ) from jsonError except requests.HTTPError as http_err: - raise StoreBackendError( # noqa: TRY003 - f"Unable to get object in GX Cloud Store Backend: {get_user_friendly_error_message(http_err)}" # noqa: E501 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP + f"Unable to get object in GX Cloud Store Backend: {get_user_friendly_error_message(http_err)}" # noqa: E501 # FIXME CoP ) from http_err except requests.ConnectionError as conn_err: - raise StoreBackendError( # noqa: TRY003 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP f"Unable to get object in GX Cloud Store Backend: {conn_err}" ) from conn_err except requests.Timeout as timeout_exc: - logger.exception(timeout_exc) # noqa: TRY401 - raise StoreBackendTransientError( # noqa: TRY003 - "Unable to get object in GX Cloud Store Backend: This is likely a transient error. Please try again." # noqa: E501 + logger.exception(timeout_exc) # noqa: TRY401 # FIXME CoP + raise StoreBackendTransientError( # noqa: TRY003 # FIXME CoP + "Unable to get object in GX Cloud Store Backend: This is likely a transient error. Please try again." 
# noqa: E501 # FIXME CoP ) from timeout_exc @override - def _move(self) -> None: # type: ignore[override] + def _move(self) -> None: # type: ignore[override] # FIXME CoP pass def _put(self, id: str, value: Any) -> GXCloudResourceRef | bool: @@ -289,7 +289,7 @@ def _put(self, id: str, value: Any) -> GXCloudResourceRef | bool: # for the ExpectationSuite endpoint. As such, this is a temporary fork to # ensure that legacy PATCH behavior is supported. if ( - response_status_code == 405 # noqa: PLR2004 + response_status_code == 405 # noqa: PLR2004 # FIXME CoP and resource_type is GXCloudRESTResource.EXPECTATION_SUITE ): response = self._session.patch(url, json=data) @@ -312,17 +312,17 @@ def _put(self, id: str, value: Any) -> GXCloudResourceRef | bool: ) except requests.HTTPError as http_exc: - raise StoreBackendError( # noqa: TRY003 - f"Unable to update object in GX Cloud Store Backend: {get_user_friendly_error_message(http_exc)}" # noqa: E501 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP + f"Unable to update object in GX Cloud Store Backend: {get_user_friendly_error_message(http_exc)}" # noqa: E501 # FIXME CoP ) from http_exc except requests.Timeout as timeout_exc: - logger.exception(timeout_exc) # noqa: TRY401 - raise StoreBackendTransientError( # noqa: TRY003 - "Unable to update object in GX Cloud Store Backend: This is likely a transient error. Please try again." # noqa: E501 + logger.exception(timeout_exc) # noqa: TRY401 # FIXME CoP + raise StoreBackendTransientError( # noqa: TRY003 # FIXME CoP + "Unable to update object in GX Cloud Store Backend: This is likely a transient error. Please try again." 
# noqa: E501 # FIXME CoP ) from timeout_exc except Exception as e: logger.debug(repr(e)) - raise StoreBackendError( # noqa: TRY003 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP f"Unable to update object in GX Cloud Store Backend: {e}" ) from e @@ -338,13 +338,13 @@ def validate_set_kwargs(self, kwargs: dict) -> Union[bool, None]: return True if not (kwarg_names <= self.allowed_set_kwargs): extra_kwargs = kwarg_names - self.allowed_set_kwargs - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f'Invalid kwargs: {(", ").join(extra_kwargs)}' ) return None @override - def _set( # type: ignore[override] + def _set( # type: ignore[override] # FIXME CoP self, key: Tuple[GXCloudRESTResource, ...], value: Any, @@ -403,17 +403,17 @@ def _post(self, value: Any, **kwargs) -> GXCloudResourceRef: response_json=response_json, ) except requests.HTTPError as http_exc: - raise StoreBackendError( # noqa: TRY003 - f"Unable to set object in GX Cloud Store Backend: {get_user_friendly_error_message(http_exc)}" # noqa: E501 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP + f"Unable to set object in GX Cloud Store Backend: {get_user_friendly_error_message(http_exc)}" # noqa: E501 # FIXME CoP ) from http_exc except requests.Timeout as timeout_exc: - logger.exception(timeout_exc) # noqa: TRY401 - raise StoreBackendTransientError( # noqa: TRY003 - "Unable to set object in GX Cloud Store Backend: This is likely a transient error. Please try again." # noqa: E501 + logger.exception(timeout_exc) # noqa: TRY401 # FIXME CoP + raise StoreBackendTransientError( # noqa: TRY003 # FIXME CoP + "Unable to set object in GX Cloud Store Backend: This is likely a transient error. Please try again." 
# noqa: E501 # FIXME CoP ) from timeout_exc except Exception as e: logger.debug(str(e)) - raise StoreBackendError( # noqa: TRY003 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP f"Unable to set object in GX Cloud Store Backend: {e}" ) from e @@ -462,7 +462,7 @@ def list_keys(self, prefix: Tuple = ()) -> List[Tuple[GXCloudRESTResource, str, return keys except Exception as e: logger.debug(str(e)) - raise StoreBackendError( # noqa: TRY003 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP f"Unable to list keys in GX Cloud Store Backend: {e}" ) from e @@ -486,7 +486,7 @@ def remove_key(self, key): # type: ignore[explicit-override] # FIXME key = key.to_tuple() id = key[1] - if len(key) == 3: # noqa: PLR2004 + if len(key) == 3: # noqa: PLR2004 # FIXME CoP resource_object_name = key[2] else: resource_object_name = None @@ -514,18 +514,18 @@ def remove_key(self, key): # type: ignore[explicit-override] # FIXME response.raise_for_status() return True except requests.HTTPError as http_exc: - logger.exception(http_exc) # noqa: TRY401 - raise StoreBackendError( # noqa: TRY003 - f"Unable to delete object in GX Cloud Store Backend: {get_user_friendly_error_message(http_exc)}" # noqa: E501 + logger.exception(http_exc) # noqa: TRY401 # FIXME CoP + raise StoreBackendError( # noqa: TRY003 # FIXME CoP + f"Unable to delete object in GX Cloud Store Backend: {get_user_friendly_error_message(http_exc)}" # noqa: E501 # FIXME CoP ) from http_exc except requests.Timeout as timeout_exc: - logger.exception(timeout_exc) # noqa: TRY401 - raise StoreBackendTransientError( # noqa: TRY003 - "Unable to delete object in GX Cloud Store Backend: This is likely a transient error. Please try again." # noqa: E501 + logger.exception(timeout_exc) # noqa: TRY401 # FIXME CoP + raise StoreBackendTransientError( # noqa: TRY003 # FIXME CoP + "Unable to delete object in GX Cloud Store Backend: This is likely a transient error. Please try again." 
# noqa: E501 # FIXME CoP ) from timeout_exc except Exception as e: logger.debug(str(e)) - raise StoreBackendError( # noqa: TRY003 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP f"Unable to delete object in GX Cloud Store Backend: {e!r}" ) from e @@ -538,14 +538,14 @@ def _get_one_or_none_from_response_data( GET requests to cloud can either return response data that is a single object (get by id) or a list of objects with length >= 0 (get by name). This method takes this response data and returns a single object or None. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if not isinstance(response_data, list): return response_data if len(response_data) == 0: return None if len(response_data) == 1: return response_data[0] - raise StoreBackendError( # noqa: TRY003 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP f"Unable to update object in GX Cloud Store Backend: the provided key ({key}) maps " f"to more than one object." ) @@ -559,13 +559,13 @@ def _update( ) -> GXCloudResourceRef: # todo: ID should never be optional for update - remove this additional get response_data = self._get(key)["data"] - # if the provided key does not contain id (only name), cloud will return a list of resources filtered # noqa: E501 + # if the provided key does not contain id (only name), cloud will return a list of resources filtered # noqa: E501 # FIXME CoP # by name, with length >= 0, instead of a single object (or error if not found) existing = self._get_one_or_none_from_response_data(response_data=response_data, key=key) if existing is None: - raise StoreBackendError( # noqa: TRY003 - f"Unable to update object in GX Cloud Store Backend: could not find object associated with key {key}." # noqa: E501 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP + f"Unable to update object in GX Cloud Store Backend: could not find object associated with key {key}." 
# noqa: E501 # FIXME CoP ) if key[1] is None: @@ -580,7 +580,7 @@ def _add_or_update(self, key, value, **kwargs): # type: ignore[explicit-overrid logger.info(f"Could not find object associated with key {key}: {e}") response_data = None - # if the provided key does not contain id (only name), cloud will return a list of resources filtered # noqa: E501 + # if the provided key does not contain id (only name), cloud will return a list of resources filtered # noqa: E501 # FIXME CoP # by name, with length >= 0, instead of a single object (or error if not found) existing = self._get_one_or_none_from_response_data(response_data=response_data, key=key) @@ -612,7 +612,7 @@ def build_key( id: Optional[str] = None, name: Optional[str] = None, ) -> GXCloudIdentifier: - """Get the store backend specific implementation of the key. ignore resource_type since it is defined when initializing the cloud store backend.""" # noqa: E501 + """Get the store backend specific implementation of the key. ignore resource_type since it is defined when initializing the cloud store backend.""" # noqa: E501 # FIXME CoP return GXCloudIdentifier( resource_type=self.ge_cloud_resource_type, id=id, @@ -621,16 +621,16 @@ def build_key( @override def _validate_key(self, key) -> None: - if not isinstance(key, tuple) or len(key) != 3: # noqa: PLR2004 - raise TypeError( # noqa: TRY003 - "Key used for GXCloudStoreBackend must contain a resource_type, id, and resource_name; see GXCloudIdentifier for more information." # noqa: E501 + if not isinstance(key, tuple) or len(key) != 3: # noqa: PLR2004 # FIXME CoP + raise TypeError( # noqa: TRY003 # FIXME CoP + "Key used for GXCloudStoreBackend must contain a resource_type, id, and resource_name; see GXCloudIdentifier for more information." 
# noqa: E501 # FIXME CoP ) resource_type, _id, _resource_name = key try: GXCloudRESTResource(resource_type) except ValueError as e: - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"The provided resource_type {resource_type} is not a valid GXCloudRESTResource" ) from e @@ -683,7 +683,7 @@ def construct_versioned_payload( elif attributes_value is None: payload = kwargs else: - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"Parameter attributes_value of type {type(attributes_value)}" f" is unsupported in GX V1." ) diff --git a/great_expectations/data_context/store/html_site_store.py b/great_expectations/data_context/store/html_site_store.py index 4ef50c5a0a3c..66703928b75c 100644 --- a/great_expectations/data_context/store/html_site_store.py +++ b/great_expectations/data_context/store/html_site_store.py @@ -100,11 +100,11 @@ class HtmlSiteStore: bug_risk: Moderate --ge-feature-maturity-info-- - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP _key_class = SiteSectionIdentifier - def __init__( # noqa: C901 - 11 + def __init__( # noqa: C901 # 11 self, store_backend=None, runtime_environment=None ) -> None: store_backend_module_name = store_backend.get( @@ -116,14 +116,14 @@ def __init__( # noqa: C901 - 11 # Store Class was loaded successfully; verify that it is of a correct subclass. 
if not issubclass(store_class, (TupleStoreBackend, GXCloudStoreBackend)): - raise DataContextError( # noqa: TRY003 - f"Invalid configuration: HtmlSiteStore needs a {TupleStoreBackend.__name__} or {GXCloudStoreBackend.__name__}" # noqa: E501 + raise DataContextError( # noqa: TRY003 # FIXME CoP + f"Invalid configuration: HtmlSiteStore needs a {TupleStoreBackend.__name__} or {GXCloudStoreBackend.__name__}" # noqa: E501 # FIXME CoP ) if "filepath_template" in store_backend or ( "fixed_length_key" in store_backend and store_backend["fixed_length_key"] is True ): logger.warning( - "Configuring a filepath_template or using fixed_length_key is not supported in SiteBuilder: " # noqa: E501 + "Configuring a filepath_template or using fixed_length_key is not supported in SiteBuilder: " # noqa: E501 # FIXME CoP "filepaths will be selected based on the type of asset rendered." ) @@ -234,16 +234,16 @@ def __init__( # noqa: C901 - 11 # NOTE: Instead of using the filesystem as the source of record for keys, # this class tracks keys separately in an internal set. - # This means that keys are stored for a specific session, but can't be fetched after the original # noqa: E501 + # This means that keys are stored for a specific session, but can't be fetched after the original # noqa: E501 # FIXME CoP # HtmlSiteStore instance leaves scope. - # Doing it this way allows us to prevent namespace collisions among keys while still having multiple # noqa: E501 + # Doing it this way allows us to prevent namespace collisions among keys while still having multiple # noqa: E501 # FIXME CoP # backends that write to the same directory structure. - # It's a pretty reasonable way for HtmlSiteStore to do its job---you just have to remember that it # noqa: E501 + # It's a pretty reasonable way for HtmlSiteStore to do its job---you just have to remember that it # noqa: E501 # FIXME CoP # can't necessarily set and list_keys like most other Stores. 
- self.keys = set() # type: ignore[var-annotated] + self.keys = set() # type: ignore[var-annotated] # FIXME CoP - # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 - # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 + # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 # FIXME CoP + # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 # FIXME CoP self._config = { "store_backend": store_backend, "runtime_environment": runtime_environment, @@ -288,7 +288,7 @@ def get_url_for_resource(self, resource_identifier=None, only_if_exists=True) -> key = resource_identifier.to_tuple() else: # this method does not support getting the URL of static assets - raise ValueError(f"Cannot get URL for resource {resource_identifier!s:s}") # noqa: TRY003 + raise ValueError(f"Cannot get URL for resource {resource_identifier!s:s}") # noqa: TRY003 # FIXME CoP # : this is a hack for Taylor. Change this back. 
20200924 # if only_if_exists: @@ -308,7 +308,7 @@ def get_url_for_resource(self, resource_identifier=None, only_if_exists=True) -> ) else: return store_backend.get_public_url_for_key(key) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if only_if_exists: return store_backend.get_url_for_key(key) if store_backend.has_key(key) else None else: @@ -316,7 +316,7 @@ def get_url_for_resource(self, resource_identifier=None, only_if_exists=True) -> def _validate_key(self, key): if not isinstance(key, SiteSectionIdentifier): - raise TypeError(f"key: {key!r} must be a SiteSectionIdentifier, not {type(key)!r}") # noqa: TRY003 + raise TypeError(f"key: {key!r} must be a SiteSectionIdentifier, not {type(key)!r}") # noqa: TRY003 # FIXME CoP for key_class in self.store_backends: try: @@ -328,8 +328,8 @@ def _validate_key(self, key): continue # The key's resource_identifier didn't match any known key_class - raise TypeError( # noqa: TRY003 - f"resource_identifier in key: {key!r} must one of {set(self.store_backends.keys())}, not {type(key)!r}" # noqa: E501 + raise TypeError( # noqa: TRY003 # FIXME CoP + f"resource_identifier in key: {key!r} must one of {set(self.store_backends.keys())}, not {type(key)!r}" # noqa: E501 # FIXME CoP ) def list_keys(self): @@ -363,23 +363,23 @@ def clean_site(self) -> None: for key in keys: target_store_backend.remove_key(key) - def copy_static_assets( # noqa: C901 - 11 + def copy_static_assets( # noqa: C901 # 11 self, static_assets_source_dir: str | None = None ): """ Copies static assets, using a special "static_assets" backend store that accepts variable-length tuples as keys, with no filepath_template. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP file_exclusions: list[str] = [".DS_Store"] dir_exclusions: list[str] = [] if not static_assets_source_dir: static_assets_source_dir = file_relative_path( __file__, - os.path.join("..", "..", "render", "view", "static"), # noqa: PTH118 + os.path.join("..", "..", "render", "view", "static"), # noqa: PTH118 # FIXME CoP ) - # If `static_assets_source_absdir` contains the string ".zip", then we try to extract (unzip) # noqa: E501 + # If `static_assets_source_absdir` contains the string ".zip", then we try to extract (unzip) # noqa: E501 # FIXME CoP # the static files. If the unzipping is successful, that means that Great Expectations is # installed into a zip file (see PEP 273) and we need to run this function again if ".zip" in static_assets_source_dir.lower(): @@ -390,13 +390,13 @@ def copy_static_assets( # noqa: C901 - 11 for item in os.listdir(static_assets_source_dir): # Directory - if os.path.isdir( # noqa: PTH112 - os.path.join(static_assets_source_dir, item) # noqa: PTH118 + if os.path.isdir( # noqa: PTH112 # FIXME CoP + os.path.join(static_assets_source_dir, item) # noqa: PTH118 # FIXME CoP ): if item in dir_exclusions: continue # Recurse - new_source_dir = os.path.join( # noqa: PTH118 + new_source_dir = os.path.join( # noqa: PTH118 # FIXME CoP static_assets_source_dir, item ) self.copy_static_assets(new_source_dir) @@ -405,7 +405,7 @@ def copy_static_assets( # noqa: C901 - 11 # Copy file over using static assets store backend if item in file_exclusions: continue - source_name = os.path.join( # noqa: PTH118 + source_name = os.path.join( # noqa: PTH118 # FIXME CoP static_assets_source_dir, item ) with open(source_name, "rb") as f: @@ -444,7 +444,7 @@ def _unzip_assets(self, assets_full_path: str, unzip_directory: str) -> bool: Otherwise, this function returns False """ - static_assets_source_absdir = os.path.abspath(assets_full_path) # noqa: PTH100 + static_assets_source_absdir = os.path.abspath(assets_full_path) 
# noqa: PTH100 # FIXME CoP zip_re = re.match( f"(.+[.]zip){re.escape(os.sep)}(.+)", diff --git a/great_expectations/data_context/store/in_memory_store_backend.py b/great_expectations/data_context/store/in_memory_store_backend.py index 9e1b5380020f..7fb025828d54 100644 --- a/great_expectations/data_context/store/in_memory_store_backend.py +++ b/great_expectations/data_context/store/in_memory_store_backend.py @@ -38,8 +38,8 @@ def __init__( if not self._suppress_store_backend_id: _ = self.store_backend_id - # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 - # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 + # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 # FIXME CoP + # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 # FIXME CoP self._config = { "runtime_environment": runtime_environment, "fixed_length_key": fixed_length_key, @@ -88,13 +88,13 @@ def config(self) -> dict: return self._config @override - def build_key( # type: ignore[override] + def build_key( # type: ignore[override] # FIXME CoP self, resource_type: Optional[DataContextVariableSchema] = None, id: Optional[str] = None, name: Optional[str] = None, ) -> DataContextVariableKey: - """Get the store backend specific implementation of the key. id included for super class compatibility.""" # noqa: E501 + """Get the store backend specific implementation of the key. 
id included for super class compatibility.""" # noqa: E501 # FIXME CoP return DataContextVariableKey( resource_name=name, ) diff --git a/great_expectations/data_context/store/inline_store_backend.py b/great_expectations/data_context/store/inline_store_backend.py index 81d31c641c16..9e1fed7d2f67 100644 --- a/great_expectations/data_context/store/inline_store_backend.py +++ b/great_expectations/data_context/store/inline_store_backend.py @@ -43,9 +43,9 @@ class InlineStoreBackend(StoreBackend): It performs these actions through a reference to a DataContext instance. Please note that is it only to be used with file-backed DataContexts (DataContext and FileDataContext). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, data_context: FileDataContext, resource_type: DataContextVariableSchema, @@ -65,8 +65,8 @@ def __init__( # noqa: PLR0913 self._data_context = data_context self._resource_type = resource_type - # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 - # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 + # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 # FIXME CoP + # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. 
# noqa: E501 # FIXME CoP self._config = { "runtime_environment": runtime_environment, "fixed_length_key": fixed_length_key, @@ -116,10 +116,10 @@ def _set(self, key: tuple[str, ...], value: Any, **kwargs: dict) -> None: if resource_type is DataContextVariableSchema.ALL_VARIABLES: config_commented_map_from_yaml = yaml.load(value) - # NOTE: fluent datasources may be present under both the `fluent_datasources` & `datasources` key # noqa: E501 - # if fluent datasource is part of `datasources` it will attempt to validate using a marshmallow Datasource schema and fail # noqa: E501 - for name in config_commented_map_from_yaml.get("fluent_datasources", {}): # type: ignore[union-attr] - config_commented_map_from_yaml.get("datasources", {}).pop(name, None) # type: ignore[union-attr,arg-type,call-arg] + # NOTE: fluent datasources may be present under both the `fluent_datasources` & `datasources` key # noqa: E501 # FIXME CoP + # if fluent datasource is part of `datasources` it will attempt to validate using a marshmallow Datasource schema and fail # noqa: E501 # FIXME CoP + for name in config_commented_map_from_yaml.get("fluent_datasources", {}): # type: ignore[union-attr] # FIXME CoP + config_commented_map_from_yaml.get("datasources", {}).pop(name, None) # type: ignore[union-attr,arg-type,call-arg] # FIXME CoP value = DataContextConfig.from_commented_map( commented_map=config_commented_map_from_yaml ) @@ -133,8 +133,8 @@ def _set(self, key: tuple[str, ...], value: Any, **kwargs: dict) -> None: @override def _move(self, source_key: tuple[str, ...], dest_key: tuple[str, ...], **kwargs: dict) -> None: - raise StoreBackendError( # noqa: TRY003 - "InlineStoreBackend does not support moving of keys; the DataContext's config variables schema is immutable" # noqa: E501 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP + "InlineStoreBackend does not support moving of keys; the DataContext's config variables schema is immutable" # noqa: E501 # FIXME CoP ) @override @@ -148,7 +148,7 
@@ def list_keys(self, prefix: tuple[str, ...] = ()) -> list[tuple]: Returns: A list of string keys from the user's project config. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP config_section: str | None = None if self._resource_type is not DataContextVariableSchema.ALL_VARIABLES: config_section = self._resource_type @@ -162,7 +162,7 @@ def list_keys(self, prefix: tuple[str, ...] = ()) -> list[tuple]: else: config_values: dict = config_dict[config_section] if not isinstance(config_values, dict): - raise StoreBackendError( # noqa: TRY003 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP "Cannot list keys in a non-iterable section of a project config" ) keys = list((key,) for key in config_values) @@ -178,15 +178,15 @@ def remove_key(self, key: tuple[str, ...]) -> None: resource_type = self._resource_type if resource_type is DataContextVariableSchema.ALL_VARIABLES: - raise StoreBackendError( # noqa: TRY003 - "InlineStoreBackend does not support the deletion of the overall DataContext project config" # noqa: E501 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP + "InlineStoreBackend does not support the deletion of the overall DataContext project config" # noqa: E501 # FIXME CoP ) if resource_name is None: - raise StoreBackendError( # noqa: TRY003 - "InlineStoreBackend does not support the deletion of top level keys; the DataContext's config variables schema is immutable" # noqa: E501 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP + "InlineStoreBackend does not support the deletion of top level keys; the DataContext's config variables schema is immutable" # noqa: E501 # FIXME CoP ) elif not self._has_key(key): - raise StoreBackendError(f"Could not find a value associated with key `{key}`") # noqa: TRY003 + raise StoreBackendError(f"Could not find a value associated with key `{key}`") # noqa: TRY003 # FIXME CoP del self._data_context.config[resource_type][resource_name] @@ -198,7 +198,7 @@ def build_key( id: str | None = None, name: str | None = 
None, ) -> DataContextVariableKey: - """Get the store backend specific implementation of the key. id included for super class compatibility.""" # noqa: E501 + """Get the store backend specific implementation of the key. id included for super class compatibility.""" # noqa: E501 # FIXME CoP return DataContextVariableKey( resource_name=name, ) diff --git a/great_expectations/data_context/store/json_site_store.py b/great_expectations/data_context/store/json_site_store.py index 44b8e7170b9b..7088468bcbaa 100644 --- a/great_expectations/data_context/store/json_site_store.py +++ b/great_expectations/data_context/store/json_site_store.py @@ -17,7 +17,7 @@ class JsonSiteStore(Store): """ A JsonSiteStore manages the JSON artifacts of our renderers, which allows us to render them into final views in HTML by GX Cloud. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def __init__(self, store_backend=None, runtime_environment=None, store_name=None) -> None: if store_backend is not None: @@ -35,8 +35,8 @@ def __init__(self, store_backend=None, runtime_environment=None, store_name=None store_name=store_name, ) - # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 - # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 + # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 # FIXME CoP + # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. 
# noqa: E501 # FIXME CoP self._config = { "store_backend": store_backend, "runtime_environment": runtime_environment, diff --git a/great_expectations/data_context/store/query_store.py b/great_expectations/data_context/store/query_store.py index ff4ca68eefce..dc2a0514d7eb 100644 --- a/great_expectations/data_context/store/query_store.py +++ b/great_expectations/data_context/store/query_store.py @@ -17,7 +17,7 @@ if is_version_greater_or_equal(sa.__version__, "1.4.0"): url_create_fn = sqlalchemy.URL.create else: - url_create_fn = sqlalchemy.URL # type: ignore[assignment] + url_create_fn = sqlalchemy.URL # type: ignore[assignment] # FIXME CoP logger = logging.getLogger(__name__) @@ -25,7 +25,7 @@ class SqlAlchemyQueryStore(Store): """SqlAlchemyQueryStore stores queries by name, and makes it possible to retrieve the resulting value by query - name.""" # noqa: E501 + name.""" # noqa: E501 # FIXME CoP _key_class: ClassVar[Type] = StringKey @@ -38,7 +38,7 @@ def __init__( store_name=None, ) -> None: if not sa: - raise gx_exceptions.DataContextError( # noqa: TRY003 + raise gx_exceptions.DataContextError( # noqa: TRY003 # FIXME CoP "sqlalchemy module not found, but is required for " "SqlAlchemyQueryStore" ) super().__init__( @@ -47,7 +47,7 @@ def __init__( store_name=store_name, ) if queries: - # If queries are defined in configuration, then we load them into an InMemoryStoreBackend # noqa: E501 + # If queries are defined in configuration, then we load them into an InMemoryStoreBackend # noqa: E501 # FIXME CoP try: assert isinstance( queries, dict @@ -55,7 +55,7 @@ def __init__( assert ( store_backend is None or store_backend["class_name"] == "InMemoryStoreBackend" ), ( - "If queries are provided in configuration, then store_backend must be empty or an " # noqa: E501 + "If queries are provided in configuration, then store_backend must be empty or an " # noqa: E501 # FIXME CoP "InMemoryStoreBackend" ) for k, v in queries.items(): @@ -75,8 +75,8 @@ def __init__( options = 
url_create_fn(drivername, **credentials) self.engine = sa.create_engine(options) - # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 - # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 + # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 # FIXME CoP + # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 # FIXME CoP self._config = { "credentials": credentials, "queries": queries, @@ -107,7 +107,7 @@ def get_query_result(self, key, query_parameters=None): query = result.get("query") return_type = result.get("return_type", "list") if return_type not in ["list", "scalar"]: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "The return_type of a SqlAlchemyQueryStore query must be one of either 'list' " "or 'scalar'" ) diff --git a/great_expectations/data_context/store/store.py b/great_expectations/data_context/store/store.py index 576460aea194..986fae7c2407 100644 --- a/great_expectations/data_context/store/store.py +++ b/great_expectations/data_context/store/store.py @@ -88,7 +88,7 @@ def __init__( store_backend: runtime_environment: store_name: store name given in the DataContextConfig (via either in-code or yaml configuration) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if store_backend is None: store_backend = {"class_name": "InMemoryStoreBackend"} self._store_name = store_name @@ -107,7 +107,7 @@ def __init__( module_name=module_name, package_name=None, class_name=store_backend ) if not isinstance(self._store_backend, StoreBackend): - raise DataContextError( # noqa: TRY003 + raise DataContextError( # noqa: TRY003 # FIXME CoP "Invalid StoreBackend configuration: expected a StoreBackend instance." 
) self._use_fixed_length_key = self._store_backend.fixed_length_key @@ -135,11 +135,11 @@ def gx_cloud_response_json_to_object_collection(cls, response_json: Dict) -> Lis """ This method takes full json response from GX cloud and outputs a list of dicts appropriate for deserialization into a collection of GX objects - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP logger.debug(f"GE Cloud Response JSON ->\n{pf(response_json, depth=3)}") data = response_json["data"] if not isinstance(data, list): - raise TypeError("GX Cloud did not return a collection of Datasources when expected") # noqa: TRY003 + raise TypeError("GX Cloud did not return a collection of Datasources when expected") # noqa: TRY003 # FIXME CoP return [cls._convert_raw_json_to_object_dict(d) for d in data] @@ -161,7 +161,7 @@ def _validate_key(self, key: DataContextKey) -> None: if key == StoreBackend.STORE_BACKEND_ID_KEY or isinstance(key, self.key_class): return else: - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"key must be an instance of {self.key_class.__name__}, not {type(key)}" ) @@ -198,7 +198,7 @@ def store_backend_id_warnings_suppressed(self) -> str: Report the store_backend_id of the currently-configured StoreBackend, suppressing warnings for invalid configurations. 
Returns: store_backend_id which is a UUID(version=4) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return self._store_backend.store_backend_id_warnings_suppressed @property @@ -217,7 +217,7 @@ def key_to_tuple(self, key: DataContextKey) -> Tuple[str, ...]: def tuple_to_key(self, tuple_: Tuple[str, ...]) -> DataContextKey: if tuple_ == StoreBackend.STORE_BACKEND_ID_KEY: - return StoreBackend.STORE_BACKEND_ID_KEY[0] # type: ignore[return-value] + return StoreBackend.STORE_BACKEND_ID_KEY[0] # type: ignore[return-value] # FIXME CoP if self._use_fixed_length_key: return self.key_class.from_fixed_length_tuple(tuple_) return self.key_class.from_tuple(tuple_) @@ -348,7 +348,7 @@ def remove_key(self, key): def _build_key_from_config(self, config: AbstractConfig) -> DataContextKey: id: Optional[str] = None - # Chetan - 20220831 - Explicit fork in logic to cover legacy behavior (particularly around Checkpoints). # noqa: E501 + # Chetan - 20220831 - Explicit fork in logic to cover legacy behavior (particularly around Checkpoints). # noqa: E501 # FIXME CoP if hasattr(config, "id"): id = config.id @@ -366,7 +366,7 @@ def build_store_from_config( runtime_environment: Optional[dict] = None, ) -> Store: if config is None or module_name is None: - raise gx_exceptions.StoreConfigurationError( # noqa: TRY003 + raise gx_exceptions.StoreConfigurationError( # noqa: TRY003 # FIXME CoP "Cannot build a store without both a store_config and a module_name" ) diff --git a/great_expectations/data_context/store/tuple_store_backend.py b/great_expectations/data_context/store/tuple_store_backend.py index fac14102e597..0004fcfbed21 100644 --- a/great_expectations/data_context/store/tuple_store_backend.py +++ b/great_expectations/data_context/store/tuple_store_backend.py @@ -27,9 +27,9 @@ class TupleStoreBackend(StoreBackend, metaclass=ABCMeta): For example, in the following template path: expectations/{0}/{1}/{2}/prefix-{2}.json, keys must have three components. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, filepath_template=None, filepath_prefix=None, @@ -54,7 +54,7 @@ def __init__( # noqa: PLR0913 self.platform_specific_separator = platform_specific_separator if filepath_template is not None and filepath_suffix is not None: - raise ValueError("filepath_suffix may only be used when filepath_template is None") # noqa: TRY003 + raise ValueError("filepath_suffix may only be used when filepath_template is None") # noqa: TRY003 # FIXME CoP self.filepath_template = filepath_template if filepath_prefix and len(filepath_prefix) > 0: @@ -89,15 +89,15 @@ def _validate_key(self, key) -> None: for key_element in key: for substring in self.forbidden_substrings: if substring in key_element: - raise ValueError( # noqa: TRY003 - f"Keys in {self.__class__.__name__} must not contain substrings in {self.forbidden_substrings} : {key}" # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + f"Keys in {self.__class__.__name__} must not contain substrings in {self.forbidden_substrings} : {key}" # noqa: E501 # FIXME CoP ) @override def _validate_value(self, value) -> None: if not isinstance(value, str) and not isinstance(value, bytes): - raise TypeError( # noqa: TRY003 - f"Values in {self.__class__.__name__} must be instances of {str} or {bytes}, not {type(value)}" # noqa: E501 + raise TypeError( # noqa: TRY003 # FIXME CoP + f"Values in {self.__class__.__name__} must be instances of {str} or {bytes}, not {type(value)}" # noqa: E501 # FIXME CoP ) def _convert_key_to_filepath(self, key): @@ -122,7 +122,7 @@ def _convert_key_to_filepath(self, key): return converted_string - def _convert_filepath_to_key(self, filepath): # noqa: C901, PLR0912 + def _convert_filepath_to_key(self, filepath): # noqa: C901, PLR0912 # FIXME CoP if filepath == self.STORE_BACKEND_ID_KEY[0]: return self.STORE_BACKEND_ID_KEY if self.platform_specific_separator: @@ -133,9 +133,9 @@ def 
_convert_filepath_to_key(self, filepath): # noqa: C901, PLR0912 not filepath.startswith(self.filepath_prefix) and len(filepath) >= len(self.filepath_prefix) + 1 ): - # If filepath_prefix is set, we expect that it is the first component of a valid filepath. # noqa: E501 - raise ValueError( # noqa: TRY003 - "filepath must start with the filepath_prefix when one is set by the store_backend" # noqa: E501 + # If filepath_prefix is set, we expect that it is the first component of a valid filepath. # noqa: E501 # FIXME CoP + raise ValueError( # noqa: TRY003 # FIXME CoP + "filepath must start with the filepath_prefix when one is set by the store_backend" # noqa: E501 # FIXME CoP ) else: # Remove the prefix before processing @@ -144,9 +144,9 @@ def _convert_filepath_to_key(self, filepath): # noqa: C901, PLR0912 if self.filepath_suffix: if not filepath.endswith(self.filepath_suffix): - # If filepath_suffix is set, we expect that it is the last component of a valid filepath. # noqa: E501 - raise ValueError( # noqa: TRY003 - "filepath must end with the filepath_suffix when one is set by the store_backend" # noqa: E501 + # If filepath_suffix is set, we expect that it is the last component of a valid filepath. 
# noqa: E501 # FIXME CoP + raise ValueError( # noqa: TRY003 # FIXME CoP + "filepath must end with the filepath_suffix when one is set by the store_backend" # noqa: E501 # FIXME CoP ) else: # Remove the suffix before processing @@ -154,9 +154,9 @@ def _convert_filepath_to_key(self, filepath): # noqa: C901, PLR0912 if self.filepath_template: # filepath_template is always specified with forward slashes, but it is then - # used to (1) dynamically construct and evaluate a regex, and (2) split the provided (observed) filepath # noqa: E501 + # used to (1) dynamically construct and evaluate a regex, and (2) split the provided (observed) filepath # noqa: E501 # FIXME CoP if self.platform_specific_separator: - filepath_template = os.path.join( # noqa: PTH118 + filepath_template = os.path.join( # noqa: PTH118 # FIXME CoP *self.filepath_template.split("/") ) filepath_template = filepath_template.replace("\\", "\\\\") @@ -203,15 +203,15 @@ def get_random_hex(size=4): filepath = self._convert_key_to_filepath(key) new_key = self._convert_filepath_to_key(filepath) if key != new_key: - raise ValueError( # noqa: TRY003 - f"filepath template {self.filepath_template} for class {self.__class__.__name__} is not reversible for a tuple of length {self.key_length}. " # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + f"filepath template {self.filepath_template} for class {self.__class__.__name__} is not reversible for a tuple of length {self.key_length}. " # noqa: E501 # FIXME CoP "Have you included all elements in the key tuple?" 
) @property @override def config(self) -> dict: - return self._config # type: ignore[attr-defined] + return self._config # type: ignore[attr-defined] # FIXME CoP class TupleFilesystemStoreBackend(TupleStoreBackend): @@ -220,9 +220,9 @@ class TupleFilesystemStoreBackend(TupleStoreBackend): The key to this StoreBackend must be a tuple with fixed length based on the filepath_template, or a variable-length tuple may be used and returned with an optional filepath_suffix (to be) added. The filepath_template is a string template used to convert the key to a filepath. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, base_directory, filepath_template=None, @@ -249,32 +249,32 @@ def __init__( # noqa: PLR0913 base_public_path=base_public_path, store_name=store_name, ) - if os.path.isabs(base_directory): # noqa: PTH117 + if os.path.isabs(base_directory): # noqa: PTH117 # FIXME CoP self.full_base_directory = base_directory - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if root_directory is None: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "base_directory must be an absolute path if root_directory is not provided" ) - elif not os.path.isabs(root_directory): # noqa: PTH117 - raise ValueError( # noqa: TRY003 + elif not os.path.isabs(root_directory): # noqa: PTH117 # FIXME CoP + raise ValueError( # noqa: TRY003 # FIXME CoP f"root_directory must be an absolute path. Got {root_directory} instead." 
) else: - self.full_base_directory = os.path.join( # noqa: PTH118 + self.full_base_directory = os.path.join( # noqa: PTH118 # FIXME CoP root_directory, base_directory ) - os.makedirs( # noqa: PTH103 - str(os.path.dirname(self.full_base_directory)), # noqa: PTH120 + os.makedirs( # noqa: PTH103 # FIXME CoP + str(os.path.dirname(self.full_base_directory)), # noqa: PTH120 # FIXME CoP exist_ok=True, ) # Initialize with store_backend_id if not part of an HTMLSiteStore if not self._suppress_store_backend_id: _ = self.store_backend_id - # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 - # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 + # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 # FIXME CoP + # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. 
# noqa: E501 # FIXME CoP self._config = { "base_directory": base_directory, "filepath_template": filepath_template, @@ -294,15 +294,15 @@ def __init__( # noqa: PLR0913 filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True) def _get(self, key): # type: ignore[explicit-override] # FIXME - filepath: str = os.path.join( # noqa: PTH118 + filepath: str = os.path.join( # noqa: PTH118 # FIXME CoP self.full_base_directory, self._convert_key_to_filepath(key) ) try: with open(filepath) as infile: contents: str = infile.read().rstrip("\n") except FileNotFoundError as e: - raise InvalidKeyError( # noqa: TRY003 - f"Unable to retrieve object from TupleFilesystemStoreBackend with the following Key: {filepath!s}" # noqa: E501 + raise InvalidKeyError( # noqa: TRY003 # FIXME CoP + f"Unable to retrieve object from TupleFilesystemStoreBackend with the following Key: {filepath!s}" # noqa: E501 # FIXME CoP ) from e return contents @@ -315,12 +315,12 @@ def _get_all(self) -> list[Any]: def _set(self, key, value, **kwargs): # type: ignore[explicit-override] # FIXME if not isinstance(key, tuple): key = key.to_tuple() - filepath = os.path.join( # noqa: PTH118 + filepath = os.path.join( # noqa: PTH118 # FIXME CoP self.full_base_directory, self._convert_key_to_filepath(key) ) path, _filename = os.path.split(filepath) - os.makedirs(str(path), exist_ok=True) # noqa: PTH103 + os.makedirs(str(path), exist_ok=True) # noqa: PTH103 # FIXME CoP with open(filepath, "wb") as outfile: if isinstance(value, str): outfile.write(value.encode("utf-8")) @@ -329,17 +329,17 @@ def _set(self, key, value, **kwargs): # type: ignore[explicit-override] # FIXME return filepath def _move(self, source_key, dest_key, **kwargs): # type: ignore[explicit-override] # FIXME - source_path = os.path.join( # noqa: PTH118 + source_path = os.path.join( # noqa: PTH118 # FIXME CoP self.full_base_directory, self._convert_key_to_filepath(source_key) ) - dest_path = os.path.join( # noqa: PTH118 + dest_path = 
os.path.join( # noqa: PTH118 # FIXME CoP self.full_base_directory, self._convert_key_to_filepath(dest_key) ) dest_dir, _dest_filename = os.path.split(dest_path) - if os.path.exists(source_path): # noqa: PTH110 - os.makedirs(dest_dir, exist_ok=True) # noqa: PTH103 + if os.path.exists(source_path): # noqa: PTH110 # FIXME CoP + os.makedirs(dest_dir, exist_ok=True) # noqa: PTH103 # FIXME CoP shutil.move(source_path, dest_path) return dest_key @@ -349,11 +349,11 @@ def _move(self, source_key, dest_key, **kwargs): # type: ignore[explicit-overri def list_keys(self, prefix: Tuple = ()) -> List[Tuple]: key_list = [] for root, dirs, files in os.walk( - os.path.join(self.full_base_directory, *prefix) # noqa: PTH118 + os.path.join(self.full_base_directory, *prefix) # noqa: PTH118 # FIXME CoP ): for file_ in files: full_path, file_name = os.path.split( - os.path.join(root, file_) # noqa: PTH118 + os.path.join(root, file_) # noqa: PTH118 # FIXME CoP ) relative_path = os.path.relpath( full_path, @@ -362,7 +362,7 @@ def list_keys(self, prefix: Tuple = ()) -> List[Tuple]: if relative_path == ".": filepath = file_name else: - filepath = os.path.join(relative_path, file_name) # noqa: PTH118 + filepath = os.path.join(relative_path, file_name) # noqa: PTH118 # FIXME CoP if self._is_missing_prefix_or_suffix( filepath_prefix=self.filepath_prefix, @@ -383,11 +383,11 @@ def rrmdir(self, mroot, curpath) -> None: try: while ( not os.listdir(curpath) - and os.path.exists(curpath) # noqa: PTH110 + and os.path.exists(curpath) # noqa: PTH110 # FIXME CoP and mroot != curpath ): - f2 = os.path.dirname(curpath) # noqa: PTH120 - os.rmdir(curpath) # noqa: PTH106 + f2 = os.path.dirname(curpath) # noqa: PTH120 # FIXME CoP + os.rmdir(curpath) # noqa: PTH106 # FIXME CoP curpath = f2 except (NotADirectoryError, FileNotFoundError): pass @@ -396,13 +396,13 @@ def remove_key(self, key): # type: ignore[explicit-override] # FIXME if not isinstance(key, tuple): key = key.to_tuple() - filepath = os.path.join( # 
noqa: PTH118 + filepath = os.path.join( # noqa: PTH118 # FIXME CoP self.full_base_directory, self._convert_key_to_filepath(key) ) - if os.path.exists(filepath): # noqa: PTH110 - d_path = os.path.dirname(filepath) # noqa: PTH120 - os.remove(filepath) # noqa: PTH107 + if os.path.exists(filepath): # noqa: PTH110 # FIXME CoP + d_path = os.path.dirname(filepath) # noqa: PTH120 # FIXME CoP + os.remove(filepath) # noqa: PTH107 # FIXME CoP self.rrmdir(self.full_base_directory, d_path) return True return False @@ -411,7 +411,7 @@ def remove_key(self, key): # type: ignore[explicit-override] # FIXME def get_url_for_key(self, key, protocol=None) -> str: path = self._convert_key_to_filepath(key) escaped_path = self._url_path_escape_special_characters(path=path) - full_path = os.path.join(self.full_base_directory, escaped_path) # noqa: PTH118 + full_path = os.path.join(self.full_base_directory, escaped_path) # noqa: PTH118 # FIXME CoP if protocol is None: protocol = "file:" @@ -420,18 +420,18 @@ def get_url_for_key(self, key, protocol=None) -> str: def get_public_url_for_key(self, key, protocol=None): if not self.base_public_path: - raise StoreBackendError( # noqa: TRY003 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP """Error: No base_public_path was configured! 
- A public URL was requested base_public_path was not configured for the TupleFilesystemStoreBackend - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) path = self._convert_key_to_filepath(key) public_url = self.base_public_path + path return public_url def _has_key(self, key): # type: ignore[explicit-override] # FIXME - return os.path.isfile( # noqa: PTH113 - os.path.join( # noqa: PTH118 + return os.path.isfile( # noqa: PTH113 # FIXME CoP + os.path.join( # noqa: PTH118 # FIXME CoP self.full_base_directory, self._convert_key_to_filepath(key) ) ) @@ -449,9 +449,9 @@ class TupleS3StoreBackend(TupleStoreBackend): The key to this StoreBackend must be a tuple with fixed length based on the filepath_template, or a variable-length tuple may be used and returned with an optional filepath_suffix (to be) added. The filepath_template is a string template used to convert the key to a filepath. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, bucket, prefix="", @@ -501,8 +501,8 @@ def __init__( # noqa: PLR0913 if not self._suppress_store_backend_id: _ = self.store_backend_id - # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 - # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 + # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 # FIXME CoP + # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. 
# noqa: E501 # FIXME CoP self._config = { "bucket": bucket, "prefix": prefix, @@ -527,12 +527,12 @@ def __init__( # noqa: PLR0913 def _build_s3_object_key(self, key): if self.platform_specific_separator: if self.prefix: - s3_object_key = os.path.join( # noqa: PTH118 + s3_object_key = os.path.join( # noqa: PTH118 # FIXME CoP self.prefix, self._convert_key_to_filepath(key) ) else: s3_object_key = self._convert_key_to_filepath(key) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if self.prefix: s3_object_key = "/".join((self.prefix, self._convert_key_to_filepath(key))) else: @@ -557,19 +557,35 @@ def _get_all(self) -> list[Any]: s3_object_keys = [self._build_s3_object_key(key) for key in keys] return [self._get_by_s3_object_key(client, key) for key in s3_object_keys] - def _get_by_s3_object_key(self, s3_client, s3_object_key): + def _get_by_s3_object_key(self, s3_client, s3_object_key) -> Any: try: s3_response_object = s3_client.get_object(Bucket=self.bucket, Key=s3_object_key) except (s3_client.exceptions.NoSuchKey, s3_client.exceptions.NoSuchBucket) as e: - raise InvalidKeyError( # noqa: TRY003 - f"Unable to retrieve object from TupleS3StoreBackend with the following Key: {s3_object_key!s}" # noqa: E501 + raise InvalidKeyError( # noqa: TRY003 # FIXME CoP + f"Unable to retrieve object from TupleS3StoreBackend with the following Key: {s3_object_key!s}" # noqa: E501 # FIXME CoP ) from e - return ( - s3_response_object["Body"] - .read() - .decode(s3_response_object.get("ContentEncoding", "utf-8")) - ) + # ContentEncoding is an unknown string per + # https://botocore.amazonaws.com/v1/documentation/api/latest/reference/services/s3/client/get_object.html#get-object + content_encoding: str = s3_response_object.get("ContentEncoding", "utf-8") + # We found the string could take the form of an array, e.g. `utf-8,aws-chunked`. + # As of boto3 1.36.0, we aren't aware of any time when the string-list can have more + # than 1 item except when `aws-chunked` is included. 
In order to preserve unknown + # encodings included with `aws-chunked` we will remove the `aws-chunked` string from + # the list. We do not intend to add support for reading in chunks at this time. + # Calling botocore.response.StreamingBody.read() without arguments + # will read the entire stream. + encodings: list[str] = content_encoding.split(",") + if "aws-chunked" in encodings: + encodings.remove("aws-chunked") + data = s3_response_object["Body"].read() + # Maybe it's theoretically possible to encode an entire file with multiple encodings. + # It's an odd choice of format for ContentEncoding to allow multiple encoding strings. + # As far as we know, this list will never end up with length>1 after the possible + # encoding `aws-chunked` is removed. + for encoding in encodings: + data = data.decode(encoding) + return data def _set( # type: ignore[explicit-override] # FIXME self, @@ -596,7 +612,7 @@ def _set( # type: ignore[explicit-override] # FIXME result_s3.put(Body=value, ContentType=content_type, **self.s3_put_options) except s3.meta.client.exceptions.ClientError as e: logger.debug(str(e)) - raise StoreBackendError("Unable to set object in s3.") from e # noqa: TRY003 + raise StoreBackendError("Unable to set object in s3.") from e # noqa: TRY003 # FIXME CoP return s3_object_key @@ -606,18 +622,18 @@ def _move(self, source_key, dest_key, **kwargs) -> None: source_filepath = self._convert_key_to_filepath(source_key) if not source_filepath.startswith(self.prefix): - source_filepath = os.path.join(self.prefix, source_filepath) # noqa: PTH118 + source_filepath = os.path.join(self.prefix, source_filepath) # noqa: PTH118 # FIXME CoP dest_filepath = self._convert_key_to_filepath(dest_key) if not dest_filepath.startswith(self.prefix): - dest_filepath = os.path.join(self.prefix, dest_filepath) # noqa: PTH118 + dest_filepath = os.path.join(self.prefix, dest_filepath) # noqa: PTH118 # FIXME CoP s3.Bucket(self.bucket).copy({"Bucket": self.bucket, "Key": source_filepath}, 
dest_filepath) s3.Object(self.bucket, source_filepath).delete() @override - def list_keys(self, prefix: Tuple = ()) -> List[Tuple]: # noqa: C901 - too complex - # Note that the prefix arg is only included to maintain consistency with the parent class signature # noqa: E501 + def list_keys(self, prefix: Tuple = ()) -> List[Tuple]: # noqa: C901 # too complex + # Note that the prefix arg is only included to maintain consistency with the parent class signature # noqa: E501 # FIXME CoP s3r = self._create_resource() bucket = s3r.Bucket(self.bucket) key_list = [] @@ -629,11 +645,11 @@ def list_keys(self, prefix: Tuple = ()) -> List[Tuple]: # noqa: C901 - too comp s3_object_key = s3_object_info.key if self.platform_specific_separator: s3_object_key = os.path.relpath(s3_object_key, self.prefix) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if self.prefix is None: if s3_object_key.startswith("/"): s3_object_key = s3_object_key[1:] - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if s3_object_key.startswith(f"{self.prefix}/"): s3_object_key = s3_object_key[len(self.prefix) + 1 :] @@ -673,7 +689,7 @@ def get_url_for_key(self, key, protocol=None) -> str: def get_public_url_for_key(self, key, protocol=None): if not self.base_public_path: - raise StoreBackendError( # noqa: TRY003 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP """Error: No base_public_path was configured! 
- A public URL was requested base_public_path was not configured for the """ @@ -695,7 +711,7 @@ def remove_key(self, key): # type: ignore[explicit-override] # FIXME # Check if the object exists if self.has_key(key): - # This implementation deletes the object if non-versioned or adds a delete marker if versioned # noqa: E501 + # This implementation deletes the object if non-versioned or adds a delete marker if versioned # noqa: E501 # FIXME CoP s3.Object(self.bucket, s3_object_key).delete() return True else: @@ -751,9 +767,9 @@ class TupleGCSStoreBackend(TupleStoreBackend): or a variable-length tuple may be used and returned with an optional filepath_suffix (to be) added. The filepath_template is a string template used to convert the key to a filepath. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, bucket, project, @@ -790,8 +806,8 @@ def __init__( # noqa: PLR0913 if not self._suppress_store_backend_id: _ = self.store_backend_id - # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 - # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 + # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 # FIXME CoP + # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. 
# noqa: E501 # FIXME CoP self._config = { "bucket": bucket, "project": project, @@ -815,12 +831,12 @@ def __init__( # noqa: PLR0913 def _build_gcs_object_key(self, key): if self.platform_specific_separator: if self.prefix: - gcs_object_key = os.path.join( # noqa: PTH118 + gcs_object_key = os.path.join( # noqa: PTH118 # FIXME CoP self.prefix, self._convert_key_to_filepath(key) ) else: gcs_object_key = self._convert_key_to_filepath(key) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if self.prefix: gcs_object_key = "/".join((self.prefix, self._convert_key_to_filepath(key))) else: @@ -850,8 +866,8 @@ def _get_by_gcs_object_key(self, bucket, key): gcs_object_key = self._build_gcs_object_key(key) gcs_response_object = bucket.get_blob(gcs_object_key) if not gcs_response_object: - raise InvalidKeyError( # noqa: TRY003 - f"Unable to retrieve object from TupleGCSStoreBackend with the following Key: {key!s}" # noqa: E501 + raise InvalidKeyError( # noqa: TRY003 # FIXME CoP + f"Unable to retrieve object from TupleGCSStoreBackend with the following Key: {key!s}" # noqa: E501 # FIXME CoP ) else: return gcs_response_object.download_as_bytes().decode("utf-8") @@ -888,17 +904,17 @@ def _move(self, source_key, dest_key, **kwargs) -> None: source_filepath = self._convert_key_to_filepath(source_key) if not source_filepath.startswith(self.prefix): - source_filepath = os.path.join(self.prefix, source_filepath) # noqa: PTH118 + source_filepath = os.path.join(self.prefix, source_filepath) # noqa: PTH118 # FIXME CoP dest_filepath = self._convert_key_to_filepath(dest_key) if not dest_filepath.startswith(self.prefix): - dest_filepath = os.path.join(self.prefix, dest_filepath) # noqa: PTH118 + dest_filepath = os.path.join(self.prefix, dest_filepath) # noqa: PTH118 # FIXME CoP blob = bucket.blob(source_filepath) _ = bucket.rename_blob(blob, dest_filepath) @override def list_keys(self, prefix: Tuple = ()) -> List[Tuple]: - # Note that the prefix arg is only included to maintain 
consistency with the parent class signature # noqa: E501 + # Note that the prefix arg is only included to maintain consistency with the parent class signature # noqa: E501 # FIXME CoP key_list = [] from great_expectations.compatibility import google @@ -937,7 +953,7 @@ def get_url_for_key(self, key, protocol=None) -> str: def get_public_url_for_key(self, key, protocol=None): if not self.base_public_path: - raise StoreBackendError( # noqa: TRY003 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP """Error: No base_public_path was configured! - A public URL was requested base_public_path was not configured for the """ @@ -950,7 +966,7 @@ def get_public_url_for_key(self, key, protocol=None): def _get_path_url(self, path): if self.prefix: path_url = "/".join((self.bucket, self.prefix, path)) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if self.base_public_path: if self.base_public_path[-1] != "/": path_url = f"/{path}" @@ -986,10 +1002,10 @@ class TupleAzureBlobStoreBackend(TupleStoreBackend): You need to setup the connection string environment variable https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # We will use blobclient here - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, container, connection_string=None, @@ -1017,15 +1033,15 @@ def __init__( # noqa: PLR0913 manually_initialize_store_backend_id=manually_initialize_store_backend_id, store_name=store_name, ) - self.connection_string = connection_string or os.environ.get( # noqa: TID251 + self.connection_string = connection_string or os.environ.get( # noqa: TID251 # FIXME CoP "AZURE_STORAGE_CONNECTION_STRING" ) - self.credential = credential or os.environ.get( # noqa: TID251 + self.credential = credential or os.environ.get( # noqa: TID251 # FIXME CoP "AZURE_CREDENTIAL" ) self.prefix = prefix or "" self.container = container - self.account_url = account_url or os.environ.get( # 
noqa: TID251 + self.account_url = account_url or os.environ.get( # noqa: TID251 # FIXME CoP "AZURE_STORAGE_ACCOUNT_URL" ) @@ -1034,7 +1050,7 @@ def __init__( # noqa: PLR0913 def _container_client(self) -> Any: from great_expectations.compatibility import azure - # Validate that "azure" libraries were successfully imported and attempt to create "azure_client" handle. # noqa: E501 + # Validate that "azure" libraries were successfully imported and attempt to create "azure_client" handle. # noqa: E501 # FIXME CoP if azure.BlobServiceClient: # type: ignore[truthy-function] # False if NotImported try: if self.connection_string: @@ -1051,23 +1067,23 @@ def _container_client(self) -> Any: account_url=self.account_url, credential=self.credential ) else: - raise StoreBackendError( # noqa: TRY003, TRY301 - "Unable to initialize ServiceClient, AZURE_STORAGE_CONNECTION_STRING should be set" # noqa: E501 + raise StoreBackendError( # noqa: TRY003, TRY301 # FIXME CoP + "Unable to initialize ServiceClient, AZURE_STORAGE_CONNECTION_STRING should be set" # noqa: E501 # FIXME CoP ) except Exception as e: - # Failure to create "azure_client" is most likely due invalid "azure_options" dictionary. # noqa: E501 - raise StoreBackendError( # noqa: TRY003 + # Failure to create "azure_client" is most likely due invalid "azure_options" dictionary. # noqa: E501 # FIXME CoP + raise StoreBackendError( # noqa: TRY003 # FIXME CoP f'Due to exception: "{e!s}", "azure_client" could not be created.' ) from e else: - raise StoreBackendError( # noqa: TRY003 - 'Unable to create azure "BlobServiceClient" due to missing azure.storage.blob dependency.' # noqa: E501 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP + 'Unable to create azure "BlobServiceClient" due to missing azure.storage.blob dependency.' 
# noqa: E501 # FIXME CoP ) return blob_service_client.get_container_client(self.container) def _get(self, key): # type: ignore[explicit-override] # FIXME - az_blob_key = os.path.join( # noqa: PTH118 + az_blob_key = os.path.join( # noqa: PTH118 # FIXME CoP self.prefix, self._convert_key_to_filepath(key) ) return self._container_client.download_blob(az_blob_key).readall().decode("utf-8") @@ -1080,7 +1096,7 @@ def _get_all(self) -> list[Any]: def _set(self, key, value, content_encoding="utf-8", **kwargs): # type: ignore[explicit-override] # FIXME from great_expectations.compatibility.azure import ContentSettings - az_blob_key = os.path.join( # noqa: PTH118 + az_blob_key = os.path.join( # noqa: PTH118 # FIXME CoP self.prefix, self._convert_key_to_filepath(key) ) @@ -1107,7 +1123,7 @@ def _set(self, key, value, content_encoding="utf-8", **kwargs): # type: ignore[ @override def list_keys(self, prefix: Tuple = ()) -> List[Tuple]: - # Note that the prefix arg is only included to maintain consistency with the parent class signature # noqa: E501 + # Note that the prefix arg is only included to maintain consistency with the parent class signature # noqa: E501 # FIXME CoP key_list = [] for obj in self._container_client.list_blobs(name_starts_with=self.prefix): @@ -1129,7 +1145,7 @@ def list_keys(self, prefix: Tuple = ()) -> List[Tuple]: @override def get_url_for_key(self, key, protocol=None) -> str: az_blob_key = self._convert_key_to_filepath(key) - az_blob_path = os.path.join( # noqa: PTH118 + az_blob_path = os.path.join( # noqa: PTH118 # FIXME CoP self.container, self.prefix, az_blob_key ) @@ -1143,12 +1159,12 @@ def _has_key(self, key): # type: ignore[explicit-override] # FIXME def _move(self, source_key, dest_key, **kwargs) -> None: source_blob_path = self._convert_key_to_filepath(source_key) if not source_blob_path.startswith(self.prefix): - source_blob_path = os.path.join( # noqa: PTH118 + source_blob_path = os.path.join( # noqa: PTH118 # FIXME CoP self.prefix, 
source_blob_path ) dest_blob_path = self._convert_key_to_filepath(dest_key) if not dest_blob_path.startswith(self.prefix): - dest_blob_path = os.path.join(self.prefix, dest_blob_path) # noqa: PTH118 + dest_blob_path = os.path.join(self.prefix, dest_blob_path) # noqa: PTH118 # FIXME CoP # azure storage sdk does not have _move method source_blob = self._container_client.get_blob_client(source_blob_path) @@ -1159,7 +1175,7 @@ def _move(self, source_key, dest_key, **kwargs) -> None: if copy_properties.status != "success": dest_blob.abort_copy(copy_properties.id) - raise StoreBackendError( # noqa: TRY003 + raise StoreBackendError( # noqa: TRY003 # FIXME CoP f"Unable to copy blob {source_blob_path} with status {copy_properties.status}" ) source_blob.delete_blob() @@ -1170,7 +1186,7 @@ def remove_key(self, key): # type: ignore[explicit-override] # FIXME az_blob_path = self._convert_key_to_filepath(key) if not az_blob_path.startswith(self.prefix): - az_blob_path = os.path.join(self.prefix, az_blob_path) # noqa: PTH118 + az_blob_path = os.path.join(self.prefix, az_blob_path) # noqa: PTH118 # FIXME CoP blob = self._container_client.get_blob_client(az_blob_path) blob.delete_blob() @@ -1179,4 +1195,4 @@ def remove_key(self, key): # type: ignore[explicit-override] # FIXME @property @override def config(self) -> dict: - return self._config # type: ignore[attr-defined] + return self._config # type: ignore[attr-defined] # FIXME CoP diff --git a/great_expectations/data_context/store/validation_definition_store.py b/great_expectations/data_context/store/validation_definition_store.py index 7f40a6254810..5ae649d51b84 100644 --- a/great_expectations/data_context/store/validation_definition_store.py +++ b/great_expectations/data_context/store/validation_definition_store.py @@ -37,7 +37,7 @@ def __init__( ) def get_key(self, name: str, id: str | None = None) -> GXCloudIdentifier | StringKey: - """Given a name and optional ID, build the correct key for use in the 
ValidationDefinitionStore.""" # noqa: E501 + """Given a name and optional ID, build the correct key for use in the ValidationDefinitionStore.""" # noqa: E501 # FIXME CoP if self.cloud_mode: return GXCloudIdentifier( resource_type=GXCloudRESTResource.VALIDATION_DEFINITION, @@ -72,7 +72,7 @@ def _convert_raw_json_to_object_dict(data: dict) -> dict: @override def serialize(self, value): - # In order to enable the custom json_encoders in ValidationDefinition, we need to set `models_as_dict` off # noqa: E501 + # In order to enable the custom json_encoders in ValidationDefinition, we need to set `models_as_dict` off # noqa: E501 # FIXME CoP # Ref: https://docs.pydantic.dev/1.10/usage/exporting_models/#serialising-self-reference-or-other-models output = value.json(models_as_dict=False, indent=2, sort_keys=True) diff --git a/great_expectations/data_context/store/validation_results_store.py b/great_expectations/data_context/store/validation_results_store.py index 9add5891de61..d9070f0d473d 100644 --- a/great_expectations/data_context/store/validation_results_store.py +++ b/great_expectations/data_context/store/validation_results_store.py @@ -94,7 +94,7 @@ class ValidationResultsStore(Store): bug_risk: Moderate --ge-feature-maturity-info-- - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP _key_class: ClassVar[Type] = ValidationResultIdentifier @@ -133,8 +133,8 @@ def __init__(self, store_backend=None, runtime_environment=None, store_name=None store_name=store_name, ) - # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 - # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. # noqa: E501 + # Gather the call arguments of the present function (include the "module_name" and add the "class_name"), filter # noqa: E501 # FIXME CoP + # out the Falsy values, and set the instance "_config" variable equal to the resulting dictionary. 
# noqa: E501 # FIXME CoP self._config = { "store_backend": store_backend, "runtime_environment": runtime_environment, @@ -186,7 +186,7 @@ def store_validation_results( ) -> bool | GXCloudResourceRef: """Helper function to do the heavy lifting for StoreValidationResultAction and ValidationConfigs. This is broken from the ValidationAction (for now) so we don't need to pass the data_context around. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP checkpoint_id = None if self.cloud_mode and checkpoint_identifier: checkpoint_id = checkpoint_identifier.id diff --git a/great_expectations/data_context/templates.py b/great_expectations/data_context/templates.py index 744d18171c06..84a1db776885 100644 --- a/great_expectations/data_context/templates.py +++ b/great_expectations/data_context/templates.py @@ -44,7 +44,7 @@ def dump(self, data, stream=None, **kw): # type: ignore[explicit-override] # FI # config_version refers to the syntactic version of this config file, and is used in maintaining backwards compatibility # It is auto-generated and usually does not need to be changed. 
config_version: {DataContextConfigDefaults.DEFAULT_CONFIG_VERSION.value} -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP CONFIG_VARIABLES_INTRO = """ # This config file supports variable substitution which enables: 1) keeping diff --git a/great_expectations/data_context/types/base.py b/great_expectations/data_context/types/base.py index b1cc596fbf9f..3f9d77abd9fb 100644 --- a/great_expectations/data_context/types/base.py +++ b/great_expectations/data_context/types/base.py @@ -49,7 +49,7 @@ ) from great_expectations.types import DictDot, SerializableDictDot from great_expectations.util import ( - convert_to_json_serializable, # noqa: TID251 + convert_to_json_serializable, # noqa: TID251 # FIXME CoP deep_filter_properties_iterable, ) @@ -99,13 +99,13 @@ def __init__(self, commented_map: Optional[CommentedMap] = None) -> None: @classmethod def _get_schema_instance(cls: Type[BYC]) -> Schema: if not issubclass(cls.get_schema_class(), Schema): - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 - "Invalid type: A configuration schema class needs to inherit from the Marshmallow Schema class." # noqa: E501 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP + "Invalid type: A configuration schema class needs to inherit from the Marshmallow Schema class." # noqa: E501 # FIXME CoP ) if not issubclass(cls.get_config_class(), BaseYamlConfig): - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 - "Invalid type: A configuration class needs to inherit from the BaseYamlConfig class." # noqa: E501 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP + "Invalid type: A configuration class needs to inherit from the BaseYamlConfig class." 
# noqa: E501 # FIXME CoP ) if hasattr(cls.get_config_class(), "_schema_instance"): @@ -130,7 +130,7 @@ def from_commented_map(cls: Type[BYC], commented_map: Union[CommentedMap, Dict]) return config except ValidationError: - logger.error( # noqa: TRY400 + logger.error( # noqa: TRY400 # FIXME CoP "Encountered errors during loading config. See ValidationError for more details." ) raise @@ -177,7 +177,7 @@ def get_schema_class(cls) -> Type[Schema]: class SorterConfig(DictDot): - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, name, class_name=None, @@ -295,7 +295,7 @@ def make_sorter_config(self, data, **kwargs): class AssetConfig(SerializableDictDot): - def __init__( # noqa: C901, PLR0912, PLR0913 + def __init__( # noqa: C901, PLR0912, PLR0913 # FIXME CoP self, name: Optional[str] = None, class_name: Optional[str] = None, @@ -364,9 +364,9 @@ def to_json_dict(self) -> Dict[str, JSONValues]: A JSON-serializable dict representation of this AssetConfig. """ # TODO: 2/4/2022 - # This implementation of "SerializableDictDot.to_json_dict() occurs frequently and should ideally serve as the # noqa: E501 - # reference implementation in the "SerializableDictDot" class itself. However, the circular import dependencies, # noqa: E501 - # due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules # noqa: E501 + # This implementation of "SerializableDictDot.to_json_dict() occurs frequently and should ideally serve as the # noqa: E501 # FIXME CoP + # reference implementation in the "SerializableDictDot" class itself. However, the circular import dependencies, # noqa: E501 # FIXME CoP + # due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules # noqa: E501 # FIXME CoP # make this refactoring infeasible at the present time. 
dict_obj: dict = self.to_dict() serializeable_dict: dict = convert_to_json_serializable(data=dict_obj) @@ -462,7 +462,7 @@ def make_asset_config(self, data, **kwargs): class DataConnectorConfig(AbstractConfig): - def __init__( # noqa: C901, PLR0912, PLR0913, PLR0915 + def __init__( # noqa: C901, PLR0912, PLR0913, PLR0915 # FIXME CoP self, class_name, name: Optional[str] = None, @@ -597,9 +597,9 @@ def to_json_dict(self) -> Dict[str, JSONValues]: A JSON-serializable dict representation of this DataConnectorConfig. """ # # TODO: 2/4/2022 - # This implementation of "SerializableDictDot.to_json_dict() occurs frequently and should ideally serve as the # noqa: E501 - # reference implementation in the "SerializableDictDot" class itself. However, the circular import dependencies, # noqa: E501 - # due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules # noqa: E501 + # This implementation of "SerializableDictDot.to_json_dict() occurs frequently and should ideally serve as the # noqa: E501 # FIXME CoP + # reference implementation in the "SerializableDictDot" class itself. However, the circular import dependencies, # noqa: E501 # FIXME CoP + # due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules # noqa: E501 # FIXME CoP # make this refactoring infeasible at the present time. dict_obj: dict = self.to_dict() serializeable_dict: dict = convert_to_json_serializable(data=dict_obj) @@ -696,8 +696,8 @@ class Meta: # noinspection PyUnusedLocal @validates_schema - def validate_schema(self, data, **kwargs): # noqa: C901, PLR0912 - # If a class_name begins with the dollar sign ("$"), then it is assumed to be a variable name to be substituted. # noqa: E501 + def validate_schema(self, data, **kwargs): # noqa: C901, PLR0912 # FIXME CoP + # If a class_name begins with the dollar sign ("$"), then it is assumed to be a variable name to be substituted. 
# noqa: E501 # FIXME CoP if data["class_name"][0] == "$": return if ("default_regex" in data) and not ( @@ -715,11 +715,11 @@ def validate_schema(self, data, **kwargs): # noqa: C901, PLR0912 "ConfiguredAssetDBFSDataConnector", ] ): - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP f"""Your current configuration uses one or more keys in a data connector that are required only by a subclass of the FilePathDataConnector class (your data connector is "{data['class_name']}"). Please update your configuration to continue. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) if ("glob_directive" in data) and not ( data["class_name"] # noqa: E713 # membership check @@ -730,11 +730,11 @@ def validate_schema(self, data, **kwargs): # noqa: C901, PLR0912 "ConfiguredAssetDBFSDataConnector", ] ): - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP f"""Your current configuration uses one or more keys in a data connector that are required only by a filesystem type of the data connector (your data connector is "{data['class_name']}"). Please update your configuration to continue. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) if ("delimiter" in data) and not ( data["class_name"] # noqa: E713 # membership check @@ -747,11 +747,11 @@ def validate_schema(self, data, **kwargs): # noqa: C901, PLR0912 "ConfiguredAssetGCSDataConnector", ] ): - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP f"""Your current configuration uses one or more keys in a data connector that are required only by an S3/Azure/GCS type of the data connector (your data connector is "{data['class_name']}"). Please update your configuration \ to continue. 
-""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) if ("prefix" in data) and not ( data["class_name"] # noqa: E713 # membership check @@ -762,11 +762,11 @@ def validate_schema(self, data, **kwargs): # noqa: C901, PLR0912 "ConfiguredAssetGCSDataConnector", ] ): - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP f"""Your current configuration uses one or more keys in a data connector that are required only by an S3/GCS type of the data connector (your data connector is "{data['class_name']}"). Please update your configuration to continue. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) if ("bucket" in data or "max_keys" in data) and not ( data["class_name"] # noqa: E713 # membership check @@ -775,11 +775,11 @@ def validate_schema(self, data, **kwargs): # noqa: C901, PLR0912 "ConfiguredAssetS3DataConnector", ] ): - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP f"""Your current configuration uses one or more keys in a data connector that are required only by an S3 type of the data connector (your data connector is "{data['class_name']}"). Please update your configuration to continue. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) if ("azure_options" in data or "container" in data or "name_starts_with" in data) and not ( data["class_name"] # noqa: E713 # membership check @@ -788,11 +788,11 @@ def validate_schema(self, data, **kwargs): # noqa: C901, PLR0912 "ConfiguredAssetAzureDataConnector", ] ): - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP f"""Your current configuration uses one or more keys in a data connector that are required only by an Azure type of the data connector (your data connector is "{data['class_name']}"). Please update your configuration to continue. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) if "azure_options" in data and data["class_name"] in [ "InferredAssetAzureDataConnector", @@ -800,11 +800,11 @@ def validate_schema(self, data, **kwargs): # noqa: C901, PLR0912 ]: azure_options = data["azure_options"] if not (("conn_str" in azure_options) ^ ("account_url" in azure_options)): - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP """Your current configuration is either missing methods of authentication or is using too many for \ the Azure type of data connector. You must only select one between `conn_str` or `account_url`. Please update your \ configuration to continue. -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) if ("gcs_options" in data or "bucket_or_name" in data or "max_results" in data) and not ( data["class_name"] # noqa: E713 # membership check @@ -813,11 +813,11 @@ def validate_schema(self, data, **kwargs): # noqa: C901, PLR0912 "ConfiguredAssetGCSDataConnector", ] ): - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP f"""Your current configuration uses one or more keys in a data connector that are required only by a GCS type of the data connector (your data connector is "{data['class_name']}"). Please update your configuration to continue. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) if "gcs_options" in data and data["class_name"] in [ "InferredAssetGCSDataConnector", @@ -825,11 +825,11 @@ def validate_schema(self, data, **kwargs): # noqa: C901, PLR0912 ]: gcs_options = data["gcs_options"] if "filename" in gcs_options and "info" in gcs_options: - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP """Your current configuration can only use a single method of authentication for the GCS type of \ data connector. 
You must only select one between `filename` (from_service_account_file) and `info` \ (from_service_account_info). Please update your configuration to continue. -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) if ( "include_schema_name" in data @@ -845,11 +845,11 @@ def validate_schema(self, data, **kwargs): # noqa: C901, PLR0912 "ConfiguredAssetSqlDataConnector", ] ): - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP f"""Your current configuration uses one or more keys in a data connector that are required only by an SQL type of the data connector (your data connector is "{data['class_name']}"). Please update your configuration to continue. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) if ( "data_asset_name_prefix" in data @@ -865,11 +865,11 @@ def validate_schema(self, data, **kwargs): # noqa: C901, PLR0912 "ConfiguredAssetAWSGlueDataCatalogDataConnector", ] ): - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP f"""Your current configuration uses one or more keys in a data connector that are required only by an SQL/GlueCatalog type of the data connector (your data connector is "{data['class_name']}"). Please update your configuration to continue. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) if ( @@ -881,11 +881,11 @@ def validate_schema(self, data, **kwargs): # noqa: C901, PLR0912 "ConfiguredAssetAWSGlueDataCatalogDataConnector", ] ): - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP f"""Your current configuration uses one or more keys in a data connector that are required only by an GlueCatalog type of the data connector (your data connector is "{data['class_name']}"). Please update your configuration to continue. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) # noinspection PyUnusedLocal @@ -917,7 +917,7 @@ def prepare_dump(self, data, **kwargs): class ExecutionEngineConfig(DictDot): - def __init__( # noqa: C901, PLR0913 + def __init__( # noqa: C901, PLR0913 # FIXME CoP self, class_name, module_name=None, @@ -1012,24 +1012,24 @@ class Meta: # noinspection PyUnusedLocal @validates_schema def validate_schema(self, data, **kwargs): - # If a class_name begins with the dollar sign ("$"), then it is assumed to be a variable name to be substituted. # noqa: E501 + # If a class_name begins with the dollar sign ("$"), then it is assumed to be a variable name to be substituted. # noqa: E501 # FIXME CoP if data["class_name"][0] == "$": return if ("connection_string" in data or "credentials" in data) and not ( data["class_name"] == "SqlAlchemyExecutionEngine" ): - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP f"""Your current configuration uses the "connection_string" key in an execution engine, but only SqlAlchemyExecutionEngine requires this attribute (your execution engine is "{data['class_name']}"). Please update your configuration to continue. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) if "spark_config" in data and not (data["class_name"] == "SparkDFExecutionEngine"): - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP f"""Your current configuration uses the "spark_config" key in an execution engine, but only SparkDFExecutionEngine requires this attribute (your execution engine is "{data['class_name']}"). Please update your configuration to continue. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) # noinspection PyUnusedLocal @@ -1064,7 +1064,7 @@ def __init__( ) -> None: # access_token was given a default value to maintain arg position of organization_id if access_token is None: - raise ValueError("Access token cannot be None.") # noqa: TRY003 + raise ValueError("Access token cannot be None.") # noqa: TRY003 # FIXME CoP # The base url doesn't point to a specific resource but is the prefix for constructing GX # cloud urls. We want it to end in a '/' so we can manipulate it using tools such as @@ -1092,8 +1092,8 @@ def to_json_dict(self) -> Dict[str, JSONValues]: class DataContextConfigSchema(Schema): - config_version = fields.Number( - validate=lambda x: 0 < x < 100, # noqa: PLR2004 + config_version: fields.Number = fields.Number( + validate=lambda x: 0 < x < 100, # noqa: PLR2004 # FIXME CoP error_messages={"invalid": "config version must " "be a number."}, ) fluent_datasources = fields.Dict( @@ -1114,7 +1114,7 @@ class DataContextConfigSchema(Schema): progress_bars = fields.Nested(ProgressBarsConfigSchema, required=False, allow_none=True) # To ensure backwards compatability, we need to ensure that new options are "opt-in" - # If a user has not explicitly configured the value, it will be None and will be wiped by the post_dump hook # noqa: E501 + # If a user has not explicitly configured the value, it will be None and will be wiped by the post_dump hook # noqa: E501 # FIXME CoP REMOVE_KEYS_IF_NONE = [ "progress_bars", # 0.13.49 "fluent_datasources", @@ -1130,7 +1130,7 @@ def remove_keys_if_none(self, data: dict, **kwargs) -> dict: return data @override - def handle_error(self, exc, data, **kwargs) -> None: # type: ignore[override] + def handle_error(self, exc, data, **kwargs) -> None: # type: ignore[override] # FIXME CoP """Log and raise our custom exception when (de)serialization fails.""" if ( exc @@ -1150,13 +1150,13 @@ def handle_error(self, exc, data, **kwargs) -> None: # type: ignore[override] 
@validates_schema def validate_schema(self, data, **kwargs) -> None: if "config_version" not in data: - raise gx_exceptions.InvalidDataContextConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidDataContextConfigError( # noqa: TRY003 # FIXME CoP "The key `config_version` is missing; please check your config file.", validation_error=ValidationError(message="no config_version key"), ) if not isinstance(data["config_version"], (int, float)): - raise gx_exceptions.InvalidDataContextConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidDataContextConfigError( # noqa: TRY003 # FIXME CoP "The key `config_version` must be a number. Please check your config file.", validation_error=ValidationError(message="config version not a number"), ) @@ -1166,13 +1166,13 @@ def validate_schema(self, data, **kwargs) -> None: store_config["class_name"] == "ValidationResultsStore" for store_config in data["stores"].values() ): - raise gx_exceptions.UnsupportedConfigVersionError( # noqa: TRY003 - "You appear to be using a config version from the 0.7.x series. This version is no longer supported." # noqa: E501 + raise gx_exceptions.UnsupportedConfigVersionError( # noqa: TRY003 # FIXME CoP + "You appear to be using a config version from the 0.7.x series. This version is no longer supported." # noqa: E501 # FIXME CoP ) if data["config_version"] < MINIMUM_SUPPORTED_CONFIG_VERSION: raise gx_exceptions.UnsupportedConfigVersionError( - "You appear to have an invalid config version ({}).\n The version number must be at least {}. " # noqa: E501 + "You appear to have an invalid config version ({}).\n The version number must be at least {}. 
" # noqa: E501 # FIXME CoP "Please see the migration guide at https://docs.greatexpectations.io/docs/guides/miscellaneous/migration_guide#migrating-to-the-batch-request-v3-api".format( data["config_version"], MINIMUM_SUPPORTED_CONFIG_VERSION ), @@ -1180,7 +1180,7 @@ def validate_schema(self, data, **kwargs) -> None: if data["config_version"] > CURRENT_GX_CONFIG_VERSION: raise gx_exceptions.InvalidDataContextConfigError( - "You appear to have an invalid config version ({}).\n The maximum valid version is {}.".format( # noqa: E501 + "You appear to have an invalid config version ({}).\n The maximum valid version is {}.".format( # noqa: E501 # FIXME CoP data["config_version"], CURRENT_GX_CONFIG_VERSION ), validation_error=ValidationError(message="config version too high"), @@ -1289,12 +1289,12 @@ class BaseStoreBackendDefaults(DictDot): Define base defaults for platform specific StoreBackendDefaults. StoreBackendDefaults define defaults for specific cases of often used configurations. For example, if you plan to store expectations, validations, and data_docs in s3 use the S3StoreBackendDefaults and you may be able to specify less parameters. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, - expectations_store_name: str = DataContextConfigDefaults.DEFAULT_EXPECTATIONS_STORE_NAME.value, # noqa: E501 - validation_results_store_name: str = DataContextConfigDefaults.DEFAULT_VALIDATIONS_STORE_NAME.value, # noqa: E501 + expectations_store_name: str = DataContextConfigDefaults.DEFAULT_EXPECTATIONS_STORE_NAME.value, # noqa: E501 # FIXME CoP + validation_results_store_name: str = DataContextConfigDefaults.DEFAULT_VALIDATIONS_STORE_NAME.value, # noqa: E501 # FIXME CoP checkpoint_store_name: str = DataContextConfigDefaults.DEFAULT_CHECKPOINT_STORE_NAME.value, data_docs_site_name: str = DataContextConfigDefaults.DEFAULT_DATA_DOCS_SITE_NAME.value, stores: Optional[dict] = None, @@ -1336,7 +1336,7 @@ class S3StoreBackendDefaults(BaseStoreBackendDefaults): checkpoint_store_name: Overrides default if supplied """ - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, default_bucket_name: Optional[str] = None, expectations_store_bucket_name: Optional[str] = None, @@ -1444,19 +1444,19 @@ def __init__( self.plugins_directory = str(plugins_directory) if root_directory is not None: root_directory = str(root_directory) - self.stores[self.expectations_store_name]["store_backend"][ # type: ignore[index] + self.stores[self.expectations_store_name]["store_backend"][ # type: ignore[index] # FIXME CoP "root_directory" ] = root_directory - self.stores[self.validation_results_store_name]["store_backend"][ # type: ignore[index] + self.stores[self.validation_results_store_name]["store_backend"][ # type: ignore[index] # FIXME CoP "root_directory" ] = root_directory - self.stores[self.checkpoint_store_name]["store_backend"][ # type: ignore[index] + self.stores[self.checkpoint_store_name]["store_backend"][ # type: ignore[index] # FIXME CoP "root_directory" ] = root_directory - 
self.stores[self.validation_definition_store_name]["store_backend"][ # type: ignore[index] + self.stores[self.validation_definition_store_name]["store_backend"][ # type: ignore[index] # FIXME CoP "root_directory" ] = root_directory - self.data_docs_sites[self.data_docs_site_name]["store_backend"][ # type: ignore[index] + self.data_docs_sites[self.data_docs_site_name]["store_backend"][ # type: ignore[index] # FIXME CoP "root_directory" ] = root_directory @@ -1505,7 +1505,7 @@ def __init__( temp_dir = tempfile.TemporaryDirectory() path = temp_dir.name logger.info(f"Created temporary directory '{path}' for ephemeral docs site") - self.data_docs_sites[DataContextConfigDefaults.DEFAULT_DATA_DOCS_SITE_NAME.value][ # type: ignore[index] + self.data_docs_sites[DataContextConfigDefaults.DEFAULT_DATA_DOCS_SITE_NAME.value][ # type: ignore[index] # FIXME CoP "store_backend" ]["base_directory"] = path else: @@ -1533,9 +1533,9 @@ class GCSStoreBackendDefaults(BaseStoreBackendDefaults): expectations_store_name: Overrides default if supplied validation_results_store_name: Overrides default if supplied checkpoint_store_name: Overrides default if supplied - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - def __init__( # noqa: C901, PLR0913 + def __init__( # noqa: C901, PLR0913 # FIXME CoP self, default_bucket_name: Optional[str] = None, default_project_name: Optional[str] = None, @@ -1655,9 +1655,9 @@ class DatabaseStoreBackendDefaults(BaseStoreBackendDefaults): expectations_store_name: Overrides default if supplied validation_results_store_name: Overrides default if supplied checkpoint_store_name: Overrides default if supplied - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, default_credentials: Optional[Dict] = None, expectations_store_credentials: Optional[Dict] = None, @@ -1671,7 +1671,7 @@ def __init__( # noqa: PLR0913 # Initialize base defaults super().__init__() - # Use default credentials if 
separate credentials not supplied for expectations_store and validation_results_store # noqa: E501 + # Use default credentials if separate credentials not supplied for expectations_store and validation_results_store # noqa: E501 # FIXME CoP if expectations_store_credentials is None: expectations_store_credentials = default_credentials if validation_results_store_credentials is None: @@ -1748,9 +1748,9 @@ class DataContextConfig(BaseYamlConfig): commented_map (Optional[CommentedMap]): the CommentedMap associated with DataContext configuration. Used when instantiating with yml file. progress_bars (Optional[ProgressBarsConfig]): allows progress_bars to be enabled or disabled globally or for metrics calculations. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, config_version: Optional[float] = None, fluent_datasources: Optional[dict] = None, @@ -1772,7 +1772,7 @@ def __init__( # noqa: PLR0913 config_version = DataContextConfigDefaults.DEFAULT_CONFIG_VERSION.value # Set defaults via store_backend_defaults if one is passed in - # Override attributes from store_backend_defaults with any items passed into the constructor: # noqa: E501 + # Override attributes from store_backend_defaults with any items passed into the constructor: # noqa: E501 # FIXME CoP if store_backend_defaults is not None: if stores is None: stores = store_backend_defaults.stores @@ -1809,8 +1809,8 @@ def _init_stores(self, store_configs: dict | None) -> dict: configured_stores = {config["class_name"] for config in store_configs.values()} for name, config in DataContextConfigDefaults.DEFAULT_STORES.value.items(): if not isinstance(config, dict): - raise ValueError( # noqa: TRY003, TRY004 - "Store defaults must be a mapping of default names to default dictionary configurations." 
# noqa: E501 + raise ValueError( # noqa: TRY003, TRY004 # FIXME CoP + "Store defaults must be a mapping of default names to default dictionary configurations." # noqa: E501 # FIXME CoP ) if config["class_name"] not in configured_stores: # Create ephemeral store config @@ -1844,9 +1844,9 @@ def to_json_dict(self) -> Dict[str, JSONValues]: A JSON-serializable dict representation of this DataContextConfig. """ # TODO: 2/4/2022 - # This implementation of "SerializableDictDot.to_json_dict() occurs frequently and should ideally serve as the # noqa: E501 - # reference implementation in the "SerializableDictDot" class itself. However, the circular import dependencies, # noqa: E501 - # due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules # noqa: E501 + # This implementation of "SerializableDictDot.to_json_dict() occurs frequently and should ideally serve as the # noqa: E501 # FIXME CoP + # reference implementation in the "SerializableDictDot" class itself. However, the circular import dependencies, # noqa: E501 # FIXME CoP + # due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules # noqa: E501 # FIXME CoP # make this refactoring infeasible at the present time. dict_obj: dict = self.to_dict() serializeable_dict: dict = convert_to_json_serializable(data=dict_obj) @@ -1874,7 +1874,7 @@ def __repr__(self) -> str: implementation in the "SerializableDictDot" class. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP json_dict: dict = self.to_sanitized_json_dict() deep_filter_properties_iterable( properties=json_dict, @@ -1896,7 +1896,7 @@ def __str__(self) -> str: implementation in the "SerializableDictDot" class. 
However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return self.__repr__() diff --git a/great_expectations/data_context/types/refs.py b/great_expectations/data_context/types/refs.py index ad8a69ff78b0..107538f0f07f 100644 --- a/great_expectations/data_context/types/refs.py +++ b/great_expectations/data_context/types/refs.py @@ -17,7 +17,7 @@ def id(self): class GXCloudResourceRef(GXCloudIDAwareRef): """ This class represents a reference to a Great Expectations object persisted to Great Expectations Cloud. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def __init__(self, resource_type: str, id: str, url: str, response_json: dict) -> None: self._resource_type = resource_type diff --git a/great_expectations/data_context/types/resource_identifiers.py b/great_expectations/data_context/types/resource_identifiers.py index 4cfc609338aa..c62ca68c2643 100644 --- a/great_expectations/data_context/types/resource_identifiers.py +++ b/great_expectations/data_context/types/resource_identifiers.py @@ -21,7 +21,7 @@ class ExpectationSuiteIdentifier(DataContextKey): def __init__(self, name: str) -> None: super().__init__() if not isinstance(name, str): - raise gx_exceptions.InvalidDataContextKeyError( # noqa: TRY003 + raise gx_exceptions.InvalidDataContextKeyError( # noqa: TRY003 # FIXME CoP f"name must be a string, not {type(name).__name__}" ) self._name = name @@ -100,7 +100,7 @@ def make_batch_identifier(self, data, **kwargs): @public_api class ValidationResultIdentifier(DataContextKey): - """A ValidationResultIdentifier identifies a validation result by the fully-qualified expectation_suite_identifier and run_id.""" # noqa: E501 + """A ValidationResultIdentifier identifies a validation result by the fully-qualified expectation_suite_identifier and run_id.""" # noqa: E501 # 
FIXME CoP def __init__(self, expectation_suite_identifier, run_id, batch_identifier) -> None: """Constructs a ValidationResultIdentifier @@ -172,7 +172,7 @@ def from_object(cls, validation_result): elif isinstance(batch_kwargs, dict): batch_identifier = IDDict(batch_kwargs).to_id() else: - raise gx_exceptions.DataContextError( # noqa: TRY003 + raise gx_exceptions.DataContextError( # noqa: TRY003 # FIXME CoP "Unable to construct ValidationResultIdentifier from provided object." ) return cls( @@ -286,8 +286,8 @@ def to_fixed_length_tuple(self): # type: ignore[explicit-override] # FIXME @classmethod def from_tuple(cls, tuple_): - if len(tuple_) < 6: # noqa: PLR2004 - raise gx_exceptions.GreatExpectationsError( # noqa: TRY003 + if len(tuple_) < 6: # noqa: PLR2004 # FIXME CoP + raise gx_exceptions.GreatExpectationsError( # noqa: TRY003 # FIXME CoP "ValidationMetricIdentifier tuple must have at least six components." ) if tuple_[2] == "__": @@ -305,8 +305,8 @@ def from_tuple(cls, tuple_): @classmethod def from_fixed_length_tuple(cls, tuple_): - if len(tuple_) != 6: # noqa: PLR2004 - raise gx_exceptions.GreatExpectationsError( # noqa: TRY003 + if len(tuple_) != 6: # noqa: PLR2004 # FIXME CoP + raise gx_exceptions.GreatExpectationsError( # noqa: TRY003 # FIXME CoP "ValidationMetricIdentifier fixed length tuple must have exactly six " "components." 
) if tuple_[2] == "__": @@ -367,7 +367,7 @@ def to_fixed_length_tuple(self): # type: ignore[explicit-override] # FIXME @classmethod def from_tuple(cls, tuple_): # Only add resource name if it exists in the tuple_ - if len(tuple_) == 3: # noqa: PLR2004 + if len(tuple_) == 3: # noqa: PLR2004 # FIXME CoP return cls(resource_type=tuple_[0], id=tuple_[1], resource_name=tuple_[2]) return cls(resource_type=tuple_[0], id=tuple_[1]) @@ -415,18 +415,18 @@ def __init__(self, site_section_name, resource_identifier) -> None: self._resource_identifier = ValidationResultIdentifier(**resource_identifier) elif site_section_name == "expectations": if isinstance(resource_identifier, ExpectationSuiteIdentifier): - self._resource_identifier = resource_identifier # type: ignore[assignment] + self._resource_identifier = resource_identifier # type: ignore[assignment] # FIXME CoP elif isinstance(resource_identifier, (tuple, list)): - self._resource_identifier = ExpectationSuiteIdentifier( # type: ignore[assignment] + self._resource_identifier = ExpectationSuiteIdentifier( # type: ignore[assignment] # FIXME CoP *resource_identifier ) else: - self._resource_identifier = ExpectationSuiteIdentifier( # type: ignore[assignment] + self._resource_identifier = ExpectationSuiteIdentifier( # type: ignore[assignment] # FIXME CoP **resource_identifier ) else: - raise gx_exceptions.InvalidDataContextKeyError( # noqa: TRY003 - "SiteSectionIdentifier only supports 'validations' and 'expectations' as site section names" # noqa: E501 + raise gx_exceptions.InvalidDataContextKeyError( # noqa: TRY003 # FIXME CoP + "SiteSectionIdentifier only supports 'validations' and 'expectations' as site section names" # noqa: E501 # FIXME CoP ) @property @@ -456,8 +456,8 @@ def from_tuple(cls, tuple_): resource_identifier=ExpectationSuiteIdentifier.from_tuple(tuple_[1:]), ) else: - raise gx_exceptions.InvalidDataContextKeyError( # noqa: TRY003 - "SiteSectionIdentifier only supports 'validations' and 'expectations' as site 
section names" # noqa: E501 + raise gx_exceptions.InvalidDataContextKeyError( # noqa: TRY003 # FIXME CoP + "SiteSectionIdentifier only supports 'validations' and 'expectations' as site section names" # noqa: E501 # FIXME CoP ) @@ -465,7 +465,7 @@ class ConfigurationIdentifier(DataContextKey): def __init__(self, configuration_key: str) -> None: super().__init__() if not isinstance(configuration_key, str): - raise gx_exceptions.InvalidDataContextKeyError( # noqa: TRY003 + raise gx_exceptions.InvalidDataContextKeyError( # noqa: TRY003 # FIXME CoP f"configuration_key must be a string, not {type(configuration_key).__name__}" ) self._configuration_key = configuration_key diff --git a/great_expectations/data_context/util.py b/great_expectations/data_context/util.py index d308b1386b4b..d7b29bc91f3e 100644 --- a/great_expectations/data_context/util.py +++ b/great_expectations/data_context/util.py @@ -11,22 +11,22 @@ import pyparsing as pp -from great_expectations.alias_types import PathStr # noqa: TCH001 +from great_expectations.alias_types import PathStr # noqa: TCH001 # FIXME CoP from great_expectations.exceptions import StoreConfigurationError from great_expectations.types import safe_deep_copy from great_expectations.util import load_class, verify_dynamic_loading_support try: - import sqlalchemy as sa # noqa: TID251 + import sqlalchemy as sa # noqa: TID251 # FIXME CoP except ImportError: - sa = None # type: ignore[assignment] + sa = None # type: ignore[assignment] # FIXME CoP logger = logging.getLogger(__name__) # TODO: Rename config to constructor_kwargs and config_defaults -> constructor_kwarg_default -# TODO: Improve error messages in this method. Since so much of our workflow is config-driven, this will be a *super* important part of DX. # noqa: E501 -def instantiate_class_from_config( # noqa: C901 +# TODO: Improve error messages in this method. Since so much of our workflow is config-driven, this will be a *super* important part of DX. 
# noqa: E501 # FIXME CoP +def instantiate_class_from_config( # noqa: C901 # FIXME CoP config, runtime_environment, config_defaults=None ): """Build a GX class from configuration dictionaries.""" @@ -41,8 +41,8 @@ def instantiate_class_from_config( # noqa: C901 try: module_name = config_defaults.pop("module_name") except KeyError: - raise KeyError( # noqa: TRY003 - f"Neither config : {config} nor config_defaults : {config_defaults} contains a module_name key." # noqa: E501 + raise KeyError( # noqa: TRY003 # FIXME CoP + f"Neither config : {config} nor config_defaults : {config_defaults} contains a module_name key." # noqa: E501 # FIXME CoP ) else: # Pop the value without using it, to avoid sending an unwanted value to the config_class @@ -54,14 +54,14 @@ def instantiate_class_from_config( # noqa: C901 class_name = config.pop("class_name", None) if class_name is None: logger.warning( - "Instantiating class from config without an explicit class_name is dangerous. Consider adding " # noqa: E501 + "Instantiating class from config without an explicit class_name is dangerous. Consider adding " # noqa: E501 # FIXME CoP f"an explicit class_name for {config.get('name')}" ) try: class_name = config_defaults.pop("class_name") except KeyError: - raise KeyError( # noqa: TRY003 - f"Neither config : {config} nor config_defaults : {config_defaults} contains a class_name key." # noqa: E501 + raise KeyError( # noqa: TRY003 # FIXME CoP + f"Neither config : {config} nor config_defaults : {config_defaults} contains a class_name key." 
# noqa: E501 # FIXME CoP ) else: # Pop the value without using it, to avoid sending an unwanted value to the config_class @@ -92,7 +92,7 @@ def instantiate_class_from_config( # noqa: C901 class_instance = class_(**config_with_defaults) except TypeError as e: raise TypeError( - f"Couldn't instantiate class: {class_name} with config: \n\t{format_dict_for_error_message(config_with_defaults)}\n \n" # noqa: E501 + f"Couldn't instantiate class: {class_name} with config: \n\t{format_dict_for_error_message(config_with_defaults)}\n \n" # noqa: E501 # FIXME CoP + str(e) ) @@ -135,7 +135,7 @@ def parse_substitution_variable(substitution_variable: str) -> Optional[str]: Returns: string of variable name e.g. SOME_VAR or None if not parsable. If there are multiple substitution variables this currently returns the first e.g. $SOME_$TRING -> $SOME_ - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP substitution_variable_name = pp.Word(pp.alphanums + "_").setResultsName( "substitution_variable_name" ) @@ -180,7 +180,7 @@ def mask_db_url(cls, url: str, use_urlparse: bool = False, **kwargs) -> str: Returns: url with password masked e.g. 
"postgresql+psycopg2://username:***@host:65432/database" - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP from great_expectations.datasource.fluent.config_str import ConfigStr @@ -194,14 +194,14 @@ def mask_db_url(cls, url: str, use_urlparse: bool = False, **kwargs) -> str: try: engine = sa.create_engine(url, **kwargs) return engine.url.__repr__() - # Account for the edge case where we have SQLAlchemy in our env but haven't installed the appropriate dialect to match the input URL # noqa: E501 + # Account for the edge case where we have SQLAlchemy in our env but haven't installed the appropriate dialect to match the input URL # noqa: E501 # FIXME CoP except Exception as e: logger.warning( f"Something went wrong when trying to use SQLAlchemy to obfuscate URL: {e}" ) else: warnings.warn( - "SQLAlchemy is not installed, using urlparse to mask database url password which ignores **kwargs." # noqa: E501 + "SQLAlchemy is not installed, using urlparse to mask database url password which ignores **kwargs." # noqa: E501 # FIXME CoP ) return cls._mask_db_url_no_sa(url=url) @@ -214,14 +214,14 @@ def _obfuscate_azure_blobstore_connection_string(cls, url: str) -> str: try: matched: re.Match[str] | None = azure_conn_str_re.match(url) if not matched: - raise StoreConfigurationError( # noqa: TRY003, TRY301 - f"The URL for the Azure connection-string, was not configured properly. Please check and try again: {url} " # noqa: E501 + raise StoreConfigurationError( # noqa: TRY003, TRY301 # FIXME CoP + f"The URL for the Azure connection-string, was not configured properly. 
Please check and try again: {url} " # noqa: E501 # FIXME CoP ) - res = f"DefaultEndpointsProtocol={matched.group(2)};AccountName={matched.group(4)};AccountKey=***;EndpointSuffix={matched.group(8)}" # noqa: E501 + res = f"DefaultEndpointsProtocol={matched.group(2)};AccountName={matched.group(4)};AccountKey=***;EndpointSuffix={matched.group(8)}" # noqa: E501 # FIXME CoP return res except Exception as e: - raise StoreConfigurationError( # noqa: TRY003 - f"Something went wrong when trying to obfuscate URL for Azure connection-string. Please check your configuration: {e}" # noqa: E501 + raise StoreConfigurationError( # noqa: TRY003 # FIXME CoP + f"Something went wrong when trying to obfuscate URL for Azure connection-string. Please check your configuration: {e}" # noqa: E501 # FIXME CoP ) @classmethod @@ -250,7 +250,7 @@ def _mask_db_url_no_sa(cls, url: str) -> str: return masked_url @classmethod - def sanitize_config(cls, config: dict) -> dict: # noqa: C901 - too complex + def sanitize_config(cls, config: dict) -> dict: # noqa: C901 # too complex """ Mask sensitive fields in a Dict. 
""" diff --git a/great_expectations/datasource/datasource_dict.py b/great_expectations/datasource/datasource_dict.py index 38e6af195d37..7139af70d45e 100644 --- a/great_expectations/datasource/datasource_dict.py +++ b/great_expectations/datasource/datasource_dict.py @@ -42,14 +42,14 @@ class DatasourceDict(UserDict): d["my_fds"] = pandas_fds # Underlying DatasourceStore makes a `set()` call pandas_fds = d["my_fds"] # Underlying DatasourceStore makes a `get()` call ``` - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def __init__( self, context: AbstractDataContext, datasource_store: DatasourceStore, ): - self._context = context # If possible, we should avoid passing the context through - once block-style is removed, we can extract this # noqa: E501 + self._context = context # If possible, we should avoid passing the context through - once block-style is removed, we can extract this # noqa: E501 # FIXME CoP self._datasource_store = datasource_store self._in_memory_data_assets: dict[str, DataAsset] = {} @@ -65,7 +65,7 @@ def data(self) -> dict[str, FluentDatasource]: # type: ignore[override] # `data (__setitem__, __getitem__, etc) This is generated just-in-time as the contents of the store may have changed. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP datasources: dict[str, FluentDatasource] = {} configs = self._datasource_store.get_all() @@ -103,7 +103,7 @@ def _get_ds_from_store(self, name: str) -> FluentDatasource: try: return self._datasource_store.retrieve_by_name(name) except ValueError: - raise KeyError(f"Could not find a datasource named '{name}'") # noqa: TRY003 + raise KeyError(f"Could not find a datasource named '{name}'") # noqa: TRY003 # FIXME CoP @override def __delitem__(self, name: str) -> None: @@ -136,7 +136,7 @@ class CacheableDatasourceDict(DatasourceDict): Any retrievals will firstly check an in-memory dictionary before requesting from the store. Other CRUD methods will ensure that both cache and store are kept in sync. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def __init__( self, diff --git a/great_expectations/datasource/fluent/batch_request.py b/great_expectations/datasource/fluent/batch_request.py index 19703fe52122..ab8a2c2c7a25 100644 --- a/great_expectations/datasource/fluent/batch_request.py +++ b/great_expectations/datasource/fluent/batch_request.py @@ -62,7 +62,7 @@ class BatchRequest(pydantic.GenericModel, Generic[PartitionerT]): Returns: BatchRequest - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP datasource_name: StrictStr = Field( ..., @@ -107,11 +107,11 @@ def update_batch_slice(self, value: Optional[BatchSlice] = None) -> None: Returns: None - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP try: parse_batch_slice(batch_slice=value) except (TypeError, ValueError) as e: - raise ValueError(f"Failed to parse BatchSlice to slice: {e}") # noqa: TRY003 + raise ValueError(f"Failed to parse BatchSlice to slice: {e}") # noqa: TRY003 # FIXME CoP self._batch_slice_input = value class Config: @@ -132,13 +132,13 @@ def _validate_options(cls, options) -> BatchParameters: if options is None: return {} if not isinstance(options, dict): - raise TypeError("BatchParameters must take the form of a dictionary.") # noqa: TRY003 + raise TypeError("BatchParameters must take the form of a dictionary.") # noqa: TRY003 # FIXME CoP if any(not isinstance(key, str) for key in options): - raise TypeError("BatchParameters keys must all be strings.") # noqa: TRY003 + raise TypeError("BatchParameters keys must all be strings.") # noqa: TRY003 # FIXME CoP return options @override - def json( # noqa: PLR0913 + def json( # noqa: PLR0913 # FIXME CoP self, *, include: Optional[Union[AbstractSetIntStr, MappingIntStrAny]] = None, @@ -170,7 +170,7 @@ def json( # noqa: PLR0913 ) @override - def dict( # noqa: PLR0913 + def dict( # noqa: PLR0913 # FIXME CoP self, *, include: AbstractSetIntStr | MappingIntStrAny | None = None, @@ -190,14 +190,14 @@ def dict( # noqa: PLR0913 # if we want it to show up 
in dict() with the _batch_request_input self.__fields__["batch_slice"] = pydantic.fields.ModelField( name="batch_slice", - type_=Optional[BatchSlice], # type: ignore[arg-type] + type_=Optional[BatchSlice], # type: ignore[arg-type] # FIXME CoP required=False, default=None, model_config=self.__config__, class_validators=None, ) - property_set_methods = self.__config__.property_set_methods # type: ignore[attr-defined] - self.__config__.property_set_methods = {} # type: ignore[attr-defined] + property_set_methods = self.__config__.property_set_methods # type: ignore[attr-defined] # FIXME CoP + self.__config__.property_set_methods = {} # type: ignore[attr-defined] # FIXME CoP self.__setattr__("batch_slice", self._batch_slice_input) result = super().dict( include=include, @@ -209,7 +209,7 @@ def dict( # noqa: PLR0913 skip_defaults=skip_defaults, ) # revert model changes - self.__config__.property_set_methods = property_set_methods # type: ignore[attr-defined] + self.__config__.property_set_methods = property_set_methods # type: ignore[attr-defined] # FIXME CoP self.__fields__.pop("batch_slice") return result @@ -226,7 +226,7 @@ def schema_json( # if we want its definition to show up in schema_json() cls.__fields__["batch_slice"] = pydantic.fields.ModelField( name="batch_slice", - type_=Optional[BatchSlice], # type: ignore[arg-type] + type_=Optional[BatchSlice], # type: ignore[arg-type] # FIXME CoP required=False, default=None, model_config=cls.__config__, diff --git a/great_expectations/datasource/fluent/config.py b/great_expectations/datasource/fluent/config.py index 0a0135046e11..029506b3917d 100644 --- a/great_expectations/datasource/fluent/config.py +++ b/great_expectations/datasource/fluent/config.py @@ -76,7 +76,7 @@ _MISSING: Final = object() JSON_ENCODERS: dict[Type, Callable] = {} -if TextClause: # type: ignore[truthy-function] +if TextClause: # type: ignore[truthy-function] # FIXME CoP JSON_ENCODERS[TextClause] = lambda v: str(v) T = TypeVar("T") @@ -88,15 
+88,15 @@ class GxConfig(FluentBaseModel): fluent_datasources: List[Datasource] = Field(..., description=_FLUENT_STYLE_DESCRIPTION) _EXCLUDE_FROM_DATASOURCE_SERIALIZATION: ClassVar[Set[str]] = { - _DATASOURCE_NAME_KEY, # The "name" field is set in validation upon deserialization from configuration key; hence, it should not be serialized. # noqa: E501 + _DATASOURCE_NAME_KEY, # The "name" field is set in validation upon deserialization from configuration key; hence, it should not be serialized. # noqa: E501 # FIXME CoP } _EXCLUDE_FROM_DATA_ASSET_SERIALIZATION: ClassVar[Set[str]] = { - _DATA_ASSET_NAME_KEY, # The "name" field is set in validation upon deserialization from configuration key; hence, it should not be serialized. # noqa: E501 + _DATA_ASSET_NAME_KEY, # The "name" field is set in validation upon deserialization from configuration key; hence, it should not be serialized. # noqa: E501 # FIXME CoP } _EXCLUDE_FROM_BATCH_DEFINITION_SERIALIZATION: ClassVar[Set[str]] = { - _BATCH_DEFINITION_NAME_KEY, # The "name" field is set in validation upon deserialization from configuration key; hence, it should not be serialized. # noqa: E501 + _BATCH_DEFINITION_NAME_KEY, # The "name" field is set in validation upon deserialization from configuration key; hence, it should not be serialized. # noqa: E501 # FIXME CoP } class Config: @@ -148,7 +148,7 @@ def get_datasource(self, name: str) -> Datasource: ) )[0] except IndexError as exc: - raise LookupError( # noqa: TRY003 + raise LookupError( # noqa: TRY003 # FIXME CoP f"'{name}' not found. 
Available datasources are {self.get_datasource_names()}" ) from exc @@ -187,7 +187,7 @@ def pop_datasource(self, name: str, default: T = _MISSING) -> Datasource | T: # # noinspection PyNestedDecorators @validator(_FLUENT_DATASOURCES_KEY, pre=True) @classmethod - def _load_datasource_subtype(cls, v: List[dict]): # noqa: C901 - too complex + def _load_datasource_subtype(cls, v: List[dict]): # noqa: C901 # too complex logger.info(f"Loading 'datasources' ->\n{pf(v, depth=2)}") loaded_datasources: List[Datasource] = [] @@ -197,13 +197,13 @@ def _load_datasource_subtype(cls, v: List[dict]): # noqa: C901 - too complex if not ds_type_name: # TODO: (kilo59 122222) ideally this would be raised by `Datasource` validation # https://github.com/pydantic/pydantic/issues/734 - raise ValueError(f"'{ds_name}' is missing a 'type' entry") # noqa: TRY003 + raise ValueError(f"'{ds_name}' is missing a 'type' entry") # noqa: TRY003 # FIXME CoP try: ds_type: Type[Datasource] = DataSourceManager.type_lookup[ds_type_name] logger.debug(f"Instantiating '{ds_name}' as {ds_type}") except KeyError as type_lookup_err: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"'{ds_name}' has unsupported 'type' - {type_lookup_err}" ) from type_lookup_err @@ -244,7 +244,7 @@ def parse_yaml( TODO (kilo59) 122822: remove this as soon as it's no longer needed. Such as when we use a new `config_version` instead of `fluent_datasources` key. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP loaded = yaml.load(f) logger.debug(f"loaded from yaml ->\n{pf(loaded, depth=3)}\n") loaded = _convert_fluent_datasources_loaded_from_yaml_to_internal_object_representation( @@ -289,7 +289,7 @@ def yaml( ) -> pathlib.Path: ... 
@override - def yaml( # noqa: PLR0913 + def yaml( # noqa: PLR0913 # FIXME CoP self, stream_or_path: Union[StringIO, pathlib.Path, None] = None, *, @@ -343,7 +343,7 @@ def _exclude_name_fields_from_fluent_datasources( datasource_config: dict for datasource_config in fluent_datasources: datasource_name = datasource_config[_DATASOURCE_NAME_KEY] - datasource_config = _exclude_fields_from_serialization( # noqa: PLW2901 + datasource_config = _exclude_fields_from_serialization( # noqa: PLW2901 # FIXME CoP source_dict=datasource_config, exclusions=self._EXCLUDE_FROM_DATASOURCE_SERIALIZATION, ) diff --git a/great_expectations/datasource/fluent/config_str.py b/great_expectations/datasource/fluent/config_str.py index d41396aa4c9f..5bc43ec9a299 100644 --- a/great_expectations/datasource/fluent/config_str.py +++ b/great_expectations/datasource/fluent/config_str.py @@ -206,7 +206,7 @@ def validate_parts(cls, parts: UriPartsDict, validate_port: bool = True) -> UriP cls.str_contains_config_template(part) # type: ignore[arg-type] # is str and name not in cls.ALLOWED_SUBSTITUTIONS ): - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"Only {', '.join(allowed_substitutions)} may use config substitution; '{name}'" " substitution not allowed" ) @@ -257,7 +257,7 @@ def _check_config_substitutions_needed( """ Given a Datasource and a dict-like mapping type return the keys whose value is a `ConfigStr` type. Optionally raise a warning if config substitution is needed but impossible due to a missing `_config_provider`. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP need_config_subs: set[str] = {k for (k, v) in options.items() if isinstance(v, ConfigStr)} if ( need_config_subs @@ -265,6 +265,6 @@ def _check_config_substitutions_needed( and not datasource._config_provider ): warnings.warn( - f"config variables '{','.join(need_config_subs)}' need substitution but no `_ConfigurationProvider` is present" # noqa: E501 + f"config variables '{','.join(need_config_subs)}' need substitution but no `_ConfigurationProvider` is present" # noqa: E501 # FIXME CoP ) return need_config_subs diff --git a/great_expectations/datasource/fluent/data_asset/path/directory_asset.py b/great_expectations/datasource/fluent/data_asset/path/directory_asset.py index 449f6dfb4a24..84deb97551fa 100644 --- a/great_expectations/datasource/fluent/data_asset/path/directory_asset.py +++ b/great_expectations/datasource/fluent/data_asset/path/directory_asset.py @@ -45,6 +45,16 @@ class DirectoryDataAsset(PathDataAsset[DatasourceT, ColumnPartitioner], Generic[ @public_api def add_batch_definition_daily(self, name: str, column: str) -> BatchDefinition: + """ + Add a BatchDefinition, which creates a single Batch for each day in the directory. + + Args: + name: Name of the Batch Definition. + column: Column to partition on. + + Returns: + A BatchDefinition that is partitioned daily. + """ # todo: test column return self.add_batch_definition( name=name, @@ -55,6 +65,16 @@ def add_batch_definition_daily(self, name: str, column: str) -> BatchDefinition: @public_api def add_batch_definition_monthly(self, name: str, column: str) -> BatchDefinition: + """ + Add a BatchDefinition which creates a single batch for each month in the directory. + + Args: + name: Name of the Batch Definition. + column: Column to partition on. + + Returns: + A BatchDefinition that is partitioned monthly. 
+ """ # todo: test column return self.add_batch_definition( name=name, @@ -65,6 +85,16 @@ def add_batch_definition_monthly(self, name: str, column: str) -> BatchDefinitio @public_api def add_batch_definition_yearly(self, name: str, column: str) -> BatchDefinition: + """ + Add a BatchDefinition which creates a single batch for each year in the directory. + + Args: + name: Name of the Batch Definition. + column: Column to partition on. + + Returns: + A BatchDefinition that is partitioned yearly. + """ # todo: test column return self.add_batch_definition( name=name, @@ -165,7 +195,7 @@ def build_batch_request( ): allowed_keys = set(self.get_batch_parameters_keys(partitioner=partitioner)) actual_keys = set(options.keys()) - raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 + raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 # FIXME CoP "Batch parameters should only contain keys from the following set:\n" f"{allowed_keys}\nbut your specified keys contain\n" f"{actual_keys.difference(allowed_keys)}\nwhich is not valid.\n" @@ -191,7 +221,7 @@ def _batch_spec_options_from_batch_request(self, batch_request: BatchRequest) -> """ get_reader_options_include: set[str] | None = self._get_reader_options_include() if not get_reader_options_include: - # Set to None if empty set to include any additional `extra_kwargs` passed to `add_*_asset` # noqa: E501 + # Set to None if empty set to include any additional `extra_kwargs` passed to `add_*_asset` # noqa: E501 # FIXME CoP get_reader_options_include = None batch_spec_options = { "reader_method": self._get_reader_method(), diff --git a/great_expectations/datasource/fluent/data_asset/path/file_asset.py b/great_expectations/datasource/fluent/data_asset/path/file_asset.py index f7c4c2eb36f9..f0c58e06f3b7 100644 --- a/great_expectations/datasource/fluent/data_asset/path/file_asset.py +++ b/great_expectations/datasource/fluent/data_asset/path/file_asset.py @@ -251,7 +251,7 @@ def build_batch_request( Option 
"batch_slice" is supported for all "DataAsset" extensions of this class identically. This mechanism applies to every "Datasource" type and any "ExecutionEngine" that is capable of loading data from files on local and/or cloud/networked filesystems (currently, Pandas and Spark backends work with files). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if options: for option, value in options.items(): if ( @@ -259,8 +259,8 @@ def build_batch_request( and value and not isinstance(value, str) ): - raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 - f"All batching_regex matching options must be strings. The value of '{option}' is " # noqa: E501 + raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 # FIXME CoP + f"All batching_regex matching options must be strings. The value of '{option}' is " # noqa: E501 # FIXME CoP f"not a string: {value}" ) @@ -270,7 +270,7 @@ def build_batch_request( ): allowed_keys = set(self.get_batch_parameters_keys(partitioner=partitioner)) actual_keys = set(options.keys()) - raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 + raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 # FIXME CoP "Batch parameters should only contain keys from the following set:\n" f"{allowed_keys}\nbut your specified keys contain\n" f"{actual_keys.difference(allowed_keys)}\nwhich is not valid.\n" @@ -296,7 +296,7 @@ def _batch_spec_options_from_batch_request(self, batch_request: BatchRequest) -> """ get_reader_options_include: set[str] | None = self._get_reader_options_include() if not get_reader_options_include: - # Set to None if empty set to include any additional `extra_kwargs` passed to `add_*_asset` # noqa: E501 + # Set to None if empty set to include any additional `extra_kwargs` passed to `add_*_asset` # noqa: E501 # FIXME CoP get_reader_options_include = None batch_spec_options = { "reader_method": self._get_reader_method(), diff --git a/great_expectations/datasource/fluent/data_asset/path/path_data_asset.py 
b/great_expectations/datasource/fluent/data_asset/path/path_data_asset.py index 2220e0d08759..d4ad9a769d3b 100644 --- a/great_expectations/datasource/fluent/data_asset/path/path_data_asset.py +++ b/great_expectations/datasource/fluent/data_asset/path/path_data_asset.py @@ -61,7 +61,7 @@ class PathDataAsset(DataAsset, Generic[DatasourceT, PartitionerT], ABC): connect_options: Mapping = pydantic.Field( default_factory=dict, - description="Optional filesystem specific advanced parameters for connecting to data assets", # noqa: E501 + description="Optional filesystem specific advanced parameters for connecting to data assets", # noqa: E501 # FIXME CoP ) # `_data_connector`` should be set inside `_build_data_connector()` @@ -106,10 +106,10 @@ def _validate_batch_request(self, batch_request: BatchRequest) -> None: datasource_name=self.datasource.name, data_asset_name=self.name, options=options, - batch_slice=batch_request._batch_slice_input, # type: ignore[attr-defined] + batch_slice=batch_request._batch_slice_input, # type: ignore[attr-defined] # FIXME CoP partitioner=batch_request.partitioner, ) - raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 + raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 # FIXME CoP "BatchRequest should have form:\n" f"{pf(expect_batch_request_form.dict())}\n" f"but actually has form:\n{pf(batch_request.dict())}\n" @@ -197,8 +197,8 @@ def test_connection(self) -> None: if self._data_connector.test_connection(): return None except Exception as e: - raise TestConnectionError( # noqa: TRY003 - f"Could not connect to asset using {type(self._data_connector).__name__}: Got {type(e).__name__}" # noqa: E501 + raise TestConnectionError( # noqa: TRY003 # FIXME CoP + f"Could not connect to asset using {type(self._data_connector).__name__}: Got {type(e).__name__}" # noqa: E501 # FIXME CoP ) from e raise TestConnectionError(self._test_connection_error_message) diff --git 
a/great_expectations/datasource/fluent/data_asset/path/spark/csv_asset.py b/great_expectations/datasource/fluent/data_asset/path/spark/csv_asset.py index 8b7e50208b5e..f601f2dc35b5 100644 --- a/great_expectations/datasource/fluent/data_asset/path/spark/csv_asset.py +++ b/great_expectations/datasource/fluent/data_asset/path/spark/csv_asset.py @@ -17,7 +17,7 @@ class CSVAssetBase(_SparkGenericFilePathAssetMixin): - # vvv spark parameters for pyspark.sql.DataFrameReader.csv() (ordered as in pyspark v3.4.0) appear in comment above # noqa: E501 + # vvv spark parameters for pyspark.sql.DataFrameReader.csv() (ordered as in pyspark v3.4.0) appear in comment above # noqa: E501 # FIXME CoP # parameter for reference (from https://github.com/apache/spark/blob/v3.4.0/python/pyspark/sql/readwriter.py#L604) # See https://spark.apache.org/docs/latest/sql-data-sources-csv.html for more info. # path: PathOrPaths, diff --git a/great_expectations/datasource/fluent/data_asset/path/spark/json_asset.py b/great_expectations/datasource/fluent/data_asset/path/spark/json_asset.py index 56646a46aaa7..34cbd8717707 100644 --- a/great_expectations/datasource/fluent/data_asset/path/spark/json_asset.py +++ b/great_expectations/datasource/fluent/data_asset/path/spark/json_asset.py @@ -17,7 +17,7 @@ class JSONAssetBase(_SparkGenericFilePathAssetMixin): - # vvv spark parameters for pyspark.sql.DataFrameReader.json() (ordered as in pyspark v3.4.0) appear in comment above # noqa: E501 + # vvv spark parameters for pyspark.sql.DataFrameReader.json() (ordered as in pyspark v3.4.0) appear in comment above # noqa: E501 # FIXME CoP # parameter for reference (from https://github.com/apache/spark/blob/v3.4.0/python/pyspark/sql/readwriter.py#L309) # path: Union[str, List[str], RDD[str]], # NA - path determined by asset diff --git a/great_expectations/datasource/fluent/data_connector/__init__.py b/great_expectations/datasource/fluent/data_connector/__init__.py index f88c224fbeb9..d87c4fb3a9ce 100644 --- 
a/great_expectations/datasource/fluent/data_connector/__init__.py +++ b/great_expectations/datasource/fluent/data_connector/__init__.py @@ -18,7 +18,7 @@ from great_expectations.datasource.fluent.data_connector.azure_blob_storage_data_connector import ( AzureBlobStorageDataConnector, ) -from great_expectations.datasource.fluent.data_connector.google_cloud_storage_data_connector import ( # noqa: E501 +from great_expectations.datasource.fluent.data_connector.google_cloud_storage_data_connector import ( # noqa: E501 # FIXME CoP GoogleCloudStorageDataConnector, ) diff --git a/great_expectations/datasource/fluent/data_connector/azure_blob_storage_data_connector.py b/great_expectations/datasource/fluent/data_connector/azure_blob_storage_data_connector.py index cd699ac2244f..042dc48cdfee 100644 --- a/great_expectations/datasource/fluent/data_connector/azure_blob_storage_data_connector.py +++ b/great_expectations/datasource/fluent/data_connector/azure_blob_storage_data_connector.py @@ -40,7 +40,7 @@ class AzureBlobStorageDataConnector(FilePathDataConnector): delimiter (str): Microsoft Azure Blob Storage delimiter recursive_file_discovery (bool): Flag to indicate if files should be searched recursively from subfolders file_path_template_map_fn: Format function mapping path to fully-qualified resource on ABS - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP asset_level_option_keys: ClassVar[tuple[str, ...]] = ( "abs_container", @@ -50,7 +50,7 @@ class AzureBlobStorageDataConnector(FilePathDataConnector): ) asset_options_type: ClassVar[Type[_AzureOptions]] = _AzureOptions - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, datasource_name: str, data_asset_name: str, @@ -81,7 +81,7 @@ def __init__( # noqa: PLR0913 ) @classmethod - def build_data_connector( # noqa: PLR0913 + def build_data_connector( # noqa: PLR0913 # FIXME CoP cls, datasource_name: str, data_asset_name: str, @@ -108,7 +108,7 @@ def build_data_connector( # noqa: PLR0913 Returns: 
Instantiated "AzureBlobStorageDataConnector" object - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return AzureBlobStorageDataConnector( datasource_name=datasource_name, data_asset_name=data_asset_name, @@ -122,7 +122,7 @@ def build_data_connector( # noqa: PLR0913 ) @classmethod - def build_test_connection_error_message( # noqa: PLR0913 + def build_test_connection_error_message( # noqa: PLR0913 # FIXME CoP cls, data_asset_name: str, account_name: str, @@ -143,8 +143,8 @@ def build_test_connection_error_message( # noqa: PLR0913 Returns: Customized error message - """ # noqa: E501 - test_connection_error_message_template: str = 'No file belonging to account "{account_name}" in container "{container}" with prefix "{name_starts_with}" and recursive file discovery set to "{recursive_file_discovery}" found using delimiter "{delimiter}" for DataAsset "{data_asset_name}".' # noqa: E501 + """ # noqa: E501 # FIXME CoP + test_connection_error_message_template: str = 'No file belonging to account "{account_name}" in container "{container}" with prefix "{name_starts_with}" and recursive file discovery set to "{recursive_file_discovery}" found using delimiter "{delimiter}" for DataAsset "{data_asset_name}".' # noqa: E501 # FIXME CoP return test_connection_error_message_template.format( **{ "data_asset_name": data_asset_name, @@ -189,10 +189,10 @@ def get_data_references(self) -> List[str]: @override def _get_full_file_path(self, path: str) -> str: if self._file_path_template_map_fn is None: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""Converting file paths to fully-qualified object references for "{self.__class__.__name__}" \ requires "file_path_template_map_fn: Callable" to be set. -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) template_arguments: dict = { @@ -214,13 +214,13 @@ def sanitize_prefix(text: str) -> str: Takes in a given user-prefix and cleans it to work with file-system traversal methods (i.e. 
add '/' to the end of a string meant to represent a directory) """ - _, ext = os.path.splitext(text) # noqa: PTH122 + _, ext = os.path.splitext(text) # noqa: PTH122 # FIXME CoP if ext: # Provided prefix is a filename so no adjustment is necessary return text # Provided prefix is a directory (so we want to ensure we append it with '/') - return os.path.join(text, "") # noqa: PTH118 + return os.path.join(text, "") # noqa: PTH118 # FIXME CoP def list_azure_keys( @@ -246,7 +246,7 @@ def list_azure_keys( Returns: List of keys representing Azure file paths (as filtered by the query_options dict) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP container: str = query_options["container"] container_client: azure.ContainerClient = azure_client.get_container_client(container=container) diff --git a/great_expectations/datasource/fluent/data_connector/batch_filter.py b/great_expectations/datasource/fluent/data_connector/batch_filter.py index dc83ef53163d..5d4b14c5beb2 100644 --- a/great_expectations/datasource/fluent/data_connector/batch_filter.py +++ b/great_expectations/datasource/fluent/data_connector/batch_filter.py @@ -43,7 +43,7 @@ def __modify_schema__(cls, field_schema): # the returned value will be ignored field_schema.update( slice={ - "description": "A slice object representing the set of indices specified by range(start, stop, step).", # noqa: E501 + "description": "A slice object representing the set of indices specified by range(start, stop, step).", # noqa: E501 # FIXME CoP "type": "object", "properties": { "start": { @@ -65,7 +65,7 @@ def __modify_schema__(cls, field_schema): @classmethod def validate(cls, v): if not isinstance(v, slice): - raise TypeError("slice required") # noqa: TRY003 + raise TypeError("slice required") # noqa: TRY003 # FIXME CoP return cls(v) @@ -74,7 +74,7 @@ def validate(cls, v): ] -def build_batch_filter( # noqa: C901 - too complex +def build_batch_filter( # noqa: C901 # too complex data_connector_query_dict: Optional[ Dict[ str, @@ 
-101,53 +101,53 @@ def build_batch_filter( # noqa: C901 - too complex ) data_connector_query_keys: set = set(data_connector_query_dict.keys()) if not data_connector_query_keys <= BatchFilter.RECOGNIZED_KEYS: - raise gx_exceptions.BatchFilterError( # noqa: TRY003 + raise gx_exceptions.BatchFilterError( # noqa: TRY003 # FIXME CoP f"""Unrecognized data_connector_query key(s): "{data_connector_query_keys - BatchFilter.RECOGNIZED_KEYS!s}" detected. """ ) - custom_filter_function: Optional[Callable] = data_connector_query_dict.get( # type: ignore[assignment] + custom_filter_function: Optional[Callable] = data_connector_query_dict.get( # type: ignore[assignment] # FIXME CoP "custom_filter_function" ) - if custom_filter_function and not isinstance(custom_filter_function, Callable): # type: ignore[arg-type] - raise gx_exceptions.BatchFilterError( # noqa: TRY003 + if custom_filter_function and not isinstance(custom_filter_function, Callable): # type: ignore[arg-type] # FIXME CoP + raise gx_exceptions.BatchFilterError( # noqa: TRY003 # FIXME CoP f"""The type of a custom_filter must be a function (Python "Callable"). The type given is "{type(custom_filter_function)!s}", which is illegal. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) - batch_filter_parameters: Optional[Union[dict, IDDict]] = data_connector_query_dict.get( # type: ignore[assignment] + batch_filter_parameters: Optional[Union[dict, IDDict]] = data_connector_query_dict.get( # type: ignore[assignment] # FIXME CoP "batch_filter_parameters" ) if batch_filter_parameters: if not isinstance(batch_filter_parameters, dict): - raise gx_exceptions.BatchFilterError( # noqa: TRY003 + raise gx_exceptions.BatchFilterError( # noqa: TRY003 # FIXME CoP f"""The type of batch_filter_parameters must be a dictionary (Python "dict"). The type given is "{type(batch_filter_parameters)!s}", which is illegal. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) if not all(isinstance(key, str) for key in batch_filter_parameters): - raise gx_exceptions.BatchFilterError( # noqa: TRY003 + raise gx_exceptions.BatchFilterError( # noqa: TRY003 # FIXME CoP 'All batch_filter_parameters keys must strings (Python "str").' ) batch_filter_parameters = IDDict(batch_filter_parameters) - index: Optional[BatchSlice] = data_connector_query_dict.get( # type: ignore[assignment] + index: Optional[BatchSlice] = data_connector_query_dict.get( # type: ignore[assignment] # FIXME CoP "index" ) - limit: Optional[int] = data_connector_query_dict.get("limit") # type: ignore[assignment] + limit: Optional[int] = data_connector_query_dict.get("limit") # type: ignore[assignment] # FIXME CoP if limit and (not isinstance(limit, int) or limit < 0): - raise gx_exceptions.BatchFilterError( # noqa: TRY003 + raise gx_exceptions.BatchFilterError( # noqa: TRY003 # FIXME CoP f"""The type of a limit must be an integer (Python "int") that is greater than or equal to 0. The type and value given are "{type(limit)!s}" and "{limit}", respectively, which is illegal. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) if index is not None and limit is not None: - raise gx_exceptions.BatchFilterError( # noqa: TRY003 - "Only one of index or limit, but not both, can be specified (specifying both is illegal)." # noqa: E501 + raise gx_exceptions.BatchFilterError( # noqa: TRY003 # FIXME CoP + "Only one of index or limit, but not both, can be specified (specifying both is illegal)." 
# noqa: E501 # FIXME CoP ) parsed_index: slice | None = parse_batch_slice(batch_slice=index) if index is not None else None return BatchFilter( custom_filter_function=custom_filter_function, - batch_filter_parameters=batch_filter_parameters, # type: ignore[arg-type] + batch_filter_parameters=batch_filter_parameters, # type: ignore[arg-type] # FIXME CoP index=parsed_index, limit=limit, ) @@ -169,13 +169,13 @@ def _batch_slice_string_to_slice_params(batch_slice: str) -> list[int | None]: # split and convert string to int for param in parsed_batch_slice.split(delimiter): - param = param.strip() # noqa: PLW2901 + param = param.strip() # noqa: PLW2901 # FIXME CoP if param and param != "None": try: slice_params.append(int(param)) except ValueError as e: - raise ValueError( # noqa: TRY003 - f'Attempt to convert string slice index "{param}" to integer failed with message: {e}' # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + f'Attempt to convert string slice index "{param}" to integer failed with message: {e}' # noqa: E501 # FIXME CoP ) else: slice_params.append(None) @@ -190,13 +190,13 @@ def _batch_slice_from_string(batch_slice: str) -> slice: return slice(0, None, None) elif len(slice_params) == 1 and slice_params[0] is not None: return _batch_slice_from_int(batch_slice=slice_params[0]) - elif len(slice_params) == 2: # noqa: PLR2004 + elif len(slice_params) == 2: # noqa: PLR2004 # FIXME CoP return slice(slice_params[0], slice_params[1], None) - elif len(slice_params) == 3: # noqa: PLR2004 + elif len(slice_params) == 3: # noqa: PLR2004 # FIXME CoP return slice(slice_params[0], slice_params[1], slice_params[2]) else: - raise ValueError( # noqa: TRY003 - f"batch_slice string must take the form of a python slice, but {batch_slice} was provided." # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + f"batch_slice string must take the form of a python slice, but {batch_slice} was provided." 
# noqa: E501 # FIXME CoP ) @@ -205,12 +205,12 @@ def _batch_slice_from_list_or_tuple(batch_slice: list[int] | tuple[int, ...]) -> return slice(0, None, None) elif len(batch_slice) == 1 and batch_slice[0] is not None: return slice(batch_slice[0] - 1, batch_slice[0]) - elif len(batch_slice) == 2: # noqa: PLR2004 + elif len(batch_slice) == 2: # noqa: PLR2004 # FIXME CoP return slice(batch_slice[0], batch_slice[1]) - elif len(batch_slice) == 3: # noqa: PLR2004 + elif len(batch_slice) == 3: # noqa: PLR2004 # FIXME CoP return slice(batch_slice[0], batch_slice[1], batch_slice[2]) else: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f'batch_slice sequence must be of length 0-3, but "{batch_slice}" was provided.' ) @@ -237,8 +237,8 @@ def parse_batch_slice(batch_slice: Optional[BatchSlice]) -> slice: elif isinstance(batch_slice, (list, tuple)): return_slice = _batch_slice_from_list_or_tuple(batch_slice=batch_slice) else: - raise TypeError( # noqa: TRY003 - f"`batch_slice` should be of type `BatchSlice`, but type: {type(batch_slice)} was passed." # noqa: E501 + raise TypeError( # noqa: TRY003 # FIXME CoP + f"`batch_slice` should be of type `BatchSlice`, but type: {type(batch_slice)} was passed." 
# noqa: E501 # FIXME CoP ) logger.info(f"batch_slice: {batch_slice} was parsed to: {return_slice}") return return_slice @@ -278,7 +278,7 @@ def index(self) -> Optional[Union[int, slice]]: @property def limit(self) -> int: - return self._limit # type: ignore[return-value] + return self._limit # type: ignore[return-value] # FIXME CoP @override def __repr__(self) -> str: @@ -314,7 +314,7 @@ def select_from_data_connector_query( if self.index is None: selected_batch_definitions = selected_batch_definitions[: self.limit] - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if isinstance(self.index, int): selected_batch_definitions = [selected_batch_definitions[self.index]] else: diff --git a/great_expectations/datasource/fluent/data_connector/data_connector.py b/great_expectations/datasource/fluent/data_connector/data_connector.py index f49922046a8e..79c8bda088a6 100644 --- a/great_expectations/datasource/fluent/data_connector/data_connector.py +++ b/great_expectations/datasource/fluent/data_connector/data_connector.py @@ -72,7 +72,7 @@ def get_batch_definition_list(self, batch_request: BatchRequest) -> List[LegacyB Returns: List[BatchDefinition] -- list of "BatchDefinition" objects, each corresponding to "Batch" of data downstream - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP pass def build_batch_spec(self, batch_definition: LegacyBatchDefinition) -> BatchSpec: @@ -83,7 +83,7 @@ def build_batch_spec(self, batch_definition: LegacyBatchDefinition) -> BatchSpec batch_definition (LegacyBatchDefinition): required batch_definition parameter for retrieval Returns: BatchSpec object built from BatchDefinition - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP batch_spec_params: dict = self._generate_batch_spec_parameters_from_batch_definition( batch_definition=batch_definition ) @@ -102,7 +102,7 @@ def test_connection(self) -> bool: def get_data_references(self) -> List[Any]: """ This interface method lists objects in the underlying data store used to create a list of 
data_references (type depends on cloud storage environment, SQL DBMS, etc.). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP pass @abstractmethod @@ -112,7 +112,7 @@ def get_data_reference_count(self) -> int: Returns: int -- number of data references identified - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP pass @abstractmethod @@ -122,7 +122,7 @@ def get_matched_data_references(self) -> List[Any]: Returns: List[Any] -- unmatched data references (type depends on cloud storage environment, SQL DBMS, etc.) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP pass @abstractmethod @@ -132,7 +132,7 @@ def get_matched_data_reference_count(self) -> int: Returns: int -- number of data references identified - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP pass @abstractmethod @@ -142,7 +142,7 @@ def get_unmatched_data_references(self) -> List[Any]: Returns: List[Any] -- unmatched data references (type depends on cloud storage environment, SQL DBMS, etc.) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP pass @abstractmethod @@ -152,7 +152,7 @@ def get_unmatched_data_reference_count(self) -> int: Returns: int -- number of data references identified - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP pass @abstractmethod @@ -169,7 +169,7 @@ def _generate_batch_spec_parameters_from_batch_definition( Returns: dict -- dictionary of "BatchSpec" properties - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP pass @staticmethod diff --git a/great_expectations/datasource/fluent/data_connector/dbfs_data_connector.py b/great_expectations/datasource/fluent/data_connector/dbfs_data_connector.py index 0db4311d93da..e4d32ade7216 100644 --- a/great_expectations/datasource/fluent/data_connector/dbfs_data_connector.py +++ b/great_expectations/datasource/fluent/data_connector/dbfs_data_connector.py @@ -26,9 +26,9 @@ class DBFSDataConnector(FilesystemDataConnector): data_context_root_directory: Optional GreatExpectations root directory (if installed on DBFS) file_path_template_map_fn: Format 
function mapping path to fully-qualified resource on DBFS get_unfiltered_batch_definition_list_fn: Function used to get the batch definition list before filtering - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, datasource_name: str, data_asset_name: str, @@ -50,7 +50,7 @@ def __init__( # noqa: PLR0913 @classmethod @override - def build_data_connector( # noqa: PLR0913 + def build_data_connector( # noqa: PLR0913 # FIXME CoP cls, datasource_name: str, data_asset_name: str, @@ -73,7 +73,7 @@ def build_data_connector( # noqa: PLR0913 Returns: Instantiated "DBFSDataConnector" object - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return DBFSDataConnector( datasource_name=datasource_name, data_asset_name=data_asset_name, @@ -88,10 +88,10 @@ def build_data_connector( # noqa: PLR0913 @override def _get_full_file_path(self, path: str) -> str: if self._file_path_template_map_fn is None: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""Converting file paths to fully-qualified object references for "{self.__class__.__name__}" \ requires "file_path_template_map_fn: Callable" to be set. 
-""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) template_arguments: dict = { diff --git a/great_expectations/datasource/fluent/data_connector/file_path_data_connector.py b/great_expectations/datasource/fluent/data_connector/file_path_data_connector.py index b1228db4b701..04cd4054cad8 100644 --- a/great_expectations/datasource/fluent/data_connector/file_path_data_connector.py +++ b/great_expectations/datasource/fluent/data_connector/file_path_data_connector.py @@ -92,7 +92,7 @@ def get_batch_definition_list(self, batch_request: BatchRequest) -> List[LegacyB Returns: A list of BatchDefinition objects that match BatchRequest - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP legacy_batch_definition_list: List[LegacyBatchDefinition] = ( self._get_unfiltered_batch_definition_list(batch_request=batch_request) ) @@ -112,7 +112,7 @@ def get_batch_definition_list(self, batch_request: BatchRequest) -> List[LegacyB data_connector_query_dict.update({"index": batch_request.batch_slice}) batch_filter_obj: BatchFilter = build_batch_filter( - data_connector_query_dict=data_connector_query_dict # type: ignore[arg-type] + data_connector_query_dict=data_connector_query_dict # type: ignore[arg-type] # FIXME CoP ) legacy_batch_definition_list = batch_filter_obj.select_from_data_connector_query( batch_definition_list=legacy_batch_definition_list @@ -165,7 +165,7 @@ def get_matched_data_reference_count(self) -> int: Returns: number of matched data_references known by this DataConnector. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return len(self.get_matched_data_references()) # Interface Method @@ -177,7 +177,7 @@ def get_unmatched_data_references(self) -> List[str]: Returns: list of data_references that are not matched by configuration. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return self._get_data_references(matched=False) # Interface Method @@ -188,7 +188,7 @@ def get_unmatched_data_reference_count(self) -> int: Returns: number of unmached data_references known by this DataConnector. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return len(self.get_unmatched_data_references()) def _get_unfiltered_batch_definition_list( @@ -252,7 +252,7 @@ def _get_data_references(self, matched: bool, regex: re.Pattern | None = None) - Returns: list of data_references that are not matched by configuration. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if not regex: regex = self._preprocess_batching_regex(MATCH_ALL_PATTERN) @@ -293,7 +293,7 @@ def _generate_batch_spec_parameters_from_batch_definition( Returns: dict -- dictionary of "BatchSpec" properties - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # this class is overloaded with two separate implementations: if self._whole_directory_path_override: return self._get_batch_spec_params_directory(batch_definition=batch_definition) @@ -303,7 +303,7 @@ def _generate_batch_spec_parameters_from_batch_definition( def _get_batch_spec_params_file(self, batch_definition: LegacyBatchDefinition) -> dict: """File specific implementation of batch spec parameters""" if not batch_definition.batching_regex: - raise RuntimeError("BatchDefinition must contain a batching_regex.") # noqa: TRY003 + raise RuntimeError("BatchDefinition must contain a batching_regex.") # noqa: TRY003 # FIXME CoP batching_regex = batch_definition.batching_regex @@ -318,10 +318,10 @@ def _get_batch_spec_params_file(self, batch_definition: LegacyBatchDefinition) - group_names=group_names, ) if not path: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""No data reference for data asset name "{batch_definition.data_asset_name}" matches the given batch identifiers {batch_definition.batch_identifiers} from batch definition {batch_definition}. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) path = self._get_full_file_path(path=path) @@ -453,7 +453,7 @@ def map_batch_definition_to_data_reference_string_using_regex( group_names: List[str], ) -> str: if not isinstance(batch_definition, LegacyBatchDefinition): - raise TypeError("batch_definition is not of an instance of type BatchDefinition") # noqa: TRY003 + raise TypeError("batch_definition is not of an instance of type BatchDefinition") # noqa: TRY003 # FIXME CoP data_asset_name: str = batch_definition.data_asset_name batch_identifiers: IDDict = batch_definition.batch_identifiers @@ -473,7 +473,7 @@ def convert_batch_identifiers_to_data_reference_string_using_regex( data_asset_name: Optional[str] = None, ) -> str: if not isinstance(batch_identifiers, IDDict): - raise TypeError("batch_identifiers is not " "an instance of type IDDict") # noqa: TRY003 + raise TypeError("batch_identifiers is not " "an instance of type IDDict") # noqa: TRY003 # FIXME CoP template_arguments: dict = copy.deepcopy(batch_identifiers) if data_asset_name is not None: @@ -488,7 +488,7 @@ def convert_batch_identifiers_to_data_reference_string_using_regex( return converted_string -def _invert_regex_to_data_reference_template( # noqa: C901 - too complex +def _invert_regex_to_data_reference_template( # noqa: C901 # too complex regex_pattern: re.Pattern | str, group_names: List[str], ) -> str: @@ -514,7 +514,7 @@ def _invert_regex_to_data_reference_template( # noqa: C901 - too complex NOTE Abe 20201017: This method is almost certainly still brittle. 
I haven't exhaustively mapped the OPCODES in sre_constants - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP data_reference_template: str = "" group_name_index: int = 0 @@ -525,7 +525,7 @@ def _invert_regex_to_data_reference_template( # noqa: C901 - too complex # print("-"*80) parsed_sre = sre_parse.parse(str(regex_pattern)) - for parsed_sre_tuple, char in zip(parsed_sre, list(str(regex_pattern))): # type: ignore[call-overload] + for parsed_sre_tuple, char in zip(parsed_sre, list(str(regex_pattern))): # type: ignore[call-overload] # FIXME CoP token, value = parsed_sre_tuple if token == sre_constants.LITERAL: # Transcribe the character directly into the template @@ -555,10 +555,10 @@ def _invert_regex_to_data_reference_template( # noqa: C901 - too complex ]: pass else: - raise ValueError(f"Unrecognized regex token {token} in regex pattern {regex_pattern}.") # noqa: TRY003 + raise ValueError(f"Unrecognized regex token {token} in regex pattern {regex_pattern}.") # noqa: TRY003 # FIXME CoP # Collapse adjacent wildcards into a single wildcard - data_reference_template: str = re.sub("\\*+", "*", data_reference_template) # type: ignore[no-redef] + data_reference_template: str = re.sub("\\*+", "*", data_reference_template) # type: ignore[no-redef] # FIXME CoP return data_reference_template diff --git a/great_expectations/datasource/fluent/data_connector/filesystem_data_connector.py b/great_expectations/datasource/fluent/data_connector/filesystem_data_connector.py index 256fb2e8d564..bcd38475bbf5 100644 --- a/great_expectations/datasource/fluent/data_connector/filesystem_data_connector.py +++ b/great_expectations/datasource/fluent/data_connector/filesystem_data_connector.py @@ -31,12 +31,12 @@ class FilesystemDataConnector(FilePathDataConnector): glob_directive: glob for selecting files in directory (defaults to `**/*`) or nested directories (e.g. 
`*/*/*.csv`) data_context_root_directory: Optional GreatExpectations root directory (if installed on filesystem) whole_directory_path_override: Treat an entire directory as a single Asset - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP asset_level_option_keys: ClassVar[tuple[str, ...]] = ("glob_directive",) asset_options_type: ClassVar[Type[FilesystemOptions]] = FilesystemOptions - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, datasource_name: str, data_asset_name: str, @@ -62,14 +62,14 @@ def base_directory(self) -> pathlib.Path: """ Accessor method for base_directory. If directory is a relative path, interpret it as relative to the root directory. If it is absolute, then keep as-is. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return normalize_directory_path( dir_path=self._base_directory, root_directory_path=self._data_context_root_directory, ) @classmethod - def build_data_connector( # noqa: PLR0913 + def build_data_connector( # noqa: PLR0913 # FIXME CoP cls, datasource_name: str, data_asset_name: str, @@ -92,7 +92,7 @@ def build_data_connector( # noqa: PLR0913 Returns: Instantiated "FilesystemDataConnector" object - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return FilesystemDataConnector( datasource_name=datasource_name, data_asset_name=data_asset_name, @@ -121,8 +121,8 @@ def build_test_connection_error_message( Returns: Customized error message - """ # noqa: E501 - test_connection_error_message_template: str = 'No file at base_directory path "{base_directory}" matched glob_directive "{glob_directive}" for DataAsset "{data_asset_name}".' # noqa: E501 + """ # noqa: E501 # FIXME CoP + test_connection_error_message_template: str = 'No file at base_directory path "{base_directory}" matched glob_directive "{glob_directive}" for DataAsset "{data_asset_name}".' 
# noqa: E501 # FIXME CoP return test_connection_error_message_template.format( **{ "data_asset_name": data_asset_name, @@ -171,7 +171,7 @@ def get_filesystem_one_level_directory_glob_path_list( :param base_directory_path -- base directory path, relative to which file paths will be collected :param glob_directive -- glob expansion directive :returns -- list of relative file paths - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if isinstance(base_directory_path, str): base_directory_path = pathlib.Path(base_directory_path) diff --git a/great_expectations/datasource/fluent/data_connector/google_cloud_storage_data_connector.py b/great_expectations/datasource/fluent/data_connector/google_cloud_storage_data_connector.py index 58937e885bf9..d1730d28c2b8 100644 --- a/great_expectations/datasource/fluent/data_connector/google_cloud_storage_data_connector.py +++ b/great_expectations/datasource/fluent/data_connector/google_cloud_storage_data_connector.py @@ -43,7 +43,7 @@ class GoogleCloudStorageDataConnector(FilePathDataConnector): max_results (int): max blob filepaths to return recursive_file_discovery (bool): Flag to indicate if files should be searched recursively from subfolders file_path_template_map_fn: Format function mapping path to fully-qualified resource on GCS - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP asset_level_option_keys: ClassVar[tuple[str, ...]] = ( "gcs_prefix", @@ -53,7 +53,7 @@ class GoogleCloudStorageDataConnector(FilePathDataConnector): ) asset_options_type: ClassVar[Type[_GCSOptions]] = _GCSOptions - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, datasource_name: str, data_asset_name: str, @@ -84,7 +84,7 @@ def __init__( # noqa: PLR0913 ) @classmethod - def build_data_connector( # noqa: PLR0913 + def build_data_connector( # noqa: PLR0913 # FIXME CoP cls, datasource_name: str, data_asset_name: str, @@ -111,7 +111,7 @@ def build_data_connector( # noqa: PLR0913 Returns: Instantiated 
"GoogleCloudStorageDataConnector" object - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return GoogleCloudStorageDataConnector( datasource_name=datasource_name, data_asset_name=data_asset_name, @@ -144,8 +144,8 @@ def build_test_connection_error_message( Returns: Customized error message - """ # noqa: E501 - test_connection_error_message_template: str = 'No file in bucket "{bucket_or_name}" with prefix "{prefix}" and recursive file discovery set to "{recursive_file_discovery}" found using delimiter "{delimiter}" for DataAsset "{data_asset_name}".' # noqa: E501 + """ # noqa: E501 # FIXME CoP + test_connection_error_message_template: str = 'No file in bucket "{bucket_or_name}" with prefix "{prefix}" and recursive file discovery set to "{recursive_file_discovery}" found using delimiter "{delimiter}" for DataAsset "{data_asset_name}".' # noqa: E501 # FIXME CoP return test_connection_error_message_template.format( **{ "data_asset_name": data_asset_name, @@ -190,10 +190,10 @@ def get_data_references(self) -> List[str]: @override def _get_full_file_path(self, path: str) -> str: if self._file_path_template_map_fn is None: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""Converting file paths to fully-qualified object references for "{self.__class__.__name__}" \ requires "file_path_template_map_fn: Callable" to be set. 
-""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) template_arguments: dict = { @@ -241,7 +241,7 @@ def list_gcs_keys( Returns: List of keys representing GCS file paths (as filtered by the `query_options` dict) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # Delimiter determines whether or not traversal of bucket is recursive # Manually set to appropriate default if not already set by user delimiter = query_options["delimiter"] @@ -250,7 +250,7 @@ def list_gcs_keys( 'In order to access blobs with a ConfiguredAssetGCSDataConnector, \ or with a Fluent datasource without enabling recursive file discovery, \ the delimiter that has been passed to gcs_options in your config cannot be empty; \ - please note that the value is being set to the default "/" in order to work with the Google SDK.' # noqa: E501 + please note that the value is being set to the default "/" in order to work with the Google SDK.' # noqa: E501 # FIXME CoP ) query_options["delimiter"] = "/" elif delimiter is not None and recursive: diff --git a/great_expectations/datasource/fluent/data_connector/s3_data_connector.py b/great_expectations/datasource/fluent/data_connector/s3_data_connector.py index 1df4f4ea77b0..3948956b0a01 100644 --- a/great_expectations/datasource/fluent/data_connector/s3_data_connector.py +++ b/great_expectations/datasource/fluent/data_connector/s3_data_connector.py @@ -45,7 +45,7 @@ class S3DataConnector(FilePathDataConnector): max_keys (int): S3 max_keys (default is 1000) recursive_file_discovery (bool): Flag to indicate if files should be searched recursively from subfolders file_path_template_map_fn: Format function mapping path to fully-qualified resource on S3 - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP asset_level_option_keys: ClassVar[tuple[str, ...]] = ( "s3_prefix", @@ -55,7 +55,7 @@ class S3DataConnector(FilePathDataConnector): ) asset_options_type: ClassVar[Type[_S3Options]] = _S3Options - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP 
self, datasource_name: str, data_asset_name: str, @@ -86,7 +86,7 @@ def __init__( # noqa: PLR0913 ) @classmethod - def build_data_connector( # noqa: PLR0913 + def build_data_connector( # noqa: PLR0913 # FIXME CoP cls, datasource_name: str, data_asset_name: str, @@ -113,7 +113,7 @@ def build_data_connector( # noqa: PLR0913 Returns: Instantiated "S3DataConnector" object - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return S3DataConnector( datasource_name=datasource_name, data_asset_name=data_asset_name, @@ -146,8 +146,8 @@ def build_test_connection_error_message( Returns: Customized error message - """ # noqa: E501 - test_connection_error_message_template: str = 'No file in bucket "{bucket}" with prefix "{prefix}" and recursive file discovery set to "{recursive_file_discovery}" found using delimiter "{delimiter}" for DataAsset "{data_asset_name}".' # noqa: E501 + """ # noqa: E501 # FIXME CoP + test_connection_error_message_template: str = 'No file in bucket "{bucket}" with prefix "{prefix}" and recursive file discovery set to "{recursive_file_discovery}" found using delimiter "{delimiter}" for DataAsset "{data_asset_name}".' # noqa: E501 # FIXME CoP return test_connection_error_message_template.format( **{ "data_asset_name": data_asset_name, @@ -195,10 +195,10 @@ def get_data_references(self) -> List[str]: @override def _get_full_file_path(self, path: str) -> str: if self._file_path_template_map_fn is None: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""Converting file paths to fully-qualified object references for "{self.__class__.__name__}" \ requires "file_path_template_map_fn: Callable" to be set. 
-""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) template_arguments: dict = { @@ -214,7 +214,7 @@ def _preprocess_batching_regex(self, regex: re.Pattern) -> re.Pattern: return super()._preprocess_batching_regex(regex=regex) -def list_s3_keys( # noqa: C901 - too complex +def list_s3_keys( # noqa: C901 # too complex s3, query_options: dict, iterator_dict: dict, recursive: bool = False ) -> Generator[str, None, None]: """ @@ -229,7 +229,7 @@ def list_s3_keys( # noqa: C901 - too complex :param iterator_dict: dictionary to manage "NextContinuationToken" (if "IsTruncated" is returned from S3) :param recursive: True for InferredAssetS3DataConnector and False for ConfiguredAssetS3DataConnector (see above) :return: string valued key representing file path on S3 (full prefix and leaf file name) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if iterator_dict is None: iterator_dict = {} @@ -241,7 +241,7 @@ def list_s3_keys( # noqa: C901 - too complex s3_objects_info: dict = s3.list_objects_v2(**query_options) if not any(key in s3_objects_info for key in ["Contents", "CommonPrefixes"]): - raise ValueError("S3 query may not have been configured correctly.") # noqa: TRY003 + raise ValueError("S3 query may not have been configured correctly.") # noqa: TRY003 # FIXME CoP if "Contents" in s3_objects_info: keys: List[str] = [item["Key"] for item in s3_objects_info["Contents"] if item["Size"] > 0] diff --git a/great_expectations/datasource/fluent/databricks_sql_datasource.py b/great_expectations/datasource/fluent/databricks_sql_datasource.py index 88e300aa2eba..45f71b9ff140 100644 --- a/great_expectations/datasource/fluent/databricks_sql_datasource.py +++ b/great_expectations/datasource/fluent/databricks_sql_datasource.py @@ -42,7 +42,7 @@ def _parse_param_from_query_string(param: str, query: str) -> str | None: if not path_results: return None if len(path_results) > 1: - raise ValueError(f"Only one `{param}` query entry is allowed") # noqa: TRY003 + raise ValueError(f"Only one 
`{param}` query entry is allowed") # noqa: TRY003 # FIXME CoP return path_results[0] @@ -143,7 +143,7 @@ def _resolve_quoted_name(cls, table_name: str) -> str | quoted_name: from great_expectations.compatibility import sqlalchemy - if sqlalchemy.quoted_name: # type: ignore[truthy-function] + if sqlalchemy.quoted_name: # type: ignore[truthy-function] # FIXME CoP if isinstance(table_name, sqlalchemy.quoted_name): return table_name @@ -184,12 +184,12 @@ class DatabricksSQLDatasource(SQLDatasource): For example: "databricks://token:@:?http_path=&catalog=&schema="" assets: An optional dictionary whose keys are TableAsset or QueryAsset names and whose values are TableAsset or QueryAsset objects. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # class var definitions asset_types: ClassVar[List[Type[DataAsset]]] = [DatabricksTableAsset, SqlQueryAsset] - type: Literal["databricks_sql"] = "databricks_sql" # type: ignore[assignment] + type: Literal["databricks_sql"] = "databricks_sql" # type: ignore[assignment] # FIXME CoP connection_string: Union[ConfigStr, DatabricksDsn] # These are instance var because ClassVars can't contain Type variables. See @@ -207,10 +207,10 @@ def test_connection(self, test_assets: bool = True) -> None: # Raise specific error informing how to install dependencies only if relevant if isinstance(nested_exception, sa.exc.NoSuchModuleError): - raise TestConnectionError( # noqa: TRY003 - "Could not connect to Databricks - please ensure you've installed necessary dependencies with `pip install great_expectations[databricks]`." # noqa: E501 + raise TestConnectionError( # noqa: TRY003 # FIXME CoP + "Could not connect to Databricks - please ensure you've installed necessary dependencies with `pip install great_expectations[databricks]`." 
# noqa: E501 # FIXME CoP ) from e - raise e # noqa: TRY201 + raise e # noqa: TRY201 # FIXME CoP @override def _create_engine(self) -> sqlalchemy.Engine: @@ -231,6 +231,6 @@ def _create_engine(self) -> sqlalchemy.Engine: http_path = _parse_param_from_query_string(param="http_path", query=connection_string.query) assert http_path, "Presence of http_path query string is guaranteed due to prior validation" - # Databricks connection is a bit finicky - the http_path portion of the connection string needs to be passed in connect_args # noqa: E501 + # Databricks connection is a bit finicky - the http_path portion of the connection string needs to be passed in connect_args # noqa: E501 # FIXME CoP connect_args = {"http_path": http_path} return sa.create_engine(connection_string, connect_args=connect_args, **kwargs) diff --git a/great_expectations/datasource/fluent/dynamic_pandas.py b/great_expectations/datasource/fluent/dynamic_pandas.py index cbd3d10ec985..ecccac288c95 100644 --- a/great_expectations/datasource/fluent/dynamic_pandas.py +++ b/great_expectations/datasource/fluent/dynamic_pandas.py @@ -51,17 +51,17 @@ # Types may not exist on earlier version of pandas (current min ver is v.1.1.0) # https://github.com/pandas-dev/pandas/blob/v1.1.0/pandas/_typing.py CompressionDict = Dict[str, Any] - CompressionOptions = Optional[ # type: ignore[misc] + CompressionOptions = Optional[ # type: ignore[misc] # FIXME CoP Union[Literal["infer", "gzip", "bz2", "zip", "xz", "zstd", "tar"], CompressionDict] ] - CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"] # type: ignore[misc] - StorageOptions = Optional[Dict[str, Any]] # type: ignore[misc] + CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"] # type: ignore[misc] # FIXME CoP + StorageOptions = Optional[Dict[str, Any]] # type: ignore[misc] # FIXME CoP try: from pandas._libs.lib import _NoDefault except ImportError: - class _NoDefault(enum.Enum): # type: ignore[no-redef] + class _NoDefault(enum.Enum): # type: 
ignore[no-redef] # FIXME CoP no_default = "NO_DEFAULT" @@ -131,7 +131,7 @@ class _NoDefault(enum.Enum): # type: ignore[no-redef] "Hashable": "str", "Sequence[Hashable]": "Sequence[str]", "Iterable[Hashable]": "Iterable[str]", - # using builtin types as generics may causes TypeError: 'type' object is not subscriptable in python 3.8 # noqa: E501 + # using builtin types as generics may causes TypeError: 'type' object is not subscriptable in python 3.8 # noqa: E501 # FIXME CoP "Sequence[tuple[int, int]]": "Sequence[Tuple[int, int]]", # TypeVars "IntStrT": "Union[int, str]", @@ -170,7 +170,7 @@ def _replace_builtins(input_: str | type) -> str | type: # SQLTable "schema": { "schema_name": _FieldSpec( - Optional[str], # type: ignore[arg-type] + Optional[str], # type: ignore[arg-type] # FIXME CoP Field( None, description="'schema_name' on the instance model." @@ -180,22 +180,22 @@ def _replace_builtins(input_: str | type) -> str | type: ) }, # sql - "con": {"con": _FieldSpec(Union[ConfigStr, str, Any], ...)}, # type: ignore[arg-type] + "con": {"con": _FieldSpec(Union[ConfigStr, str, Any], ...)}, # type: ignore[arg-type] # FIXME CoP # misc "filepath_or_buffer": { - "filepath_or_buffer": _FieldSpec(Union[FilePath, AnyUrl, Any], ...) # type: ignore[arg-type] + "filepath_or_buffer": _FieldSpec(Union[FilePath, AnyUrl, Any], ...) 
# type: ignore[arg-type] # FIXME CoP }, - "io": {"io": _FieldSpec(Union[FilePath, AnyUrl, Any], ...)}, # type: ignore[arg-type] - "path": {"path": _FieldSpec(Union[FilePath, AnyUrl, Any], ...)}, # type: ignore[arg-type] - "path_or_buf": {"path_or_buf": _FieldSpec(Union[FilePath, AnyUrl, Any], ...)}, # type: ignore[arg-type] - "path_or_buffer": {"path_or_buffer": _FieldSpec(Union[FilePath, AnyUrl, Any], ...)}, # type: ignore[arg-type] - "dtype": {"dtype": _FieldSpec(Optional[dict], None)}, # type: ignore[arg-type] - "dialect": {"dialect": _FieldSpec(Optional[str], None)}, # type: ignore[arg-type] - "usecols": {"usecols": _FieldSpec(Union[int, str, Sequence[int], None], None)}, # type: ignore[arg-type] - "skiprows": {"skiprows": _FieldSpec(Union[Sequence[int], int, None], None)}, # type: ignore[arg-type] + "io": {"io": _FieldSpec(Union[FilePath, AnyUrl, Any], ...)}, # type: ignore[arg-type] # FIXME CoP + "path": {"path": _FieldSpec(Union[FilePath, AnyUrl, Any], ...)}, # type: ignore[arg-type] # FIXME CoP + "path_or_buf": {"path_or_buf": _FieldSpec(Union[FilePath, AnyUrl, Any], ...)}, # type: ignore[arg-type] # FIXME CoP + "path_or_buffer": {"path_or_buffer": _FieldSpec(Union[FilePath, AnyUrl, Any], ...)}, # type: ignore[arg-type] # FIXME CoP + "dtype": {"dtype": _FieldSpec(Optional[dict], None)}, # type: ignore[arg-type] # FIXME CoP + "dialect": {"dialect": _FieldSpec(Optional[str], None)}, # type: ignore[arg-type] # FIXME CoP + "usecols": {"usecols": _FieldSpec(Union[int, str, Sequence[int], None], None)}, # type: ignore[arg-type] # FIXME CoP + "skiprows": {"skiprows": _FieldSpec(Union[Sequence[int], int, None], None)}, # type: ignore[arg-type] # FIXME CoP "kwargs": { "kwargs": _FieldSpec( - Optional[dict], # type: ignore[arg-type] + Optional[dict], # type: ignore[arg-type] # FIXME CoP Field( None, description="Extra keyword arguments that will be passed to the reader method", @@ -204,7 +204,7 @@ def _replace_builtins(input_: str | type) -> str | type: }, "kwds": { 
"kwargs": _FieldSpec( - Optional[dict], # type: ignore[arg-type] + Optional[dict], # type: ignore[arg-type] # FIXME CoP Field( None, description="Extra keyword arguments that will be passed to the reader method", @@ -300,7 +300,7 @@ def _get_annotation_type(param: inspect.Parameter) -> Union[Type, str, object]: logger.debug(f"{param.name} has non-string annotations") # `__args__` contains the actual members of a `Union[TYPE_1, TYPE_2]` object union_types = getattr(annotation, "__args__", None) - if union_types and PANDAS_VERSION < 1.2: # noqa: PLR2004 + if union_types and PANDAS_VERSION < 1.2: # noqa: PLR2004 # FIXME CoP # we could examine these types and only kick out certain blacklisted types # but once we drop python 3.7 support our min pandas version will make this # unneeded @@ -312,7 +312,7 @@ def _get_annotation_type(param: inspect.Parameter) -> Union[Type, str, object]: union_parts = annotation.split("|") str_to_eval: str for type_str in union_parts: - type_str = type_str.strip() # noqa: PLW2901 + type_str = type_str.strip() # noqa: PLW2901 # FIXME CoP if type_str in CAN_HANDLE: types.append(type_str) @@ -357,7 +357,7 @@ def _to_pydantic_fields( FIELD_SKIPPED_NO_ANNOTATION.add(param_name) # TODO: not skipped type_ = Any else: - type_ = _get_annotation_type(param) # type: ignore[assignment] + type_ = _get_annotation_type(param) # type: ignore[assignment] # FIXME CoP if type_ is UNSUPPORTED_TYPE or type_ == "None": logger.debug(f"`{param_name}` has no supported types. 
Field skipped") FIELD_SKIPPED_UNSUPPORTED_TYPE.add(param_name) @@ -373,7 +373,7 @@ def _to_pydantic_fields( M = TypeVar("M", bound=Type[DataAsset]) -def _create_pandas_asset_model( # noqa: PLR0913 +def _create_pandas_asset_model( # noqa: PLR0913 # FIXME CoP model_name: str, model_base: M, type_field: Tuple[Union[Type, str], str], @@ -442,7 +442,7 @@ def _generate_pandas_data_asset_models( continue except TypeError as err: logger.info( - f"pandas {pd.__version__} {model_name} could not be created normally - {type(err).__name__}:{err} , skipping" # noqa: E501 + f"pandas {pd.__version__} {model_name} could not be created normally - {type(err).__name__}:{err} , skipping" # noqa: E501 # FIXME CoP ) logger.info(f"{model_name} fields\n{pf(fields)}") continue @@ -451,8 +451,8 @@ def _generate_pandas_data_asset_models( try: asset_model.update_forward_refs(**_TYPE_REF_LOCALS) except TypeError as e: - raise DynamicAssetError( # noqa: TRY003 - f"Updating forward references for asset model {asset_model.__name__} raised TypeError: {e}" # noqa: E501 + raise DynamicAssetError( # noqa: TRY003 # FIXME CoP + f"Updating forward references for asset model {asset_model.__name__} raised TypeError: {e}" # noqa: E501 # FIXME CoP ) from e logger.debug(f"Needs extra handling\n{pf(dict(NEED_SPECIAL_HANDLING))}") diff --git a/great_expectations/datasource/fluent/fabric.py b/great_expectations/datasource/fluent/fabric.py index 4485087a6c23..05fc4433a7fe 100644 --- a/great_expectations/datasource/fluent/fabric.py +++ b/great_expectations/datasource/fluent/fabric.py @@ -195,7 +195,7 @@ def _validate_batch_request(self, batch_request: BatchRequest) -> None: options={}, batch_slice=batch_request._batch_slice_input, # type: ignore[attr-defined] # private attr does exist ) - raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 + raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 # FIXME CoP "BatchRequest should have form:\n" f"{pf(expect_batch_request_form.dict())}\n" f"but actually 
has form:\n{pf(batch_request.dict())}\n" @@ -241,7 +241,7 @@ class PowerBITable(_PowerBIAsset): mode: Mode = "xmla" -# This improves our error messages by providing a more specific type for pydantic to validate against # noqa: E501 +# This improves our error messages by providing a more specific type for pydantic to validate against # noqa: E501 # FIXME CoP # It also ensure the generated jsonschema has a oneOf instead of anyOf field for assets # https://docs.pydantic.dev/1.10/usage/types/#discriminated-unions-aka-tagged-unions AssetTypes = Annotated[ @@ -298,12 +298,12 @@ def test_connection(self, test_assets: bool = True) -> None: TestConnectionError: If the connection test fails. """ if not self._running_on_fabric(): - raise TestConnectionError("Must be running Microsoft Fabric to use this datasource") # noqa: TRY003 + raise TestConnectionError("Must be running Microsoft Fabric to use this datasource") # noqa: TRY003 # FIXME CoP try: from sempy import fabric # noqa: F401 # test if fabric is installed except Exception as import_err: - raise TestConnectionError( # noqa: TRY003 + raise TestConnectionError( # noqa: TRY003 # FIXME CoP "Could not import `sempy.fabric`\npip install semantic-link-sempy" ) from import_err @@ -328,7 +328,7 @@ def add_powerbi_dax_asset( Returns: The asset that is added to the datasource. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP asset = PowerBIDax( name=name, batch_metadata=batch_metadata or {}, @@ -337,7 +337,7 @@ def add_powerbi_dax_asset( return self._add_asset(asset) @public_api - def add_powerbi_measure_asset( # noqa: PLR0913 + def add_powerbi_measure_asset( # noqa: PLR0913 # FIXME CoP self, name: str, measure: Union[str, List[str]], @@ -356,7 +356,7 @@ def add_powerbi_measure_asset( # noqa: PLR0913 Returns: The asset that is added to the datasource. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP asset = PowerBIMeasure( name=name, batch_metadata=batch_metadata or {}, @@ -371,7 +371,7 @@ def add_powerbi_measure_asset( # noqa: PLR0913 return self._add_asset(asset) @public_api - def add_powerbi_table_asset( # noqa: PLR0913 + def add_powerbi_table_asset( # noqa: PLR0913 # FIXME CoP self, name: str, table: str, @@ -391,7 +391,7 @@ def add_powerbi_table_asset( # noqa: PLR0913 Returns: The asset that is added to the datasource. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP asset = PowerBITable( name=name, batch_metadata=batch_metadata or {}, diff --git a/great_expectations/datasource/fluent/fluent_base_model.py b/great_expectations/datasource/fluent/fluent_base_model.py index 82b6f863d096..5fc545ee7234 100644 --- a/great_expectations/datasource/fluent/fluent_base_model.py +++ b/great_expectations/datasource/fluent/fluent_base_model.py @@ -105,7 +105,7 @@ def yaml( **yaml_kwargs, ) -> pathlib.Path: ... - def yaml( # noqa: PLR0913 + def yaml( # noqa: PLR0913 # FIXME CoP self, stream_or_path: Union[StringIO, pathlib.Path, None] = None, *, @@ -147,7 +147,7 @@ def yaml( # noqa: PLR0913 return stream_or_path.getvalue() @override - def json( # noqa: PLR0913 + def json( # noqa: PLR0913 # FIXME CoP self, *, include: AbstractSetIntStr | MappingIntStrAny | None = None, @@ -189,7 +189,7 @@ def json( # noqa: PLR0913 **dumps_kwargs, ) - def _json_dict( # noqa: PLR0913 + def _json_dict( # noqa: PLR0913 # FIXME CoP self, *, include: Union[AbstractSetIntStr, MappingIntStrAny, None] = None, @@ -221,7 +221,7 @@ def _json_dict( # noqa: PLR0913 ) @override - def dict( # noqa: PLR0913 + def dict( # noqa: PLR0913 # FIXME CoP self, *, include: AbstractSetIntStr | MappingIntStrAny | None = None, @@ -262,7 +262,7 @@ def dict( # noqa: PLR0913 logger.debug(f"{class_name}.dict() - substituting config values") _recursively_set_config_value(result, config_provider) elif raise_on_missing_config_provider: - raise ValueError( # noqa: TRY003 + 
raise ValueError( # noqa: TRY003 # FIXME CoP f"{class_name}.dict() -" " `config_provider` must be provided if `raise_on_missing_config_provider` is True." f" {class_name} may be missing a context." @@ -288,7 +288,7 @@ def _include_exclude_to_dict( include_exclude: The include or exclude key passed to pydantic model export methods. Returns: A mutable dictionary that can be used for nested include/exclude. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if isinstance(include_exclude, Mapping): include_exclude_dict = dict(include_exclude) elif isinstance(include_exclude, AbstractSet): @@ -305,7 +305,7 @@ def __str__(self): class GenericBaseModel(FluentBaseModel, pydantic.GenericModel): ... -def _recursively_set_config_value( # noqa: C901 - too complex +def _recursively_set_config_value( # noqa: C901 # too complex data: MutableMapping | MutableSequence, config_provider: _ConfigurationProvider ) -> None: if isinstance(data, MutableMapping): diff --git a/great_expectations/datasource/fluent/interfaces.py b/great_expectations/datasource/fluent/interfaces.py index b276e280cdf2..b4de0adc860c 100644 --- a/great_expectations/datasource/fluent/interfaces.py +++ b/great_expectations/datasource/fluent/interfaces.py @@ -216,7 +216,7 @@ class GxContextWarning(GxDatasourceWarning): """ Warning related to a Datasource with a missing context. Usually because the Datasource was created directly rather than using a - `context.sources` factory method. + `context.data_sources` factory method. """ @@ -258,7 +258,7 @@ def _sorter_from_list(sorters: SortersDefinition) -> list[Sorter]: # This should never be reached because of static typing but is necessary because # mypy doesn't know of the if conditions must evaluate to True. 
- raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"sorters is a not a SortersDefinition but is a {type(sorters)}" ) @@ -289,6 +289,13 @@ def _sorter_from_str(sort_key: str) -> Sorter: @public_api class DataAsset(GenericBaseModel, Generic[DatasourceT, PartitionerT], ABC): + """ + A Data Asset is a collection of records within a Data Source, which is usually named based + on the underlying data system and sliced to correspond to a desired specification. + + Data Assets are used to specify how Great Expectations will organize data into Batches. + """ + # To subclass a DataAsset one must define `type` as a Class literal explicitly on the sublass # as well as implementing the methods in the `Abstract Methods` section below. # Some examples: @@ -349,7 +356,7 @@ def build_batch_request( Returns: A BatchRequest object that can be used to obtain a batch from an asset by calling the get_batch method. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP raise NotImplementedError( """One must implement "build_batch_request" on a DataAsset subclass.""" ) @@ -391,11 +398,11 @@ def add_batch_definition( """ batch_definition_names = {bc.name for bc in self.batch_definitions} if name in batch_definition_names: - raise ValueError( # noqa: TRY003 - f'"{name}" already exists (all existing batch_definition names are {", ".join(batch_definition_names)})' # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + f'"{name}" already exists (all existing batch_definition names are {", ".join(batch_definition_names)})' # noqa: E501 # FIXME CoP ) - # Let mypy know that self.datasource is a Datasource (it is currently bound to MetaDatasource) # noqa: E501 + # Let mypy know that self.datasource is a Datasource (it is currently bound to MetaDatasource) # noqa: E501 # FIXME CoP assert isinstance(self.datasource, Datasource) batch_definition = BatchDefinition[PartitionerT](name=name, partitioner=partitioner) @@ -425,13 +432,13 @@ def delete_batch_definition(self, 
name: str) -> None: # We collect the names as a list because while we shouldn't have more than 1 # batch definition with the same name, we want to represent it if it does occur. batch_definition_names = [bc.name for bc in self.batch_definitions] - raise ValueError( # noqa: TRY003 - f'"{name}" does not exist. Existing batch_definition names are {batch_definition_names})' # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + f'"{name}" does not exist. Existing batch_definition names are {batch_definition_names})' # noqa: E501 # FIXME CoP ) from err self._delete_batch_definition(batch_def) def _delete_batch_definition(self, batch_definition: BatchDefinition[PartitionerT]) -> None: - # Let mypy know that self.datasource is a Datasource (it is currently bound to MetaDatasource) # noqa: E501 + # Let mypy know that self.datasource is a Datasource (it is currently bound to MetaDatasource) # noqa: E501 # FIXME CoP assert isinstance(self.datasource, Datasource) self.batch_definitions.remove(batch_definition) @@ -445,7 +452,7 @@ def _delete_batch_definition(self, batch_definition: BatchDefinition[Partitioner self.update_batch_definition_field_set() def update_batch_definition_field_set(self) -> None: - """Ensure that we have __fields_set__ set correctly for batch_definitions to ensure we serialize IFF needed.""" # noqa: E501 + """Ensure that we have __fields_set__ set correctly for batch_definitions to ensure we serialize IFF needed.""" # noqa: E501 # FIXME CoP has_batch_definitions = len(self.batch_definitions) > 0 if "batch_definitions" in self.__fields_set__ and not has_batch_definitions: @@ -468,13 +475,13 @@ def get_batch_definition(self, name: str) -> BatchDefinition[PartitionerT]: if batch_definition.name == name ] if len(batch_definitions) == 0: - raise KeyError( # noqa: TRY003 + raise KeyError( # noqa: TRY003 # FIXME CoP f"BatchDefinition {name} not found" ) elif len(batch_definitions) > 1: # Our add_batch_definition() method should enforce that different # 
batch definitions do not share a name. - raise KeyError( # noqa: TRY003 + raise KeyError( # noqa: TRY003 # FIXME CoP f"Multiple keys for {name} found" ) return batch_definitions[0] @@ -578,7 +585,7 @@ def _sort_batch_data_list( reverse=reverse, ) except KeyError as e: - raise KeyError( # noqa: TRY003 + raise KeyError( # noqa: TRY003 # FIXME CoP f"Trying to sort {self.name}'s batches on key {key}, " "which isn't available on all batches." ) from e @@ -619,6 +626,11 @@ class Datasource( Generic[_DataAssetT, _ExecutionEngineT], metaclass=MetaDatasource, ): + """ + A Datasource provides a standard API for accessing and interacting with data from + a wide variety of source systems. + """ + # To subclass Datasource one needs to define: # asset_types # type @@ -633,7 +645,7 @@ class Datasource( asset_types: ClassVar[Sequence[Type[DataAsset]]] = [] # Not all Datasources require a DataConnector data_connector_type: ClassVar[Optional[Type[DataConnector]]] = None - # Datasource sublcasses should update this set if the field should not be passed to the execution engine # noqa: E501 + # Datasource sublcasses should update this set if the field should not be passed to the execution engine # noqa: E501 # FIXME CoP _EXTRA_EXCLUDED_EXEC_ENG_ARGS: ClassVar[Set[str]] = set() _type_lookup: ClassVar[TypeLookup] # This attribute is set in `MetaDatasource.__new__` # Setting this in a Datasource subclass will override the execution engine type. @@ -707,7 +719,7 @@ def add_batch_definition( ) -> BatchDefinition[PartitionerT]: asset_name = batch_definition.data_asset.name if not self.data_context: - raise DataContextError( # noqa: TRY003 + raise DataContextError( # noqa: TRY003 # FIXME CoP "Cannot save datasource without a data context." 
) @@ -722,16 +734,15 @@ def add_batch_definition( updated_datasource = self.data_context.update_datasource(loaded_datasource) assert isinstance(updated_datasource, Datasource) - output = updated_datasource.get_asset(asset_name).get_batch_definition( - batch_definition.name - ) - output.set_data_asset(batch_definition.data_asset) - return output + updated_asset = updated_datasource.get_asset(asset_name) + updated_batch_definition = updated_asset.get_batch_definition(batch_definition.name) + + return updated_batch_definition def delete_batch_definition(self, batch_definition: BatchDefinition[PartitionerT]) -> None: asset_name = batch_definition.data_asset.name if not self.data_context: - raise DataContextError( # noqa: TRY003 + raise DataContextError( # noqa: TRY003 # FIXME CoP "Cannot save datasource without a data context." ) @@ -819,7 +830,7 @@ def get_asset(self, name: str) -> _DataAssetT: found_asset._datasource = self return found_asset except IndexError as exc: - raise LookupError( # noqa: TRY003 + raise LookupError( # noqa: TRY003 # FIXME CoP f'"{name}" not found. Available assets are ({", ".join(self.get_asset_names())})' ) from exc @@ -829,7 +840,7 @@ def delete_asset(self, name: str) -> None: Args: name: name of DataAsset to be deleted. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP from great_expectations.data_context import CloudDataContext asset: _DataAssetT @@ -859,13 +870,13 @@ def _add_asset(self, asset: _DataAssetT, connect_options: dict | None = None) -> asset_names: Set[str] = self.get_asset_names() if asset.name in asset_names: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f'"{asset.name}" already exists (all existing assets are {", ".join(asset_names)})' ) self.assets.append(asset) - # if asset was added to a cloud FDS, _update_fluent_datasource will return FDS fetched from cloud, # noqa: E501 + # if asset was added to a cloud FDS, _update_fluent_datasource will return FDS fetched from cloud, # noqa: E501 # FIXME CoP # which will contain the new asset populated with an id if self._data_context: updated_datasource = self._data_context._update_fluent_datasource(datasource=self) @@ -890,7 +901,7 @@ def _rebuild_asset_data_connectors(self) -> None: A warning is raised if a data_connector cannot be built for an asset. Not all users will have access to the needed dependencies (packages or credentials) for every asset. Missing dependencies will stop them from using the asset but should not stop them from loading it from config. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP asset_build_failure_direct_cause: dict[str, Exception | BaseException] = {} if self.data_connector_type: @@ -901,7 +912,7 @@ def _rebuild_asset_data_connectors(self) -> None: self._build_data_connector(data_asset, **connect_options) except Exception as dc_build_err: logger.info( - f"Unable to build data_connector for {self.type} {data_asset.type} {data_asset.name}", # noqa: E501 + f"Unable to build data_connector for {self.type} {data_asset.type} {data_asset.name}", # noqa: E501 # FIXME CoP exc_info=True, ) # reveal direct cause instead of generic, unhelpful MyDatasourceError @@ -915,7 +926,7 @@ def _rebuild_asset_data_connectors(self) -> None: for (name, exc) in asset_build_failure_direct_cause.items() ] warnings.warn( - f"data_connector build failure for {self.name} assets - {', '.join(names_and_error)}", # noqa: E501 + f"data_connector build failure for {self.name} assets - {', '.join(names_and_error)}", # noqa: E501 # FIXME CoP category=RuntimeWarning, ) @@ -952,7 +963,7 @@ def test_connection(self, test_assets: bool = True) -> None: Raises: TestConnectionError: If the connection test fails. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP raise NotImplementedError( """One needs to implement "test_connection" on a Datasource subclass.""" ) @@ -965,7 +976,7 @@ def _build_data_connector(self, data_asset: _DataAssetT, **kwargs) -> None: Args: data_asset: DataAsset using this DataConnector instance kwargs: Extra keyword arguments allow specification of arguments used by particular DataConnector subclasses - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP pass @classmethod @@ -977,7 +988,7 @@ def _get_exec_engine_excludes(cls) -> Set[str]: Default implementation is to return the combined set of field names from `_EXTRA_EXCLUDED_EXEC_ENG_ARGS` and `_BASE_DATASOURCE_FIELD_NAMES`. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return cls._EXTRA_EXCLUDED_EXEC_ENG_ARGS.union(_BASE_DATASOURCE_FIELD_NAMES) # End Abstract Methods @@ -1009,7 +1020,7 @@ class Batch: a spark or a sql database. An exception exists for pandas or any in-memory datastore. """ - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, datasource: Datasource, data_asset: DataAsset, @@ -1033,7 +1044,7 @@ def __init__( # noqa: PLR0913 self._batch_definition = batch_definition # Mutable Attribute - # metadata is any arbitrary data one wants to associate with a batch. GX will add arbitrary metadata # noqa: E501 + # metadata is any arbitrary data one wants to associate with a batch. GX will add arbitrary metadata # noqa: E501 # FIXME CoP # to a batch so developers may want to namespace any custom metadata they add. self.metadata = metadata or {} @@ -1154,6 +1165,23 @@ def validate( result_format: ResultFormatUnion = DEFAULT_RESULT_FORMAT, expectation_parameters: Optional[SuiteParameterDict] = None, ) -> ExpectationValidationResult | ExpectationSuiteValidationResult: + """ + Validate the Batch using the provided Expectation or Expectation Suite. + + Args: + expect: The Expectation or Expectation Suite to validate. + result_format: The format to return the validation results in. + expectation_parameters: A dictionary of parameters values for any + expectations using parameterized values (the $PARAMETER syntax). + The keys are the parameter names and the values are the values + to be used for this validation run. + + Returns: + An ExpectationValidationResult or ExpectationSuiteValidationResult object. + + Raises: + ValueError: If the expect argument is not an Expectation or an ExpectationSuite. 
+ """ from great_expectations.core import ExpectationSuite from great_expectations.expectations.expectation import Expectation @@ -1166,10 +1194,10 @@ def validate( expect, result_format=result_format, expectation_parameters=expectation_parameters ) else: - # If we are type checking, we should never fall through to this case. However, exploratory # noqa: E501 + # If we are type checking, we should never fall through to this case. However, exploratory # noqa: E501 # FIXME CoP # workflows are not being type checked. - raise ValueError( # noqa: TRY003, TRY004 - f"Trying to validate something that isn't an Expectation or an ExpectationSuite: {expect}" # noqa: E501 + raise ValueError( # noqa: TRY003, TRY004 # FIXME CoP + f"Trying to validate something that isn't an Expectation or an ExpectationSuite: {expect}" # noqa: E501 # FIXME CoP ) def _validate_expectation( @@ -1199,7 +1227,7 @@ def _create_validator(self, *, result_format: ResultFormatUnion) -> V1Validator: context = self.datasource.data_context if context is None: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "We can't validate batches that are attached to datasources without a data context" ) diff --git a/great_expectations/datasource/fluent/invalid_datasource.py b/great_expectations/datasource/fluent/invalid_datasource.py index c2c672f66e4d..c9ee5572bb70 100644 --- a/great_expectations/datasource/fluent/invalid_datasource.py +++ b/great_expectations/datasource/fluent/invalid_datasource.py @@ -43,14 +43,14 @@ class GxInvalidDatasourceWarning(GxDatasourceWarning): """ A warning that the Datasource configuration is invalid and will must be updated before it can used. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP class InvalidAsset(DataAsset): """ A DataAsset that is invalid. The DataAsset itself may be valid, but it is classified as invalid because its parent Datasource or sibling assets are invalid. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP type: str = "invalid" name: str = "invalid" @@ -71,12 +71,12 @@ def _raise_type_error(self) -> NoReturn: @override def test_connection(self) -> None: if datasource := getattr(self, "datasource", None): - raise TestConnectionError( # noqa: TRY003 - f"The Datasource configuration for {self.name} is invalid and cannot be used. Please fix the error and try again" # noqa: E501 + raise TestConnectionError( # noqa: TRY003 # FIXME CoP + f"The Datasource configuration for {self.name} is invalid and cannot be used. Please fix the error and try again" # noqa: E501 # FIXME CoP ) from datasource.config_error - # the asset should always have a datasource, but if it doesn't, we should still raise an error # noqa: E501 - raise TestConnectionError( # noqa: TRY003 - "This Asset configuration is invalid and cannot be used. Please fix the error and try again" # noqa: E501 + # the asset should always have a datasource, but if it doesn't, we should still raise an error # noqa: E501 # FIXME CoP + raise TestConnectionError( # noqa: TRY003 # FIXME CoP + "This Asset configuration is invalid and cannot be used. Please fix the error and try again" # noqa: E501 # FIXME CoP ) @override @@ -124,7 +124,7 @@ def __getitem__(self, key: Type) -> str: ... def __getitem__(self, key: ValidTypes) -> ValidTypes: if isinstance(key, str): return InvalidAsset - # if a type is passed, normally we would return the type name but that doesn't make sense here # noqa: E501 + # if a type is passed, normally we would return the type name but that doesn't make sense here # noqa: E501 # FIXME CoP # for an InvalidAsset raise NotImplementedError( f"Looking up the `type` name for {InvalidAsset.__name__} is not supported" @@ -142,7 +142,7 @@ class InvalidDatasource(Datasource): why it was considered invalid. Any errors raised should raise `from self.config_error`. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # class var definitions asset_types: ClassVar[List[Type[DataAsset]]] = [InvalidAsset] @@ -164,8 +164,8 @@ class Config: @override def test_connection(self, test_assets: bool = True) -> None: - raise TestConnectionError( # noqa: TRY003 - "This Datasource configuration is invalid and cannot be used. Please fix the error and try again" # noqa: E501 + raise TestConnectionError( # noqa: TRY003 # FIXME CoP + "This Datasource configuration is invalid and cannot be used. Please fix the error and try again" # noqa: E501 # FIXME CoP ) from self.config_error @override @@ -175,7 +175,7 @@ def get_asset(self, name: str) -> InvalidAsset: Don't raise an error because the users may want to inspect the asset config. """ warnings.warn( - f"The {self.name} Datasource configuration is invalid and cannot be used. Please fix the error and try again", # noqa: E501 + f"The {self.name} Datasource configuration is invalid and cannot be used. Please fix the error and try again", # noqa: E501 # FIXME CoP GxInvalidDatasourceWarning, ) return super().get_asset(name) @@ -186,7 +186,7 @@ def _raise_type_error(self, *args, **kwargs) -> NoReturn: Raise from the original config error that caused the Datasource to be invalid. """ error = TypeError( - f"{self.name} Datasource is configuration is invalid and cannot be used. Please fix the error and try again" # noqa: E501 + f"{self.name} Datasource is configuration is invalid and cannot be used. Please fix the error and try again" # noqa: E501 # FIXME CoP ) raise error from self.config_error diff --git a/great_expectations/datasource/fluent/metadatasource.py b/great_expectations/datasource/fluent/metadatasource.py index 884fd8e52924..30834d382425 100644 --- a/great_expectations/datasource/fluent/metadatasource.py +++ b/great_expectations/datasource/fluent/metadatasource.py @@ -27,7 +27,7 @@ def __new__( # noqa: PYI034 # Self cannot be used with Metaclass available as part of the `DataContext`. 
Also binds asset adding methods according to the declared `asset_types`. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP logger.debug(f"1a. {meta_cls.__name__}.__new__() for `{cls_name}`") cls = super().__new__(meta_cls, cls_name, bases, cls_dict) @@ -44,9 +44,9 @@ def __new__( # noqa: PYI034 # Self cannot be used with Metaclass if cls.__module__ == "__main__": logger.warning( - f"Datasource `{cls_name}` should not be defined as part of __main__ this may cause typing lookup collisions" # noqa: E501 + f"Datasource `{cls_name}` should not be defined as part of __main__ this may cause typing lookup collisions" # noqa: E501 # FIXME CoP ) - # instantiate new TypeLookup to prevent child classes conflicts with parent class asset types # noqa: E501 + # instantiate new TypeLookup to prevent child classes conflicts with parent class asset types # noqa: E501 # FIXME CoP cls._type_lookup = TypeLookup() DataSourceManager.register_datasource(cls) return cls diff --git a/great_expectations/datasource/fluent/pandas_azure_blob_storage_datasource.py b/great_expectations/datasource/fluent/pandas_azure_blob_storage_datasource.py index ec4f01e033c0..128e901cbf26 100644 --- a/great_expectations/datasource/fluent/pandas_azure_blob_storage_datasource.py +++ b/great_expectations/datasource/fluent/pandas_azure_blob_storage_datasource.py @@ -35,6 +35,11 @@ class PandasAzureBlobStorageDatasourceError(PandasDatasourceError): @public_api class PandasAzureBlobStorageDatasource(_PandasFilePathDatasource): + """ + PandasAzureBlobStorageDatasource is a PandasDatasource that uses Azure Blob Storage as a + data store. 
+ """ + # class attributes data_connector_type: ClassVar[Type[AzureBlobStorageDataConnector]] = ( AzureBlobStorageDataConnector @@ -57,50 +62,50 @@ def _get_azure_client(self) -> azure.BlobServiceClient: self, self.azure_options, raise_warning_if_provider_not_present=True ) # pull in needed config substitutions using the `_config_provider` - # The `FluentBaseModel.dict()` call will do the config substitution on the serialized dict if a `config_provider` is passed. # noqa: E501 + # The `FluentBaseModel.dict()` call will do the config substitution on the serialized dict if a `config_provider` is passed. # noqa: E501 # FIXME CoP azure_options: dict = self.dict(config_provider=self._config_provider).get( "azure_options", {} ) - # Thanks to schema validation, we are guaranteed to have one of `conn_str` or `account_url` to # noqa: E501 - # use in authentication (but not both). If the format or content of the provided keys is invalid, # noqa: E501 - # the assignment of `self._account_name` and `self._azure_client` will fail and an error will be raised. # noqa: E501 + # Thanks to schema validation, we are guaranteed to have one of `conn_str` or `account_url` to # noqa: E501 # FIXME CoP + # use in authentication (but not both). If the format or content of the provided keys is invalid, # noqa: E501 # FIXME CoP + # the assignment of `self._account_name` and `self._azure_client` will fail and an error will be raised. 
# noqa: E501 # FIXME CoP conn_str: str | None = azure_options.get("conn_str") account_url: str | None = azure_options.get("account_url") if not bool(conn_str) ^ bool(account_url): - raise PandasAzureBlobStorageDatasourceError( # noqa: TRY003 - "You must provide one of `conn_str` or `account_url` to the `azure_options` key in your config (but not both)" # noqa: E501 + raise PandasAzureBlobStorageDatasourceError( # noqa: TRY003 # FIXME CoP + "You must provide one of `conn_str` or `account_url` to the `azure_options` key in your config (but not both)" # noqa: E501 # FIXME CoP ) - # Validate that "azure" libararies were successfully imported and attempt to create "azure_client" handle. # noqa: E501 + # Validate that "azure" libararies were successfully imported and attempt to create "azure_client" handle. # noqa: E501 # FIXME CoP if azure.BlobServiceClient: # type: ignore[truthy-function] # False if NotImported try: if conn_str is not None: - self._account_name = re.search( # type: ignore[union-attr] + self._account_name = re.search( # type: ignore[union-attr] # FIXME CoP r".*?AccountName=(.+?);.*?", conn_str ).group(1) azure_client = azure.BlobServiceClient.from_connection_string( **azure_options ) elif account_url is not None: - self._account_name = re.search( # type: ignore[union-attr] + self._account_name = re.search( # type: ignore[union-attr] # FIXME CoP r"(?:https?://)?(.+?).blob.core.windows.net", account_url ).group(1) azure_client = azure.BlobServiceClient(**azure_options) except Exception as e: - # Failure to create "azure_client" is most likely due invalid "azure_options" dictionary. # noqa: E501 - raise PandasAzureBlobStorageDatasourceError( # noqa: TRY003 + # Failure to create "azure_client" is most likely due invalid "azure_options" dictionary. # noqa: E501 # FIXME CoP + raise PandasAzureBlobStorageDatasourceError( # noqa: TRY003 # FIXME CoP f'Due to exception: "{e!s}", "azure_client" could not be created.' 
) from e else: - raise PandasAzureBlobStorageDatasourceError( # noqa: TRY003 - 'Unable to create "PandasAzureBlobStorageDatasource" due to missing azure.storage.blob dependency.' # noqa: E501 + raise PandasAzureBlobStorageDatasourceError( # noqa: TRY003 # FIXME CoP + 'Unable to create "PandasAzureBlobStorageDatasource" due to missing azure.storage.blob dependency.' # noqa: E501 # FIXME CoP ) self._azure_client = azure_client if not azure_client: - raise PandasAzureBlobStorageDatasourceError("Failed to return `azure_client`") # noqa: TRY003 + raise PandasAzureBlobStorageDatasourceError("Failed to return `azure_client`") # noqa: TRY003 # FIXME CoP return azure_client @@ -113,11 +118,11 @@ def test_connection(self, test_assets: bool = True) -> None: Raises: TestConnectionError: If the connection test fails. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP try: _ = self._get_azure_client() except Exception as e: - raise TestConnectionError( # noqa: TRY003 + raise TestConnectionError( # noqa: TRY003 # FIXME CoP "Attempt to connect to datasource failed with the following error message: " f"{e!s}" ) from e @@ -138,11 +143,11 @@ def _build_data_connector( ) -> None: """Builds and attaches the `AzureBlobStorageDataConnector` to the asset.""" if kwargs: - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"_build_data_connector() got unexpected keyword arguments {list(kwargs.keys())}" ) if abs_container is _MISSING: - raise TypeError(f"'{data_asset.name}' is missing required argument 'abs_container'") # noqa: TRY003 + raise TypeError(f"'{data_asset.name}' is missing required argument 'abs_container'") # noqa: TRY003 # FIXME CoP data_asset._data_connector = self.data_connector_type.build_data_connector( datasource_name=self.name, diff --git a/great_expectations/datasource/fluent/pandas_azure_blob_storage_datasource.pyi b/great_expectations/datasource/fluent/pandas_azure_blob_storage_datasource.pyi index 1ec95b15a425..09b0ceca57c2 100644 --- 
a/great_expectations/datasource/fluent/pandas_azure_blob_storage_datasource.pyi +++ b/great_expectations/datasource/fluent/pandas_azure_blob_storage_datasource.pyi @@ -72,7 +72,7 @@ class PandasAzureBlobStorageDatasource(_PandasFilePathDatasource): def _get_azure_client(self) -> azure.BlobServiceClient: ... @override def test_connection(self, test_assets: bool = ...) -> None: ... - def add_csv_asset( # noqa: PLR0913 + def add_csv_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -132,7 +132,7 @@ class PandasAzureBlobStorageDatasource(_PandasFilePathDatasource): memory_map: bool = ..., storage_options: StorageOptions = ..., ) -> CSVAsset: ... - def add_excel_asset( # noqa: PLR0913 + def add_excel_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -164,7 +164,7 @@ class PandasAzureBlobStorageDatasource(_PandasFilePathDatasource): mangle_dupe_cols: bool = ..., storage_options: StorageOptions = ..., ) -> ExcelAsset: ... - def add_feather_asset( # noqa: PLR0913 + def add_feather_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -176,7 +176,7 @@ class PandasAzureBlobStorageDatasource(_PandasFilePathDatasource): use_threads: bool = ..., storage_options: StorageOptions = ..., ) -> FeatherAsset: ... - def add_fwf_asset( # noqa: PLR0913 + def add_fwf_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -188,7 +188,7 @@ class PandasAzureBlobStorageDatasource(_PandasFilePathDatasource): infer_nrows: int = ..., kwargs: Optional[dict] = ..., ) -> FWFAsset: ... - def add_hdf_asset( # noqa: PLR0913 + def add_hdf_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -207,7 +207,7 @@ class PandasAzureBlobStorageDatasource(_PandasFilePathDatasource): chunksize: typing.Union[int, None] = ..., kwargs: typing.Union[dict, None] = ..., ) -> HDFAsset: ... 
- def add_html_asset( # noqa: PLR0913 + def add_html_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -230,7 +230,7 @@ class PandasAzureBlobStorageDatasource(_PandasFilePathDatasource): keep_default_na: bool = ..., displayed_only: bool = ..., ) -> HTMLAsset: ... - def add_json_asset( # noqa: PLR0913 + def add_json_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -254,7 +254,7 @@ class PandasAzureBlobStorageDatasource(_PandasFilePathDatasource): nrows: typing.Union[int, None] = ..., storage_options: StorageOptions = ..., ) -> JSONAsset: ... - def add_orc_asset( # noqa: PLR0913 + def add_orc_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -265,7 +265,7 @@ class PandasAzureBlobStorageDatasource(_PandasFilePathDatasource): columns: typing.Union[typing.List[str], None] = ..., kwargs: typing.Union[dict, None] = ..., ) -> ORCAsset: ... - def add_parquet_asset( # noqa: PLR0913 + def add_parquet_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -279,7 +279,7 @@ class PandasAzureBlobStorageDatasource(_PandasFilePathDatasource): use_nullable_dtypes: bool = ..., kwargs: typing.Union[dict, None] = ..., ) -> ParquetAsset: ... - def add_pickle_asset( # noqa: PLR0913 + def add_pickle_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -290,7 +290,7 @@ class PandasAzureBlobStorageDatasource(_PandasFilePathDatasource): compression: CompressionOptions = "infer", storage_options: StorageOptions = ..., ) -> PickleAsset: ... - def add_sas_asset( # noqa: PLR0913 + def add_sas_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -305,7 +305,7 @@ class PandasAzureBlobStorageDatasource(_PandasFilePathDatasource): iterator: bool = ..., compression: CompressionOptions = "infer", ) -> SASAsset: ... 
- def add_spss_asset( # noqa: PLR0913 + def add_spss_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -316,7 +316,7 @@ class PandasAzureBlobStorageDatasource(_PandasFilePathDatasource): usecols: typing.Union[int, str, typing.Sequence[int], None] = ..., convert_categoricals: bool = ..., ) -> SPSSAsset: ... - def add_stata_asset( # noqa: PLR0913 + def add_stata_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -336,7 +336,7 @@ class PandasAzureBlobStorageDatasource(_PandasFilePathDatasource): compression: CompressionOptions = "infer", storage_options: StorageOptions = ..., ) -> StataAsset: ... - def add_xml_asset( # noqa: PLR0913 + def add_xml_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, diff --git a/great_expectations/datasource/fluent/pandas_datasource.py b/great_expectations/datasource/fluent/pandas_datasource.py index 88ee6e8059fb..faff9ef5a797 100644 --- a/great_expectations/datasource/fluent/pandas_datasource.py +++ b/great_expectations/datasource/fluent/pandas_datasource.py @@ -85,6 +85,10 @@ class PandasDatasourceError(Exception): @public_api class _PandasDataAsset(DataAsset): + """ + A Pandas DataAsset is a DataAsset that is backed by a Pandas DataFrame. + """ + _EXCLUDE_FROM_READER_OPTIONS: ClassVar[Set[str]] = { "batch_definitions", "batch_metadata", @@ -107,7 +111,7 @@ class Config: def _get_reader_method(self) -> str: raise NotImplementedError( """One needs to explicitly provide "reader_method" for Pandas DataAsset extensions as temporary \ -work-around, until "type" naming convention and method for obtaining 'reader_method' from it are established.""" # noqa: E501 +work-around, until "type" naming convention and method for obtaining 'reader_method' from it are established.""" # noqa: E501 # FIXME CoP ) @override @@ -211,6 +215,15 @@ def build_batch_request( @public_api def add_batch_definition_whole_dataframe(self, name: str) -> BatchDefinition: + """ + Add a BatchDefinition that requests the whole dataframe. 
+ + Args: + name: The name of the BatchDefinition. + + Returns: + A BatchDefinition with no partitioning. + """ return self.add_batch_definition( name=name, partitioner=None, @@ -234,14 +247,14 @@ def _validate_batch_request(self, batch_request: BatchRequest) -> None: options={}, batch_slice=batch_request._batch_slice_input, ) - raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 + raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 # FIXME CoP "BatchRequest should have form:\n" f"{pf(expect_batch_request_form.dict())}\n" f"but actually has form:\n{pf(batch_request.dict())}\n" ) @override - def json( # noqa: PLR0913 + def json( # noqa: PLR0913 # FIXME CoP self, *, include: AbstractSetIntStr | MappingIntStrAny | None = None, @@ -365,12 +378,12 @@ class Config: @override def _get_reader_method(self) -> str: raise NotImplementedError( - """Pandas DataFrameAsset does not implement "_get_reader_method()" method, because DataFrame is already available.""" # noqa: E501 + """Pandas DataFrameAsset does not implement "_get_reader_method()" method, because DataFrame is already available.""" # noqa: E501 # FIXME CoP ) def _get_reader_options_include(self) -> set[str]: raise NotImplementedError( - """Pandas DataFrameAsset does not implement "_get_reader_options_include()" method, because DataFrame is already available.""" # noqa: E501 + """Pandas DataFrameAsset does not implement "_get_reader_options_include()" method, because DataFrame is already available.""" # noqa: E501 # FIXME CoP ) @override @@ -438,7 +451,7 @@ def _validate_batch_request(self, batch_request: BatchRequest) -> None: options={"dataframe": pd.DataFrame()}, batch_slice=batch_request._batch_slice_input, ) - raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 + raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 # FIXME CoP "BatchRequest should have form:\n" f"{pf(expect_batch_request_form.dict())}\n" f"but actually has form:\n{pf(batch_request.dict())}\n" @@ -519,7 +532,7 @@ 
def test_connection(self, test_assets: bool = True) -> None: # End Abstract Methods @override - def json( # noqa: PLR0913 + def json( # noqa: PLR0913 # FIXME CoP self, *, include: AbstractSetIntStr | MappingIntStrAny | None = None, @@ -580,7 +593,7 @@ def _add_asset(self, asset: _DataAssetT, connect_options: dict | None = None) -> Args: asset: The DataAsset to be added to this datasource. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP asset_name: str = asset.name asset_names: Set[str] = self.get_asset_names() @@ -591,10 +604,10 @@ def _add_asset(self, asset: _DataAssetT, connect_options: dict | None = None) -> if asset_name == DEFAULT_PANDAS_DATA_ASSET_NAME: if in_cloud_context: - # In cloud mode, we need to generate a unique name for the asset so that it gets persisted # noqa: E501 + # In cloud mode, we need to generate a unique name for the asset so that it gets persisted # noqa: E501 # FIXME CoP asset_name = f"{asset.type}-{_short_id()}" logger.info( - f"Generating unique name for '{DEFAULT_PANDAS_DATA_ASSET_NAME}' asset '{asset_name}'" # noqa: E501 + f"Generating unique name for '{DEFAULT_PANDAS_DATA_ASSET_NAME}' asset '{asset_name}'" # noqa: E501 # FIXME CoP ) asset.name = asset_name elif asset_name in asset_names: @@ -628,8 +641,8 @@ class PandasDatasource(_PandasDatasource): @override def dict(self, _exclude_default_asset_names: bool = True, **kwargs): - """Overriding `.dict()` so that `DEFAULT_PANDAS_DATA_ASSET_NAME` is always excluded on serialization.""" # noqa: E501 - # Overriding `.dict()` instead of `.json()` because `.json()`is only called from the outermost model, # noqa: E501 + """Overriding `.dict()` so that `DEFAULT_PANDAS_DATA_ASSET_NAME` is always excluded on serialization.""" # noqa: E501 # FIXME CoP + # Overriding `.dict()` instead of `.json()` because `.json()`is only called from the outermost model, # noqa: E501 # FIXME CoP # .dict() is called for deeply nested models. 
ds_dict = super().dict(**kwargs) if _exclude_default_asset_names: @@ -646,8 +659,8 @@ def test_connection(self, test_assets: bool = True) -> None: ... @staticmethod def _validate_asset_name(asset_name: Optional[str] = None) -> str: if asset_name == DEFAULT_PANDAS_DATA_ASSET_NAME: - raise PandasDatasourceError( # noqa: TRY003 - f"""An asset_name of {DEFAULT_PANDAS_DATA_ASSET_NAME} cannot be passed because it is a reserved name.""" # noqa: E501 + raise PandasDatasourceError( # noqa: TRY003 # FIXME CoP + f"""An asset_name of {DEFAULT_PANDAS_DATA_ASSET_NAME} cannot be passed because it is a reserved name.""" # noqa: E501 # FIXME CoP ) if not asset_name: asset_name = DEFAULT_PANDAS_DATA_ASSET_NAME @@ -657,8 +670,8 @@ def _get_batch(self, asset: _PandasDataAsset, dataframe: pd.DataFrame | None = N batch_request: BatchRequest if isinstance(asset, DataFrameAsset): if not isinstance(dataframe, pd.DataFrame): - raise ValueError( # noqa: TRY003, TRY004 - 'Cannot execute "PandasDatasource.read_dataframe()" without a valid "dataframe" argument.' # noqa: E501 + raise ValueError( # noqa: TRY003, TRY004 # FIXME CoP + 'Cannot execute "PandasDatasource.read_dataframe()" without a valid "dataframe" argument.' # noqa: E501 # FIXME CoP ) batch_request = asset.build_batch_request(options={"dataframe": dataframe}) @@ -682,7 +695,7 @@ def add_dataframe_asset( Returns: The DataFameAsset that has been added to this datasource. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP asset: DataFrameAsset = DataFrameAsset( name=name, batch_metadata=batch_metadata or {}, @@ -706,7 +719,7 @@ def read_dataframe( Returns: A Batch using an ephemeral DataFrameAsset. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP name: str = self._validate_asset_name(asset_name=asset_name) asset: DataFrameAsset = self.add_dataframe_asset( name=name, @@ -719,7 +732,7 @@ def add_clipboard_asset( self, name: str, **kwargs, - ) -> ClipboardAsset: # type: ignore[valid-type] + ) -> ClipboardAsset: # type: ignore[valid-type] # FIXME CoP """ Add a clipboard data asset to the datasource. @@ -753,7 +766,7 @@ def read_clipboard( A Batch using an ephemeral ClipboardAsset. """ name: str = self._validate_asset_name(asset_name=asset_name) - asset: ClipboardAsset = self.add_clipboard_asset( # type: ignore[valid-type] + asset: ClipboardAsset = self.add_clipboard_asset( # type: ignore[valid-type] # FIXME CoP name=name, **kwargs, ) @@ -765,7 +778,7 @@ def add_csv_asset( name: str, filepath_or_buffer: pydantic.FilePath | pydantic.AnyUrl, **kwargs, - ) -> CSVAsset: # type: ignore[valid-type] + ) -> CSVAsset: # type: ignore[valid-type] # FIXME CoP """ Add a CSV data asset to the datasource. @@ -779,7 +792,7 @@ def add_csv_asset( """ asset = CSVAsset( name=name, - filepath_or_buffer=filepath_or_buffer, # type: ignore[call-arg] + filepath_or_buffer=filepath_or_buffer, # type: ignore[call-arg] # FIXME CoP **kwargs, ) return self._add_asset(asset=asset) @@ -803,7 +816,7 @@ def read_csv( A Batch using an ephemeral CSVAsset. """ name: str = self._validate_asset_name(asset_name=asset_name) - asset: CSVAsset = self.add_csv_asset( # type: ignore[valid-type] + asset: CSVAsset = self.add_csv_asset( # type: ignore[valid-type] # FIXME CoP name=name, filepath_or_buffer=filepath_or_buffer, **kwargs, @@ -816,7 +829,7 @@ def add_excel_asset( name: str, io: os.PathLike | str | bytes, **kwargs, - ) -> ExcelAsset: # type: ignore[valid-type] + ) -> ExcelAsset: # type: ignore[valid-type] # FIXME CoP """ Add an Excel data asset to the datasource. @@ -828,7 +841,7 @@ def add_excel_asset( Returns: The ExcelAsset that has been added to this datasource. 
""" - asset = ExcelAsset( # type: ignore[call-arg] + asset = ExcelAsset( # type: ignore[call-arg] # FIXME CoP name=name, io=io, **kwargs, @@ -854,7 +867,7 @@ def read_excel( A Batch using an ephemeral ExcelAsset. """ name: str = self._validate_asset_name(asset_name=asset_name) - asset: ExcelAsset = self.add_excel_asset( # type: ignore[valid-type] + asset: ExcelAsset = self.add_excel_asset( # type: ignore[valid-type] # FIXME CoP name=name, io=io, **kwargs, @@ -867,7 +880,7 @@ def add_feather_asset( name: str, path: pydantic.FilePath | pydantic.AnyUrl, **kwargs, - ) -> FeatherAsset: # type: ignore[valid-type] + ) -> FeatherAsset: # type: ignore[valid-type] # FIXME CoP """ Add a Feather data asset to the datasource. @@ -879,7 +892,7 @@ def add_feather_asset( Returns: The FeatherAsset that has been added to this datasource. """ - asset = FeatherAsset( # type: ignore[call-arg] + asset = FeatherAsset( # type: ignore[call-arg] # FIXME CoP name=name, path=path, **kwargs, @@ -905,7 +918,7 @@ def read_feather( A Batch using an ephemeral FeatherAsset. """ name: str = self._validate_asset_name(asset_name=asset_name) - asset: FeatherAsset = self.add_feather_asset( # type: ignore[valid-type] + asset: FeatherAsset = self.add_feather_asset( # type: ignore[valid-type] # FIXME CoP name=name, path=path, **kwargs, @@ -918,7 +931,7 @@ def add_fwf_asset( name: str, filepath_or_buffer: pydantic.FilePath | pydantic.AnyUrl, **kwargs, - ) -> FeatherAsset: # type: ignore[valid-type] + ) -> FeatherAsset: # type: ignore[valid-type] # FIXME CoP """ Adds a Fixed Width File DataAsset to the datasource. @@ -930,7 +943,7 @@ def add_fwf_asset( Returns: The FWFAsset that has been added to this datasource. """ - asset = FWFAsset( # type: ignore[call-arg] + asset = FWFAsset( # type: ignore[call-arg] # FIXME CoP name=name, filepath_or_buffer=filepath_or_buffer, **kwargs, @@ -956,7 +969,7 @@ def read_fwf( A Batch using an ephemeral FWFAsset. 
""" name: str = self._validate_asset_name(asset_name=asset_name) - asset: FWFAsset = self.add_fwf_asset( # type: ignore[valid-type] + asset: FWFAsset = self.add_fwf_asset( # type: ignore[valid-type] # FIXME CoP name=name, filepath_or_buffer=filepath_or_buffer, **kwargs, @@ -969,7 +982,7 @@ def add_gbq_asset( name: str, query: str, **kwargs, - ) -> GBQAsset: # type: ignore[valid-type] + ) -> GBQAsset: # type: ignore[valid-type] # FIXME CoP """ Add a GBQ data asset to the datasource. @@ -981,7 +994,7 @@ def add_gbq_asset( Returns: The GBQAsset that has been added to this datasource. """ - asset = GBQAsset( # type: ignore[call-arg] + asset = GBQAsset( # type: ignore[call-arg] # FIXME CoP name=name, query=query, **kwargs, @@ -1007,7 +1020,7 @@ def read_gbq( A Batch using an ephemeral GBQAsset. """ name: str = self._validate_asset_name(asset_name=asset_name) - asset: GBQAsset = self.add_gbq_asset( # type: ignore[valid-type] + asset: GBQAsset = self.add_gbq_asset( # type: ignore[valid-type] # FIXME CoP name=name, query=query, **kwargs, @@ -1020,7 +1033,7 @@ def add_hdf_asset( name: str, path_or_buf: pd.HDFStore | os.PathLike | str, **kwargs, - ) -> HDFAsset: # type: ignore[valid-type] + ) -> HDFAsset: # type: ignore[valid-type] # FIXME CoP """ Add an HDF data asset to the datasource. @@ -1032,7 +1045,7 @@ def add_hdf_asset( Returns: The HDFAsset that has been added to this datasource. """ - asset = HDFAsset( # type: ignore[call-arg] + asset = HDFAsset( # type: ignore[call-arg] # FIXME CoP name=name, path_or_buf=path_or_buf, **kwargs, @@ -1058,7 +1071,7 @@ def read_hdf( A Batch using an ephemeral HDFAsset. 
""" name: str = self._validate_asset_name(asset_name=asset_name) - asset: HDFAsset = self.add_hdf_asset( # type: ignore[valid-type] + asset: HDFAsset = self.add_hdf_asset( # type: ignore[valid-type] # FIXME CoP name=name, path_or_buf=path_or_buf, **kwargs, @@ -1071,7 +1084,7 @@ def add_html_asset( name: str, io: os.PathLike | str, **kwargs, - ) -> HTMLAsset: # type: ignore[valid-type] + ) -> HTMLAsset: # type: ignore[valid-type] # FIXME CoP """ Add an HTML data asset to the datasource. @@ -1083,7 +1096,7 @@ def add_html_asset( Returns: The HTMLAsset that has been added to this datasource. """ - asset = HTMLAsset( # type: ignore[call-arg] + asset = HTMLAsset( # type: ignore[call-arg] # FIXME CoP name=name, io=io, **kwargs, @@ -1109,7 +1122,7 @@ def read_html( A Batch using an ephemeral HTMLAsset. """ name: str = self._validate_asset_name(asset_name=asset_name) - asset: HTMLAsset = self.add_html_asset( # type: ignore[valid-type] + asset: HTMLAsset = self.add_html_asset( # type: ignore[valid-type] # FIXME CoP name=name, io=io, **kwargs, @@ -1122,7 +1135,7 @@ def add_json_asset( name: str, path_or_buf: pydantic.Json | pydantic.FilePath | pydantic.AnyUrl, **kwargs, - ) -> JSONAsset: # type: ignore[valid-type] + ) -> JSONAsset: # type: ignore[valid-type] # FIXME CoP """ Add a JSON data asset to the datasource. @@ -1134,7 +1147,7 @@ def add_json_asset( Returns: The JSONAsset that has been added to this datasource. """ - asset = JSONAsset( # type: ignore[call-arg] + asset = JSONAsset( # type: ignore[call-arg] # FIXME CoP name=name, path_or_buf=path_or_buf, **kwargs, @@ -1160,7 +1173,7 @@ def read_json( A Batch using an ephemeral JSONAsset. 
""" name: str = self._validate_asset_name(asset_name=asset_name) - asset: JSONAsset = self.add_json_asset( # type: ignore[valid-type] + asset: JSONAsset = self.add_json_asset( # type: ignore[valid-type] # FIXME CoP name=name, path_or_buf=path_or_buf, **kwargs, @@ -1173,7 +1186,7 @@ def add_orc_asset( name: str, path: pydantic.FilePath | pydantic.AnyUrl, **kwargs, - ) -> ORCAsset: # type: ignore[valid-type] + ) -> ORCAsset: # type: ignore[valid-type] # FIXME CoP """ Add an ORC file as a DataAsset to this PandasDatasource object. @@ -1185,7 +1198,7 @@ def add_orc_asset( Returns: The ORCAsset that has been added to this datasource. """ - asset = ORCAsset( # type: ignore[call-arg] + asset = ORCAsset( # type: ignore[call-arg] # FIXME CoP name=name, path=path, **kwargs, @@ -1209,9 +1222,9 @@ def read_orc( Returns: A Batch using an ephemeral ORCAsset. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP name: str = self._validate_asset_name(asset_name=asset_name) - asset: ORCAsset = self.add_orc_asset( # type: ignore[valid-type] + asset: ORCAsset = self.add_orc_asset( # type: ignore[valid-type] # FIXME CoP name=name, path=path, **kwargs, @@ -1224,7 +1237,7 @@ def add_parquet_asset( name: str, path: pydantic.FilePath | pydantic.AnyUrl, **kwargs, - ) -> ParquetAsset: # type: ignore[valid-type] + ) -> ParquetAsset: # type: ignore[valid-type] # FIXME CoP """ Add a parquet file as a DataAsset to this PandasDatasource object. @@ -1236,7 +1249,7 @@ def add_parquet_asset( Returns: The ParquetAsset that has been added to this datasource. """ - asset = ParquetAsset( # type: ignore[call-arg] + asset = ParquetAsset( # type: ignore[call-arg] # FIXME CoP name=name, path=path, **kwargs, @@ -1260,9 +1273,9 @@ def read_parquet( Returns: A Batch using an ephemeral ParquetAsset. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP name: str = self._validate_asset_name(asset_name=asset_name) - asset: ParquetAsset = self.add_parquet_asset( # type: ignore[valid-type] + asset: ParquetAsset = self.add_parquet_asset( # type: ignore[valid-type] # FIXME CoP name=name, path=path, **kwargs, @@ -1275,7 +1288,7 @@ def add_pickle_asset( name: str, filepath_or_buffer: pydantic.FilePath | pydantic.AnyUrl, **kwargs, - ) -> PickleAsset: # type: ignore[valid-type] + ) -> PickleAsset: # type: ignore[valid-type] # FIXME CoP """ Add a pickle file as a DataAsset to this PandasDatasource object. @@ -1287,7 +1300,7 @@ def add_pickle_asset( Returns: The PickleAsset that has been added to this datasource. """ - asset = PickleAsset( # type: ignore[call-arg] + asset = PickleAsset( # type: ignore[call-arg] # FIXME CoP name=name, filepath_or_buffer=filepath_or_buffer, **kwargs, @@ -1311,9 +1324,9 @@ def read_pickle( Returns: A Batch using an ephemeral PickleAsset. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP name: str = self._validate_asset_name(asset_name=asset_name) - asset: PickleAsset = self.add_pickle_asset( # type: ignore[valid-type] + asset: PickleAsset = self.add_pickle_asset( # type: ignore[valid-type] # FIXME CoP name=name, filepath_or_buffer=filepath_or_buffer, **kwargs, @@ -1326,7 +1339,7 @@ def add_sas_asset( name: str, filepath_or_buffer: pydantic.FilePath | pydantic.AnyUrl, **kwargs, - ) -> SASAsset: # type: ignore[valid-type] + ) -> SASAsset: # type: ignore[valid-type] # FIXME CoP """ Add a SAS data asset to the datasource. @@ -1338,7 +1351,7 @@ def add_sas_asset( Returns: The SASAsset that has been added to this datasource. """ - asset = SASAsset( # type: ignore[call-arg] + asset = SASAsset( # type: ignore[call-arg] # FIXME CoP name=name, filepath_or_buffer=filepath_or_buffer, **kwargs, @@ -1364,7 +1377,7 @@ def read_sas( A Batch using an ephemeral SASAsset. 
""" name: str = self._validate_asset_name(asset_name=asset_name) - asset: SASAsset = self.add_sas_asset( # type: ignore[valid-type] + asset: SASAsset = self.add_sas_asset( # type: ignore[valid-type] # FIXME CoP name=name, filepath_or_buffer=filepath_or_buffer, **kwargs, @@ -1377,7 +1390,7 @@ def add_spss_asset( name: str, path: pydantic.FilePath, **kwargs, - ) -> SPSSAsset: # type: ignore[valid-type] + ) -> SPSSAsset: # type: ignore[valid-type] # FIXME CoP """ Add an SPSS data asset to the datasource. @@ -1389,7 +1402,7 @@ def add_spss_asset( Returns: The SPSSAsset that has been added to this datasource. """ - asset = SPSSAsset( # type: ignore[call-arg] + asset = SPSSAsset( # type: ignore[call-arg] # FIXME CoP name=name, path=path, **kwargs, @@ -1415,7 +1428,7 @@ def read_spss( A Batch using an ephemeral SPSSAsset. """ name: str = self._validate_asset_name(asset_name=asset_name) - asset: SPSSAsset = self.add_parquet_asset( # type: ignore[valid-type] + asset: SPSSAsset = self.add_parquet_asset( # type: ignore[valid-type] # FIXME CoP name=name, path=path, **kwargs, @@ -1426,10 +1439,10 @@ def read_spss( def add_sql_asset( self, name: str, - sql: sa.select | sa.text | str, # type: ignore[valid-type] + sql: sa.select | sa.text | str, # type: ignore[valid-type] # FIXME CoP con: sqlalchemy.Engine | sqlite3.Connection | str, **kwargs, - ) -> SQLAsset: # type: ignore[valid-type] + ) -> SQLAsset: # type: ignore[valid-type] # FIXME CoP """ Add a SQL data asset to the datasource. @@ -1442,7 +1455,7 @@ def add_sql_asset( Returns: The SQLAsset that has been added to this datasource. 
""" - asset = SQLAsset( # type: ignore[call-arg] + asset = SQLAsset( # type: ignore[call-arg] # FIXME CoP name=name, sql=sql, con=con, @@ -1453,7 +1466,7 @@ def add_sql_asset( @public_api def read_sql( self, - sql: sa.select | sa.text | str, # type: ignore[valid-type] + sql: sa.select | sa.text | str, # type: ignore[valid-type] # FIXME CoP con: sqlalchemy.Engine | sqlite3.Connection | str, asset_name: Optional[str] = None, **kwargs, @@ -1471,7 +1484,7 @@ def read_sql( A Batch using an ephemeral SQLAsset. """ name: str = self._validate_asset_name(asset_name=asset_name) - asset: SQLAsset = self.add_sql_asset( # type: ignore[valid-type] + asset: SQLAsset = self.add_sql_asset( # type: ignore[valid-type] # FIXME CoP name=name, sql=sql, con=con, @@ -1483,10 +1496,10 @@ def read_sql( def add_sql_query_asset( self, name: str, - sql: sa.select | sa.text | str, # type: ignore[valid-type] + sql: sa.select | sa.text | str, # type: ignore[valid-type] # FIXME CoP con: sqlalchemy.Engine | sqlite3.Connection | str, **kwargs, - ) -> SQLQueryAsset: # type: ignore[valid-type] + ) -> SQLQueryAsset: # type: ignore[valid-type] # FIXME CoP """ Add a SQL query data asset to the datasource. @@ -1499,7 +1512,7 @@ def add_sql_query_asset( Returns: The SQLQueryAsset that has been added to this datasource. """ - asset = SQLQueryAsset( # type: ignore[call-arg] + asset = SQLQueryAsset( # type: ignore[call-arg] # FIXME CoP name=name, sql=sql, con=con, @@ -1510,7 +1523,7 @@ def add_sql_query_asset( @public_api def read_sql_query( self, - sql: sa.select | sa.text | str, # type: ignore[valid-type] + sql: sa.select | sa.text | str, # type: ignore[valid-type] # FIXME CoP con: sqlalchemy.Engine | sqlite3.Connection | str, asset_name: Optional[str] = None, **kwargs, @@ -1528,7 +1541,7 @@ def read_sql_query( A Batch using an ephemeral SQLQueryAsset. 
""" name: str = self._validate_asset_name(asset_name=asset_name) - asset: SQLQueryAsset = self.add_sql_query_asset( # type: ignore[valid-type] + asset: SQLQueryAsset = self.add_sql_query_asset( # type: ignore[valid-type] # FIXME CoP name=name, sql=sql, con=con, @@ -1543,7 +1556,7 @@ def add_sql_table_asset( table_name: str, con: sqlalchemy.Engine | str, **kwargs, - ) -> SQLTableAsset: # type: ignore[valid-type] + ) -> SQLTableAsset: # type: ignore[valid-type] # FIXME CoP """ Add a SQL table data asset to the datasource. @@ -1556,7 +1569,7 @@ def add_sql_table_asset( Returns: The SQLTableAsset that has been added to this datasource. """ - asset = SQLTableAsset( # type: ignore[call-arg] + asset = SQLTableAsset( # type: ignore[call-arg] # FIXME CoP name=name, table_name=table_name, con=con, @@ -1585,7 +1598,7 @@ def read_sql_table( A Batch using an ephemeral SQLTableAsset. """ name: str = self._validate_asset_name(asset_name=asset_name) - asset: SQLTableAsset = self.add_sql_table_asset( # type: ignore[valid-type] + asset: SQLTableAsset = self.add_sql_table_asset( # type: ignore[valid-type] # FIXME CoP name=name, table_name=table_name, con=con, @@ -1599,7 +1612,7 @@ def add_stata_asset( name: str, filepath_or_buffer: pydantic.FilePath | pydantic.AnyUrl, **kwargs, - ) -> StataAsset: # type: ignore[valid-type] + ) -> StataAsset: # type: ignore[valid-type] # FIXME CoP """ Add a Stata data asset to the datasource. @@ -1611,7 +1624,7 @@ def add_stata_asset( Returns: The StataAsset that has been added to this datasource. """ - asset = StataAsset( # type: ignore[call-arg] + asset = StataAsset( # type: ignore[call-arg] # FIXME CoP name=name, filepath_or_buffer=filepath_or_buffer, **kwargs, @@ -1637,7 +1650,7 @@ def read_stata( A Batch using an ephemeral StataAsset. 
""" name: str = self._validate_asset_name(asset_name=asset_name) - asset: StataAsset = self.add_stata_asset( # type: ignore[valid-type] + asset: StataAsset = self.add_stata_asset( # type: ignore[valid-type] # FIXME CoP name=name, filepath_or_buffer=filepath_or_buffer, **kwargs, @@ -1650,7 +1663,7 @@ def add_table_asset( name: str, filepath_or_buffer: pydantic.FilePath | pydantic.AnyUrl, **kwargs, - ) -> TableAsset: # type: ignore[valid-type] + ) -> TableAsset: # type: ignore[valid-type] # FIXME CoP """ Add a Table data asset to the datasource. @@ -1662,7 +1675,7 @@ def add_table_asset( Returns: The TableAsset that has been added to this datasource. """ - asset = TableAsset( # type: ignore[call-arg] + asset = TableAsset( # type: ignore[call-arg] # FIXME CoP name=name, filepath_or_buffer=filepath_or_buffer, **kwargs, @@ -1688,7 +1701,7 @@ def read_table( A Batch using an ephemeral TableAsset. """ name: str = self._validate_asset_name(asset_name=asset_name) - asset: TableAsset = self.add_table_asset( # type: ignore[valid-type] + asset: TableAsset = self.add_table_asset( # type: ignore[valid-type] # FIXME CoP name=name, filepath_or_buffer=filepath_or_buffer, **kwargs, @@ -1701,7 +1714,7 @@ def add_xml_asset( name: str, path_or_buffer: pydantic.FilePath | pydantic.AnyUrl, **kwargs, - ) -> XMLAsset: # type: ignore[valid-type] + ) -> XMLAsset: # type: ignore[valid-type] # FIXME CoP """ Add an XML data asset to the datasource. @@ -1713,7 +1726,7 @@ def add_xml_asset( Returns: The XMLAsset that has been added to this datasource. """ - asset = XMLAsset( # type: ignore[call-arg] + asset = XMLAsset( # type: ignore[call-arg] # FIXME CoP name=name, path_or_buffer=path_or_buffer, **kwargs, @@ -1739,7 +1752,7 @@ def read_xml( A Batch using an ephemeral XMLAsset. 
""" name: str = self._validate_asset_name(asset_name=asset_name) - asset: XMLAsset = self.add_xml_asset( # type: ignore[valid-type] + asset: XMLAsset = self.add_xml_asset( # type: ignore[valid-type] # FIXME CoP name=name, path_or_buffer=path_or_buffer, **kwargs, @@ -1748,117 +1761,117 @@ def read_xml( # attr-defined issue # https://github.com/python/mypy/issues/12472 - add_clipboard_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] + add_clipboard_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_clipboard_asset, ClipboardAsset, exclude={"type"} ) - read_clipboard.__signature__ = _merge_signatures( # type: ignore[attr-defined] + read_clipboard.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP read_clipboard, ClipboardAsset, exclude={"type"} ) - add_csv_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] + add_csv_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_csv_asset, CSVAsset, exclude={"type"} ) - read_csv.__signature__ = _merge_signatures(read_csv, CSVAsset, exclude={"type"}) # type: ignore[attr-defined] - add_excel_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] + read_csv.__signature__ = _merge_signatures(read_csv, CSVAsset, exclude={"type"}) # type: ignore[attr-defined] # FIXME CoP + add_excel_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_excel_asset, ExcelAsset, exclude={"type"} ) - read_excel.__signature__ = _merge_signatures( # type: ignore[attr-defined] + read_excel.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP read_excel, ExcelAsset, exclude={"type"} ) - add_feather_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] + add_feather_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_feather_asset, FeatherAsset, exclude={"type"} ) - read_feather.__signature__ = 
_merge_signatures( # type: ignore[attr-defined] + read_feather.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP read_feather, FeatherAsset, exclude={"type"} ) - add_fwf_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] + add_fwf_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_fwf_asset, FWFAsset, exclude={"type"} ) - read_fwf.__signature__ = _merge_signatures(read_fwf, FWFAsset, exclude={"type"}) # type: ignore[attr-defined] - add_gbq_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] + read_fwf.__signature__ = _merge_signatures(read_fwf, FWFAsset, exclude={"type"}) # type: ignore[attr-defined] # FIXME CoP + add_gbq_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_gbq_asset, GBQAsset, exclude={"type"} ) - read_gbq.__signature__ = _merge_signatures(read_gbq, GBQAsset, exclude={"type"}) # type: ignore[attr-defined] - add_hdf_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] + read_gbq.__signature__ = _merge_signatures(read_gbq, GBQAsset, exclude={"type"}) # type: ignore[attr-defined] # FIXME CoP + add_hdf_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_hdf_asset, HDFAsset, exclude={"type"} ) - read_hdf.__signature__ = _merge_signatures( # type: ignore[attr-defined] + read_hdf.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP read_hdf, HDFAsset, exclude={"type"} ) - add_html_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] + add_html_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_html_asset, HTMLAsset, exclude={"type"} ) - read_html.__signature__ = _merge_signatures( # type: ignore[attr-defined] + read_html.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP read_html, HTMLAsset, exclude={"type"} ) - add_json_asset.__signature__ = _merge_signatures( # type: 
ignore[attr-defined] + add_json_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_json_asset, JSONAsset, exclude={"type"} ) - read_json.__signature__ = _merge_signatures( # type: ignore[attr-defined] + read_json.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP read_json, JSONAsset, exclude={"type"} ) - add_orc_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] + add_orc_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_orc_asset, ORCAsset, exclude={"type"} ) - read_orc.__signature__ = _merge_signatures( # type: ignore[attr-defined] + read_orc.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP read_orc, ORCAsset, exclude={"type"} ) - add_parquet_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] + add_parquet_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_parquet_asset, ParquetAsset, exclude={"type"} ) - read_parquet.__signature__ = _merge_signatures( # type: ignore[attr-defined] + read_parquet.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP read_parquet, ParquetAsset, exclude={"type"} ) - add_pickle_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] + add_pickle_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_pickle_asset, PickleAsset, exclude={"type"} ) - read_pickle.__signature__ = _merge_signatures( # type: ignore[attr-defined] + read_pickle.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP read_pickle, PickleAsset, exclude={"type"} ) - add_sas_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] + add_sas_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_sas_asset, SASAsset, exclude={"type"} ) - read_sas.__signature__ = _merge_signatures( # type: ignore[attr-defined] + read_sas.__signature__ = 
_merge_signatures( # type: ignore[attr-defined] # FIXME CoP read_sas, SASAsset, exclude={"type"} ) - add_spss_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] + add_spss_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_spss_asset, SPSSAsset, exclude={"type"} ) - read_spss.__signature__ = _merge_signatures( # type: ignore[attr-defined] + read_spss.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP read_spss, SPSSAsset, exclude={"type"} ) - add_sql_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] + add_sql_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_sql_asset, SQLAsset, exclude={"type"} ) - read_sql.__signature__ = _merge_signatures( # type: ignore[attr-defined] + read_sql.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP read_sql, SQLAsset, exclude={"type"} ) - add_sql_query_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] + add_sql_query_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_sql_query_asset, SQLQueryAsset, exclude={"type"} ) - read_sql_query.__signature__ = _merge_signatures( # type: ignore[attr-defined] + read_sql_query.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP read_sql_query, SQLQueryAsset, exclude={"type"} ) - add_sql_table_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] + add_sql_table_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_sql_table_asset, SQLTableAsset, exclude={"type"} ) - read_sql_table.__signature__ = _merge_signatures( # type: ignore[attr-defined] + read_sql_table.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP read_sql_table, SQLTableAsset, exclude={"type"} ) - add_stata_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] + add_stata_asset.__signature__ = 
_merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_stata_asset, StataAsset, exclude={"type"} ) - read_stata.__signature__ = _merge_signatures( # type: ignore[attr-defined] + read_stata.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP read_stata, StataAsset, exclude={"type"} ) - add_table_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] + add_table_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_table_asset, TableAsset, exclude={"type"} ) - read_table.__signature__ = _merge_signatures( # type: ignore[attr-defined] + read_table.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP read_table, TableAsset, exclude={"type"} ) - add_xml_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] + add_xml_asset.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_xml_asset, XMLAsset, exclude={"type"} ) - read_xml.__signature__ = _merge_signatures( # type: ignore[attr-defined] + read_xml.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP read_xml, XMLAsset, exclude={"type"} ) diff --git a/great_expectations/datasource/fluent/pandas_datasource.pyi b/great_expectations/datasource/fluent/pandas_datasource.pyi index 1e434a9d2a25..12bd2150585a 100644 --- a/great_expectations/datasource/fluent/pandas_datasource.pyi +++ b/great_expectations/datasource/fluent/pandas_datasource.pyi @@ -84,7 +84,7 @@ class _PandasDataAsset(DataAsset): @override def _validate_batch_request(self, batch_request: BatchRequest) -> None: ... 
@override - def json( # noqa: PLR0913 + def json( # noqa: PLR0913 # FIXME CoP self, *, include: Union[AbstractSetIntStr, MappingIntStrAny, None] = ..., @@ -139,14 +139,14 @@ _PandasDataAssetT = TypeVar("_PandasDataAssetT", bound=_PandasDataAsset) class _PandasDatasource(Datasource): asset_types: ClassVar[Sequence[Type[DataAsset]]] - assets: MutableSequence[_PandasDataAssetT] # type: ignore[valid-type] + assets: MutableSequence[_PandasDataAssetT] # type: ignore[valid-type] # FIXME CoP @property @override def execution_engine_type(self) -> Type[PandasExecutionEngine]: ... @override def test_connection(self, test_assets: bool = ...) -> None: ... @override - def json( # noqa: PLR0913 + def json( # noqa: PLR0913 # FIXME CoP self, *, include: Union[AbstractSetIntStr, MappingIntStrAny, None] = ..., @@ -171,7 +171,7 @@ class PandasDatasource(_PandasDatasource): def test_connection(self, test_assets: bool = ...) -> None: ... @deprecated_argument( argument_name="dataframe", - message='The "dataframe" argument is no longer part of "PandasDatasource.add_dataframe_asset()" method call; instead, "dataframe" is the required argument to "DataFrameAsset.build_batch_request()" method.', # noqa: E501 + message='The "dataframe" argument is no longer part of "PandasDatasource.add_dataframe_asset()" method call; instead, "dataframe" is the required argument to "DataFrameAsset.build_batch_request()" method.', # noqa: E501 # FIXME CoP version="0.16.15", ) def add_dataframe_asset( @@ -196,7 +196,7 @@ class PandasDatasource(_PandasDatasource): sep: str = "\\s+", kwargs: typing.Union[dict, None] = ..., ) -> ClipboardAsset: ... - def add_csv_asset( # noqa: PLR0913 + def add_csv_asset( # noqa: PLR0913 # FIXME CoP self, name: str, filepath_or_buffer: pydantic.FilePath | pydantic.AnyUrl, @@ -253,7 +253,7 @@ class PandasDatasource(_PandasDatasource): memory_map: bool = ..., storage_options: StorageOptions = ..., ) -> CSVAsset: ... 
- def add_excel_asset( # noqa: PLR0913 + def add_excel_asset( # noqa: PLR0913 # FIXME CoP self, name: str, io: os.PathLike | str | bytes, @@ -283,7 +283,7 @@ class PandasDatasource(_PandasDatasource): mangle_dupe_cols: bool = ..., storage_options: StorageOptions = ..., ) -> ExcelAsset: ... - def add_feather_asset( # noqa: PLR0913 + def add_feather_asset( # noqa: PLR0913 # FIXME CoP self, name: str, path: pydantic.FilePath | pydantic.AnyUrl, @@ -293,7 +293,7 @@ class PandasDatasource(_PandasDatasource): use_threads: bool = ..., storage_options: StorageOptions = ..., ) -> FeatherAsset: ... - def add_fwf_asset( # noqa: PLR0913 + def add_fwf_asset( # noqa: PLR0913 # FIXME CoP self, name: str, filepath_or_buffer: pydantic.FilePath | pydantic.AnyUrl, @@ -304,7 +304,7 @@ class PandasDatasource(_PandasDatasource): infer_nrows: int = ..., kwargs: Optional[dict] = ..., ) -> FWFAsset: ... - def add_gbq_asset( # noqa: PLR0913 + def add_gbq_asset( # noqa: PLR0913 # FIXME CoP self, name: str, query: str, @@ -323,7 +323,7 @@ class PandasDatasource(_PandasDatasource): max_results: typing.Union[int, None] = ..., progress_bar_type: typing.Union[str, None] = ..., ) -> GBQAsset: ... - def add_hdf_asset( # noqa: PLR0913 + def add_hdf_asset( # noqa: PLR0913 # FIXME CoP self, name: str, path_or_buf: str | os.PathLike | pd.HDFStore, @@ -340,7 +340,7 @@ class PandasDatasource(_PandasDatasource): chunksize: typing.Union[int, None] = ..., kwargs: typing.Union[dict, None] = ..., ) -> HDFAsset: ... - def add_html_asset( # noqa: PLR0913 + def add_html_asset( # noqa: PLR0913 # FIXME CoP self, name: str, io: os.PathLike | str, @@ -361,7 +361,7 @@ class PandasDatasource(_PandasDatasource): keep_default_na: bool = ..., displayed_only: bool = ..., ) -> HTMLAsset: ... 
- def add_json_asset( # noqa: PLR0913 + def add_json_asset( # noqa: PLR0913 # FIXME CoP self, name: str, path_or_buf: pydantic.Json | pydantic.FilePath | pydantic.AnyUrl, @@ -392,7 +392,7 @@ class PandasDatasource(_PandasDatasource): columns: typing.Union[typing.List[str], None] = ..., kwargs: typing.Union[dict, None] = ..., ) -> ORCAsset: ... - def add_parquet_asset( # noqa: PLR0913 + def add_parquet_asset( # noqa: PLR0913 # FIXME CoP self, name: str, path: pydantic.FilePath | pydantic.AnyUrl, @@ -413,7 +413,7 @@ class PandasDatasource(_PandasDatasource): compression: CompressionOptions = "infer", storage_options: StorageOptions = ..., ) -> PickleAsset: ... - def add_sas_asset( # noqa: PLR0913 + def add_sas_asset( # noqa: PLR0913 # FIXME CoP self, name: str, filepath_or_buffer: pydantic.FilePath | pydantic.AnyUrl, @@ -435,10 +435,10 @@ class PandasDatasource(_PandasDatasource): usecols: typing.Union[int, str, typing.Sequence[int], None] = ..., convert_categoricals: bool = ..., ) -> SPSSAsset: ... - def add_sql_asset( # noqa: PLR0913 + def add_sql_asset( # noqa: PLR0913 # FIXME CoP self, name: str, - sql: sa.select | sa.text | str, # type: ignore[valid-type] + sql: sa.select | sa.text | str, # type: ignore[valid-type] # FIXME CoP con: sqlalchemy.Engine | sqlite3.Connection | str, *, batch_metadata: Optional[BatchMetadata] = ..., @@ -449,10 +449,10 @@ class PandasDatasource(_PandasDatasource): columns: typing.Union[typing.List[str], None] = ..., chunksize: typing.Union[int, None] = ..., ) -> SQLAsset: ... 
- def add_sql_query_asset( # noqa: PLR0913 + def add_sql_query_asset( # noqa: PLR0913 # FIXME CoP self, name: str, - sql: sa.select | sa.text | str, # type: ignore[valid-type] + sql: sa.select | sa.text | str, # type: ignore[valid-type] # FIXME CoP con: sqlalchemy.Engine | sqlite3.Connection | str, *, batch_metadata: Optional[BatchMetadata] = ..., @@ -463,7 +463,7 @@ class PandasDatasource(_PandasDatasource): chunksize: typing.Union[int, None] = ..., dtype: typing.Union[dict, None] = ..., ) -> SQLQueryAsset: ... - def add_sql_table_asset( # noqa: PLR0913 + def add_sql_table_asset( # noqa: PLR0913 # FIXME CoP self, name: str, table_name: str, @@ -477,7 +477,7 @@ class PandasDatasource(_PandasDatasource): columns: typing.Union[typing.List[str], None] = ..., chunksize: typing.Union[int, None] = ..., ) -> SQLTableAsset: ... - def add_stata_asset( # noqa: PLR0913 + def add_stata_asset( # noqa: PLR0913 # FIXME CoP self, name: str, filepath_or_buffer: pydantic.FilePath | pydantic.AnyUrl, @@ -495,7 +495,7 @@ class PandasDatasource(_PandasDatasource): compression: CompressionOptions = "infer", storage_options: StorageOptions = ..., ) -> StataAsset: ... - def add_table_asset( # noqa: PLR0913 + def add_table_asset( # noqa: PLR0913 # FIXME CoP self, name: str, filepath_or_buffer: pydantic.FilePath | pydantic.AnyUrl, @@ -553,7 +553,7 @@ class PandasDatasource(_PandasDatasource): float_precision: typing.Union[str, None] = ..., storage_options: StorageOptions = ..., ) -> TableAsset: ... - def add_xml_asset( # noqa: PLR0913 + def add_xml_asset( # noqa: PLR0913 # FIXME CoP self, name: str, path_or_buffer: pydantic.FilePath | pydantic.AnyUrl, @@ -578,7 +578,7 @@ class PandasDatasource(_PandasDatasource): sep: str = r"\s+", kwargs: typing.Union[dict, None] = ..., ) -> Batch: ... 
- def read_csv( # noqa: PLR0913 + def read_csv( # noqa: PLR0913 # FIXME CoP self, filepath_or_buffer: pydantic.FilePath | pydantic.AnyUrl, *, @@ -635,7 +635,7 @@ class PandasDatasource(_PandasDatasource): memory_map: bool = ..., storage_options: StorageOptions = ..., ) -> Batch: ... - def read_excel( # noqa: PLR0913 + def read_excel( # noqa: PLR0913 # FIXME CoP self, io: os.PathLike | str | bytes, *, @@ -665,7 +665,7 @@ class PandasDatasource(_PandasDatasource): mangle_dupe_cols: bool = ..., storage_options: StorageOptions = ..., ) -> Batch: ... - def read_feather( # noqa: PLR0913 + def read_feather( # noqa: PLR0913 # FIXME CoP self, path: pydantic.FilePath | pydantic.AnyUrl, *, @@ -675,7 +675,7 @@ class PandasDatasource(_PandasDatasource): use_threads: bool = ..., storage_options: StorageOptions = ..., ) -> Batch: ... - def read_fwf( # noqa: PLR0913 + def read_fwf( # noqa: PLR0913 # FIXME CoP self, filepath_or_buffer: pydantic.FilePath | pydantic.AnyUrl, *, @@ -685,7 +685,7 @@ class PandasDatasource(_PandasDatasource): infer_nrows: int = ..., kwargs: Optional[dict] = ..., ) -> Batch: ... - def read_gbq( # noqa: PLR0913 + def read_gbq( # noqa: PLR0913 # FIXME CoP self, query: str, *, @@ -704,7 +704,7 @@ class PandasDatasource(_PandasDatasource): max_results: typing.Union[int, None] = ..., progress_bar_type: typing.Union[str, None] = ..., ) -> Batch: ... - def read_hdf( # noqa: PLR0913 + def read_hdf( # noqa: PLR0913 # FIXME CoP self, path_or_buf: pd.HDFStore | os.PathLike | str, *, @@ -721,7 +721,7 @@ class PandasDatasource(_PandasDatasource): chunksize: typing.Union[int, None] = ..., kwargs: typing.Union[dict, None] = ..., ) -> Batch: ... - def read_html( # noqa: PLR0913 + def read_html( # noqa: PLR0913 # FIXME CoP self, io: os.PathLike | str, *, @@ -742,7 +742,7 @@ class PandasDatasource(_PandasDatasource): keep_default_na: bool = ..., displayed_only: bool = ..., ) -> Batch: ... 
- def read_json( # noqa: PLR0913 + def read_json( # noqa: PLR0913 # FIXME CoP self, path_or_buf: pydantic.Json | pydantic.FilePath | pydantic.AnyUrl, *, @@ -773,7 +773,7 @@ class PandasDatasource(_PandasDatasource): columns: typing.Union[typing.List[str], None] = ..., kwargs: typing.Union[dict, None] = ..., ) -> Batch: ... - def read_parquet( # noqa: PLR0913 + def read_parquet( # noqa: PLR0913 # FIXME CoP self, path: pydantic.FilePath | pydantic.AnyUrl, *, @@ -794,7 +794,7 @@ class PandasDatasource(_PandasDatasource): compression: CompressionOptions = "infer", storage_options: StorageOptions = ..., ) -> Batch: ... - def read_sas( # noqa: PLR0913 + def read_sas( # noqa: PLR0913 # FIXME CoP self, filepath_or_buffer: pydantic.FilePath | pydantic.AnyUrl, *, @@ -816,9 +816,9 @@ class PandasDatasource(_PandasDatasource): usecols: typing.Union[int, str, typing.Sequence[int], None] = ..., convert_categoricals: bool = ..., ) -> Batch: ... - def read_sql( # noqa: PLR0913 + def read_sql( # noqa: PLR0913 # FIXME CoP self, - sql: sa.select | sa.text | str, # type: ignore[valid-type] + sql: sa.select | sa.text | str, # type: ignore[valid-type] # FIXME CoP con: sqlalchemy.Engine | sqlite3.Connection | str, *, asset_name: Optional[str] = ..., @@ -830,9 +830,9 @@ class PandasDatasource(_PandasDatasource): columns: typing.Union[typing.List[str], None] = ..., chunksize: typing.Union[int, None] = ..., ) -> Batch: ... - def read_sql_query( # noqa: PLR0913 + def read_sql_query( # noqa: PLR0913 # FIXME CoP self, - sql: sa.select | sa.text | str, # type: ignore[valid-type] + sql: sa.select | sa.text | str, # type: ignore[valid-type] # FIXME CoP con: sqlalchemy.Engine | sqlite3.Connection | str, *, asset_name: Optional[str] = ..., @@ -844,7 +844,7 @@ class PandasDatasource(_PandasDatasource): chunksize: typing.Union[int, None] = ..., dtype: typing.Union[dict, None] = ..., ) -> Batch: ... 
- def read_sql_table( # noqa: PLR0913 + def read_sql_table( # noqa: PLR0913 # FIXME CoP self, table_name: str, con: sqlalchemy.Engine | str, @@ -858,7 +858,7 @@ class PandasDatasource(_PandasDatasource): columns: typing.Union[typing.List[str], None] = ..., chunksize: typing.Union[int, None] = ..., ) -> Batch: ... - def read_stata( # noqa: PLR0913 + def read_stata( # noqa: PLR0913 # FIXME CoP self, filepath_or_buffer: pydantic.FilePath | pydantic.AnyUrl, *, @@ -876,7 +876,7 @@ class PandasDatasource(_PandasDatasource): compression: CompressionOptions = "infer", storage_options: StorageOptions = ..., ) -> Batch: ... - def read_table( # noqa: PLR0913 + def read_table( # noqa: PLR0913 # FIXME CoP self, filepath_or_buffer: pydantic.FilePath | pydantic.AnyUrl, *, @@ -934,7 +934,7 @@ class PandasDatasource(_PandasDatasource): float_precision: typing.Union[str, None] = ..., storage_options: StorageOptions = ..., ) -> Batch: ... - def read_xml( # noqa: PLR0913 + def read_xml( # noqa: PLR0913 # FIXME CoP self, path_or_buffer: pydantic.FilePath | pydantic.AnyUrl, *, diff --git a/great_expectations/datasource/fluent/pandas_dbfs_datasource.py b/great_expectations/datasource/fluent/pandas_dbfs_datasource.py index 373e128807cd..b2ce81eb5f40 100644 --- a/great_expectations/datasource/fluent/pandas_dbfs_datasource.py +++ b/great_expectations/datasource/fluent/pandas_dbfs_datasource.py @@ -34,7 +34,7 @@ def _build_data_connector( ) -> None: """Builds and attaches the `DBFSDataConnector` to the asset.""" if kwargs: - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"_build_data_connector() got unexpected keyword arguments {list(kwargs.keys())}" ) data_asset._data_connector = self.data_connector_type.build_data_connector( diff --git a/great_expectations/datasource/fluent/pandas_dbfs_datasource.pyi b/great_expectations/datasource/fluent/pandas_dbfs_datasource.pyi index 3c3f8e12e66b..376d90d1f6ab 100644 --- 
a/great_expectations/datasource/fluent/pandas_dbfs_datasource.pyi +++ b/great_expectations/datasource/fluent/pandas_dbfs_datasource.pyi @@ -41,10 +41,10 @@ from great_expectations.datasource.fluent.interfaces import ( logger: Logger class PandasDBFSDatasource(PandasFilesystemDatasource): - type: Literal["pandas_dbfs"] # type: ignore[assignment] + type: Literal["pandas_dbfs"] # type: ignore[assignment] # FIXME CoP @override - def add_csv_asset( # noqa: PLR0913 + def add_csv_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -102,7 +102,7 @@ class PandasDBFSDatasource(PandasFilesystemDatasource): storage_options: StorageOptions = ..., ) -> CSVAsset: ... @override - def add_excel_asset( # noqa: PLR0913 + def add_excel_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -133,7 +133,7 @@ class PandasDBFSDatasource(PandasFilesystemDatasource): storage_options: StorageOptions = ..., ) -> ExcelAsset: ... @override - def add_feather_asset( # noqa: PLR0913 + def add_feather_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -144,7 +144,7 @@ class PandasDBFSDatasource(PandasFilesystemDatasource): storage_options: StorageOptions = ..., ) -> FeatherAsset: ... @override - def add_hdf_asset( # noqa: PLR0913 + def add_hdf_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -162,7 +162,7 @@ class PandasDBFSDatasource(PandasFilesystemDatasource): kwargs: typing.Union[dict, None] = ..., ) -> HDFAsset: ... @override - def add_html_asset( # noqa: PLR0913 + def add_html_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -184,7 +184,7 @@ class PandasDBFSDatasource(PandasFilesystemDatasource): displayed_only: bool = ..., ) -> HTMLAsset: ... @override - def add_json_asset( # noqa: PLR0913 + def add_json_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -217,7 +217,7 @@ class PandasDBFSDatasource(PandasFilesystemDatasource): kwargs: typing.Union[dict, None] = ..., ) -> ORCAsset: ... 
@override - def add_parquet_asset( # noqa: PLR0913 + def add_parquet_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -240,7 +240,7 @@ class PandasDBFSDatasource(PandasFilesystemDatasource): storage_options: StorageOptions = ..., ) -> PickleAsset: ... @override - def add_sas_asset( # noqa: PLR0913 + def add_sas_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -264,7 +264,7 @@ class PandasDBFSDatasource(PandasFilesystemDatasource): convert_categoricals: bool = ..., ) -> SPSSAsset: ... @override - def add_stata_asset( # noqa: PLR0913 + def add_stata_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -283,7 +283,7 @@ class PandasDBFSDatasource(PandasFilesystemDatasource): storage_options: StorageOptions = ..., ) -> StataAsset: ... @override - def add_xml_asset( # noqa: PLR0913 + def add_xml_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, diff --git a/great_expectations/datasource/fluent/pandas_filesystem_datasource.py b/great_expectations/datasource/fluent/pandas_filesystem_datasource.py index 367528474232..d11278c40025 100644 --- a/great_expectations/datasource/fluent/pandas_filesystem_datasource.py +++ b/great_expectations/datasource/fluent/pandas_filesystem_datasource.py @@ -48,9 +48,9 @@ def test_connection(self, test_assets: bool = True) -> None: Raises: TestConnectionError: If the connection test fails. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if not self.base_directory.exists(): - raise TestConnectionError(f"Path: {self.base_directory.resolve()} does not exist.") # noqa: TRY003 + raise TestConnectionError(f"Path: {self.base_directory.resolve()} does not exist.") # noqa: TRY003 # FIXME CoP if self.assets and test_assets: for asset in self.assets: @@ -62,7 +62,7 @@ def _build_data_connector( ) -> None: """Builds and attaches the `FilesystemDataConnector` to the asset.""" if kwargs: - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"_build_data_connector() got unexpected keyword arguments {list(kwargs.keys())}" ) data_asset._data_connector = self.data_connector_type.build_data_connector( diff --git a/great_expectations/datasource/fluent/pandas_filesystem_datasource.pyi b/great_expectations/datasource/fluent/pandas_filesystem_datasource.pyi index 9aaacf7dd024..8d88e0dbb3c6 100644 --- a/great_expectations/datasource/fluent/pandas_filesystem_datasource.pyi +++ b/great_expectations/datasource/fluent/pandas_filesystem_datasource.pyi @@ -56,7 +56,7 @@ class PandasFilesystemDatasource(_PandasFilePathDatasource): data_context_root_directory: Optional[pathlib.Path] @override def test_connection(self, test_assets: bool = ...) -> None: ... - def add_csv_asset( # noqa: PLR0913 + def add_csv_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -113,7 +113,7 @@ class PandasFilesystemDatasource(_PandasFilePathDatasource): memory_map: bool = ..., storage_options: StorageOptions = ..., ) -> CSVAsset: ... - def add_excel_asset( # noqa: PLR0913 + def add_excel_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -143,7 +143,7 @@ class PandasFilesystemDatasource(_PandasFilePathDatasource): mangle_dupe_cols: bool = ..., storage_options: StorageOptions = ..., ) -> ExcelAsset: ... 
- def add_feather_asset( # noqa: PLR0913 + def add_feather_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -153,7 +153,7 @@ class PandasFilesystemDatasource(_PandasFilePathDatasource): use_threads: bool = ..., storage_options: StorageOptions = ..., ) -> FeatherAsset: ... - def add_fwf_asset( # noqa: PLR0913 + def add_fwf_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -165,8 +165,8 @@ class PandasFilesystemDatasource(_PandasFilePathDatasource): infer_nrows: int = ..., kwargs: Optional[dict] = ..., ) -> FWFAsset: - """Add a fixed-width-file asset to the catalog.""" # noqa: PYI021 - def add_hdf_asset( # noqa: PLR0913 + """Add a fixed-width-file asset to the catalog.""" # noqa: PYI021 # FIXME CoP + def add_hdf_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -183,7 +183,7 @@ class PandasFilesystemDatasource(_PandasFilePathDatasource): chunksize: typing.Union[int, None] = ..., kwargs: typing.Union[dict, None] = ..., ) -> HDFAsset: ... - def add_html_asset( # noqa: PLR0913 + def add_html_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -204,7 +204,7 @@ class PandasFilesystemDatasource(_PandasFilePathDatasource): keep_default_na: bool = ..., displayed_only: bool = ..., ) -> HTMLAsset: ... - def add_json_asset( # noqa: PLR0913 + def add_json_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -235,7 +235,7 @@ class PandasFilesystemDatasource(_PandasFilePathDatasource): columns: typing.Union[typing.List[str], None] = ..., kwargs: typing.Union[dict, None] = ..., ) -> ORCAsset: ... - def add_parquet_asset( # noqa: PLR0913 + def add_parquet_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -256,7 +256,7 @@ class PandasFilesystemDatasource(_PandasFilePathDatasource): compression: CompressionOptions = "infer", storage_options: StorageOptions = ..., ) -> PickleAsset: ... 
- def add_sas_asset( # noqa: PLR0913 + def add_sas_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -278,7 +278,7 @@ class PandasFilesystemDatasource(_PandasFilePathDatasource): usecols: typing.Union[int, str, typing.Sequence[int], None] = ..., convert_categoricals: bool = ..., ) -> SPSSAsset: ... - def add_stata_asset( # noqa: PLR0913 + def add_stata_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -296,7 +296,7 @@ class PandasFilesystemDatasource(_PandasFilePathDatasource): compression: CompressionOptions = "infer", storage_options: StorageOptions = ..., ) -> StataAsset: ... - def add_xml_asset( # noqa: PLR0913 + def add_xml_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, diff --git a/great_expectations/datasource/fluent/pandas_google_cloud_storage_datasource.py b/great_expectations/datasource/fluent/pandas_google_cloud_storage_datasource.py index 65f9cb40d5f6..363fee41a5a0 100644 --- a/great_expectations/datasource/fluent/pandas_google_cloud_storage_datasource.py +++ b/great_expectations/datasource/fluent/pandas_google_cloud_storage_datasource.py @@ -31,6 +31,11 @@ class PandasGoogleCloudStorageDatasourceError(PandasDatasourceError): @public_api class PandasGoogleCloudStorageDatasource(_PandasFilePathDatasource): + """ + PandasGoogleCloudStorageDatasource is a PandasDatasource that uses Google Cloud Storage as a + data store. + """ + # class attributes data_connector_type: ClassVar[Type[GoogleCloudStorageDataConnector]] = ( GoogleCloudStorageDataConnector @@ -55,7 +60,7 @@ class PandasGoogleCloudStorageDatasource(_PandasFilePathDatasource): def _get_gcs_client(self) -> google.Client: gcs_client: Union[google.Client, None] = self._gcs_client if not gcs_client: - # Validate that "google" libararies were successfully imported and attempt to create "gcs_client" handle. # noqa: E501 + # Validate that "google" libararies were successfully imported and attempt to create "gcs_client" handle. 
# noqa: E501 # FIXME CoP if google.service_account and google.storage: try: credentials: Union[google.Credentials, None] = ( @@ -67,7 +72,7 @@ def _get_gcs_client(self) -> google.Client: raise_warning_if_provider_not_present=True, ) # pull in needed config substitutions using the `_config_provider` - # The `FluentBaseModel.dict()` call will do the config substitution on the serialized dict if a `config_provider` is passed # noqa: E501 + # The `FluentBaseModel.dict()` call will do the config substitution on the serialized dict if a `config_provider` is passed # noqa: E501 # FIXME CoP gcs_options: dict = self.dict(config_provider=self._config_provider).get( "gcs_options", {} ) @@ -85,13 +90,13 @@ def _get_gcs_client(self) -> google.Client: gcs_client = google.storage.Client(credentials=credentials, **gcs_options) except Exception as e: - # Failure to create "gcs_client" is most likely due invalid "gcs_options" dictionary. # noqa: E501 - raise PandasGoogleCloudStorageDatasourceError( # noqa: TRY003 + # Failure to create "gcs_client" is most likely due invalid "gcs_options" dictionary. # noqa: E501 # FIXME CoP + raise PandasGoogleCloudStorageDatasourceError( # noqa: TRY003 # FIXME CoP f'Due to exception: "{e!r}", "gcs_client" could not be created.' ) from e else: - raise PandasGoogleCloudStorageDatasourceError( # noqa: TRY003 - 'Unable to create "PandasGoogleCloudStorageDatasource" due to missing google dependency.' # noqa: E501 + raise PandasGoogleCloudStorageDatasourceError( # noqa: TRY003 # FIXME CoP + 'Unable to create "PandasGoogleCloudStorageDatasource" due to missing google dependency.' # noqa: E501 # FIXME CoP ) self._gcs_client = gcs_client @@ -107,11 +112,11 @@ def test_connection(self, test_assets: bool = True) -> None: Raises: TestConnectionError: If the connection test fails. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP try: _ = self._get_gcs_client() except Exception as e: - raise TestConnectionError( # noqa: TRY003 + raise TestConnectionError( # noqa: TRY003 # FIXME CoP "Attempt to connect to datasource failed with the following error message: " f"{e!s}" ) from e @@ -132,7 +137,7 @@ def _build_data_connector( ) -> None: """Builds and attaches the `GoogleCloudStorageDataConnector` to the asset.""" if kwargs: - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"_build_data_connector() got unexpected keyword arguments {list(kwargs.keys())}" ) data_asset._data_connector = self.data_connector_type.build_data_connector( diff --git a/great_expectations/datasource/fluent/pandas_google_cloud_storage_datasource.pyi b/great_expectations/datasource/fluent/pandas_google_cloud_storage_datasource.pyi index 5efd854f9f16..a9e84bb658d4 100644 --- a/great_expectations/datasource/fluent/pandas_google_cloud_storage_datasource.pyi +++ b/great_expectations/datasource/fluent/pandas_google_cloud_storage_datasource.pyi @@ -71,7 +71,7 @@ class PandasGoogleCloudStorageDatasource(_PandasFilePathDatasource): @override def test_connection(self, test_assets: bool = ...) -> None: ... - def add_csv_asset( # noqa: PLR0913 + def add_csv_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -131,7 +131,7 @@ class PandasGoogleCloudStorageDatasource(_PandasFilePathDatasource): memory_map: bool = ..., storage_options: StorageOptions = ..., ) -> CSVAsset: ... - def add_excel_asset( # noqa: PLR0913 + def add_excel_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -163,7 +163,7 @@ class PandasGoogleCloudStorageDatasource(_PandasFilePathDatasource): mangle_dupe_cols: bool = ..., storage_options: StorageOptions = ..., ) -> ExcelAsset: ... 
- def add_feather_asset( # noqa: PLR0913 + def add_feather_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -175,7 +175,7 @@ class PandasGoogleCloudStorageDatasource(_PandasFilePathDatasource): use_threads: bool = ..., storage_options: StorageOptions = ..., ) -> FeatherAsset: ... - def add_fwf_asset( # noqa: PLR0913 + def add_fwf_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -187,7 +187,7 @@ class PandasGoogleCloudStorageDatasource(_PandasFilePathDatasource): infer_nrows: int = ..., kwargs: Optional[dict] = ..., ) -> FWFAsset: ... - def add_hdf_asset( # noqa: PLR0913 + def add_hdf_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -206,7 +206,7 @@ class PandasGoogleCloudStorageDatasource(_PandasFilePathDatasource): chunksize: typing.Union[int, None] = ..., kwargs: typing.Union[dict, None] = ..., ) -> HDFAsset: ... - def add_html_asset( # noqa: PLR0913 + def add_html_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -229,7 +229,7 @@ class PandasGoogleCloudStorageDatasource(_PandasFilePathDatasource): keep_default_na: bool = ..., displayed_only: bool = ..., ) -> HTMLAsset: ... - def add_json_asset( # noqa: PLR0913 + def add_json_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -253,7 +253,7 @@ class PandasGoogleCloudStorageDatasource(_PandasFilePathDatasource): nrows: typing.Union[int, None] = ..., storage_options: StorageOptions = ..., ) -> JSONAsset: ... - def add_orc_asset( # noqa: PLR0913 + def add_orc_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -264,7 +264,7 @@ class PandasGoogleCloudStorageDatasource(_PandasFilePathDatasource): columns: typing.Union[typing.List[str], None] = ..., kwargs: typing.Union[dict, None] = ..., ) -> ORCAsset: ... 
- def add_parquet_asset( # noqa: PLR0913 + def add_parquet_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -278,7 +278,7 @@ class PandasGoogleCloudStorageDatasource(_PandasFilePathDatasource): use_nullable_dtypes: bool = ..., kwargs: typing.Union[dict, None] = ..., ) -> ParquetAsset: ... - def add_pickle_asset( # noqa: PLR0913 + def add_pickle_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -289,7 +289,7 @@ class PandasGoogleCloudStorageDatasource(_PandasFilePathDatasource): compression: CompressionOptions = "infer", storage_options: StorageOptions = ..., ) -> PickleAsset: ... - def add_sas_asset( # noqa: PLR0913 + def add_sas_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -304,7 +304,7 @@ class PandasGoogleCloudStorageDatasource(_PandasFilePathDatasource): iterator: bool = ..., compression: CompressionOptions = "infer", ) -> SASAsset: ... - def add_spss_asset( # noqa: PLR0913 + def add_spss_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -315,7 +315,7 @@ class PandasGoogleCloudStorageDatasource(_PandasFilePathDatasource): usecols: typing.Union[int, str, typing.Sequence[int], None] = ..., convert_categoricals: bool = ..., ) -> SPSSAsset: ... - def add_stata_asset( # noqa: PLR0913 + def add_stata_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -335,7 +335,7 @@ class PandasGoogleCloudStorageDatasource(_PandasFilePathDatasource): compression: CompressionOptions = "infer", storage_options: StorageOptions = ..., ) -> StataAsset: ... 
- def add_xml_asset( # noqa: PLR0913 + def add_xml_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, diff --git a/great_expectations/datasource/fluent/pandas_s3_datasource.py b/great_expectations/datasource/fluent/pandas_s3_datasource.py index 443794a1531b..ce016a8fd7af 100644 --- a/great_expectations/datasource/fluent/pandas_s3_datasource.py +++ b/great_expectations/datasource/fluent/pandas_s3_datasource.py @@ -32,6 +32,10 @@ class PandasS3DatasourceError(PandasDatasourceError): @public_api class PandasS3Datasource(_PandasFilePathDatasource): + """ + PandasS3Datasource is a PandasDatasource that uses Amazon S3 as a data store. + """ + # class attributes data_connector_type: ClassVar[Type[S3DataConnector]] = S3DataConnector # these fields should not be passed to the execution engine @@ -52,25 +56,25 @@ class PandasS3Datasource(_PandasFilePathDatasource): def _get_s3_client(self) -> BaseClient: s3_client: Union[BaseClient, None] = self._s3_client if not s3_client: - # Validate that "boto3" library was successfully imported and attempt to create "s3_client" handle. # noqa: E501 + # Validate that "boto3" library was successfully imported and attempt to create "s3_client" handle. # noqa: E501 # FIXME CoP if aws.boto3: _check_config_substitutions_needed( self, self.boto3_options, raise_warning_if_provider_not_present=True ) # pull in needed config substitutions using the `_config_provider` - # The `FluentBaseModel.dict()` call will do the config substitution on the serialized dict if a `config_provider` is passed # noqa: E501 + # The `FluentBaseModel.dict()` call will do the config substitution on the serialized dict if a `config_provider` is passed # noqa: E501 # FIXME CoP boto3_options: dict = self.dict(config_provider=self._config_provider).get( "boto3_options", {} ) try: s3_client = aws.boto3.client("s3", **boto3_options) except Exception as e: - # Failure to create "s3_client" is most likely due invalid "boto3_options" dictionary. 
# noqa: E501 - raise PandasS3DatasourceError( # noqa: TRY003 - f'Due to exception: "{type(e).__name__}:{e}", "s3_client" could not be created.' # noqa: E501 + # Failure to create "s3_client" is most likely due invalid "boto3_options" dictionary. # noqa: E501 # FIXME CoP + raise PandasS3DatasourceError( # noqa: TRY003 # FIXME CoP + f'Due to exception: "{type(e).__name__}:{e}", "s3_client" could not be created.' # noqa: E501 # FIXME CoP ) from e else: - raise PandasS3DatasourceError( # noqa: TRY003 + raise PandasS3DatasourceError( # noqa: TRY003 # FIXME CoP 'Unable to create "PandasS3Datasource" due to missing boto3 dependency.' ) @@ -87,11 +91,11 @@ def test_connection(self, test_assets: bool = True) -> None: Raises: TestConnectionError: If the connection test fails. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP try: _ = self._get_s3_client() except Exception as e: - raise TestConnectionError( # noqa: TRY003 + raise TestConnectionError( # noqa: TRY003 # FIXME CoP "Attempt to connect to datasource failed with the following error message: " f"{e!s}" ) from e @@ -113,7 +117,7 @@ def _build_data_connector( """Builds and attaches the `S3DataConnector` to the asset.""" # TODO: use the `asset_options_type` for validation and defaults if kwargs: - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"_build_data_connector() got unexpected keyword arguments {list(kwargs.keys())}" ) diff --git a/great_expectations/datasource/fluent/pandas_s3_datasource.pyi b/great_expectations/datasource/fluent/pandas_s3_datasource.pyi index 3cfa98698f37..e947655b29d6 100644 --- a/great_expectations/datasource/fluent/pandas_s3_datasource.pyi +++ b/great_expectations/datasource/fluent/pandas_s3_datasource.pyi @@ -69,7 +69,7 @@ class PandasS3Datasource(_PandasFilePathDatasource): boto3_options: Dict[str, ConfigStr | Any] @override def test_connection(self, test_assets: bool = ...) -> None: ... 
- def add_csv_asset( # noqa: PLR0913 + def add_csv_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -129,7 +129,7 @@ class PandasS3Datasource(_PandasFilePathDatasource): memory_map: bool = ..., storage_options: StorageOptions = ..., ) -> CSVAsset: ... - def add_excel_asset( # noqa: PLR0913 + def add_excel_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -161,7 +161,7 @@ class PandasS3Datasource(_PandasFilePathDatasource): mangle_dupe_cols: bool = ..., storage_options: StorageOptions = ..., ) -> ExcelAsset: ... - def add_feather_asset( # noqa: PLR0913 + def add_feather_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -173,7 +173,7 @@ class PandasS3Datasource(_PandasFilePathDatasource): use_threads: bool = ..., storage_options: StorageOptions = ..., ) -> FeatherAsset: ... - def add_fwf_asset( # noqa: PLR0913 + def add_fwf_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -185,7 +185,7 @@ class PandasS3Datasource(_PandasFilePathDatasource): infer_nrows: int = ..., kwargs: Optional[dict] = ..., ) -> FWFAsset: ... - def add_hdf_asset( # noqa: PLR0913 + def add_hdf_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -204,7 +204,7 @@ class PandasS3Datasource(_PandasFilePathDatasource): chunksize: typing.Union[int, None] = ..., kwargs: typing.Union[dict, None] = ..., ) -> HDFAsset: ... - def add_html_asset( # noqa: PLR0913 + def add_html_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -227,7 +227,7 @@ class PandasS3Datasource(_PandasFilePathDatasource): keep_default_na: bool = ..., displayed_only: bool = ..., ) -> HTMLAsset: ... - def add_json_asset( # noqa: PLR0913 + def add_json_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -251,7 +251,7 @@ class PandasS3Datasource(_PandasFilePathDatasource): nrows: typing.Union[int, None] = ..., storage_options: StorageOptions = ..., ) -> JSONAsset: ... 
- def add_orc_asset( # noqa: PLR0913 + def add_orc_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -262,7 +262,7 @@ class PandasS3Datasource(_PandasFilePathDatasource): columns: typing.Union[typing.List[str], None] = ..., kwargs: typing.Union[dict, None] = ..., ) -> ORCAsset: ... - def add_parquet_asset( # noqa: PLR0913 + def add_parquet_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -276,7 +276,7 @@ class PandasS3Datasource(_PandasFilePathDatasource): use_nullable_dtypes: bool = ..., kwargs: typing.Union[dict, None] = ..., ) -> ParquetAsset: ... - def add_pickle_asset( # noqa: PLR0913 + def add_pickle_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -287,7 +287,7 @@ class PandasS3Datasource(_PandasFilePathDatasource): compression: CompressionOptions = "infer", storage_options: StorageOptions = ..., ) -> PickleAsset: ... - def add_sas_asset( # noqa: PLR0913 + def add_sas_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -302,7 +302,7 @@ class PandasS3Datasource(_PandasFilePathDatasource): iterator: bool = ..., compression: CompressionOptions = "infer", ) -> SASAsset: ... - def add_spss_asset( # noqa: PLR0913 + def add_spss_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -313,7 +313,7 @@ class PandasS3Datasource(_PandasFilePathDatasource): usecols: typing.Union[int, str, typing.Sequence[int], None] = ..., convert_categoricals: bool = ..., ) -> SPSSAsset: ... - def add_stata_asset( # noqa: PLR0913 + def add_stata_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -333,7 +333,7 @@ class PandasS3Datasource(_PandasFilePathDatasource): compression: CompressionOptions = "infer", storage_options: StorageOptions = ..., ) -> StataAsset: ... 
- def add_xml_asset( # noqa: PLR0913 + def add_xml_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, diff --git a/great_expectations/datasource/fluent/postgres_datasource.py b/great_expectations/datasource/fluent/postgres_datasource.py index 45e95567ab2b..94368ffcdd75 100644 --- a/great_expectations/datasource/fluent/postgres_datasource.py +++ b/great_expectations/datasource/fluent/postgres_datasource.py @@ -18,7 +18,7 @@ class PostgresDatasource(SQLDatasource): For example: "postgresql+psycopg2://postgres:@localhost/test_database" assets: An optional dictionary whose keys are TableAsset or QueryAsset names and whose values are TableAsset or QueryAsset objects. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - type: Literal["postgres"] = "postgres" # type: ignore[assignment] + type: Literal["postgres"] = "postgres" # type: ignore[assignment] # FIXME CoP connection_string: Union[ConfigStr, PostgresDsn] diff --git a/great_expectations/datasource/fluent/schemas/DatabricksSQLDatasource.json b/great_expectations/datasource/fluent/schemas/DatabricksSQLDatasource.json index 7772a9b03d11..537f0ab2e67d 100644 --- a/great_expectations/datasource/fluent/schemas/DatabricksSQLDatasource.json +++ b/great_expectations/datasource/fluent/schemas/DatabricksSQLDatasource.json @@ -429,7 +429,7 @@ }, "TableAsset": { "title": "TableAsset", - "description": "--Public API--", + "description": "--Public API--A class representing a table from a SQL database\n\nArgs:\n table_name: The name of the database table to be added\n schema_name: The name of the schema containing the database table to be added.", "type": "object", "properties": { "name": { @@ -486,7 +486,7 @@ }, "QueryAsset": { "title": "QueryAsset", - "description": "--Public API--", + "description": "--Public API--An asset made from a SQL query\n\nArgs:\n query: The query to be used to construct the underlying Data Asset", "type": "object", "properties": { "name": { diff --git 
a/great_expectations/datasource/fluent/schemas/DatabricksSQLDatasource/DatabricksTableAsset.json b/great_expectations/datasource/fluent/schemas/DatabricksSQLDatasource/DatabricksTableAsset.json index 5354c902c54c..2329b8023316 100644 --- a/great_expectations/datasource/fluent/schemas/DatabricksSQLDatasource/DatabricksTableAsset.json +++ b/great_expectations/datasource/fluent/schemas/DatabricksSQLDatasource/DatabricksTableAsset.json @@ -1,6 +1,6 @@ { "title": "DatabricksTableAsset", - "description": "--Public API--", + "description": "--Public API--A class representing a table from a SQL database\n\nArgs:\n table_name: The name of the database table to be added\n schema_name: The name of the schema containing the database table to be added.", "type": "object", "properties": { "name": { diff --git a/great_expectations/datasource/fluent/schemas/DatabricksSQLDatasource/QueryAsset.json b/great_expectations/datasource/fluent/schemas/DatabricksSQLDatasource/QueryAsset.json index 8b3fdedfc3ae..3dd40effbf5b 100644 --- a/great_expectations/datasource/fluent/schemas/DatabricksSQLDatasource/QueryAsset.json +++ b/great_expectations/datasource/fluent/schemas/DatabricksSQLDatasource/QueryAsset.json @@ -1,6 +1,6 @@ { "title": "QueryAsset", - "description": "--Public API--", + "description": "--Public API--An asset made from a SQL query\n\nArgs:\n query: The query to be used to construct the underlying Data Asset", "type": "object", "properties": { "name": { diff --git a/great_expectations/datasource/fluent/schemas/Datasource.json b/great_expectations/datasource/fluent/schemas/Datasource.json index 6f2d664ed4d2..bc696d901aa1 100644 --- a/great_expectations/datasource/fluent/schemas/Datasource.json +++ b/great_expectations/datasource/fluent/schemas/Datasource.json @@ -1,6 +1,6 @@ { "title": "Datasource", - "description": "--Public API--", + "description": "--Public API--\nA Datasource provides a standard API for accessing and interacting with data from\na wide variety of source 
systems.", "type": "object", "properties": { "type": { @@ -529,7 +529,7 @@ }, "DataAsset": { "title": "DataAsset", - "description": "--Public API--", + "description": "--Public API--\nA Data Asset is a collection of records within a Data Source, which is usually named based\non the underlying data system and sliced to correspond to a desired specification.\n\nData Assets are used to specify how Great Expectations will organize data into Batches.", "type": "object", "properties": { "name": { diff --git a/great_expectations/datasource/fluent/schemas/PandasAzureBlobStorageDatasource.json b/great_expectations/datasource/fluent/schemas/PandasAzureBlobStorageDatasource.json index 34a01f1da8a8..21dc4f96d8cc 100644 --- a/great_expectations/datasource/fluent/schemas/PandasAzureBlobStorageDatasource.json +++ b/great_expectations/datasource/fluent/schemas/PandasAzureBlobStorageDatasource.json @@ -1,6 +1,6 @@ { "title": "PandasAzureBlobStorageDatasource", - "description": "--Public API--", + "description": "--Public API--\nPandasAzureBlobStorageDatasource is a PandasDatasource that uses Azure Blob Storage as a\ndata store.", "type": "object", "properties": { "type": { diff --git a/great_expectations/datasource/fluent/schemas/PandasDatasource.json b/great_expectations/datasource/fluent/schemas/PandasDatasource.json index 534bd1361986..2d0aff8c7839 100644 --- a/great_expectations/datasource/fluent/schemas/PandasDatasource.json +++ b/great_expectations/datasource/fluent/schemas/PandasDatasource.json @@ -532,7 +532,7 @@ }, "_PandasDataAsset": { "title": "_PandasDataAsset", - "description": "--Public API--", + "description": "--Public API--\nA Pandas DataAsset is a DataAsset that is backed by a Pandas DataFrame.", "type": "object", "properties": { "name": { diff --git a/great_expectations/datasource/fluent/schemas/PandasDatasource/DataFrameAsset.json b/great_expectations/datasource/fluent/schemas/PandasDatasource/DataFrameAsset.json index 74b8fa127093..7a8a3bfc9995 100644 --- 
a/great_expectations/datasource/fluent/schemas/PandasDatasource/DataFrameAsset.json +++ b/great_expectations/datasource/fluent/schemas/PandasDatasource/DataFrameAsset.json @@ -1,6 +1,6 @@ { "title": "DataFrameAsset", - "description": "--Public API--", + "description": "--Public API--\nA Pandas DataAsset is a DataAsset that is backed by a Pandas DataFrame.", "type": "object", "properties": { "name": { diff --git a/great_expectations/datasource/fluent/schemas/PandasGoogleCloudStorageDatasource.json b/great_expectations/datasource/fluent/schemas/PandasGoogleCloudStorageDatasource.json index d52fca28c022..3cb735937042 100644 --- a/great_expectations/datasource/fluent/schemas/PandasGoogleCloudStorageDatasource.json +++ b/great_expectations/datasource/fluent/schemas/PandasGoogleCloudStorageDatasource.json @@ -1,6 +1,6 @@ { "title": "PandasGoogleCloudStorageDatasource", - "description": "--Public API--", + "description": "--Public API--\nPandasGoogleCloudStorageDatasource is a PandasDatasource that uses Google Cloud Storage as a\ndata store.", "type": "object", "properties": { "type": { diff --git a/great_expectations/datasource/fluent/schemas/PandasS3Datasource.json b/great_expectations/datasource/fluent/schemas/PandasS3Datasource.json index 454017890d7a..241c2b318c99 100644 --- a/great_expectations/datasource/fluent/schemas/PandasS3Datasource.json +++ b/great_expectations/datasource/fluent/schemas/PandasS3Datasource.json @@ -1,6 +1,6 @@ { "title": "PandasS3Datasource", - "description": "--Public API--", + "description": "--Public API--\nPandasS3Datasource is a PandasDatasource that uses Amazon S3 as a data store.", "type": "object", "properties": { "type": { diff --git a/great_expectations/datasource/fluent/schemas/PostgresDatasource.json b/great_expectations/datasource/fluent/schemas/PostgresDatasource.json index c03cc1c2ae60..f6682cdb4e7a 100644 --- a/great_expectations/datasource/fluent/schemas/PostgresDatasource.json +++ 
b/great_expectations/datasource/fluent/schemas/PostgresDatasource.json @@ -429,7 +429,7 @@ }, "TableAsset": { "title": "TableAsset", - "description": "--Public API--", + "description": "--Public API--A class representing a table from a SQL database\n\nArgs:\n table_name: The name of the database table to be added\n schema_name: The name of the schema containing the database table to be added.", "type": "object", "properties": { "name": { @@ -486,7 +486,7 @@ }, "QueryAsset": { "title": "QueryAsset", - "description": "--Public API--", + "description": "--Public API--An asset made from a SQL query\n\nArgs:\n query: The query to be used to construct the underlying Data Asset", "type": "object", "properties": { "name": { diff --git a/great_expectations/datasource/fluent/schemas/PostgresDatasource/QueryAsset.json b/great_expectations/datasource/fluent/schemas/PostgresDatasource/QueryAsset.json index 8b3fdedfc3ae..3dd40effbf5b 100644 --- a/great_expectations/datasource/fluent/schemas/PostgresDatasource/QueryAsset.json +++ b/great_expectations/datasource/fluent/schemas/PostgresDatasource/QueryAsset.json @@ -1,6 +1,6 @@ { "title": "QueryAsset", - "description": "--Public API--", + "description": "--Public API--An asset made from a SQL query\n\nArgs:\n query: The query to be used to construct the underlying Data Asset", "type": "object", "properties": { "name": { diff --git a/great_expectations/datasource/fluent/schemas/PostgresDatasource/TableAsset.json b/great_expectations/datasource/fluent/schemas/PostgresDatasource/TableAsset.json index 276e71d03a01..f1d41d7239a0 100644 --- a/great_expectations/datasource/fluent/schemas/PostgresDatasource/TableAsset.json +++ b/great_expectations/datasource/fluent/schemas/PostgresDatasource/TableAsset.json @@ -1,6 +1,6 @@ { "title": "TableAsset", - "description": "--Public API--", + "description": "--Public API--A class representing a table from a SQL database\n\nArgs:\n table_name: The name of the database table to be added\n 
schema_name: The name of the schema containing the database table to be added.", "type": "object", "properties": { "name": { diff --git a/great_expectations/datasource/fluent/schemas/SQLDatasource.json b/great_expectations/datasource/fluent/schemas/SQLDatasource.json index 3034eb659d1f..d8de20c9ac95 100644 --- a/great_expectations/datasource/fluent/schemas/SQLDatasource.json +++ b/great_expectations/datasource/fluent/schemas/SQLDatasource.json @@ -426,7 +426,7 @@ }, "TableAsset": { "title": "TableAsset", - "description": "--Public API--", + "description": "--Public API--A class representing a table from a SQL database\n\nArgs:\n table_name: The name of the database table to be added\n schema_name: The name of the schema containing the database table to be added.", "type": "object", "properties": { "name": { @@ -483,7 +483,7 @@ }, "QueryAsset": { "title": "QueryAsset", - "description": "--Public API--", + "description": "--Public API--An asset made from a SQL query\n\nArgs:\n query: The query to be used to construct the underlying Data Asset", "type": "object", "properties": { "name": { diff --git a/great_expectations/datasource/fluent/schemas/SQLDatasource/QueryAsset.json b/great_expectations/datasource/fluent/schemas/SQLDatasource/QueryAsset.json index 8b3fdedfc3ae..3dd40effbf5b 100644 --- a/great_expectations/datasource/fluent/schemas/SQLDatasource/QueryAsset.json +++ b/great_expectations/datasource/fluent/schemas/SQLDatasource/QueryAsset.json @@ -1,6 +1,6 @@ { "title": "QueryAsset", - "description": "--Public API--", + "description": "--Public API--An asset made from a SQL query\n\nArgs:\n query: The query to be used to construct the underlying Data Asset", "type": "object", "properties": { "name": { diff --git a/great_expectations/datasource/fluent/schemas/SQLDatasource/TableAsset.json b/great_expectations/datasource/fluent/schemas/SQLDatasource/TableAsset.json index 276e71d03a01..f1d41d7239a0 100644 --- 
a/great_expectations/datasource/fluent/schemas/SQLDatasource/TableAsset.json +++ b/great_expectations/datasource/fluent/schemas/SQLDatasource/TableAsset.json @@ -1,6 +1,6 @@ { "title": "TableAsset", - "description": "--Public API--", + "description": "--Public API--A class representing a table from a SQL database\n\nArgs:\n table_name: The name of the database table to be added\n schema_name: The name of the schema containing the database table to be added.", "type": "object", "properties": { "name": { diff --git a/great_expectations/datasource/fluent/schemas/SnowflakeDatasource.json b/great_expectations/datasource/fluent/schemas/SnowflakeDatasource.json index 9401ce228112..44044808bdd6 100644 --- a/great_expectations/datasource/fluent/schemas/SnowflakeDatasource.json +++ b/great_expectations/datasource/fluent/schemas/SnowflakeDatasource.json @@ -434,7 +434,7 @@ }, "TableAsset": { "title": "TableAsset", - "description": "--Public API--", + "description": "--Public API--A class representing a table from a SQL database\n\nArgs:\n table_name: The name of the database table to be added\n schema_name: The name of the schema containing the database table to be added.", "type": "object", "properties": { "name": { @@ -491,7 +491,7 @@ }, "QueryAsset": { "title": "QueryAsset", - "description": "--Public API--", + "description": "--Public API--An asset made from a SQL query\n\nArgs:\n query: The query to be used to construct the underlying Data Asset", "type": "object", "properties": { "name": { diff --git a/great_expectations/datasource/fluent/schemas/SnowflakeDatasource/QueryAsset.json b/great_expectations/datasource/fluent/schemas/SnowflakeDatasource/QueryAsset.json index 8b3fdedfc3ae..3dd40effbf5b 100644 --- a/great_expectations/datasource/fluent/schemas/SnowflakeDatasource/QueryAsset.json +++ b/great_expectations/datasource/fluent/schemas/SnowflakeDatasource/QueryAsset.json @@ -1,6 +1,6 @@ { "title": "QueryAsset", - "description": "--Public API--", + "description": 
"--Public API--An asset made from a SQL query\n\nArgs:\n query: The query to be used to construct the underlying Data Asset", "type": "object", "properties": { "name": { diff --git a/great_expectations/datasource/fluent/schemas/SnowflakeDatasource/TableAsset.json b/great_expectations/datasource/fluent/schemas/SnowflakeDatasource/TableAsset.json index 276e71d03a01..f1d41d7239a0 100644 --- a/great_expectations/datasource/fluent/schemas/SnowflakeDatasource/TableAsset.json +++ b/great_expectations/datasource/fluent/schemas/SnowflakeDatasource/TableAsset.json @@ -1,6 +1,6 @@ { "title": "TableAsset", - "description": "--Public API--", + "description": "--Public API--A class representing a table from a SQL database\n\nArgs:\n table_name: The name of the database table to be added\n schema_name: The name of the schema containing the database table to be added.", "type": "object", "properties": { "name": { diff --git a/great_expectations/datasource/fluent/schemas/SparkAzureBlobStorageDatasource.json b/great_expectations/datasource/fluent/schemas/SparkAzureBlobStorageDatasource.json index 65afc06d769b..d66489311287 100644 --- a/great_expectations/datasource/fluent/schemas/SparkAzureBlobStorageDatasource.json +++ b/great_expectations/datasource/fluent/schemas/SparkAzureBlobStorageDatasource.json @@ -1,6 +1,6 @@ { "title": "SparkAzureBlobStorageDatasource", - "description": "--Public API--", + "description": "--Public API--\nSparkAzureBlobStorageDatasource is a subclass of SparkDatasource which connects to\nAzure Blob Storage.", "type": "object", "properties": { "type": { diff --git a/great_expectations/datasource/fluent/schemas/SparkDatasource.json b/great_expectations/datasource/fluent/schemas/SparkDatasource.json index 562b66cbd832..74a5e1c0b14a 100644 --- a/great_expectations/datasource/fluent/schemas/SparkDatasource.json +++ b/great_expectations/datasource/fluent/schemas/SparkDatasource.json @@ -1,6 +1,6 @@ { "title": "SparkDatasource", - "description": "--Public API--", 
+ "description": "--Public API--\nA SparkDatasource is a Datasource that connects to a Spark cluster and provides\naccess to Spark DataFrames.", "type": "object", "properties": { "type": { @@ -562,7 +562,7 @@ }, "DataFrameAsset": { "title": "DataFrameAsset", - "description": "--Public API--", + "description": "--Public API--\nA DataAsset that represents a Spark DataFrame.", "type": "object", "properties": { "name": { diff --git a/great_expectations/datasource/fluent/schemas/SparkDatasource/DataFrameAsset.json b/great_expectations/datasource/fluent/schemas/SparkDatasource/DataFrameAsset.json index 74b8fa127093..99b90a0a298a 100644 --- a/great_expectations/datasource/fluent/schemas/SparkDatasource/DataFrameAsset.json +++ b/great_expectations/datasource/fluent/schemas/SparkDatasource/DataFrameAsset.json @@ -1,6 +1,6 @@ { "title": "DataFrameAsset", - "description": "--Public API--", + "description": "--Public API--\nA DataAsset that represents a Spark DataFrame.", "type": "object", "properties": { "name": { diff --git a/great_expectations/datasource/fluent/schemas/SparkFilesystemDatasource.json b/great_expectations/datasource/fluent/schemas/SparkFilesystemDatasource.json index 7faa09758327..18ee76c40e59 100644 --- a/great_expectations/datasource/fluent/schemas/SparkFilesystemDatasource.json +++ b/great_expectations/datasource/fluent/schemas/SparkFilesystemDatasource.json @@ -1,6 +1,6 @@ { "title": "SparkFilesystemDatasource", - "description": "--Public API--", + "description": "--Public API--\nSparkFilesystemDatasource is a subclass of SparkDatasource which connects to\nthe filesystem.", "type": "object", "properties": { "type": { diff --git a/great_expectations/datasource/fluent/schemas/SparkGoogleCloudStorageDatasource.json b/great_expectations/datasource/fluent/schemas/SparkGoogleCloudStorageDatasource.json index c37b577d33d2..516e01537b29 100644 --- a/great_expectations/datasource/fluent/schemas/SparkGoogleCloudStorageDatasource.json +++ 
b/great_expectations/datasource/fluent/schemas/SparkGoogleCloudStorageDatasource.json @@ -1,6 +1,6 @@ { "title": "SparkGoogleCloudStorageDatasource", - "description": "--Public API--", + "description": "--Public API--\nSparkGoogleCloudStorageDatasource is a subclass of SparkDatasource which connects to\nGoogle Cloud Storage.", "type": "object", "properties": { "type": { diff --git a/great_expectations/datasource/fluent/schemas/SparkS3Datasource.json b/great_expectations/datasource/fluent/schemas/SparkS3Datasource.json index 136b654e40be..a0c31e596367 100644 --- a/great_expectations/datasource/fluent/schemas/SparkS3Datasource.json +++ b/great_expectations/datasource/fluent/schemas/SparkS3Datasource.json @@ -1,6 +1,6 @@ { "title": "SparkS3Datasource", - "description": "--Public API--", + "description": "--Public API--\nSparkS3Datasource is a subclass of SparkDatasource which connects to\nAmazon S3.", "type": "object", "properties": { "type": { diff --git a/great_expectations/datasource/fluent/schemas/SqliteDatasource.json b/great_expectations/datasource/fluent/schemas/SqliteDatasource.json index 48f22c99d451..2cd92d1300a6 100644 --- a/great_expectations/datasource/fluent/schemas/SqliteDatasource.json +++ b/great_expectations/datasource/fluent/schemas/SqliteDatasource.json @@ -429,7 +429,7 @@ }, "TableAsset": { "title": "TableAsset", - "description": "--Public API--", + "description": "--Public API--A class representing a table from a SQL database\n\nArgs:\n table_name: The name of the database table to be added\n schema_name: The name of the schema containing the database table to be added.", "type": "object", "properties": { "name": { @@ -486,7 +486,7 @@ }, "QueryAsset": { "title": "QueryAsset", - "description": "--Public API--", + "description": "--Public API--An asset made from a SQL query\n\nArgs:\n query: The query to be used to construct the underlying Data Asset", "type": "object", "properties": { "name": { diff --git 
a/great_expectations/datasource/fluent/schemas/SqliteDatasource/SqliteQueryAsset.json b/great_expectations/datasource/fluent/schemas/SqliteDatasource/SqliteQueryAsset.json index 8580bfa57916..7ee7fd934848 100644 --- a/great_expectations/datasource/fluent/schemas/SqliteDatasource/SqliteQueryAsset.json +++ b/great_expectations/datasource/fluent/schemas/SqliteDatasource/SqliteQueryAsset.json @@ -1,6 +1,6 @@ { "title": "SqliteQueryAsset", - "description": "--Public API--", + "description": "--Public API--An asset made from a SQL query\n\nArgs:\n query: The query to be used to construct the underlying Data Asset", "type": "object", "properties": { "name": { diff --git a/great_expectations/datasource/fluent/schemas/SqliteDatasource/SqliteTableAsset.json b/great_expectations/datasource/fluent/schemas/SqliteDatasource/SqliteTableAsset.json index 8b900b796325..b35919673398 100644 --- a/great_expectations/datasource/fluent/schemas/SqliteDatasource/SqliteTableAsset.json +++ b/great_expectations/datasource/fluent/schemas/SqliteDatasource/SqliteTableAsset.json @@ -1,6 +1,6 @@ { "title": "SqliteTableAsset", - "description": "--Public API--", + "description": "--Public API--A class representing a table from a SQL database\n\nArgs:\n table_name: The name of the database table to be added\n schema_name: The name of the schema containing the database table to be added.", "type": "object", "properties": { "name": { diff --git a/great_expectations/datasource/fluent/snowflake_datasource.py b/great_expectations/datasource/fluent/snowflake_datasource.py index 8bd3fddc9e12..85302913cec2 100644 --- a/great_expectations/datasource/fluent/snowflake_datasource.py +++ b/great_expectations/datasource/fluent/snowflake_datasource.py @@ -113,7 +113,7 @@ def _get_config_substituted_connection_string( warning_msg: str = "Unable to perform config substitution", ) -> AnyUrl | None: if not isinstance(datasource.connection_string, ConfigUri): - raise TypeError("Config substitution is only supported for 
`ConfigUri`") # noqa: TRY003 + raise TypeError("Config substitution is only supported for `ConfigUri`") # noqa: TRY003 # FIXME CoP if not datasource._data_context: warnings.warn( f"{warning_msg} for {datasource.connection_string.template_str}." @@ -189,7 +189,7 @@ def get_schema(cls) -> dict: @classmethod def _validate(cls, value: str) -> AccountIdentifier: if not value: - raise ValueError("Account identifier cannot be empty") # noqa: TRY003 + raise ValueError("Account identifier cannot be empty") # noqa: TRY003 # FIXME CoP v = cls(value) if not v._match: LOGGER.info( @@ -253,7 +253,7 @@ def as_tuple( fmt2 = (self.orgname, self.account_name) if any(fmt2): return fmt2 - raise ValueError("Account identifier does not match either expected format") # noqa: TRY003 + raise ValueError("Account identifier does not match either expected format") # noqa: TRY003 # FIXME CoP class _UrlPasswordError(pydantic.UrlError): @@ -409,9 +409,9 @@ class SnowflakeDatasource(SQLDatasource): For example: "snowflake://:@" assets: An optional dictionary whose keys are TableAsset or QueryAsset names and whose values are TableAsset or QueryAsset objects. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - type: Literal["snowflake"] = "snowflake" # type: ignore[assignment] + type: Literal["snowflake"] = "snowflake" # type: ignore[assignment] # FIXME CoP # TODO: rename this to `connection` for v1? connection_string: Union[ConnectionDetails, ConfigUri, SnowflakeDsn] # type: ignore[assignment] # Deviation from parent class as individual args are supported for connection @@ -581,7 +581,7 @@ def _convert_root_connection_detail_fields(cls, values: dict) -> dict: It also allows for users to continue to provide connection details in the `context.data_sources.add_snowflake()` factory functions without nesting it in a `connection_string` dict. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP connection_detail_fields: set[str] = { "schema", # field name in ConnectionDetails is schema_ (with underscore) *ConnectionDetails.__fields__.keys(), @@ -594,7 +594,7 @@ def _convert_root_connection_detail_fields(cls, values: dict) -> dict: for field_name in provided_fields: if field_name in connection_detail_fields: if connection_string: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "Provided both connection detail keyword args and `connection_string`." ) connection_details[field_name] = values.pop(field_name) @@ -632,7 +632,7 @@ def _check_xor_input_args(cls, values: dict) -> dict: ) if is_connection_string or has_min_connection_detail_values: return values - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "Must provide either a connection string or" f" a combination of {', '.join(ConnectionDetails.required_fields())} as keyword args." ) @@ -721,7 +721,7 @@ def get_execution_engine(self) -> SqlAlchemyExecutionEngine: the database to be created. For Snowflake specifically we may represent the connection_string as a dict, which is not supported by SQLAlchemy. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP gx_execution_engine_type: Type[SqlAlchemyExecutionEngine] = self.execution_engine_type connection_string: str | None = ( @@ -809,4 +809,4 @@ def _build_engine_with_connect_args( engine_kwargs["url"] = url - return sa.create_engine(**engine_kwargs) # type: ignore[misc] + return sa.create_engine(**engine_kwargs) # type: ignore[misc] # FIXME CoP diff --git a/great_expectations/datasource/fluent/sources.py b/great_expectations/datasource/fluent/sources.py index eea2dc4fed5a..cda653445bd3 100644 --- a/great_expectations/datasource/fluent/sources.py +++ b/great_expectations/datasource/fluent/sources.py @@ -112,18 +112,18 @@ def register_datasource(cls, ds_type: Type[Datasource]) -> None: Example ------- - An `.add_pandas_filesystem()` pandas_filesystem factory method will be added to `context.sources`. + An `.add_pandas_filesystem()` pandas_filesystem factory method will be added to `context.data_sources`. >>> class PandasFilesystemDatasource(_PandasFilePathDatasource): >>> type: str = 'pandas_filesystem' >>> asset_types = [FileAsset] >>> execution_engine: PandasExecutionEngine - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # TODO: check that the name is a valid python identifier (and maybe that it is snake_case?) ds_type_name = _get_field_details(ds_type, "type").default_value if not ds_type_name: - raise TypeRegistrationError( # noqa: TRY003 + raise TypeRegistrationError( # noqa: TRY003 # FIXME CoP f"`{ds_type.__name__}` is missing a `type` attribute with an assigned string value" ) @@ -152,7 +152,7 @@ def _register_datasource( The method name is pulled from the `Datasource.type` attribute. """ if ds_type in datasource_type_lookup: - raise TypeRegistrationError( # noqa: TRY003 + raise TypeRegistrationError( # noqa: TRY003 # FIXME CoP f"'{ds_type_name}' is already a registered typed and there can only be 1 type " "for a given name." 
) @@ -223,7 +223,7 @@ def _register_crud_method( crud_method_info.__name__ = crud_fn_name crud_method_info.__doc__ = crud_fn_doc if crud_fn_name in cls.__crud_registry: - raise TypeRegistrationError( # noqa: TRY003 + raise TypeRegistrationError( # noqa: TRY003 # FIXME CoP f"'`sources.{crud_fn_name}()` already exists", ) logger.debug(f"Registering data_context.source.{crud_fn_name}()") @@ -242,22 +242,22 @@ def _register_assets(cls, ds_type: Type[Datasource], asset_type_lookup: TypeLook for t in asset_types: if t.__name__.startswith("_"): logger.debug( - f"{t} is private, assuming not intended as a public concrete type. Skipping registration" # noqa: E501 + f"{t} is private, assuming not intended as a public concrete type. Skipping registration" # noqa: E501 # FIXME CoP ) continue try: asset_type_name = _get_field_details(t, "type").default_value if asset_type_name is None: - raise TypeError( # noqa: TRY003, TRY301 + raise TypeError( # noqa: TRY003, TRY301 # FIXME CoP f"{t.__name__} `type` field must be assigned and cannot be `None`" ) logger.debug( - f"Registering `{ds_type.__name__}` `DataAsset` `{t.__name__}` as '{asset_type_name}'" # noqa: E501 + f"Registering `{ds_type.__name__}` `DataAsset` `{t.__name__}` as '{asset_type_name}'" # noqa: E501 # FIXME CoP ) asset_type_lookup[t] = asset_type_name except (AttributeError, KeyError, TypeError) as bad_field_exc: - raise TypeRegistrationError( # noqa: TRY003 - f"No `type` field found for `{ds_type.__name__}.asset_types` -> `{t.__name__}` unable to register asset type", # noqa: E501 + raise TypeRegistrationError( # noqa: TRY003 # FIXME CoP + f"No `type` field found for `{ds_type.__name__}.asset_types` -> `{t.__name__}` unable to register asset type", # noqa: E501 # FIXME CoP ) from bad_field_exc cls._bind_asset_factory_method_if_not_present(ds_type, t, asset_type_name) @@ -274,7 +274,7 @@ def _bind_asset_factory_method_if_not_present( if not asset_factory_defined: logger.debug( - f"No 
`{add_asset_factory_method_name}()` method found for `{ds_type.__name__}` generating the method..." # noqa: E501 + f"No `{add_asset_factory_method_name}()` method found for `{ds_type.__name__}` generating the method..." # noqa: E501 # FIXME CoP ) def _add_asset_factory(self: Datasource, name: str, **kwargs) -> pydantic.BaseModel: @@ -283,7 +283,7 @@ def _add_asset_factory(self: Datasource, name: str, **kwargs) -> pydantic.BaseMo # push them to `connect_options` field if self.data_connector_type: logger.info( - f"'{self.name}' {type(self).__name__} uses {self.data_connector_type.__name__}" # noqa: E501 + f"'{self.name}' {type(self).__name__} uses {self.data_connector_type.__name__}" # noqa: E501 # FIXME CoP ) connect_options = { k: v @@ -292,7 +292,7 @@ def _add_asset_factory(self: Datasource, name: str, **kwargs) -> pydantic.BaseMo } if connect_options: logger.info( - f"{self.data_connector_type.__name__} connect_options provided -> {list(connect_options.keys())}" # noqa: E501 + f"{self.data_connector_type.__name__} connect_options provided -> {list(connect_options.keys())}" # noqa: E501 # FIXME CoP ) for k in connect_options: # TODO: avoid this extra loop kwargs.pop(k) @@ -307,12 +307,17 @@ def _add_asset_factory(self: Datasource, name: str, **kwargs) -> pydantic.BaseMo # attr-defined issue # https://github.com/python/mypy/issues/12472 - _add_asset_factory.__signature__ = _merge_signatures( # type: ignore[attr-defined] + _add_asset_factory.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP _add_asset_factory, asset_type, exclude={"type"} ) _add_asset_factory.__name__ = add_asset_factory_method_name setattr(ds_type, add_asset_factory_method_name, _add_asset_factory) + # NOTE: Please review what this looks like in our Public API docs preview before merging + _add_asset_factory.__doc__ = DataSourceManager._build_add_asset_docstring( + asset_type_name + ) + # add the public api decorator public_api(getattr(ds_type, 
add_asset_factory_method_name)) @@ -328,7 +333,7 @@ def _read_asset_factory( # TODO: raise error if `_data_context` not set return self._data_context.get_validator(batch_request=batch_request) # type: ignore[union-attr] # self._data_context must be set - _read_asset_factory.__signature__ = _merge_signatures( # type: ignore[attr-defined] + _read_asset_factory.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP _read_asset_factory, asset_type, exclude={"type"} ) read_asset_factory_method_name = f"read_{asset_type_name}" @@ -339,6 +344,11 @@ def _read_asset_factory( f"`{add_asset_factory_method_name}()` already defined `{ds_type.__name__}`" ) + @staticmethod + def _build_add_asset_docstring(asset_type_name: str) -> str: + article = "an" if asset_type_name[0].lower() in "aeiou" else "a" + return f"""Add {article} {asset_type_name} asset to the datasource.""" + @property def pandas_default(self) -> PandasDatasource: from great_expectations.datasource.fluent import PandasDatasource @@ -354,7 +364,7 @@ def pandas_default(self) -> PandasDatasource: if isinstance(existing_datasource, PandasDatasource): return existing_datasource - raise DefaultPandasDatasourceError( # noqa: TRY003 + raise DefaultPandasDatasourceError( # noqa: TRY003 # FIXME CoP "Another non-pandas datasource already exists " f'with the name: "{DEFAULT_PANDAS_DATASOURCE_NAME}". 
' "Please rename this datasources if you wish " @@ -372,10 +382,10 @@ def _validate_current_datasource_type( current_datasource = self._data_context.data_sources.get(name) except KeyError as e: if raise_if_none: - raise ValueError(f"There is no datasource {name} in the data context.") from e # noqa: TRY003 + raise ValueError(f"There is no datasource {name} in the data context.") from e # noqa: TRY003 # FIXME CoP current_datasource = None if current_datasource and not isinstance(current_datasource, datasource_type): - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"Trying to update datasource {name} but it is not the correct type. " f"Expected {datasource_type.__name__} but got {type(current_datasource).__name__}" ) @@ -392,16 +402,16 @@ def _datasource_passed_in_as_only_argument( datasource: Optional[Datasource] = None if name_or_datasource and isinstance(name_or_datasource, Datasource): if len(kwargs) != 0: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"The datasource must be the sole argument. We also received: {kwargs}" ) datasource = name_or_datasource elif name_or_datasource is None and "datasource" in kwargs: if len(kwargs) != 1: - raise ValueError(f"The datasource must be the sole argument. We received: {kwargs}") # noqa: TRY003 + raise ValueError(f"The datasource must be the sole argument. We received: {kwargs}") # noqa: TRY003 # FIXME CoP datasource = kwargs["datasource"] if datasource and not isinstance(datasource, datasource_type): - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"Trying to modify datasource {datasource.name} but it is not the correct type. 
" f"Expected {datasource_type} but got {type(datasource)}" ) @@ -435,10 +445,10 @@ def _datasource_passed_in( if new_datasource: return new_datasource if ( - name_or_datasource and isinstance(name_or_datasource, str) and "name" not in "kwargs" # noqa: PLR0133 + name_or_datasource and isinstance(name_or_datasource, str) and "name" not in "kwargs" # noqa: PLR0133 # FIXME CoP ) or (name_or_datasource is None and "name" in kwargs and isinstance(kwargs["name"], str)): return None - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "A datasource object or a name string must be present. The datasource or " "name can be passed in as the first and only positional argument or can be" "can be passed in as keyword arguments. The arguments we received were: " @@ -470,7 +480,7 @@ def add_datasource( add_datasource.__doc__ = doc_string # attr-defined issue https://github.com/python/mypy/issues/12472 - add_datasource.__signature__ = _merge_signatures( # type: ignore[attr-defined] + add_datasource.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_datasource, datasource_type, exclude={"type", "assets"}, @@ -517,7 +527,7 @@ def update_datasource( update_datasource.__doc__ = doc_string # attr-defined issue https://github.com/python/mypy/issues/12472 - update_datasource.__signature__ = _merge_signatures( # type: ignore[attr-defined] + update_datasource.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP update_datasource, datasource_type, exclude={"type", "assets"}, @@ -544,7 +554,7 @@ def add_or_update_datasource( else datasource_type(**kwargs) ) - # if new_datasource is None that means name is defined as name_or_datasource or as a kwarg # noqa: E501 + # if new_datasource is None that means name is defined as name_or_datasource or as a kwarg # noqa: E501 # FIXME CoP datasource_name: str = new_datasource.name logger.debug(f"Adding or updating {datasource_type.__name__} with '{datasource_name}'") 
self._validate_current_datasource_type( @@ -567,7 +577,7 @@ def add_or_update_datasource( add_or_update_datasource.__doc__ = doc_string # attr-defined issue https://github.com/python/mypy/issues/12472 - add_or_update_datasource.__signature__ = _merge_signatures( # type: ignore[attr-defined] + add_or_update_datasource.__signature__ = _merge_signatures( # type: ignore[attr-defined] # FIXME CoP add_or_update_datasource, datasource_type, exclude={"type", "assets"}, @@ -588,7 +598,7 @@ def delete_datasource(name: str) -> None: delete_datasource.__doc__ = doc_string # attr-defined issue https://github.com/python/mypy/issues/12472 - delete_datasource.__signature__ = inspect.signature(delete_datasource) # type: ignore[attr-defined] + delete_datasource.__signature__ = inspect.signature(delete_datasource) # type: ignore[attr-defined] # FIXME CoP return delete_datasource @public_api @@ -603,10 +613,19 @@ def delete(self, name: str) -> None: @public_api def all(self) -> DatasourceDict: + """Get all Datasources.""" return self._data_context._datasources @public_api def get(self, name: str) -> Datasource: + """Get a Datasource from the collection by name. + + Parameters: + name: Name of Datasource to get + + Raises: + KeyError when Datasource is not found. + """ return self.all()[name] def __getattr__(self, attr_name: str): @@ -623,16 +642,16 @@ def __getattr__(self, attr_name: str): elif crud_method_type == CrudMethodType.DELETE: # deprecated-v0.17.2 warnings.warn( - f"`{attr_name}` is deprecated as of v0.17.2 and will be removed in v0.19. Please use `.sources.delete` moving forward.", # noqa: E501 + f"`{attr_name}` is deprecated as of v0.17.2 and will be removed in v0.19. 
Please use `.sources.delete` moving forward.", # noqa: E501 # FIXME CoP DeprecationWarning, ) return self.create_delete_crud_method(datasource_type, docstring) else: - raise TypeRegistrationError( # noqa: TRY003 + raise TypeRegistrationError( # noqa: TRY003 # FIXME CoP f"Unknown crud method registered for {attr_name} with type {crud_method_type}" ) except KeyError as e: - raise AttributeError(f"No crud method '{attr_name}' in {self.factories}") from e # noqa: TRY003 + raise AttributeError(f"No crud method '{attr_name}' in {self.factories}") from e # noqa: TRY003 # FIXME CoP @override def __dir__(self) -> List[str]: diff --git a/great_expectations/datasource/fluent/sources.pyi b/great_expectations/datasource/fluent/sources.pyi index dc79b3954fa0..2f9118011bac 100644 --- a/great_expectations/datasource/fluent/sources.pyi +++ b/great_expectations/datasource/fluent/sources.pyi @@ -350,7 +350,7 @@ class DataSourceManager: self, name: str, ) -> None: ... - def add_spark_filesystem( # noqa: PLR0913 + def add_spark_filesystem( # noqa: PLR0913 # FIXME CoP self, name_or_datasource: Optional[Union[str, Datasource]] = None, name: Optional[str] = None, @@ -362,7 +362,7 @@ class DataSourceManager: base_directory: pathlib.Path = ..., data_context_root_directory: Union[pathlib.Path, None] = ..., ) -> SparkFilesystemDatasource: ... - def update_spark_filesystem( # noqa: PLR0913 + def update_spark_filesystem( # noqa: PLR0913 # FIXME CoP self, name_or_datasource: Optional[Union[str, Datasource]] = None, name: Optional[str] = None, @@ -374,7 +374,7 @@ class DataSourceManager: base_directory: pathlib.Path = ..., data_context_root_directory: Union[pathlib.Path, None] = ..., ) -> SparkFilesystemDatasource: ... 
- def add_or_update_spark_filesystem( # noqa: PLR0913 + def add_or_update_spark_filesystem( # noqa: PLR0913 # FIXME CoP self, name_or_datasource: Optional[Union[str, Datasource]] = None, name: Optional[str] = None, @@ -390,7 +390,7 @@ class DataSourceManager: self, name: str, ) -> None: ... - def add_spark_dbfs( # noqa: PLR0913 + def add_spark_dbfs( # noqa: PLR0913 # FIXME CoP self, name_or_datasource: Optional[Union[str, Datasource]] = None, name: Optional[str] = None, @@ -402,7 +402,7 @@ class DataSourceManager: base_directory: pathlib.Path = ..., data_context_root_directory: Union[pathlib.Path, None] = ..., ) -> SparkDBFSDatasource: ... - def update_spark_dbfs( # noqa: PLR0913 + def update_spark_dbfs( # noqa: PLR0913 # FIXME CoP self, name_or_datasource: Optional[Union[str, Datasource]] = None, name: Optional[str] = None, @@ -414,7 +414,7 @@ class DataSourceManager: base_directory: pathlib.Path = ..., data_context_root_directory: Union[pathlib.Path, None] = ..., ) -> SparkDBFSDatasource: ... - def add_or_update_spark_dbfs( # noqa: PLR0913 + def add_or_update_spark_dbfs( # noqa: PLR0913 # FIXME CoP self, name_or_datasource: Optional[Union[str, Datasource]] = None, name: Optional[str] = None, @@ -430,7 +430,7 @@ class DataSourceManager: self, name: str, ) -> None: ... - def add_spark_s3( # noqa: PLR0913 + def add_spark_s3( # noqa: PLR0913 # FIXME CoP self, name_or_datasource: Optional[Union[str, Datasource]] = None, name: Optional[str] = None, @@ -442,7 +442,7 @@ class DataSourceManager: bucket: str = ..., boto3_options: dict[str, Union[ConfigStr, Any]] = ..., ) -> SparkS3Datasource: ... - def update_spark_s3( # noqa: PLR0913 + def update_spark_s3( # noqa: PLR0913 # FIXME CoP self, name_or_datasource: Optional[Union[str, Datasource]] = None, name: Optional[str] = None, @@ -454,7 +454,7 @@ class DataSourceManager: bucket: str = ..., boto3_options: dict[str, Union[ConfigStr, Any]] = ..., ) -> SparkS3Datasource: ... 
- def add_or_update_spark_s3( # noqa: PLR0913 + def add_or_update_spark_s3( # noqa: PLR0913 # FIXME CoP self, name_or_datasource: Optional[Union[str, Datasource]] = None, name: Optional[str] = None, @@ -470,7 +470,7 @@ class DataSourceManager: self, name: str, ) -> None: ... - def add_spark_gcs( # noqa: PLR0913 + def add_spark_gcs( # noqa: PLR0913 # FIXME CoP self, name_or_datasource: Optional[Union[str, Datasource]] = None, name: Optional[str] = None, @@ -482,7 +482,7 @@ class DataSourceManager: bucket_or_name: str = ..., gcs_options: dict[str, Union[ConfigStr, Any]] = ..., ) -> SparkGoogleCloudStorageDatasource: ... - def update_spark_gcs( # noqa: PLR0913 + def update_spark_gcs( # noqa: PLR0913 # FIXME CoP self, name_or_datasource: Optional[Union[str, Datasource]] = None, name: Optional[str] = None, @@ -494,7 +494,7 @@ class DataSourceManager: bucket_or_name: str = ..., gcs_options: dict[str, Union[ConfigStr, Any]] = ..., ) -> SparkGoogleCloudStorageDatasource: ... - def add_or_update_spark_gcs( # noqa: PLR0913 + def add_or_update_spark_gcs( # noqa: PLR0913 # FIXME CoP self, name_or_datasource: Optional[Union[str, Datasource]] = None, name: Optional[str] = None, @@ -510,7 +510,7 @@ class DataSourceManager: self, name: str, ) -> None: ... - def add_spark_abs( # noqa: PLR0913 + def add_spark_abs( # noqa: PLR0913 # FIXME CoP self, name_or_datasource: Optional[Union[str, Datasource]] = None, name: Optional[str] = None, @@ -521,7 +521,7 @@ class DataSourceManager: persist: bool = True, azure_options: dict[str, Any] = ..., ) -> SparkAzureBlobStorageDatasource: ... - def update_spark_abs( # noqa: PLR0913 + def update_spark_abs( # noqa: PLR0913 # FIXME CoP self, name_or_datasource: Optional[Union[str, Datasource]] = None, name: Optional[str] = None, @@ -532,7 +532,7 @@ class DataSourceManager: persist: bool = True, azure_options: dict[str, Any] = ..., ) -> SparkAzureBlobStorageDatasource: ... 
- def add_or_update_spark_abs( # noqa: PLR0913 + def add_or_update_spark_abs( # noqa: PLR0913 # FIXME CoP self, name_or_datasource: Optional[Union[str, Datasource]] = None, name: Optional[str] = None, diff --git a/great_expectations/datasource/fluent/spark_azure_blob_storage_datasource.py b/great_expectations/datasource/fluent/spark_azure_blob_storage_datasource.py index a899ca3ee39b..84be3365470e 100644 --- a/great_expectations/datasource/fluent/spark_azure_blob_storage_datasource.py +++ b/great_expectations/datasource/fluent/spark_azure_blob_storage_datasource.py @@ -37,6 +37,11 @@ class SparkAzureBlobStorageDatasourceError(SparkDatasourceError): @public_api class SparkAzureBlobStorageDatasource(_SparkFilePathDatasource): + """ + SparkAzureBlobStorageDatasource is a subclass of SparkDatasource which connects to + Azure Blob Storage. + """ + # class attributes data_connector_type: ClassVar[Type[AzureBlobStorageDataConnector]] = ( AzureBlobStorageDataConnector @@ -59,22 +64,22 @@ def _get_azure_client(self) -> azure.BlobServiceClient: self, self.azure_options, raise_warning_if_provider_not_present=True ) # pull in needed config substitutions using the `_config_provider` - # The `FluentBaseModel.dict()` call will do the config substitution on the serialized dict if a `config_provider` is passed. # noqa: E501 + # The `FluentBaseModel.dict()` call will do the config substitution on the serialized dict if a `config_provider` is passed. # noqa: E501 # FIXME CoP azure_options: dict = self.dict(config_provider=self._config_provider).get( "azure_options", {} ) - # Thanks to schema validation, we are guaranteed to have one of `conn_str` or `account_url` to # noqa: E501 - # use in authentication (but not both). If the format or content of the provided keys is invalid, # noqa: E501 - # the assignment of `self._account_name` and `self._azure_client` will fail and an error will be raised. 
# noqa: E501 + # Thanks to schema validation, we are guaranteed to have one of `conn_str` or `account_url` to # noqa: E501 # FIXME CoP + # use in authentication (but not both). If the format or content of the provided keys is invalid, # noqa: E501 # FIXME CoP + # the assignment of `self._account_name` and `self._azure_client` will fail and an error will be raised. # noqa: E501 # FIXME CoP conn_str: str | None = azure_options.get("conn_str") account_url: str | None = azure_options.get("account_url") if not bool(conn_str) ^ bool(account_url): - raise SparkAzureBlobStorageDatasourceError( # noqa: TRY003 - "You must provide one of `conn_str` or `account_url` to the `azure_options` key in your config (but not both)" # noqa: E501 + raise SparkAzureBlobStorageDatasourceError( # noqa: TRY003 # FIXME CoP + "You must provide one of `conn_str` or `account_url` to the `azure_options` key in your config (but not both)" # noqa: E501 # FIXME CoP ) - # Validate that "azure" libararies were successfully imported and attempt to create "azure_client" handle. # noqa: E501 + # Validate that "azure" libararies were successfully imported and attempt to create "azure_client" handle. # noqa: E501 # FIXME CoP if azure.BlobServiceClient: # type: ignore[truthy-function] # False if NotImported try: if conn_str is not None: @@ -91,19 +96,19 @@ def _get_azure_client(self) -> azure.BlobServiceClient: ).group(1) azure_client = azure.BlobServiceClient(**azure_options) except Exception as e: - # Failure to create "azure_client" is most likely due invalid "azure_options" dictionary. # noqa: E501 - raise SparkAzureBlobStorageDatasourceError( # noqa: TRY003 + # Failure to create "azure_client" is most likely due invalid "azure_options" dictionary. # noqa: E501 # FIXME CoP + raise SparkAzureBlobStorageDatasourceError( # noqa: TRY003 # FIXME CoP f'Due to exception: "{e!s}", "azure_client" could not be created.' 
) from e else: - raise SparkAzureBlobStorageDatasourceError( # noqa: TRY003 - 'Unable to create "SparkAzureBlobStorageDatasource" due to missing azure.storage.blob dependency.' # noqa: E501 + raise SparkAzureBlobStorageDatasourceError( # noqa: TRY003 # FIXME CoP + 'Unable to create "SparkAzureBlobStorageDatasource" due to missing azure.storage.blob dependency.' # noqa: E501 # FIXME CoP ) self._azure_client = azure_client if not azure_client: - raise SparkAzureBlobStorageDatasourceError("Failed to return `azure_client`") # noqa: TRY003 + raise SparkAzureBlobStorageDatasourceError("Failed to return `azure_client`") # noqa: TRY003 # FIXME CoP return azure_client @@ -116,12 +121,12 @@ def test_connection(self, test_assets: bool = True) -> None: Raises: TestConnectionError: If the connection test fails. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP try: # tests Azure connection _ = self._get_azure_client() except Exception as e: - raise TestConnectionError( # noqa: TRY003 + raise TestConnectionError( # noqa: TRY003 # FIXME CoP "Attempt to connect to datasource failed with the following error message: " f"{e!s}" ) from e @@ -145,11 +150,11 @@ def _build_data_connector( ) -> None: """Builds and attaches the `AzureBlobStorageDataConnector` to the asset.""" if kwargs: - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"_build_data_connector() got unexpected keyword arguments {list(kwargs.keys())}" ) if abs_container is _MISSING: - raise TypeError(f"'{data_asset.name}' is missing required argument 'abs_container'") # noqa: TRY003 + raise TypeError(f"'{data_asset.name}' is missing required argument 'abs_container'") # noqa: TRY003 # FIXME CoP data_asset._data_connector = self.data_connector_type.build_data_connector( datasource_name=self.name, diff --git a/great_expectations/datasource/fluent/spark_azure_blob_storage_datasource.pyi b/great_expectations/datasource/fluent/spark_azure_blob_storage_datasource.pyi index 2a69e1567ada..5ef8da910774 
100644 --- a/great_expectations/datasource/fluent/spark_azure_blob_storage_datasource.pyi +++ b/great_expectations/datasource/fluent/spark_azure_blob_storage_datasource.pyi @@ -32,7 +32,7 @@ class SparkAzureBlobStorageDatasource(_SparkFilePathDatasource): azure_options: dict[str, ConfigStr | Any] = {} # private _azure_client: azure.BlobServiceClient | None - def add_csv_asset( # noqa: PLR0913 + def add_csv_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, diff --git a/great_expectations/datasource/fluent/spark_datasource.py b/great_expectations/datasource/fluent/spark_datasource.py index b6d409c4e480..02bbee1e4946 100644 --- a/great_expectations/datasource/fluent/spark_datasource.py +++ b/great_expectations/datasource/fluent/spark_datasource.py @@ -86,16 +86,16 @@ def _force_reuse_spark_context_deprecation_warning(cls, v: bool) -> bool: # deprecated-v1.0.0 warnings.warn( "force_reuse_spark_context is deprecated and will be removed in version 1.0. " - "In environments that allow it, the existing Spark context will be reused, adding the " # noqa: E501 - "spark_config options that have been passed. If the Spark context cannot be updated with " # noqa: E501 - "the spark_config, the context will be stopped and restarted with the new spark_config.", # noqa: E501 + "In environments that allow it, the existing Spark context will be reused, adding the " # noqa: E501 # FIXME CoP + "spark_config options that have been passed. 
If the Spark context cannot be updated with " # noqa: E501 # FIXME CoP + "the spark_config, the context will be stopped and restarted with the new spark_config.", # noqa: E501 # FIXME CoP category=DeprecationWarning, ) return v @classmethod @override - def update_forward_refs(cls) -> None: # type: ignore[override] + def update_forward_refs(cls) -> None: # type: ignore[override] # FIXME CoP from great_expectations.compatibility.pyspark import SparkSession super().update_forward_refs(SparkSession=SparkSession) @@ -129,7 +129,7 @@ def get_spark(self) -> SparkSession: @override def get_execution_engine(self) -> SparkDFExecutionEngine: - # Method override is required because PrivateAttr _spark won't be passed into Execution Engine # noqa: E501 + # Method override is required because PrivateAttr _spark won't be passed into Execution Engine # noqa: E501 # FIXME CoP # unless it is passed explicitly. current_execution_engine_kwargs = self.dict( exclude=self._get_exec_engine_excludes(), @@ -172,6 +172,10 @@ def test_connection(self, test_assets: bool = True) -> None: @public_api class DataFrameAsset(DataAsset, Generic[_SparkDataFrameT]): + """ + A DataAsset that represents a Spark DataFrame. 
+ """ + # instance attributes type: Literal["dataframe"] = "dataframe" @@ -191,12 +195,12 @@ def get_batch_parameters_keys( def _get_reader_method(self) -> str: raise NotImplementedError( - """Spark DataFrameAsset does not implement "_get_reader_method()" method, because DataFrame is already available.""" # noqa: E501 + """Spark DataFrameAsset does not implement "_get_reader_method()" method, because DataFrame is already available.""" # noqa: E501 # FIXME CoP ) def _get_reader_options_include(self) -> set[str]: raise NotImplementedError( - """Spark DataFrameAsset does not implement "_get_reader_options_include()" method, because DataFrame is already available.""" # noqa: E501 + """Spark DataFrameAsset does not implement "_get_reader_options_include()" method, because DataFrame is already available.""" # noqa: E501 # FIXME CoP ) @override @@ -263,9 +267,9 @@ def _validate_batch_request(self, batch_request: BatchRequest) -> None: datasource_name=self.datasource.name, data_asset_name=self.name, options={}, - batch_slice=batch_request._batch_slice_input, # type: ignore[attr-defined] + batch_slice=batch_request._batch_slice_input, # type: ignore[attr-defined] # FIXME CoP ) - raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 + raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 # FIXME CoP "BatchRequest should have form:\n" f"{pf(expect_batch_request_form.dict())}\n" f"but actually has form:\n{pf(batch_request.dict())}\n" @@ -311,6 +315,15 @@ def get_batch(self, batch_request: BatchRequest) -> Batch: @public_api def add_batch_definition_whole_dataframe(self, name: str) -> BatchDefinition: + """ + Add a BatchDefinition that represents the entire DataFrame. + + Args: + name: The name of the Batch Definition. + + Returns: + A BatchDefinition object that represents the entire DataFrame. 
+ """ return self.add_batch_definition( name=name, partitioner=None, @@ -327,6 +340,11 @@ def is_spark_data_frame(df: Any) -> TypeGuard[Union[DataFrame, ConnectDataFrame] @public_api class SparkDatasource(_SparkDatasource): + """ + A SparkDatasource is a Datasource that connects to a Spark cluster and provides + access to Spark DataFrames. + """ + # class attributes asset_types: ClassVar[List[Type[DataAsset]]] = [DataFrameAsset] @@ -351,7 +369,7 @@ def add_dataframe_asset( Returns: The DataFameAsset that has been added to this datasource. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP asset: DataFrameAsset = DataFrameAsset( name=name, batch_metadata=batch_metadata or {}, diff --git a/great_expectations/datasource/fluent/spark_dbfs_datasource.py b/great_expectations/datasource/fluent/spark_dbfs_datasource.py index 43340431f6ee..89275b37b32a 100644 --- a/great_expectations/datasource/fluent/spark_dbfs_datasource.py +++ b/great_expectations/datasource/fluent/spark_dbfs_datasource.py @@ -39,7 +39,7 @@ def _build_data_connector( ) -> None: """Builds and attaches the `DBFSDataConnector` to the asset.""" if kwargs: - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"_build_data_connector() got unexpected keyword arguments {list(kwargs.keys())}" ) data_asset._data_connector = self.data_connector_type.build_data_connector( diff --git a/great_expectations/datasource/fluent/spark_dbfs_datasource.pyi b/great_expectations/datasource/fluent/spark_dbfs_datasource.pyi index 765fb09013cd..0f0b952b4aa1 100644 --- a/great_expectations/datasource/fluent/spark_dbfs_datasource.pyi +++ b/great_expectations/datasource/fluent/spark_dbfs_datasource.pyi @@ -24,10 +24,10 @@ from great_expectations.datasource.fluent.interfaces import ( logger: Logger class SparkDBFSDatasource(SparkFilesystemDatasource): - type: Literal["spark_dbfs"] # type: ignore[assignment] + type: Literal["spark_dbfs"] # type: ignore[assignment] # FIXME CoP @override - def add_csv_asset( # 
noqa: PLR0913 + def add_csv_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, diff --git a/great_expectations/datasource/fluent/spark_filesystem_datasource.py b/great_expectations/datasource/fluent/spark_filesystem_datasource.py index 025e48d21602..d163ea36f5b5 100644 --- a/great_expectations/datasource/fluent/spark_filesystem_datasource.py +++ b/great_expectations/datasource/fluent/spark_filesystem_datasource.py @@ -22,6 +22,11 @@ @public_api class SparkFilesystemDatasource(_SparkFilePathDatasource): + """ + SparkFilesystemDatasource is a subclass of SparkDatasource which connects to + the filesystem. + """ + # class attributes data_connector_type: ClassVar[Type[FilesystemDataConnector]] = FilesystemDataConnector # these fields should not be passed to the execution engine @@ -45,10 +50,10 @@ def test_connection(self, test_assets: bool = True) -> None: Raises: TestConnectionError: If the connection test fails. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # tests Filesystem connection if not self.base_directory.exists(): - raise TestConnectionError( # noqa: TRY003 + raise TestConnectionError( # noqa: TRY003 # FIXME CoP f"base_directory path: {self.base_directory.resolve()} does not exist." 
) @@ -68,7 +73,7 @@ def _build_data_connector( ) -> None: """Builds and attaches the `FilesystemDataConnector` to the asset.""" if kwargs: - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"_build_data_connector() got unexpected keyword arguments {list(kwargs.keys())}" ) data_asset._data_connector = self.data_connector_type.build_data_connector( diff --git a/great_expectations/datasource/fluent/spark_filesystem_datasource.pyi b/great_expectations/datasource/fluent/spark_filesystem_datasource.pyi index 3428fb96807c..50ef4e3ba1a7 100644 --- a/great_expectations/datasource/fluent/spark_filesystem_datasource.pyi +++ b/great_expectations/datasource/fluent/spark_filesystem_datasource.pyi @@ -45,7 +45,7 @@ class SparkFilesystemDatasource(_SparkFilePathDatasource): base_directory: pathlib.Path data_context_root_directory: Optional[pathlib.Path] = None - def add_csv_asset( # noqa: PLR0913 + def add_csv_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -146,7 +146,7 @@ class SparkFilesystemDatasource(_SparkFilePathDatasource): # CSV Specific Options ^^^ # ^^^ pyspark Docs <> Source Code mismatch ) -> CSVAsset: ... - def add_directory_csv_asset( # noqa: PLR0913 + def add_directory_csv_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -250,7 +250,7 @@ class SparkFilesystemDatasource(_SparkFilePathDatasource): # CSV Specific Options ^^^ # ^^^ pyspark Docs <> Source Code mismatch ) -> DirectoryCSVAsset: ... 
- def add_parquet_asset( # noqa: PLR0913 + def add_parquet_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -262,7 +262,7 @@ class SparkFilesystemDatasource(_SparkFilePathDatasource): modified_after: Optional[Union[bool, str]] = None, recursive_file_lookup: Optional[Union[bool, str]] = None, # Spark Generic File Reader Options ^^^ - # vvv spark parameters for pyspark.sql.DataFrameReader.parquet() (ordered as in pyspark v3.4.0) # noqa: E501 + # vvv spark parameters for pyspark.sql.DataFrameReader.parquet() (ordered as in pyspark v3.4.0) # noqa: E501 # FIXME CoP # See https://spark.apache.org/docs/latest/sql-data-sources-parquet.html for more info. # Parquet Specific Options vvv merge_schema: Optional[Union[bool, str]] = None, @@ -278,7 +278,7 @@ class SparkFilesystemDatasource(_SparkFilePathDatasource): # Spark Generic File Reader Options ^^^ # ^^^ pyspark Docs <> Source Code mismatch ) -> ParquetAsset: ... - def add_directory_parquet_asset( # noqa: PLR0913 + def add_directory_parquet_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -293,7 +293,7 @@ class SparkFilesystemDatasource(_SparkFilePathDatasource): modified_after: Optional[Union[bool, str]] = None, recursive_file_lookup: Optional[Union[bool, str]] = None, # Spark Generic File Reader Options ^^^ - # vvv spark parameters for pyspark.sql.DataFrameReader.parquet() (ordered as in pyspark v3.4.0) # noqa: E501 + # vvv spark parameters for pyspark.sql.DataFrameReader.parquet() (ordered as in pyspark v3.4.0) # noqa: E501 # FIXME CoP # See https://spark.apache.org/docs/latest/sql-data-sources-parquet.html for more info. # Parquet Specific Options vvv merge_schema: Optional[Union[bool, str]] = None, @@ -309,7 +309,7 @@ class SparkFilesystemDatasource(_SparkFilePathDatasource): # Spark Generic File Reader Options ^^^ # ^^^ pyspark Docs <> Source Code mismatch ) -> DirectoryParquetAsset: ... 
- def add_orc_asset( # noqa: PLR0913 + def add_orc_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -333,7 +333,7 @@ class SparkFilesystemDatasource(_SparkFilePathDatasource): # Spark Generic File Reader Options ^^^ # ^^^ pyspark Docs <> Source Code mismatch ) -> ORCAsset: ... - def add_directory_orc_asset( # noqa: PLR0913 + def add_directory_orc_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -360,7 +360,7 @@ class SparkFilesystemDatasource(_SparkFilePathDatasource): # Spark Generic File Reader Options ^^^ # ^^^ pyspark Docs <> Source Code mismatch ) -> DirectoryORCAsset: ... - def add_json_asset( # noqa: PLR0913 + def add_json_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -435,7 +435,7 @@ class SparkFilesystemDatasource(_SparkFilePathDatasource): # JSON Specific Options ^^^ # ^^^ pyspark Docs <> Source Code mismatch ) -> JSONAsset: ... - def add_directory_json_asset( # noqa: PLR0913 + def add_directory_json_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -513,7 +513,7 @@ class SparkFilesystemDatasource(_SparkFilePathDatasource): # JSON Specific Options ^^^ # ^^^ pyspark Docs <> Source Code mismatch ) -> DirectoryJSONAsset: ... - def add_text_asset( # noqa: PLR0913 + def add_text_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -540,7 +540,7 @@ class SparkFilesystemDatasource(_SparkFilePathDatasource): # Spark Generic File Reader Options ^^^ # ^^^ pyspark Docs <> Source Code mismatch ) -> TextAsset: ... - def add_directory_text_asset( # noqa: PLR0913 + def add_directory_text_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, @@ -581,7 +581,7 @@ class SparkFilesystemDatasource(_SparkFilePathDatasource): version_as_of: Optional[str] = None, # Delta Specific Options ^^^ ) -> DeltaAsset: ... 
- def add_delta_directory_asset( # noqa: PLR0913 + def add_delta_directory_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, diff --git a/great_expectations/datasource/fluent/spark_google_cloud_storage_datasource.py b/great_expectations/datasource/fluent/spark_google_cloud_storage_datasource.py index 3f0230f56a97..424b120f4de4 100644 --- a/great_expectations/datasource/fluent/spark_google_cloud_storage_datasource.py +++ b/great_expectations/datasource/fluent/spark_google_cloud_storage_datasource.py @@ -33,6 +33,11 @@ class SparkGoogleCloudStorageDatasourceError(SparkDatasourceError): @public_api class SparkGoogleCloudStorageDatasource(_SparkFilePathDatasource): + """ + SparkGoogleCloudStorageDatasource is a subclass of SparkDatasource which connects to + Google Cloud Storage. + """ + # class attributes data_connector_type: ClassVar[Type[GoogleCloudStorageDataConnector]] = ( GoogleCloudStorageDataConnector @@ -57,7 +62,7 @@ class SparkGoogleCloudStorageDatasource(_SparkFilePathDatasource): def _get_gcs_client(self) -> google.Client: gcs_client: Union[google.Client, None] = self._gcs_client if not gcs_client: - # Validate that "google" libararies were successfully imported and attempt to create "gcs_client" handle. # noqa: E501 + # Validate that "google" libararies were successfully imported and attempt to create "gcs_client" handle. 
# noqa: E501 # FIXME CoP if google.service_account and google.storage: try: credentials: Union[google.Client, None] = ( @@ -69,7 +74,7 @@ def _get_gcs_client(self) -> google.Client: raise_warning_if_provider_not_present=True, ) # pull in needed config substitutions using the `_config_provider` - # The `FluentBaseModel.dict()` call will do the config substitution on the serialized dict if a `config_provider` is passed # noqa: E501 + # The `FluentBaseModel.dict()` call will do the config substitution on the serialized dict if a `config_provider` is passed # noqa: E501 # FIXME CoP gcs_options: dict = self.dict(config_provider=self._config_provider).get( "gcs_options", {} ) @@ -87,13 +92,13 @@ def _get_gcs_client(self) -> google.Client: gcs_client = google.storage.Client(credentials=credentials, **gcs_options) except Exception as e: - # Failure to create "gcs_client" is most likely due invalid "gcs_options" dictionary. # noqa: E501 - raise SparkGoogleCloudStorageDatasourceError( # noqa: TRY003 + # Failure to create "gcs_client" is most likely due invalid "gcs_options" dictionary. # noqa: E501 # FIXME CoP + raise SparkGoogleCloudStorageDatasourceError( # noqa: TRY003 # FIXME CoP f'Due to exception: "{e!r}", "gcs_client" could not be created.' ) from e else: - raise SparkGoogleCloudStorageDatasourceError( # noqa: TRY003 - 'Unable to create "SparkGoogleCloudStorageDatasource" due to missing google dependency.' # noqa: E501 + raise SparkGoogleCloudStorageDatasourceError( # noqa: TRY003 # FIXME CoP + 'Unable to create "SparkGoogleCloudStorageDatasource" due to missing google dependency.' # noqa: E501 # FIXME CoP ) self._gcs_client = gcs_client @@ -109,12 +114,12 @@ def test_connection(self, test_assets: bool = True) -> None: Raises: TestConnectionError: If the connection test fails. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP try: # tests GCS connection _ = self._get_gcs_client() except Exception as e: - raise TestConnectionError( # noqa: TRY003 + raise TestConnectionError( # noqa: TRY003 # FIXME CoP "Attempt to connect to datasource failed with the following error message: " f"{e!s}" ) from e @@ -138,7 +143,7 @@ def _build_data_connector( ) -> None: """Builds and attaches the `GoogleCloudStorageDataConnector` to the asset.""" if kwargs: - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"_build_data_connector() got unexpected keyword arguments {list(kwargs.keys())}" ) data_asset._data_connector = self.data_connector_type.build_data_connector( diff --git a/great_expectations/datasource/fluent/spark_google_cloud_storage_datasource.pyi b/great_expectations/datasource/fluent/spark_google_cloud_storage_datasource.pyi index f3cb0f084c9d..70bbe4413af0 100644 --- a/great_expectations/datasource/fluent/spark_google_cloud_storage_datasource.pyi +++ b/great_expectations/datasource/fluent/spark_google_cloud_storage_datasource.pyi @@ -25,7 +25,7 @@ class SparkGoogleCloudStorageDatasource(_SparkFilePathDatasource): gcs_options: dict[str, ConfigStr | Any] = {} _gcs_client: google.Client | None - def add_csv_asset( # noqa: PLR0913 + def add_csv_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, diff --git a/great_expectations/datasource/fluent/spark_s3_datasource.py b/great_expectations/datasource/fluent/spark_s3_datasource.py index 491c93e65772..d466b0ba33ab 100644 --- a/great_expectations/datasource/fluent/spark_s3_datasource.py +++ b/great_expectations/datasource/fluent/spark_s3_datasource.py @@ -34,6 +34,11 @@ class SparkS3DatasourceError(SparkDatasourceError): @public_api class SparkS3Datasource(_SparkFilePathDatasource): + """ + SparkS3Datasource is a subclass of SparkDatasource which connects to + Amazon S3. 
+ """ + # class attributes data_connector_type: ClassVar[Type[S3DataConnector]] = S3DataConnector # these fields should not be passed to the execution engine @@ -54,25 +59,25 @@ class SparkS3Datasource(_SparkFilePathDatasource): def _get_s3_client(self) -> BaseClient: s3_client: Union[BaseClient, None] = self._s3_client if not s3_client: - # Validate that "boto3" libarary was successfully imported and attempt to create "s3_client" handle. # noqa: E501 + # Validate that "boto3" libarary was successfully imported and attempt to create "s3_client" handle. # noqa: E501 # FIXME CoP if aws.boto3: _check_config_substitutions_needed( self, self.boto3_options, raise_warning_if_provider_not_present=True ) # pull in needed config substitutions using the `_config_provider` - # The `FluentBaseModel.dict()` call will do the config substitution on the serialized dict if a `config_provider` is passed. # noqa: E501 + # The `FluentBaseModel.dict()` call will do the config substitution on the serialized dict if a `config_provider` is passed. # noqa: E501 # FIXME CoP boto3_options: dict = self.dict(config_provider=self._config_provider).get( "boto3_options", {} ) try: s3_client = aws.boto3.client("s3", **boto3_options) except Exception as e: - # Failure to create "s3_client" is most likely due invalid "boto3_options" dictionary. # noqa: E501 - raise SparkS3DatasourceError( # noqa: TRY003 + # Failure to create "s3_client" is most likely due invalid "boto3_options" dictionary. # noqa: E501 # FIXME CoP + raise SparkS3DatasourceError( # noqa: TRY003 # FIXME CoP f'Due to exception: "{e!s}", "s3_client" could not be created.' ) from e else: - raise SparkS3DatasourceError( # noqa: TRY003 + raise SparkS3DatasourceError( # noqa: TRY003 # FIXME CoP 'Unable to create "SparkS3Datasource" due to missing boto3 dependency.' ) @@ -89,12 +94,12 @@ def test_connection(self, test_assets: bool = True) -> None: Raises: TestConnectionError: If the connection test fails. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP try: # tests S3 connection _ = self._get_s3_client() except Exception as e: - raise TestConnectionError( # noqa: TRY003 + raise TestConnectionError( # noqa: TRY003 # FIXME CoP "Attempt to connect to datasource failed with the following error message: " f"{e!s}" ) from e @@ -118,7 +123,7 @@ def _build_data_connector( ) -> None: """Builds and attaches the `S3DataConnector` to the asset.""" if kwargs: - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"_build_data_connector() got unexpected keyword arguments {list(kwargs.keys())}" ) diff --git a/great_expectations/datasource/fluent/spark_s3_datasource.pyi b/great_expectations/datasource/fluent/spark_s3_datasource.pyi index 5eec2adc1c05..47c72c4a5925 100644 --- a/great_expectations/datasource/fluent/spark_s3_datasource.pyi +++ b/great_expectations/datasource/fluent/spark_s3_datasource.pyi @@ -22,7 +22,7 @@ class SparkS3Datasource(_SparkFilePathDatasource): # S3 specific attributes bucket: str boto3_options: dict[str, ConfigStr | Any] = {} - def add_csv_asset( # noqa: PLR0913 + def add_csv_asset( # noqa: PLR0913 # FIXME CoP self, name: str, *, diff --git a/great_expectations/datasource/fluent/sql_datasource.py b/great_expectations/datasource/fluent/sql_datasource.py index ff6921c601bd..f8e2589e2fe4 100644 --- a/great_expectations/datasource/fluent/sql_datasource.py +++ b/great_expectations/datasource/fluent/sql_datasource.py @@ -211,7 +211,7 @@ def batch_parameters_to_batch_spec_kwarg_identifiers( identifiers: Dict = {} for part in self.param_names: if part not in options: - raise ValueError(f"'{part}' must be specified in the batch parameters") # noqa: TRY003 + raise ValueError(f"'{part}' must be specified in the batch parameters") # noqa: TRY003 # FIXME CoP identifiers[part] = options[part] return {self.column_name: identifiers} @@ -344,7 +344,7 @@ def batch_parameters_to_batch_spec_kwarg_identifiers( self, options: BatchParameters ) -> 
Dict[str, Any]: if "quotient" not in options: - raise ValueError("'quotient' must be specified in the batch parameters") # noqa: TRY003 + raise ValueError("'quotient' must be specified in the batch parameters") # noqa: TRY003 # FIXME CoP return {self.column_name: options["quotient"]} @@ -367,7 +367,7 @@ def batch_parameters_to_batch_spec_kwarg_identifiers( self, options: BatchParameters ) -> Dict[str, Any]: if "remainder" not in options: - raise ValueError("'remainder' must be specified in the batch parameters") # noqa: TRY003 + raise ValueError("'remainder' must be specified in the batch parameters") # noqa: TRY003 # FIXME CoP return {self.column_name: options["remainder"]} @@ -389,7 +389,7 @@ def batch_parameters_to_batch_spec_kwarg_identifiers( self, options: BatchParameters ) -> Dict[str, Any]: if self.column_name not in options: - raise ValueError(f"'{self.column_name}' must be specified in the batch parameters") # noqa: TRY003 + raise ValueError(f"'{self.column_name}' must be specified in the batch parameters") # noqa: TRY003 # FIXME CoP return {self.column_name: options[self.column_name]} @override @@ -421,8 +421,8 @@ def batch_parameters_to_batch_spec_kwarg_identifiers( self, options: BatchParameters ) -> Dict[str, Any]: if not (set(self.column_names) <= set(options.keys())): - raise ValueError( # noqa: TRY003 - f"All column names, {self.column_names}, must be specified in the batch parameters. " # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + f"All column names, {self.column_names}, must be specified in the batch parameters. " # noqa: E501 # FIXME CoP f" The options provided were f{options}." 
) return {col: options[col] for col in self.column_names} @@ -468,7 +468,7 @@ def batch_parameters_to_batch_spec_kwarg_identifiers( self, options: BatchParameters ) -> Dict[str, Any]: if "datetime" not in options: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "'datetime' must be specified in the batch parameters to create a batch identifier" ) return {self.column_name: options["datetime"]} @@ -524,8 +524,8 @@ def get_partitioner_implementation( ) -> SqlPartitioner: PartitionerClass = self._partitioner_implementation_map.get(type(abstract_partitioner)) if not PartitionerClass: - raise ValueError( # noqa: TRY003 - f"Requested Partitioner `{abstract_partitioner.method_name}` is not implemented for this DataAsset. " # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + f"Requested Partitioner `{abstract_partitioner.method_name}` is not implemented for this DataAsset. " # noqa: E501 # FIXME CoP ) assert PartitionerClass is not None return PartitionerClass(**abstract_partitioner.dict()) @@ -697,13 +697,13 @@ def build_batch_request( Returns: A BatchRequest object that can be used to obtain a batch from an Asset by calling the get_batch method. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if options is not None and not self._batch_parameters_are_valid( options=options, partitioner=partitioner ): allowed_keys = set(self.get_batch_parameters_keys(partitioner=partitioner)) actual_keys = set(options.keys()) - raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 + raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 # FIXME CoP "batch parameters should only contain keys from the following set:\n" f"{allowed_keys}\nbut your specified keys contain\n" f"{actual_keys.difference(allowed_keys)}\nwhich is not valid.\n" @@ -730,9 +730,9 @@ def add_batch_definition( @public_api def validate_batch_definition(self, partitioner: ColumnPartitioner) -> None: - """Validates that the batch definition column is of a permissible type + """Validates that the Batch Definition column is of a permissible type - This isn't meant to be called directly. This is called internally when a batch definition + This isn't meant to be called directly. This is called internally when a Batch Definition is added. Data asset implementers can override this for their specific data asset. Raises: @@ -786,6 +786,14 @@ def validate_batch_definition(self, partitioner: ColumnPartitioner) -> None: @public_api def add_batch_definition_whole_table(self, name: str) -> BatchDefinition: + """Adds a whole table Batch Definition to this Data Asset + + Args: + name: The name of the Batch Definition to be added + + Returns: + The added BatchDefinition object. + """ return self.add_batch_definition( name=name, partitioner=None, @@ -799,6 +807,19 @@ def add_batch_definition_yearly( sort_ascending: bool = True, validate_batchable: bool = True, ) -> BatchDefinition: + """Adds a yearly Batch Definition to this Data Asset + + Args: + name: The name of the Batch Definition to be added. + column: The column name on which to partition the asset by year. + sort_ascending: Boolean to indicate whether to sort ascending (default) or descending. 
+ When running a validation, we default to running the last Batch Definition + if one is not explicitly specified. + + Returns: + The added BatchDefinition object. + """ + return self.add_batch_definition( name=name, partitioner=ColumnPartitionerYearly( @@ -815,6 +836,19 @@ def add_batch_definition_monthly( sort_ascending: bool = True, validate_batchable: bool = True, ) -> BatchDefinition: + """Adds a monthly Batch Definition to this Data Asset + + Args: + name: The name of the Batch Definition to be added + column: The column name on which to partition the asset by month + sort_ascending: Boolean to indicate whether to sort ascending (default) or descending. + When running a validation, we default to running the last Batch Definition + if one is not explicitly specified. + + Returns: + The added BatchDefinition object. + """ + return self.add_batch_definition( name=name, partitioner=ColumnPartitionerMonthly( @@ -833,6 +867,19 @@ def add_batch_definition_daily( sort_ascending: bool = True, validate_batchable: bool = True, ) -> BatchDefinition: + """Adds a daily Batch Definition to this Data Asset + + Args: + name: The name of the Batch Definition to be added + column: The column name on which to partition the asset by day + sort_ascending: Boolean to indicate whether to sort ascending (default) or descending. + When running a validation, we default to running the last Batch Definition + if one is not explicitly specified. + + Returns: + The added BatchDefinition object. 
+ """ + return self.add_batch_definition( name=name, partitioner=ColumnPartitionerDaily( @@ -866,10 +913,10 @@ def _validate_batch_request(self, batch_request: BatchRequest) -> None: datasource_name=self.datasource.name, data_asset_name=self.name, options=options, - batch_slice=batch_request._batch_slice_input, # type: ignore[attr-defined] + batch_slice=batch_request._batch_slice_input, # type: ignore[attr-defined] # FIXME CoP partitioner=batch_request.partitioner, ) - raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 + raise gx_exceptions.InvalidBatchRequestError( # noqa: TRY003 # FIXME CoP "BatchRequest should have form:\n" f"{pf(expect_batch_request_form.dict())}\n" f"but actually has form:\n{pf(batch_request.dict())}\n" @@ -882,7 +929,7 @@ def _create_batch_spec_kwargs(self) -> dict[str, Any]: Returns: A dictionary that will be passed to self._create_batch_spec(**returned_dict) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP raise NotImplementedError def _create_batch_spec(self, batch_spec_kwargs: dict) -> BatchSpec: @@ -902,6 +949,12 @@ def as_selectable(self) -> sqlalchemy.Selectable: @public_api class QueryAsset(_SQLAsset): + """An asset made from a SQL query + + Args: + query: The query to be used to construct the underlying Data Asset + """ + # Instance fields type: Literal["query"] = "query" query: str @@ -910,7 +963,7 @@ class QueryAsset(_SQLAsset): def query_must_start_with_select(cls, v: str): query = v.lstrip() if not (query.upper().startswith("SELECT") and query[6].isspace()): - raise ValueError("query must start with 'SELECT' followed by a whitespace.") # noqa: TRY003 + raise ValueError("query must start with 'SELECT' followed by a whitespace.") # noqa: TRY003 # FIXME CoP return v @override @@ -937,6 +990,13 @@ def _create_batch_spec(self, batch_spec_kwargs: dict) -> RuntimeQueryBatchSpec: @public_api class TableAsset(_SQLAsset): + """A class representing a table from a SQL database + + Args: + table_name: The name of the database table 
to be added + schema_name: The name of the schema containing the database table to be added. + """ + # Instance fields type: Literal["table"] = "table" # TODO: quoted_name or str @@ -953,7 +1013,7 @@ def qualified_name(self) -> str: @pydantic.validator("table_name", pre=True, always=True) def _default_table_name(cls, table_name: str, values: dict, **kwargs) -> str: if not (validated_table_name := table_name or values.get("name")): - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "table_name cannot be empty and should default to name if not provided" ) @@ -967,7 +1027,7 @@ def _resolve_quoted_name(cls, table_name: str) -> str | quoted_name: # quoted_name a top level import there. from great_expectations.compatibility import sqlalchemy - if sqlalchemy.quoted_name: # type: ignore[truthy-function] + if sqlalchemy.quoted_name: # type: ignore[truthy-function] # FIXME CoP if isinstance(table_name, sqlalchemy.quoted_name): return table_name @@ -996,7 +1056,7 @@ def test_connection(self) -> None: inspector: sqlalchemy.Inspector = sa.inspect(engine) if self.schema_name and self.schema_name not in inspector.get_schema_names(): - raise TestConnectionError( # noqa: TRY003 + raise TestConnectionError( # noqa: TRY003 # FIXME CoP f'Attempt to connect to table: "{self.qualified_name}" failed because the schema ' f'"{self.schema_name}" does not exist.' ) @@ -1008,9 +1068,9 @@ def test_connection(self) -> None: connection.execute(sa.select(1, table).limit(1)) except Exception as query_error: LOGGER.info(f"{self.name} `.test_connection()` query failed: {query_error!r}") - raise TestConnectionError( # noqa: TRY003 + raise TestConnectionError( # noqa: TRY003 # FIXME CoP f"Attempt to connect to table: {self.qualified_name} failed because the test query " - f"failed. Ensure the table exists and the user has access to select data from the table: {query_error}" # noqa: E501 + f"failed. 
Ensure the table exists and the user has access to select data from the table: {query_error}" # noqa: E501 # FIXME CoP ) from query_error @override @@ -1071,7 +1131,7 @@ def _to_lower_if_not_bracketed_by_quotes(cls, target: str) -> str: def _warn_for_more_specific_datasource_type(connection_string: str) -> None: """ Warns if a more specific datasource type may be more appropriate based on the connection string connector prefix. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP from great_expectations.datasource.fluent.sources import DataSourceManager connector: str = connection_string.split("://")[0].split("+")[0] @@ -1098,7 +1158,7 @@ def _warn_for_more_specific_datasource_type(connection_string: str) -> None: ) -# This improves our error messages by providing a more specific type for pydantic to validate against # noqa: E501 +# This improves our error messages by providing a more specific type for pydantic to validate against # noqa: E501 # FIXME CoP # It also ensure the generated jsonschema has a oneOf instead of anyOf field for assets # https://docs.pydantic.dev/1.10/usage/types/#discriminated-unions-aka-tagged-unions AssetTypes = Annotated[Union[TableAsset, QueryAsset], Field(discriminator="type")] @@ -1156,7 +1216,7 @@ def get_engine(self) -> sqlalchemy.Engine: try: self._engine = self._create_engine() except Exception as e: - # connection_string has passed pydantic validation, but still fails to create a sqlalchemy engine # noqa: E501 + # connection_string has passed pydantic validation, but still fails to create a sqlalchemy engine # noqa: E501 # FIXME CoP # one possible case is a missing plugin (e.g. psycopg2) raise SQLAlchemyCreateEngineError(cause=e) from e self._cached_connection_string = self.connection_string @@ -1210,7 +1270,7 @@ def test_connection(self, test_assets: bool = True) -> None: Raises: TestConnectionError: If the connection test fails. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP try: engine: sqlalchemy.Engine = self.get_engine() engine.connect() @@ -1241,7 +1301,7 @@ def add_table_asset( The table asset that is added to the datasource. The type of this object will match the necessary type for this datasource. eg, it could be a TableAsset or a SqliteTableAsset. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if schema_name: schema_name = self._TableAsset._to_lower_if_not_bracketed_by_quotes(schema_name) asset = self._TableAsset( @@ -1270,7 +1330,7 @@ def add_query_asset( The query asset that is added to the datasource. The type of this object will match the necessary type for this datasource. eg, it could be a QueryAsset or a SqliteQueryAsset. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP asset = self._QueryAsset( name=name, query=query, diff --git a/great_expectations/datasource/fluent/sqlite_datasource.py b/great_expectations/datasource/fluent/sqlite_datasource.py index 2320d9925257..821696560f34 100644 --- a/great_expectations/datasource/fluent/sqlite_datasource.py +++ b/great_expectations/datasource/fluent/sqlite_datasource.py @@ -84,7 +84,7 @@ def batch_parameters_to_batch_spec_kwarg_identifiers( self, options: BatchParameters ) -> Dict[str, Any]: if "datetime" not in options: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "'datetime' must be specified in the batch parameters to create a batch identifier" ) return {self.column_name: options["datetime"]} @@ -155,7 +155,7 @@ class SqliteDatasource(SQLDatasource): # Subclass instance var overrides # right side of the operator determines the type name # left side enforces the names on instance creation - type: Literal["sqlite"] = "sqlite" # type: ignore[assignment] + type: Literal["sqlite"] = "sqlite" # type: ignore[assignment] # FIXME CoP connection_string: Union[ConfigStr, SqliteDsn] _TableAsset: Type[SqlTableAsset] = pydantic.PrivateAttr(SqliteTableAsset) @@ -170,6 +170,17 @@ def add_table_asset( 
schema_name: Optional[str] = None, batch_metadata: Optional[BatchMetadata] = None, ) -> SqliteTableAsset: + """Adds a table asset to this SQLite datasource + + Args: + name: The name of this table asset + table_name: The name of the database table + schema_name: The schema to which this table belongs + batch_metadata: An arbitrary dictionary for a caller to annotate the asset + + Returns: + The SqliteTableAsset added + """ return cast( SqliteTableAsset, super().add_table_asset( @@ -190,6 +201,17 @@ def add_query_asset( query: str, batch_metadata: Optional[BatchMetadata] = None, ) -> SqliteQueryAsset: + """Adds a query asset to this SQLite datasource + + Args: + name: The name of this query asset + query: The SQL query + batch_metadata: An arbitrary dictionary for a caller to annotate the asset + + Returns: + The SqliteQueryAsset added + """ + return cast( SqliteQueryAsset, super().add_query_asset(name=name, query=query, batch_metadata=batch_metadata), diff --git a/great_expectations/datasource/fluent/type_lookup.py b/great_expectations/datasource/fluent/type_lookup.py index 288eec1949c4..4a3d43dab0fc 100644 --- a/great_expectations/datasource/fluent/type_lookup.py +++ b/great_expectations/datasource/fluent/type_lookup.py @@ -89,13 +89,13 @@ def __setitem__(self, key: ValidTypes, value: ValidTypes): # This key, value pair has already been registered so we return return if key is None: - raise TypeLookupError(f"`NoneType` for {value} is not allowed - bad key") # noqa: TRY003 + raise TypeLookupError(f"`NoneType` for {value} is not allowed - bad key") # noqa: TRY003 # FIXME CoP if value is None: - raise TypeLookupError(f"`NoneType` for {key} is not allowed - bad value") # noqa: TRY003 + raise TypeLookupError(f"`NoneType` for {key} is not allowed - bad value") # noqa: TRY003 # FIXME CoP if key in self: - raise TypeLookupError(f"`{key}` already set - bad key") # noqa: TRY003 + raise TypeLookupError(f"`{key}` already set - bad key") # noqa: TRY003 # FIXME CoP if value in 
self: - raise TypeLookupError(f"`{value}` already set - bad value") # noqa: TRY003 + raise TypeLookupError(f"`{value}` already set - bad value") # noqa: TRY003 # FIXME CoP super().__setitem__(key, value) super().__setitem__(value, key) @@ -118,7 +118,7 @@ def raise_if_contains(self, collection_: Iterable[ValidTypes]): """Raise a TypeLookup error if the passed iterable contains any overlapping items.""" intersection = self.intersection(collection_) if intersection: - raise TypeLookupError(f"Items are already present - {intersection}") # noqa: TRY003 + raise TypeLookupError(f"Items are already present - {intersection}") # noqa: TRY003 # FIXME CoP @override def clear(self) -> None: @@ -145,7 +145,7 @@ def transaction(self) -> Generator[TypeLookup, None, None]: AssertionError: Should fail >>> print(tuple in t) False - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP txn_exc: Union[Exception, None] = None backup_data = copy.copy(self.data) diff --git a/great_expectations/deployment_version b/great_expectations/deployment_version index c813fe116c9f..31e5c843497c 100644 --- a/great_expectations/deployment_version +++ b/great_expectations/deployment_version @@ -1 +1 @@ -1.2.5 +1.3.3 diff --git a/great_expectations/exceptions/exceptions.py b/great_expectations/exceptions/exceptions.py index 836189ed0cf5..f8d6b8d85182 100644 --- a/great_expectations/exceptions/exceptions.py +++ b/great_expectations/exceptions/exceptions.py @@ -237,7 +237,7 @@ def __init__(self, result_dict) -> None: - Great Expectations enables caching by default. - Please ensure that caching behavior is consistent between the underlying Dataset (e.g. Spark) and Great Expectations. Result: {} -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP self.message = template.format(json.dumps(result_dict, indent=2)) super().__init__(self.message) @@ -249,7 +249,7 @@ def __init__(self) -> None: self.message = """Error: No gx directory was found here! 
- Please check that you are in the correct directory or have specified the correct directory. - If you have never run Great Expectations in this project, please run `great_expectations init` to get started. -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP super().__init__(self.message) @@ -327,12 +327,12 @@ def __init__(self, module_name, package_name, class_name) -> None: self.message = f"""No module named "{package_name + module_name}" could be found in the repository. \ Please make sure that the file, corresponding to this package and module, exists and that dynamic loading of code \ modules, templates, and assets is supported in your execution environment. This error is unrecoverable. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP else: self.message = f"""The module "{module_name}" exists; however, the system is unable to create an instance \ of the class "{class_name}", searched for inside this module. Please make sure that the class named "{class_name}" is \ properly defined inside its intended module and declared correctly by the calling entity. This error is unrecoverable. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP super().__init__(self.message) @@ -455,7 +455,7 @@ def __init__(self, message: str, response: requests.Response) -> None: class GXCloudConfigurationError(GreatExpectationsError): """ Error finding and verifying the required configuration values when preparing to connect to GX Cloud - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # Only used in tests @@ -466,3 +466,22 @@ class DatabaseConnectionError(GreatExpectationsError): class SqlAddBatchDefinitionError(Exception): def __init__(self, msg: str): super().__init__(f"Failed adding batch definition: {msg}") + + +class ValidationActionRegistryError(GreatExpectationsError): + pass + + +class ValidationActionAlreadyRegisteredError(ValidationActionRegistryError): + def __init__(self, action_type: str) -> None: + super().__init__(message=f"Action of type {action_type} is already registered.") + + +class ValidationActionRegistryRetrievalError(ValidationActionRegistryError): + def __init__(self, action_type: str | None) -> None: + if action_type: + message = f"Invalid action configuration; no action of type {action_type} found." + else: + message = "Invalid action configuration; no 'type' key found." + + super().__init__(message) diff --git a/great_expectations/exceptions/resource_freshness.py b/great_expectations/exceptions/resource_freshness.py index 2401b72d4dba..d266a9c88a1a 100644 --- a/great_expectations/exceptions/resource_freshness.py +++ b/great_expectations/exceptions/resource_freshness.py @@ -82,7 +82,7 @@ def __init__(self, name: str) -> None: class ValidationDefinitionNotAddedError(ResourceFreshnessError): def __init__(self, name: str) -> None: super().__init__( - f"ValidationDefinition '{name}' must be added to the DataContext before it can be updated. " # noqa: E501 + f"ValidationDefinition '{name}' must be added to the DataContext before it can be updated. 
" # noqa: E501 # FIXME CoP "Please call `context.validation_definitions.add()`, " "then try your action again." ) @@ -92,7 +92,7 @@ class ValidationDefinitionNotFreshError(ResourceFreshnessError): def __init__(self, name: str) -> None: super().__init__( f"ValidationDefinition '{name}' has changed since it has last been saved. " - "Please update with `.save()`, then try your action again." # noqa: E501 + "Please update with `.save()`, then try your action again." # noqa: E501 # FIXME CoP ) diff --git a/great_expectations/execution_engine/execution_engine.py b/great_expectations/execution_engine/execution_engine.py index 87f0151a3da3..9ff4e6d889b6 100644 --- a/great_expectations/execution_engine/execution_engine.py +++ b/great_expectations/execution_engine/execution_engine.py @@ -28,12 +28,12 @@ ) from great_expectations.types import DictDot from great_expectations.util import ( - convert_to_json_serializable, # noqa: TID251 + convert_to_json_serializable, # noqa: TID251 # FIXME CoP filter_properties_dict, ) -from great_expectations.validator.computed_metric import MetricValue # noqa: TCH001 +from great_expectations.validator.computed_metric import MetricValue # noqa: TCH001 # FIXME CoP from great_expectations.validator.metric_configuration import ( - MetricConfiguration, # noqa: TCH001 + MetricConfiguration, # noqa: TCH001 # FIXME CoP ) if TYPE_CHECKING: @@ -68,10 +68,10 @@ def update(self, value): class MetricComputationConfiguration(DictDot): """ MetricComputationConfiguration is a "dataclass" object, which holds components required for metric computation. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP metric_configuration: MetricConfiguration - metric_fn: sa.func | F # type: ignore[valid-type] + metric_fn: sa.func | F # type: ignore[valid-type] # FIXME CoP metric_provider_kwargs: dict compute_domain_kwargs: Optional[dict] = None accessor_domain_kwargs: Optional[dict] = None @@ -134,7 +134,7 @@ class ExecutionEngine(ABC): batch_spec_defaults: dictionary of BatchSpec overrides (useful for amending configuration at runtime). batch_data_dict: dictionary of Batch objects with corresponding IDs as keys supplied at initialization time validator: Validator object (optional) -- not utilized in V3 and later versions - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP recognized_batch_spec_defaults: Set[str] = set() @@ -149,7 +149,7 @@ def __init__( self.name = name self._validator = validator - # NOTE: using caching makes the strong assumption that the user will not modify the core data store # noqa: E501 + # NOTE: using caching makes the strong assumption that the user will not modify the core data store # noqa: E501 # FIXME CoP # (e.g. self.spark_df) over the lifetime of the dataset instance self._caching = caching # NOTE: 20200918 - this is a naive cache; update. @@ -182,7 +182,7 @@ def __init__( self._load_batch_data_from_dict(batch_data_dict=batch_data_dict) - # Gather the call arguments of the present function (and add the "class_name"), filter out the Falsy values, and # noqa: E501 + # Gather the call arguments of the present function (and add the "class_name"), filter out the Falsy values, and # noqa: E501 # FIXME CoP # set the instance "_config" variable equal to the resulting dictionary. 
self._config = { "name": name, @@ -221,7 +221,7 @@ def _load_batch_data_from_dict(self, batch_data_dict: Dict[str, BatchDataType]) batch_id: str batch_data: BatchDataType for batch_id, batch_data in batch_data_dict.items(): - self.load_batch_data(batch_id=batch_id, batch_data=batch_data) # type: ignore[arg-type] + self.load_batch_data(batch_id=batch_id, batch_data=batch_data) # type: ignore[arg-type] # FIXME CoP def load_batch_data(self, batch_id: str, batch_data: BatchDataUnion) -> None: self._batch_manager.save_batch_data(batch_id=batch_id, batch_data=batch_data) @@ -262,7 +262,7 @@ def resolve_metrics( Returns: resolved_metrics (Dict): a dictionary with the values for the metrics that have just been resolved. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if not metrics_to_resolve: return metrics or {} @@ -282,7 +282,7 @@ def resolve_metrics( ) def resolve_metric_bundle(self, metric_fn_bundle) -> Dict[Tuple[str, str, str], MetricValue]: - """Resolve a bundle of metrics with the same compute Domain as part of a single trip to the compute engine.""" # noqa: E501 + """Resolve a bundle of metrics with the same compute Domain as part of a single trip to the compute engine.""" # noqa: E501 # FIXME CoP raise NotImplementedError def get_domain_records( @@ -296,7 +296,7 @@ def get_domain_records( Returns: data corresponding to the compute domain - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP raise NotImplementedError @@ -327,7 +327,7 @@ def get_compute_domain( In general, the union of the compute_domain_kwargs and accessor_domain_kwargs will be the same as the domain_kwargs provided to this method. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP raise NotImplementedError @@ -343,20 +343,20 @@ def add_column_row_condition( table_domain_kwargs filter_null: if true, add a filter for null values filter_nan: if true, add a filter for nan values - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if filter_null is False and filter_nan is False: logger.warning("add_column_row_condition called with no filter condition requested") return domain_kwargs if filter_nan: - raise gx_exceptions.GreatExpectationsError( # noqa: TRY003 + raise gx_exceptions.GreatExpectationsError( # noqa: TRY003 # FIXME CoP "Base ExecutionEngine does not support adding nan condition filters" ) new_domain_kwargs = copy.deepcopy(domain_kwargs) assert ( "column" in domain_kwargs or column_name is not None - ), "No column provided: A column must be provided in domain_kwargs or in the column_name parameter" # noqa: E501 + ), "No column provided: A column must be provided in domain_kwargs or in the column_name parameter" # noqa: E501 # FIXME CoP if column_name is not None: column = column_name else: @@ -392,7 +392,7 @@ def _build_direct_and_bundled_metric_computation_configurations( Returns: Tuple with two elements: directly-computable and bundled "MetricComputationConfiguration" objects - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP metric_fn_direct_configurations: List[MetricComputationConfiguration] = [] metric_fn_bundle_configurations: List[MetricComputationConfiguration] = [] @@ -410,7 +410,7 @@ def _build_direct_and_bundled_metric_computation_configurations( ] metric_class: MetricProvider metric_fn: Union[Callable, None] - metric_aggregate_fn: sa.func | F # type: ignore[valid-type] + metric_aggregate_fn: sa.func | F # type: ignore[valid-type] # FIXME CoP metric_provider_kwargs: dict compute_domain_kwargs: dict accessor_domain_kwargs: dict @@ -442,7 +442,7 @@ def _build_direct_and_bundled_metric_computation_configurations( ) = 
resolved_metric_dependencies_by_metric_name.pop("metric_partial_fn") except KeyError as e: raise gx_exceptions.MetricError( - message=f'Missing metric dependency: {e!s} for metric "{metric_to_resolve.metric_name}".' # noqa: E501 + message=f'Missing metric dependency: {e!s} for metric "{metric_to_resolve.metric_name}".' # noqa: E501 # FIXME CoP ) metric_fn_bundle_configurations.append( @@ -483,7 +483,7 @@ def _get_computed_metric_evaluation_dependencies_by_metric_name( Returns: Dictionary keyed by "metric_name" with values as computed metric or partial bundling information tuple - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP metric_dependencies_by_metric_name: Dict[ str, Union[MetricValue, Tuple[Any, dict, dict]] ] = {} @@ -502,7 +502,7 @@ def _get_computed_metric_evaluation_dependencies_by_metric_name( ] else: raise gx_exceptions.MetricError( - message=f'Missing metric dependency: "{metric_name}" for metric "{metric_to_resolve.metric_name}".' # noqa: E501 + message=f'Missing metric dependency: "{metric_name}" for metric "{metric_to_resolve.metric_name}".' # noqa: E501 # FIXME CoP ) return metric_dependencies_by_metric_name @@ -521,7 +521,7 @@ def _process_direct_and_bundled_metric_computation_configurations( Returns: resolved_metrics (Dict): a dictionary with the values for the metrics that have just been resolved. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP resolved_metrics: Dict[Tuple[str, str, str], MetricValue] = {} metric_computation_configuration: MetricComputationConfiguration @@ -579,7 +579,7 @@ class MetricDomainTypes. 
Returns: compute_domain_kwargs, accessor_domain_kwargs from domain_kwargs The union of compute_domain_kwargs, accessor_domain_kwargs is the input domain_kwargs - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # Extracting value from enum if it is given for future computation domain_type = MetricDomainTypes(domain_type) @@ -641,7 +641,7 @@ def _partition_table_metric_domain_kwargs( Returns: compute_domain_kwargs, accessor_domain_kwargs from domain_kwargs The union of compute_domain_kwargs, accessor_domain_kwargs is the input domain_kwargs - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP assert ( domain_type == MetricDomainTypes.TABLE ), "This method only supports MetricDomainTypes.TABLE" @@ -667,7 +667,7 @@ def _partition_table_metric_domain_kwargs( map(lambda element: f'"{element}"', unexpected_keys) ) logger.warning( - f"""Unexpected key(s) {unexpected_keys_str} found in domain_kwargs for Domain type "{domain_type.value}".""" # noqa: E501 + f"""Unexpected key(s) {unexpected_keys_str} found in domain_kwargs for Domain type "{domain_type.value}".""" # noqa: E501 # FIXME CoP ) return PartitionDomainKwargs(compute_domain_kwargs, accessor_domain_kwargs) @@ -687,7 +687,7 @@ def _partition_column_metric_domain_kwargs( Returns: compute_domain_kwargs, accessor_domain_kwargs from domain_kwargs The union of compute_domain_kwargs, accessor_domain_kwargs is the input domain_kwargs - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP assert ( domain_type == MetricDomainTypes.COLUMN ), "This method only supports MetricDomainTypes.COLUMN" @@ -696,7 +696,7 @@ def _partition_column_metric_domain_kwargs( accessor_domain_kwargs: Dict = {} if "column" not in compute_domain_kwargs: - raise gx_exceptions.GreatExpectationsError( # noqa: TRY003 + raise gx_exceptions.GreatExpectationsError( # noqa: TRY003 # FIXME CoP "Column not provided in compute_domain_kwargs" ) @@ -719,7 +719,7 @@ def _partition_column_pair_metric_domain_kwargs( Returns: compute_domain_kwargs, accessor_domain_kwargs 
from domain_kwargs The union of compute_domain_kwargs, accessor_domain_kwargs is the input domain_kwargs - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP assert ( domain_type == MetricDomainTypes.COLUMN_PAIR ), "This method only supports MetricDomainTypes.COLUMN_PAIR" @@ -728,7 +728,7 @@ def _partition_column_pair_metric_domain_kwargs( accessor_domain_kwargs: Dict = {} if not ("column_A" in domain_kwargs and "column_B" in domain_kwargs): - raise gx_exceptions.GreatExpectationsError( # noqa: TRY003 + raise gx_exceptions.GreatExpectationsError( # noqa: TRY003 # FIXME CoP "column_A or column_B not found within domain_kwargs" ) @@ -752,7 +752,7 @@ def _partition_multi_column_metric_domain_kwargs( Returns: compute_domain_kwargs, accessor_domain_kwargs from domain_kwargs The union of compute_domain_kwargs, accessor_domain_kwargs is the input domain_kwargs - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP assert ( domain_type == MetricDomainTypes.MULTICOLUMN ), "This method only supports MetricDomainTypes.MULTICOLUMN" @@ -761,12 +761,12 @@ def _partition_multi_column_metric_domain_kwargs( accessor_domain_kwargs: Dict = {} if "column_list" not in domain_kwargs: - raise gx_exceptions.GreatExpectationsError("column_list not found within domain_kwargs") # noqa: TRY003 + raise gx_exceptions.GreatExpectationsError("column_list not found within domain_kwargs") # noqa: TRY003 # FIXME CoP column_list = compute_domain_kwargs.pop("column_list") - if len(column_list) < 2: # noqa: PLR2004 - raise gx_exceptions.GreatExpectationsError( # noqa: TRY003 + if len(column_list) < 2: # noqa: PLR2004 # FIXME CoP + raise gx_exceptions.GreatExpectationsError( # noqa: TRY003 # FIXME CoP "column_list must contain at least 2 columns" ) diff --git a/great_expectations/execution_engine/pandas_execution_engine.py b/great_expectations/execution_engine/pandas_execution_engine.py index 3d61dcc3ecb9..da5295090433 100644 --- a/great_expectations/execution_engine/pandas_execution_engine.py +++ 
b/great_expectations/execution_engine/pandas_execution_engine.py @@ -40,12 +40,12 @@ S3BatchSpec, ) from great_expectations.core.metric_domain_types import ( - MetricDomainTypes, # noqa: TCH001 + MetricDomainTypes, # noqa: TCH001 # FIXME CoP ) from great_expectations.core.util import AzureUrl, GCSUrl, S3Url, sniff_s3_compression from great_expectations.execution_engine import ExecutionEngine from great_expectations.execution_engine.execution_engine import ( - PartitionDomainKwargs, # noqa: TCH001 + PartitionDomainKwargs, # noqa: TCH001 # FIXME CoP ) from great_expectations.execution_engine.pandas_batch_data import PandasBatchData from great_expectations.execution_engine.partition_and_sample.pandas_data_partitioner import ( @@ -100,7 +100,7 @@ class PandasExecutionEngine(ExecutionEngine): expectation_completeness: Complete --ge-feature-maturity-info-- - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP recognized_batch_spec_defaults = { "reader_method", @@ -162,7 +162,7 @@ def _instantiate_gcs_client(self) -> None: 2. passing in explicit credentials via gcs_options 3. 
running Great Expectations from within a GCP container, at which you would be able to create a Client without passing in an additional environment variable or explicit credentials - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP gcs_options = self.config.get("gcs_options", {}) try: credentials = None # If configured with gcloud CLI / env vars @@ -190,19 +190,19 @@ def configure_validator(self, validator) -> None: def load_batch_data( self, batch_id: str, - batch_data: Union[PandasBatchData, pd.DataFrame], # type: ignore[override] + batch_data: Union[PandasBatchData, pd.DataFrame], # type: ignore[override] # FIXME CoP ) -> None: if isinstance(batch_data, pd.DataFrame): batch_data = PandasBatchData(self, batch_data) elif not isinstance(batch_data, PandasBatchData): - raise gx_exceptions.GreatExpectationsError( # noqa: TRY003 - "PandasExecutionEngine requires batch data that is either a DataFrame or a PandasBatchData object" # noqa: E501 + raise gx_exceptions.GreatExpectationsError( # noqa: TRY003 # FIXME CoP + "PandasExecutionEngine requires batch data that is either a DataFrame or a PandasBatchData object" # noqa: E501 # FIXME CoP ) super().load_batch_data(batch_id=batch_id, batch_data=batch_data) @override - def get_batch_data_and_markers( # noqa: C901, PLR0912, PLR0915 + def get_batch_data_and_markers( # noqa: C901, PLR0912, PLR0915 # FIXME CoP self, batch_spec: BatchSpec | PandasBatchSpecProtocol ) -> Tuple[PandasBatchData, BatchMarkers]: # batch_data # We need to build a batch_markers to be used in the dataframe @@ -219,9 +219,9 @@ def get_batch_data_and_markers( # noqa: C901, PLR0912, PLR0915 # batch_data != None is already checked when RuntimeDataBatchSpec is instantiated batch_data = batch_spec.batch_data if isinstance(batch_data, str): - raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 + raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 # FIXME CoP f"""PandasExecutionEngine has been passed a string type batch_data, "{batch_data}", which is 
illegal. Please check your config. -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) if isinstance(batch_spec.batch_data, pd.DataFrame): @@ -229,8 +229,8 @@ def get_batch_data_and_markers( # noqa: C901, PLR0912, PLR0915 elif isinstance(batch_spec.batch_data, PandasBatchData): df = batch_spec.batch_data.dataframe else: - raise ValueError( # noqa: TRY003, TRY004 - "RuntimeDataBatchSpec must provide a Pandas DataFrame or PandasBatchData object." # noqa: E501 + raise ValueError( # noqa: TRY003, TRY004 # FIXME CoP + "RuntimeDataBatchSpec must provide a Pandas DataFrame or PandasBatchData object." # noqa: E501 # FIXME CoP ) batch_spec.batch_data = "PandasDataFrame" @@ -254,8 +254,8 @@ def get_batch_data_and_markers( # noqa: C901, PLR0912, PLR0915 aws.exceptions.ParamValidationError, aws.exceptions.ClientError, ) as error: - raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 - f"""PandasExecutionEngine encountered the following error while trying to read data from S3 Bucket: {error}""" # noqa: E501 + raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 # FIXME CoP + f"""PandasExecutionEngine encountered the following error while trying to read data from S3 Bucket: {error}""" # noqa: E501 # FIXME CoP ) logger.debug(f"Fetching s3 object. Bucket: {s3_url.bucket} Key: {s3_url.key}") reader_fn: DataFrameFactoryFn = self._get_reader_fn(reader_method, s3_url.key) @@ -268,9 +268,9 @@ def get_batch_data_and_markers( # noqa: C901, PLR0912, PLR0915 self._instantiate_azure_client() # if we were not able to instantiate Azure client, then raise error if self._azure is None: - raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 + raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 # FIXME CoP """PandasExecutionEngine has been passed a AzureBatchSpec, - but the ExecutionEngine does not have an Azure client configured. Please check your config.""" # noqa: E501 + but the ExecutionEngine does not have an Azure client configured. 
Please check your config.""" # noqa: E501 # FIXME CoP ) azure_engine = self._azure reader_method = batch_spec.reader_method @@ -294,9 +294,9 @@ def get_batch_data_and_markers( # noqa: C901, PLR0912, PLR0915 self._instantiate_gcs_client() # if we were not able to instantiate GCS client, then raise error if self._gcs is None: - raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 + raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 # FIXME CoP """PandasExecutionEngine has been passed a GCSBatchSpec, - but the ExecutionEngine does not have an GCS client configured. Please check your config.""" # noqa: E501 + but the ExecutionEngine does not have an GCS client configured. Please check your config.""" # noqa: E501 # FIXME CoP ) gcs_engine = self._gcs gcs_url = GCSUrl(batch_spec.path) @@ -307,9 +307,9 @@ def get_batch_data_and_markers( # noqa: C901, PLR0912, PLR0915 gcs_blob = gcs_bucket.blob(gcs_url.blob) logger.debug(f"Fetching GCS blob. Bucket: {gcs_url.bucket} Blob: {gcs_url.blob}") except google.GoogleAPIError as error: - raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 + raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 # FIXME CoP f"""PandasExecutionEngine encountered the following error while trying to read data from GCS \ -Bucket: {error}""" # noqa: E501 +Bucket: {error}""" # noqa: E501 # FIXME CoP ) reader_fn = self._get_reader_fn(reader_method, gcs_url.blob) buf = BytesIO(gcs_blob.download_as_bytes()) @@ -333,7 +333,7 @@ def get_batch_data_and_markers( # noqa: C901, PLR0912, PLR0915 ) if isinstance(reader_fn_result, list): if len(reader_fn_result) > 1: - raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 + raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 # FIXME CoP "Pandas reader method must return a single DataFrame, " f'but "{reader_method}" returned {len(reader_fn_result)} DataFrames.' 
) @@ -347,12 +347,12 @@ def get_batch_data_and_markers( # noqa: C901, PLR0912, PLR0915 df = reader_fn(**batch_spec.reader_options) else: - raise gx_exceptions.BatchSpecError( # noqa: TRY003 + raise gx_exceptions.BatchSpecError( # noqa: TRY003 # FIXME CoP f"""batch_spec must be of type RuntimeDataBatchSpec, PandasBatchSpec, PathBatchSpec, S3BatchSpec, AzureBatchSpec or FabricBatchSpec \ -not {batch_spec.__class__.__name__}""" # noqa: E501 +not {batch_spec.__class__.__name__}""" # noqa: E501 # FIXME CoP ) - df = self._apply_partitioning_and_sampling_methods(batch_spec, df) # type: ignore[arg-type] + df = self._apply_partitioning_and_sampling_methods(batch_spec, df) # type: ignore[arg-type] # FIXME CoP if df.memory_usage().sum() < HASH_THRESHOLD: batch_markers["pandas_data_fingerprint"] = hash_pandas_dataframe(df) @@ -386,10 +386,10 @@ def _apply_partitioning_and_sampling_methods( def dataframe(self) -> pd.DataFrame: """Tests whether or not a Batch has been loaded. If the loaded batch does not exist, raises a ValueError Exception - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # Changed to is None because was breaking prior if self.batch_manager.active_batch_data is None: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "Batch has not been loaded - please run load_batch_data() to load a batch." ) @@ -397,7 +397,7 @@ def dataframe(self) -> pd.DataFrame: # NOTE Abe 20201105: Any reason this shouldn't be a private method? @staticmethod - def guess_reader_method_from_path(path: str): # noqa: C901, PLR0911 + def guess_reader_method_from_path(path: str): # noqa: C901, PLR0911 # FIXME CoP """Helper method for deciding which reader to use to read in a certain path. 
Args: @@ -429,7 +429,7 @@ def guess_reader_method_from_path(path: str): # noqa: C901, PLR0911 return {"reader_method": "read_sas"} else: - raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 + raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 # FIXME CoP f'Unable to determine reader method from path: "{path}".' ) @@ -454,9 +454,9 @@ def _get_reader_fn( Returns: ReaderMethod to use for the filepath - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if reader_method is None and path is None: - raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 + raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 # FIXME CoP "Unable to determine pandas reader function without reader_method or path." ) @@ -474,17 +474,17 @@ def _get_reader_fn( reader_fn = partial(reader_fn, **reader_options) return reader_fn except AttributeError: - raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 + raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 # FIXME CoP f'Unable to find reader_method "{reader_method}" in pandas.' ) @override def resolve_metric_bundle(self, metric_fn_bundle) -> Dict[Tuple[str, str, str], Any]: - """Resolve a bundle of metrics with the same compute Domain as part of a single trip to the compute engine.""" # noqa: E501 - return {} # This is NO-OP for "PandasExecutionEngine" (no bundling for direct execution computational backend). # noqa: E501 + """Resolve a bundle of metrics with the same compute Domain as part of a single trip to the compute engine.""" # noqa: E501 # FIXME CoP + return {} # This is NO-OP for "PandasExecutionEngine" (no bundling for direct execution computational backend). 
# noqa: E501 # FIXME CoP @override - def get_domain_records( # noqa: C901, PLR0912 + def get_domain_records( # noqa: C901, PLR0912 # FIXME CoP self, domain_kwargs: dict, ) -> pd.DataFrame: @@ -495,10 +495,10 @@ def get_domain_records( # noqa: C901, PLR0912 Returns: A DataFrame (the data on which to compute returned in the format of a Pandas DataFrame) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP table = domain_kwargs.get("table", None) if table: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "PandasExecutionEngine does not currently support multiple named tables." ) @@ -508,16 +508,16 @@ def get_domain_records( # noqa: C901, PLR0912 if self.batch_manager.active_batch_data_id is not None: data = cast(PandasBatchData, self.batch_manager.active_batch_data).dataframe else: - raise gx_exceptions.ValidationError( # noqa: TRY003 + raise gx_exceptions.ValidationError( # noqa: TRY003 # FIXME CoP "No batch is specified, but could not identify a loaded batch." ) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if batch_id in self.batch_manager.batch_data_cache: data = cast( PandasBatchData, self.batch_manager.batch_data_cache[batch_id] ).dataframe else: - raise gx_exceptions.ValidationError( # noqa: TRY003 + raise gx_exceptions.ValidationError( # noqa: TRY003 # FIXME CoP f"Unable to find batch with batch_id {batch_id}" ) @@ -529,7 +529,7 @@ def get_domain_records( # noqa: C901, PLR0912 if condition_parser == CONDITION_PARSER_PANDAS: data = data.query(row_condition, parser=condition_parser) else: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "condition_parser for Pandas is required when setting a row_condition." 
) @@ -559,9 +559,9 @@ def get_domain_records( # noqa: C901, PLR0912 how="any", subset=[column_A_name, column_B_name], ) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if ignore_row_if != "neither": - raise ValueError(f'Unrecognized value of ignore_row_if ("{ignore_row_if}").') # noqa: TRY003 + raise ValueError(f'Unrecognized value of ignore_row_if ("{ignore_row_if}").') # noqa: TRY003 # FIXME CoP return data @@ -581,9 +581,9 @@ def get_domain_records( # noqa: C901, PLR0912 how="any", subset=column_list, ) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if ignore_row_if != "never": - raise ValueError(f'Unrecognized value of ignore_row_if ("{ignore_row_if}").') # noqa: TRY003 + raise ValueError(f'Unrecognized value of ignore_row_if ("{ignore_row_if}").') # noqa: TRY003 # FIXME CoP return data @@ -616,10 +616,10 @@ def get_compute_domain( - a dictionary of compute_domain_kwargs, describing the DataFrame - a dictionary of accessor_domain_kwargs, describing any accessors needed to identify the Domain within the compute domain - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP table: str = domain_kwargs.get("table", None) if table: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "PandasExecutionEngine does not currently support multiple named tables." ) diff --git a/great_expectations/execution_engine/partition_and_sample/data_partitioner.py b/great_expectations/execution_engine/partition_and_sample/data_partitioner.py index b54f24a00605..72c01913b729 100644 --- a/great_expectations/execution_engine/partition_and_sample/data_partitioner.py +++ b/great_expectations/execution_engine/partition_and_sample/data_partitioner.py @@ -73,12 +73,12 @@ def __hash__(self: PartitionerMethod): return hash(self.value) -class DataPartitioner(abc.ABC): # noqa: B024 +class DataPartitioner(abc.ABC): # noqa: B024 # FIXME CoP """Abstract base class containing methods for partitioning data accessible via Execution Engines. 
Note, for convenience, you can also access DatePart via the instance variable date_part e.g. DataPartitioner.date_part.MONTH - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP date_part: ClassVar[Type[DatePart]] = DatePart @@ -136,11 +136,11 @@ def _validate_date_parts(date_parts: List[DatePart] | List[str]) -> None: None, this method raises exceptions if the config is invalid. """ if len(date_parts) == 0: - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP "date_parts are required when using partition_on_date_parts." ) if not all(isinstance(dp, (DatePart, str)) for dp in date_parts): - raise gx_exceptions.InvalidConfigError("date_parts should be of type DatePart or str.") # noqa: TRY003 + raise gx_exceptions.InvalidConfigError("date_parts should be of type DatePart or str.") # noqa: TRY003 # FIXME CoP @staticmethod def _verify_all_strings_are_valid_date_parts(date_part_strings: List[str]) -> None: @@ -155,8 +155,8 @@ def _verify_all_strings_are_valid_date_parts(date_part_strings: List[str]) -> No try: [DatePart(date_part_string) for date_part_string in date_part_strings] except ValueError as e: - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 - f"{e} please only specify strings that are supported in DatePart: {[dp.value for dp in DatePart]}" # noqa: E501 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP + f"{e} please only specify strings that are supported in DatePart: {[dp.value for dp in DatePart]}" # noqa: E501 # FIXME CoP ) def _convert_datetime_batch_identifiers_to_date_parts_dict( diff --git a/great_expectations/execution_engine/partition_and_sample/data_sampler.py b/great_expectations/execution_engine/partition_and_sample/data_sampler.py index bf53c278050a..1d69c368f570 100644 --- a/great_expectations/execution_engine/partition_and_sample/data_sampler.py +++ b/great_expectations/execution_engine/partition_and_sample/data_sampler.py @@ -54,7 +54,7 @@ def 
verify_batch_spec_sampling_kwargs_exists(self, batch_spec: BatchSpec) -> Non SamplerError """ if batch_spec.get("sampling_kwargs") is None: - raise gx_exceptions.SamplerError( # noqa: TRY003 + raise gx_exceptions.SamplerError( # noqa: TRY003 # FIXME CoP "Please make sure to provide sampling_kwargs in addition to your sampling_method." ) @@ -71,8 +71,8 @@ def verify_batch_spec_sampling_kwargs_key_exists(self, key: str, batch_spec: Bat SamplerError """ if batch_spec["sampling_kwargs"].get(key) is None: - raise gx_exceptions.SamplerError( # noqa: TRY003 - f"Please make sure to provide the {key} key in sampling_kwargs in addition to your sampling_method." # noqa: E501 + raise gx_exceptions.SamplerError( # noqa: TRY003 # FIXME CoP + f"Please make sure to provide the {key} key in sampling_kwargs in addition to your sampling_method." # noqa: E501 # FIXME CoP ) @staticmethod diff --git a/great_expectations/execution_engine/partition_and_sample/pandas_data_partitioner.py b/great_expectations/execution_engine/partition_and_sample/pandas_data_partitioner.py index 08d00e442a42..5338ee2ac421 100644 --- a/great_expectations/execution_engine/partition_and_sample/pandas_data_partitioner.py +++ b/great_expectations/execution_engine/partition_and_sample/pandas_data_partitioner.py @@ -164,7 +164,7 @@ def partition_on_column_value(df, column_name: str, batch_identifiers: dict) -> Returns: Filtered spark DataFrame. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return df[df[column_name] == batch_identifiers[column_name]] @staticmethod @@ -211,7 +211,7 @@ def partition_on_multi_column_values( for column_name in column_names: value = batch_identifiers.get(column_name) if not value: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"In order for PandasExecution to `_partition_on_multi_column_values`, " f"all values in column_names must also exist in batch_identifiers. " f"{column_name} was not found in batch_identifiers." 
@@ -232,9 +232,9 @@ def partition_on_hashed_column( hash_method = getattr(hashlib, hash_function_name) except (TypeError, AttributeError): raise ( - gx_exceptions.ExecutionEngineError( # noqa: TRY003 + gx_exceptions.ExecutionEngineError( # noqa: TRY003 # FIXME CoP f"""The partitioning method used with SparkDFExecutionEngine has a reference to an invalid hash_function_name. - Reference to {hash_function_name} cannot be found.""" # noqa: E501 + Reference to {hash_function_name} cannot be found.""" # noqa: E501 # FIXME CoP ) ) matching_rows = df[column_name].map( diff --git a/great_expectations/execution_engine/partition_and_sample/pandas_data_sampler.py b/great_expectations/execution_engine/partition_and_sample/pandas_data_sampler.py index 25830e02e911..5f56bf32862f 100644 --- a/great_expectations/execution_engine/partition_and_sample/pandas_data_sampler.py +++ b/great_expectations/execution_engine/partition_and_sample/pandas_data_sampler.py @@ -112,7 +112,7 @@ def sample_using_a_list( column_name: str = self.get_sampling_kwargs_value_or_default(batch_spec, "column_name") value_list: int = self.get_sampling_kwargs_value_or_default(batch_spec, "value_list") - return df[df[column_name].isin(value_list)] # type: ignore[arg-type] + return df[df[column_name].isin(value_list)] # type: ignore[arg-type] # FIXME CoP def sample_using_hash( self, @@ -153,9 +153,9 @@ def sample_using_hash( hash_func = getattr(hashlib, hash_function_name) except (TypeError, AttributeError): raise ( - gx_exceptions.ExecutionEngineError( # noqa: TRY003 + gx_exceptions.ExecutionEngineError( # noqa: TRY003 # FIXME CoP f"""The sampling method used with PandasExecutionEngine has a reference to an invalid hash_function_name. 
- Reference to {hash_function_name} cannot be found.""" # noqa: E501 + Reference to {hash_function_name} cannot be found.""" # noqa: E501 # FIXME CoP ) ) diff --git a/great_expectations/execution_engine/partition_and_sample/sparkdf_data_partitioner.py b/great_expectations/execution_engine/partition_and_sample/sparkdf_data_partitioner.py index ffe6252e5f16..e4b6fd89ad1e 100644 --- a/great_expectations/execution_engine/partition_and_sample/sparkdf_data_partitioner.py +++ b/great_expectations/execution_engine/partition_and_sample/sparkdf_data_partitioner.py @@ -156,7 +156,7 @@ def _convert_date_part_to_spark_equivalent(date_part: DatePart | str) -> str: Returns: String representing the spark function to use for the given DatePart. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP date_part = DatePart(date_part) spark_date_part_decoder: dict = { @@ -197,7 +197,7 @@ def partition_on_column_value( Returns: Filtered spark DataFrame. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return df.filter(F.col(column_name) == batch_identifiers[column_name]) @staticmethod @@ -260,7 +260,7 @@ def partition_on_multi_column_values(df, column_names: list, batch_identifiers: for column_name in column_names: value = batch_identifiers.get(column_name) if not value: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"In order for SparkDFExecutionEngine to `_partition_on_multi_column_values`, " f"all values in column_names must also exist in batch_identifiers. " f"{column_name} was not found in batch_identifiers." @@ -281,9 +281,9 @@ def partition_on_hashed_column( getattr(hashlib, hash_function_name) except (TypeError, AttributeError): raise ( - gx_exceptions.ExecutionEngineError( # noqa: TRY003 + gx_exceptions.ExecutionEngineError( # noqa: TRY003 # FIXME CoP f"""The partitioning method used with SparkDFExecutionEngine has a reference to an invalid hash_function_name. 
- Reference to {hash_function_name} cannot be found.""" # noqa: E501 + Reference to {hash_function_name} cannot be found.""" # noqa: E501 # FIXME CoP ) ) diff --git a/great_expectations/execution_engine/partition_and_sample/sparkdf_data_sampler.py b/great_expectations/execution_engine/partition_and_sample/sparkdf_data_sampler.py index 69325742f2d2..6d5f5953712b 100644 --- a/great_expectations/execution_engine/partition_and_sample/sparkdf_data_sampler.py +++ b/great_expectations/execution_engine/partition_and_sample/sparkdf_data_sampler.py @@ -156,9 +156,9 @@ def sample_using_hash( getattr(hashlib, str(hash_function_name)) except (TypeError, AttributeError): raise ( - gx_exceptions.ExecutionEngineError( # noqa: TRY003 + gx_exceptions.ExecutionEngineError( # noqa: TRY003 # FIXME CoP f"""The sampling method used with SparkDFExecutionEngine has a reference to an invalid hash_function_name. - Reference to {hash_function_name} cannot be found.""" # noqa: E501 + Reference to {hash_function_name} cannot be found.""" # noqa: E501 # FIXME CoP ) ) diff --git a/great_expectations/execution_engine/partition_and_sample/sqlalchemy_data_partitioner.py b/great_expectations/execution_engine/partition_and_sample/sqlalchemy_data_partitioner.py index 572f4e2f3921..a4d5deb05779 100644 --- a/great_expectations/execution_engine/partition_and_sample/sqlalchemy_data_partitioner.py +++ b/great_expectations/execution_engine/partition_and_sample/sqlalchemy_data_partitioner.py @@ -48,19 +48,19 @@ def __init__(self, dialect: str): DATETIME_PARTITIONER_METHOD_TO_GET_UNIQUE_BATCH_IDENTIFIERS_METHOD_MAPPING: dict = { PartitionerMethod.PARTITION_ON_YEAR: "get_data_for_batch_identifiers_year", - PartitionerMethod.PARTITION_ON_YEAR_AND_MONTH: "get_data_for_batch_identifiers_year_and_month", # noqa: E501 - PartitionerMethod.PARTITION_ON_YEAR_AND_MONTH_AND_DAY: "get_data_for_batch_identifiers_year_and_month_and_day", # noqa: E501 - PartitionerMethod.PARTITION_ON_DATE_PARTS: 
"get_data_for_batch_identifiers_for_partition_on_date_parts", # noqa: E501 + PartitionerMethod.PARTITION_ON_YEAR_AND_MONTH: "get_data_for_batch_identifiers_year_and_month", # noqa: E501 # FIXME CoP + PartitionerMethod.PARTITION_ON_YEAR_AND_MONTH_AND_DAY: "get_data_for_batch_identifiers_year_and_month_and_day", # noqa: E501 # FIXME CoP + PartitionerMethod.PARTITION_ON_DATE_PARTS: "get_data_for_batch_identifiers_for_partition_on_date_parts", # noqa: E501 # FIXME CoP } PARTITIONER_METHOD_TO_GET_UNIQUE_BATCH_IDENTIFIERS_METHOD_MAPPING: dict = { - PartitionerMethod.PARTITION_ON_WHOLE_TABLE: "get_partition_query_for_data_for_batch_identifiers_for_partition_on_whole_table", # noqa: E501 - PartitionerMethod.PARTITION_ON_COLUMN_VALUE: "get_partition_query_for_data_for_batch_identifiers_for_partition_on_column_value", # noqa: E501 - PartitionerMethod.PARTITION_ON_CONVERTED_DATETIME: "get_partition_query_for_data_for_batch_identifiers_for_partition_on_converted_datetime", # noqa: E501 - PartitionerMethod.PARTITION_ON_DIVIDED_INTEGER: "get_partition_query_for_data_for_batch_identifiers_for_partition_on_divided_integer", # noqa: E501 - PartitionerMethod.PARTITION_ON_MOD_INTEGER: "get_partition_query_for_data_for_batch_identifiers_for_partition_on_mod_integer", # noqa: E501 - PartitionerMethod.PARTITION_ON_MULTI_COLUMN_VALUES: "get_partition_query_for_data_for_batch_identifiers_for_partition_on_multi_column_values", # noqa: E501 - PartitionerMethod.PARTITION_ON_HASHED_COLUMN: "get_partition_query_for_data_for_batch_identifiers_for_partition_on_hashed_column", # noqa: E501 + PartitionerMethod.PARTITION_ON_WHOLE_TABLE: "get_partition_query_for_data_for_batch_identifiers_for_partition_on_whole_table", # noqa: E501 # FIXME CoP + PartitionerMethod.PARTITION_ON_COLUMN_VALUE: "get_partition_query_for_data_for_batch_identifiers_for_partition_on_column_value", # noqa: E501 # FIXME CoP + PartitionerMethod.PARTITION_ON_CONVERTED_DATETIME: 
"get_partition_query_for_data_for_batch_identifiers_for_partition_on_converted_datetime", # noqa: E501 # FIXME CoP + PartitionerMethod.PARTITION_ON_DIVIDED_INTEGER: "get_partition_query_for_data_for_batch_identifiers_for_partition_on_divided_integer", # noqa: E501 # FIXME CoP + PartitionerMethod.PARTITION_ON_MOD_INTEGER: "get_partition_query_for_data_for_batch_identifiers_for_partition_on_mod_integer", # noqa: E501 # FIXME CoP + PartitionerMethod.PARTITION_ON_MULTI_COLUMN_VALUES: "get_partition_query_for_data_for_batch_identifiers_for_partition_on_multi_column_values", # noqa: E501 # FIXME CoP + PartitionerMethod.PARTITION_ON_HASHED_COLUMN: "get_partition_query_for_data_for_batch_identifiers_for_partition_on_hashed_column", # noqa: E501 # FIXME CoP } def partition_on_year( @@ -167,7 +167,7 @@ def partition_on_date_parts( column_batch_identifiers, date_parts ) - query: Union[sqlalchemy.BinaryExpression, sqlalchemy.BooleanClauseList] = sa.and_( # type: ignore[assignment] + query: Union[sqlalchemy.BinaryExpression, sqlalchemy.BooleanClauseList] = sa.and_( # type: ignore[assignment] # FIXME CoP *[ sa.extract(date_part.value, sa.column(column_name)) == date_parts_dict[date_part.value] @@ -206,7 +206,7 @@ def partition_on_converted_datetime( ) raise NotImplementedError( - f'Partitioner method "partition_on_converted_datetime" is not supported for "{self._dialect}" SQL dialect.' # noqa: E501 + f'Partitioner method "partition_on_converted_datetime" is not supported for "{self._dialect}" SQL dialect.' 
# noqa: E501 # FIXME CoP ) def partition_on_divided_integer( @@ -284,7 +284,7 @@ def partition_on_multi_column_values( ) -> bool: """Partition on the joint values in the named columns""" - return sa.and_( # type: ignore[return-value] + return sa.and_( # type: ignore[return-value] # FIXME CoP *( sa.column(column_name) == column_value for column_name, column_value in batch_identifiers.items() @@ -305,7 +305,7 @@ def partition_on_hashed_column( ) raise NotImplementedError( - f'Partitioner method "partition_on_hashed_column" is not supported for "{self._dialect}" SQL dialect.' # noqa: E501 + f'Partitioner method "partition_on_hashed_column" is not supported for "{self._dialect}" SQL dialect.' # noqa: E501 # FIXME CoP ) def get_data_for_batch_identifiers( @@ -327,7 +327,7 @@ def get_data_for_batch_identifiers( Returns: List of dicts of the form [{column_name: {"key": value}}] - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP processed_partitioner_method_name: str = self._get_partitioner_method_name( partitioner_method_name ) @@ -463,7 +463,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_date_par Returns: List of dicts of the form [{column_name: {date_part_name: date_part_value}}] - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP self._validate_date_parts(date_parts) date_parts = self._convert_date_parts(date_parts) @@ -477,7 +477,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_date_par concat_date_parts: sqlalchemy.Cast | sqlalchemy.ColumnOperators if len(date_parts) == 1: # MSSql does not accept single item concatenation - concat_clause = sa.func.distinct( # type: ignore[assignment] + concat_clause = sa.func.distinct( # type: ignore[assignment] # FIXME CoP sa.func.extract(date_parts[0].value, sa.column(column_name)).label( date_parts[0].value ) @@ -487,7 +487,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_date_par """ # NOTE: 6/29/2022 Certain SQLAlchemy-compliant backends (e.g., 
Amazon Redshift, SQLite) allow only binary operators for "CONCAT". - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if self._dialect == GXSqlDialect.SQLITE: concat_date_parts = sa.cast( sa.func.extract(date_parts[0].value, sa.column(column_name)), @@ -503,7 +503,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_date_par ) ) - concat_clause = sa.func.distinct(concat_date_parts).label("concat_distinct_values") # type: ignore[assignment] + concat_clause = sa.func.distinct(concat_date_parts).label("concat_distinct_values") # type: ignore[assignment] # FIXME CoP else: concat_date_parts = sa.func.concat( "", @@ -522,9 +522,9 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_date_par ), ) - concat_clause = sa.func.distinct(concat_date_parts).label("concat_distinct_values") # type: ignore[assignment] + concat_clause = sa.func.distinct(concat_date_parts).label("concat_distinct_values") # type: ignore[assignment] # FIXME CoP - partitioned_query: sqlalchemy.Selectable = sa.select( # type: ignore[call-overload] + partitioned_query: sqlalchemy.Selectable = sa.select( # type: ignore[call-overload] # FIXME CoP concat_clause, *[ sa.cast(sa.func.extract(date_part.value, sa.column(column_name)), sa.Integer).label( @@ -586,7 +586,7 @@ def _execute_partitioned_query( Returns: List of row results. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return execution_engine.execute_partitioned_query(partitioned_query) def _get_params_for_batch_identifiers_from_date_part_partitioner( @@ -605,7 +605,7 @@ def _get_params_for_batch_identifiers_from_date_part_partitioner( Returns: List of dicts of the form [{column_name: {date_part_name: date_part_value}}] - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP date_parts = self._convert_date_parts(date_parts) data_for_batch_identifiers: List[dict] = [ @@ -648,7 +648,7 @@ def get_data_for_batch_identifiers_for_non_date_part_partitioners( Returns: List of dicts of the form [{column_name: {"key": value}}] - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP get_partition_query_method_name: str = ( self._get_method_name_for_get_data_for_batch_identifiers_method(partitioner_method_name) ) @@ -674,7 +674,7 @@ def _get_method_name_for_get_data_for_batch_identifiers_method( Returns: Name of the corresponding method to get data for building batch identifiers. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP processed_partitioner_method_name: str = self._get_partitioner_method_name( partitioner_method_name ) @@ -683,8 +683,8 @@ def _get_method_name_for_get_data_for_batch_identifiers_method( processed_partitioner_method_name ] except ValueError: - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 - f"Please provide a supported partitioner method name, you provided: {partitioner_method_name}" # noqa: E501 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP + f"Please provide a supported partitioner method name, you provided: {partitioner_method_name}" # noqa: E501 # FIXME CoP ) @staticmethod @@ -700,7 +700,7 @@ def _get_params_for_batch_identifiers_from_non_date_part_partitioners( Returns: Dict of {column_name: row, column_name: row, ...} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return [dict(zip(column_names, row)) for row in rows] @staticmethod @@ -711,7 +711,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_whole_ta 'Partition' by returning the whole table Note: the selectable parameter is a required to keep the signature of this method consistent with other methods. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return sa.select(sa.true()) @staticmethod @@ -722,7 +722,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_column_v """Partition using the values in the named column""" return ( sa.select(sa.func.distinct(sa.column(column_name))) - .select_from(selectable) # type: ignore[arg-type] + .select_from(selectable) # type: ignore[arg-type] # FIXME CoP .order_by(sa.column(column_name).asc()) ) @@ -741,10 +741,10 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_converte sa.column(column_name), ) ) - ).select_from(selectable) # type: ignore[arg-type] + ).select_from(selectable) # type: ignore[arg-type] # FIXME CoP raise NotImplementedError( - f'Partitioner method "partition_on_converted_datetime" is not supported for "{self._dialect}" SQL dialect.' # noqa: E501 + f'Partitioner method "partition_on_converted_datetime" is not supported for "{self._dialect}" SQL dialect.' # noqa: E501 # FIXME CoP ) def get_partition_query_for_data_for_batch_identifiers_for_partition_on_divided_integer( @@ -762,7 +762,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_divided_ sa.Integer, ) ) - ).select_from(selectable) # type: ignore[arg-type] + ).select_from(selectable) # type: ignore[arg-type] # FIXME CoP if self._dialect == GXSqlDialect.MYSQL: return sa.select( @@ -775,7 +775,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_divided_ sa.Integer, ) ) - ).select_from(selectable) # type: ignore[arg-type] + ).select_from(selectable) # type: ignore[arg-type] # FIXME CoP if self._dialect == GXSqlDialect.MSSQL: return sa.select( @@ -789,7 +789,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_divided_ sa.Integer, ) ) - ).select_from(selectable) # type: ignore[arg-type] + ).select_from(selectable) # type: ignore[arg-type] # FIXME CoP if self._dialect == GXSqlDialect.AWSATHENA: return sa.select( @@ -799,7 +799,7 @@ def 
get_partition_query_for_data_for_batch_identifiers_for_partition_on_divided_ sa.Integer, ) ) - ).select_from(selectable) # type: ignore[arg-type] + ).select_from(selectable) # type: ignore[arg-type] # FIXME CoP return sa.select( sa.func.distinct( @@ -808,7 +808,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_divided_ sa.Integer, ) ) - ).select_from(selectable) # type: ignore[arg-type] + ).select_from(selectable) # type: ignore[arg-type] # FIXME CoP def get_partition_query_for_data_for_batch_identifiers_for_partition_on_mod_integer( self, @@ -823,11 +823,11 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_mod_inte ]: return sa.select( sa.func.distinct(sa.cast(sa.column(column_name), sa.Integer) % mod) - ).select_from(selectable) # type: ignore[arg-type] + ).select_from(selectable) # type: ignore[arg-type] # FIXME CoP return sa.select( sa.func.distinct(sa.func.mod(sa.cast(sa.column(column_name), sa.Integer), mod)) - ).select_from(selectable) # type: ignore[arg-type] + ).select_from(selectable) # type: ignore[arg-type] # FIXME CoP @staticmethod def get_partition_query_for_data_for_batch_identifiers_for_partition_on_multi_column_values( @@ -838,7 +838,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_multi_co return ( sa.select(*[sa.column(column_name) for column_name in column_names]) .distinct() - .select_from(selectable) # type: ignore[arg-type] + .select_from(selectable) # type: ignore[arg-type] # FIXME CoP ) def get_partition_query_for_data_for_batch_identifiers_for_partition_on_hashed_column( @@ -853,8 +853,8 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_hashed_c sa.func.distinct( sa.func.md5(sa.cast(sa.column(column_name), sa.VARCHAR), hash_digits) ) - ).select_from(selectable) # type: ignore[arg-type] + ).select_from(selectable) # type: ignore[arg-type] # FIXME CoP raise NotImplementedError( - f'Partitioner method "partition_on_hashed_column" is not 
supported for "{self._dialect}" SQL dialect.' # noqa: E501 + f'Partitioner method "partition_on_hashed_column" is not supported for "{self._dialect}" SQL dialect.' # noqa: E501 # FIXME CoP ) diff --git a/great_expectations/execution_engine/partition_and_sample/sqlalchemy_data_sampler.py b/great_expectations/execution_engine/partition_and_sample/sqlalchemy_data_sampler.py index 467a8702d81e..da8591217e64 100644 --- a/great_expectations/execution_engine/partition_and_sample/sqlalchemy_data_sampler.py +++ b/great_expectations/execution_engine/partition_and_sample/sqlalchemy_data_sampler.py @@ -6,7 +6,7 @@ from great_expectations.compatibility.sqlalchemy import ( sqlalchemy as sa, ) -from great_expectations.core.id_dict import BatchSpec # noqa: TCH001 +from great_expectations.core.id_dict import BatchSpec # noqa: TCH001 # FIXME CoP from great_expectations.execution_engine.partition_and_sample.data_sampler import ( DataSampler, ) @@ -41,14 +41,14 @@ def sample_using_limit( Returns: A query as a string or sqlalchemy object. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # Partition clause should be permissive of all values if not supplied. 
if where_clause is None: if execution_engine.dialect_name == GXSqlDialect.SQLITE: - where_clause = sa.text("1 = 1") # type: ignore[assignment] + where_clause = sa.text("1 = 1") # type: ignore[assignment] # FIXME CoP else: - where_clause = sa.true() # type: ignore[assignment] + where_clause = sa.true() # type: ignore[assignment] # FIXME CoP table_name: str = batch_spec["table_name"] @@ -57,11 +57,11 @@ def sample_using_limit( dialect_name: str = execution_engine.dialect_name if dialect_name == GXSqlDialect.ORACLE: # TODO: AJB 20220429 WARNING THIS oracle dialect METHOD IS NOT COVERED BY TESTS - # limit doesn't compile properly for oracle so we will append rownum to query string later # noqa: E501 + # limit doesn't compile properly for oracle so we will append rownum to query string later # noqa: E501 # FIXME CoP raw_query: sqlalchemy.Selectable = ( sa.select("*") .select_from(sa.table(table_name, schema=batch_spec.get("schema_name", None))) - .where(where_clause) # type: ignore[arg-type] + .where(where_clause) # type: ignore[arg-type] # FIXME CoP ) query: str = str( raw_query.compile( @@ -69,7 +69,7 @@ def sample_using_limit( compile_kwargs={"literal_binds": True}, ) ) - query += "\nAND ROWNUM <= %d" % batch_spec["sampling_kwargs"]["n"] # noqa: UP031 + query += "\nAND ROWNUM <= %d" % batch_spec["sampling_kwargs"]["n"] # noqa: UP031 # FIXME CoP return query elif dialect_name == GXSqlDialect.MSSQL: # Note that this code path exists because the limit parameter is not getting rendered @@ -77,7 +77,7 @@ def sample_using_limit( selectable_query: sqlalchemy.Selectable = ( sa.select("*") .select_from(sa.table(table_name, schema=batch_spec.get("schema_name", None))) - .where(where_clause) # type: ignore[arg-type] + .where(where_clause) # type: ignore[arg-type] # FIXME CoP .limit(batch_spec["sampling_kwargs"]["n"]) ) string_of_query: str = str( @@ -88,14 +88,14 @@ def sample_using_limit( ) n: Union[str, int] = batch_spec["sampling_kwargs"]["n"] 
self._validate_mssql_limit_param(n) - # This string replacement is here because the limit parameter is not substituted during query.compile() # noqa: E501 + # This string replacement is here because the limit parameter is not substituted during query.compile() # noqa: E501 # FIXME CoP string_of_query = string_of_query.replace("?", str(n)) return string_of_query else: return ( - sa.select("*") # type: ignore[return-value] + sa.select("*") # type: ignore[return-value] # FIXME CoP .select_from(sa.table(table_name, schema=batch_spec.get("schema_name", None))) - .where(where_clause) # type: ignore[arg-type] + .where(where_clause) # type: ignore[arg-type] # FIXME CoP .limit(batch_spec["sampling_kwargs"]["n"]) ) @@ -108,13 +108,13 @@ def _validate_mssql_limit_param(n: Union[str, int]) -> None: Returns: None - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if not isinstance(n, (str, int)): - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP "Please specify your sampling kwargs 'n' parameter as a string or int." ) if isinstance(n, str) and not n.isdigit(): - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP "If specifying your sampling kwargs 'n' parameter as a string please ensure it is " "parseable as an integer." ) @@ -138,32 +138,32 @@ def sample_using_random( Returns: Sqlalchemy selectable. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP try: table_name: str = batch_spec["table_name"] except KeyError as e: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "A table name must be specified when using sample_using_random. 
" "Please update your configuration" ) from e try: p: float = batch_spec["sampling_kwargs"]["p"] or 1.0 except (KeyError, TypeError) as e: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "To use sample_using_random you must specify the parameter 'p' in " "the 'sampling_kwargs' configuration." ) from e - num_rows: int = execution_engine.execute_query( # type: ignore[assignment] + num_rows: int = execution_engine.execute_query( # type: ignore[assignment] # FIXME CoP sa.select(sa.func.count()) .select_from(sa.table(table_name, schema=batch_spec.get("schema_name", None))) - .where(where_clause) # type: ignore[arg-type] + .where(where_clause) # type: ignore[arg-type] # FIXME CoP ).scalar() sample_size: int = round(p * num_rows) return ( sa.select("*") .select_from(sa.table(table_name, schema=batch_spec.get("schema_name", None))) - .where(where_clause) # type: ignore[arg-type] + .where(where_clause) # type: ignore[arg-type] # FIXME CoP .order_by(sa.func.random()) .limit(sample_size) ) @@ -191,7 +191,7 @@ def sample_using_mod( mod: int = self.get_sampling_kwargs_value_or_default(batch_spec, "mod") value: int = self.get_sampling_kwargs_value_or_default(batch_spec, "value") - return sa.column(column_name) % mod == value # type: ignore[return-value] + return sa.column(column_name) % mod == value # type: ignore[return-value] # FIXME CoP def sample_using_a_list( self, @@ -213,7 +213,7 @@ def sample_using_a_list( self.verify_batch_spec_sampling_kwargs_key_exists("value_list", batch_spec) column_name: str = self.get_sampling_kwargs_value_or_default(batch_spec, "column_name") value_list: list = self.get_sampling_kwargs_value_or_default(batch_spec, "value_list") - return sa.column(column_name).in_(value_list) # type: ignore[return-value] + return sa.column(column_name).in_(value_list) # type: ignore[return-value] # FIXME CoP def sample_using_md5( self, @@ -230,7 +230,7 @@ def sample_using_md5( Raises: SamplerError - """ # noqa: E501 + """ # noqa: 
E501 # FIXME CoP self.verify_batch_spec_sampling_kwargs_exists(batch_spec) self.verify_batch_spec_sampling_kwargs_key_exists("column_name", batch_spec) column_name: str = self.get_sampling_kwargs_value_or_default(batch_spec, "column_name") @@ -242,6 +242,6 @@ def sample_using_md5( ) return ( - sa.func.right(sa.func.md5(sa.cast(sa.column(column_name), sa.Text)), hash_digits) # type: ignore[return-value] + sa.func.right(sa.func.md5(sa.cast(sa.column(column_name), sa.Text)), hash_digits) # type: ignore[return-value] # FIXME CoP == hash_value ) diff --git a/great_expectations/execution_engine/sparkdf_execution_engine.py b/great_expectations/execution_engine/sparkdf_execution_engine.py index 9024292498ba..3e1460a5f40f 100644 --- a/great_expectations/execution_engine/sparkdf_execution_engine.py +++ b/great_expectations/execution_engine/sparkdf_execution_engine.py @@ -38,7 +38,7 @@ ) from great_expectations.core.id_dict import IDDict from great_expectations.core.metric_domain_types import ( - MetricDomainTypes, # noqa: TCH001 + MetricDomainTypes, # noqa: TCH001 # FIXME CoP ) from great_expectations.core.util import AzureUrl from great_expectations.exceptions import ( @@ -50,8 +50,8 @@ from great_expectations.exceptions import exceptions as gx_exceptions from great_expectations.execution_engine import ExecutionEngine from great_expectations.execution_engine.execution_engine import ( - MetricComputationConfiguration, # noqa: TCH001 - PartitionDomainKwargs, # noqa: TCH001 + MetricComputationConfiguration, # noqa: TCH001 # FIXME CoP + PartitionDomainKwargs, # noqa: TCH001 # FIXME CoP ) from great_expectations.execution_engine.partition_and_sample.sparkdf_data_partitioner import ( SparkDataPartitioner, @@ -70,10 +70,10 @@ RowConditionParserType, parse_condition_to_spark, ) -from great_expectations.util import convert_to_json_serializable # noqa: TID251 -from great_expectations.validator.computed_metric import MetricValue # noqa: TCH001 +from great_expectations.util import 
convert_to_json_serializable # noqa: TID251 # FIXME CoP +from great_expectations.validator.computed_metric import MetricValue # noqa: TCH001 # FIXME CoP from great_expectations.validator.metric_configuration import ( - MetricConfiguration, # noqa: TCH001 + MetricConfiguration, # noqa: TCH001 # FIXME CoP ) if TYPE_CHECKING: @@ -92,8 +92,8 @@ def apply_dateutil_parse(column): @deprecated_argument( argument_name="force_reuse_spark_context", version="1.0", - message="The force_reuse_spark_context attribute is no longer part of any Spark Datasource classes. " # noqa: E501 - "The existing Spark context will be reused if possible. If a spark_config is passed that doesn't match " # noqa: E501 + message="The force_reuse_spark_context attribute is no longer part of any Spark Datasource classes. " # noqa: E501 # FIXME CoP + "The existing Spark context will be reused if possible. If a spark_config is passed that doesn't match " # noqa: E501 # FIXME CoP "the existing config, the context will be stopped and restarted in local environments only.", ) class SparkDFExecutionEngine(ExecutionEngine): @@ -193,7 +193,7 @@ class SparkDFExecutionEngine(ExecutionEngine): expectation_completeness: Moderate --ge-feature-maturity-info-- - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP recognized_batch_definition_keys = {"limit"} @@ -229,9 +229,9 @@ def __init__( # deprecated-v1.0.0 warnings.warn( "force_reuse_spark_context is deprecated and will be removed in version 1.0. " - "In environments that allow it, the existing Spark context will be reused, adding the " # noqa: E501 - "spark_config options that have been passed. If the Spark context cannot be updated with " # noqa: E501 - "the spark_config, the context will be stopped and restarted with the new spark_config.", # noqa: E501 + "In environments that allow it, the existing Spark context will be reused, adding the " # noqa: E501 # FIXME CoP + "spark_config options that have been passed. 
If the Spark context cannot be updated with " # noqa: E501 # FIXME CoP + "the spark_config, the context will be stopped and restarted with the new spark_config.", # noqa: E501 # FIXME CoP category=DeprecationWarning, ) super().__init__(*args, **kwargs) @@ -249,9 +249,9 @@ def __init__( @property def dataframe(self) -> pyspark.DataFrame: - """If a batch has been loaded, returns a Spark Dataframe containing the data within the loaded batch""" # noqa: E501 + """If a batch has been loaded, returns a Spark Dataframe containing the data within the loaded batch""" # noqa: E501 # FIXME CoP if self.batch_manager.active_batch_data is None: - raise ValueError("Batch has not been loaded - please run load_batch() to load a batch.") # noqa: TRY003 + raise ValueError("Batch has not been loaded - please run load_batch() to load a batch.") # noqa: TRY003 # FIXME CoP return cast(SparkDFBatchData, self.batch_manager.active_batch_data).dataframe @@ -266,7 +266,7 @@ def get_or_create_spark_session( Returns: SparkSession - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP spark_config = spark_config or {} spark_session: pyspark.SparkSession @@ -337,7 +337,7 @@ def _session_is_not_stoppable( pyspark.SparkConnectSession # type: ignore[truthy-function] # returns false if module is not installed and isinstance(spark_session, pyspark.SparkConnectSession) ) or ( - os.environ.get("DATABRICKS_RUNTIME_VERSION") is not None # noqa: TID251 + os.environ.get("DATABRICKS_RUNTIME_VERSION") is not None # noqa: TID251 # FIXME CoP ) @staticmethod @@ -353,7 +353,7 @@ def _try_update_or_stop_misconfigured_spark_session( Returns: SparkSession, Boolean specifying if SparkSession is stopped - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP stopped = False warning_messages = [] for key, value in spark_config.items(): @@ -371,15 +371,15 @@ def _try_update_or_stop_misconfigured_spark_session( spark_session.conf.set(key, value) elif key == "spark.app.name" and spark_session.sparkContext.appName != value: 
spark_session.sparkContext.appName = value - # attribute error can be raised for connect sessions that haven't implemented a conf for sparkContext method # noqa: E501 - # analysis exception can be raised in environments that don't allow updating config of that option # noqa: E501 + # attribute error can be raised for connect sessions that haven't implemented a conf for sparkContext method # noqa: E501 # FIXME CoP + # analysis exception can be raised in environments that don't allow updating config of that option # noqa: E501 # FIXME CoP except ( pyspark.PySparkAttributeError, pyspark.AnalysisException, ): if SparkDFExecutionEngine._session_is_not_stoppable(spark_session=spark_session): warning_messages.append( - f"Passing spark_config option `{key}` had no effect, because in this environment " # noqa: E501 + f"Passing spark_config option `{key}` had no effect, because in this environment " # noqa: E501 # FIXME CoP "it is not modifiable and the Spark Session cannot be restarted." ) else: @@ -400,14 +400,14 @@ def _try_update_or_stop_misconfigured_spark_session( return spark_session, stopped @override - def load_batch_data( # type: ignore[override] + def load_batch_data( # type: ignore[override] # FIXME CoP self, batch_id: str, batch_data: Union[SparkDFBatchData, pyspark.DataFrame] ) -> None: - if pyspark.DataFrame and isinstance(batch_data, pyspark.DataFrame): # type: ignore[truthy-function] + if pyspark.DataFrame and isinstance(batch_data, pyspark.DataFrame): # type: ignore[truthy-function] # FIXME CoP batch_data = SparkDFBatchData(self, batch_data) elif not isinstance(batch_data, SparkDFBatchData): - raise GreatExpectationsError( # noqa: TRY003 - "SparkDFExecutionEngine requires batch data that is either a DataFrame or a SparkDFBatchData object" # noqa: E501 + raise GreatExpectationsError( # noqa: TRY003 # FIXME CoP + "SparkDFExecutionEngine requires batch data that is either a DataFrame or a SparkDFBatchData object" # noqa: E501 # FIXME CoP ) if self._persist: @@ 
-416,7 +416,7 @@ def load_batch_data( # type: ignore[override] super().load_batch_data(batch_id=batch_id, batch_data=batch_data) @override - def get_batch_data_and_markers( # noqa: C901, PLR0912, PLR0915 + def get_batch_data_and_markers( # noqa: C901, PLR0912, PLR0915 # FIXME CoP self, batch_spec: BatchSpec ) -> Tuple[Any, BatchMarkers]: # batch_data # We need to build a batch_markers to be used in the dataframe @@ -434,7 +434,7 @@ def get_batch_data_and_markers( # noqa: C901, PLR0912, PLR0915 path formats for accessing all other supported cloud storage services (AWS S3 and Google Cloud Storage). Moreover, these formats (encapsulated in S3BatchSpec and GCSBatchSpec) extend PathBatchSpec (common to them). Therefore, at the present time, all cases with the exception of Azure Blob Storage, are handled generically. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP batch_data: Any reader_method: str @@ -447,9 +447,9 @@ def get_batch_data_and_markers( # noqa: C901, PLR0912, PLR0915 # batch_data != None is already checked when RuntimeDataBatchSpec is instantiated batch_data = batch_spec.batch_data if isinstance(batch_data, str): - raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 + raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 # FIXME CoP f"""SparkDFExecutionEngine has been passed a string type batch_data, "{batch_data}", which is \ -illegal. Please check your config.""" # noqa: E501 +illegal. 
Please check your config.""" # noqa: E501 # FIXME CoP ) batch_spec.batch_data = "SparkDataFrame" @@ -458,7 +458,7 @@ def get_batch_data_and_markers( # noqa: C901, PLR0912, PLR0915 reader_options = batch_spec.reader_options or {} path = batch_spec.path azure_url = AzureUrl(path) - # TODO 202209 - Add `schema` definition to Azure like PathBatchSpec below (GREAT-1224) # noqa: E501 + # TODO 202209 - Add `schema` definition to Azure like PathBatchSpec below (GREAT-1224) # noqa: E501 # FIXME CoP try: credential = self._azure_options.get("credential") storage_account_url = azure_url.account_url @@ -476,7 +476,7 @@ def get_batch_data_and_markers( # noqa: C901, PLR0912, PLR0915 ) batch_data = reader_fn(path) except AttributeError: - raise ExecutionEngineError( # noqa: TRY003 + raise ExecutionEngineError( # noqa: TRY003 # FIXME CoP """ Unable to load pyspark. Pyspark is required for SparkDFExecutionEngine. """ @@ -495,7 +495,7 @@ def get_batch_data_and_markers( # noqa: C901, PLR0912, PLR0915 # this can happen if we have not converted schema into json at Datasource-config level elif isinstance(schema, str): - raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 + raise gx_exceptions.ExecutionEngineError( # noqa: TRY003 # FIXME CoP """ Spark schema was not properly serialized. Please run the .jsonValue() method on the schema object before loading into GX. @@ -516,19 +516,19 @@ def get_batch_data_and_markers( # noqa: C901, PLR0912, PLR0915 ) batch_data = reader_fn(path) except AttributeError: - raise ExecutionEngineError( # noqa: TRY003 + raise ExecutionEngineError( # noqa: TRY003 # FIXME CoP """ Unable to load pyspark. Pyspark is required for SparkDFExecutionEngine. """ ) # pyspark will raise an AnalysisException error if path is incorrect except pyspark.AnalysisException: - raise ExecutionEngineError( # noqa: TRY003 - f"""Unable to read in batch from the following path: {path}. 
Please check your configuration.""" # noqa: E501 + raise ExecutionEngineError( # noqa: TRY003 # FIXME CoP + f"""Unable to read in batch from the following path: {path}. Please check your configuration.""" # noqa: E501 # FIXME CoP ) else: - raise BatchSpecError( # noqa: TRY003 + raise BatchSpecError( # noqa: TRY003 # FIXME CoP """ Invalid batch_spec: batch_data is required for a SparkDFExecutionEngine to operate. """ @@ -564,7 +564,7 @@ def _apply_partitioning_and_sampling_methods(self, batch_spec, batch_data): return batch_data - # TODO: Similar to Abe's note in PandasExecutionEngine: Any reason this shouldn't be a private method? # noqa: E501 + # TODO: Similar to Abe's note in PandasExecutionEngine: Any reason this shouldn't be a private method? # noqa: E501 # FIXME CoP @staticmethod def guess_reader_method_from_path(path: str): """ @@ -577,14 +577,14 @@ def guess_reader_method_from_path(path: str): Returns: A dictionary entry of format {'reader_method': reader_method} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP path = path.lower() if path.endswith(".csv") or path.endswith(".tsv"): return "csv" elif path.endswith(".parquet") or path.endswith(".parq") or path.endswith(".pqt"): return "parquet" - raise ExecutionEngineError(f"Unable to determine reader method from path: {path}") # noqa: TRY003 + raise ExecutionEngineError(f"Unable to determine reader method from path: {path}") # noqa: TRY003 # FIXME CoP @overload def _get_reader_fn( @@ -605,9 +605,9 @@ def _get_reader_fn(self, reader, reader_method=None, path=None) -> Callable: Returns: ReaderMethod to use for the filepath - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if reader_method is None and path is None: - raise ExecutionEngineError( # noqa: TRY003 + raise ExecutionEngineError( # noqa: TRY003 # FIXME CoP "Unable to determine spark reader function without reader_method or path" ) @@ -620,12 +620,12 @@ def _get_reader_fn(self, reader, reader_method=None, path=None) -> Callable: return 
reader.format(reader_method_op).load return getattr(reader, reader_method_op) except AttributeError: - raise ExecutionEngineError( # noqa: TRY003 + raise ExecutionEngineError( # noqa: TRY003 # FIXME CoP f"Unable to find reader_method {reader_method} in spark.", ) @override - def get_domain_records( # noqa: C901, PLR0912, PLR0915 + def get_domain_records( # noqa: C901, PLR0912, PLR0915 # FIXME CoP self, domain_kwargs: dict, ) -> "pyspark.DataFrame": # noqa F821 @@ -636,15 +636,15 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 Returns: A DataFrame (the data on which to compute returned in the format of a Spark DataFrame) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP """ # TODO: Docusaurus run fails, unless "pyspark.DataFrame" type hint above is enclosed in quotes. This may be caused by it becoming great_expectations.compatibility.not_imported.NotImported when pyspark is not installed. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP table = domain_kwargs.get("table", None) if table: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "SparkDFExecutionEngine does not currently support multiple named tables." ) @@ -654,16 +654,16 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 if self.batch_manager.active_batch_data: data = cast(SparkDFBatchData, self.batch_manager.active_batch_data).dataframe else: - raise ValidationError( # noqa: TRY003 + raise ValidationError( # noqa: TRY003 # FIXME CoP "No batch is specified, but could not identify a loaded batch." ) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if batch_id in self.batch_manager.batch_data_cache: data = cast( SparkDFBatchData, self.batch_manager.batch_data_cache[batch_id] ).dataframe else: - raise ValidationError(f"Unable to find batch with batch_id {batch_id}") # noqa: TRY003 + raise ValidationError(f"Unable to find batch with batch_id {batch_id}") # noqa: TRY003 # FIXME CoP # Filtering by row condition. 
row_condition = domain_kwargs.get("row_condition", None) @@ -678,7 +678,7 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 parsed_condition = parse_condition_to_spark(row_condition) data = data.filter(parsed_condition) else: - raise GreatExpectationsError( # noqa: TRY003 + raise GreatExpectationsError( # noqa: TRY003 # FIXME CoP f"unrecognized condition_parser {condition_parser!s} for Spark execution engine" ) @@ -709,9 +709,9 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 elif ignore_row_if == "either_value_is_missing": ignore_condition = F.col(column_A_name).isNull() | F.col(column_B_name).isNull() data = data.filter(~ignore_condition) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if ignore_row_if != "neither": - raise ValueError(f'Unrecognized value of ignore_row_if ("{ignore_row_if}").') # noqa: TRY003 + raise ValueError(f'Unrecognized value of ignore_row_if ("{ignore_row_if}").') # noqa: TRY003 # FIXME CoP return data @@ -726,9 +726,9 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 conditions = [F.col(column_name).isNull() for column_name in column_list] ignore_condition = reduce(lambda a, b: a | b, conditions) data = data.filter(~ignore_condition) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if ignore_row_if != "never": - raise ValueError(f'Unrecognized value of ignore_row_if ("{ignore_row_if}").') # noqa: TRY003 + raise ValueError(f'Unrecognized value of ignore_row_if ("{ignore_row_if}").') # noqa: TRY003 # FIXME CoP return data @@ -785,15 +785,15 @@ def get_compute_domain( - a dictionary of compute_domain_kwargs, describing the DataFrame - a dictionary of accessor_domain_kwargs, describing any accessors needed to identify the Domain within the compute domain - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP """ # TODO: Docusaurus run fails, unless "pyspark.DataFrame" type hint above is enclosed in quotes. 
This may be caused by it becoming great_expectations.compatibility.not_imported.NotImported when pyspark is not installed. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP table: str = domain_kwargs.get("table", None) if table: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "SparkDFExecutionEngine does not currently support multiple named tables." ) @@ -863,7 +863,7 @@ def resolve_metric_bundle( Returns: A dictionary of "MetricConfiguration" IDs and their corresponding fully resolved values for domains. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP resolved_metrics: Dict[Tuple[str, str, str], MetricValue] = {} res: List[pyspark.Row] @@ -904,7 +904,7 @@ def resolve_metric_bundle( res = df.agg(*aggregate["column_aggregates"]).collect() logger.debug( - f"SparkDFExecutionEngine computed {len(res[0])} metrics on domain_id {IDDict(domain_kwargs).to_id()}" # noqa: E501 + f"SparkDFExecutionEngine computed {len(res[0])} metrics on domain_id {IDDict(domain_kwargs).to_id()}" # noqa: E501 # FIXME CoP ) assert len(res) == 1, "all bundle-computed metrics must be single-value statistics" @@ -915,8 +915,8 @@ def resolve_metric_bundle( idx: int metric_id: Tuple[str, str, str] for idx, metric_id in enumerate(aggregate["metric_ids"]): - # Converting DataFrame.collect() results into JSON-serializable format produces simple data types, # noqa: E501 - # amenable for subsequent post-processing by higher-level "Metric" and "Expectation" layers. # noqa: E501 + # Converting DataFrame.collect() results into JSON-serializable format produces simple data types, # noqa: E501 # FIXME CoP + # amenable for subsequent post-processing by higher-level "Metric" and "Expectation" layers. 
# noqa: E501 # FIXME CoP resolved_metrics[metric_id] = convert_to_json_serializable(data=res[0][idx]) return resolved_metrics diff --git a/great_expectations/execution_engine/sqlalchemy_batch_data.py b/great_expectations/execution_engine/sqlalchemy_batch_data.py index 25cbc29e02a7..c0efc5e62f9c 100644 --- a/great_expectations/execution_engine/sqlalchemy_batch_data.py +++ b/great_expectations/execution_engine/sqlalchemy_batch_data.py @@ -17,7 +17,7 @@ class SqlAlchemyBatchData(BatchData): """A class which represents a SQL alchemy batch, with properties including the construction of the batch itself - and several getters used to access various properties.""" # noqa: E501 + and several getters used to access various properties.""" # noqa: E501 # FIXME CoP # Instantiating SqlAlchemyBatchData with table_name and schema_name @overload @@ -74,7 +74,7 @@ def __init__( source_table_name: str = ..., ) -> None: ... - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, execution_engine, # Option 1 @@ -132,7 +132,7 @@ def __init__( # noqa: PLR0913 In general, temporary tables invite more optimization from the query engine itself. Subselect statements may sometimes be preferred, because they do not require write access on the database. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP super().__init__(execution_engine=execution_engine) engine = execution_engine.engine self._engine = engine @@ -142,10 +142,10 @@ def __init__( # noqa: PLR0913 self._source_schema_name = source_schema_name if sum(bool(x) for x in [table_name, query, selectable is not None]) != 1: - raise ValueError("Exactly one of table_name, query, or selectable must be specified") # noqa: TRY003 + raise ValueError("Exactly one of table_name, query, or selectable must be specified") # noqa: TRY003 # FIXME CoP elif (query and schema_name) or (selectable is not None and schema_name): - raise ValueError( # noqa: TRY003 - "schema_name can only be used with table_name. 
Use temp_table_schema_name to provide a target schema for creating a temporary table." # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "schema_name can only be used with table_name. Use temp_table_schema_name to provide a target schema for creating a temporary table." # noqa: E501 # FIXME CoP ) dialect_name: str = engine.dialect.name.lower() @@ -198,7 +198,7 @@ def selectable(self): def use_quoted_name(self): return self._use_quoted_name - def _create_temporary_table( # noqa: C901, PLR0912 + def _create_temporary_table( # noqa: C901, PLR0912 # FIXME CoP self, dialect: GXSqlDialect, query: str, @@ -207,7 +207,7 @@ def _create_temporary_table( # noqa: C901, PLR0912 """ Create Temporary table based on sql query. This will be used as a basis for executing expectations. :param query: - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP temp_table_name = generate_temporary_table_name() # mssql expects all temporary table names to have a prefix '#' @@ -225,7 +225,7 @@ def _create_temporary_table( # noqa: C901, PLR0912 temp_table_name = f"{temp_table_schema_name}.{temp_table_name}" if dialect == GXSqlDialect.BIGQUERY: - # BigQuery Table is created using with an expiration of 24 hours using Google's Data Definition Language # noqa: E501 + # BigQuery Table is created using with an expiration of 24 hours using Google's Data Definition Language # noqa: E501 # FIXME CoP # https://stackoverflow.com/questions/20673986/how-to-create-temporary-table-in-google-bigquery stmt = f"""CREATE OR REPLACE TABLE `{temp_table_name}` OPTIONS( @@ -244,7 +244,7 @@ def _create_temporary_table( # noqa: C901, PLR0912 elif dialect == GXSqlDialect.HIVE: stmt = f"CREATE TEMPORARY TABLE `{temp_table_name}` AS {query}" elif dialect == GXSqlDialect.MSSQL: - # Insert "into #{temp_table_name}" in the custom sql query right before the "from" clause # noqa: E501 + # Insert "into #{temp_table_name}" in the custom sql query right before the "from" clause # noqa: E501 # FIXME CoP # Partition is 
case-sensitive so detect case. # Note: transforming query to uppercase/lowercase has unintended consequences (i.e., # changing column names), so this is not an option! @@ -260,7 +260,7 @@ def _create_temporary_table( # noqa: C901, PLR0912 stmt = f"{querymod[0]}into {{temp_table_name}} from{querymod[1]}".format( temp_table_name=temp_table_name ) - # TODO: logger.warning is emitted in situations where a permanent TABLE is created in _create_temporary_table() # noqa: E501 + # TODO: logger.warning is emitted in situations where a permanent TABLE is created in _create_temporary_table() # noqa: E501 # FIXME CoP # Similar message may be needed in the future for Trino backend. elif dialect in (GXSqlDialect.TRINO, GXSqlDialect.CLICKHOUSE): logger.warning( @@ -322,13 +322,13 @@ def _generate_selectable_from_schema_name_and_table_name( Returns: sqlalchemy.Table: SqlAlchemy Table that is Selectable. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if use_quoted_name: table_name = sqlalchemy.quoted_name(table_name, quote=True) if dialect == GXSqlDialect.BIGQUERY: if schema_name is not None: logger.warning( - "schema_name should not be used when passing a table_name for biquery. Instead, include the schema name in the table_name string." # noqa: E501 + "schema_name should not be used when passing a table_name for biquery. Instead, include the schema name in the table_name string." # noqa: E501 # FIXME CoP ) # In BigQuery the table name is already qualified with its schema name return sa.Table( @@ -377,7 +377,7 @@ def _generate_selectable_from_query( Returns: sqlalchemy.Table: SqlAlchemy Table that is Selectable or a TextClause. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if not create_temp_table: return sa.text(query) _, temp_table_name = self._create_temporary_table( @@ -410,7 +410,7 @@ def _generate_selectable_from_selectable( Returns: sqlalchemy.Table: SqlAlchemy Table that is Selectable. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if not create_temp_table: return selectable.alias() diff --git a/great_expectations/execution_engine/sqlalchemy_dialect.py b/great_expectations/execution_engine/sqlalchemy_dialect.py index 101c43524d41..5ca226f96519 100644 --- a/great_expectations/execution_engine/sqlalchemy_dialect.py +++ b/great_expectations/execution_engine/sqlalchemy_dialect.py @@ -78,7 +78,7 @@ def quote_str(unquoted_identifier: str, dialect: GXSqlDialect) -> str: """Quote a string using the specified dialect's quote character.""" quote_char = DIALECT_IDENTIFIER_QUOTE_STRINGS[dialect] if unquoted_identifier.startswith(quote_char) or unquoted_identifier.endswith(quote_char): - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"Identifier {unquoted_identifier} already uses quote character {quote_char}" ) return f"{quote_char}{unquoted_identifier}{quote_char}" diff --git a/great_expectations/execution_engine/sqlalchemy_execution_engine.py b/great_expectations/execution_engine/sqlalchemy_execution_engine.py index eb22a962c530..02b35d26fb0d 100644 --- a/great_expectations/execution_engine/sqlalchemy_execution_engine.py +++ b/great_expectations/execution_engine/sqlalchemy_execution_engine.py @@ -37,7 +37,7 @@ __version__ = get_versions()["version"] # isort:skip from great_expectations._docs_decorators import new_method_or_class -from great_expectations.compatibility import aws, snowflake, sqlalchemy, trino +from great_expectations.compatibility import snowflake, sqlalchemy from great_expectations.compatibility.not_imported import is_version_greater_or_equal from great_expectations.compatibility.sqlalchemy import Subquery from great_expectations.compatibility.sqlalchemy import ( @@ -58,8 +58,8 @@ CONDITION_PARSER_GREAT_EXPECTATIONS, CONDITION_PARSER_GREAT_EXPECTATIONS_DEPRECATED, ) -from great_expectations.util import convert_to_json_serializable # noqa: TID251 -from great_expectations.validator.computed_metric import 
MetricValue # noqa: TCH001 +from great_expectations.util import convert_to_json_serializable # noqa: TID251 # FIXME CoP +from great_expectations.validator.computed_metric import MetricValue # noqa: TCH001 # FIXME CoP del get_versions # isort:skip @@ -96,7 +96,7 @@ import_make_url, ) from great_expectations.validator.metric_configuration import ( - MetricConfiguration, # noqa: TCH001 + MetricConfiguration, # noqa: TCH001 # FIXME CoP ) logger = logging.getLogger(__name__) @@ -107,35 +107,34 @@ try: - import psycopg2 # noqa: F401 - import sqlalchemy.dialects.postgresql.psycopg2 as sqlalchemy_psycopg2 # noqa: TID251 + import psycopg2 # noqa: F401 # FIXME CoP + import sqlalchemy.dialects.postgresql.psycopg2 as sqlalchemy_psycopg2 # noqa: TID251 # FIXME CoP except (ImportError, KeyError): - sqlalchemy_psycopg2 = None # type: ignore[assignment] + sqlalchemy_psycopg2 = None # type: ignore[assignment] # FIXME CoP try: import sqlalchemy_dremio.pyodbc if sa: - sa.dialects.registry.register(GXSqlDialect.DREMIO, "sqlalchemy_dremio.pyodbc", "dialect") # type: ignore[arg-type] + sa.dialects.registry.register(GXSqlDialect.DREMIO, "sqlalchemy_dremio.pyodbc", "dialect") # type: ignore[arg-type] # FIXME CoP except ImportError: sqlalchemy_dremio = None if snowflake.snowflakedialect: if sa: - # Sometimes "snowflake-sqlalchemy" fails to self-register in certain environments, so we do it explicitly. # noqa: E501 + # Sometimes "snowflake-sqlalchemy" fails to self-register in certain environments, so we do it explicitly. 
# noqa: E501 # FIXME CoP # (see https://stackoverflow.com/questions/53284762/nosuchmoduleerror-cant-load-plugin-sqlalchemy-dialectssnowflake) - sa.dialects.registry.register(GXSqlDialect.SNOWFLAKE, "snowflake.sqlalchemy", "dialect") # type: ignore[arg-type] + sa.dialects.registry.register(GXSqlDialect.SNOWFLAKE, "snowflake.sqlalchemy", "dialect") # type: ignore[arg-type] # FIXME CoP from great_expectations.compatibility.bigquery import ( _BIGQUERY_MODULE_NAME, - bigquery_types_tuple, ) from great_expectations.compatibility.bigquery import ( sqlalchemy_bigquery as sqla_bigquery, ) if sqla_bigquery and sa: - sa.dialects.registry.register(GXSqlDialect.BIGQUERY, _BIGQUERY_MODULE_NAME, "BigQueryDialect") # type: ignore[arg-type] + sa.dialects.registry.register(GXSqlDialect.BIGQUERY, _BIGQUERY_MODULE_NAME, "BigQueryDialect") # type: ignore[arg-type] # FIXME CoP try: import teradatasqlalchemy.dialect @@ -145,64 +144,7 @@ teradatatypes = None if TYPE_CHECKING: - from sqlalchemy.engine import Engine as SaEngine # noqa: TID251 - - -def _get_dialect_type_module(dialect): # noqa: C901 - """Given a dialect, returns the dialect type, which is defines the engine/system that is used to communicates - with the database/database implementation. 
Currently checks for RedShift/BigQuery dialects - """ # noqa: E501 - if dialect is None: - logger.warning("No sqlalchemy dialect found; relying in top-level sqlalchemy types.") - return sa - - # Redshift does not (yet) export types to top level; only recognize base SA types - if aws.redshiftdialect and isinstance(dialect, aws.redshiftdialect.RedshiftDialect): - # noinspection PyUnresolvedReferences - return dialect.sa - else: - pass - - # Bigquery works with newer versions, but use a patch if we had to define bigquery_types_tuple - try: - if ( - isinstance( - dialect, - sqla_bigquery, - ) - and bigquery_types_tuple is not None - ): - return bigquery_types_tuple - except (TypeError, AttributeError): - pass - - # Teradata types module - try: - if ( - issubclass( - dialect, - teradatasqlalchemy.dialect.TeradataDialect, - ) - and teradatatypes is not None - ): - return teradatatypes - except (TypeError, AttributeError): - pass - - # Trino types module - try: - if ( - isinstance( - dialect, - trino.trinodialect.TrinoDialect, - ) - and trino.trinotypes is not None - ): - return trino.trinotypes - except (TypeError, AttributeError): - pass - - return dialect + from sqlalchemy.engine import Engine as SaEngine # noqa: TID251 # FIXME CoP _PERSISTED_CONNECTION_DIALECTS = ( @@ -231,7 +173,7 @@ def _dialect_requires_persisted_connection( Boolean indicating whether the dialect requires a persisted connection. 
""" if sum(bool(x) for x in [connection_string, credentials, url is not None]) != 1: - raise ValueError("Exactly one of connection_string, credentials, url must be specified") # noqa: TRY003 + raise ValueError("Exactly one of connection_string, credentials, url must be specified") # noqa: TRY003 # FIXME CoP return_val = False if connection_string is not None: str_to_check = connection_string @@ -284,10 +226,10 @@ class SqlAlchemyExecutionEngine(ExecutionEngine): ```python execution_engine: ExecutionEngine = SqlAlchemyExecutionEngine(connection_string="dbmstype://user:password@host:5432/database_name") ``` - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # noinspection PyUnusedLocal - def __init__( # noqa: C901, PLR0912, PLR0913, PLR0915 + def __init__( # noqa: C901, PLR0912, PLR0913, PLR0915 # FIXME CoP self, name: Optional[str] = None, credentials: Optional[dict] = None, @@ -297,7 +239,7 @@ def __init__( # noqa: C901, PLR0912, PLR0913, PLR0915 url: Optional[str] = None, batch_data_dict: Optional[dict] = None, create_temp_table: bool = True, - # kwargs will be passed as optional parameters to the SQLAlchemy engine, **not** the ExecutionEngine # noqa: E501 + # kwargs will be passed as optional parameters to the SQLAlchemy engine, **not** the ExecutionEngine # noqa: E501 # FIXME CoP **kwargs, ) -> None: super().__init__(name=name, batch_data_dict=batch_data_dict) @@ -307,11 +249,11 @@ def __init__( # noqa: C901, PLR0912, PLR0913, PLR0915 self._connection_string = connection_string self._url = url self._create_temp_table = create_temp_table - os.environ["SF_PARTNER"] = "great_expectations_oss" # noqa: TID251 + os.environ["SF_PARTNER"] = "great_expectations_oss" # noqa: TID251 # FIXME CoP - # sqlite/mssql temp tables only persist within a connection, so we need to keep the connection alive by # noqa: E501 + # sqlite/mssql temp tables only persist within a connection, so we need to keep the connection alive by # noqa: E501 # FIXME CoP # keeping a reference to it. 
- # Even though we use a single connection pool for dialects that need a single persisted connection # noqa: E501 + # Even though we use a single connection pool for dialects that need a single persisted connection # noqa: E501 # FIXME CoP # (e.g. for accessing temporary tables), if we don't keep a reference # then we get errors like sqlite3.ProgrammingError: Cannot operate on a closed database. self._connection: sqlalchemy.Connection | None = None @@ -324,7 +266,7 @@ def __init__( # noqa: C901, PLR0912, PLR0913, PLR0915 if engine is not None: if credentials is not None: logger.warning( - "Both credentials and engine were provided during initialization of SqlAlchemyExecutionEngine. " # noqa: E501 + "Both credentials and engine were provided during initialization of SqlAlchemyExecutionEngine. " # noqa: E501 # FIXME CoP "Ignoring credentials." ) self.engine = engine @@ -336,12 +278,12 @@ def __init__( # noqa: C901, PLR0912, PLR0913, PLR0915 url=url, ) - # these are two backends where temp_table_creation is not supported we set the default value to False. # noqa: E501 + # these are two backends where temp_table_creation is not supported we set the default value to False. # noqa: E501 # FIXME CoP if ( self.dialect_name in [ GXSqlDialect.TRINO, - GXSqlDialect.AWSATHENA, # WKS 202201 - AWS Athena currently doesn't support temp_tables. # noqa: E501 + GXSqlDialect.AWSATHENA, # WKS 202201 - AWS Athena currently doesn't support temp_tables. # noqa: E501 # FIXME CoP GXSqlDialect.CLICKHOUSE, ] ): @@ -387,9 +329,9 @@ def __init__( # noqa: C901, PLR0912, PLR0913, PLR0915 else: self.dialect_module = None - # 20210726 - engine_backup is used by the snowflake connector, which requires connection and engine # noqa: E501 - # to be closed and disposed separately. Currently self.engine can refer to either a Connection or Engine, # noqa: E501 - # depending on the backend. 
This will need to be cleaned up in an upcoming refactor, so that Engine and # noqa: E501 + # 20210726 - engine_backup is used by the snowflake connector, which requires connection and engine # noqa: E501 # FIXME CoP + # to be closed and disposed separately. Currently self.engine can refer to either a Connection or Engine, # noqa: E501 # FIXME CoP + # depending on the backend. This will need to be cleaned up in an upcoming refactor, so that Engine and # noqa: E501 # FIXME CoP # Connection can be handled separately. self._engine_backup = None if self.engine and self.dialect_name in [ @@ -417,18 +359,18 @@ def _on_connect(dbapi_con, connection_record): _add_sqlite_functions(dbapi_con) sa.event.listen(self.engine, "connect", _on_connect) - # Also immediately add the sqlite functions in case there already exists an underlying # noqa: E501 + # Also immediately add the sqlite functions in case there already exists an underlying # noqa: E501 # FIXME CoP # sqlite3.Connection (distinct from a sqlalchemy Connection). _add_sqlite_functions(self.engine.raw_connection()) self._engine_backup = self.engine - # Gather the call arguments of the present function (and add the "class_name"), filter out the Falsy values, # noqa: E501 + # Gather the call arguments of the present function (and add the "class_name"), filter out the Falsy values, # noqa: E501 # FIXME CoP # and set the instance "_config" variable equal to the resulting dictionary. self._config = { "name": name, "credentials": credentials, "data_context": data_context, - "engine": engine, # type: ignore[dict-item] + "engine": engine, # type: ignore[dict-item] # FIXME CoP "connection_string": connection_string, "url": url, "batch_data_dict": batch_data_dict, @@ -458,7 +400,7 @@ def _setup_engine( Returns: Nothing, the engine instance variable is set. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if credentials is not None: self.engine = self._build_engine(credentials=credentials, **kwargs) elif connection_string is not None: @@ -480,7 +422,7 @@ def _setup_engine( else: self.engine = sa.create_engine(url, **kwargs) else: - raise InvalidConfigError( # noqa: TRY003 + raise InvalidConfigError( # noqa: TRY003 # FIXME CoP "Credentials or an engine are required for a SqlAlchemyExecutionEngine." ) @@ -514,7 +456,7 @@ def _build_engine(self, credentials: dict, **kwargs) -> sa.engine.Engine: """ Using a set of given credentials, constructs an Execution Engine , connecting to a database using a URL or a private key path. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # Update credentials with anything passed during connection time drivername = credentials.pop("drivername") schema_name = credentials.pop("schema_name", None) @@ -561,7 +503,7 @@ def _get_sqlalchemy_key_pair_auth_url( Returns: a tuple consisting of a url with the serialized key-pair authentication, and a dictionary of engine kwargs. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives import serialization @@ -582,7 +524,7 @@ def _get_sqlalchemy_key_pair_auth_url( message="Decryption of key failed, was the passphrase incorrect?", ) from e else: - raise e # noqa: TRY201 + raise e # noqa: TRY201 # FIXME CoP pkb = p_key.private_bytes( encoding=serialization.Encoding.DER, format=serialization.PrivateFormat.PKCS8, @@ -597,7 +539,7 @@ def _get_sqlalchemy_key_pair_auth_url( ) @override - def get_domain_records( # noqa: C901, PLR0912, PLR0915 + def get_domain_records( # noqa: C901, PLR0912, PLR0915 # FIXME CoP self, domain_kwargs: dict, ) -> sqlalchemy.Selectable: @@ -608,7 +550,7 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 Returns: An SqlAlchemy table/column(s) (the selectable object for obtaining data on which to compute returned in the format of an SqlAlchemy table/column(s) object) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP data_object: SqlAlchemyBatchData batch_id: Optional[str] = domain_kwargs.get("batch_id") @@ -617,16 +559,16 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 if self.batch_manager.active_batch_data: data_object = cast(SqlAlchemyBatchData, self.batch_manager.active_batch_data) else: - raise GreatExpectationsError( # noqa: TRY003 + raise GreatExpectationsError( # noqa: TRY003 # FIXME CoP "No batch is specified, but could not identify a loaded batch." 
) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if batch_id in self.batch_manager.batch_data_cache: data_object = cast( SqlAlchemyBatchData, self.batch_manager.batch_data_cache[batch_id] ) else: - raise GreatExpectationsError(f"Unable to find batch with batch_id {batch_id}") # noqa: TRY003 + raise GreatExpectationsError(f"Unable to find batch with batch_id {batch_id}") # noqa: TRY003 # FIXME CoP selectable: sqlalchemy.Selectable if "table" in domain_kwargs and domain_kwargs["table"] is not None: @@ -642,7 +584,7 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 else: selectable = data_object.selectable elif "query" in domain_kwargs: - raise ValueError("query is not currently supported by SqlAlchemyExecutionEngine") # noqa: TRY003 + raise ValueError("query is not currently supported by SqlAlchemyExecutionEngine") # noqa: TRY003 # FIXME CoP else: selectable = data_object.selectable @@ -651,7 +593,7 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 as a subquery wrapped in "(subquery) alias". TextClause must first be converted to TextualSelect using sa.columns() before it can be converted to type Subquery """ - if sqlalchemy.TextClause and isinstance(selectable, sqlalchemy.TextClause): # type: ignore[truthy-function] + if sqlalchemy.TextClause and isinstance(selectable, sqlalchemy.TextClause): # type: ignore[truthy-function] # FIXME CoP selectable = selectable.columns().subquery() # Filtering by row condition. 
@@ -662,10 +604,10 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 CONDITION_PARSER_GREAT_EXPECTATIONS_DEPRECATED, ]: parsed_condition = parse_condition_to_sqlalchemy(domain_kwargs["row_condition"]) - selectable = sa.select(sa.text("*")).select_from(selectable).where(parsed_condition) # type: ignore[arg-type] + selectable = sa.select(sa.text("*")).select_from(selectable).where(parsed_condition) # type: ignore[arg-type] # FIXME CoP else: - raise GreatExpectationsError( # noqa: TRY003 - "SqlAlchemyExecutionEngine only supports the great_expectations condition_parser." # noqa: E501 + raise GreatExpectationsError( # noqa: TRY003 # FIXME CoP + "SqlAlchemyExecutionEngine only supports the great_expectations condition_parser." # noqa: E501 # FIXME CoP ) # Filtering by filter_conditions @@ -678,17 +620,17 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 ), "filter_condition must be of type GX for SqlAlchemyExecutionEngine" # SQLAlchemy 2.0 deprecated select_from() from a non-Table asset without a subquery. - # Implicit coercion of SELECT and textual SELECT constructs into FROM clauses is deprecated. # noqa: E501 + # Implicit coercion of SELECT and textual SELECT constructs into FROM clauses is deprecated. # noqa: E501 # FIXME CoP if not isinstance(selectable, (sa.Table, Subquery)): - selectable = selectable.subquery() # type: ignore[attr-defined] + selectable = selectable.subquery() # type: ignore[attr-defined] # FIXME CoP selectable = ( sa.select(sa.text("*")) - .select_from(selectable) # type: ignore[arg-type] + .select_from(selectable) # type: ignore[arg-type] # FIXME CoP .where(parse_condition_to_sqlalchemy(filter_condition.condition)) ) elif len(filter_conditions) > 1: - raise GreatExpectationsError( # noqa: TRY003 + raise GreatExpectationsError( # noqa: TRY003 # FIXME CoP "SqlAlchemyExecutionEngine currently only supports a single filter condition." 
) @@ -717,12 +659,12 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 if ignore_row_if == "both_values_are_missing": selectable = get_sqlalchemy_selectable( sa.select(sa.text("*")) - .select_from(get_sqlalchemy_selectable(selectable)) # type: ignore[arg-type] + .select_from(get_sqlalchemy_selectable(selectable)) # type: ignore[arg-type] # FIXME CoP .where( sa.not_( sa.and_( - sa.column(column_A_name) == None, # noqa: E711 - sa.column(column_B_name) == None, # noqa: E711 + sa.column(column_A_name) == None, # noqa: E711 # FIXME CoP + sa.column(column_B_name) == None, # noqa: E711 # FIXME CoP ) ) ) @@ -730,19 +672,19 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 elif ignore_row_if == "either_value_is_missing": selectable = get_sqlalchemy_selectable( sa.select(sa.text("*")) - .select_from(get_sqlalchemy_selectable(selectable)) # type: ignore[arg-type] + .select_from(get_sqlalchemy_selectable(selectable)) # type: ignore[arg-type] # FIXME CoP .where( sa.not_( sa.or_( - sa.column(column_A_name) == None, # noqa: E711 - sa.column(column_B_name) == None, # noqa: E711 + sa.column(column_A_name) == None, # noqa: E711 # FIXME CoP + sa.column(column_B_name) == None, # noqa: E711 # FIXME CoP ) ) ) ) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if ignore_row_if != "neither": - raise ValueError(f'Unrecognized value of ignore_row_if ("{ignore_row_if}").') # noqa: TRY003 + raise ValueError(f'Unrecognized value of ignore_row_if ("{ignore_row_if}").') # noqa: TRY003 # FIXME CoP return selectable @@ -760,12 +702,12 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 if ignore_row_if == "all_values_are_missing": selectable = get_sqlalchemy_selectable( sa.select(sa.text("*")) - .select_from(get_sqlalchemy_selectable(selectable)) # type: ignore[arg-type] + .select_from(get_sqlalchemy_selectable(selectable)) # type: ignore[arg-type] # FIXME CoP .where( sa.not_( sa.and_( *( - sa.column(column_name) == None # noqa: E711 + sa.column(column_name) == 
None # noqa: E711 # FIXME CoP for column_name in column_list ) ) @@ -775,21 +717,21 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 elif ignore_row_if == "any_value_is_missing": selectable = get_sqlalchemy_selectable( sa.select(sa.text("*")) - .select_from(get_sqlalchemy_selectable(selectable)) # type: ignore[arg-type] + .select_from(get_sqlalchemy_selectable(selectable)) # type: ignore[arg-type] # FIXME CoP .where( sa.not_( sa.or_( *( - sa.column(column_name) == None # noqa: E711 + sa.column(column_name) == None # noqa: E711 # FIXME CoP for column_name in column_list ) ) ) ) ) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if ignore_row_if != "never": - raise ValueError(f'Unrecognized value of ignore_row_if ("{ignore_row_if}").') # noqa: TRY003 + raise ValueError(f'Unrecognized value of ignore_row_if ("{ignore_row_if}").') # noqa: TRY003 # FIXME CoP return selectable @@ -815,7 +757,7 @@ def get_compute_domain( Returns: SqlAlchemy column - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP partitioned_domain_kwargs: PartitionDomainKwargs = self._partition_domain_kwargs( domain_kwargs, domain_type, accessor_keys ) @@ -844,7 +786,7 @@ def _partition_column_metric_domain_kwargs( # type: ignore[override] # Executio Returns: compute_domain_kwargs, accessor_domain_kwargs partition from domain_kwargs The union of compute_domain_kwargs, accessor_domain_kwargs is the input domain_kwargs - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP assert ( domain_type == MetricDomainTypes.COLUMN ), "This method only supports MetricDomainTypes.COLUMN" @@ -853,7 +795,7 @@ def _partition_column_metric_domain_kwargs( # type: ignore[override] # Executio accessor_domain_kwargs: dict = {} if "column" not in compute_domain_kwargs: - raise gx_exceptions.GreatExpectationsError( # noqa: TRY003 + raise gx_exceptions.GreatExpectationsError( # noqa: TRY003 # FIXME CoP "Column not provided in compute_domain_kwargs" ) @@ -883,7 +825,7 @@ def 
_partition_column_pair_metric_domain_kwargs( # type: ignore[override] # Exe Returns: compute_domain_kwargs, accessor_domain_kwargs partition from domain_kwargs The union of compute_domain_kwargs, accessor_domain_kwargs is the input domain_kwargs - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP assert ( domain_type == MetricDomainTypes.COLUMN_PAIR ), "This method only supports MetricDomainTypes.COLUMN_PAIR" @@ -892,7 +834,7 @@ def _partition_column_pair_metric_domain_kwargs( # type: ignore[override] # Exe accessor_domain_kwargs: dict = {} if not ("column_A" in compute_domain_kwargs and "column_B" in compute_domain_kwargs): - raise gx_exceptions.GreatExpectationsError( # noqa: TRY003 + raise gx_exceptions.GreatExpectationsError( # noqa: TRY003 # FIXME CoP "column_A or column_B not found within compute_domain_kwargs" ) @@ -926,7 +868,7 @@ def _partition_multi_column_metric_domain_kwargs( # type: ignore[override] # Ex Returns: compute_domain_kwargs, accessor_domain_kwargs partition from domain_kwargs The union of compute_domain_kwargs, accessor_domain_kwargs is the input domain_kwargs - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP assert ( domain_type == MetricDomainTypes.MULTICOLUMN ), "This method only supports MetricDomainTypes.MULTICOLUMN" @@ -935,12 +877,12 @@ def _partition_multi_column_metric_domain_kwargs( # type: ignore[override] # Ex accessor_domain_kwargs: dict = {} if "column_list" not in domain_kwargs: - raise GreatExpectationsError("column_list not found within domain_kwargs") # noqa: TRY003 + raise GreatExpectationsError("column_list not found within domain_kwargs") # noqa: TRY003 # FIXME CoP column_list = compute_domain_kwargs.pop("column_list") - if len(column_list) < 2: # noqa: PLR2004 - raise GreatExpectationsError("column_list must contain at least 2 columns") # noqa: TRY003 + if len(column_list) < 2: # noqa: PLR2004 # FIXME CoP + raise GreatExpectationsError("column_list must contain at least 2 columns") # noqa: TRY003 # FIXME CoP # Checking if 
case-sensitive and using appropriate name if cast(SqlAlchemyBatchData, self.batch_manager.active_batch_data).use_quoted_name: @@ -953,7 +895,7 @@ def _partition_multi_column_metric_domain_kwargs( # type: ignore[override] # Ex return PartitionDomainKwargs(compute_domain_kwargs, accessor_domain_kwargs) @override - def resolve_metric_bundle( # noqa: C901 - too complex + def resolve_metric_bundle( # noqa: C901 # too complex self, metric_fn_bundle: Iterable[MetricComputationConfiguration], ) -> Dict[Tuple[str, str, str], MetricValue]: @@ -969,7 +911,7 @@ def resolve_metric_bundle( # noqa: C901 - too complex Returns: A dictionary of "MetricConfiguration" IDs and their corresponding now-queried (fully resolved) values. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP resolved_metrics: Dict[Tuple[str, str, str], MetricValue] = {} res: List[sqlalchemy.Row] @@ -1024,19 +966,19 @@ def resolve_metric_bundle( # noqa: C901 - too complex as a subquery wrapped in "(subquery) alias". TextClause must first be converted to TextualSelect using sa.columns() before it can be converted to type Subquery """ - if sqlalchemy.TextClause and isinstance(selectable, sqlalchemy.TextClause): # type: ignore[truthy-function] + if sqlalchemy.TextClause and isinstance(selectable, sqlalchemy.TextClause): # type: ignore[truthy-function] # FIXME CoP sa_query_object = sa.select(*query["select"]).select_from( selectable.columns().subquery() ) - elif (sqlalchemy.Select and isinstance(selectable, sqlalchemy.Select)) or ( # type: ignore[truthy-function] - sqlalchemy.TextualSelect and isinstance(selectable, sqlalchemy.TextualSelect) # type: ignore[truthy-function] + elif (sqlalchemy.Select and isinstance(selectable, sqlalchemy.Select)) or ( # type: ignore[truthy-function] # FIXME CoP + sqlalchemy.TextualSelect and isinstance(selectable, sqlalchemy.TextualSelect) # type: ignore[truthy-function] # FIXME CoP ): sa_query_object = sa.select(*query["select"]).select_from(selectable.subquery()) else: - 
sa_query_object = sa.select(*query["select"]).select_from(selectable) # type: ignore[arg-type] + sa_query_object = sa.select(*query["select"]).select_from(selectable) # type: ignore[arg-type] # FIXME CoP logger.debug(f"Attempting query {sa_query_object!s}") - res = self.execute_query(sa_query_object).fetchall() # type: ignore[assignment] + res = self.execute_query(sa_query_object).fetchall() # type: ignore[assignment] # FIXME CoP logger.debug( f"""SqlAlchemyExecutionEngine computed {len(res[0])} metrics on domain_id \ @@ -1048,7 +990,7 @@ def resolve_metric_bundle( # noqa: C901 - too complex exception_message += ( f'{type(oe).__name__}: "{oe!s}". Traceback: "{exception_traceback}".' ) - logger.error(exception_message) # noqa: TRY400 + logger.error(exception_message) # noqa: TRY400 # FIXME CoP raise ExecutionEngineError(message=exception_message) assert len(res) == 1, "all bundle-computed metrics must be single-value statistics" @@ -1057,8 +999,8 @@ def resolve_metric_bundle( # noqa: C901 - too complex idx: int metric_id: Tuple[str, str, str] for idx, metric_id in enumerate(query["metric_ids"]): - # Converting SQL query execution results into JSON-serializable format produces simple data types, # noqa: E501 - # amenable for subsequent post-processing by higher-level "Metric" and "Expectation" layers. # noqa: E501 + # Converting SQL query execution results into JSON-serializable format produces simple data types, # noqa: E501 # FIXME CoP + # amenable for subsequent post-processing by higher-level "Metric" and "Expectation" layers. 
# noqa: E501 # FIXME CoP resolved_metrics[metric_id] = convert_to_json_serializable(data=res[0][idx]) return resolved_metrics @@ -1080,7 +1022,7 @@ def close(self) -> None: self.engine.dispose() More background can be found here: https://github.com/great-expectations/great_expectations/pull/3104/ - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if self._engine_backup: if self._connection: self._connection.close() @@ -1114,14 +1056,14 @@ def execute_partitioned_query( # Note: Athena does not support casting to string, only to varchar # but sqlalchemy currently generates a query as `CAST(colname AS STRING)` instead # of `CAST(colname AS VARCHAR)` with other dialects. - partitioned_query = str( # type: ignore[assignment] + partitioned_query = str( # type: ignore[assignment] # FIXME CoP partitioned_query.compile(self.engine, compile_kwargs={"literal_binds": True}) ) pattern = re.compile(r"(CAST\(EXTRACT\(.*?\))( AS STRING\))", re.IGNORECASE) - partitioned_query = re.sub(pattern, r"\1 AS VARCHAR)", partitioned_query) # type: ignore[call-overload] + partitioned_query = re.sub(pattern, r"\1 AS VARCHAR)", partitioned_query) # type: ignore[call-overload] # FIXME CoP - return self.execute_query(partitioned_query).fetchall() # type: ignore[return-value] + return self.execute_query(partitioned_query).fetchall() # type: ignore[return-value] # FIXME CoP def get_data_for_batch_identifiers( self, @@ -1140,7 +1082,7 @@ def get_data_for_batch_identifiers( Returns: List of dicts of the form [{column_name: {"key": value}}] - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return self._data_partitioner.get_data_for_batch_identifiers( execution_engine=self, selectable=selectable, @@ -1150,7 +1092,7 @@ def get_data_for_batch_identifiers( def _build_selectable_from_batch_spec(self, batch_spec: BatchSpec) -> sqlalchemy.Selectable: if batch_spec.get("query") is not None and batch_spec.get("sampling_method") is not None: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 
# FIXME CoP "Sampling is not supported on query data. " "It is currently only supported on table data." ) @@ -1164,7 +1106,7 @@ def _build_selectable_from_batch_spec(self, batch_spec: BatchSpec) -> sqlalchemy **batch_spec["partitioner_kwargs"], ) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if self.dialect_name == GXSqlDialect.SQLITE: partition_clause = sa.text("1 = 1") else: @@ -1189,7 +1131,7 @@ def _build_selectable_from_batch_spec(self, batch_spec: BatchSpec) -> sqlalchemy sampler_fn = self._data_sampler.get_sampler_method(sampling_method) return ( sa.select("*") - .select_from(selectable) # type: ignore[arg-type] + .select_from(selectable) # type: ignore[arg-type] # FIXME CoP .where( sa.and_( partition_clause, @@ -1198,7 +1140,7 @@ def _build_selectable_from_batch_spec(self, batch_spec: BatchSpec) -> sqlalchemy ) ) - return sa.select("*").select_from(selectable).where(partition_clause) # type: ignore[arg-type] + return sa.select("*").select_from(selectable).where(partition_clause) # type: ignore[arg-type] # FIXME CoP def _subselectable(self, batch_spec: BatchSpec) -> sqlalchemy.Selectable: table_name = batch_spec.get("table_name") @@ -1208,7 +1150,7 @@ def _subselectable(self, batch_spec: BatchSpec) -> sqlalchemy.Selectable: selectable = sa.table(table_name, schema=batch_spec.get("schema_name", None)) else: if not isinstance(query, str): - raise ValueError(f"SQL query should be a str but got {query}") # noqa: TRY003 + raise ValueError(f"SQL query should be a str but got {query}") # noqa: TRY003 # FIXME CoP # Query is a valid SELECT query that begins with r"\w+select\w" selectable = sa.select( sa.text(query.lstrip()[6:].strip().rstrip(";").rstrip()) @@ -1221,13 +1163,13 @@ def get_batch_data_and_markers( self, batch_spec: BatchSpec ) -> Tuple[SqlAlchemyBatchData, BatchMarkers]: if not isinstance(batch_spec, (SqlAlchemyDatasourceBatchSpec, RuntimeQueryBatchSpec)): - raise InvalidBatchSpecError( # noqa: TRY003 + raise InvalidBatchSpecError( # noqa: 
TRY003 # FIXME CoP f"""SqlAlchemyExecutionEngine accepts batch_spec only of type SqlAlchemyDatasourceBatchSpec or RuntimeQueryBatchSpec (illegal type "{type(batch_spec)!s}" was received). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) if sum(1 if x else 0 for x in [batch_spec.get("query"), batch_spec.get("table_name")]) != 1: - raise InvalidBatchSpecError( # noqa: TRY003 + raise InvalidBatchSpecError( # noqa: TRY003 # FIXME CoP "SqlAlchemyExecutionEngine only accepts a batch_spec where exactly 1 of " "'query' or 'table_name' is specified. " f"table_name={batch_spec.get('table_name')}, query={batch_spec.get('query')}" @@ -1251,7 +1193,7 @@ def get_batch_data_and_markers( selectable: sqlalchemy.Selectable = self._build_selectable_from_batch_spec( batch_spec=batch_spec ) - # NOTE: what's being checked here is the presence of a `query` attribute, we could check this directly # noqa: E501 + # NOTE: what's being checked here is the presence of a `query` attribute, we could check this directly # noqa: E501 # FIXME CoP # instead of doing an instance check if isinstance(batch_spec, RuntimeQueryBatchSpec): # query != None is already checked when RuntimeQueryBatchSpec is instantiated @@ -1282,11 +1224,11 @@ def get_inspector(self) -> sqlalchemy.engine.reflection.Inspector: if self._inspector is None: if version.parse(sa.__version__) < version.parse("1.4"): # Inspector.from_engine deprecated since 1.4, sa.inspect() should be used instead - self._inspector = sqlalchemy.reflection.Inspector.from_engine(self.engine) # type: ignore[assignment] + self._inspector = sqlalchemy.reflection.Inspector.from_engine(self.engine) # type: ignore[assignment] # FIXME CoP else: - self._inspector = sa.inspect(self.engine) # type: ignore[assignment] + self._inspector = sa.inspect(self.engine) # type: ignore[assignment] # FIXME CoP - return self._inspector # type: ignore[return-value] + return self._inspector # type: ignore[return-value] # FIXME CoP @contextmanager def get_connection(self) -> 
Generator[sqlalchemy.Connection, None, None]: @@ -1300,7 +1242,7 @@ def get_connection(self) -> Generator[sqlalchemy.Connection, None, None]: Returns: Sqlalchemy connection - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if self.dialect_name in _PERSISTED_CONNECTION_DIALECTS: try: if not self._connection: @@ -1344,7 +1286,7 @@ def execute_query_in_transaction( Returns: CursorResult for sqlalchemy 2.0+ or LegacyCursorResult for earlier versions. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP with self.get_connection() as connection: if ( is_version_greater_or_equal(sqlalchemy.sqlalchemy.__version__, "2.0.0") diff --git a/great_expectations/execution_engine/util.py b/great_expectations/execution_engine/util.py index 39cc8928321e..f6e9f14be7b4 100644 --- a/great_expectations/execution_engine/util.py +++ b/great_expectations/execution_engine/util.py @@ -11,7 +11,7 @@ logger = logging.getLogger(__name__) try: - import sqlalchemy # noqa: F401, TID251 + import sqlalchemy # noqa: F401, TID251 # FIXME CoP except ImportError: logger.debug("Unable to load SqlAlchemy or one of its subclasses.") @@ -49,7 +49,7 @@ def is_valid_continuous_partition_object(partition_object): :param partition_object: The partition_object to evaluate :return: Boolean - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if ( (partition_object is None) or ("weights" not in partition_object) @@ -58,7 +58,7 @@ def is_valid_continuous_partition_object(partition_object): return False if "tail_weights" in partition_object: - if len(partition_object["tail_weights"]) != 2: # noqa: PLR2004 + if len(partition_object["tail_weights"]) != 2: # noqa: PLR2004 # FIXME CoP return False comb_weights = partition_object["tail_weights"] + partition_object["weights"] else: @@ -68,7 +68,7 @@ def is_valid_continuous_partition_object(partition_object): # if (partition_object['bins'][0] == -np.inf) or (partition_object['bins'][-1] == np.inf): # return False - # Expect one more bin edge than weight; all bin edges should be 
monotonically increasing; weights should sum to one # noqa: E501 + # Expect one more bin edge than weight; all bin edges should be monotonically increasing; weights should sum to one # noqa: E501 # FIXME CoP return ( (len(partition_object["bins"]) == (len(partition_object["weights"]) + 1)) and np.all(np.diff(partition_object["bins"]) > 0) @@ -102,7 +102,7 @@ def build_continuous_partition_object( } See :ref:`partition_object`. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP partition_metric_configuration = MetricConfiguration( "column.partition", metric_domain_kwargs=domain_kwargs, @@ -164,7 +164,7 @@ def build_categorical_partition_object(execution_engine, domain_kwargs, sort="va "weights": (list) The densities of the values implied by the partition. } See :ref:`partition_object`. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP counts_configuration = MetricConfiguration( "column.partition", metric_domain_kwargs=domain_kwargs, @@ -187,7 +187,7 @@ def build_categorical_partition_object(execution_engine, domain_kwargs, sort="va } -def infer_distribution_parameters( # noqa: C901, PLR0912 +def infer_distribution_parameters( # noqa: C901, PLR0912 # FIXME CoP data, distribution, params=None ): """Convenience method for determining the shape parameters of a given distribution @@ -219,7 +219,7 @@ def infer_distribution_parameters( # noqa: C901, PLR0912 if params is None: params = {} elif not isinstance(params, dict): - raise TypeError("params must be a dictionary object, see great_expectations documentation") # noqa: TRY003 + raise TypeError("params must be a dictionary object, see great_expectations documentation") # noqa: TRY003 # FIXME CoP if "mean" not in params: params["mean"] = data.mean() @@ -273,7 +273,7 @@ def infer_distribution_parameters( # noqa: C901, PLR0912 # Lambda is optional # params['scale'] = 1 / params['lambda'] elif distribution != "norm": - raise AttributeError( # noqa: TRY003 + raise AttributeError( # noqa: TRY003 # FIXME CoP "Unsupported 
distribution type. Please refer to Great Expectations Documentation" ) @@ -299,7 +299,7 @@ def _scipy_distribution_positional_args_from_dict(distribution, params): Raises: AttributeError: \ If an unsupported distribution is provided. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP params["loc"] = params.get("loc", 0) if "scale" not in params: @@ -321,7 +321,7 @@ def _scipy_distribution_positional_args_from_dict(distribution, params): return params["loc"], params["scale"] -def validate_distribution_parameters( # noqa: C901, PLR0912, PLR0915 +def validate_distribution_parameters( # noqa: C901, PLR0912, PLR0915 # FIXME CoP distribution, params ): """Ensures that necessary parameters for a distribution are present and that all parameters are sensical. @@ -343,14 +343,14 @@ def validate_distribution_parameters( # noqa: C901, PLR0912, PLR0915 ValueError: \ With an informative description, usually when necessary parameters are omitted or are invalid. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP norm_msg = "norm distributions require 0 parameters and optionally 'mean', 'std_dev'." - beta_msg = "beta distributions require 2 positive parameters 'alpha', 'beta' and optionally 'loc', 'scale'." # noqa: E501 + beta_msg = "beta distributions require 2 positive parameters 'alpha', 'beta' and optionally 'loc', 'scale'." # noqa: E501 # FIXME CoP gamma_msg = ( "gamma distributions require 1 positive parameter 'alpha' and optionally 'loc','scale'." ) - # poisson_msg = "poisson distributions require 1 positive parameter 'lambda' and optionally 'loc'." # noqa: E501 + # poisson_msg = "poisson distributions require 1 positive parameter 'lambda' and optionally 'loc'." # noqa: E501 # FIXME CoP uniform_msg = "uniform distributions require 0 parameters and optionally 'loc', 'scale'." chi2_msg = "chi2 distributions require 1 positive parameter 'df' and optionally 'loc', 'scale'." expon_msg = "expon distributions require 0 parameters and optionally 'loc', 'scale'." 
@@ -364,20 +364,20 @@ def validate_distribution_parameters( # noqa: C901, PLR0912, PLR0915 "chi2", "expon", ]: - raise AttributeError(f"Unsupported distribution provided: {distribution}") # noqa: TRY003 + raise AttributeError(f"Unsupported distribution provided: {distribution}") # noqa: TRY003 # FIXME CoP if isinstance(params, dict): # `params` is a dictionary if params.get("std_dev", 1) <= 0 or params.get("scale", 1) <= 0: - raise ValueError("std_dev and scale must be positive.") # noqa: TRY003 + raise ValueError("std_dev and scale must be positive.") # noqa: TRY003 # FIXME CoP # alpha and beta are required and positive if distribution == "beta" and (params.get("alpha", -1) <= 0 or params.get("beta", -1) <= 0): - raise ValueError(f"Invalid parameters: {beta_msg}") # noqa: TRY003 + raise ValueError(f"Invalid parameters: {beta_msg}") # noqa: TRY003 # FIXME CoP # alpha is required and positive elif distribution == "gamma" and params.get("alpha", -1) <= 0: - raise ValueError(f"Invalid parameters: {gamma_msg}") # noqa: TRY003 + raise ValueError(f"Invalid parameters: {gamma_msg}") # noqa: TRY003 # FIXME CoP # lambda is a required and positive # elif distribution == 'poisson' and params.get('lambda', -1) <= 0: @@ -385,37 +385,37 @@ def validate_distribution_parameters( # noqa: C901, PLR0912, PLR0915 # df is necessary and required to be positive elif distribution == "chi2" and params.get("df", -1) <= 0: - raise ValueError(f"Invalid parameters: {chi2_msg}:") # noqa: TRY003 + raise ValueError(f"Invalid parameters: {chi2_msg}:") # noqa: TRY003 # FIXME CoP elif isinstance(params, (tuple, list)): scale = None # `params` is a tuple or a list if distribution == "beta": - if len(params) < 2: # noqa: PLR2004 - raise ValueError(f"Missing required parameters: {beta_msg}") # noqa: TRY003 + if len(params) < 2: # noqa: PLR2004 # FIXME CoP + raise ValueError(f"Missing required parameters: {beta_msg}") # noqa: TRY003 # FIXME CoP if params[0] <= 0 or params[1] <= 0: - raise 
ValueError(f"Invalid parameters: {beta_msg}") # noqa: TRY003 - if len(params) == 4: # noqa: PLR2004 + raise ValueError(f"Invalid parameters: {beta_msg}") # noqa: TRY003 # FIXME CoP + if len(params) == 4: # noqa: PLR2004 # FIXME CoP scale = params[3] - elif len(params) > 4: # noqa: PLR2004 - raise ValueError(f"Too many parameters provided: {beta_msg}") # noqa: TRY003 + elif len(params) > 4: # noqa: PLR2004 # FIXME CoP + raise ValueError(f"Too many parameters provided: {beta_msg}") # noqa: TRY003 # FIXME CoP elif distribution == "norm": - if len(params) > 2: # noqa: PLR2004 - raise ValueError(f"Too many parameters provided: {norm_msg}") # noqa: TRY003 - if len(params) == 2: # noqa: PLR2004 + if len(params) > 2: # noqa: PLR2004 # FIXME CoP + raise ValueError(f"Too many parameters provided: {norm_msg}") # noqa: TRY003 # FIXME CoP + if len(params) == 2: # noqa: PLR2004 # FIXME CoP scale = params[1] elif distribution == "gamma": if len(params) < 1: - raise ValueError(f"Missing required parameters: {gamma_msg}") # noqa: TRY003 - if len(params) == 3: # noqa: PLR2004 + raise ValueError(f"Missing required parameters: {gamma_msg}") # noqa: TRY003 # FIXME CoP + if len(params) == 3: # noqa: PLR2004 # FIXME CoP scale = params[2] - if len(params) > 3: # noqa: PLR2004 - raise ValueError(f"Too many parameters provided: {gamma_msg}") # noqa: TRY003 + if len(params) > 3: # noqa: PLR2004 # FIXME CoP + raise ValueError(f"Too many parameters provided: {gamma_msg}") # noqa: TRY003 # FIXME CoP elif params[0] <= 0: - raise ValueError(f"Invalid parameters: {gamma_msg}") # noqa: TRY003 + raise ValueError(f"Invalid parameters: {gamma_msg}") # noqa: TRY003 # FIXME CoP # elif distribution == 'poisson': # if len(params) < 1: @@ -426,33 +426,33 @@ def validate_distribution_parameters( # noqa: C901, PLR0912, PLR0915 # raise ValueError("Invalid parameters: %s" %poisson_msg) elif distribution == "uniform": - if len(params) == 2: # noqa: PLR2004 + if len(params) == 2: # noqa: PLR2004 # FIXME CoP 
scale = params[1] - if len(params) > 2: # noqa: PLR2004 - raise ValueError(f"Too many arguments provided: {uniform_msg}") # noqa: TRY003 + if len(params) > 2: # noqa: PLR2004 # FIXME CoP + raise ValueError(f"Too many arguments provided: {uniform_msg}") # noqa: TRY003 # FIXME CoP elif distribution == "chi2": if len(params) < 1: - raise ValueError(f"Missing required parameters: {chi2_msg}") # noqa: TRY003 - elif len(params) == 3: # noqa: PLR2004 + raise ValueError(f"Missing required parameters: {chi2_msg}") # noqa: TRY003 # FIXME CoP + elif len(params) == 3: # noqa: PLR2004 # FIXME CoP scale = params[2] - elif len(params) > 3: # noqa: PLR2004 - raise ValueError(f"Too many arguments provided: {chi2_msg}") # noqa: TRY003 + elif len(params) > 3: # noqa: PLR2004 # FIXME CoP + raise ValueError(f"Too many arguments provided: {chi2_msg}") # noqa: TRY003 # FIXME CoP if params[0] <= 0: - raise ValueError(f"Invalid parameters: {chi2_msg}") # noqa: TRY003 + raise ValueError(f"Invalid parameters: {chi2_msg}") # noqa: TRY003 # FIXME CoP elif distribution == "expon": - if len(params) == 2: # noqa: PLR2004 + if len(params) == 2: # noqa: PLR2004 # FIXME CoP scale = params[1] - if len(params) > 2: # noqa: PLR2004 - raise ValueError(f"Too many arguments provided: {expon_msg}") # noqa: TRY003 + if len(params) > 2: # noqa: PLR2004 # FIXME CoP + raise ValueError(f"Too many arguments provided: {expon_msg}") # noqa: TRY003 # FIXME CoP if scale is not None and scale <= 0: - raise ValueError("std_dev and scale must be positive.") # noqa: TRY003 + raise ValueError("std_dev and scale must be positive.") # noqa: TRY003 # FIXME CoP else: - raise ValueError( # noqa: TRY003, TRY004 - "params must be a dict or list, or use great_expectations.dataset.util.infer_distribution_parameters(data, distribution)" # noqa: E501 + raise ValueError( # noqa: TRY003, TRY004 # FIXME CoP + "params must be a dict or list, or use great_expectations.dataset.util.infer_distribution_parameters(data, distribution)" # 
noqa: E501 # FIXME CoP ) @@ -472,7 +472,7 @@ def create_multiple_expectations(df, columns, expectation_type, *args, **kwargs) A list of expectation results. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP expectation = getattr(df, expectation_type) results = list() diff --git a/great_expectations/expectations/core/expect_column_distinct_values_to_be_in_set.py b/great_expectations/expectations/core/expect_column_distinct_values_to_be_in_set.py index dbc55fd42da9..cadf30b6787b 100644 --- a/great_expectations/expectations/core/expect_column_distinct_values_to_be_in_set.py +++ b/great_expectations/expectations/core/expect_column_distinct_values_to_be_in_set.py @@ -5,10 +5,12 @@ import altair as alt import pandas as pd +from great_expectations.compatibility import pydantic from great_expectations.expectations.expectation import ( ColumnAggregateExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_DESCRIPTION, VALUE_SET_DESCRIPTION, @@ -58,12 +60,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Sets"] +DATA_QUALITY_ISSUES = [DataQualityIssues.UNIQUENESS.value] class ExpectColumnDistinctValuesToBeInSet(ColumnAggregateExpectation): @@ -106,7 +107,7 @@ class ExpectColumnDistinctValuesToBeInSet(ColumnAggregateExpectation): [ExpectColumnDistinctValuesToContainSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_contain_set) [ExpectColumnDistinctValuesToEqualSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_equal_set) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) 
[{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -115,9 +116,8 @@ class ExpectColumnDistinctValuesToBeInSet(ColumnAggregateExpectation): [{SUPPORTED_DATA_SOURCES[5]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[6]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) - [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -198,7 +198,7 @@ class ExpectColumnDistinctValuesToBeInSet(ColumnAggregateExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP value_set: ValueSetField @@ -212,7 +212,7 @@ class ExpectColumnDistinctValuesToBeInSet(ColumnAggregateExpectation): } _library_metadata = library_metadata - # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 + # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 # FIXME CoP metric_dependencies = ("column.value_counts",) success_keys = ("value_set",) @@ -254,6 +254,12 @@ def schema_extra( } ) + @pydantic.validator("value_set") + def _validate_value_set(cls, value_set: ValueSetField) -> ValueSetField: + if not value_set: + raise ValueError("value_set must be a non-empty set-like object.") # noqa: TRY003 # Error messaged gets swallowed by Pydantic + return value_set + @classmethod def _prescriptive_template( cls, @@ -382,17 +388,17 @@ def _descriptive_value_counts_bar_chart_renderer( } ) - if len(values) > 60: # noqa: PLR2004 + if len(values) > 60: # noqa: PLR2004 # FIXME CoP return None else: chart_pixel_width = (len(values) / 60.0) * 500 chart_pixel_width = 
max(chart_pixel_width, 250) chart_container_col_width = round((len(values) / 60.0) * 6) - if chart_container_col_width < 4: # noqa: PLR2004 + if chart_container_col_width < 4: # noqa: PLR2004 # FIXME CoP chart_container_col_width = 4 - elif chart_container_col_width >= 5: # noqa: PLR2004 + elif chart_container_col_width >= 5: # noqa: PLR2004 # FIXME CoP chart_container_col_width = 6 - elif chart_container_col_width >= 4: # noqa: PLR2004 + elif chart_container_col_width >= 4: # noqa: PLR2004 # FIXME CoP chart_container_col_width = 5 mark_bar_args = {} diff --git a/great_expectations/expectations/core/expect_column_distinct_values_to_contain_set.py b/great_expectations/expectations/core/expect_column_distinct_values_to_contain_set.py index e845299e6645..51a7a7005309 100644 --- a/great_expectations/expectations/core/expect_column_distinct_values_to_contain_set.py +++ b/great_expectations/expectations/core/expect_column_distinct_values_to_contain_set.py @@ -7,6 +7,7 @@ ColumnAggregateExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_DESCRIPTION, VALUE_SET_DESCRIPTION, @@ -50,12 +51,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Sets"] +DATA_QUALITY_ISSUES = [DataQualityIssues.UNIQUENESS.value] class ExpectColumnDistinctValuesToContainSet(ColumnAggregateExpectation): @@ -94,7 +94,7 @@ class ExpectColumnDistinctValuesToContainSet(ColumnAggregateExpectation): [ExpectColumnDistinctValuesToBeInSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_be_in_set) [ExpectColumnDistinctValuesToEqualSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_equal_set) - Supported Datasources: + Supported Data Sources: 
[{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -105,7 +105,7 @@ class ExpectColumnDistinctValuesToContainSet(ColumnAggregateExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -186,7 +186,7 @@ class ExpectColumnDistinctValuesToContainSet(ColumnAggregateExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP value_set: ValueSetField @@ -202,7 +202,7 @@ class ExpectColumnDistinctValuesToContainSet(ColumnAggregateExpectation): _library_metadata = library_metadata - # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 + # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 # FIXME CoP metric_dependencies = ("column.value_counts",) success_keys = ("value_set",) @@ -298,7 +298,7 @@ def _prescriptive_renderer( runtime_configuration=runtime_configuration, ) if renderer_configuration.configuration is None: - raise ValueError("renderer_configuration.configuration is None.") # noqa: TRY003 + raise ValueError("renderer_configuration.configuration is None.") # noqa: TRY003 # FIXME CoP params = substitute_none_for_missing( renderer_configuration.configuration.kwargs, [ @@ -356,7 +356,7 @@ def _validate( parsed_value_set = value_set if observed_value_counts is None: - raise ValueError("observed_value_counts None, but is required") # noqa: TRY003 + raise 
ValueError("observed_value_counts None, but is required") # noqa: TRY003 # FIXME CoP observed_value_set = set(observed_value_counts.index) expected_value_set = set(parsed_value_set) diff --git a/great_expectations/expectations/core/expect_column_distinct_values_to_equal_set.py b/great_expectations/expectations/core/expect_column_distinct_values_to_equal_set.py index 13ce3830829a..1aa151f03a93 100644 --- a/great_expectations/expectations/core/expect_column_distinct_values_to_equal_set.py +++ b/great_expectations/expectations/core/expect_column_distinct_values_to_equal_set.py @@ -7,6 +7,7 @@ ColumnAggregateExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_DESCRIPTION, VALUE_SET_DESCRIPTION, @@ -50,12 +51,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Sets"] +DATA_QUALITY_ISSUES = [DataQualityIssues.UNIQUENESS.value] class ExpectColumnDistinctValuesToEqualSet(ColumnAggregateExpectation): @@ -94,7 +94,7 @@ class ExpectColumnDistinctValuesToEqualSet(ColumnAggregateExpectation): [ExpectColumnDistinctValuesToBeInSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_be_in_set) [ExpectColumnDistinctValuesToContainSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_contain_set) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -105,7 +105,7 @@ class ExpectColumnDistinctValuesToEqualSet(ColumnAggregateExpectation): 
[{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -186,7 +186,7 @@ class ExpectColumnDistinctValuesToEqualSet(ColumnAggregateExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP value_set: ValueSetField @@ -202,7 +202,7 @@ class ExpectColumnDistinctValuesToEqualSet(ColumnAggregateExpectation): _library_metadata = library_metadata - # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 + # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 # FIXME CoP metric_dependencies = ("column.value_counts",) success_keys = ("value_set",) args_keys = ( @@ -285,7 +285,7 @@ def _prescriptive_template( @classmethod @renderer(renderer_type=LegacyRendererType.PRESCRIPTIVE) @render_suite_parameter_string - def _prescriptive_renderer( # noqa: C901 - too complex + def _prescriptive_renderer( # noqa: C901 # too complex cls, configuration: Optional[ExpectationConfiguration] = None, result: Optional[ExpectationValidationResult] = None, diff --git a/great_expectations/expectations/core/expect_column_kl_divergence_to_be_less_than.py b/great_expectations/expectations/core/expect_column_kl_divergence_to_be_less_than.py index 2651c3ac25cc..624bc23380ff 100644 --- a/great_expectations/expectations/core/expect_column_kl_divergence_to_be_less_than.py +++ b/great_expectations/expectations/core/expect_column_kl_divergence_to_be_less_than.py @@ -9,7 +9,7 @@ from scipy import stats from great_expectations.compatibility import pydantic -from great_expectations.core.types import Comparable # noqa: TCH001 +from great_expectations.core.types import Comparable # noqa: 
TCH001 # FIXME CoP from great_expectations.execution_engine.util import ( is_valid_categorical_partition_object, is_valid_partition_object, @@ -19,6 +19,7 @@ ColumnAggregateExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.render import ( AtomicDiagnosticRendererType, AtomicPrescriptiveRendererType, @@ -100,11 +101,10 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", ] -DATA_QUALITY_ISSUES = ["Distribution"] +DATA_QUALITY_ISSUES = [DataQualityIssues.NUMERIC.value] class ExpectColumnKLDivergenceToBeLessThan(ColumnAggregateExpectation): @@ -194,7 +194,7 @@ class ExpectColumnKLDivergenceToBeLessThan(ColumnAggregateExpectation): value will be set to None. This is because inf, -inf, Nan, are not json serializable and cause some json \ parsers to crash when encountered. The python None token will be serialized to null in json. - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -203,9 +203,8 @@ class ExpectColumnKLDivergenceToBeLessThan(ColumnAggregateExpectation): [{SUPPORTED_DATA_SOURCES[5]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[6]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) - [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -317,7 +316,7 @@ class ExpectColumnKLDivergenceToBeLessThan(ColumnAggregateExpectation): 
"meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP partition_object: Union[dict, None] = pydantic.Field(description=PARTITION_OBJECT_DESCRIPTION) threshold: Union[float, None] = pydantic.Field(description=THRESHOLD_DESCRIPTION) @@ -455,9 +454,9 @@ def get_validation_dependencies( metric_value_kwargs=None, ) # - # NOTE 20201117 - JPC - Would prefer not to include partition_metric_configuration here, # noqa: E501 - # since we have already evaluated it, and its result is in the kwargs for the histogram. # noqa: E501 - # However, currently the validation_dependencies' configurations are not passed to the _validate method # noqa: E501 + # NOTE 20201117 - JPC - Would prefer not to include partition_metric_configuration here, # noqa: E501 # FIXME CoP + # since we have already evaluated it, and its result is in the kwargs for the histogram. # noqa: E501 # FIXME CoP + # However, currently the validation_dependencies' configurations are not passed to the _validate method # noqa: E501 # FIXME CoP # validation_dependencies.set_metric_configuration( metric_name="column.partition", @@ -516,7 +515,7 @@ def get_validation_dependencies( bins is None ): # if the user did not supply a partition_object, so we just computed it if not is_valid_partition_object(partition_object): - raise ValueError("Invalid partition_object provided") # noqa: TRY003 + raise ValueError("Invalid partition_object provided") # noqa: TRY003 # FIXME CoP bins = partition_object["bins"] hist_metric_configuration = MetricConfiguration( @@ -558,7 +557,7 @@ def get_validation_dependencies( return validation_dependencies - def _validate( # noqa: C901, PLR0912, PLR0915 + def _validate( # noqa: C901, PLR0912, PLR0915 # FIXME CoP self, metrics: Dict, runtime_configuration: Optional[dict] = None, @@ -581,7 +580,7 @@ def _validate( # noqa: C901, PLR0912, PLR0915 ) if partition_object is None: if bucketize_data: - # in this case, we have requested a partition, histogram using said 
partition, and nonnull count # noqa: E501 + # in this case, we have requested a partition, histogram using said partition, and nonnull count # noqa: E501 # FIXME CoP bins = list(metrics["column.partition"]) weights = list( np.array(metrics["column.histogram"]) / metrics["column_values.nonnull.count"] @@ -602,34 +601,34 @@ def _validate( # noqa: C901, PLR0912, PLR0915 } if not is_valid_partition_object(partition_object): - raise ValueError("Invalid partition object.") # noqa: TRY003 + raise ValueError("Invalid partition object.") # noqa: TRY003 # FIXME CoP if threshold is not None and ((not isinstance(threshold, (int, float))) or (threshold < 0)): - raise ValueError("Threshold must be specified, greater than or equal to zero.") # noqa: TRY003 + raise ValueError("Threshold must be specified, greater than or equal to zero.") # noqa: TRY003 # FIXME CoP if ( (not isinstance(tail_weight_holdout, (int, float))) or (tail_weight_holdout < 0) or (tail_weight_holdout > 1) ): - raise ValueError("tail_weight_holdout must be between zero and one.") # noqa: TRY003 + raise ValueError("tail_weight_holdout must be between zero and one.") # noqa: TRY003 # FIXME CoP if ( (not isinstance(internal_weight_holdout, (int, float))) or (internal_weight_holdout < 0) or (internal_weight_holdout > 1) ): - raise ValueError("internal_weight_holdout must be between zero and one.") # noqa: TRY003 + raise ValueError("internal_weight_holdout must be between zero and one.") # noqa: TRY003 # FIXME CoP if tail_weight_holdout != 0 and "tail_weights" in partition_object: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "tail_weight_holdout must be 0 when using tail_weights in partition object" ) # TODO: add checks for duplicate values in is_valid_categorical_partition_object if is_valid_categorical_partition_object(partition_object): if internal_weight_holdout > 0: - raise ValueError("Internal weight holdout cannot be used for discrete data.") # noqa: TRY003 + raise 
ValueError("Internal weight holdout cannot be used for discrete data.") # noqa: TRY003 # FIXME CoP # Data are expected to be discrete, use value_counts observed_weights = ( @@ -690,8 +689,8 @@ def _validate( # noqa: C901, PLR0912, PLR0915 else: # Data are expected to be continuous; discretize first if bucketize_data is False: - raise ValueError( # noqa: TRY003 - "KL Divergence cannot be computed with a continuous partition object and the bucketize_data " # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "KL Divergence cannot be computed with a continuous partition object and the bucketize_data " # noqa: E501 # FIXME CoP "parameter set to false." ) # Build the histogram first using expected bins so that the largest bin is >= @@ -702,7 +701,7 @@ def _validate( # noqa: C901, PLR0912, PLR0915 below_partition = metrics["below_partition"] above_partition = metrics["above_partition"] - # Observed Weights is just the histogram values divided by the total number of observations # noqa: E501 + # Observed Weights is just the histogram values divided by the total number of observations # noqa: E501 # FIXME CoP observed_weights = hist / nonnull_count # Adjust expected_weights to account for tail_weight and internal_weight @@ -724,17 +723,17 @@ def _validate( # noqa: C901, PLR0912, PLR0915 expected_weights[index] = internal_weight_holdout / zero_count # Assign tail weight holdout if applicable - # We need to check cases to only add tail weight holdout if it makes sense based on the provided partition. # noqa: E501 + # We need to check cases to only add tail weight holdout if it makes sense based on the provided partition. # noqa: E501 # FIXME CoP if (partition_object["bins"][0] == -np.inf) and ( partition_object["bins"][-1] ) == np.inf: if tail_weight_holdout > 0: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "tail_weight_holdout cannot be used for partitions with infinite endpoints." 
) if "tail_weights" in partition_object: - raise ValueError( # noqa: TRY003 - "There can be no tail weights for partitions with one or both endpoints at infinity" # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "There can be no tail weights for partitions with one or both endpoints at infinity" # noqa: E501 # FIXME CoP ) # Remove -inf and inf @@ -758,8 +757,8 @@ def _validate( # noqa: C901, PLR0912, PLR0915 elif partition_object["bins"][0] == -np.inf: if "tail_weights" in partition_object: - raise ValueError( # noqa: TRY003 - "There can be no tail weights for partitions with one or both endpoints at infinity" # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "There can be no tail weights for partitions with one or both endpoints at infinity" # noqa: E501 # FIXME CoP ) # Remove -inf @@ -791,8 +790,8 @@ def _validate( # noqa: C901, PLR0912, PLR0915 elif partition_object["bins"][-1] == np.inf: if "tail_weights" in partition_object: - raise ValueError( # noqa: TRY003 - "There can be no tail weights for partitions with one or both endpoints at infinity" # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "There can be no tail weights for partitions with one or both endpoints at infinity" # noqa: E501 # FIXME CoP ) # Remove inf @@ -858,7 +857,7 @@ def _validate( # noqa: C901, PLR0912, PLR0915 np.concatenate(([below_partition], [above_partition])) / nonnull_count ) - # Main expected_weights and main observed weights had no tail_weights, so nothing needs to be removed. # noqa: E501 + # Main expected_weights and main observed weights had no tail_weights, so nothing needs to be removed. 
# noqa: E501 # FIXME CoP # TODO: VERIFY THAT THIS STILL WORKS BASED ON CHANGE TO HIST # comb_expected_weights = np.array(comb_expected_weights).astype(float) @@ -882,7 +881,7 @@ def _validate( # noqa: C901, PLR0912, PLR0915 "observed_value": observed_value, "details": { "observed_partition": { - # return expected_bins, since we used those bins to compute the observed_weights # noqa: E501 + # return expected_bins, since we used those bins to compute the observed_weights # noqa: E501 # FIXME CoP "bins": expected_bins, "weights": observed_weights.tolist(), "tail_weights": observed_tail_weights.tolist(), @@ -899,12 +898,12 @@ def _validate( # noqa: C901, PLR0912, PLR0915 return return_obj @classmethod - def _get_kl_divergence_chart( # noqa: C901 - 13 + def _get_kl_divergence_chart( # noqa: C901 # 13 cls, partition_object, header=None ): weights = partition_object["weights"] - if len(weights) > 60: # noqa: PLR2004 + if len(weights) > 60: # noqa: PLR2004 # FIXME CoP expected_distribution = cls._get_kl_divergence_partition_object_table( partition_object, header=header ) @@ -912,11 +911,11 @@ def _get_kl_divergence_chart( # noqa: C901 - 13 chart_pixel_width = (len(weights) / 60.0) * 500 chart_pixel_width = max(chart_pixel_width, 250) chart_container_col_width = round((len(weights) / 60.0) * 6) - if chart_container_col_width < 4: # noqa: PLR2004 + if chart_container_col_width < 4: # noqa: PLR2004 # FIXME CoP chart_container_col_width = 4 - elif chart_container_col_width >= 5: # noqa: PLR2004 + elif chart_container_col_width >= 5: # noqa: PLR2004 # FIXME CoP chart_container_col_width = 6 - elif chart_container_col_width >= 4: # noqa: PLR2004 + elif chart_container_col_width >= 4: # noqa: PLR2004 # FIXME CoP chart_container_col_width = 5 mark_bar_args = {} @@ -1010,11 +1009,11 @@ def _atomic_kl_divergence_chart_template(cls, partition_object: dict) -> tuple: chart_pixel_width = (len(weights) / 60.0) * 500 chart_pixel_width = max(chart_pixel_width, 250) 
chart_container_col_width = round((len(weights) / 60.0) * 6) - if chart_container_col_width < 4: # noqa: PLR2004 + if chart_container_col_width < 4: # noqa: PLR2004 # FIXME CoP chart_container_col_width = 4 - elif chart_container_col_width >= 5: # noqa: PLR2004 + elif chart_container_col_width >= 5: # noqa: PLR2004 # FIXME CoP chart_container_col_width = 6 - elif chart_container_col_width >= 4: # noqa: PLR2004 + elif chart_container_col_width >= 4: # noqa: PLR2004 # FIXME CoP chart_container_col_width = 5 mark_bar_args = {} @@ -1231,7 +1230,7 @@ def _prescriptive_template( template_str = "can match any distribution." else: template_str = ( - "Kullback-Leibler (KL) divergence with respect to the following distribution must be " # noqa: E501 + "Kullback-Leibler (KL) divergence with respect to the following distribution must be " # noqa: E501 # FIXME CoP "lower than $threshold." ) @@ -1240,7 +1239,7 @@ def _prescriptive_template( template_str = f"$column {template_str}" # generate table or chart depending on number of weights - if len(weights) > 60: # noqa: PLR2004 + if len(weights) > 60: # noqa: PLR2004 # FIXME CoP ( renderer_configuration.header_row, renderer_configuration.table, @@ -1347,7 +1346,7 @@ def _prescriptive_renderer( template_str = "can match any distribution." else: template_str = ( - "Kullback-Leibler (KL) divergence with respect to the following distribution must be " # noqa: E501 + "Kullback-Leibler (KL) divergence with respect to the following distribution must be " # noqa: E501 # FIXME CoP "lower than $threshold." 
) expected_distribution = cls._get_kl_divergence_chart(params.get("partition_object")) @@ -1403,7 +1402,7 @@ def _atomic_diagnostic_observed_value_template( distribution_table_header_row = None distribution_table_rows = None - if len(weights) > 60: # noqa: PLR2004 + if len(weights) > 60: # noqa: PLR2004 # FIXME CoP ( distribution_table_header_row, distribution_table_rows, @@ -1554,7 +1553,7 @@ def _descriptive_histogram_renderer( assert result, "Must pass in result." observed_partition_object = result.result["details"]["observed_partition"] weights = observed_partition_object["weights"] - if len(weights) > 60: # noqa: PLR2004 + if len(weights) > 60: # noqa: PLR2004 # FIXME CoP return None header = RenderedStringTemplateContent( diff --git a/great_expectations/expectations/core/expect_column_max_to_be_between.py b/great_expectations/expectations/core/expect_column_max_to_be_between.py index 0f89b39e16b4..7007d0e2caa4 100644 --- a/great_expectations/expectations/core/expect_column_max_to_be_between.py +++ b/great_expectations/expectations/core/expect_column_max_to_be_between.py @@ -4,10 +4,11 @@ from great_expectations.compatibility import pydantic from great_expectations.compatibility.typing_extensions import override -from great_expectations.core.types import Comparable # noqa: TCH001 +from great_expectations.core.types import Comparable # noqa: TCH001 # FIXME CoP from great_expectations.expectations.expectation import ( render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import COLUMN_DESCRIPTION from great_expectations.render import ( LegacyDescriptiveRendererType, @@ -25,7 +26,7 @@ ) try: - import sqlalchemy as sa # noqa: F401, TID251 + import sqlalchemy as sa # noqa: F401, TID251 # FIXME CoP except ImportError: pass @@ -62,12 +63,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] 
-DATA_QUALITY_ISSUES = ["Numerical data"] +DATA_QUALITY_ISSUES = [DataQualityIssues.NUMERIC.value] class ExpectColumnMaxToBeBetween(ColumnAggregateExpectation): @@ -118,7 +118,7 @@ class ExpectColumnMaxToBeBetween(ColumnAggregateExpectation): See Also: [ExpectColumnMinToBeBetween](https://greatexpectations.io/expectations/expect_column_min_to_be_between) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -129,7 +129,7 @@ class ExpectColumnMaxToBeBetween(ColumnAggregateExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -185,7 +185,7 @@ class ExpectColumnMaxToBeBetween(ColumnAggregateExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP min_value: Optional[Comparable] = pydantic.Field( default=None, description=MIN_VALUE_DESCRIPTION @@ -206,7 +206,7 @@ class ExpectColumnMaxToBeBetween(ColumnAggregateExpectation): } _library_metadata = library_metadata - # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 + # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 # FIXME CoP metric_dependencies = ("column.max",) success_keys = ( "min_value", @@ -250,7 +250,7 @@ def schema_extra(schema: Dict[str, Any], model: Type[ExpectColumnMaxToBeBetween] @classmethod @override - def _prescriptive_template( # noqa: C901 - too 
complex + def _prescriptive_template( # noqa: C901 # too complex cls, renderer_configuration: RendererConfiguration, ) -> RendererConfiguration: @@ -284,7 +284,7 @@ def _prescriptive_template( # noqa: C901 - too complex if params.min_value == params.max_value: template_str = "maximum value must be $min_value" else: - template_str = f"maximum value must be {at_least_str} $min_value and {at_most_str} $max_value." # noqa: E501 + template_str = f"maximum value must be {at_least_str} $min_value and {at_most_str} $max_value." # noqa: E501 # FIXME CoP elif not params.min_value: template_str = f"maximum value must be {at_most_str} $max_value." else: diff --git a/great_expectations/expectations/core/expect_column_mean_to_be_between.py b/great_expectations/expectations/core/expect_column_mean_to_be_between.py index a66a7f1e636f..6184856c9e59 100644 --- a/great_expectations/expectations/core/expect_column_mean_to_be_between.py +++ b/great_expectations/expectations/core/expect_column_mean_to_be_between.py @@ -4,11 +4,12 @@ from great_expectations.compatibility import pydantic from great_expectations.compatibility.typing_extensions import override -from great_expectations.core.types import Comparable # noqa: TCH001 +from great_expectations.core.types import Comparable # noqa: TCH001 # FIXME CoP from great_expectations.expectations.expectation import ( ColumnAggregateExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import COLUMN_DESCRIPTION from great_expectations.render import ( LegacyDescriptiveRendererType, @@ -54,12 +55,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Numerical data"] +DATA_QUALITY_ISSUES = [DataQualityIssues.NUMERIC.value] class ExpectColumnMeanToBeBetween(ColumnAggregateExpectation): @@ -111,7 +111,7 @@ class 
ExpectColumnMeanToBeBetween(ColumnAggregateExpectation): [ExpectColumnMedianToBeBetween](https://greatexpectations.io/expectations/expect_column_median_to_be_between) [ExpectColumnStdevToBeBetween](https://greatexpectations.io/expectations/expect_column_stdev_to_be_between) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -122,7 +122,7 @@ class ExpectColumnMeanToBeBetween(ColumnAggregateExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -176,7 +176,7 @@ class ExpectColumnMeanToBeBetween(ColumnAggregateExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP min_value: Optional[Comparable] = pydantic.Field( default=None, description=MIN_VALUE_DESCRIPTION @@ -197,7 +197,7 @@ class ExpectColumnMeanToBeBetween(ColumnAggregateExpectation): } _library_metadata = library_metadata - # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 + # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 # FIXME CoP metric_dependencies = ("column.mean",) success_keys = ( "min_value", diff --git a/great_expectations/expectations/core/expect_column_median_to_be_between.py b/great_expectations/expectations/core/expect_column_median_to_be_between.py index 6681feabb9a4..3706302ef4a2 100644 --- 
a/great_expectations/expectations/core/expect_column_median_to_be_between.py +++ b/great_expectations/expectations/core/expect_column_median_to_be_between.py @@ -4,11 +4,12 @@ from great_expectations.compatibility import pydantic from great_expectations.compatibility.typing_extensions import override -from great_expectations.core.types import Comparable # noqa: TCH001 +from great_expectations.core.types import Comparable # noqa: TCH001 # FIXME CoP from great_expectations.expectations.expectation import ( ColumnAggregateExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import COLUMN_DESCRIPTION from great_expectations.render import LegacyRendererType, RenderedStringTemplateContent from great_expectations.render.renderer.renderer import renderer @@ -50,12 +51,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Numerical data"] +DATA_QUALITY_ISSUES = [DataQualityIssues.NUMERIC.value] class ExpectColumnMedianToBeBetween(ColumnAggregateExpectation): @@ -107,7 +107,7 @@ class ExpectColumnMedianToBeBetween(ColumnAggregateExpectation): [ExpectColumnMeanToBeBetween](https://greatexpectations.io/expectations/expect_column_mean_to_be_between) [ExpectColumnStdevToBeBetween](https://greatexpectations.io/expectations/expect_column_stdev_to_be_between) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -118,7 +118,7 @@ class ExpectColumnMedianToBeBetween(ColumnAggregateExpectation): 
[{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -172,7 +172,7 @@ class ExpectColumnMedianToBeBetween(ColumnAggregateExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP min_value: Optional[Comparable] = pydantic.Field( default=None, description=MIN_VALUE_DESCRIPTION @@ -193,7 +193,7 @@ class ExpectColumnMedianToBeBetween(ColumnAggregateExpectation): } _library_metadata = library_metadata - # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 + # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 # FIXME CoP metric_dependencies = ("column.median",) success_keys = ( "min_value", @@ -331,7 +331,7 @@ def _prescriptive_renderer( # type: ignore[override] # TODO: Fix this type igno elif params["max_value"] is None: template_str = f"median must be {at_least_str} $min_value." 
else: - raise ValueError("unresolvable template_str") # noqa: TRY003 + raise ValueError("unresolvable template_str") # noqa: TRY003 # FIXME CoP if include_column_name: template_str = f"$column {template_str}" diff --git a/great_expectations/expectations/core/expect_column_min_to_be_between.py b/great_expectations/expectations/core/expect_column_min_to_be_between.py index 1c3ff65fc5e9..5aff390f8ef5 100644 --- a/great_expectations/expectations/core/expect_column_min_to_be_between.py +++ b/great_expectations/expectations/core/expect_column_min_to_be_between.py @@ -4,11 +4,12 @@ from great_expectations.compatibility import pydantic from great_expectations.compatibility.typing_extensions import override -from great_expectations.core.types import Comparable # noqa: TCH001 +from great_expectations.core.types import Comparable # noqa: TCH001 # FIXME CoP from great_expectations.expectations.expectation import ( ColumnAggregateExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import COLUMN_DESCRIPTION from great_expectations.render import ( LegacyDescriptiveRendererType, @@ -55,12 +56,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Numerical data"] +DATA_QUALITY_ISSUES = [DataQualityIssues.NUMERIC.value] class ExpectColumnMinToBeBetween(ColumnAggregateExpectation): @@ -111,7 +111,7 @@ class ExpectColumnMinToBeBetween(ColumnAggregateExpectation): See Also: [ExpectColumnMaxToBeBetween](https://greatexpectations.io/expectations/expect_column_max_to_be_between) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) 
[{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -122,7 +122,7 @@ class ExpectColumnMinToBeBetween(ColumnAggregateExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -178,7 +178,7 @@ class ExpectColumnMinToBeBetween(ColumnAggregateExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP min_value: Optional[Comparable] = pydantic.Field( default=None, description=MIN_VALUE_DESCRIPTION @@ -199,7 +199,7 @@ class ExpectColumnMinToBeBetween(ColumnAggregateExpectation): } _library_metadata = library_metadata - # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 + # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 # FIXME CoP metric_dependencies = ("column.min",) success_keys = ( "min_value", @@ -249,7 +249,7 @@ def schema_extra(schema: Dict[str, Any], model: Type[ExpectColumnMinToBeBetween] @classmethod @override - def _prescriptive_template( # noqa: C901 - too complex + def _prescriptive_template( # noqa: C901 # too complex cls, renderer_configuration: RendererConfiguration, ) -> RendererConfiguration: @@ -283,7 +283,7 @@ def _prescriptive_template( # noqa: C901 - too complex if params.min_value == params.max_value: template_str = "minimum value must be $min_value" else: - template_str = f"minimum value must be {at_least_str} $min_value and {at_most_str} $max_value." # noqa: E501 + template_str = f"minimum value must be {at_least_str} $min_value and {at_most_str} $max_value." 
# noqa: E501 # FIXME CoP elif not params.min_value: template_str = f"minimum value must be {at_most_str} $max_value." else: diff --git a/great_expectations/expectations/core/expect_column_most_common_value_to_be_in_set.py b/great_expectations/expectations/core/expect_column_most_common_value_to_be_in_set.py index d64890a0dc83..40eef4871e56 100644 --- a/great_expectations/expectations/core/expect_column_most_common_value_to_be_in_set.py +++ b/great_expectations/expectations/core/expect_column_most_common_value_to_be_in_set.py @@ -8,6 +8,7 @@ ColumnAggregateExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_types import ( ValueSetField, # noqa: TCH001 # type needed in pydantic validation ) @@ -56,12 +57,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Sets"] +DATA_QUALITY_ISSUES = [DataQualityIssues.NUMERIC.value, DataQualityIssues.VALIDITY.value] class ExpectColumnMostCommonValueToBeInSet(ColumnAggregateExpectation): @@ -104,7 +104,7 @@ class ExpectColumnMostCommonValueToBeInSet(ColumnAggregateExpectation): is a tie for most common among multiple values, observed_value will contain a single copy of each \ most common value - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -115,8 +115,9 @@ class ExpectColumnMostCommonValueToBeInSet(ColumnAggregateExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - 
Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} + {DATA_QUALITY_ISSUES[1]} Example Data: test test2 @@ -173,7 +174,7 @@ class ExpectColumnMostCommonValueToBeInSet(ColumnAggregateExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP value_set: ValueSetField ties_okay: Union[bool, None] = pydantic.Field( @@ -193,7 +194,7 @@ class ExpectColumnMostCommonValueToBeInSet(ColumnAggregateExpectation): _library_metadata = library_metadata - # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 + # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 # FIXME CoP metric_dependencies = ("column.most_common_value",) success_keys = ( "value_set", diff --git a/great_expectations/expectations/core/expect_column_pair_cramers_phi_value_to_be_less_than.py b/great_expectations/expectations/core/expect_column_pair_cramers_phi_value_to_be_less_than.py index 4bf234d23be4..bb004030a35f 100644 --- a/great_expectations/expectations/core/expect_column_pair_cramers_phi_value_to_be_less_than.py +++ b/great_expectations/expectations/core/expect_column_pair_cramers_phi_value_to_be_less_than.py @@ -83,7 +83,7 @@ def _prescriptive_template( params = renderer_configuration.params if not params.column_A or not params.column_B: - renderer_configuration.template_str = " unrecognized kwargs for expect_column_pair_cramers_phi_value_to_be_less_than: missing column." # noqa: E501 + renderer_configuration.template_str = " unrecognized kwargs for expect_column_pair_cramers_phi_value_to_be_less_than: missing column." # noqa: E501 # FIXME CoP else: renderer_configuration.template_str = ( "Values in $column_A and $column_B must be independent." 
@@ -106,7 +106,7 @@ def _prescriptive_renderer( styling = runtime_configuration.get("styling") params = substitute_none_for_missing(configuration.kwargs, ["column_A", "column_B"]) if (params["column_A"] is None) or (params["column_B"] is None): - template_str = " unrecognized kwargs for expect_column_pair_cramers_phi_value_to_be_less_than: missing column." # noqa: E501 + template_str = " unrecognized kwargs for expect_column_pair_cramers_phi_value_to_be_less_than: missing column." # noqa: E501 # FIXME CoP else: template_str = "Values in $column_A and $column_B must be independent." diff --git a/great_expectations/expectations/core/expect_column_pair_values_a_to_be_greater_than_b.py b/great_expectations/expectations/core/expect_column_pair_values_a_to_be_greater_than_b.py index 848b73d3e46d..3475c13c1d40 100644 --- a/great_expectations/expectations/core/expect_column_pair_values_a_to_be_greater_than_b.py +++ b/great_expectations/expectations/core/expect_column_pair_values_a_to_be_greater_than_b.py @@ -7,6 +7,7 @@ ColumnPairMapExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_A_DESCRIPTION, COLUMN_B_DESCRIPTION, @@ -42,12 +43,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Distribution"] +DATA_QUALITY_ISSUES = [DataQualityIssues.NUMERIC.value] class ExpectColumnPairValuesAToBeGreaterThanB(ColumnPairMapExpectation): @@ -87,7 +87,7 @@ class ExpectColumnPairValuesAToBeGreaterThanB(ColumnPairMapExpectation): Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta. 
- Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -98,7 +98,7 @@ class ExpectColumnPairValuesAToBeGreaterThanB(ColumnPairMapExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -177,7 +177,7 @@ class ExpectColumnPairValuesAToBeGreaterThanB(ColumnPairMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP or_equal: Union[bool, None] = pydantic.Field(default=None, description=OR_EQUAL_DESCRIPTION) ignore_row_if: Literal["both_values_are_missing", "either_value_is_missing", "neither"] = ( @@ -267,7 +267,7 @@ def _prescriptive_template( template_str = "" if not params.column_A or not params.column_B: - template_str += "$column has a bogus `expect_column_pair_values_A_to_be_greater_than_B` expectation. " # noqa: E501 + template_str += "$column has a bogus `expect_column_pair_values_A_to_be_greater_than_B` expectation. " # noqa: E501 # FIXME CoP if not params.mostly or params.mostly.value == 1.0: if not params.or_equal: @@ -275,15 +275,15 @@ def _prescriptive_template( "Values in $column_A must always be greater than those in $column_B." ) else: - template_str += "Values in $column_A must always be greater than or equal to those in $column_B." # noqa: E501 + template_str += "Values in $column_A must always be greater than or equal to those in $column_B." 
# noqa: E501 # FIXME CoP else: renderer_configuration = cls._add_mostly_pct_param( renderer_configuration=renderer_configuration ) if not params.or_equal: - template_str = "Values in $column_A must be greater than those in $column_B, at least $mostly_pct % of the time." # noqa: E501 + template_str = "Values in $column_A must be greater than those in $column_B, at least $mostly_pct % of the time." # noqa: E501 # FIXME CoP else: - template_str = "Values in $column_A must be greater than or equal to those in $column_B, at least $mostly_pct % of the time." # noqa: E501 + template_str = "Values in $column_A must be greater than or equal to those in $column_B, at least $mostly_pct % of the time." # noqa: E501 # FIXME CoP renderer_configuration.template_str = template_str @@ -316,21 +316,21 @@ def _prescriptive_renderer( ) if (params["column_A"] is None) or (params["column_B"] is None): - template_str = "$column has a bogus `expect_column_pair_values_A_to_be_greater_than_B` expectation." # noqa: E501 + template_str = "$column has a bogus `expect_column_pair_values_A_to_be_greater_than_B` expectation." # noqa: E501 # FIXME CoP params["row_condition"] = None if params["mostly"] is None or params["mostly"] == 1.0: if params["or_equal"] in [None, False]: template_str = "Values in $column_A must always be greater than those in $column_B." else: - template_str = "Values in $column_A must always be greater than or equal to those in $column_B." # noqa: E501 + template_str = "Values in $column_A must always be greater than or equal to those in $column_B." # noqa: E501 # FIXME CoP else: params["mostly_pct"] = num_to_str(params["mostly"] * 100, no_scientific=True) # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".") if params["or_equal"] in [None, False]: - template_str = "Values in $column_A must be greater than those in $column_B, at least $mostly_pct % of the time." 
# noqa: E501 + template_str = "Values in $column_A must be greater than those in $column_B, at least $mostly_pct % of the time." # noqa: E501 # FIXME CoP else: - template_str = "Values in $column_A must be greater than or equal to those in $column_B, at least $mostly_pct % of the time." # noqa: E501 + template_str = "Values in $column_A must be greater than or equal to those in $column_B, at least $mostly_pct % of the time." # noqa: E501 # FIXME CoP if params["row_condition"] is not None: ( diff --git a/great_expectations/expectations/core/expect_column_pair_values_to_be_equal.py b/great_expectations/expectations/core/expect_column_pair_values_to_be_equal.py index 34f6de03a2b0..e1cc651c2662 100644 --- a/great_expectations/expectations/core/expect_column_pair_values_to_be_equal.py +++ b/great_expectations/expectations/core/expect_column_pair_values_to_be_equal.py @@ -7,6 +7,7 @@ ColumnPairMapExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_A_DESCRIPTION, COLUMN_B_DESCRIPTION, @@ -42,12 +43,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Data integrity"] +DATA_QUALITY_ISSUES = [DataQualityIssues.NUMERIC.value, DataQualityIssues.VALIDITY.value] class ExpectColumnPairValuesToBeEqual(ColumnPairMapExpectation): @@ -86,7 +86,7 @@ class ExpectColumnPairValuesToBeEqual(ColumnPairMapExpectation): Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta. 
- Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -97,8 +97,9 @@ class ExpectColumnPairValuesToBeEqual(ColumnPairMapExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} + {DATA_QUALITY_ISSUES[1]} Example Data: test test2 @@ -174,7 +175,7 @@ class ExpectColumnPairValuesToBeEqual(ColumnPairMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ignore_row_if: Literal["both_values_are_missing", "either_value_is_missing", "neither"] = ( pydantic.Field(default="both_values_are_missing", description=IGNORE_ROW_IF_DESCRIPTION) @@ -266,7 +267,7 @@ def _prescriptive_template( renderer_configuration = cls._add_mostly_pct_param( renderer_configuration=renderer_configuration ) - template_str = "Values in $column_A and $column_B must be equal, at least $mostly_pct % of the time." # noqa: E501 + template_str = "Values in $column_A and $column_B must be equal, at least $mostly_pct % of the time." # noqa: E501 # FIXME CoP renderer_configuration.template_str = template_str @@ -310,7 +311,7 @@ def _prescriptive_renderer( else: params["mostly_pct"] = num_to_str(params["mostly"] * 100, no_scientific=True) # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".") - template_str = "Values in $column_A and $column_B must be equal, at least $mostly_pct % of the time." 
# noqa: E501 + template_str = "Values in $column_A and $column_B must be equal, at least $mostly_pct % of the time." # noqa: E501 # FIXME CoP if params["row_condition"] is not None: ( diff --git a/great_expectations/expectations/core/expect_column_pair_values_to_be_in_set.py b/great_expectations/expectations/core/expect_column_pair_values_to_be_in_set.py index 47e6650870aa..66cd1afac806 100644 --- a/great_expectations/expectations/core/expect_column_pair_values_to_be_in_set.py +++ b/great_expectations/expectations/core/expect_column_pair_values_to_be_in_set.py @@ -5,6 +5,7 @@ from great_expectations.expectations.expectation import ( ColumnPairMapExpectation, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_A_DESCRIPTION, COLUMN_B_DESCRIPTION, @@ -16,7 +17,7 @@ ) VALUE_PAIRS_SET_DESCRIPTION = "All the valid pairs to be matched." SUPPORTED_DATA_SOURCES = ["Snowflake", "PostgreSQL"] -DATA_QUALITY_ISSUES = ["Sets"] +DATA_QUALITY_ISSUES = [DataQualityIssues.NUMERIC.value, DataQualityIssues.VALIDITY.value] SUPPORTED_DATA_SOURCES = [ "Pandas", @@ -25,7 +26,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", ] @@ -68,7 +68,7 @@ class ExpectColumnPairValuesToBeInSet(ColumnPairMapExpectation): Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta. 
- Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -77,10 +77,10 @@ class ExpectColumnPairValuesToBeInSet(ColumnPairMapExpectation): [{SUPPORTED_DATA_SOURCES[5]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[6]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) - [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} + {DATA_QUALITY_ISSUES[1]} Example Data: test test2 @@ -161,7 +161,7 @@ class ExpectColumnPairValuesToBeInSet(ColumnPairMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP value_pairs_set: List[Tuple[Any, Any]] ignore_row_if: Literal["both_values_are_missing", "either_value_is_missing", "neither"] = ( diff --git a/great_expectations/expectations/core/expect_column_proportion_of_unique_values_to_be_between.py b/great_expectations/expectations/core/expect_column_proportion_of_unique_values_to_be_between.py index 86e0b4ca42e8..6eed1bccc3e6 100644 --- a/great_expectations/expectations/core/expect_column_proportion_of_unique_values_to_be_between.py +++ b/great_expectations/expectations/core/expect_column_proportion_of_unique_values_to_be_between.py @@ -3,12 +3,13 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Type from great_expectations.compatibility import pydantic -from great_expectations.core.types import Comparable # noqa: TCH001 +from great_expectations.core.types import Comparable # noqa: 
TCH001 # FIXME CoP from great_expectations.expectations.expectation import ( COLUMN_DESCRIPTION, ColumnAggregateExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.render import ( LegacyDescriptiveRendererType, LegacyRendererType, @@ -57,12 +58,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Cardinality"] +DATA_QUALITY_ISSUES = [DataQualityIssues.UNIQUENESS.value] class ExpectColumnProportionOfUniqueValuesToBeBetween(ColumnAggregateExpectation): @@ -116,7 +116,7 @@ class ExpectColumnProportionOfUniqueValuesToBeBetween(ColumnAggregateExpectation See Also: [ExpectColumnUniqueValueCountToBeBetween](https://greatexpectations.io/expectations/expect_column_unique_value_count_to_be_between) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -127,7 +127,7 @@ class ExpectColumnProportionOfUniqueValuesToBeBetween(ColumnAggregateExpectation [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -183,7 +183,7 @@ class ExpectColumnProportionOfUniqueValuesToBeBetween(ColumnAggregateExpectation "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP min_value: Optional[Comparable] = pydantic.Field( default=None, description=MIN_VALUE_DESCRIPTION @@ -206,7 +206,7 @@ class 
ExpectColumnProportionOfUniqueValuesToBeBetween(ColumnAggregateExpectation _library_metadata = library_metadata - # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 + # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 # FIXME CoP metric_dependencies = ("column.unique_proportion",) success_keys = ( "min_value", @@ -259,7 +259,7 @@ def schema_extra( ) @classmethod - def _prescriptive_template( # noqa: C901 - too complex + def _prescriptive_template( # noqa: C901 # too complex cls, renderer_configuration: RendererConfiguration, ) -> RendererConfiguration: @@ -292,9 +292,9 @@ def _prescriptive_template( # noqa: C901 - too complex template_str = f"fraction of unique values must be {at_most_str} $max_value." elif not params.max_value: template_str = f"fraction of unique values must be {at_least_str} $min_value." - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if params.min_value.value != params.max_value.value: - template_str = f"fraction of unique values must be {at_least_str} $min_value and {at_most_str} $max_value." # noqa: E501 + template_str = f"fraction of unique values must be {at_least_str} $min_value and {at_most_str} $max_value." # noqa: E501 # FIXME CoP else: template_str = "fraction of unique values must be exactly $min_value." @@ -339,9 +339,9 @@ def _prescriptive_renderer( template_str = f"fraction of unique values must be {at_most_str} $max_value." elif params["max_value"] is None: template_str = f"fraction of unique values must be {at_least_str} $min_value." - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if params["min_value"] != params["max_value"]: - template_str = f"fraction of unique values must be {at_least_str} $min_value and {at_most_str} $max_value." 
# noqa: E501 + template_str = f"fraction of unique values must be {at_least_str} $min_value and {at_most_str} $max_value." # noqa: E501 # FIXME CoP else: template_str = "fraction of unique values must be exactly $min_value." diff --git a/great_expectations/expectations/core/expect_column_quantile_values_to_be_between.py b/great_expectations/expectations/core/expect_column_quantile_values_to_be_between.py index 628b877d924d..e8e213606f6c 100644 --- a/great_expectations/expectations/core/expect_column_quantile_values_to_be_between.py +++ b/great_expectations/expectations/core/expect_column_quantile_values_to_be_between.py @@ -12,6 +12,7 @@ ColumnAggregateExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.render import ( AtomicDiagnosticRendererType, AtomicPrescriptiveRendererType, @@ -68,8 +69,17 @@ class QuantileRange(pydantic.BaseModel): "Whether to allow relative error in quantile " "communications on backends that support or require it." 
) -SUPPORTED_DATA_SOURCES = ["Pandas", "Spark", "SQLite", "PostgreSQL", "MySQL", "MSSQL", "Redshift"] -DATA_QUALITY_ISSUES = ["Numerical data"] +SUPPORTED_DATA_SOURCES = [ + "Pandas", + "Spark", + "SQLite", + "PostgreSQL", + "MySQL", + "MSSQL", + "Snowflake", + "BigQuery", +] +DATA_QUALITY_ISSUES = [DataQualityIssues.NUMERIC.value] class ExpectColumnQuantileValuesToBeBetween(ColumnAggregateExpectation): @@ -120,7 +130,7 @@ class ExpectColumnQuantileValuesToBeBetween(ColumnAggregateExpectation): [ExpectColumnMaxToBeBetween](https://greatexpectations.io/expectations/expect_column_max_to_be_between) [ExpectColumnMedianToBeBetween](https://greatexpectations.io/expectations/expect_column_median_to_be_between) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -128,8 +138,9 @@ class ExpectColumnQuantileValuesToBeBetween(ColumnAggregateExpectation): [{SUPPORTED_DATA_SOURCES[4]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[5]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[6]}](https://docs.greatexpectations.io/docs/application_integration_support/) + [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -232,7 +243,7 @@ class ExpectColumnQuantileValuesToBeBetween(ColumnAggregateExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP quantile_ranges: QuantileRange = pydantic.Field(description=QUANTILE_RANGES_DESCRIPTION) allow_relative_error: Union[bool, str] = pydantic.Field( @@ 
-308,7 +319,7 @@ def validate_quantile_ranges(cls, quantile_ranges: QuantileRange) -> Optional[Qu raise InvalidExpectationConfigurationError(str(e)) if len(quantile_ranges.quantiles) != len(quantile_ranges.value_ranges): - raise ValueError("quantile_values and quantiles must have the same number of elements") # noqa: TRY003 + raise ValueError("quantile_values and quantiles must have the same number of elements") # noqa: TRY003 # FIXME CoP return quantile_ranges @@ -668,8 +679,8 @@ def _descriptive_quantile_table_renderer( quantile_string if quantile_string else f"{quantile:3.2f}" ), "tooltip": { - "content": "expect_column_quantile_values_to_be_between \n expect_column_median_to_be_between" # noqa: E501 - if quantile == 0.50 # noqa: PLR2004 + "content": "expect_column_quantile_values_to_be_between \n expect_column_median_to_be_between" # noqa: E501 # FIXME CoP + if quantile == 0.50 # noqa: PLR2004 # FIXME CoP else "expect_column_quantile_values_to_be_between" }, }, diff --git a/great_expectations/expectations/core/expect_column_stdev_to_be_between.py b/great_expectations/expectations/core/expect_column_stdev_to_be_between.py index 61dd08da100d..3d5feb9c7a64 100644 --- a/great_expectations/expectations/core/expect_column_stdev_to_be_between.py +++ b/great_expectations/expectations/core/expect_column_stdev_to_be_between.py @@ -3,12 +3,13 @@ from typing import TYPE_CHECKING, Any, ClassVar, Dict, Optional, Type, Union from great_expectations.compatibility import pydantic -from great_expectations.core.types import Comparable # noqa: TCH001 +from great_expectations.core.types import Comparable # noqa: TCH001 # FIXME CoP from great_expectations.expectations.expectation import ( COLUMN_DESCRIPTION, ColumnAggregateExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.render import LegacyRendererType, RenderedStringTemplateContent from great_expectations.render.renderer.renderer 
import renderer from great_expectations.render.renderer_configuration import ( @@ -50,12 +51,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Distribution"] +DATA_QUALITY_ISSUES = [DataQualityIssues.NUMERIC.value] class ExpectColumnStdevToBeBetween(ColumnAggregateExpectation): @@ -109,7 +109,7 @@ class ExpectColumnStdevToBeBetween(ColumnAggregateExpectation): [ExpectColumnMeanToBeBetween](https://greatexpectations.io/expectations/expect_column_mean_to_be_between) [ExpectColumnMedianToBeBetween](https://greatexpectations.io/expectations/expect_column_median_to_be_between) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -120,7 +120,7 @@ class ExpectColumnStdevToBeBetween(ColumnAggregateExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -174,7 +174,7 @@ class ExpectColumnStdevToBeBetween(ColumnAggregateExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP min_value: Optional[Comparable] = pydantic.Field( default=None, description=MIN_VALUE_DESCRIPTION @@ -276,7 +276,7 @@ def _prescriptive_template( ) if params.min_value and params.max_value: - template_str = f"standard deviation must be {at_least_str} $min_value and {at_most_str} $max_value." # noqa: E501 + template_str = f"standard deviation must be {at_least_str} $min_value and {at_most_str} $max_value." 
# noqa: E501 # FIXME CoP elif not params.min_value: template_str = f"standard deviation must be {at_most_str} $max_value." else: @@ -323,7 +323,7 @@ def _prescriptive_renderer( at_least_str, at_most_str = handle_strict_min_max(params) if params["min_value"] is not None and params["max_value"] is not None: - template_str = f"standard deviation must be {at_least_str} $min_value and {at_most_str} $max_value." # noqa: E501 + template_str = f"standard deviation must be {at_least_str} $min_value and {at_most_str} $max_value." # noqa: E501 # FIXME CoP elif params["min_value"] is None: template_str = f"standard deviation must be {at_most_str} $max_value." elif params["max_value"] is None: diff --git a/great_expectations/expectations/core/expect_column_sum_to_be_between.py b/great_expectations/expectations/core/expect_column_sum_to_be_between.py index 6585dcd9eb6e..44a5f720f0a8 100644 --- a/great_expectations/expectations/core/expect_column_sum_to_be_between.py +++ b/great_expectations/expectations/core/expect_column_sum_to_be_between.py @@ -4,12 +4,13 @@ from great_expectations.compatibility import pydantic from great_expectations.compatibility.typing_extensions import override -from great_expectations.core.types import Comparable # noqa: TCH001 +from great_expectations.core.types import Comparable # noqa: TCH001 # FIXME CoP from great_expectations.expectations.expectation import ( COLUMN_DESCRIPTION, ColumnAggregateExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.render import LegacyRendererType, RenderedStringTemplateContent from great_expectations.render.renderer.renderer import renderer from great_expectations.render.renderer_configuration import ( @@ -45,12 +46,12 @@ "SQLite", "PostgreSQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", + "MySQL", ] -DATA_QUALITY_ISSUES = ["Distribution"] +DATA_QUALITY_ISSUES = [DataQualityIssues.NUMERIC.value] class 
ExpectColumnSumToBeBetween(ColumnAggregateExpectation): @@ -98,7 +99,7 @@ class ExpectColumnSumToBeBetween(ColumnAggregateExpectation): * observed_value field in the result object is customized for this expectation to be a list \ representing the actual column sum - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -107,8 +108,9 @@ class ExpectColumnSumToBeBetween(ColumnAggregateExpectation): [{SUPPORTED_DATA_SOURCES[5]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[6]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) + [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -162,7 +164,7 @@ class ExpectColumnSumToBeBetween(ColumnAggregateExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP min_value: Optional[Comparable] = pydantic.Field( default=None, description=MIN_VALUE_DESCRIPTION @@ -185,7 +187,7 @@ class ExpectColumnSumToBeBetween(ColumnAggregateExpectation): _library_metadata = library_metadata - # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 + # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 # FIXME CoP metric_dependencies = ("column.sum",) success_keys = ( "min_value", @@ -325,7 +327,7 @@ def _prescriptive_renderer( elif 
params["max_value"] is None: template_str = f"sum must be {at_least_str} $min_value." else: - raise ValueError("unresolvable template_str") # noqa: TRY003 + raise ValueError("unresolvable template_str") # noqa: TRY003 # FIXME CoP if renderer_configuration.include_column_name: template_str = f"$column {template_str}" diff --git a/great_expectations/expectations/core/expect_column_to_exist.py b/great_expectations/expectations/core/expect_column_to_exist.py index b19921751a9d..66a5bb55ec9c 100644 --- a/great_expectations/expectations/core/expect_column_to_exist.py +++ b/great_expectations/expectations/core/expect_column_to_exist.py @@ -5,12 +5,13 @@ from great_expectations.compatibility.pydantic import Field, StrictStr from great_expectations.compatibility.typing_extensions import override from great_expectations.core.suite_parameters import ( - SuiteParameterDict, # noqa: TCH001 + SuiteParameterDict, # noqa: TCH001 # FIXME CoP ) from great_expectations.expectations.expectation import ( BatchExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import COLUMN_DESCRIPTION from great_expectations.render import LegacyRendererType, RenderedStringTemplateContent from great_expectations.render.renderer.renderer import renderer @@ -42,12 +43,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Schema"] +DATA_QUALITY_ISSUES = [DataQualityIssues.SCHEMA.value] class ExpectColumnToExist(BatchExpectation): @@ -78,7 +78,7 @@ class ExpectColumnToExist(BatchExpectation): Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta. 
- Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -89,7 +89,7 @@ class ExpectColumnToExist(BatchExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -134,7 +134,7 @@ class ExpectColumnToExist(BatchExpectation): "success": false, "result": {{}} }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP column: StrictStr = Field(min_length=1, description=COLUMN_DESCRIPTION) column_index: Union[int, SuiteParameterDict, None] = Field( diff --git a/great_expectations/expectations/core/expect_column_unique_value_count_to_be_between.py b/great_expectations/expectations/core/expect_column_unique_value_count_to_be_between.py index 811708286ecb..bae9c5798f53 100644 --- a/great_expectations/expectations/core/expect_column_unique_value_count_to_be_between.py +++ b/great_expectations/expectations/core/expect_column_unique_value_count_to_be_between.py @@ -3,12 +3,13 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Type from great_expectations.compatibility import pydantic -from great_expectations.core.types import Comparable # noqa: TCH001 +from great_expectations.core.types import Comparable # noqa: TCH001 # FIXME CoP from great_expectations.expectations.expectation import ( COLUMN_DESCRIPTION, ColumnAggregateExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.render import ( LegacyDescriptiveRendererType, 
LegacyRendererType, @@ -53,12 +54,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Cardinality"] +DATA_QUALITY_ISSUES = [DataQualityIssues.UNIQUENESS.value] class ExpectColumnUniqueValueCountToBeBetween(ColumnAggregateExpectation): @@ -109,7 +109,7 @@ class ExpectColumnUniqueValueCountToBeBetween(ColumnAggregateExpectation): See Also: [ExpectColumnProportionOfUniqueValuesToBeBetween](https://greatexpectations.io/expectations/expect_column_proportion_of_unique_values_to_be_between) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -120,7 +120,7 @@ class ExpectColumnUniqueValueCountToBeBetween(ColumnAggregateExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -174,7 +174,7 @@ class ExpectColumnUniqueValueCountToBeBetween(ColumnAggregateExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP min_value: Optional[Comparable] = pydantic.Field( default=None, description=MIN_VALUE_DESCRIPTION @@ -203,17 +203,21 @@ class ExpectColumnUniqueValueCountToBeBetween(ColumnAggregateExpectation): _library_metadata = library_metadata - # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 + # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 # FIXME CoP 
metric_dependencies = ("column.distinct_values.count",) success_keys = ( "min_value", "max_value", + "strict_min", + "strict_max", ) args_keys = ( "column", "min_value", "max_value", + "strict_min", + "strict_max", ) """ A Column Aggregate Metric Decorator for the Unique Value Count""" @@ -287,7 +291,7 @@ def _prescriptive_template( elif not params.max_value: template_str = f"must have {at_least_str} $min_value unique values." else: - template_str = f"must have {at_least_str} $min_value and {at_most_str} $max_value unique values." # noqa: E501 + template_str = f"must have {at_least_str} $min_value and {at_most_str} $max_value unique values." # noqa: E501 # FIXME CoP if renderer_configuration.include_column_name: template_str = f"$column {template_str}" @@ -326,13 +330,13 @@ def _prescriptive_renderer( if (params["min_value"] is None) and (params["max_value"] is None): template_str = "may have any number of unique values." - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if params["min_value"] is None: template_str = f"must have {at_most_str} $max_value unique values." elif params["max_value"] is None: template_str = f"must have {at_least_str} $min_value unique values." else: - template_str = f"must have {at_least_str} $min_value and {at_most_str} $max_value unique values." # noqa: E501 + template_str = f"must have {at_least_str} $min_value and {at_most_str} $max_value unique values." 
# noqa: E501 # FIXME CoP if include_column_name: template_str = f"$column {template_str}" diff --git a/great_expectations/expectations/core/expect_column_value_lengths_to_be_between.py b/great_expectations/expectations/core/expect_column_value_lengths_to_be_between.py index 13101c2901b9..798d6dab5320 100644 --- a/great_expectations/expectations/core/expect_column_value_lengths_to_be_between.py +++ b/great_expectations/expectations/core/expect_column_value_lengths_to_be_between.py @@ -8,12 +8,13 @@ root_validator, ) from great_expectations.core.suite_parameters import ( - SuiteParameterDict, # noqa: TCH001 + SuiteParameterDict, # noqa: TCH001 # FIXME CoP ) from great_expectations.expectations.expectation import ( ColumnMapExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_DESCRIPTION, MOSTLY_DESCRIPTION, @@ -38,7 +39,7 @@ ) try: - import sqlalchemy as sa # noqa: F401, TID251 + import sqlalchemy as sa # noqa: F401, TID251 # FIXME CoP except ImportError: pass @@ -60,7 +61,7 @@ MAX_VALUE_DESCRIPTION = "The maximum value for a column entry length." STRICT_MIN_DESCRIPTION = "If True, values must be strictly larger than min_value." STRICT_MAX_DESCRIPTION = "If True, values must be strictly smaller than max_value." 
-DATA_QUALITY_ISSUES = ["Pattern matching"] +DATA_QUALITY_ISSUES = [DataQualityIssues.VALIDITY.value] SUPPORTED_DATA_SOURCES = [ "Pandas", "Spark", @@ -68,7 +69,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", @@ -128,7 +128,7 @@ class ExpectColumnValueLengthsToBeBetween(ColumnMapExpectation): See Also: [ExpectColumnValueLengthsToEqual](https://greatexpectations.io/expectations/expect_column_value_lengths_to_equal) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -139,7 +139,7 @@ class ExpectColumnValueLengthsToBeBetween(ColumnMapExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -212,7 +212,7 @@ class ExpectColumnValueLengthsToBeBetween(ColumnMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP min_value: Union[int, SuiteParameterDict, datetime, None] = pydantic.Field( default=None, description=MIN_VALUE_DESCRIPTION @@ -287,11 +287,11 @@ def _validate_min_or_max_set(cls, values): min_value = values.get("min_value") max_value = values.get("max_value") if min_value is None and max_value is None: - raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 + raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 # FIXME CoP return values @classmethod - def _prescriptive_template( # noqa: C901, PLR0912 + def _prescriptive_template( # noqa: C901, PLR0912 # FIXME CoP cls, 
renderer_configuration: RendererConfiguration, ) -> RendererConfiguration: @@ -327,14 +327,14 @@ def _prescriptive_template( # noqa: C901, PLR0912 renderer_configuration=renderer_configuration ) if params.min_value and params.max_value: - template_str = f"values must be {at_least_str} $min_value and {at_most_str} $max_value characters long, at least $mostly_pct % of the time." # noqa: E501 + template_str = f"values must be {at_least_str} $min_value and {at_most_str} $max_value characters long, at least $mostly_pct % of the time." # noqa: E501 # FIXME CoP elif not params.min_value: - template_str = f"values must be {at_most_str} $max_value characters long, at least $mostly_pct % of the time." # noqa: E501 + template_str = f"values must be {at_most_str} $max_value characters long, at least $mostly_pct % of the time." # noqa: E501 # FIXME CoP else: - template_str = f"values must be {at_least_str} $min_value characters long, at least $mostly_pct % of the time." # noqa: E501 - else: # noqa: PLR5501 + template_str = f"values must be {at_least_str} $min_value characters long, at least $mostly_pct % of the time." # noqa: E501 # FIXME CoP + else: # noqa: PLR5501 # FIXME CoP if params.min_value and params.max_value: - template_str = f"values must always be {at_least_str} $min_value and {at_most_str} $max_value characters long." # noqa: E501 + template_str = f"values must always be {at_least_str} $min_value and {at_most_str} $max_value characters long." # noqa: E501 # FIXME CoP elif not params.min_value: template_str = ( f"values must always be {at_most_str} $max_value characters long." 
@@ -354,7 +354,7 @@ def _prescriptive_template( # noqa: C901, PLR0912 @classmethod @renderer(renderer_type=LegacyRendererType.PRESCRIPTIVE) @render_suite_parameter_string - def _prescriptive_renderer( # noqa: C901 - too complex + def _prescriptive_renderer( # noqa: C901 # too complex cls, configuration: Optional[ExpectationConfiguration] = None, result: Optional[ExpectationValidationResult] = None, @@ -395,18 +395,18 @@ def _prescriptive_renderer( # noqa: C901 - too complex if params["mostly"] is not None and params["mostly"] < 1.0: params["mostly_pct"] = num_to_str(params["mostly"] * 100, no_scientific=True) - # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".") # noqa: E501 + # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".") # noqa: E501 # FIXME CoP if params["min_value"] is not None and params["max_value"] is not None: - template_str = f"values must be {at_least_str} $min_value and {at_most_str} $max_value characters long, at least $mostly_pct % of the time." # noqa: E501 + template_str = f"values must be {at_least_str} $min_value and {at_most_str} $max_value characters long, at least $mostly_pct % of the time." # noqa: E501 # FIXME CoP elif params["min_value"] is None: - template_str = f"values must be {at_most_str} $max_value characters long, at least $mostly_pct % of the time." # noqa: E501 + template_str = f"values must be {at_most_str} $max_value characters long, at least $mostly_pct % of the time." # noqa: E501 # FIXME CoP elif params["max_value"] is None: - template_str = f"values must be {at_least_str} $min_value characters long, at least $mostly_pct % of the time." # noqa: E501 - else: # noqa: PLR5501 + template_str = f"values must be {at_least_str} $min_value characters long, at least $mostly_pct % of the time." 
# noqa: E501 # FIXME CoP + else: # noqa: PLR5501 # FIXME CoP if params["min_value"] is not None and params["max_value"] is not None: - template_str = f"values must always be {at_least_str} $min_value and {at_most_str} $max_value characters long." # noqa: E501 + template_str = f"values must always be {at_least_str} $min_value and {at_most_str} $max_value characters long." # noqa: E501 # FIXME CoP elif params["min_value"] is None: template_str = ( diff --git a/great_expectations/expectations/core/expect_column_value_lengths_to_equal.py b/great_expectations/expectations/core/expect_column_value_lengths_to_equal.py index b9ab54c06b43..edbb4172ec08 100644 --- a/great_expectations/expectations/core/expect_column_value_lengths_to_equal.py +++ b/great_expectations/expectations/core/expect_column_value_lengths_to_equal.py @@ -4,12 +4,13 @@ from great_expectations.compatibility import pydantic from great_expectations.core.suite_parameters import ( - SuiteParameterDict, # noqa: TCH001 + SuiteParameterDict, # noqa: TCH001 # FIXME CoP ) from great_expectations.expectations.expectation import ( ColumnMapExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_DESCRIPTION, MOSTLY_DESCRIPTION, @@ -39,7 +40,7 @@ "Expect the column entries to be strings with length equal to the provided value." ) VALUE_DESCRIPTION = "The expected value for a column entry length." 
-DATA_QUALITY_ISSUES = ["Pattern matching"] +DATA_QUALITY_ISSUES = [DataQualityIssues.VALIDITY.value] SUPPORTED_DATA_SOURCES = [ "Pandas", "Spark", @@ -47,7 +48,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", @@ -94,7 +94,7 @@ class ExpectColumnValueLengthsToEqual(ColumnMapExpectation): See Also: [ExpectColumnValueLengthsToBeBetween](https://greatexpectations.io/expectations/expect_column_value_lengths_to_be_between) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -105,7 +105,7 @@ class ExpectColumnValueLengthsToEqual(ColumnMapExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -172,7 +172,7 @@ class ExpectColumnValueLengthsToEqual(ColumnMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP value: Union[float, SuiteParameterDict] = pydantic.Field(description=VALUE_DESCRIPTION) @@ -286,7 +286,7 @@ def _prescriptive_renderer( template_str = "values must be $value characters long" if params["mostly"] is not None and params["mostly"] < 1.0: params["mostly_pct"] = num_to_str(params["mostly"] * 100, no_scientific=True) - # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".") # noqa: E501 + # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".") # noqa: E501 # FIXME CoP template_str += ", at least $mostly_pct % of the time." else: template_str += "." 
diff --git a/great_expectations/expectations/core/expect_column_value_z_scores_to_be_less_than.py b/great_expectations/expectations/core/expect_column_value_z_scores_to_be_less_than.py index 78f4fb1afc6a..e482529f06d5 100644 --- a/great_expectations/expectations/core/expect_column_value_z_scores_to_be_less_than.py +++ b/great_expectations/expectations/core/expect_column_value_z_scores_to_be_less_than.py @@ -5,11 +5,12 @@ from great_expectations.compatibility import pydantic from great_expectations.compatibility.typing_extensions import override from great_expectations.core.suite_parameters import ( - SuiteParameterDict, # noqa: TCH001 + SuiteParameterDict, # noqa: TCH001 # FIXME CoP ) from great_expectations.expectations.expectation import ( ColumnMapExpectation, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_DESCRIPTION, MOSTLY_DESCRIPTION, @@ -34,7 +35,7 @@ "(double_sided = True, threshold = 2) -> Z scores in non-inclusive interval(-2,2) | " "(double_sided = False, threshold = 2) -> Z scores in non-inclusive interval (-infinity,2)" ) -DATA_QUALITY_ISSUES = ["Distribution"] +DATA_QUALITY_ISSUES = [DataQualityIssues.NUMERIC.value] SUPPORTED_DATA_SOURCES = [ "Pandas", "Spark", @@ -42,7 +43,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", @@ -89,7 +89,7 @@ class ExpectColumnValueZScoresToBeLessThan(ColumnMapExpectation): Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta. 
- Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -100,7 +100,7 @@ class ExpectColumnValueZScoresToBeLessThan(ColumnMapExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -171,7 +171,7 @@ class ExpectColumnValueZScoresToBeLessThan(ColumnMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP threshold: Union[float, SuiteParameterDict] = pydantic.Field(description=THRESHOLD_DESCRIPTION) double_sided: Union[bool, SuiteParameterDict] = pydantic.Field( @@ -194,7 +194,7 @@ class ExpectColumnValueZScoresToBeLessThan(ColumnMapExpectation): } _library_metadata = library_metadata - # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 + # Setting necessary computation metric dependencies and defining kwargs, as well as assigning kwargs default values\ # noqa: E501 # FIXME CoP map_metric = "column_values.z_score.under_threshold" success_keys = ("threshold", "double_sided", "mostly") args_keys = ("column", "threshold") diff --git a/great_expectations/expectations/core/expect_column_values_to_be_between.py b/great_expectations/expectations/core/expect_column_values_to_be_between.py index 3809422a3a02..aae699b1ea1f 100644 --- a/great_expectations/expectations/core/expect_column_values_to_be_between.py +++ b/great_expectations/expectations/core/expect_column_values_to_be_between.py @@ -5,11 +5,12 @@ 
from great_expectations.compatibility import pydantic from great_expectations.compatibility.pydantic import root_validator from great_expectations.compatibility.typing_extensions import override -from great_expectations.core.types import Comparable # noqa: TCH001 +from great_expectations.core.types import Comparable # noqa: TCH001 # FIXME CoP from great_expectations.expectations.expectation import ( ColumnMapExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_DESCRIPTION, MOSTLY_DESCRIPTION, @@ -44,17 +45,17 @@ MAX_VALUE_DESCRIPTION = "The maximum value for a column entry." STRICT_MIN_DESCRIPTION = "If True, values must be strictly larger than min_value." STRICT_MAX_DESCRIPTION = "If True, values must be strictly smaller than max_value." -DATA_QUALITY_ISSUES = ["Distribution"] +DATA_QUALITY_ISSUES = [DataQualityIssues.NUMERIC.value] SUPPORTED_DATA_SOURCES = [ "Pandas", "Spark", "SQLite", "PostgreSQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", + "MySQL", ] @@ -107,7 +108,7 @@ class ExpectColumnValuesToBeBetween(ColumnMapExpectation): See Also: [ExpectColumnValueLengthsToBeBetween](https://greatexpectations.io/expectations/expect_column_value_lengths_to_be_between) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -116,8 +117,9 @@ class ExpectColumnValuesToBeBetween(ColumnMapExpectation): [{SUPPORTED_DATA_SOURCES[5]}](https://docs.greatexpectations.io/docs/application_integration_support/) 
[{SUPPORTED_DATA_SOURCES[6]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) + [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -189,7 +191,7 @@ class ExpectColumnValuesToBeBetween(ColumnMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP min_value: Optional[Comparable] = pydantic.Field( default=None, description=MIN_VALUE_DESCRIPTION @@ -207,7 +209,7 @@ def check_min_val_or_max_val(cls, values: dict) -> dict: max_val = values.get("max_val") if min_val is None and max_val is None: - raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 + raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 # FIXME CoP return values @@ -274,7 +276,7 @@ def schema_extra( @classmethod @override - def _prescriptive_template( # noqa: C901 - too complex + def _prescriptive_template( # noqa: C901 # too complex cls, renderer_configuration: RendererConfiguration, ): @@ -370,11 +372,11 @@ def _prescriptive_renderer( mostly_str = "" if params["mostly"] is not None and params["mostly"] < 1.0: params["mostly_pct"] = num_to_str(params["mostly"] * 100, no_scientific=True) - # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".") # noqa: E501 + # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".") # noqa: E501 # FIXME CoP mostly_str = ", at least $mostly_pct % of the time" if params["min_value"] is not None and params["max_value"] is not None: - template_str += f"values must be {at_least_str} $min_value and {at_most_str} $max_value{mostly_str}." # noqa: E501 + template_str += f"values must be {at_least_str} $min_value and {at_most_str} $max_value{mostly_str}." 
# noqa: E501 # FIXME CoP elif params["min_value"] is None: template_str += f"values must be {at_most_str} $max_value{mostly_str}." diff --git a/great_expectations/expectations/core/expect_column_values_to_be_dateutil_parseable.py b/great_expectations/expectations/core/expect_column_values_to_be_dateutil_parseable.py index 0401092a2901..578db3cc10e2 100644 --- a/great_expectations/expectations/core/expect_column_values_to_be_dateutil_parseable.py +++ b/great_expectations/expectations/core/expect_column_values_to_be_dateutil_parseable.py @@ -58,7 +58,7 @@ class ExpectColumnValuesToBeDateutilParseable(ColumnMapExpectation): An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result) Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # This dictionary contains metadata for display in the public gallery library_metadata = { diff --git a/great_expectations/expectations/core/expect_column_values_to_be_decreasing.py b/great_expectations/expectations/core/expect_column_values_to_be_decreasing.py index eb57850f6d93..6856d1af122e 100644 --- a/great_expectations/expectations/core/expect_column_values_to_be_decreasing.py +++ b/great_expectations/expectations/core/expect_column_values_to_be_decreasing.py @@ -68,7 +68,7 @@ class ExpectColumnValuesToBeDecreasing(ColumnMapExpectation): See Also: [ExpectColumnValuesToBeIncreasing](https://greatexpectations.io/expectations/expect_column_values_to_be_increasing) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP strictly: Union[bool, None] = None diff --git a/great_expectations/expectations/core/expect_column_values_to_be_in_set.py b/great_expectations/expectations/core/expect_column_values_to_be_in_set.py index 47ceca0473ca..6640f90fcc45 100644 --- a/great_expectations/expectations/core/expect_column_values_to_be_in_set.py +++ b/great_expectations/expectations/core/expect_column_values_to_be_in_set.py 
@@ -5,6 +5,7 @@ from great_expectations.expectations.expectation import ( ColumnMapExpectation, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import COLUMN_DESCRIPTION from great_expectations.expectations.model_field_types import ( ValueSetField, # noqa: TCH001 # type needed in pydantic validation @@ -28,7 +29,7 @@ ) try: - import sqlalchemy as sa # noqa: F401, TID251 + import sqlalchemy as sa # noqa: F401, TID251 # FIXME CoP except ImportError: pass from great_expectations.expectations.expectation import ( @@ -54,12 +55,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Sets"] +DATA_QUALITY_ISSUES = [DataQualityIssues.NUMERIC.value, DataQualityIssues.VALIDITY.value] class ExpectColumnValuesToBeInSet(ColumnMapExpectation): @@ -100,7 +100,7 @@ class ExpectColumnValuesToBeInSet(ColumnMapExpectation): See Also: [ExpectColumnValuesToNotBeInSet](https://greatexpectations.io/expectations/expect_column_values_to_not_be_in_set) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -111,8 +111,9 @@ class ExpectColumnValuesToBeInSet(ColumnMapExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} + {DATA_QUALITY_ISSUES[1]} Example Data: test test2 @@ -183,7 +184,7 @@ class ExpectColumnValuesToBeInSet(ColumnMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: 
E501 + """ # noqa: E501 # FIXME CoP value_set: ValueSetField @@ -378,7 +379,7 @@ def _descriptive_example_values_block_renderer( classes = ["col-3", "mt-1", "pl-1", "pr-1"] - if any(len(value) > 80 for value in values): # noqa: PLR2004 + if any(len(value) > 80 for value in values): # noqa: PLR2004 # FIXME CoP content_block_type = "bullet_list" content_block_class = RenderedBulletListContent else: diff --git a/great_expectations/expectations/core/expect_column_values_to_be_in_type_list.py b/great_expectations/expectations/core/expect_column_values_to_be_in_type_list.py index b406f63a2fb7..590388739c87 100644 --- a/great_expectations/expectations/core/expect_column_values_to_be_in_type_list.py +++ b/great_expectations/expectations/core/expect_column_values_to_be_in_type_list.py @@ -1,6 +1,5 @@ from __future__ import annotations -import inspect import logging from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Tuple, Type, Union @@ -10,17 +9,21 @@ from great_expectations.compatibility import pydantic, pyspark from great_expectations.compatibility.typing_extensions import override -from great_expectations.core.suite_parameters import ( # noqa: TCH001 - SuiteParameterDict, +from great_expectations.core.suite_parameters import ( + SuiteParameterDict, # noqa: TCH001, RUF100 # FIXME CoP +) +from great_expectations.execution_engine.sqlalchemy_dialect import ( + GXSqlDialect, ) from great_expectations.expectations.core.expect_column_values_to_be_of_type import ( - _get_dialect_type_module, + _get_potential_sqlalchemy_types, _native_type_type_map, ) from great_expectations.expectations.expectation import ( ColumnMapExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import COLUMN_DESCRIPTION from great_expectations.expectations.registry import get_metric_kwargs from great_expectations.render import LegacyRendererType, 
RenderedStringTemplateContent @@ -34,11 +37,6 @@ parse_row_condition_string_pandas_engine, substitute_none_for_missing, ) -from great_expectations.util import ( - get_clickhouse_sqlalchemy_potential_type, - get_pyathena_potential_type, - get_trino_potential_type, -) from great_expectations.validator.metric_configuration import MetricConfiguration if TYPE_CHECKING: @@ -61,19 +59,8 @@ A list of strings representing the data type that each column should have as entries. \ Valid types are defined by the current backend implementation and are dynamically loaded. """ -SUPPORTED_DATA_SOURCES = [ - "Pandas", - "Spark", - "SQLite", - "PostgreSQL", - "MSSQL", - "Trino", - "Redshift", - "BigQuery", - "Snowflake", - "Databricks (SQL)", -] -DATA_QUALITY_ISSUES = ["Schema"] +SUPPORTED_DATA_SOURCES = ["Spark", "SQLite", "PostgreSQL", "MSSQL", "BigQuery"] +DATA_QUALITY_ISSUES = [DataQualityIssues.SCHEMA.value] class ExpectColumnValuesToBeInTypeList(ColumnMapExpectation): @@ -124,18 +111,14 @@ class ExpectColumnValuesToBeInTypeList(ColumnMapExpectation): See also: [ExpectColumnValuesToBeOfType](https://greatexpectations.io/expectations/expect_column_values_to_be_of_type) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[3]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[4]}](https://docs.greatexpectations.io/docs/application_integration_support/) - [{SUPPORTED_DATA_SOURCES[5]}](https://docs.greatexpectations.io/docs/application_integration_support/) - [{SUPPORTED_DATA_SOURCES[6]}](https://docs.greatexpectations.io/docs/application_integration_support/) - 
[{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) - [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -204,7 +187,7 @@ class ExpectColumnValuesToBeInTypeList(ColumnMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP type_list: Union[List[str], SuiteParameterDict, None] = pydantic.Field( description=TYPE_LIST_DESCRIPTION @@ -346,7 +329,7 @@ def _prescriptive_renderer( if params["mostly"] is not None and params["mostly"] < 1.0: params["mostly_pct"] = num_to_str(params["mostly"] * 100, no_scientific=True) - # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".") # noqa: E501 + # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".") # noqa: E501 # FIXME CoP if include_column_name: template_str = ( "$column value types must belong to this set: " @@ -359,12 +342,12 @@ def _prescriptive_renderer( + values_string + ", at least $mostly_pct % of the time." ) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if include_column_name: template_str = f"$column value types must belong to this set: {values_string}." else: template_str = f"value types must belong to this set: {values_string}." 
- else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if include_column_name: template_str = ( "$column value types may be any value, but observed value will be reported" @@ -391,7 +374,7 @@ def _prescriptive_renderer( ) ] - def _validate_pandas( # noqa: C901, PLR0912 + def _validate_pandas( # noqa: C901, PLR0912 # FIXME CoP self, actual_column_type, expected_types_list, @@ -436,8 +419,8 @@ def _validate_pandas( # noqa: C901, PLR0912 np.__version__ ) < version.parse("1.21") if _numpy_doesnt_support_extensions_properly and _pandas_supports_extension_dtypes: - # This works around a bug where Pandas nullable int types aren't compatible with Numpy dtypes # noqa: E501 - # Note: Can't do set difference, the whole bugfix is because numpy types can't be compared to # noqa: E501 + # This works around a bug where Pandas nullable int types aren't compatible with Numpy dtypes # noqa: E501 # FIXME CoP + # Note: Can't do set difference, the whole bugfix is because numpy types can't be compared to # noqa: E501 # FIXME CoP # ExtensionDtypes actual_type_is_ext_dtype = isinstance( actual_column_type, pd.core.dtypes.base.ExtensionDtype @@ -457,55 +440,31 @@ def _validate_pandas( # noqa: C901, PLR0912 "result": {"observed_value": actual_column_type.type.__name__}, } - def _validate_sqlalchemy( # noqa: C901 - too complex - self, actual_column_type, expected_types_list, execution_engine - ): - # Our goal is to be as explicit as possible. We will match the dialect - # if that is possible. If there is no dialect available, we *will* - # match against a top-level SqlAlchemy type. - # - # This is intended to be a conservative approach. 
- # - # In particular, we *exclude* types that would be valid under an ORM - # such as "float" for postgresql with this approach - + def _validate_sqlalchemy(self, actual_column_type, expected_types_list, execution_engine): if expected_types_list is None: success = True + elif execution_engine.dialect_name in [ + GXSqlDialect.DATABRICKS, + GXSqlDialect.POSTGRESQL, + GXSqlDialect.SNOWFLAKE, + ]: + success = isinstance(actual_column_type, str) and any( + actual_column_type.lower() == expected_type.lower() + for expected_type in expected_types_list + ) + return { + "success": success, + "result": {"observed_value": actual_column_type}, + } else: types = [] - type_module = _get_dialect_type_module(execution_engine=execution_engine) for type_ in expected_types_list: - try: - if type_module.__name__ == "pyathena.sqlalchemy_athena": - potential_type = get_pyathena_potential_type(type_module, type_) - # In the case of the PyAthena dialect we need to verify that - # the type returned is indeed a type and not an instance. 
- if not inspect.isclass(potential_type): - real_type = type(potential_type) - else: - real_type = potential_type - types.append(real_type) - elif type_module.__name__ == "trino.sqlalchemy.datatype": - potential_type = get_trino_potential_type(type_module, type_) - types.append(type(potential_type)) - elif type_module.__name__ == "clickhouse_sqlalchemy.drivers.base": - actual_column_type = get_clickhouse_sqlalchemy_potential_type( - type_module, actual_column_type - )() - potential_type = get_clickhouse_sqlalchemy_potential_type( - type_module, type_ - ) - types.append(potential_type) - else: - potential_type = getattr(type_module, type_) - types.append(potential_type) - except AttributeError: - logger.debug(f"Unrecognized type: {type_}") - - if len(types) == 0: - logger.warning("No recognized sqlalchemy types in type_list for current dialect.") - types = tuple(types) - success = isinstance(actual_column_type, types) + types.extend( + _get_potential_sqlalchemy_types( + execution_engine=execution_engine, expected_type=type_ + ) + ) + success = isinstance(actual_column_type, tuple(types)) return { "success": success, @@ -528,7 +487,7 @@ def _validate_spark( except AttributeError: logger.debug(f"Unrecognized type: {type_}") if len(types) == 0: - raise ValueError("No recognized spark types in expected_types_list") # noqa: TRY003 + raise ValueError("No recognized spark types in expected_types_list") # noqa: TRY003 # FIXME CoP success = isinstance(actual_column_type, tuple(types)) return { "success": success, @@ -546,11 +505,11 @@ def get_validation_dependencies( PandasExecutionEngine, ) - # This calls BatchExpectation.get_validation_dependencies to set baseline validation_dependencies for the aggregate version # noqa: E501 + # This calls BatchExpectation.get_validation_dependencies to set baseline validation_dependencies for the aggregate version # noqa: E501 # FIXME CoP # of the expectation. 
# We need to keep this as super(ColumnMapExpectation, self), which calls - # BatchExpectation.get_validation_dependencies instead of ColumnMapExpectation.get_validation_dependencies. # noqa: E501 - # This is because the map version of this expectation is only supported for Pandas, so we want the aggregate # noqa: E501 + # BatchExpectation.get_validation_dependencies instead of ColumnMapExpectation.get_validation_dependencies. # noqa: E501 # FIXME CoP + # This is because the map version of this expectation is only supported for Pandas, so we want the aggregate # noqa: E501 # FIXME CoP # version for the other backends. validation_dependencies: ValidationDependencies = super( ColumnMapExpectation, self @@ -592,7 +551,7 @@ def get_validation_dependencies( and actual_column_type.type.__name__ == "object_" and expected_types_list is not None ): - # this resets validation_dependencies using ColumnMapExpectation.get_validation_dependencies # noqa: E501 + # this resets validation_dependencies using ColumnMapExpectation.get_validation_dependencies # noqa: E501 # FIXME CoP validation_dependencies = super().get_validation_dependencies( execution_engine, runtime_configuration ) diff --git a/great_expectations/expectations/core/expect_column_values_to_be_increasing.py b/great_expectations/expectations/core/expect_column_values_to_be_increasing.py index 647926c0d00b..c86817ea94ea 100644 --- a/great_expectations/expectations/core/expect_column_values_to_be_increasing.py +++ b/great_expectations/expectations/core/expect_column_values_to_be_increasing.py @@ -71,7 +71,7 @@ class ExpectColumnValuesToBeIncreasing(ColumnMapExpectation): See Also: [ExpectColumnValuesToBeDecreasing](https://greatexpectations.io/expectations/expect_column_values_to_be_decreasing) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP strictly: Union[bool, None] = None diff --git a/great_expectations/expectations/core/expect_column_values_to_be_json_parseable.py 
b/great_expectations/expectations/core/expect_column_values_to_be_json_parseable.py index 9506e84bb814..2768e5e4ba53 100644 --- a/great_expectations/expectations/core/expect_column_values_to_be_json_parseable.py +++ b/great_expectations/expectations/core/expect_column_values_to_be_json_parseable.py @@ -28,7 +28,7 @@ from great_expectations.render.renderer_configuration import AddParamArgs try: - import sqlalchemy as sa # noqa: F401, TID251 + import sqlalchemy as sa # noqa: F401, TID251 # FIXME CoP except ImportError: pass @@ -66,7 +66,7 @@ class ExpectColumnValuesToBeJsonParseable(ColumnMapExpectation): See Also: [ExpectColumnValuesToMatchJsonSchema](https://greatexpectations.io/expectations/expect_column_values_to_match_json_schema) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # This dictionary contains metadata for display in the public gallery library_metadata = { diff --git a/great_expectations/expectations/core/expect_column_values_to_be_null.py b/great_expectations/expectations/core/expect_column_values_to_be_null.py index e9fbca22c36c..34bd55fd8dbb 100644 --- a/great_expectations/expectations/core/expect_column_values_to_be_null.py +++ b/great_expectations/expectations/core/expect_column_values_to_be_null.py @@ -13,6 +13,7 @@ from great_expectations.expectations.expectation_configuration import ( parse_result_format, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_DESCRIPTION, MOSTLY_DESCRIPTION, @@ -54,12 +55,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Missingness"] +DATA_QUALITY_ISSUES = [DataQualityIssues.COMPLETENESS.value] class ExpectColumnValuesToBeNull(ColumnMapExpectation): @@ -97,7 +97,7 @@ class ExpectColumnValuesToBeNull(ColumnMapExpectation): See Also: 
[ExpectColumnValuesToNotBeNull](https://greatexpectations.io/expectations/expect_column_values_to_not_be_null) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -108,7 +108,7 @@ class ExpectColumnValuesToBeNull(ColumnMapExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -169,7 +169,7 @@ class ExpectColumnValuesToBeNull(ColumnMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_keys: ClassVar[Tuple[str, ...]] = ( "column", diff --git a/great_expectations/expectations/core/expect_column_values_to_be_of_type.py b/great_expectations/expectations/core/expect_column_values_to_be_of_type.py index 47a90a38a306..03c9b54f2c1c 100644 --- a/great_expectations/expectations/core/expect_column_values_to_be_of_type.py +++ b/great_expectations/expectations/core/expect_column_values_to_be_of_type.py @@ -8,13 +8,23 @@ import pandas as pd from great_expectations.compatibility import aws, pydantic, pyspark, trino +from great_expectations.compatibility.bigquery import ( + BIGQUERY_GEO_SUPPORT, + bigquery_types_tuple, +) +from great_expectations.compatibility.bigquery import ( + sqlalchemy_bigquery as BigQueryDialect, +) from great_expectations.compatibility.sqlalchemy import sqlalchemy as sa from great_expectations.compatibility.typing_extensions import override -from great_expectations.execution_engine.sqlalchemy_dialect import GXSqlDialect +from 
great_expectations.execution_engine.sqlalchemy_dialect import ( + GXSqlDialect, # noqa: TC001, RUF100 # FIXME CoP +) from great_expectations.expectations.expectation import ( ColumnMapExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import COLUMN_DESCRIPTION from great_expectations.expectations.registry import get_metric_kwargs from great_expectations.render import LegacyRendererType, RenderedStringTemplateContent @@ -35,12 +45,8 @@ from great_expectations.validator.metric_configuration import MetricConfiguration if TYPE_CHECKING: - from great_expectations.core import ( - ExpectationValidationResult, - ) - from great_expectations.execution_engine import ( - ExecutionEngine, - ) + from great_expectations.core import ExpectationValidationResult + from great_expectations.execution_engine import ExecutionEngine from great_expectations.expectations.expectation_configuration import ( ExpectationConfiguration, ) @@ -49,19 +55,6 @@ logger = logging.getLogger(__name__) - -_BIGQUERY_MODULE_NAME = "sqlalchemy_bigquery" -BIGQUERY_GEO_SUPPORT = False -from great_expectations.compatibility.bigquery import GEOGRAPHY, bigquery_types_tuple -from great_expectations.compatibility.bigquery import ( - sqlalchemy_bigquery as BigQueryDialect, -) - -if GEOGRAPHY: - BIGQUERY_GEO_SUPPORT = True -else: - BIGQUERY_GEO_SUPPORT = False - try: import teradatasqlalchemy.dialect import teradatasqlalchemy.types as teradatatypes @@ -87,12 +80,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Schema"] +DATA_QUALITY_ISSUES = [DataQualityIssues.SCHEMA.value] class ExpectColumnValuesToBeOfType(ColumnMapExpectation): @@ -147,7 +139,7 @@ class ExpectColumnValuesToBeOfType(ColumnMapExpectation): See also: 
[ExpectColumnValuesToBeInTypeList](https://greatexpectations.io/expectations/expect_column_values_to_be_in_type_list) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -158,7 +150,7 @@ class ExpectColumnValuesToBeOfType(ColumnMapExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -227,7 +219,7 @@ class ExpectColumnValuesToBeOfType(ColumnMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP type_: str = pydantic.Field(description=TYPE__DESCRIPTION) @@ -419,47 +411,24 @@ def _validate_sqlalchemy(self, actual_column_type, expected_type, execution_engi if expected_type is None: success = True + elif execution_engine.dialect_name in [ + GXSqlDialect.DATABRICKS, + GXSqlDialect.POSTGRESQL, + GXSqlDialect.SNOWFLAKE, + ]: + success = ( + isinstance(actual_column_type, str) + and actual_column_type.lower() == expected_type.lower() + ) + return { + "success": success, + "result": {"observed_value": actual_column_type}, + } else: - types = [] - type_module = _get_dialect_type_module(execution_engine=execution_engine) - try: - # bigquery geography requires installing an extra package - if ( - expected_type.lower() == "geography" - and execution_engine.engine.dialect.name.lower() == GXSqlDialect.BIGQUERY - and not BIGQUERY_GEO_SUPPORT - ): - logger.warning( - "BigQuery GEOGRAPHY type is not supported by default. 
" - + "To install support, please run:" - + " $ pip install 'sqlalchemy-bigquery[geography]'" - ) - elif type_module.__name__ == "pyathena.sqlalchemy_athena": - potential_type = get_pyathena_potential_type(type_module, expected_type) - # In the case of the PyAthena dialect we need to verify that - # the type returned is indeed a type and not an instance. - if not inspect.isclass(potential_type): - real_type = type(potential_type) - else: - real_type = potential_type - types.append(real_type) - elif type_module.__name__ == "clickhouse_sqlalchemy.drivers.base": - actual_column_type = get_clickhouse_sqlalchemy_potential_type( - type_module, actual_column_type - )() - potential_type = get_clickhouse_sqlalchemy_potential_type( - type_module, expected_type - ) - types.append(potential_type) - else: - potential_type = getattr(type_module, expected_type) - types.append(potential_type) - except AttributeError: - logger.debug(f"Unrecognized type: {expected_type}") - if len(types) == 0: - logger.warning("No recognized sqlalchemy types in type_list for current dialect.") - types = tuple(types) - success = isinstance(actual_column_type, types) + types = _get_potential_sqlalchemy_types( + execution_engine=execution_engine, expected_type=expected_type + ) + success = isinstance(actual_column_type, tuple(types)) return { "success": success, @@ -481,7 +450,7 @@ def _validate_spark( except AttributeError: logger.debug(f"Unrecognized type: {expected_type}") if len(types) == 0: - raise ValueError("No recognized spark types in expected_types_list") # noqa: TRY003 + raise ValueError("No recognized spark types in expected_types_list") # noqa: TRY003 # FIXME CoP types = tuple(types) success = isinstance(actual_column_type, types) return { @@ -500,11 +469,11 @@ def get_validation_dependencies( PandasExecutionEngine, ) - # This calls BatchExpectation.get_validation_dependencies to set baseline validation_dependencies for the aggregate version # noqa: E501 + # This calls 
BatchExpectation.get_validation_dependencies to set baseline validation_dependencies for the aggregate version # noqa: E501 # FIXME CoP # of the expectation. # We need to keep this as super(ColumnMapExpectation, self), which calls - # BatchExpectation.get_validation_dependencies instead of ColumnMapExpectation.get_validation_dependencies. # noqa: E501 - # This is because the map version of this expectation is only supported for Pandas, so we want the aggregate # noqa: E501 + # BatchExpectation.get_validation_dependencies instead of ColumnMapExpectation.get_validation_dependencies. # noqa: E501 # FIXME CoP + # This is because the map version of this expectation is only supported for Pandas, so we want the aggregate # noqa: E501 # FIXME CoP # version for the other backends. validation_dependencies: ValidationDependencies = super( ColumnMapExpectation, self @@ -554,7 +523,7 @@ def get_validation_dependencies( None, ] ): - # this resets validation_dependencies using ColumnMapExpectation.get_validation_dependencies # noqa: E501 + # this resets validation_dependencies using ColumnMapExpectation.get_validation_dependencies # noqa: E501 # FIXME CoP validation_dependencies = super().get_validation_dependencies( execution_engine, runtime_configuration ) @@ -625,7 +594,45 @@ def _validate( ) -def _get_dialect_type_module( # noqa: C901, PLR0911 +def _get_potential_sqlalchemy_types(execution_engine, expected_type): + types = [] + type_module = _get_dialect_type_module(execution_engine=execution_engine) + try: + # bigquery geography requires installing an extra package + if ( + expected_type.lower() == "geography" + and execution_engine.engine.dialect.name.lower() == GXSqlDialect.BIGQUERY + and not BIGQUERY_GEO_SUPPORT + ): + logger.warning( + "BigQuery GEOGRAPHY type is not supported by default. 
" + + "To install support, please run:" + + " $ pip install 'sqlalchemy-bigquery[geography]'" + ) + elif type_module.__name__ == "pyathena.sqlalchemy_athena": + potential_type = get_pyathena_potential_type(type_module, expected_type) + # In the case of the PyAthena dialect we need to verify that + # the type returned is indeed a type and not an instance. + if not inspect.isclass(potential_type): + real_type = type(potential_type) + else: + real_type = potential_type + types.append(real_type) + elif type_module.__name__ == "clickhouse_sqlalchemy.drivers.base": + potential_type = get_clickhouse_sqlalchemy_potential_type(type_module, expected_type) + types.append(potential_type) + else: + potential_type = getattr(type_module, expected_type) + types.append(potential_type) + except AttributeError: + logger.debug(f"Unrecognized type: {expected_type}") + if len(types) == 0: + logger.debug("No recognized sqlalchemy types in type_list for current dialect.") + + return types + + +def _get_dialect_type_module( # noqa: C901, PLR0911 # FIXME CoP execution_engine, ): if execution_engine.dialect_module is None: @@ -696,7 +703,7 @@ def _get_dialect_type_module( # noqa: C901, PLR0911 return execution_engine.dialect_module -def _native_type_type_map(type_): # noqa: C901, PLR0911 +def _native_type_type_map(type_): # noqa: C901, PLR0911 # FIXME CoP # We allow native python types in cases where the underlying type is "object": if type_.lower() == "none": return (type(None),) diff --git a/great_expectations/expectations/core/expect_column_values_to_be_unique.py b/great_expectations/expectations/core/expect_column_values_to_be_unique.py index 085371193175..43c641e727dd 100644 --- a/great_expectations/expectations/core/expect_column_values_to_be_unique.py +++ b/great_expectations/expectations/core/expect_column_values_to_be_unique.py @@ -7,6 +7,7 @@ ColumnMapExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from 
great_expectations.expectations.model_field_descriptions import COLUMN_DESCRIPTION from great_expectations.render import LegacyRendererType, RenderedStringTemplateContent from great_expectations.render.renderer.renderer import renderer @@ -21,7 +22,7 @@ ) try: - import sqlalchemy as sa # noqa: F401, TID251 + import sqlalchemy as sa # noqa: F401, TID251 # FIXME CoP except ImportError: pass @@ -41,12 +42,12 @@ "SQLite", "PostgreSQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", + "MySQL", ] -DATA_QUALITY_ISSUES = ["Cardinality"] +DATA_QUALITY_ISSUES = [DataQualityIssues.UNIQUENESS.value] class ExpectColumnValuesToBeUnique(ColumnMapExpectation): @@ -87,7 +88,7 @@ class ExpectColumnValuesToBeUnique(ColumnMapExpectation): Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta. - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -96,8 +97,9 @@ class ExpectColumnValuesToBeUnique(ColumnMapExpectation): [{SUPPORTED_DATA_SOURCES[5]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[6]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) + [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -163,7 +165,7 @@ class ExpectColumnValuesToBeUnique(ColumnMapExpectation): "meta": {{}}, "success": true }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP library_metadata: ClassVar[Dict[str, Union[str, list, 
bool]]] = { "maturity": "production", diff --git a/great_expectations/expectations/core/expect_column_values_to_match_json_schema.py b/great_expectations/expectations/core/expect_column_values_to_match_json_schema.py index 39fdfe177ddb..77265bf48b26 100644 --- a/great_expectations/expectations/core/expect_column_values_to_match_json_schema.py +++ b/great_expectations/expectations/core/expect_column_values_to_match_json_schema.py @@ -29,7 +29,7 @@ from great_expectations.render.renderer_configuration import AddParamArgs try: - import sqlalchemy as sa # noqa: F401, TID251 + import sqlalchemy as sa # noqa: F401, TID251 # FIXME CoP except ImportError: pass @@ -70,7 +70,7 @@ class ExpectColumnValuesToMatchJsonSchema(ColumnMapExpectation): See Also: [ExpectColumnValuesToBeJsonParseable](https://greatexpectations.io/expectations/expect_column_values_to_be_json_parseable) [The JSON-schema docs](https://json-schema.org) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP json_schema: dict @@ -123,7 +123,7 @@ def _prescriptive_template( renderer_configuration = cls._add_mostly_pct_param( renderer_configuration=renderer_configuration ) - template_str = "values must match the following JSON Schema, at least $mostly_pct % of the time: $formatted_json" # noqa: E501 + template_str = "values must match the following JSON Schema, at least $mostly_pct % of the time: $formatted_json" # noqa: E501 # FIXME CoP else: template_str = "values must match the following JSON Schema: $formatted_json" @@ -160,8 +160,8 @@ def _prescriptive_renderer( ) if params["mostly"] is not None and params["mostly"] < 1.0: params["mostly_pct"] = num_to_str(params["mostly"] * 100, no_scientific=True) - # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".") # noqa: E501 - template_str = "values must match the following JSON Schema, at least $mostly_pct % of the time: $formatted_json" # noqa: E501 + # params["mostly_pct"] = 
"{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".") # noqa: E501 # FIXME CoP + template_str = "values must match the following JSON Schema, at least $mostly_pct % of the time: $formatted_json" # noqa: E501 # FIXME CoP else: template_str = "values must match the following JSON Schema: $formatted_json" diff --git a/great_expectations/expectations/core/expect_column_values_to_match_like_pattern.py b/great_expectations/expectations/core/expect_column_values_to_match_like_pattern.py index 842d36537828..d4a282234a14 100644 --- a/great_expectations/expectations/core/expect_column_values_to_match_like_pattern.py +++ b/great_expectations/expectations/core/expect_column_values_to_match_like_pattern.py @@ -4,11 +4,12 @@ from great_expectations.compatibility import pydantic from great_expectations.core.suite_parameters import ( - SuiteParameterDict, # noqa: TCH001 + SuiteParameterDict, # noqa: TCH001 # FIXME CoP ) from great_expectations.expectations.expectation import ( ColumnMapExpectation, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_DESCRIPTION, MOSTLY_DESCRIPTION, @@ -31,7 +32,7 @@ from great_expectations.render.renderer_configuration import AddParamArgs try: - import sqlalchemy as sa # noqa: F401, TID251 + import sqlalchemy as sa # noqa: F401, TID251 # FIXME CoP except ImportError: pass @@ -39,8 +40,16 @@ "Expect the column entries to be strings that match a given like pattern expression." ) LIKE_PATTERN_DESCRIPTION = "The SQL like pattern expression the column entries should match." 
-DATA_QUALITY_ISSUES = ["Pattern matching"] -SUPPORTED_DATA_SOURCES = ["SQLite", "PostgreSQL", "MySQL", "MSSQL", "Redshift", "Databricks (SQL)"] +DATA_QUALITY_ISSUES = [DataQualityIssues.VALIDITY.value] +SUPPORTED_DATA_SOURCES = [ + "SQLite", + "PostgreSQL", + "MySQL", + "MSSQL", + "Databricks (SQL)", + "BigQuery", + "Snowflake", +] class ExpectColumnValuesToMatchLikePattern(ColumnMapExpectation): @@ -87,14 +96,16 @@ class ExpectColumnValuesToMatchLikePattern(ColumnMapExpectation): [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern) [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[3]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[4]}](https://docs.greatexpectations.io/docs/application_integration_support/) + [{SUPPORTED_DATA_SOURCES[5]}](https://docs.greatexpectations.io/docs/application_integration_support/) + [{SUPPORTED_DATA_SOURCES[6]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -162,7 +173,7 @@ class ExpectColumnValuesToMatchLikePattern(ColumnMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP like_pattern: Union[str, SuiteParameterDict] = pydantic.Field( description=LIKE_PATTERN_DESCRIPTION @@ -277,7 +288,7 @@ def _prescriptive_renderer( if params["mostly"] is not None: params["mostly_pct"] = 
num_to_str(params["mostly"] * 100, no_scientific=True) mostly_str = "" if params.get("mostly") is None else ", at least $mostly_pct % of the time" - like_pattern = params.get("like_pattern") # noqa: F841 + like_pattern = params.get("like_pattern") # noqa: F841 # FIXME CoP template_str = f"Values must match like pattern $like_pattern {mostly_str}: " diff --git a/great_expectations/expectations/core/expect_column_values_to_match_like_pattern_list.py b/great_expectations/expectations/core/expect_column_values_to_match_like_pattern_list.py index a7e792112118..c5776de72f72 100644 --- a/great_expectations/expectations/core/expect_column_values_to_match_like_pattern_list.py +++ b/great_expectations/expectations/core/expect_column_values_to_match_like_pattern_list.py @@ -4,11 +4,12 @@ from great_expectations.compatibility import pydantic from great_expectations.core.suite_parameters import ( - SuiteParameterDict, # noqa: TCH001 + SuiteParameterDict, # noqa: TCH001 # FIXME CoP ) from great_expectations.expectations.expectation import ( ColumnMapExpectation, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_DESCRIPTION, MOSTLY_DESCRIPTION, @@ -45,8 +46,16 @@ "Use 'any' if the value should match at least one like pattern in the list. " "Use 'all' if it should match each like pattern in the list." 
) -DATA_QUALITY_ISSUES = ["Pattern matching"] -SUPPORTED_DATA_SOURCES = ["SQLite", "PostgreSQL", "MySQL", "MSSQL", "Redshift", "Databricks (SQL)"] +DATA_QUALITY_ISSUES = [DataQualityIssues.VALIDITY.value] +SUPPORTED_DATA_SOURCES = [ + "SQLite", + "PostgreSQL", + "MySQL", + "MSSQL", + "Databricks (SQL)", + "BigQuery", + "Snowflake", +] class ExpectColumnValuesToMatchLikePatternList(ColumnMapExpectation): @@ -95,14 +104,16 @@ class ExpectColumnValuesToMatchLikePatternList(ColumnMapExpectation): [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern) [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[3]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[4]}](https://docs.greatexpectations.io/docs/application_integration_support/) + [{SUPPORTED_DATA_SOURCES[5]}](https://docs.greatexpectations.io/docs/application_integration_support/) + [{SUPPORTED_DATA_SOURCES[6]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -171,7 +182,7 @@ class ExpectColumnValuesToMatchLikePatternList(ColumnMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP like_pattern_list: Union[List[str], SuiteParameterDict] = pydantic.Field( description=LIKE_PATTERN_DESCRIPTION @@ -185,7 +196,7 @@ def validate_like_pattern_list( cls, like_pattern_list: 
list[str] | SuiteParameterDict ) -> list[str] | SuiteParameterDict: if len(like_pattern_list) < 1: - raise ValueError("At least one like_pattern must be supplied in the like_pattern_list.") # noqa: TRY003 + raise ValueError("At least one like_pattern must be supplied in the like_pattern_list.") # noqa: TRY003 # FIXME CoP return like_pattern_list diff --git a/great_expectations/expectations/core/expect_column_values_to_match_regex.py b/great_expectations/expectations/core/expect_column_values_to_match_regex.py index 794268db1fa3..fa8a045811c7 100644 --- a/great_expectations/expectations/core/expect_column_values_to_match_regex.py +++ b/great_expectations/expectations/core/expect_column_values_to_match_regex.py @@ -4,12 +4,13 @@ from great_expectations.compatibility import pydantic from great_expectations.core.suite_parameters import ( - SuiteParameterDict, # noqa: TCH001 + SuiteParameterDict, # noqa: TCH001 # FIXME CoP ) from great_expectations.expectations.expectation import ( ColumnMapExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_DESCRIPTION, MOSTLY_DESCRIPTION, @@ -39,8 +40,16 @@ "Expect the column entries to be strings that match a given regular expression." ) REGEX_DESCRIPTION = "The regular expression the column entries should match." 
-DATA_QUALITY_ISSUES = ["Pattern matching"] -SUPPORTED_DATA_SOURCES = ["Pandas", "Spark", "PostgreSQL", "MySQL", "Redshift", "Databricks (SQL)"] +DATA_QUALITY_ISSUES = [DataQualityIssues.VALIDITY.value] +SUPPORTED_DATA_SOURCES = [ + "Pandas", + "Spark", + "PostgreSQL", + "MySQL", + "Databricks (SQL)", + "BigQuery", + "SQLite", +] class ExpectColumnValuesToMatchRegex(ColumnMapExpectation): @@ -91,14 +100,16 @@ class ExpectColumnValuesToMatchRegex(ColumnMapExpectation): [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern) [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[3]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[4]}](https://docs.greatexpectations.io/docs/application_integration_support/) + [{SUPPORTED_DATA_SOURCES[5]}](https://docs.greatexpectations.io/docs/application_integration_support/) + [{SUPPORTED_DATA_SOURCES[6]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -166,7 +177,7 @@ class ExpectColumnValuesToMatchRegex(ColumnMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP regex: Union[str, SuiteParameterDict] = pydantic.Field( default="(?s).*", description=REGEX_DESCRIPTION @@ -234,7 +245,7 @@ def _question_renderer(cls, configuration, result=None, runtime_configuration=No mostly = 
configuration.kwargs.get("mostly") regex = configuration.kwargs.get("regex") - return f'Do at least {mostly * 100}% of values in column "{column}" match the regular expression {regex}?' # noqa: E501 + return f'Do at least {mostly * 100}% of values in column "{column}" match the regular expression {regex}?' # noqa: E501 # FIXME CoP @classmethod @renderer(renderer_type=LegacyRendererType.ANSWER) @@ -243,9 +254,9 @@ def _answer_renderer(cls, configuration=None, result=None, runtime_configuration mostly = result.expectation_config.kwargs.get("mostly") regex = result.expectation_config.kwargs.get("regex") if result.success: - return f'At least {mostly * 100}% of values in column "{column}" match the regular expression {regex}.' # noqa: E501 + return f'At least {mostly * 100}% of values in column "{column}" match the regular expression {regex}.' # noqa: E501 # FIXME CoP else: - return f'Less than {mostly * 100}% of values in column "{column}" match the regular expression {regex}.' # noqa: E501 + return f'Less than {mostly * 100}% of values in column "{column}" match the regular expression {regex}.' # noqa: E501 # FIXME CoP @classmethod def _prescriptive_template( @@ -305,7 +316,7 @@ def _prescriptive_renderer( template_str = "values must match this regular expression: $regex" if params["mostly"] is not None and params["mostly"] < 1.0: params["mostly_pct"] = num_to_str(params["mostly"] * 100, no_scientific=True) - # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".") # noqa: E501 + # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".") # noqa: E501 # FIXME CoP template_str += ", at least $mostly_pct % of the time." else: template_str += "." 
diff --git a/great_expectations/expectations/core/expect_column_values_to_match_regex_list.py b/great_expectations/expectations/core/expect_column_values_to_match_regex_list.py index 4ebd6ad053f9..6b2fcf7dfa95 100644 --- a/great_expectations/expectations/core/expect_column_values_to_match_regex_list.py +++ b/great_expectations/expectations/core/expect_column_values_to_match_regex_list.py @@ -4,12 +4,13 @@ from great_expectations.compatibility import pydantic from great_expectations.core.suite_parameters import ( - SuiteParameterDict, # noqa: TCH001 + SuiteParameterDict, # noqa: TCH001 # FIXME CoP ) from great_expectations.expectations.expectation import ( ColumnMapExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_DESCRIPTION, MOSTLY_DESCRIPTION, @@ -46,8 +47,16 @@ "Use 'any' if the value should match at least one regular expression in the list. " "Use 'all' if it should match each regular expression in the list." 
) -DATA_QUALITY_ISSUES = ["Pattern matching"] -SUPPORTED_DATA_SOURCES = ["Pandas", "Spark", "PostgreSQL", "MySQL", "Redshift", "Databricks (SQL)"] +DATA_QUALITY_ISSUES = [DataQualityIssues.VALIDITY.value] +SUPPORTED_DATA_SOURCES = [ + "Pandas", + "Spark", + "PostgreSQL", + "MySQL", + "Databricks (SQL)", + "BigQuery", + "SQLite", +] class ExpectColumnValuesToMatchRegexList(ColumnMapExpectation): @@ -99,12 +108,14 @@ class ExpectColumnValuesToMatchRegexList(ColumnMapExpectation): [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern) [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[3]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[4]}](https://docs.greatexpectations.io/docs/application_integration_support/) + [{SUPPORTED_DATA_SOURCES[5]}](https://docs.greatexpectations.io/docs/application_integration_support/) + [{SUPPORTED_DATA_SOURCES[6]}](https://docs.greatexpectations.io/docs/application_integration_support/) Data Quality Issues: {DATA_QUALITY_ISSUES[0]} @@ -176,7 +187,7 @@ class ExpectColumnValuesToMatchRegexList(ColumnMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP regex_list: Union[List[str], SuiteParameterDict] = pydantic.Field( description=REGEX_LIST_DESCRIPTION diff --git a/great_expectations/expectations/core/expect_column_values_to_match_strftime_format.py 
b/great_expectations/expectations/core/expect_column_values_to_match_strftime_format.py index 9b55da10229e..9a9fd42efddc 100644 --- a/great_expectations/expectations/core/expect_column_values_to_match_strftime_format.py +++ b/great_expectations/expectations/core/expect_column_values_to_match_strftime_format.py @@ -5,7 +5,7 @@ from great_expectations.compatibility import pydantic from great_expectations.core.suite_parameters import ( - SuiteParameterDict, # noqa: TCH001 + SuiteParameterDict, # noqa: TCH001 # FIXME CoP ) from great_expectations.expectations.expectation import ( ColumnMapExpectation, @@ -65,7 +65,7 @@ class ExpectColumnValuesToMatchStrftimeFormat(ColumnMapExpectation): An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result) Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP strftime_format: Union[str, SuiteParameterDict] @@ -75,12 +75,12 @@ def validate_strftime_format( ) -> str | SuiteParameterDict: if isinstance(strftime_format, str): try: - datetime.strptime( # noqa: DTZ007 - datetime.strftime(datetime.now(), strftime_format), # noqa: DTZ005 + datetime.strptime( # noqa: DTZ007 # FIXME CoP + datetime.strftime(datetime.now(), strftime_format), # noqa: DTZ005 # FIXME CoP strftime_format, ) except ValueError as e: - raise ValueError(f"Unable to use provided strftime_format. {e!s}") from e # noqa: TRY003 + raise ValueError(f"Unable to use provided strftime_format. 
{e!s}") from e # noqa: TRY003 # FIXME CoP return strftime_format @@ -169,7 +169,7 @@ def _prescriptive_renderer( template_str = "values must match the following strftime format: $strftime_format" if params["mostly"] is not None and params["mostly"] < 1.0: params["mostly_pct"] = num_to_str(params["mostly"] * 100, no_scientific=True) - # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".") # noqa: E501 + # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".") # noqa: E501 # FIXME CoP template_str += ", at least $mostly_pct % of the time." else: template_str += "." diff --git a/great_expectations/expectations/core/expect_column_values_to_not_be_in_set.py b/great_expectations/expectations/core/expect_column_values_to_not_be_in_set.py index 0c976d02bbea..0a6e7884d45f 100644 --- a/great_expectations/expectations/core/expect_column_values_to_not_be_in_set.py +++ b/great_expectations/expectations/core/expect_column_values_to_not_be_in_set.py @@ -9,6 +9,7 @@ ColumnMapExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_DESCRIPTION, MOSTLY_DESCRIPTION, @@ -40,7 +41,7 @@ EXPECTATION_SHORT_DESCRIPTION = "Expect column entries to not be in the set." 
-DATA_QUALITY_ISSUES = ["Sets"] +DATA_QUALITY_ISSUES = [DataQualityIssues.NUMERIC.value, DataQualityIssues.VALIDITY.value] SUPPORTED_DATA_SOURCES = [ "Pandas", "Spark", @@ -48,7 +49,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", @@ -93,7 +93,7 @@ class ExpectColumnValuesToNotBeInSet(ColumnMapExpectation): See Also: [ExpectColumnValuesToBeInSet](https://greatexpectations.io/expectations/expect_column_values_to_be_in_set) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -104,8 +104,9 @@ class ExpectColumnValuesToNotBeInSet(ColumnMapExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} + {DATA_QUALITY_ISSUES[1]} Example Data: test test2 @@ -172,7 +173,7 @@ class ExpectColumnValuesToNotBeInSet(ColumnMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP value_set: ValueSetField @@ -345,7 +346,7 @@ def _prescriptive_renderer( ) ] - def _pandas_column_values_not_in_set( # noqa: PLR0913 + def _pandas_column_values_not_in_set( # noqa: PLR0913 # FIXME CoP self, series: pd.Series, metrics: Dict, diff --git a/great_expectations/expectations/core/expect_column_values_to_not_be_null.py b/great_expectations/expectations/core/expect_column_values_to_not_be_null.py index 640e585233bc..33cada6cb14d 100644 --- a/great_expectations/expectations/core/expect_column_values_to_not_be_null.py +++ 
b/great_expectations/expectations/core/expect_column_values_to_not_be_null.py @@ -14,6 +14,7 @@ from great_expectations.expectations.expectation_configuration import ( parse_result_format, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_DESCRIPTION, MOSTLY_DESCRIPTION, @@ -53,12 +54,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Missingness"] +DATA_QUALITY_ISSUES = [DataQualityIssues.COMPLETENESS.value] class ExpectColumnValuesToNotBeNull(ColumnMapExpectation): @@ -100,7 +100,7 @@ class ExpectColumnValuesToNotBeNull(ColumnMapExpectation): See Also: [ExpectColumnValuesToBeNull](https://greatexpectations.io/expectations/expect_column_values_to_be_null) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -111,7 +111,7 @@ class ExpectColumnValuesToNotBeNull(ColumnMapExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -172,7 +172,7 @@ class ExpectColumnValuesToNotBeNull(ColumnMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP library_metadata: ClassVar[Dict[str, Union[str, list, bool]]] = { "maturity": "production", @@ -278,7 +278,7 @@ def _prescriptive_renderer( ) else: template_str = "values must not be null, at least $mostly_pct % of the time." 
- else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if include_column_name: template_str = "$column values must never be null." else: diff --git a/great_expectations/expectations/core/expect_column_values_to_not_match_like_pattern.py b/great_expectations/expectations/core/expect_column_values_to_not_match_like_pattern.py index 43c6b3c16b38..8cab41644894 100644 --- a/great_expectations/expectations/core/expect_column_values_to_not_match_like_pattern.py +++ b/great_expectations/expectations/core/expect_column_values_to_not_match_like_pattern.py @@ -4,11 +4,12 @@ from great_expectations.compatibility import pydantic from great_expectations.core.suite_parameters import ( - SuiteParameterDict, # noqa: TCH001 + SuiteParameterDict, # noqa: TCH001 # FIXME CoP ) from great_expectations.expectations.expectation import ( ColumnMapExpectation, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_DESCRIPTION, MOSTLY_DESCRIPTION, @@ -35,8 +36,15 @@ "Expect the column entries to be strings that do NOT match a given like pattern expression." ) LIKE_PATTERN_DESCRIPTION = "The SQL like pattern expression the column entries should NOT match." 
-DATA_QUALITY_ISSUES = ["Pattern matching"] -SUPPORTED_DATA_SOURCES = ["SQLite", "PostgreSQL", "MySQL", "MSSQL", "Redshift", "Databricks (SQL)"] +DATA_QUALITY_ISSUES = [DataQualityIssues.VALIDITY.value] +SUPPORTED_DATA_SOURCES = [ + "SQLite", + "PostgreSQL", + "MySQL", + "MSSQL", + "Databricks (SQL)", + "Snowflake", +] class ExpectColumnValuesToNotMatchLikePattern(ColumnMapExpectation): @@ -83,14 +91,15 @@ class ExpectColumnValuesToNotMatchLikePattern(ColumnMapExpectation): [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list) [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[3]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[4]}](https://docs.greatexpectations.io/docs/application_integration_support/) + [{SUPPORTED_DATA_SOURCES[5]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -162,7 +171,7 @@ class ExpectColumnValuesToNotMatchLikePattern(ColumnMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP like_pattern: Union[str, SuiteParameterDict] = pydantic.Field( description=LIKE_PATTERN_DESCRIPTION @@ -278,7 +287,7 @@ def _prescriptive_renderer( params["mostly_pct"] = num_to_str(params["mostly"] * 100, no_scientific=True) mostly_str = "" if params.get("mostly") is None else ", at least $mostly_pct % of the 
time" - like_pattern = params.get("like_pattern") # noqa: F841 + like_pattern = params.get("like_pattern") # noqa: F841 # FIXME CoP template_str = f"Values must not match like pattern : $like_pattern {mostly_str} " diff --git a/great_expectations/expectations/core/expect_column_values_to_not_match_like_pattern_list.py b/great_expectations/expectations/core/expect_column_values_to_not_match_like_pattern_list.py index 5e4813a104a9..c0ddcae651c0 100644 --- a/great_expectations/expectations/core/expect_column_values_to_not_match_like_pattern_list.py +++ b/great_expectations/expectations/core/expect_column_values_to_not_match_like_pattern_list.py @@ -4,11 +4,12 @@ from great_expectations.compatibility import pydantic from great_expectations.core.suite_parameters import ( - SuiteParameterDict, # noqa: TCH001 + SuiteParameterDict, # noqa: TCH001 # FIXME CoP ) from great_expectations.expectations.expectation import ( ColumnMapExpectation, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_DESCRIPTION, MOSTLY_DESCRIPTION, @@ -40,8 +41,15 @@ LIKE_PATTERN_LIST_DESCRIPTION = ( "The list of SQL like pattern expressions the column entries should NOT match." 
) -DATA_QUALITY_ISSUES = ["Pattern matching"] -SUPPORTED_DATA_SOURCES = ["SQLite", "PostgreSQL", "MySQL", "MSSQL", "Redshift", "Databricks (SQL)"] +DATA_QUALITY_ISSUES = [DataQualityIssues.VALIDITY.value] +SUPPORTED_DATA_SOURCES = [ + "SQLite", + "PostgreSQL", + "MySQL", + "MSSQL", + "Databricks (SQL)", + "Snowflake", +] class ExpectColumnValuesToNotMatchLikePatternList(ColumnMapExpectation): @@ -88,14 +96,15 @@ class ExpectColumnValuesToNotMatchLikePatternList(ColumnMapExpectation): [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list) [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[3]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[4]}](https://docs.greatexpectations.io/docs/application_integration_support/) + [{SUPPORTED_DATA_SOURCES[5]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -167,7 +176,7 @@ class ExpectColumnValuesToNotMatchLikePatternList(ColumnMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP like_pattern_list: Union[List[str], SuiteParameterDict] = pydantic.Field( description=LIKE_PATTERN_LIST_DESCRIPTION @@ -178,7 +187,7 @@ def validate_like_pattern_list( cls, like_pattern_list: list[str] | SuiteParameterDict ) -> list[str] | SuiteParameterDict: if len(like_pattern_list) < 1: - raise 
ValueError("At least one like_pattern must be supplied in the like_pattern_list.") # noqa: TRY003 + raise ValueError("At least one like_pattern must be supplied in the like_pattern_list.") # noqa: TRY003 # FIXME CoP return like_pattern_list diff --git a/great_expectations/expectations/core/expect_column_values_to_not_match_regex.py b/great_expectations/expectations/core/expect_column_values_to_not_match_regex.py index 44e57eef5d96..28c919e149cd 100644 --- a/great_expectations/expectations/core/expect_column_values_to_not_match_regex.py +++ b/great_expectations/expectations/core/expect_column_values_to_not_match_regex.py @@ -4,12 +4,13 @@ from great_expectations.compatibility import pydantic from great_expectations.core.suite_parameters import ( - SuiteParameterDict, # noqa: TCH001 + SuiteParameterDict, # noqa: TCH001 # FIXME CoP ) from great_expectations.expectations.expectation import ( ColumnMapExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_DESCRIPTION, MOSTLY_DESCRIPTION, @@ -40,7 +41,7 @@ from great_expectations.render.renderer_configuration import AddParamArgs try: - import sqlalchemy as sa # noqa: F401, TID251 + import sqlalchemy as sa # noqa: F401, TID251 # FIXME CoP except ImportError: pass @@ -48,8 +49,15 @@ "Expect the column entries to be strings that do NOT match a given regular expression." ) REGEX_DESCRIPTION = "The regular expression the column entries should NOT match." 
-DATA_QUALITY_ISSUES = ["Pattern matching"] -SUPPORTED_DATA_SOURCES = ["Pandas", "Spark", "PostgreSQL", "MySQL", "Redshift", "Databricks (SQL)"] +DATA_QUALITY_ISSUES = [DataQualityIssues.VALIDITY.value] +SUPPORTED_DATA_SOURCES = [ + "Pandas", + "Spark", + "PostgreSQL", + "MySQL", + "Databricks (SQL)", + "SQLite", +] class ExpectColumnValuesToNotMatchRegex(ColumnMapExpectation): @@ -100,14 +108,15 @@ class ExpectColumnValuesToNotMatchRegex(ColumnMapExpectation): [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern) [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[3]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[4]}](https://docs.greatexpectations.io/docs/application_integration_support/) + [{SUPPORTED_DATA_SOURCES[5]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -179,7 +188,7 @@ class ExpectColumnValuesToNotMatchRegex(ColumnMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP regex: Union[str, SuiteParameterDict] = pydantic.Field(description=REGEX_DESCRIPTION) @@ -293,15 +302,15 @@ def _prescriptive_renderer( if not params.get("regex"): template_str = "values must not match a regular expression but none was specified." 
- else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if params["mostly"] is not None and params["mostly"] < 1.0: params["mostly_pct"] = num_to_str(params["mostly"] * 100, no_scientific=True) - # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".") # noqa: E501 + # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".") # noqa: E501 # FIXME CoP if include_column_name: - template_str = "$column values must not match this regular expression: $regex, at least $mostly_pct % of the time." # noqa: E501 + template_str = "$column values must not match this regular expression: $regex, at least $mostly_pct % of the time." # noqa: E501 # FIXME CoP else: - template_str = "values must not match this regular expression: $regex, at least $mostly_pct % of the time." # noqa: E501 - else: # noqa: PLR5501 + template_str = "values must not match this regular expression: $regex, at least $mostly_pct % of the time." # noqa: E501 # FIXME CoP + else: # noqa: PLR5501 # FIXME CoP if include_column_name: template_str = "$column values must not match this regular expression: $regex." 
else: diff --git a/great_expectations/expectations/core/expect_column_values_to_not_match_regex_list.py b/great_expectations/expectations/core/expect_column_values_to_not_match_regex_list.py index b356bad6c47e..e0cca517b0ad 100644 --- a/great_expectations/expectations/core/expect_column_values_to_not_match_regex_list.py +++ b/great_expectations/expectations/core/expect_column_values_to_not_match_regex_list.py @@ -4,12 +4,13 @@ from great_expectations.compatibility import pydantic from great_expectations.core.suite_parameters import ( - SuiteParameterDict, # noqa: TCH001 + SuiteParameterDict, # noqa: TCH001 # FIXME CoP ) from great_expectations.expectations.expectation import ( ColumnMapExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_DESCRIPTION, MOSTLY_DESCRIPTION, @@ -42,8 +43,15 @@ REGEX_LIST_DESCRIPTION = ( "The list of regular expressions which the column entries should not match." 
) -DATA_QUALITY_ISSUES = ["Pattern matching"] -SUPPORTED_DATA_SOURCES = ["Pandas", "Spark", "PostgreSQL", "MySQL", "Redshift", "Databricks (SQL)"] +DATA_QUALITY_ISSUES = [DataQualityIssues.VALIDITY.value] +SUPPORTED_DATA_SOURCES = [ + "Pandas", + "Spark", + "PostgreSQL", + "MySQL", + "Databricks (SQL)", + "SQLite", +] class ExpectColumnValuesToNotMatchRegexList(ColumnMapExpectation): @@ -90,14 +98,15 @@ class ExpectColumnValuesToNotMatchRegexList(ColumnMapExpectation): [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern) [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[3]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[4]}](https://docs.greatexpectations.io/docs/application_integration_support/) + [{SUPPORTED_DATA_SOURCES[5]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -164,7 +173,7 @@ class ExpectColumnValuesToNotMatchRegexList(ColumnMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP regex_list: Union[List[str], SuiteParameterDict] = pydantic.Field( description=REGEX_LIST_DESCRIPTION @@ -225,6 +234,14 @@ def schema_extra( } ) + @pydantic.validator("regex_list") + def _validate_regex_list( + cls, regex_list: list[str] | SuiteParameterDict + ) -> list[str] | SuiteParameterDict: + if not regex_list: + 
raise ValueError("regex_list must not be empty") # noqa: TRY003 # Error message gets swallowed by Pydantic + return regex_list + @classmethod def _prescriptive_template( cls, diff --git a/great_expectations/expectations/core/expect_compound_columns_to_be_unique.py b/great_expectations/expectations/core/expect_compound_columns_to_be_unique.py index 5aae53276fe2..b30313671767 100644 --- a/great_expectations/expectations/core/expect_compound_columns_to_be_unique.py +++ b/great_expectations/expectations/core/expect_compound_columns_to_be_unique.py @@ -7,6 +7,7 @@ MulticolumnMapExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_LIST_DESCRIPTION, MOSTLY_DESCRIPTION, @@ -40,12 +41,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Cardinality"] +DATA_QUALITY_ISSUES = [DataQualityIssues.UNIQUENESS.value] class ExpectCompoundColumnsToBeUnique(MulticolumnMapExpectation): @@ -83,7 +83,7 @@ class ExpectCompoundColumnsToBeUnique(MulticolumnMapExpectation): Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta. 
- Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -94,7 +94,7 @@ class ExpectCompoundColumnsToBeUnique(MulticolumnMapExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -170,7 +170,7 @@ class ExpectCompoundColumnsToBeUnique(MulticolumnMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP column_list: Sequence[str] = pydantic.Field(description=COLUMN_LIST_DESCRIPTION) @@ -245,7 +245,7 @@ def _prescriptive_template( renderer_configuration = cls._add_mostly_pct_param( renderer_configuration=renderer_configuration ) - template_str = "Values for given compound columns must be unique together, at least $mostly_pct % of the time: " # noqa: E501 + template_str = "Values for given compound columns must be unique together, at least $mostly_pct % of the time: " # noqa: E501 # FIXME CoP else: template_str = "Values for given compound columns must be unique together: " @@ -293,7 +293,7 @@ def _prescriptive_renderer( if params["mostly"] is not None and params["mostly"] < 1.0: params["mostly_pct"] = num_to_str(params["mostly"] * 100, no_scientific=True) - template_str = "Values for given compound columns must be unique together, at least $mostly_pct % of the time: " # noqa: E501 + template_str = "Values for given compound columns must be unique together, at least $mostly_pct % of the time: " # noqa: E501 # FIXME CoP else: template_str = "Values for given compound columns 
must be unique together: " diff --git a/great_expectations/expectations/core/expect_multicolumn_sum_to_equal.py b/great_expectations/expectations/core/expect_multicolumn_sum_to_equal.py index 1509495328d4..2efdd31a6f3d 100644 --- a/great_expectations/expectations/core/expect_multicolumn_sum_to_equal.py +++ b/great_expectations/expectations/core/expect_multicolumn_sum_to_equal.py @@ -7,6 +7,7 @@ from great_expectations.expectations.expectation import ( MulticolumnMapExpectation, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( COLUMN_LIST_DESCRIPTION, IGNORE_ROW_IF_DESCRIPTION, @@ -47,12 +48,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Data integrity"] +DATA_QUALITY_ISSUES = [DataQualityIssues.NUMERIC.value] class ExpectMulticolumnSumToEqual(MulticolumnMapExpectation): @@ -91,7 +91,7 @@ class ExpectMulticolumnSumToEqual(MulticolumnMapExpectation): Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta. 
- Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -102,7 +102,7 @@ class ExpectMulticolumnSumToEqual(MulticolumnMapExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -180,7 +180,7 @@ class ExpectMulticolumnSumToEqual(MulticolumnMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP sum_total: float = pydantic.Field(description=SUM_TOTAL_DESCRIPTION) ignore_row_if: Literal["all_values_are_missing", "any_value_is_missing", "never"] = ( @@ -303,7 +303,7 @@ def _prescriptive_renderer( if params["mostly"] is not None: params["mostly_pct"] = num_to_str(params["mostly"] * 100, no_scientific=True) mostly_str = "" if params.get("mostly") is None else ", at least $mostly_pct % of the time" - sum_total = params.get("sum_total") # noqa: F841 + sum_total = params.get("sum_total") # noqa: F841 # FIXME CoP column_list_str = "" for idx in range(len(params["column_list"]) - 1): diff --git a/great_expectations/expectations/core/expect_multicolumn_values_to_be_unique.py b/great_expectations/expectations/core/expect_multicolumn_values_to_be_unique.py index ffb2a0f81159..b5faa181976c 100644 --- a/great_expectations/expectations/core/expect_multicolumn_values_to_be_unique.py +++ b/great_expectations/expectations/core/expect_multicolumn_values_to_be_unique.py @@ -67,7 +67,7 @@ class ExpectMulticolumnValuesToBeUnique(ColumnMapExpectation): An 
[ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result) Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP column_list: Union[tuple, list] ignore_row_if: str = "all_values_are_missing" diff --git a/great_expectations/expectations/core/expect_select_column_values_to_be_unique_within_record.py b/great_expectations/expectations/core/expect_select_column_values_to_be_unique_within_record.py index 6ade327e393c..3c24db4dbf48 100644 --- a/great_expectations/expectations/core/expect_select_column_values_to_be_unique_within_record.py +++ b/great_expectations/expectations/core/expect_select_column_values_to_be_unique_within_record.py @@ -7,6 +7,7 @@ MulticolumnMapExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.expectations.model_field_descriptions import ( IGNORE_ROW_IF_DESCRIPTION, MOSTLY_DESCRIPTION, @@ -44,12 +45,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Cardinality"] +DATA_QUALITY_ISSUES = [DataQualityIssues.UNIQUENESS.value] class ExpectSelectColumnValuesToBeUniqueWithinRecord(MulticolumnMapExpectation): @@ -87,7 +87,7 @@ class ExpectSelectColumnValuesToBeUniqueWithinRecord(MulticolumnMapExpectation): Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta. 
- Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -98,7 +98,7 @@ class ExpectSelectColumnValuesToBeUniqueWithinRecord(MulticolumnMapExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} For example: @@ -176,7 +176,7 @@ class ExpectSelectColumnValuesToBeUniqueWithinRecord(MulticolumnMapExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP column_list: Sequence[str] = pydantic.Field(description=COLUMN_LIST_DESCRIPTION) ignore_row_if: str = pydantic.Field( diff --git a/great_expectations/expectations/core/expect_table_column_count_to_be_between.py b/great_expectations/expectations/core/expect_table_column_count_to_be_between.py index f6b5bb45cd77..47f9859d8ac1 100644 --- a/great_expectations/expectations/core/expect_table_column_count_to_be_between.py +++ b/great_expectations/expectations/core/expect_table_column_count_to_be_between.py @@ -4,11 +4,12 @@ from great_expectations.compatibility import pydantic from great_expectations.compatibility.typing_extensions import override -from great_expectations.core.types import Comparable # noqa: TCH001 +from great_expectations.core.types import Comparable # noqa: TCH001 # FIXME CoP from great_expectations.expectations.expectation import ( BatchExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.render import LegacyRendererType, 
RenderedStringTemplateContent from great_expectations.render.renderer.renderer import renderer from great_expectations.render.renderer_configuration import ( @@ -40,12 +41,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Schema"] +DATA_QUALITY_ISSUES = [DataQualityIssues.SCHEMA.value] class ExpectTableColumnCountToBeBetween(BatchExpectation): @@ -87,7 +87,7 @@ class ExpectTableColumnCountToBeBetween(BatchExpectation): See Also: [ExpectTableColumnCountToEqual](https://greatexpectations.io/expectations/expect_table_column_count_to_equal) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -98,7 +98,7 @@ class ExpectTableColumnCountToBeBetween(BatchExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -148,7 +148,7 @@ class ExpectTableColumnCountToBeBetween(BatchExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP min_value: Optional[Comparable] = pydantic.Field(description=MIN_VALUE_DESCRIPTION) max_value: Optional[Comparable] = pydantic.Field(description=MAX_VALUE_DESCRIPTION) @@ -283,7 +283,7 @@ def _prescriptive_renderer( # type: ignore[override] # TODO: Fix this type igno elif params["max_value"] is None: template_str = f"Must have {at_least_str} $min_value columns." 
else: - raise ValueError("unresolvable template_str") # noqa: TRY003 + raise ValueError("unresolvable template_str") # noqa: TRY003 # FIXME CoP return [ RenderedStringTemplateContent( diff --git a/great_expectations/expectations/core/expect_table_column_count_to_equal.py b/great_expectations/expectations/core/expect_table_column_count_to_equal.py index bca642ce5836..6c92576ff98b 100644 --- a/great_expectations/expectations/core/expect_table_column_count_to_equal.py +++ b/great_expectations/expectations/core/expect_table_column_count_to_equal.py @@ -4,13 +4,14 @@ from great_expectations.compatibility import pydantic from great_expectations.core.suite_parameters import ( - SuiteParameterDict, # noqa: TCH001 + SuiteParameterDict, # noqa: TCH001 # FIXME CoP ) from great_expectations.expectations.expectation import ( BatchExpectation, Expectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.render import LegacyRendererType, RenderedStringTemplateContent from great_expectations.render.renderer.renderer import renderer from great_expectations.render.renderer_configuration import ( @@ -37,12 +38,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Schema"] +DATA_QUALITY_ISSUES = [DataQualityIssues.SCHEMA.value] class ExpectTableColumnCountToEqual(BatchExpectation): @@ -76,7 +76,7 @@ class ExpectTableColumnCountToEqual(BatchExpectation): See Also: [ExpectTableColumnCountToBeBetween](https://greatexpectations.io/expectations/expect_table_column_count_to_be_between) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) 
[{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -87,7 +87,7 @@ class ExpectTableColumnCountToEqual(BatchExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -136,7 +136,7 @@ class ExpectTableColumnCountToEqual(BatchExpectation): "observed_value": 2 }} }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP value: Union[int, SuiteParameterDict] = pydantic.Field(description=VALUE_DESCRIPTION) diff --git a/great_expectations/expectations/core/expect_table_columns_to_match_ordered_list.py b/great_expectations/expectations/core/expect_table_columns_to_match_ordered_list.py index ab64ce436952..da1c20f169f8 100644 --- a/great_expectations/expectations/core/expect_table_columns_to_match_ordered_list.py +++ b/great_expectations/expectations/core/expect_table_columns_to_match_ordered_list.py @@ -5,12 +5,13 @@ from great_expectations.compatibility import pydantic from great_expectations.core.suite_parameters import ( - SuiteParameterDict, # noqa: TCH001 + SuiteParameterDict, # noqa: TCH001 # FIXME CoP ) from great_expectations.expectations.expectation import ( BatchExpectation, render_suite_parameter_string, ) +from great_expectations.expectations.metadata_types import DataQualityIssues from great_expectations.render import ( AtomicDiagnosticRendererType, LegacyRendererType, @@ -47,12 +48,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Schema"] +DATA_QUALITY_ISSUES = [DataQualityIssues.SCHEMA.value] class ExpectTableColumnsToMatchOrderedList(BatchExpectation): @@ -84,7 +84,7 @@ class ExpectTableColumnsToMatchOrderedList(BatchExpectation): Exact fields vary depending on the values passed to 
result_format, catch_exceptions, and meta. - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -95,7 +95,7 @@ class ExpectTableColumnsToMatchOrderedList(BatchExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -170,7 +170,7 @@ class ExpectTableColumnsToMatchOrderedList(BatchExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP column_list: Union[list, set, SuiteParameterDict, None] = pydantic.Field( description=COLUMN_LIST_DESCRIPTION @@ -319,7 +319,7 @@ def _validate( "result": {"observed_value": list(actual_column_list)}, } else: - # In the case of differing column lengths between the defined expectation and the observed column set, the # noqa: E501 + # In the case of differing column lengths between the defined expectation and the observed column set, the # noqa: E501 # FIXME CoP # max is determined to generate the column_index. 
number_of_columns = max(len(expected_column_list), len(actual_column_list)) column_index = range(number_of_columns) diff --git a/great_expectations/expectations/core/expect_table_columns_to_match_set.py b/great_expectations/expectations/core/expect_table_columns_to_match_set.py index aeb0ba584166..e791592030f6 100644 --- a/great_expectations/expectations/core/expect_table_columns_to_match_set.py +++ b/great_expectations/expectations/core/expect_table_columns_to_match_set.py @@ -3,14 +3,23 @@ from typing import TYPE_CHECKING, Any, ClassVar, Dict, Optional, Type, Union from great_expectations.compatibility import pydantic +from great_expectations.compatibility.typing_extensions import override from great_expectations.core.suite_parameters import ( - SuiteParameterDict, # noqa: TCH001 + SuiteParameterDict, # noqa: TCH001 # FIXME CoP ) from great_expectations.expectations.expectation import ( BatchExpectation, render_suite_parameter_string, ) -from great_expectations.render import LegacyRendererType, RenderedStringTemplateContent +from great_expectations.expectations.metadata_types import DataQualityIssues +from great_expectations.render import ( + AtomicDiagnosticRendererType, + LegacyRendererType, + RenderedAtomicContent, + RenderedStringTemplateContent, + renderedAtomicValueSchema, +) +from great_expectations.render.renderer.observed_value_renderer import ObservedValueRenderState from great_expectations.render.renderer.renderer import renderer from great_expectations.render.renderer_configuration import ( RendererConfiguration, @@ -42,12 +51,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Schema"] +DATA_QUALITY_ISSUES = [DataQualityIssues.SCHEMA.value] class ExpectTableColumnsToMatchSet(BatchExpectation): @@ -80,7 +88,7 @@ class ExpectTableColumnsToMatchSet(BatchExpectation): Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta. 
- Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -91,7 +99,7 @@ class ExpectTableColumnsToMatchSet(BatchExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -165,13 +173,13 @@ class ExpectTableColumnsToMatchSet(BatchExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP column_set: Union[list, set, SuiteParameterDict, None] = pydantic.Field( description=COLUMN_SET_DESCRIPTION ) exact_match: Union[bool, None] = pydantic.Field( - default=None, description=EXACT_MATCH_DESCRIPTION + default=True, description=EXACT_MATCH_DESCRIPTION ) library_metadata: ClassVar[Dict[str, Union[str, list, bool]]] = { @@ -297,7 +305,7 @@ def _prescriptive_renderer( exact_match_str = "exactly" if params["exact_match"] is True else "at least" - template_str = f"Must have {exact_match_str} these columns (in any order): {column_list_template_str}" # noqa: E501 + template_str = f"Must have {exact_match_str} these columns (in any order): {column_list_template_str}" # noqa: E501 # FIXME CoP for idx in range(len(params["column_list"])): params[f"column_list_{idx!s}"] = params["column_list"][idx] @@ -315,6 +323,96 @@ def _prescriptive_renderer( ) ] + @classmethod + @renderer(renderer_type=AtomicDiagnosticRendererType.OBSERVED_VALUE) + @override + def _atomic_diagnostic_observed_value( + cls, + configuration: Optional[ExpectationConfiguration] = None, + result: 
Optional[ExpectationValidationResult] = None, + runtime_configuration: Optional[dict] = None, + ) -> RenderedAtomicContent: + renderer_configuration: RendererConfiguration = RendererConfiguration( + configuration=configuration, + result=result, + runtime_configuration=runtime_configuration, + ) + expected_param_prefix = "exp__" + expected_param_name = "expected_value" + ov_param_prefix = "ov__" + ov_param_name = "observed_value" + + renderer_configuration.add_param( + name=expected_param_name, + param_type=RendererValueType.ARRAY, + value=renderer_configuration.kwargs.get("column_set", []), + ) + renderer_configuration = cls._add_array_params( + array_param_name=expected_param_name, + param_prefix=expected_param_prefix, + renderer_configuration=renderer_configuration, + ) + + renderer_configuration.add_param( + name=ov_param_name, + param_type=RendererValueType.ARRAY, + value=result.get("result", {}).get("observed_value", []) if result else [], + ) + renderer_configuration = cls._add_array_params( + array_param_name=ov_param_name, + param_prefix=ov_param_prefix, + renderer_configuration=renderer_configuration, + ) + + expected_column_set = set(renderer_configuration.kwargs.get("column_set", [])) + observed_column_set = set( + result.get("result", {}).get("observed_value", []) if result else [] + ) + + observed_columns = ( + (name, sch) + for name, sch in renderer_configuration.params + if name.startswith(ov_param_prefix) + ) + expected_columns = ( + (name, sch) + for name, sch in renderer_configuration.params + if name.startswith(expected_param_prefix) + ) + + template_str_list = [] + for name, schema in observed_columns: + render_state = ( + ObservedValueRenderState.EXPECTED.value + if schema.value in expected_column_set + else ObservedValueRenderState.UNEXPECTED.value + ) + renderer_configuration.params.__dict__[name].render_state = render_state + template_str_list.append(f"${name}") + + for name, schema in expected_columns: + if schema.value not in 
observed_column_set: + renderer_configuration.params.__dict__[ + name + ].render_state = ObservedValueRenderState.MISSING.value + template_str_list.append(f"${name}") + + renderer_configuration.template_str = " ".join(template_str_list) + + value_obj = renderedAtomicValueSchema.load( + { + "template": renderer_configuration.template_str, + "params": renderer_configuration.params.dict(), + "meta_notes": renderer_configuration.meta_notes, + "schema": {"type": "com.superconductive.rendered.string"}, + } + ) + return RenderedAtomicContent( + name=AtomicDiagnosticRendererType.OBSERVED_VALUE, + value=value_obj, + value_type="StringValueType", + ) + def _validate( self, metrics: Dict, @@ -334,9 +432,9 @@ def _validate( return {"success": True, "result": {"observed_value": actual_column_list}} else: # Convert to lists and sort to lock order for testing and output rendering - # unexpected_list contains items from the dataset columns that are not in expected_column_set # noqa: E501 + # unexpected_list contains items from the dataset columns that are not in expected_column_set # noqa: E501 # FIXME CoP unexpected_list = sorted(list(actual_column_set - expected_column_set)) - # missing_list contains items from expected_column_set that are not in the dataset columns # noqa: E501 + # missing_list contains items from expected_column_set that are not in the dataset columns # noqa: E501 # FIXME CoP missing_list = sorted(list(expected_column_set - actual_column_set)) # observed_value contains items that are in the dataset columns observed_value = sorted(actual_column_list) @@ -363,7 +461,7 @@ def _validate( if exact_match: return return_failed - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP # Failed if there are items in the missing list (but OK to have unexpected_list) if len(missing_list) > 0: return return_failed diff --git a/great_expectations/expectations/core/expect_table_row_count_to_be_between.py 
b/great_expectations/expectations/core/expect_table_row_count_to_be_between.py index d77cc400c381..4836286dce62 100644 --- a/great_expectations/expectations/core/expect_table_row_count_to_be_between.py +++ b/great_expectations/expectations/core/expect_table_row_count_to_be_between.py @@ -6,13 +6,16 @@ from great_expectations.compatibility import pydantic from great_expectations.compatibility.typing_extensions import override from great_expectations.core.suite_parameters import ( - SuiteParameterDict, # noqa: TCH001 + SuiteParameterDict, # noqa: TCH001 # FIXME CoP ) from great_expectations.expectations.expectation import ( BatchExpectation, render_suite_parameter_string, ) -from great_expectations.expectations.model_field_types import ConditionParser # noqa: TCH001 +from great_expectations.expectations.metadata_types import DataQualityIssues +from great_expectations.expectations.model_field_types import ( + ConditionParser, # noqa: TCH001 # FIXME CoP +) from great_expectations.render import LegacyRendererType, RenderedStringTemplateContent from great_expectations.render.renderer.renderer import renderer from great_expectations.render.renderer_configuration import ( @@ -37,6 +40,13 @@ EXPECTATION_SHORT_DESCRIPTION = "Expect the number of rows to be between two values." MIN_VALUE_DESCRIPTION = "The minimum number of rows, inclusive." MAX_VALUE_DESCRIPTION = "The maximum number of rows, inclusive." 
+ +STRICT_MIN_DESCRIPTION = ( + "If True, the row count must be strictly larger than min_value, default=False" +) +STRICT_MAX_DESCRIPTION = ( + "If True, the row count must be strictly smaller than max_value, default=False" +) SUPPORTED_DATA_SOURCES = [ "Pandas", "Spark", @@ -44,12 +54,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Volume"] +DATA_QUALITY_ISSUES = [DataQualityIssues.VOLUME.value] class ExpectTableRowCountToBeBetween(BatchExpectation): @@ -66,6 +75,10 @@ class ExpectTableRowCountToBeBetween(BatchExpectation): {MIN_VALUE_DESCRIPTION} max_value (int or None): \ {MAX_VALUE_DESCRIPTION} + strict_min (boolean): \ + {STRICT_MIN_DESCRIPTION} + strict_max (boolean): \ + {STRICT_MAX_DESCRIPTION} Other Parameters: result_format (str or None): \ @@ -84,7 +97,7 @@ class ExpectTableRowCountToBeBetween(BatchExpectation): Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta. Notes: - * min_value and max_value are both inclusive. + * min_value and max_value are both inclusive unless strict_min or strict_max are set to True. * If min_value is None, then max_value is treated as an upper bound, and the number of acceptable rows has \ no minimum. 
* If max_value is None, then min_value is treated as a lower bound, and the number of acceptable rows has \ @@ -93,7 +106,7 @@ class ExpectTableRowCountToBeBetween(BatchExpectation): See Also: [ExpectTableRowCountToEqual](https://greatexpectations.io/expectations/expect_table_row_count_to_equal) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -104,7 +117,7 @@ class ExpectTableRowCountToBeBetween(BatchExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -154,7 +167,7 @@ class ExpectTableRowCountToBeBetween(BatchExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP min_value: Union[int, SuiteParameterDict, datetime, None] = pydantic.Field( default=None, description=MIN_VALUE_DESCRIPTION @@ -162,6 +175,8 @@ class ExpectTableRowCountToBeBetween(BatchExpectation): max_value: Union[int, SuiteParameterDict, datetime, None] = pydantic.Field( default=None, description=MAX_VALUE_DESCRIPTION ) + strict_min: bool = pydantic.Field(default=False, description=STRICT_MAX_DESCRIPTION) + strict_max: bool = pydantic.Field(default=False, description=STRICT_MIN_DESCRIPTION) row_condition: Union[str, None] = None condition_parser: Union[ConditionParser, None] = None @@ -180,10 +195,14 @@ class ExpectTableRowCountToBeBetween(BatchExpectation): success_keys = ( "min_value", "max_value", + "strict_min", + "strict_max", ) args_keys = ( "min_value", "max_value", + "strict_min", + 
"strict_max", ) class Config: @@ -219,6 +238,18 @@ def schema_extra( } ) + @pydantic.root_validator + def _root_validate(cls, values: dict) -> dict: + min_value = values.get("min_value") + max_value = values.get("max_value") + + if min_value is not None and max_value is not None and min_value > max_value: + raise ValueError( # noqa: TRY003 # Error message gets swallowed by Pydantic + f"min_value ({min_value}) must be less than or equal to max_value ({max_value})" + ) + + return values + @classmethod @override def _prescriptive_template( @@ -277,7 +308,7 @@ def _prescriptive_renderer( _ = runtime_configuration.get("include_column_name") is not False styling = runtime_configuration.get("styling") params = substitute_none_for_missing( - configuration.kwargs, # type: ignore[union-attr] + configuration.kwargs, # type: ignore[union-attr] # FIXME CoP [ "min_value", "max_value", @@ -300,7 +331,7 @@ def _prescriptive_renderer( elif params["max_value"] is None: template_str = f"Must have {at_least_str} $min_value rows." 
else: - raise ValueError("unresolvable template_str") # noqa: TRY003 + raise ValueError("unresolvable template_str") # noqa: TRY003 # FIXME CoP return [ RenderedStringTemplateContent( diff --git a/great_expectations/expectations/core/expect_table_row_count_to_equal.py b/great_expectations/expectations/core/expect_table_row_count_to_equal.py index abb09cc9698b..dbd23ded4708 100644 --- a/great_expectations/expectations/core/expect_table_row_count_to_equal.py +++ b/great_expectations/expectations/core/expect_table_row_count_to_equal.py @@ -5,13 +5,16 @@ from great_expectations.compatibility import pydantic from great_expectations.compatibility.typing_extensions import override from great_expectations.core.suite_parameters import ( - SuiteParameterDict, # noqa: TCH001 + SuiteParameterDict, # noqa: TCH001 # FIXME CoP ) from great_expectations.expectations.expectation import ( BatchExpectation, render_suite_parameter_string, ) -from great_expectations.expectations.model_field_types import ConditionParser # noqa: TCH001 +from great_expectations.expectations.metadata_types import DataQualityIssues +from great_expectations.expectations.model_field_types import ( + ConditionParser, # noqa: TCH001 # FIXME CoP +) from great_expectations.render import LegacyRendererType, RenderedStringTemplateContent from great_expectations.render.renderer.renderer import renderer from great_expectations.render.renderer_configuration import ( @@ -38,12 +41,11 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)", ] -DATA_QUALITY_ISSUES = ["Volume"] +DATA_QUALITY_ISSUES = [DataQualityIssues.VOLUME.value] class ExpectTableRowCountToEqual(BatchExpectation): @@ -78,7 +80,7 @@ class ExpectTableRowCountToEqual(BatchExpectation): See Also: [ExpectTableRowCountToBeBetween](https://greatexpectations.io/expectations/expect_table_row_count_to_be_between) - Supported Datasources: + Supported Data Sources: 
[{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) @@ -89,7 +91,7 @@ class ExpectTableRowCountToEqual(BatchExpectation): [{SUPPORTED_DATA_SOURCES[7]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[8]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -138,7 +140,7 @@ class ExpectTableRowCountToEqual(BatchExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP value: Union[int, SuiteParameterDict] = pydantic.Field(description=VALUE_DESCRIPTION) row_condition: Union[str, None] = None @@ -224,7 +226,7 @@ def _prescriptive_renderer( return [ RenderedStringTemplateContent( - **{ # type: ignore[arg-type] + **{ # type: ignore[arg-type] # FIXME CoP "content_block_type": "string_template", "string_template": { "template": template_str, diff --git a/great_expectations/expectations/core/expect_table_row_count_to_equal_other_table.py b/great_expectations/expectations/core/expect_table_row_count_to_equal_other_table.py index b54dc4a003fc..a810b9bbe6f5 100644 --- a/great_expectations/expectations/core/expect_table_row_count_to_equal_other_table.py +++ b/great_expectations/expectations/core/expect_table_row_count_to_equal_other_table.py @@ -9,7 +9,10 @@ BatchExpectation, render_suite_parameter_string, ) -from great_expectations.expectations.model_field_types import ConditionParser # noqa: TCH001 +from great_expectations.expectations.metadata_types import DataQualityIssues +from great_expectations.expectations.model_field_types import ( + ConditionParser, # noqa: TCH001 # FIXME CoP +) from great_expectations.render import ( 
LegacyDiagnosticRendererType, LegacyRendererType, @@ -21,7 +24,7 @@ RendererValueType, ) from great_expectations.render.util import num_to_str, substitute_none_for_missing -from great_expectations.validator.metric_configuration import ( # noqa: TCH001 +from great_expectations.validator.metric_configuration import ( # noqa: TCH001 # FIXME CoP MetricConfiguration, ) @@ -41,8 +44,15 @@ OTHER_TABLE_NAME_DESCRIPTION = ( "The name of the other table. Other table must be located within the same database." ) -SUPPORTED_DATA_SOURCES = ["SQLite", "PostgreSQL", "MySQL", "MSSQL", "Redshift", "Databricks (SQL)"] -DATA_QUALITY_ISSUES = ["Volume"] +SUPPORTED_DATA_SOURCES = [ + "SQLite", + "PostgreSQL", + "MySQL", + "MSSQL", + "Databricks (SQL)", + "Snowflake", +] +DATA_QUALITY_ISSUES = [DataQualityIssues.VOLUME.value] class ExpectTableRowCountToEqualOtherTable(BatchExpectation): @@ -77,14 +87,15 @@ class ExpectTableRowCountToEqualOtherTable(BatchExpectation): [ExpectTableRowCountToBeBetween](https://greatexpectations.io/expectations/expect_table_row_count_to_be_between) [ExpectTableRowCountToEqual](https://greatexpectations.io/expectations/expect_table_row_count_to_equal) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[3]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[4]}](https://docs.greatexpectations.io/docs/application_integration_support/) + [{SUPPORTED_DATA_SOURCES[5]}](https://docs.greatexpectations.io/docs/application_integration_support/) - Data Quality Category: + Data Quality Issues: {DATA_QUALITY_ISSUES[0]} Example Data: @@ -145,7 +156,7 @@ class 
ExpectTableRowCountToEqualOtherTable(BatchExpectation): "meta": {{}}, "success": false }} - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP other_table_name: str = pydantic.Field(description=OTHER_TABLE_NAME_DESCRIPTION) row_condition: Union[str, None] = None @@ -228,7 +239,7 @@ def _prescriptive_renderer( runtime_configuration = runtime_configuration or {} styling = runtime_configuration.get("styling") if not configuration: - raise ValueError("configuration is required for prescriptive renderer") # noqa: TRY003 + raise ValueError("configuration is required for prescriptive renderer") # noqa: TRY003 # FIXME CoP params = substitute_none_for_missing(configuration.kwargs, ["other_table_name"]) template_str = "Row count must equal the row count of table $other_table_name." @@ -262,7 +273,7 @@ def _diagnostic_observed_value_renderer( return RenderedStringTemplateContent( content_block_type="string_template", string_template={ - "template": "Row Count: $self_table_row_count
Other Table Row Count: $other_table_row_count", # noqa: E501 + "template": "Row Count: $self_table_row_count
Other Table Row Count: $other_table_row_count", # noqa: E501 # FIXME CoP "params": { "self_table_row_count": self_table_row_count, "other_table_row_count": other_table_row_count, @@ -285,7 +296,7 @@ def get_validation_dependencies( kwargs = configuration.kwargs if configuration else {} other_table_name = kwargs.get("other_table_name") - # create copy of table.row_count metric and modify "table" metric domain kwarg to be other table name # noqa: E501 + # create copy of table.row_count metric and modify "table" metric domain kwarg to be other table name # noqa: E501 # FIXME CoP table_row_count_metric_config_other: Optional[MetricConfiguration] = deepcopy( validation_dependencies.get_metric_configuration(metric_name="table.row_count") ) diff --git a/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToBeInSet.json b/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToBeInSet.json index 746a8a43d9ce..dcb7db91c853 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToBeInSet.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToBeInSet.json @@ -1,6 +1,6 @@ { "title": "Expect column distinct values to be in set", - "description": "Expect the set of distinct column values to be contained by a given set.\n\nExpectColumnDistinctValuesToBeInSet is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A set of objects used for comparison.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. 
For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n The success value for this expectation will match that of [ExpectColumnValuesToBeInSet](https://greatexpectations.io/expectations/expect_column_values_to_be_in_set).\n\nSee Also:\n [ExpectColumnDistinctValuesToContainSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_contain_set)\n [ExpectColumnDistinctValuesToEqualSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_equal_set)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Sets\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnDistinctValuesToBeInSet(\n column=\"test\",\n value_set=[1, 2, 3, 4, 5]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1,\n 2,\n 4\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 1\n },\n {\n \"value\": 2,\n \"count\": 1\n },\n {\n \"value\": 4,\n \"count\": 1\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnDistinctValuesToBeInSet(\n column=\"test2\",\n value_set=[3, 2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 3\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the set of distinct column values to be contained by a given set.\n\nExpectColumnDistinctValuesToBeInSet is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A set of objects used for comparison.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. 
For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n The success value for this expectation will match that of [ExpectColumnValuesToBeInSet](https://greatexpectations.io/expectations/expect_column_values_to_be_in_set).\n\nSee Also:\n [ExpectColumnDistinctValuesToContainSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_contain_set)\n [ExpectColumnDistinctValuesToEqualSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_equal_set)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData 
Quality Issues:\n Uniqueness\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnDistinctValuesToBeInSet(\n column=\"test\",\n value_set=[1, 2, 3, 4, 5]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1,\n 2,\n 4\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 1\n },\n {\n \"value\": 2,\n \"count\": 1\n },\n {\n \"value\": 4,\n \"count\": 1\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnDistinctValuesToBeInSet(\n column=\"test2\",\n value_set=[3, 2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 3\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -180,7 +180,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Sets" + "Uniqueness" ] }, "library_metadata": { @@ -215,7 +215,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" @@ -225,7 +224,8 @@ } }, "required": [ - "column" + "column", + "value_set" ], "additionalProperties": false, "definitions": { diff --git a/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToContainSet.json b/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToContainSet.json index deab0a2f6be0..83ad42d8af54 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToContainSet.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToContainSet.json @@ -1,6 +1,6 @@ { "title": "Expect column distinct values to contain set", - "description": "Expect the set 
of distinct column values to contain a given set.\n\nExpectColumnDistinctValuesToContainSet is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A set of objects used for comparison.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnDistinctValuesToBeInSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_be_in_set)\n [ExpectColumnDistinctValuesToEqualSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_equal_set)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Sets\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnDistinctValuesToContainSet(\n column=\"test\",\n value_set=[1, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1,\n 2,\n 4\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 1\n },\n {\n \"value\": 2,\n \"count\": 1\n },\n {\n \"value\": 4,\n \"count\": 1\n }\n ]\n }\n },\n \"meta\": {},\n 
\"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnDistinctValuesToContainSet(\n column=\"test2\",\n value_set=[3, 2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 3\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the set of distinct column values to contain a given set.\n\nExpectColumnDistinctValuesToContainSet is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A set of objects used for comparison.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnDistinctValuesToBeInSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_be_in_set)\n [ExpectColumnDistinctValuesToEqualSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_equal_set)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnDistinctValuesToContainSet(\n column=\"test\",\n value_set=[1, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1,\n 2,\n 4\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 1\n },\n {\n \"value\": 2,\n \"count\": 1\n },\n {\n \"value\": 4,\n \"count\": 1\n }\n ]\n }\n },\n \"meta\": 
{},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnDistinctValuesToContainSet(\n column=\"test2\",\n value_set=[3, 2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 3\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -180,7 +180,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Sets" + "Uniqueness" ] }, "library_metadata": { @@ -215,7 +215,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" @@ -225,7 +224,8 @@ } }, "required": [ - "column" + "column", + "value_set" ], "additionalProperties": false, "definitions": { diff --git a/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToEqualSet.json b/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToEqualSet.json index 989c3dc4dd5b..2d5436f9db4b 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToEqualSet.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnDistinctValuesToEqualSet.json @@ -1,6 +1,6 @@ { "title": "Expect column distinct values to equal set", - "description": "Expect the set of distinct column values to equal a given set.\n\nExpectColumnDistinctValuesToEqualSet is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A set of objects used for comparison.\n\nOther Parameters:\n result_format (str or None): 
Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnDistinctValuesToBeInSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_be_in_set)\n [ExpectColumnDistinctValuesToContainSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_contain_set)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n 
Sets\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnDistinctValuesToEqualSet(\n column=\"test\",\n value_set=[1, 2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1,\n 2,\n 4\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 1\n },\n {\n \"value\": 2,\n \"count\": 1\n },\n {\n \"value\": 4,\n \"count\": 1\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnDistinctValuesToEqualSet(\n column=\"test2\",\n value_set=[3, 2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 3\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the set of distinct column values to equal a given set.\n\nExpectColumnDistinctValuesToEqualSet is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A set of objects used for comparison.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. 
For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnDistinctValuesToBeInSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_be_in_set)\n [ExpectColumnDistinctValuesToContainSet](https://greatexpectations.io/expectations/expect_column_distinct_values_to_contain_set)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnDistinctValuesToEqualSet(\n column=\"test\",\n value_set=[1, 2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n 
\"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1,\n 2,\n 4\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 1\n },\n {\n \"value\": 2,\n \"count\": 1\n },\n {\n \"value\": 4,\n \"count\": 1\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnDistinctValuesToEqualSet(\n column=\"test2\",\n value_set=[3, 2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1\n ],\n \"details\": {\n \"value_counts\": [\n {\n \"value\": 1,\n \"count\": 3\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -180,7 +180,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Sets" + "Uniqueness" ] }, "library_metadata": { @@ -215,7 +215,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" @@ -225,7 +224,8 @@ } }, "required": [ - "column" + "column", + "value_set" ], "additionalProperties": false, "definitions": { diff --git a/great_expectations/expectations/core/schemas/ExpectColumnKLDivergenceToBeLessThan.json b/great_expectations/expectations/core/schemas/ExpectColumnKLDivergenceToBeLessThan.json index e927583e9532..0256fe90eaee 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnKLDivergenceToBeLessThan.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnKLDivergenceToBeLessThan.json @@ -181,7 +181,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Distribution" + "Numeric" ] }, "library_metadata": { @@ -217,7 +217,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake" ] diff --git a/great_expectations/expectations/core/schemas/ExpectColumnMaxToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnMaxToBeBetween.json index ab06dcf7441c..ecbebe65787c 100644 
--- a/great_expectations/expectations/core/schemas/ExpectColumnMaxToBeBetween.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnMaxToBeBetween.json @@ -1,6 +1,6 @@ { "title": "Expect column maximum to be between", - "description": "Expect the column maximum to be between a minimum value and a maximum value.\n\nExpectColumnMaxToBeBetween is a Column Aggregate Expectation\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (comparable type or None): The minimum value of the acceptable range for the column maximum.\n max_value (comparable type or None): The maximum value of the acceptable range for the column maximum.\n strict_min (boolean): If True, the lower bound of the column maximum acceptable rangemust be strictly larger than min_value, default=False\n strict_max (boolean): If True, the upper bound of the column maximum acceptable rangemust be strictly smaller than max_value, default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a list representing the actual column max\n\nSee Also:\n [ExpectColumnMinToBeBetween](https://greatexpectations.io/expectations/expect_column_min_to_be_between)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Numerical data\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnMaxToBeBetween(\n column=\"test\",\n min_value=1,\n max_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n 
\"result\": {\n \"observed_value\": 2.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnMaxToBeBetween(\n column=\"test2\",\n min_value=1,\n max_value=7,\n strict_min=False,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 7.0\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the column maximum to be between a minimum value and a maximum value.\n\nExpectColumnMaxToBeBetween is a Column Aggregate Expectation\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (comparable type or None): The minimum value of the acceptable range for the column maximum.\n max_value (comparable type or None): The maximum value of the acceptable range for the column maximum.\n strict_min (boolean): If True, the lower bound of the column maximum acceptable rangemust be strictly larger than min_value, default=False\n strict_max (boolean): If True, the upper bound of the column maximum acceptable rangemust be strictly smaller than max_value, default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. 
For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a list representing the actual column max\n\nSee Also:\n [ExpectColumnMinToBeBetween](https://greatexpectations.io/expectations/expect_column_min_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 
3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnMaxToBeBetween(\n column=\"test\",\n min_value=1,\n max_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnMaxToBeBetween(\n column=\"test2\",\n min_value=1,\n max_value=7,\n strict_min=False,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 7.0\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -161,7 +161,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Numerical data" + "Numeric" ] }, "library_metadata": { @@ -196,7 +196,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectColumnMeanToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnMeanToBeBetween.json index 32ca3b182cd1..0a9a46c03331 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnMeanToBeBetween.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnMeanToBeBetween.json @@ -1,6 +1,6 @@ { "title": "Expect column mean to be between", - "description": "Expect the column mean to be between a minimum value and a maximum value (inclusive).\n\nExpectColumnMeanToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column 
name.\n min_value (float or None): The minimum value for the column mean.\n max_value (float or None): The maximum value for the column mean.\n strict_min (boolean): If True, the column mean must be strictly larger than min_value, default=False\n strict_max (boolean): If True, the column mean must be strictly smaller than max_value, default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound.\n * If max_value is None, then min_value is treated as a lower bound.\n * observed_value field in the result object is customized for this expectation to be a float representing the true mean for the column\n\nSee Also:\n [ExpectColumnMedianToBeBetween](https://greatexpectations.io/expectations/expect_column_median_to_be_between)\n [ExpectColumnStdevToBeBetween](https://greatexpectations.io/expectations/expect_column_stdev_to_be_between)\n\nSupported Datasources:\n 
[Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Numerical data\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnMeanToBeBetween(\n column=\"test\",\n min_value=1,\n max_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 1.275\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnMeanToBeBetween(\n column=\"test2\",\n min_value=1,\n max_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3.375\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the column mean to be between a minimum value and a maximum value (inclusive).\n\nExpectColumnMeanToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the 
conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (float or None): The minimum value for the column mean.\n max_value (float or None): The maximum value for the column mean.\n strict_min (boolean): If True, the column mean must be strictly larger than min_value, default=False\n strict_max (boolean): If True, the column mean must be strictly smaller than max_value, default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound.\n * If max_value is None, then min_value is treated as a lower bound.\n * observed_value field in the result object is customized for this expectation to be a float representing the true mean for the column\n\nSee Also:\n [ExpectColumnMedianToBeBetween](https://greatexpectations.io/expectations/expect_column_median_to_be_between)\n [ExpectColumnStdevToBeBetween](https://greatexpectations.io/expectations/expect_column_stdev_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnMeanToBeBetween(\n column=\"test\",\n min_value=1,\n max_value=3\n )\n\n Output:\n {\n 
\"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 1.275\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnMeanToBeBetween(\n column=\"test2\",\n min_value=1,\n max_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3.375\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -161,7 +161,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Numerical data" + "Numeric" ] }, "library_metadata": { @@ -196,7 +196,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectColumnMedianToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnMedianToBeBetween.json index c841ce12aba5..bf79317decdf 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnMedianToBeBetween.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnMedianToBeBetween.json @@ -1,6 +1,6 @@ { "title": "Expect column median to be between", - "description": "Expect the column median to be between a minimum value and a maximum value.\n\nExpectColumnMedianToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (int or None): The minimum value for the column median.\n max_value (int or None): The maximum value for the column median.\n strict_min (boolean): If True, the 
column median must be strictly larger than min_value, default=False\n strict_max (boolean): If True, the column median must be strictly smaller than max_value, default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a float representing the true median for the column\n\nSee Also:\n [ExpectColumnMeanToBeBetween](https://greatexpectations.io/expectations/expect_column_mean_to_be_between)\n [ExpectColumnStdevToBeBetween](https://greatexpectations.io/expectations/expect_column_stdev_to_be_between)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Numerical data\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnMedianToBeBetween(\n column=\"test\",\n min_value=1,\n max_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 1.15\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnMedianToBeBetween(\n column=\"test2\",\n min_value=3,\n max_value=5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2.75\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the column median to be between a minimum value and a maximum value.\n\nExpectColumnMedianToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (int or None): The minimum value for the column 
median.\n max_value (int or None): The maximum value for the column median.\n strict_min (boolean): If True, the column median must be strictly larger than min_value, default=False\n strict_max (boolean): If True, the column median must be strictly smaller than max_value, default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a float representing the true median for the column\n\nSee Also:\n [ExpectColumnMeanToBeBetween](https://greatexpectations.io/expectations/expect_column_mean_to_be_between)\n [ExpectColumnStdevToBeBetween](https://greatexpectations.io/expectations/expect_column_stdev_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnMedianToBeBetween(\n column=\"test\",\n min_value=1,\n max_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 1.15\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnMedianToBeBetween(\n column=\"test2\",\n min_value=3,\n max_value=5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2.75\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -161,7 +161,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Numerical data" + "Numeric" ] }, "library_metadata": { @@ -196,7 +196,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectColumnMinToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnMinToBeBetween.json index 460636f94f74..7072b0d417a5 100644 --- 
a/great_expectations/expectations/core/schemas/ExpectColumnMinToBeBetween.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnMinToBeBetween.json @@ -1,6 +1,6 @@ { "title": "Expect column minimum to be between", - "description": "Expect the column minimum to be between a minimum value and a maximum value.\n\nExpectColumnMinToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (comparable type or None): The minimal column minimum allowed.\n max_value (comparable type or None): The maximal column minimum allowed.\n strict_min (boolean): If True, the minimal column minimum must be strictly larger than min_value, default=False\n strict_max (boolean): If True, the maximal column minimum must be strictly smaller than max_value, default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a list representing the actual column min\n\nSee Also:\n [ExpectColumnMaxToBeBetween](https://greatexpectations.io/expectations/expect_column_max_to_be_between)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Numerical data\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnMinToBeBetween(\n column=\"test\",\n min_value=.5,\n max_value=1\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n 
\"result\": {\n \"observed_value\": .8\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnMedianToBeBetween(\n column=\"test2\",\n min_value=1,\n max_value=3,\n strict_min=True,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 1\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the column minimum to be between a minimum value and a maximum value.\n\nExpectColumnMinToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (comparable type or None): The minimal column minimum allowed.\n max_value (comparable type or None): The maximal column minimum allowed.\n strict_min (boolean): If True, the minimal column minimum must be strictly larger than min_value, default=False\n strict_max (boolean): If True, the maximal column minimum must be strictly smaller than max_value, default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a list representing the actual column min\n\nSee Also:\n [ExpectColumnMaxToBeBetween](https://greatexpectations.io/expectations/expect_column_max_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnMinToBeBetween(\n column=\"test\",\n min_value=.5,\n max_value=1\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n 
\"result\": {\n \"observed_value\": .8\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnMedianToBeBetween(\n column=\"test2\",\n min_value=1,\n max_value=3,\n strict_min=True,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 1\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -161,7 +161,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Numerical data" + "Numeric" ] }, "library_metadata": { @@ -196,7 +196,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectColumnMostCommonValueToBeInSet.json b/great_expectations/expectations/core/schemas/ExpectColumnMostCommonValueToBeInSet.json index 7450cf90a29d..78518c9dcb2e 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnMostCommonValueToBeInSet.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnMostCommonValueToBeInSet.json @@ -1,6 +1,6 @@ { "title": "Expect column most common value to be in set", - "description": "Expect the most common value to be within the designated value set.\n\nExpectColumnMostCommonValueToBeInSet is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A list of potential values to match.\n ties_okay (boolean or None): If True, then the expectation will still succeed if values outside the designated set are as common (but not more common) than designated 
values. Default False.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * observed_value field in the result object is customized for this expectation to be a list representing the most common values in the column, which is often a single element... 
if there is a tie for most common among multiple values, observed_value will contain a single copy of each most common value\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Sets\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnMostCommonValueToBeInSet(\n column=\"test2\",\n value_set=[1, 2, 4],\n ties_okay=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1\n ]\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnMostCommonValueToBeInSet(\n column=\"test\",\n value_set=[1, 2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1,\n 2,\n 4\n ]\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the most common value to be within the designated value set.\n\nExpectColumnMostCommonValueToBeInSet is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are 
evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A list of potential values to match.\n ties_okay (boolean or None): If True, then the expectation will still succeed if values outside the designated set are as common (but not more common) than designated values. Default False.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * observed_value field in the result object is customized for this expectation to be a list representing the most common values in the column, which is often a single element... 
if there is a tie for most common among multiple values, observed_value will contain a single copy of each most common value\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n Validity\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnMostCommonValueToBeInSet(\n column=\"test2\",\n value_set=[1, 2, 4],\n ties_okay=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1\n ]\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnMostCommonValueToBeInSet(\n column=\"test\",\n value_set=[1, 2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n 1,\n 2,\n 4\n ]\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -185,7 +185,8 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Sets" + "Numeric", + "Validity" ] }, "library_metadata": { @@ -220,7 +221,6 @@ "PostgreSQL", "MySQL", "MSSQL", - 
"Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" @@ -230,7 +230,8 @@ } }, "required": [ - "column" + "column", + "value_set" ], "additionalProperties": false, "definitions": { diff --git a/great_expectations/expectations/core/schemas/ExpectColumnPairValuesAToBeGreaterThanB.json b/great_expectations/expectations/core/schemas/ExpectColumnPairValuesAToBeGreaterThanB.json index bfbb6f933d70..5ee7af3dfa72 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnPairValuesAToBeGreaterThanB.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnPairValuesAToBeGreaterThanB.json @@ -1,6 +1,6 @@ { "title": "Expect column pair values A to be greater than B", - "description": "Expect the values in column A to be greater than column B.\n\nExpectColumnPairValuesAToBeGreaterThanB is a Column Pair Map Expectation.\n\nColumn Pair Map Expectations are evaluated for a pair of columns and ask a yes/no question about the row-wise relationship between those two columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_A (str): The first column name.\n column_B (str): The second column name.\n or_equal (boolean or None): If True, then values can be equal, not strictly greater.\n\nOther Parameters:\n ignore_row_if (str): \"both_values_are_missing\", \"either_value_is_missing\", \"neither\" If specified, sets the condition on which a given row is to be ignored. Default \"neither\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. 
For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\n\nData Quality Category:\n Distribution\n\nExample Data:\n test test2\n 0 2 1\n 1 2 2\n 2 4 4\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnPairValuesAToBeGreaterThanB(\n column_A=\"test\",\n column_B=\"test2\",\n or_equal=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n 
\"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnPairValuesAToBeGreaterThanB(\n column_A=\"test2\",\n column_B=\"test\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n [\n 1,\n 2\n ],\n [\n 2,\n 2\n ],\n [\n 4,\n 4\n ]\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the values in column A to be greater than column B.\n\nExpectColumnPairValuesAToBeGreaterThanB is a Column Pair Map Expectation.\n\nColumn Pair Map Expectations are evaluated for a pair of columns and ask a yes/no question about the row-wise relationship between those two columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_A (str): The first column name.\n column_B (str): The second column name.\n or_equal (boolean or None): If True, then values can be equal, not strictly greater.\n\nOther Parameters:\n ignore_row_if (str): \"both_values_are_missing\", \"either_value_is_missing\", \"neither\" If specified, sets the condition on which a given row is to be ignored. Default \"neither\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). 
Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 2 1\n 1 2 2\n 2 4 4\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnPairValuesAToBeGreaterThanB(\n column_A=\"test\",\n column_B=\"test2\",\n or_equal=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": 
null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnPairValuesAToBeGreaterThanB(\n column_A=\"test2\",\n column_B=\"test\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n [\n 1,\n 2\n ],\n [\n 2,\n 2\n ],\n [\n 4,\n 4\n ]\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -82,9 +82,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -140,7 +147,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Distribution" + "Numeric" ] }, "library_metadata": { @@ -174,7 +181,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeEqual.json b/great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeEqual.json index e571acefb2e5..ac8ad360b416 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeEqual.json +++ 
b/great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeEqual.json @@ -1,6 +1,6 @@ { "title": "Expect column pair values to be equal", - "description": "Expect the values in column A to be the same as column B.\n\nExpectColumnPairValuesToBeEqual is a Column Pair Map Expectation.\n\nColumn Pair Map Expectations are evaluated for a pair of columns and ask a yes/no question about the row-wise relationship between those two columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_A (str): The first column name.\n column_B (str): The second column name.\n\nOther Parameters:\n ignore_row_if (str): \"both_values_are_missing\", \"either_value_is_missing\", \"neither\" If specified, sets the condition on which a given row is to be ignored. Default \"both_values_are_missing\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Data integrity\n\nExample Data:\n test test2\n 0 1 2\n 1 2 2\n 2 4 4\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnPairValuesToBeEqual(\n column_A=\"test\",\n column_B=\"test2\",\n mostly=0.5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n [\n 1,\n 2\n ]\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnPairValuesToBeEqual(\n column_A=\"test\",\n column_B=\"test2\",\n mostly=1.0\n )\n\n 
Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n [\n 1,\n 2\n ]\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the values in column A to be the same as column B.\n\nExpectColumnPairValuesToBeEqual is a Column Pair Map Expectation.\n\nColumn Pair Map Expectations are evaluated for a pair of columns and ask a yes/no question about the row-wise relationship between those two columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_A (str): The first column name.\n column_B (str): The second column name.\n\nOther Parameters:\n ignore_row_if (str): \"both_values_are_missing\", \"either_value_is_missing\", \"neither\" If specified, sets the condition on which a given row is to be ignored. Default \"both_values_are_missing\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. 
For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n Validity\n\nExample Data:\n test test2\n 0 1 2\n 1 2 2\n 2 4 4\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnPairValuesToBeEqual(\n column_A=\"test\",\n column_B=\"test2\",\n mostly=0.5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n [\n 1,\n 2\n ]\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n 
\"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnPairValuesToBeEqual(\n column_A=\"test\",\n column_B=\"test2\",\n mostly=1.0\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n [\n 1,\n 2\n ]\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -82,9 +82,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -135,7 +142,8 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Data integrity" + "Numeric", + "Validity" ] }, "library_metadata": { @@ -170,7 +178,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeInSet.json b/great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeInSet.json index ead32d0f9730..d62b5babc2c0 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeInSet.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnPairValuesToBeInSet.json @@ -1,6 +1,6 @@ { "title": "Expect column pair values to be in set", - "description": "Expect the paired values from columns A and B to belong to a set of valid 
pairs.\n\nExpectColumnPairValuesToBeInSet is a Column Pair Map Expectation.\n\nColumn Pair Map Expectations are evaluated for a pair of columns and ask a yes/no question about the row-wise relationship between those two columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_A (str): The first column name.\n column_B (str): The second column name.\n value_pairs_set (list of tuples): All the valid pairs to be matched.\n\nOther Parameters:\n ignore_row_if (str): \"both_values_are_missing\", \"either_value_is_missing\", \"neither\" If specified, sets the condition on which a given row is to be ignored. Default \"neither\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Sets\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnPairValuesToBeInSet(\n column_A=\"test\",\n column_B=\"test2\",\n value_pairs_set=[(2,1), (1,1)],\n mostly=.5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n [\n 4,\n 1\n ]\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnPairValuesToBeInSet(\n column_A=\"test\",\n column_B=\"test2\",\n 
value_pairs_set=[(1,2) (4,1)],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n [\n 1,\n 1\n ],\n [\n 2,\n 1\n ]\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 66.66666666666666,\n \"unexpected_percent_nonmissing\": 66.66666666666666\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the paired values from columns A and B to belong to a set of valid pairs.\n\nExpectColumnPairValuesToBeInSet is a Column Pair Map Expectation.\n\nColumn Pair Map Expectations are evaluated for a pair of columns and ask a yes/no question about the row-wise relationship between those two columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_A (str): The first column name.\n column_B (str): The second column name.\n value_pairs_set (list of tuples): All the valid pairs to be matched.\n\nOther Parameters:\n ignore_row_if (str): \"both_values_are_missing\", \"either_value_is_missing\", \"neither\" If specified, sets the condition on which a given row is to be ignored. Default \"neither\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. 
For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n Validity\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnPairValuesToBeInSet(\n column_A=\"test\",\n column_B=\"test2\",\n value_pairs_set=[(2,1), (1,1)],\n mostly=.5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n
\"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n [\n 4,\n 1\n ]\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnPairValuesToBeInSet(\n column_A=\"test\",\n column_B=\"test2\",\n value_pairs_set=[(1,2), (4,1)],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n [\n 1,\n 1\n ],\n [\n 2,\n 1\n ]\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 66.66666666666666,\n \"unexpected_percent_nonmissing\": 66.66666666666666\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -82,9 +82,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -147,7 +154,8 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Sets" + "Numeric", + "Validity" ] }, "library_metadata": { @@ -182,7 +190,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake" ] diff --git a/great_expectations/expectations/core/schemas/ExpectColumnProportionOfUniqueValuesToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnProportionOfUniqueValuesToBeBetween.json index f6916c80f065..9489902a090d 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnProportionOfUniqueValuesToBeBetween.json +++
b/great_expectations/expectations/core/schemas/ExpectColumnProportionOfUniqueValuesToBeBetween.json @@ -1,6 +1,6 @@ { "title": "Expect column proportion of unique values to be between", - "description": "Expect the proportion of unique values to be between a minimum value and a maximum value.\n\nFor example, in a column containing [1, 2, 2, 3, 3, 3, 4, 4, 4, 4], there are 4 unique values and 10 total values for a proportion of 0.4.\n\nExpectColumnProportionOfUniqueValuesToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (float or None): The minimum proportion of unique values (Proportions are on the range 0 to 1).\n max_value (float or None): The maximum proportion of unique values (Proportions are on the range 0 to 1).\n strict_min (boolean): If True, the minimum proportion of unique values must be strictly larger than min_value. default=False\n strict_max (boolean): If True, the maximum proportion of unique values must be strictly smaller than max_value. default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a float representing the proportion of unique values in the column\n\nSee Also:\n [ExpectColumnUniqueValueCountToBeBetween](https://greatexpectations.io/expectations/expect_column_unique_value_count_to_be_between)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Cardinality\n\nExample Data:\n test test2\n 0 \"aaa\" 1\n 1 \"abb\" 1\n 2 \"acc\" 1\n 3 \"aaa\" 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnProportionOfUniqueValuesToBeBetween(\n column=\"test\",\n min_value=0,\n max_value=0.8\n )\n\n Output:\n {\n \"exception_info\": {\n 
\"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": .75\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnProportionOfUniqueValuesToBeBetween(\n column=\"test2\",\n min_value=0.3,\n max_value=0.5,\n strict_min=False,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": .5\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the proportion of unique values to be between a minimum value and a maximum value.\n\nFor example, in a column containing [1, 2, 2, 3, 3, 3, 4, 4, 4, 4], there are 4 unique values and 10 total values for a proportion of 0.4.\n\nExpectColumnProportionOfUniqueValuesToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (float or None): The minimum proportion of unique values (Proportions are on the range 0 to 1).\n max_value (float or None): The maximum proportion of unique values (Proportions are on the range 0 to 1).\n strict_min (boolean): If True, the minimum proportion of unique values must be strictly larger than min_value. default=False\n strict_max (boolean): If True, the maximum proportion of unique values must be strictly smaller than max_value. default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. 
For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a float representing the proportion of unique values in the column\n\nSee Also:\n [ExpectColumnUniqueValueCountToBeBetween](https://greatexpectations.io/expectations/expect_column_unique_value_count_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nExample Data:\n test test2\n 0 \"aaa\" 1\n 1 \"abb\" 1\n 2 \"acc\" 1\n 3 \"aaa\" 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnProportionOfUniqueValuesToBeBetween(\n column=\"test\",\n min_value=0,\n max_value=0.8\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": .75\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnProportionOfUniqueValuesToBeBetween(\n column=\"test2\",\n min_value=0.3,\n max_value=0.5,\n strict_min=False,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": .5\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -178,7 +178,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Cardinality" + "Uniqueness" ] }, "library_metadata": { @@ -213,7 +213,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectColumnQuantileValuesToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnQuantileValuesToBeBetween.json index 06b12ef52512..2d43008235ee 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnQuantileValuesToBeBetween.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnQuantileValuesToBeBetween.json @@ -1,6 +1,6 @@ { "title": "Expect column quantile values to be between", - "description": "Expect the specific provided column quantiles to be between a minimum value and a maximum 
value.\n\nExpectColumnQuantileValuesToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nExpectColumnQuantileValuesToBeBetween can be computationally intensive for large datasets.\n\nArgs:\n column (str): The column name.\n quantile_ranges (dictionary with keys 'quantiles' and 'value_ranges'): Key 'quantiles' is an increasingly ordered list of desired quantile values (floats). Key 'value_ranges' is a list of 2-value lists that specify a lower and upper bound (inclusive) for the corresponding quantile (with [min, max] ordering). The length of the 'quantiles' list and the 'value_ranges' list must be equal.\n allow_relative_error (boolean or string): Whether to allow relative error in quantile communications on backends that support or require it.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive.\n * If min_value is None, then max_value is treated as an upper bound only\n * If max_value is None, then min_value is treated as a lower bound only\n * details.success_details field in the result object is customized for this expectation\n\nSee Also:\n [ExpectColumnMinToBeBetween](https://greatexpectations.io/expectations/expect_column_min_to_be_between)\n [ExpectColumnMaxToBeBetween](https://greatexpectations.io/expectations/expect_column_max_to_be_between)\n [ExpectColumnMedianToBeBetween](https://greatexpectations.io/expectations/expect_column_median_to_be_between)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Numerical data\n\nExample Data:\n test\n 0 1 1\n 1 2 7\n 2 2 2.5\n 3 3 3\n 4 3 2\n 5 3 5\n 6 4 6\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnQuantileValuesToBeBetween(\n column=\"test\",\n quantile_ranges={\n \"quantiles\": [0, .333, .667, 1],\n \"value_ranges\": [[0,1], [2,3], [3,4], [4,5]]\n }\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n 
\"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": {\n \"quantiles\": [\n 0,\n 0.333,\n 0.6667,\n 1\n ],\n \"values\": [\n 1,\n 2,\n 3,\n 4\n ]\n },\n \"details\": {\n \"success_details\": [\n true,\n true,\n true,\n true\n ]\n }\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnQuantileValuesToBeBetween(\n column=\"test2\",\n quantile_ranges={\n \"quantiles\": [0, .333, .667, 1],\n \"value_ranges\": [[0,1], [2,3], [3,4], [4,5]]\n }\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": {\n \"quantiles\": [\n 0,\n 0.333,\n 0.6667,\n 1\n ],\n \"values\": [\n 1.0,\n 2.5,\n 5.0,\n 7.0\n ]\n },\n \"details\": {\n \"success_details\": [\n true,\n true,\n false,\n false\n ]\n }\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the specific provided column quantiles to be between a minimum value and a maximum value.\n\nExpectColumnQuantileValuesToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nExpectColumnQuantileValuesToBeBetween can be computationally intensive for large datasets.\n\nArgs:\n column (str): The column name.\n quantile_ranges (dictionary with keys 'quantiles' and 'value_ranges'): Key 'quantiles' is an increasingly ordered list of desired quantile values (floats). Key 'value_ranges' is a list of 2-value lists that specify a lower and upper bound (inclusive) for the corresponding quantile (with [min, max] ordering). 
The length of the 'quantiles' list and the 'value_ranges' list must be equal.\n allow_relative_error (boolean or string): Whether to allow relative error in quantile computations on backends that support or require it.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive.\n * If min_value is None, then max_value is treated as an upper bound only\n * If max_value is None, then min_value is treated as a lower bound only\n * details.success_details field in the result object is customized for this expectation\n\nSee Also:\n [ExpectColumnMinToBeBetween](https://greatexpectations.io/expectations/expect_column_min_to_be_between)\n [ExpectColumnMaxToBeBetween](https://greatexpectations.io/expectations/expect_column_max_to_be_between)\n [ExpectColumnMedianToBeBetween](https://greatexpectations.io/expectations/expect_column_median_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n
[Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 2 7\n 2 2 2.5\n 3 3 3\n 4 3 2\n 5 3 5\n 6 4 6\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnQuantileValuesToBeBetween(\n column=\"test\",\n quantile_ranges={\n \"quantiles\": [0, .333, .667, 1],\n \"value_ranges\": [[0,1], [2,3], [3,4], [4,5]]\n }\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": {\n \"quantiles\": [\n 0,\n 0.333,\n 0.6667,\n 1\n ],\n \"values\": [\n 1,\n 2,\n 3,\n 4\n ]\n },\n \"details\": {\n \"success_details\": [\n true,\n true,\n true,\n true\n ]\n }\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnQuantileValuesToBeBetween(\n column=\"test2\",\n quantile_ranges={\n \"quantiles\": [0, .333, .667, 1],\n \"value_ranges\": [[0,1], [2,3], [3,4], [4,5]]\n }\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": {\n \"quantiles\": [\n 0,\n 0.333,\n 0.6667,\n 1\n ],\n \"values\": [\n 1.0,\n 2.5,\n 5.0,\n 7.0\n ]\n },\n \"details\": {\n \"success_details\": [\n true,\n true,\n false,\n false\n ]\n }\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -131,7
+131,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Numerical data" + "Numeric" ] }, "library_metadata": { @@ -166,7 +166,8 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift" + "Snowflake", + "BigQuery" ] } } diff --git a/great_expectations/expectations/core/schemas/ExpectColumnStdevToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnStdevToBeBetween.json index 7bf8d6d38d9d..ab51e0c7b7ba 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnStdevToBeBetween.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnStdevToBeBetween.json @@ -1,6 +1,6 @@ { "title": "Expect column standard deviation to be between", - "description": "Expect the column standard deviation to be between a minimum value and a maximum value.\n\nUses sample standard deviation (normalized by N-1).\n\nExpectColumnStdevToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (float or None): The minimum value for the column standard deviation.\n max_value (float or None): The maximum value for the column standard deviation.\n strict_min (boolean): If True, the column standard deviation must be strictly larger than min_value. default=False.\n strict_max (boolean): If True, the column standard deviation must be strictly smaller than max_value. default=False.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. 
For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a float representing the true standard deviation for the column\n\nSee Also:\n [ExpectColumnMeanToBeBetween](https://greatexpectations.io/expectations/expect_column_mean_to_be_between)\n [ExpectColumnMedianToBeBetween](https://greatexpectations.io/expectations/expect_column_median_to_be_between)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Distribution\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnStdevToBeBetween(\n column=\"test\",\n min_value=.5,\n max_value=.6\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 0.5251983752196243\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnStdevToBeBetween(\n column=\"test2\",\n min_value=.5,\n max_value=.6\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2.5617376914898995\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the column standard deviation to be between a minimum value and a maximum value.\n\nUses sample standard deviation (normalized by N-1).\n\nExpectColumnStdevToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (float or None): The minimum value for the column standard deviation.\n max_value (float or None): The maximum value for the column standard deviation.\n strict_min (boolean): If True, the column standard deviation must be strictly larger than min_value. 
default=False.\n strict_max (boolean): If True, the column standard deviation must be strictly smaller than max_value. default=False.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a float representing the true standard deviation for the column\n\nSee Also:\n [ExpectColumnMeanToBeBetween](https://greatexpectations.io/expectations/expect_column_mean_to_be_between)\n [ExpectColumnMedianToBeBetween](https://greatexpectations.io/expectations/expect_column_median_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnStdevToBeBetween(\n column=\"test\",\n min_value=.5,\n max_value=.6\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 0.5251983752196243\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnStdevToBeBetween(\n column=\"test2\",\n min_value=.5,\n max_value=.6\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2.5617376914898995\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -161,7 +161,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Distribution" + "Numeric" ] }, "library_metadata": { @@ -196,7 +196,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectColumnSumToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnSumToBeBetween.json index acb5c82472a1..3a68f4c89502 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnSumToBeBetween.json +++ 
b/great_expectations/expectations/core/schemas/ExpectColumnSumToBeBetween.json @@ -1,6 +1,6 @@ { "title": "Expect column sum to be between", - "description": "Expect the column sum to be between a minimum value and a maximum value.\n\nExpectColumnSumToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (comparable type or None): The minimal sum allowed.\n max_value (comparable type or None): The maximal sum allowed.\n strict_min (boolean): If True, the minimal sum must be strictly larger than min_value. default=False.\n strict_max (boolean): If True, the maximal sum must be strictly smaller than max_value. default=False.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a list representing the actual column sum\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Distribution\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnSumToBeBetween(\n column=\"test\",\n min_value=2,\n max_value=6\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 5.1\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnSumToBeBetween(\n column=\"test2\",\n min_value=2,\n max_value=6\n )\n\n Output:\n {\n 
\"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 13.5\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the column sum to be between a minimum value and a maximum value.\n\nExpectColumnSumToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (comparable type or None): The minimal sum allowed.\n max_value (comparable type or None): The maximal sum allowed.\n strict_min (boolean): If True, the minimal sum must be strictly larger than min_value. default=False.\n strict_max (boolean): If True, the maximal sum must be strictly smaller than max_value. default=False.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be a list representing the actual column sum\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnSumToBeBetween(\n column=\"test\",\n min_value=2,\n max_value=6\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 5.1\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n 
ExpectColumnSumToBeBetween(\n column=\"test2\",\n min_value=2,\n max_value=6\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 13.5\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -161,7 +161,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Distribution" + "Numeric" ] }, "library_metadata": { @@ -195,10 +195,10 @@ "SQLite", "PostgreSQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", - "Databricks (SQL)" + "Databricks (SQL)", + "MySQL" ] } } diff --git a/great_expectations/expectations/core/schemas/ExpectColumnToExist.json b/great_expectations/expectations/core/schemas/ExpectColumnToExist.json index 267658983a3b..71cf2e118f41 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnToExist.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnToExist.json @@ -1,6 +1,6 @@ { "title": "Expect column to exist", - "description": "Checks for the existence of a specified column within a table.\n\nExpectColumnToExist is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation. They are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n column (str): The column name.\n\nOther Parameters:\n column_index (int or None, optional): If not None, checks the order of the columns. The expectation will fail if the column is not in location column_index (zero-indexed).\n result_format (str or None, optional): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None, optional): If True, then catch exceptions and include them as part of the result object. 
For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None, optional): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Schema\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nPassing Case:\n Input:\n ExpectColumnToExist(\n column=\"test\",\n column_index=0\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"meta\": {},\n \"success\": true,\n \"result\": {}\n }\n\nFailing Case:\n Input:\n ExpectColumnToExist(\n column=\"missing_column\",\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": 
null\n },\n \"meta\": {},\n \"success\": false,\n \"result\": {}\n }", + "description": "Checks for the existence of a specified column within a table.\n\nExpectColumnToExist is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation. They are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n column (str): The column name.\n\nOther Parameters:\n column_index (int or None, optional): If not None, checks the order of the columns. The expectation will fail if the column is not in location column_index (zero-indexed).\n result_format (str or None, optional): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None, optional): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None, optional): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Schema\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nPassing Case:\n Input:\n ExpectColumnToExist(\n column=\"test\",\n column_index=0\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"meta\": {},\n \"success\": true,\n \"result\": {}\n }\n\nFailing Case:\n Input:\n ExpectColumnToExist(\n column=\"missing_column\",\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"meta\": {},\n \"success\": false,\n \"result\": {}\n }", "type": "object", "properties": { "id": { @@ -142,7 +142,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git 
a/great_expectations/expectations/core/schemas/ExpectColumnUniqueValueCountToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnUniqueValueCountToBeBetween.json index fbee0010d145..77513bf0f861 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnUniqueValueCountToBeBetween.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnUniqueValueCountToBeBetween.json @@ -1,6 +1,6 @@ { "title": "Expect column unique value count to be between", - "description": "Expect the number of unique values to be between a minimum value and a maximum value.\n\nExpectColumnUniqueValueCountToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (int or None): The minimum number of unique values allowed.\n max_value (int or None): The maximum number of unique values allowed.\n strict_min (bool): If True, the column must have strictly more unique value count than min_value to pass.\n strict_max (bool): If True, the column must have strictly fewer unique value count than max_value to pass.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. 
For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be an int representing the number of unique values the column\n\nSee Also:\n [ExpectColumnProportionOfUniqueValuesToBeBetween](https://greatexpectations.io/expectations/expect_column_proportion_of_unique_values_to_be_between)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Cardinality\n\nExample Data:\n test test2\n 0 \"aaa\" 1\n 1 \"abb\" 1\n 
2 \"acc\" 1\n 3 \"aaa\" 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnUniqueValueCountToBeBetween(\n column=\"test\",\n min_value=2,\n max_value=4\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnUniqueValueCountToBeBetween(\n column=\"test2\",\n min_value=3,\n max_value=5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the number of unique values to be between a minimum value and a maximum value.\n\nExpectColumnUniqueValueCountToBeBetween is a Column Aggregate Expectation.\n\nColumn Aggregate Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column, and produce an aggregate Metric, such as a mean, standard deviation, number of unique values, column type, etc.\nIf that Metric meets the conditions you set, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (int or None): The minimum number of unique values allowed.\n max_value (int or None): The maximum number of unique values allowed.\n strict_min (bool): If True, the column must have strictly more unique value count than min_value to pass.\n strict_max (bool): If True, the column must have strictly fewer unique value count than max_value to pass.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. 
For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive.\n * If min_value is None, then max_value is treated as an upper bound\n * If max_value is None, then min_value is treated as a lower bound\n * observed_value field in the result object is customized for this expectation to be an int representing the number of unique values the column\n\nSee Also:\n [ExpectColumnProportionOfUniqueValuesToBeBetween](https://greatexpectations.io/expectations/expect_column_proportion_of_unique_values_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nExample Data:\n test test2\n 0 \"aaa\" 1\n 1 
\"abb\" 1\n 2 \"acc\" 1\n 3 \"aaa\" 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnUniqueValueCountToBeBetween(\n column=\"test\",\n min_value=2,\n max_value=4\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnUniqueValueCountToBeBetween(\n column=\"test2\",\n min_value=3,\n max_value=5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -178,7 +178,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Cardinality" + "Uniqueness" ] }, "library_metadata": { @@ -213,7 +213,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToBeBetween.json index d71d3b1e7439..9f1d02ad6304 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToBeBetween.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToBeBetween.json @@ -1,6 +1,6 @@ { "title": "Expect column value lengths to be between", - "description": "Expect the column entries to be strings with length between a minimum value and a maximum value (inclusive).\n\nThis expectation only works for string-type values. 
Invoking it on ints or floats will raise a TypeError.\n\nExpectColumnValueLengthsToBeBetween is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (int or None): The minimum value for a column entry length.\n max_value (int or None): The maximum value for a column entry length.\n strict_min (boolean): If True, values must be strictly larger than min_value. Default=False\n strict_max (boolean): If True, values must be strictly smaller than max_value. Default=False\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive.\n * If min_value is None, then max_value is treated as an upper bound, and the number of acceptable rows has no minimum.\n * If max_value is None, then min_value is treated as a lower bound, and the number of acceptable rows has no maximum.\n\nSee Also:\n [ExpectColumnValueLengthsToEqual](https://greatexpectations.io/expectations/expect_column_value_lengths_to_equal)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Pattern matching\n\nExample Data:\n test test2\n 0 \"12345\" \"A\"\n 1 \"abcde\" \"13579\"\n 2 \"1b3d5\" \"24680\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValueLengthsToBeBetween(\n column=\"test2\",\n min_value=1,\n max_value=5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n 
\"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValueLengthsToBeBetween(\n column=\"test\",\n min_value=5,\n max_value=5,\n strict_min=True,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n \"12345\",\n \"abcde\",\n \"1b3d5\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the column entries to be strings with length between a minimum value and a maximum value (inclusive).\n\nThis expectation only works for string-type values. Invoking it on ints or floats will raise a TypeError.\n\nExpectColumnValueLengthsToBeBetween is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (int or None): The minimum value for a column entry length.\n max_value (int or None): The maximum value for a column entry length.\n strict_min (boolean): If True, values must be strictly larger than min_value. Default=False\n strict_max (boolean): If True, values must be strictly smaller than max_value. 
Default=False\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive.\n * If min_value is None, then max_value is treated as an upper bound, and the number of acceptable rows has no minimum.\n * If max_value is None, then min_value is treated as a lower bound, and the number of acceptable rows has no maximum.\n\nSee Also:\n [ExpectColumnValueLengthsToEqual](https://greatexpectations.io/expectations/expect_column_value_lengths_to_equal)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"12345\" \"A\"\n 1 \"abcde\" \"13579\"\n 2 \"1b3d5\" \"24680\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValueLengthsToBeBetween(\n column=\"test2\",\n min_value=1,\n max_value=5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValueLengthsToBeBetween(\n column=\"test\",\n min_value=5,\n max_value=5,\n strict_min=True,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n \"12345\",\n \"abcde\",\n \"1b3d5\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -76,9 +76,16 @@ "title": "Mostly", "description": "Successful if at 
least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -162,7 +169,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Pattern matching" + "Validity" ] }, "library_metadata": { @@ -197,7 +204,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToEqual.json b/great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToEqual.json index 817314eb2adb..c0211899bc1f 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToEqual.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnValueLengthsToEqual.json @@ -1,6 +1,6 @@ { "title": "Expect column value lengths to equal", - "description": "Expect the column entries to be strings with length equal to the provided value.\n\nThis expectation only works for string-type values. Invoking it on ints or floats will raise a TypeError.\n\nExpectColumnValueLengthsToEqual is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value (int): The expected value for a column entry length.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). 
Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValueLengthsToBeBetween](https://greatexpectations.io/expectations/expect_column_value_lengths_to_be_between)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Pattern matching\n\nExample Data:\n test test2\n 0 \"12345\" \"A\"\n 1 \"abcde\" \"13579\"\n 2 
\"1b3d5\" \"24680\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValueLengthsToEqual(\n column=\"test\",\n value=5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValueLengthsToEqual(\n column=\"test2\",\n value=5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"A\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the column entries to be strings with length equal to the provided value.\n\nThis expectation only works for string-type values. Invoking it on ints or floats will raise a TypeError.\n\nExpectColumnValueLengthsToEqual is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value (int): The expected value for a column entry length.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValueLengthsToBeBetween](https://greatexpectations.io/expectations/expect_column_value_lengths_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"12345\" \"A\"\n 1 \"abcde\" \"13579\"\n 2 \"1b3d5\" \"24680\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValueLengthsToEqual(\n column=\"test\",\n value=5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n 
ExpectColumnValueLengthsToEqual(\n column=\"test2\",\n value=5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"A\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -76,9 +76,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -130,7 +137,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Pattern matching" + "Validity" ] }, "library_metadata": { @@ -165,7 +172,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValueZScoresToBeLessThan.json b/great_expectations/expectations/core/schemas/ExpectColumnValueZScoresToBeLessThan.json index b182744e0e63..3f13881e2715 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnValueZScoresToBeLessThan.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnValueZScoresToBeLessThan.json @@ -1,6 +1,6 @@ { "title": "Expect column value z-scores to be less than", - "description": "Expect the Z-scores of a column's values to be less than a given threshold.\n\nExpectColumnValueZScoresToBeLessThan is a Column Map Expectation for typed-column backends, and also for PandasExecutionEngine where the column dtype and provided type_ are unambiguous constraints 
(any dtype except 'object' or dtype of 'object' with type_ specified as 'object').\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n threshold (number): A maximum Z-score threshold. All column Z-scores that are lower than this threshold will evaluate successfully.\n double_sided (boolean): A True or False value indicating whether to evaluate double sidedly. Examples: (double_sided = True, threshold = 2) -> Z scores in non-inclusive interval(-2,2) | (double_sided = False, threshold = 2) -> Z scores in non-inclusive interval (-infinity,2)\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Distribution\n\nExample Data:\n test test2\n 0 1 -100000000000\n 1 1 -1\n 2 1 0\n 3 3 1\n 4 3 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValueZScoresToBeLessThan(\n column=\"test\",\n threshold=1.96,\n double_sided=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 5,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValueZScoresToBeLessThan(\n column=\"test2\",\n threshold=1,\n double_sided=True\n )\n\n Output:\n {\n 
\"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 5,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 20.0,\n \"partial_unexpected_list\": [\n -100000000000\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 20.0,\n \"unexpected_percent_nonmissing\": 20.0\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the Z-scores of a column's values to be less than a given threshold.\n\nExpectColumnValueZScoresToBeLessThan is a Column Map Expectation for typed-column backends, and also for PandasExecutionEngine where the column dtype and provided type_ are unambiguous constraints (any dtype except 'object' or dtype of 'object' with type_ specified as 'object').\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n threshold (number): A maximum Z-score threshold. All column Z-scores that are lower than this threshold will evaluate successfully.\n double_sided (boolean): A True or False value indicating whether to evaluate double sidedly. Examples: (double_sided = True, threshold = 2) -> Z scores in non-inclusive interval(-2,2) | (double_sided = False, threshold = 2) -> Z scores in non-inclusive interval (-infinity,2)\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. 
For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 -100000000000\n 1 1 -1\n 2 1 0\n 3 3 1\n 4 3 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValueZScoresToBeLessThan(\n column=\"test\",\n threshold=1.96,\n double_sided=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n 
\"exception_message\": null\n },\n \"result\": {\n \"element_count\": 5,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValueZScoresToBeLessThan(\n column=\"test2\",\n threshold=1,\n double_sided=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 5,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 20.0,\n \"partial_unexpected_list\": [\n -100000000000\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 20.0,\n \"unexpected_percent_nonmissing\": 20.0\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -76,9 +76,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -142,7 +149,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Distribution" + "Numeric" ] }, "library_metadata": { @@ -177,7 +184,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeBetween.json index f299c607fa11..df0e958962f7 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeBetween.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeBetween.json @@ -1,6 +1,6 @@ { "title": "Expect 
column values to be between", - "description": "Expect the column entries to be between a minimum value and a maximum value (inclusive).\n\nExpectColumnValuesToBeBetween is a Column Map Expectation\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (comparable type or None): The minimum value for a column entry.\n max_value (comparable type or None): The maximum value for a column entry.\n strict_min (boolean): If True, values must be strictly larger than min_value. Default=False.\n strict_max (boolean): If True, values must be strictly smaller than max_value. Default=False.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound, and there is no minimum value checked.\n * If max_value is None, then min_value is treated as a lower bound, and there is no maximum value checked.\n\nSee Also:\n [ExpectColumnValueLengthsToBeBetween](https://greatexpectations.io/expectations/expect_column_value_lengths_to_be_between)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Distribution\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeBetween(\n column=\"test\",\n min_value=.5,\n max_value=2\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 4,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n 
\"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToBeBetween(\n column=\"test2\",\n min_value=1,\n max_value=7,\n strict_min=False,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 4,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 25.0,\n \"partial_unexpected_list\": [\n 7.0\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 25.0,\n \"unexpected_percent_nonmissing\": 25.0\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the column entries to be between a minimum value and a maximum value (inclusive).\n\nExpectColumnValuesToBeBetween is a Column Map Expectation\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n min_value (comparable type or None): The minimum value for a column entry.\n max_value (comparable type or None): The maximum value for a column entry.\n strict_min (boolean): If True, values must be strictly larger than min_value. Default=False.\n strict_max (boolean): If True, values must be strictly smaller than max_value. Default=False.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. 
For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound, and there is no minimum value checked.\n * If max_value is None, then min_value is treated as a lower bound, and there is no maximum value checked.\n\nSee Also:\n [ExpectColumnValueLengthsToBeBetween](https://greatexpectations.io/expectations/expect_column_value_lengths_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2\n 0 1 1\n 1 1.3 7\n 2 .8 2.5\n 3 2 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeBetween(\n column=\"test\",\n min_value=.5,\n max_value=2\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 4,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToBeBetween(\n column=\"test2\",\n min_value=1,\n max_value=7,\n strict_min=False,\n strict_max=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 4,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 25.0,\n \"partial_unexpected_list\": [\n 7.0\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 25.0,\n \"unexpected_percent_nonmissing\": 25.0\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -76,9 +76,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], 
"multipleOf": 0.01 }, "row_condition": { @@ -170,7 +177,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Distribution" + "Numeric" ] }, "library_metadata": { @@ -204,10 +211,10 @@ "SQLite", "PostgreSQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", - "Databricks (SQL)" + "Databricks (SQL)", + "MySQL" ] } } diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInSet.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInSet.json index 93de59247afc..ea0613b50293 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInSet.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInSet.json @@ -1,6 +1,6 @@ { "title": "Expect column values to be in set", - "description": "Expect each column value to be in a given set.\n\nExpectColumnValuesToBeInSet is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A set of objects used for comparison.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least mostly fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. 
For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToNotBeInSet](https://greatexpectations.io/expectations/expect_column_values_to_not_be_in_set)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Sets\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeInSet(\n column=\"test\",\n value_set=[1, 2],\n mostly=.5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n 
\"partial_unexpected_list\": [\n 4\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToBeInSet(\n column=\"test2\",\n value_set=[2, 4],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n 1,\n 1,\n 1\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect each column value to be in a given set.\n\nExpectColumnValuesToBeInSet is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A set of objects used for comparison.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least mostly fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. 
For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToNotBeInSet](https://greatexpectations.io/expectations/expect_column_values_to_not_be_in_set)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n Validity\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeInSet(\n column=\"test\",\n value_set=[1, 2],\n mostly=.5\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 
33.33333333333333,\n \"partial_unexpected_list\": [\n 4\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToBeInSet(\n column=\"test2\",\n value_set=[2, 4],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n 1,\n 1,\n 1\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -76,9 +76,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -189,7 +196,8 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Sets" + "Numeric", + "Validity" ] }, "library_metadata": { @@ -224,7 +232,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" @@ -234,7 +241,8 @@ } }, "required": [ - "column" + "column", + "value_set" ], "additionalProperties": false, "definitions": { diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInTypeList.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInTypeList.json index 5949d1819b8e..98154a10471b 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInTypeList.json +++ 
b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeInTypeList.json @@ -1,6 +1,6 @@ { "title": "Expect column values to be in type list", - "description": "Expect a column to contain values from a specified type list.\n\nExpectColumnValuesToBeInTypeList is a Column Map Expectation for typed-column backends, and also for Pandas Datasources where the column dtype provides an unambiguous constraints (any dtype except 'object').\n\nFor Pandas columns with dtype of 'object' ExpectColumnValuesToBeInTypeList will independently check each row's type.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n type_list (list[str] or None): \nA list of strings representing the data type that each column should have as entries. Valid types are defined by the current backend implementation and are dynamically loaded.\n\n For example, valid types for Pandas Datasources include any numpy dtype values (such as 'int64') or native python types (such as 'int'), whereas valid types for a SqlAlchemy Datasource include types named by the current driver such as 'INTEGER' in most SQL dialects and 'TEXT' in dialects such as postgresql. Valid types for Spark Datasources include 'StringType', 'BooleanType' and other pyspark-defined type names.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least mostly fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. 
For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee also:\n [ExpectColumnValuesToBeOfType](https://greatexpectations.io/expectations/expect_column_values_to_be_of_type)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Trino](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Schema\n\nExample Data:\n test test2\n 0 \"12345\" 1\n 1 \"abcde\" 2\n 2 \"1b3d5\" 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeInTypeList(\n column=\"test2\",\n type_list=[\"NUMBER\", \"STRING\"]\n 
)\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToBeInTypeList(\n column=\"test\",\n type_list=[\"NUMBER\", \"DOUBLE\"]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n \"12345\",\n \"abcde\",\n \"1b3d5\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect a column to contain values from a specified type list.\n\nExpectColumnValuesToBeInTypeList is a Column Map Expectation for typed-column backends, and also for Pandas Datasources where the column dtype provides an unambiguous constraints (any dtype except 'object').\n\nFor Pandas columns with dtype of 'object' ExpectColumnValuesToBeInTypeList will independently check each row's type.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n type_list (list[str] or None): \nA list of strings representing the data type that each column should have as entries. 
Valid types are defined by the current backend implementation and are dynamically loaded.\n\n For example, valid types for Pandas Datasources include any numpy dtype values (such as 'int64') or native python types (such as 'int'), whereas valid types for a SqlAlchemy Datasource include types named by the current driver such as 'INTEGER' in most SQL dialects and 'TEXT' in dialects such as postgresql. Valid types for Spark Datasources include 'StringType', 'BooleanType' and other pyspark-defined type names.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least mostly fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee also:\n [ExpectColumnValuesToBeOfType](https://greatexpectations.io/expectations/expect_column_values_to_be_of_type)\n\nSupported Data Sources:\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Schema\n\nExample Data:\n test test2\n 0 \"12345\" 1\n 1 \"abcde\" 2\n 2 \"1b3d5\" 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeInTypeList(\n column=\"test2\",\n type_list=[\"NUMBER\", \"STRING\"]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToBeInTypeList(\n column=\"test\",\n type_list=[\"NUMBER\", \"DOUBLE\"]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": 
[\n \"12345\",\n \"abcde\",\n \"1b3d5\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -76,9 +76,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -162,16 +169,11 @@ "title": "Supported Data Sources", "type": "array", "const": [ - "Pandas", "Spark", "SQLite", "PostgreSQL", "MSSQL", - "Trino", - "Redshift", - "BigQuery", - "Snowflake", - "Databricks (SQL)" + "BigQuery" ] } } diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeNull.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeNull.json index 63e47df4e973..2413c879ab61 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeNull.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeNull.json @@ -1,6 +1,6 @@ { "title": "Expect column values to be null", - "description": "Expect the column values to be null.\n\nExpectColumnValuesToBeNull is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. 
For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToNotBeNull](https://greatexpectations.io/expectations/expect_column_values_to_not_be_null)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Missingness\n\nExample Data:\n test test2\n 0 NaN \"A\"\n 1 True NaN\n 2 False NaN\n\nCode Examples:\n 
Passing Case:\n Input:\n ExpectColumnValuesToBeNull(\n column=\"test2\",\n mostly=0.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"A\"\n ]\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToBeNull(\n column=\"test\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n true,\n false\n ]\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the column values to be null.\n\nExpectColumnValuesToBeNull is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. 
For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToNotBeNull](https://greatexpectations.io/expectations/expect_column_values_to_not_be_null)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Completeness\n\nExample Data:\n test test2\n 0 NaN \"A\"\n 1 True NaN\n 2 False NaN\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeNull(\n column=\"test2\",\n mostly=0.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 
33.33333333333333,\n \"partial_unexpected_list\": [\n \"A\"\n ]\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToBeNull(\n column=\"test\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n true,\n false\n ]\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -76,9 +76,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -118,7 +125,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Missingness" + "Completeness" ] }, "library_metadata": { @@ -153,7 +160,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeOfType.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeOfType.json index 7056bcf15504..fd29ce378f3c 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeOfType.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeOfType.json @@ -1,6 +1,6 @@ { "title": "Expect column values to be of type", - "description": "Expect a column to contain values of a specified data type.\n\nExpectColumnValuesToBeOfType is a Column Map Expectation for typed-column backends, and also for Pandas Datasources where the column dtype and provided type_ are unambiguous constraints (any dtype except 'object' or dtype of 'object' with type_ specified as 'object').\n\nFor Pandas 
columns with dtype of 'object' ExpectColumnValuesToBeOfType will\nindependently check each row's type.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n type\\_ (str): \nA string representing the data type that each column should have as entries. Valid types are defined by the current backend implementation and are dynamically loaded.\n\n For example, valid types for Pandas Datasources include any numpy dtype values (such as 'int64') or native python types (such as 'int'), whereas valid types for a SqlAlchemy Datasource include types named by the current driver such as 'INTEGER' in most SQL dialects and 'TEXT' in dialects such as postgresql. Valid types for Spark Datasources include 'StringType', 'BooleanType' and other pyspark-defined type names. Note that the strings representing these types are sometimes case-sensitive. For instance, with a Pandas backend `timestamp` will be unrecognized and fail the expectation, while `Timestamp` would pass with valid data.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least mostly fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. 
For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee also:\n [ExpectColumnValuesToBeInTypeList](https://greatexpectations.io/expectations/expect_column_values_to_be_in_type_list)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Schema\n\nExample Data:\n test test2\n 0 \"12345\" 1\n 1 \"abcde\" 2\n 2 \"1b3d5\" 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeOfType(\n column=\"test2\",\n type_=\"NUMBER\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n 
\"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToBeOfType(\n column=\"test\",\n type_=\"DOUBLE\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n \"12345\",\n \"abcde\",\n \"1b3d5\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect a column to contain values of a specified data type.\n\nExpectColumnValuesToBeOfType is a Column Map Expectation for typed-column backends, and also for Pandas Datasources where the column dtype and provided type_ are unambiguous constraints (any dtype except 'object' or dtype of 'object' with type_ specified as 'object').\n\nFor Pandas columns with dtype of 'object' ExpectColumnValuesToBeOfType will\nindependently check each row's type.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n type\\_ (str): \nA string representing the data type that each column should have as entries. 
Valid types are defined by the current backend implementation and are dynamically loaded.\n\n For example, valid types for Pandas Datasources include any numpy dtype values (such as 'int64') or native python types (such as 'int'), whereas valid types for a SqlAlchemy Datasource include types named by the current driver such as 'INTEGER' in most SQL dialects and 'TEXT' in dialects such as postgresql. Valid types for Spark Datasources include 'StringType', 'BooleanType' and other pyspark-defined type names. Note that the strings representing these types are sometimes case-sensitive. For instance, with a Pandas backend `timestamp` will be unrecognized and fail the expectation, while `Timestamp` would pass with valid data.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least mostly fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee also:\n [ExpectColumnValuesToBeInTypeList](https://greatexpectations.io/expectations/expect_column_values_to_be_in_type_list)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Schema\n\nExample Data:\n test test2\n 0 \"12345\" 1\n 1 \"abcde\" 2\n 2 \"1b3d5\" 3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeOfType(\n column=\"test2\",\n type_=\"NUMBER\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n 
ExpectColumnValuesToBeOfType(\n column=\"test\",\n type_=\"DOUBLE\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n \"12345\",\n \"abcde\",\n \"1b3d5\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -76,9 +76,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -158,7 +165,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeUnique.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeUnique.json index 5fb3f098f51b..74c1773dadc5 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeUnique.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToBeUnique.json @@ -1,6 +1,6 @@ { "title": "Expect column values to be unique", - "description": "Expect each column value to be unique.\n\nThis expectation detects duplicates. 
All duplicated values are counted as exceptions.\n\nFor example, [1, 2, 3, 3, 3] will return [3, 3, 3] in result.exceptions_list, with unexpected_percent = 60.0.\n\nExpectColumnValuesToBeUnique is a Column Map Expectation\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least mostly fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Cardinality\n\nExample Data:\n test test2\n 0 1 \"A\"\n 1 2 \"A\"\n 2 3 \"B\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeUnique(\n column=\"test\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToBeUnique(\n column=\"test2\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n 
\"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n \"A\",\n \"A\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 66.66666666666666,\n \"unexpected_percent_nonmissing\": 66.66666666666666\n },\n \"meta\": {},\n \"success\": true\n }", + "description": "Expect each column value to be unique.\n\nThis expectation detects duplicates. All duplicated values are counted as exceptions.\n\nFor example, [1, 2, 3, 3, 3] will return [3, 3, 3] in result.exceptions_list, with unexpected_percent = 60.0.\n\nExpectColumnValuesToBeUnique is a Column Map Expectation\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least mostly fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nExample Data:\n test test2\n 0 1 \"A\"\n 1 2 \"A\"\n 2 3 \"B\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToBeUnique(\n column=\"test\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToBeUnique(\n column=\"test2\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n 
},\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n \"A\",\n \"A\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 66.66666666666666,\n \"unexpected_percent_nonmissing\": 66.66666666666666\n },\n \"meta\": {},\n \"success\": true\n }", "type": "object", "properties": { "id": { @@ -76,9 +76,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -118,7 +125,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Cardinality" + "Uniqueness" ] }, "library_metadata": { @@ -152,10 +159,10 @@ "SQLite", "PostgreSQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", - "Databricks (SQL)" + "Databricks (SQL)", + "MySQL" ] } } diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePattern.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePattern.json index 89cb88fbbcde..7987ec16eac0 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePattern.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePattern.json @@ -1,6 +1,6 @@ { "title": "Expect column values to match like pattern", - "description": "Expect the column entries to be strings that match a given like pattern expression.\n\nExpectColumnValuesToMatchLikePattern is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n like_pattern (str): The SQL like pattern expression the column entries should match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Datasources:\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Pattern matching\n\nExample Data:\n test test2\n 0 \"aaa\" \"ade\"\n 1 \"abb\" \"bee\"\n 2 \"acc\" \"24601\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToMatchLikePattern(\n column=\"test\",\n like_pattern=\"[a]%\"\n )\n\n Output:\n {\n 
\"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToMatchLikePattern(\n column=\"test2\",\n like_pattern=\"[a]%\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n \"bee\",\n \"24601\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 66.66666666666666,\n \"unexpected_percent_nonmissing\": 66.66666666666666\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the column entries to be strings that match a given like pattern expression.\n\nExpectColumnValuesToMatchLikePattern is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n like_pattern (str): The SQL like pattern expression the column entries should match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). 
Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Data Sources:\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"ade\"\n 1 \"abb\" \"bee\"\n 2 \"acc\" \"24601\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToMatchLikePattern(\n column=\"test\",\n like_pattern=\"[a]%\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToMatchLikePattern(\n column=\"test2\",\n like_pattern=\"[a]%\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n \"bee\",\n \"24601\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 66.66666666666666,\n \"unexpected_percent_nonmissing\": 66.66666666666666\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -76,9 +76,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of 
values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -130,7 +137,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Pattern matching" + "Validity" ] }, "library_metadata": { @@ -163,8 +170,9 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", - "Databricks (SQL)" + "Databricks (SQL)", + "BigQuery", + "Snowflake" ] } } diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePatternList.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePatternList.json index 4e27314a1fc6..82c53c6d7c9c 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePatternList.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchLikePatternList.json @@ -1,6 +1,6 @@ { "title": "Expect column values to match like pattern list", - "description": "Expect the column entries to be strings that match any of a provided list of like pattern expressions.\n\nExpectColumnValuesToMatchLikePatternList is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n like_pattern_list (List[str]): The list of SQL like pattern expressions the column entries should match.\n match_on (string): 'any' or 'all'. Use 'any' if the value should match at least one like pattern in the list. 
Use 'all' if it should match each like pattern in the list.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n 
[ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Datasources:\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Pattern matching\n\nExample Data:\n test test2\n 0 \"aaa\" \"ade\"\n 1 \"abb\" \"adb\"\n 2 \"acc\" \"aaa\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToMatchLikePatternList(\n column=\"test\",\n like_pattern_list=[\"[aa]%\", \"[ab]%\", \"[ac]%\"],\n match_on=\"any\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToMatchLikePatternList(\n column=\"test2\",\n like_pattern_list=[\"[ad]%\", \"[a]%\"],\n match_on=\"all\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"aaa\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 
0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the column entries to be strings that match any of a provided list of like pattern expressions.\n\nExpectColumnValuesToMatchLikePatternList is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n like_pattern_list (List[str]): The list of SQL like pattern expressions the column entries should match.\n match_on (string): 'any' or 'all'. Use 'any' if the value should match at least one like pattern in the list. Use 'all' if it should match each like pattern in the list.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Data Sources:\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"ade\"\n 1 \"abb\" \"adb\"\n 2 \"acc\" 
\"aaa\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToMatchLikePatternList(\n column=\"test\",\n like_pattern_list=[\"[aa]%\", \"[ab]%\", \"[ac]%\"],\n match_on=\"any\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToMatchLikePatternList(\n column=\"test2\",\n like_pattern_list=[\"[ad]%\", \"[a]%\"],\n match_on=\"all\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"aaa\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -76,9 +76,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -143,7 +150,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Pattern matching" + "Validity" ] }, "library_metadata": { @@ -176,8 +183,9 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", - "Databricks (SQL)" + "Databricks (SQL)", + "BigQuery", + "Snowflake" ] } } diff --git 
a/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegex.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegex.json index c9bc6e07f0d7..cff3820baf74 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegex.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegex.json @@ -1,6 +1,6 @@ { "title": "Expect column values to match regex", - "description": "Expect the column entries to be strings that match a given regular expression.\n\nValid matches can be found anywhere in the string, for example \"[at]+\" will identify the following strings as expected: \"cat\", \"hat\", \"aa\", \"a\", and \"t\", and the following strings as unexpected: \"fish\", \"dog\".\n\nExpectColumnValuesToMatchRegex is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n regex (str): The regular expression the column entries should match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. 
For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality 
Category:\n Pattern matching\n\nExample Data:\n test test2\n 0 \"aaa\" \"bcc\"\n 1 \"abb\" \"bdd\"\n 2 \"acc\" \"abc\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToMatchRegex(\n column=\"test\",\n regex=\"^a.*\",\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToMatchRegex(\n column=\"test2\",\n regex=\"^a.*\",\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n \"bcc\",\n \"bdd\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 66.66666666666666,\n \"unexpected_percent_nonmissing\": 66.66666666666666\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the column entries to be strings that match a given regular expression.\n\nValid matches can be found anywhere in the string, for example \"[at]+\" will identify the following strings as expected: \"cat\", \"hat\", \"aa\", \"a\", and \"t\", and the following strings as unexpected: \"fish\", \"dog\".\n\nExpectColumnValuesToMatchRegex is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n regex (str): The regular expression the column entries should match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"bcc\"\n 1 \"abb\" \"bdd\"\n 2 
\"acc\" \"abc\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToMatchRegex(\n column=\"test\",\n regex=\"^a.*\",\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToMatchRegex(\n column=\"test2\",\n regex=\"^a.*\",\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n \"bcc\",\n \"bdd\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 66.66666666666666,\n \"unexpected_percent_nonmissing\": 66.66666666666666\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -76,9 +76,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -131,7 +138,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Pattern matching" + "Validity" ] }, "library_metadata": { @@ -164,8 +171,9 @@ "Spark", "PostgreSQL", "MySQL", - "Redshift", - "Databricks (SQL)" + "Databricks (SQL)", + "BigQuery", + "SQLite" ] } } diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegexList.json 
b/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegexList.json index 67bf10396675..a8ef243ed18d 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegexList.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToMatchRegexList.json @@ -1,6 +1,6 @@ { "title": "Expect column values to match regex list", - "description": "Expect the column entries to be strings that can be matched to either any of or all of a list of regular expressions.\n\nMatches can be anywhere in the string.\n\nExpectColumnValuesToMatchRegexList is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n regex_list (list): The list of regular expressions which the column entries should match.\n match_on (string): 'any' or 'all'. Use 'any' if the value should match at least one regular expression in the list. Use 'all' if it should match each regular expression in the list.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. 
For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n 
Pattern matching\n\nExample Data:\n test test2\n 0 \"aaa\" \"bcc\"\n 1 \"abb\" \"bdd\"\n 2 \"acc\" \"abc\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToMatchRegexList(\n column=\"test2\",\n regex_list=[\"^a.*\", \"^b.*\"],\n match_on=\"any\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToMatchRegexList(\n column=\"test\",\n regex_list=[\"^a.*\", \"^b.*\"],\n match_on=\"all\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100,\n \"partial_unexpected_list\": [\n \"bcc\",\n \"bdd\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100,\n \"unexpected_percent_nonmissing\": 100\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the column entries to be strings that can be matched to either any of or all of a list of regular expressions.\n\nMatches can be anywhere in the string.\n\nExpectColumnValuesToMatchRegexList is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n regex_list (list): The list of regular expressions which the column entries should match.\n match_on (string): 'any' or 'all'. Use 'any' if the value should match at least one regular expression in the list. Use 'all' if it should match each regular expression in the list.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"bcc\"\n 1 \"abb\" \"bdd\"\n 2 \"acc\" 
\"abc\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToMatchRegexList(\n column=\"test2\",\n regex_list=[\"^a.*\", \"^b.*\"],\n match_on=\"any\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToMatchRegexList(\n column=\"test\",\n regex_list=[\"^a.*\", \"^b.*\"],\n match_on=\"all\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100,\n \"partial_unexpected_list\": [\n \"bcc\",\n \"bdd\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100,\n \"unexpected_percent_nonmissing\": 100\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -76,9 +76,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -143,7 +150,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Pattern matching" + "Validity" ] }, "library_metadata": { @@ -176,8 +183,9 @@ "Spark", "PostgreSQL", "MySQL", - "Redshift", - "Databricks (SQL)" + "Databricks (SQL)", + "BigQuery", + "SQLite" ] } } diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeInSet.json 
b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeInSet.json index e0697cf2bdcf..ed50f4686f7e 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeInSet.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeInSet.json @@ -1,6 +1,6 @@ { "title": "Expect column values to not be in set", - "description": "Expect column entries to not be in the set.\n\nExpectColumnValuesToNotBeInSet is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A set of objects used for comparison.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToBeInSet](https://greatexpectations.io/expectations/expect_column_values_to_be_in_set)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Sets\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotBeInSet(\n column=\"test2\",\n value_set=[2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotBeInSet(\n column=\"test\",\n value_set=[2, 4],\n 
)\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n 2,\n 4\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 66.66666666666666,\n \"unexpected_percent_nonmissing\": 66.66666666666666\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect column entries to not be in the set.\n\nExpectColumnValuesToNotBeInSet is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n value_set (set-like): A set of objects used for comparison.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToBeInSet](https://greatexpectations.io/expectations/expect_column_values_to_be_in_set)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n Validity\n\nExample Data:\n test test2\n 0 1 1\n 1 2 1\n 2 4 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotBeInSet(\n column=\"test2\",\n value_set=[2, 4]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotBeInSet(\n 
column=\"test\",\n value_set=[2, 4],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n 2,\n 4\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 66.66666666666666,\n \"unexpected_percent_nonmissing\": 66.66666666666666\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -76,9 +76,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -189,7 +196,8 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Sets" + "Numeric", + "Validity" ] }, "library_metadata": { @@ -224,7 +232,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" @@ -234,7 +241,8 @@ } }, "required": [ - "column" + "column", + "value_set" ], "additionalProperties": false, "definitions": { diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeNull.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeNull.json index e76dfa9fb7d1..e296f15eabe4 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeNull.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotBeNull.json @@ -1,6 +1,6 @@ { "title": "Expect column values to not be null", - "description": "Expect the column values to not be null.\n\nTo be counted as an exception, values must be explicitly null or missing, such as a NULL in PostgreSQL or an\nnp.NaN in pandas. 
Empty strings don't count as null unless they have been coerced to a null type.\n\nExpectColumnValuesToNotBeNull is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToBeNull](https://greatexpectations.io/expectations/expect_column_values_to_be_null)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Missingness\n\nExample Data:\n test test2\n 0 NaN \"A\"\n 1 True NaN\n 2 False NaN\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotBeNull(\n column=\"test\",\n mostly=0.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n null\n ]\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotBeNull(\n column=\"test2\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n 
\"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n null,\n null\n ]\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the column values to not be null.\n\nTo be counted as an exception, values must be explicitly null or missing, such as a NULL in PostgreSQL or an\nnp.NaN in pandas. Empty strings don't count as null unless they have been coerced to a null type.\n\nExpectColumnValuesToNotBeNull is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToBeNull](https://greatexpectations.io/expectations/expect_column_values_to_be_null)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Completeness\n\nExample Data:\n test test2\n 0 NaN \"A\"\n 1 True NaN\n 2 False NaN\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotBeNull(\n column=\"test\",\n mostly=0.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n null\n ]\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotBeNull(\n column=\"test2\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": 
null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 2,\n \"unexpected_percent\": 66.66666666666666,\n \"partial_unexpected_list\": [\n null,\n null\n ]\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -76,9 +76,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -118,7 +125,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Missingness" + "Completeness" ] }, "library_metadata": { @@ -153,7 +160,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePattern.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePattern.json index 69a35413d7f5..bf079e17201d 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePattern.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePattern.json @@ -1,6 +1,6 @@ { "title": "Expect column values to not match like pattern", - "description": "Expect the column entries to be strings that do NOT match a given like pattern expression.\n\nExpectColumnValuesToNotMatchLikePattern is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n like_pattern (str): The SQL like pattern expression the column entries should NOT match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Datasources:\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Pattern matching\n\nExample Data:\n test test2\n 0 \"aaa\" \"ade\"\n 1 \"abb\" \"bee\"\n 2 \"acc\" \"24601\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotMatchLikePattern(\n column=\"test2\",\n like_pattern=\"[a]%\",\n mostly=.66\n )\n\n Output:\n {\n 
\"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"ade\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotMatchLikePattern(\n column=\"test\",\n like_pattern=\"[a]%\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100,\n \"partial_unexpected_list\": [\n \"aaa\",\n \"abb\",\n \"acc\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100,\n \"unexpected_percent_nonmissing\": 100\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the column entries to be strings that do NOT match a given like pattern expression.\n\nExpectColumnValuesToNotMatchLikePattern is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n like_pattern (str): The SQL like pattern expression the column entries should NOT match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. 
For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Data 
Sources:\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"ade\"\n 1 \"abb\" \"bee\"\n 2 \"acc\" \"24601\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotMatchLikePattern(\n column=\"test2\",\n like_pattern=\"[a]%\",\n mostly=.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"ade\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotMatchLikePattern(\n column=\"test\",\n like_pattern=\"[a]%\"\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100,\n \"partial_unexpected_list\": [\n \"aaa\",\n \"abb\",\n \"acc\"\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100,\n \"unexpected_percent_nonmissing\": 100\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -76,9 +76,16 @@ "title": "Mostly", 
"description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -130,7 +137,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Pattern matching" + "Validity" ] }, "library_metadata": { @@ -163,8 +170,8 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", - "Databricks (SQL)" + "Databricks (SQL)", + "Snowflake" ] } } diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePatternList.json b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePatternList.json index b66be9480624..50fb271c8afc 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePatternList.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchLikePatternList.json @@ -1,6 +1,6 @@ { "title": "Expect column values to not match like pattern list", - "description": "Expect the column entries to be strings that do NOT match any of a provided list of like pattern expressions.\n\nExpectColumnValuesToNotMatchLikePatternList is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n like_pattern_list (List[str]): The list of SQL like pattern expressions the column entries should NOT match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. 
For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n\nSupported Datasources:\n 
[SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Pattern matching\n\nExample Data:\n test test2\n 0 \"aaa\" \"ade\"\n 1 \"abb\" \"adb\"\n 2 \"acc\" \"aaa\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotMatchLikePatternList(\n column=\"test2\",\n like_pattern_list=[\"[aa]%\", \"[ab]%\", \"[ac]%\"],\n mostly=.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"aaa\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotMatchLikePatternList(\n column=\"test\",\n like_pattern_list=[\"[aa]%\", \"[ab]%\", \"[ac]%\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100,\n \"partial_unexpected_list\": [\n \"aaa\",\n \"abb\",\n \"acc\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100,\n \"unexpected_percent_nonmissing\": 100\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the column entries to be strings that do NOT match any of a provided list of like pattern 
expressions.\n\nExpectColumnValuesToNotMatchLikePatternList is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n like_pattern_list (List[str]): The list of SQL like pattern expressions the column entries should NOT match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n\nSupported Data Sources:\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"ade\"\n 1 \"abb\" \"adb\"\n 2 \"acc\" \"aaa\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotMatchLikePatternList(\n 
column=\"test2\",\n like_pattern_list=[\"[aa]%\", \"[ab]%\", \"[ac]%\"],\n mostly=.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"aaa\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotMatchLikePatternList(\n column=\"test\",\n like_pattern_list=[\"[aa]%\", \"[ab]%\", \"[ac]%\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100,\n \"partial_unexpected_list\": [\n \"aaa\",\n \"abb\",\n \"acc\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100,\n \"unexpected_percent_nonmissing\": 100\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -76,9 +76,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -133,7 +140,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Pattern matching" + "Validity" ] }, "library_metadata": { @@ -166,8 +173,8 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", - "Databricks (SQL)" + "Databricks (SQL)", + "Snowflake" ] } } diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegex.json 
b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegex.json index 0e745ce41b28..aa4bff65cebd 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegex.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegex.json @@ -1,6 +1,6 @@ { "title": "Expect column values to not match regex", - "description": "Expect the column entries to be strings that do NOT match a given regular expression.\n\nThe regex must not match any portion of the provided string. For example, \"[at]+\" would identify the following strings as expected: \"fish\", \"dog\", and the following as unexpected: \"cat\", \"hat\".\n\nExpectColumnValuesToNotMatchRegex is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n regex (str): The regular expression the column entries should NOT match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. 
For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality 
Category:\n Pattern matching\n\nExample Data:\n test test2\n 0 \"aaa\" \"bcc\"\n 1 \"abb\" \"bdd\"\n 2 \"acc\" \"abc\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotMatchRegex(\n column=\"test2\",\n regex=\"^a.*\",\n mostly=.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"abc\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotMatchRegex(\n column=\"test\",\n regex=\"^a.*\",\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100,\n \"partial_unexpected_list\": [\n \"aaa\",\n \"abb\",\n \"acc\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100,\n \"unexpected_percent_nonmissing\": 100\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the column entries to be strings that do NOT match a given regular expression.\n\nThe regex must not match any portion of the provided string. For example, \"[at]+\" would identify the following strings as expected: \"fish\", \"dog\", and the following as unexpected: \"cat\", \"hat\".\n\nExpectColumnValuesToNotMatchRegex is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. 
If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n regex (str): The regular expression the column entries should NOT match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex_list)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"bcc\"\n 1 \"abb\" \"bdd\"\n 2 \"acc\" \"abc\"\n\nCode Examples:\n Passing Case:\n Input:\n 
ExpectColumnValuesToNotMatchRegex(\n column=\"test2\",\n regex=\"^a.*\",\n mostly=.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"abc\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotMatchRegex(\n column=\"test\",\n regex=\"^a.*\",\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100,\n \"partial_unexpected_list\": [\n \"aaa\",\n \"abb\",\n \"acc\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100,\n \"unexpected_percent_nonmissing\": 100\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -76,9 +76,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -130,7 +137,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Pattern matching" + "Validity" ] }, "library_metadata": { @@ -163,8 +170,8 @@ "Spark", "PostgreSQL", "MySQL", - "Redshift", - "Databricks (SQL)" + "Databricks (SQL)", + "SQLite" ] } } diff --git a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegexList.json 
b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegexList.json index 1431e56e552e..6ad8956cbd03 100644 --- a/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegexList.json +++ b/great_expectations/expectations/core/schemas/ExpectColumnValuesToNotMatchRegexList.json @@ -1,6 +1,6 @@ { "title": "Expect column values to not match regex list", - "description": "Expect the column entries to be strings that do not match any of a list of regular expressions. Matches can be anywhere in the string.\n\nExpectColumnValuesToNotMatchRegexList is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n regex_list (list): The list of regular expressions which the column entries should not match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Pattern matching\n\nExample Data:\n test test2\n 0 \"aaa\" \"bcc\"\n 1 \"abb\" \"bdd\"\n 2 \"acc\" \"abc\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotMatchRegexList(\n column=\"test\",\n regex_list=[\"^b.*\", \"^c.*\"],\n )\n\n Output:\n {\n 
\"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotMatchRegexList(\n column=\"test2\",\n regex_list=[\"^b.*\", \"^c.*\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"abc\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the column entries to be strings that do not match any of a list of regular expressions. Matches can be anywhere in the string.\n\nExpectColumnValuesToNotMatchRegexList is a Column Map Expectation.\n\nColumn Map Expectations are one of the most common types of Expectation.\nThey are evaluated for a single column and ask a yes/no question for every row in that column.\nBased on the result, they then calculate the percentage of rows that gave a positive answer. If the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column (str): The column name.\n regex_list (list): The list of regular expressions which the column entries should not match.\n\nOther Parameters:\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. 
For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly).\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectColumnValuesToMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_match_regex)\n [ExpectColumnValuesToMatchRegexList](https://greatexpectations.io/expectations/expect_column_values_to_match_regex_list)\n [ExpectColumnValuesToNotMatchRegex](https://greatexpectations.io/expectations/expect_column_values_to_not_match_regex)\n [ExpectColumnValuesToMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern)\n [ExpectColumnValuesToMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_match_like_pattern_list)\n [ExpectColumnValuesToNotMatchLikePattern](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern)\n [ExpectColumnValuesToNotMatchLikePatternList](https://greatexpectations.io/expectations/expect_column_values_to_not_match_like_pattern_list)\n\nSupported Data Sources:\n 
[Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Validity\n\nExample Data:\n test test2\n 0 \"aaa\" \"bcc\"\n 1 \"abb\" \"bdd\"\n 2 \"acc\" \"abc\"\n\nCode Examples:\n Passing Case:\n Input:\n ExpectColumnValuesToNotMatchRegexList(\n column=\"test\",\n regex_list=[\"^b.*\", \"^c.*\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectColumnValuesToNotMatchRegexList(\n column=\"test2\",\n regex_list=[\"^b.*\", \"^c.*\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n \"abc\",\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -76,9 +76,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction 
of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -133,7 +140,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Pattern matching" + "Validity" ] }, "library_metadata": { @@ -166,8 +173,8 @@ "Spark", "PostgreSQL", "MySQL", - "Redshift", - "Databricks (SQL)" + "Databricks (SQL)", + "SQLite" ] } } diff --git a/great_expectations/expectations/core/schemas/ExpectCompoundColumnsToBeUnique.json b/great_expectations/expectations/core/schemas/ExpectCompoundColumnsToBeUnique.json index 512d04b9fb14..7a3a9e51b336 100644 --- a/great_expectations/expectations/core/schemas/ExpectCompoundColumnsToBeUnique.json +++ b/great_expectations/expectations/core/schemas/ExpectCompoundColumnsToBeUnique.json @@ -1,6 +1,6 @@ { "title": "Expect compound columns to be unique", - "description": "Expect the compound columns to be unique.\n\nExpectCompoundColumnsToBeUnique is a Multicolumn Map Expectation.\n\nMulticolumn Map Expectations are evaluated for a set of columns and ask a yes/no question about the row-wise relationship between those columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_list (tuple or list): Set of columns to be checked.\n\nOther Parameters:\n ignore_row_if (str): \"all_values_are_missing\", \"any_value_is_missing\", \"never\" If specified, sets the condition on which a given row is to be ignored. Default \"never\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). 
Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Cardinality\n\nExample Data:\n test test2 test3 test4\n 0 1 1 4 1\n 1 2 1 7 1\n 2 4 1 -3 1\n\nCode Examples:\n Passing Case:\n Input:\n ExpectCompoundColumnsToBeUnique(\n column_list=[\"test\", \"test2\", \"test3\"],\n )\n\n Output:\n 
{\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectCompoundColumnsToBeUnique(\n column_list=[\"test2\", \"test4\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n {\n \"test2\": 1,\n \"test4\": 1\n },\n {\n \"test2\": 1,\n \"test4\": 1\n },\n {\n \"test2\": 1,\n \"test4\": 1\n }\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the compound columns to be unique.\n\nExpectCompoundColumnsToBeUnique is a Multicolumn Map Expectation.\n\nMulticolumn Map Expectations are evaluated for a set of columns and ask a yes/no question about the row-wise relationship between those columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_list (tuple or list): Set of columns to be checked.\n\nOther Parameters:\n ignore_row_if (str): \"all_values_are_missing\", \"any_value_is_missing\", \"never\" If specified, sets the condition on which a given row is to be ignored. Default \"never\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. 
For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nExample Data:\n test test2 test3 test4\n 0 1 1 4 1\n 1 2 1 7 1\n 2 4 1 -3 1\n\nCode 
Examples:\n Passing Case:\n Input:\n ExpectCompoundColumnsToBeUnique(\n column_list=[\"test\", \"test2\", \"test3\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectCompoundColumnsToBeUnique(\n column_list=[\"test2\", \"test4\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 3,\n \"unexpected_percent\": 100.0,\n \"partial_unexpected_list\": [\n {\n \"test2\": 1,\n \"test4\": 1\n },\n {\n \"test2\": 1,\n \"test4\": 1\n },\n {\n \"test2\": 1,\n \"test4\": 1\n }\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 100.0,\n \"unexpected_percent_nonmissing\": 100.0\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -78,9 +78,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -130,7 +137,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Cardinality" + "Uniqueness" ] }, "library_metadata": { @@ -165,7 +172,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectMulticolumnSumToEqual.json 
b/great_expectations/expectations/core/schemas/ExpectMulticolumnSumToEqual.json index 09d32ddfd5b7..25bb8665f960 100644 --- a/great_expectations/expectations/core/schemas/ExpectMulticolumnSumToEqual.json +++ b/great_expectations/expectations/core/schemas/ExpectMulticolumnSumToEqual.json @@ -1,6 +1,6 @@ { "title": "Expect multicolumn sum to equal", - "description": "Expect that the sum of row values in a specified column list is the same for each row, and equal to a specified sum total.\n\nExpectMulticolumnSumToEqual is a Multicolumn Map Expectation.\n\nMulticolumn Map Expectations are evaluated for a set of columns and ask a yes/no question about the row-wise relationship between those columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_list (tuple or list): Set of columns to be checked.\n sum_total (int or float): Expected sum of columns\n\nOther Parameters:\n ignore_row_if (str): \"both_values_are_missing\", \"either_value_is_missing\", \"neither\" If specified, sets the condition on which a given row is to be ignored. Default \"neither\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. 
For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Data integrity\n\nExample Data:\n test test2 test3\n 0 1 2 4\n 1 2 -2 7\n 2 4 4 -3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectMulticolumnSumToEqual(\n column_list=[\"test\", \"test2\", \"test3\"],\n sum_total=7,\n mostly=0.66\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n {\n \"test\": 4,\n \"test2\": 4,\n \"test3\": -3\n }\n 
],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectMulticolumnSumToEqual(\n column_list=[\"test\", \"test2\", \"test3\"],\n sum_total=7\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n {\n \"test\": 4,\n \"test2\": 4,\n \"test3\": -3\n }\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect that the sum of row values in a specified column list is the same for each row, and equal to a specified sum total.\n\nExpectMulticolumnSumToEqual is a Multicolumn Map Expectation.\n\nMulticolumn Map Expectations are evaluated for a set of columns and ask a yes/no question about the row-wise relationship between those columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_list (tuple or list): Set of columns to be checked.\n sum_total (int or float): Expected sum of columns\n\nOther Parameters:\n ignore_row_if (str): \"both_values_are_missing\", \"either_value_is_missing\", \"neither\" If specified, sets the condition on which a given row is to be ignored. Default \"neither\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). 
Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Numeric\n\nExample Data:\n test test2 test3\n 0 1 2 4\n 1 2 -2 7\n 2 4 4 -3\n\nCode Examples:\n Passing Case:\n Input:\n ExpectMulticolumnSumToEqual(\n column_list=[\"test\", \"test2\", \"test3\"],\n sum_total=7,\n mostly=0.66\n 
)\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n {\n \"test\": 4,\n \"test2\": 4,\n \"test3\": -3\n }\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectMulticolumnSumToEqual(\n column_list=[\"test\", \"test2\", \"test3\"],\n sum_total=7\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n {\n \"test\": 4,\n \"test2\": 4,\n \"test3\": -3\n }\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -78,9 +78,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -136,7 +143,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Data integrity" + "Numeric" ] }, "library_metadata": { @@ -171,7 +178,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectSelectColumnValuesToBeUniqueWithinRecord.json 
b/great_expectations/expectations/core/schemas/ExpectSelectColumnValuesToBeUniqueWithinRecord.json index 1b29e0f59fec..12db74d55bf2 100644 --- a/great_expectations/expectations/core/schemas/ExpectSelectColumnValuesToBeUniqueWithinRecord.json +++ b/great_expectations/expectations/core/schemas/ExpectSelectColumnValuesToBeUniqueWithinRecord.json @@ -1,6 +1,6 @@ { "title": "Expect select column values to be unique within record", - "description": "Expect the values for each record to be unique across the columns listed. Note that records can be duplicated.\n\nExpectSelectColumnValuesToBeUniqueWithinRecord is a Multicolumn Map Expectation.\n\nMulticolumn Map Expectations are evaluated for a set of columns and ask a yes/no question about the row-wise relationship between those columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_list (tuple or list): The column names to evaluate.\n\nOther Parameters:\n ignore_row_if (str): \"all_values_are_missing\", \"any_value_is_missing\", \"never\" If specified, sets the condition on which a given row is to be ignored. Default \"never\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. 
For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Cardinality\n\nFor example:\n::\n\n A B C\n 1 1 2 Fail\n 1 2 3 Pass\n 8 2 7 Pass\n 1 2 3 Pass\n 4 4 4 Fail\nExample Data:\n test test2 test3\n 0 1 1 2\n 1 1 2 3\n 2 8 2 7\n\nCode Examples:\n Passing Case:\n Input:\n ExpectSelectColumnValuesToBeUniqueWithinRecord(\n column_list=[\"test\", \"test3\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n 
\"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectSelectColumnValuesToBeUniqueWithinRecord(\n column_list=[\"test\", \"test2\", \"test3\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n {\n \"test\": 1,\n \"test2\": 1,\n \"test3\": 2\n }\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the values for each record to be unique across the columns listed. Note that records can be duplicated.\n\nExpectSelectColumnValuesToBeUniqueWithinRecord is a Multicolumn Map Expectation.\n\nMulticolumn Map Expectations are evaluated for a set of columns and ask a yes/no question about the row-wise relationship between those columns.\nBased on the result, they then calculate the percentage of rows that gave a positive answer.\nIf the percentage is high enough, the Expectation considers that data valid.\n\nArgs:\n column_list (tuple or list): The column names to evaluate.\n\nOther Parameters:\n ignore_row_if (str): \"all_values_are_missing\", \"any_value_is_missing\", \"never\" If specified, sets the condition on which a given row is to be ignored. Default \"never\".\n mostly (None or a float between 0 and 1): Successful if at least `mostly` fraction of values match the expectation. For more detail, see [mostly](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#mostly). 
Default 1.\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Uniqueness\n\nFor example:\n::\n\n A B C\n 1 1 2 Fail\n 1 2 3 Pass\n 8 2 7 Pass\n 1 2 3 Pass\n 4 4 4 Fail\nExample Data:\n test test2 test3\n 0 1 1 2\n 1 1 2 3\n 2 8 2 7\n\nCode Examples:\n Passing Case:\n Input:\n 
ExpectSelectColumnValuesToBeUniqueWithinRecord(\n column_list=[\"test\", \"test3\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 0,\n \"unexpected_percent\": 0.0,\n \"partial_unexpected_list\": [],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 0.0,\n \"unexpected_percent_nonmissing\": 0.0\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectSelectColumnValuesToBeUniqueWithinRecord(\n column_list=[\"test\", \"test2\", \"test3\"],\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"element_count\": 3,\n \"unexpected_count\": 1,\n \"unexpected_percent\": 33.33333333333333,\n \"partial_unexpected_list\": [\n {\n \"test\": 1,\n \"test2\": 1,\n \"test3\": 2\n }\n ],\n \"missing_count\": 0,\n \"missing_percent\": 0.0,\n \"unexpected_percent_total\": 33.33333333333333,\n \"unexpected_percent_nonmissing\": 33.33333333333333\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -78,9 +78,16 @@ "title": "Mostly", "description": "Successful if at least `mostly` fraction of values match the expectation.", "default": 1, - "minimum": 0.0, - "maximum": 1.0, - "type": "number", + "anyOf": [ + { + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + { + "type": "object" + } + ], "multipleOf": 0.01 }, "row_condition": { @@ -126,7 +133,7 @@ "title": "Data Quality Issues", "type": "array", "const": [ - "Cardinality" + "Uniqueness" ] }, "library_metadata": { @@ -161,7 +168,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectTableColumnCountToBeBetween.json 
b/great_expectations/expectations/core/schemas/ExpectTableColumnCountToBeBetween.json index 83772fe5c41e..a2f6d86c11ea 100644 --- a/great_expectations/expectations/core/schemas/ExpectTableColumnCountToBeBetween.json +++ b/great_expectations/expectations/core/schemas/ExpectTableColumnCountToBeBetween.json @@ -1,6 +1,6 @@ { "title": "Expect table column count to be between", - "description": "Expect the number of columns in a table to be between two values.\n\nExpectTableColumnCountToBeBetween is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n min_value (int or None): The minimum number of columns, inclusive.\n max_value (int or None): The maximum number of columns, inclusive.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive.\n * If min_value is None, then max_value is treated as an upper bound, and the number of acceptable columns has no minimum.\n * If max_value is None, then min_value is treated as a lower bound, and the number of acceptable columns has no maximum.\n\nSee Also:\n [ExpectTableColumnCountToEqual](https://greatexpectations.io/expectations/expect_table_column_count_to_equal)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Schema\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableColumnCountToBeBetween(\n min_value=1,\n max_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing 
Case:\n Input:\n ExpectTableColumnCountToBeBetween(\n min_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the number of columns in a table to be between two values.\n\nExpectTableColumnCountToBeBetween is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n min_value (int or None): The minimum number of columns, inclusive.\n max_value (int or None): The maximum number of columns, inclusive.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive.\n * If min_value is None, then max_value is treated as an upper bound, and the number of acceptable columns has no minimum.\n * If max_value is None, then min_value is treated as a lower bound, and the number of acceptable columns has no maximum.\n\nSee Also:\n [ExpectTableColumnCountToEqual](https://greatexpectations.io/expectations/expect_table_column_count_to_equal)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Schema\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableColumnCountToBeBetween(\n min_value=1,\n max_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2\n },\n \"meta\": {},\n \"success\": true\n }\n\n 
Failing Case:\n Input:\n ExpectTableColumnCountToBeBetween(\n min_value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -164,7 +164,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectTableColumnCountToEqual.json b/great_expectations/expectations/core/schemas/ExpectTableColumnCountToEqual.json index 4ae5466667ef..c8a3384b1f16 100644 --- a/great_expectations/expectations/core/schemas/ExpectTableColumnCountToEqual.json +++ b/great_expectations/expectations/core/schemas/ExpectTableColumnCountToEqual.json @@ -1,6 +1,6 @@ { "title": "Expect table column count to equal", - "description": "Expect the number of columns in a table to equal a value.\n\nExpectTableColumnCountToEqual is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n value (int): The expected number of columns.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectTableColumnCountToBeBetween](https://greatexpectations.io/expectations/expect_table_column_count_to_be_between)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Schema\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableColumnCountToEqual(\n value=2\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"meta\": {},\n \"success\": true,\n \"result\": {\n \"observed_value\": 2\n }\n }\n\n Failing Case:\n Input:\n ExpectTableColumnCountToEqual(\n value=1\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"meta\": {},\n \"success\": false,\n \"result\": {\n \"observed_value\": 2\n }\n }", + "description": "Expect the 
number of columns in a table to equal a value.\n\nExpectTableColumnCountToEqual is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n value (int): The expected number of columns.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectTableColumnCountToBeBetween](https://greatexpectations.io/expectations/expect_table_column_count_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n 
[BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Schema\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableColumnCountToEqual(\n value=2\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"meta\": {},\n \"success\": true,\n \"result\": {\n \"observed_value\": 2\n }\n }\n\n Failing Case:\n Input:\n ExpectTableColumnCountToEqual(\n value=1\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"meta\": {},\n \"success\": false,\n \"result\": {\n \"observed_value\": 2\n }\n }", "type": "object", "properties": { "id": { @@ -136,7 +136,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchOrderedList.json b/great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchOrderedList.json index f1fcfb06ddc8..063d53b263ec 100644 --- a/great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchOrderedList.json +++ b/great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchOrderedList.json @@ -1,6 +1,6 @@ { "title": "Expect table columns to match ordered list", - "description": "Expect the columns in a table to exactly match a specified list.\n\nExpectTableColumnsToMatchOrderedList is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n column_list (list of str): The column names, in the correct 
order.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Schema\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableColumnsToMatchOrderedList(\n column_list=[\"test\", \"test2\"]\n )\n\n Output:\n {\n 
\"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n \"test\",\n \"test2\"\n ]\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectTableColumnsToMatchOrderedList(\n column_list=[\"test2\", \"test\", \"test3\"]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n \"Unnamed: 0\",\n \"test\",\n \"test2\"\n ],\n \"details\": {\n \"mismatched\": [\n {\n \"Expected Column Position\": 1,\n \"Expected\": \"test2\",\n \"Found\": \"test\"\n },\n {\n \"Expected Column Position\": 2,\n \"Expected\": \"test\",\n \"Found\": \"test2\"\n },\n {\n \"Expected Column Position\": 3,\n \"Expected\": \"test3\",\n \"Found\": null\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the columns in a table to exactly match a specified list.\n\nExpectTableColumnsToMatchOrderedList is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n column_list (list of str): The column names, in the correct order.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Schema\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableColumnsToMatchOrderedList(\n column_list=[\"test\", \"test2\"]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n \"test\",\n \"test2\"\n ]\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectTableColumnsToMatchOrderedList(\n column_list=[\"test2\", \"test\", \"test3\"]\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n \"Unnamed: 0\",\n \"test\",\n \"test2\"\n ],\n \"details\": {\n \"mismatched\": [\n {\n \"Expected 
Column Position\": 1,\n \"Expected\": \"test2\",\n \"Found\": \"test\"\n },\n {\n \"Expected Column Position\": 2,\n \"Expected\": \"test\",\n \"Found\": \"test2\"\n },\n {\n \"Expected Column Position\": 3,\n \"Expected\": \"test3\",\n \"Found\": null\n }\n ]\n }\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -142,7 +142,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchSet.json b/great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchSet.json index b7389c58401d..046d58f7ccbe 100644 --- a/great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchSet.json +++ b/great_expectations/expectations/core/schemas/ExpectTableColumnsToMatchSet.json @@ -1,6 +1,6 @@ { "title": "Expect table columns to match set", - "description": "Expect the columns in a table to match an unordered set.\n\nExpectTableColumnsToMatchSet is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n column_set (list of str): The column names, in any order.\n exact_match (boolean): If True, the list of columns must exactly match the observed columns. If False, observed columns must include column_set but additional columns will pass. Default True.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. 
For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Schema\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableColumnsToMatchSet(\n column_set=[\"test\"],\n exact_match=False\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n \"test\",\n \"test2\"\n ],\n \"details\": {\n \"mismatched\": {\n \"unexpected\": [\n \"test2\"\n ]\n }\n }\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n 
ExpectTableColumnsToMatchSet(\n column_set=[\"test2\", \"test3\"],\n exact_match=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n \"test\",\n \"test2\"\n ],\n \"details\": {\n \"mismatched\": {\n \"unexpected\": [\n \"test\"\n ],\n \"missing\": [\n \"test3\"\n ]\n }\n }\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the columns in a table to match an unordered set.\n\nExpectTableColumnsToMatchSet is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n column_set (list of str): The column names, in any order.\n exact_match (boolean): If True, the list of columns must exactly match the observed columns. If False, observed columns must include column_set but additional columns will pass. Default True.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Schema\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableColumnsToMatchSet(\n column_set=[\"test\"],\n exact_match=False\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n \"test\",\n \"test2\"\n ],\n \"details\": {\n \"mismatched\": {\n \"unexpected\": [\n \"test2\"\n ]\n }\n }\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectTableColumnsToMatchSet(\n column_set=[\"test2\", \"test3\"],\n exact_match=True\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": [\n \"test\",\n 
\"test2\"\n ],\n \"details\": {\n \"mismatched\": {\n \"unexpected\": [\n \"test\"\n ],\n \"missing\": [\n \"test3\"\n ]\n }\n }\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -87,6 +87,7 @@ "exact_match": { "title": "Exact Match", "description": "If True, the list of columns must exactly match the observed columns. If False, observed columns must include column_set but additional columns will pass.", + "default": true, "type": "boolean" }, "metadata": { @@ -147,7 +148,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectTableRowCountToBeBetween.json b/great_expectations/expectations/core/schemas/ExpectTableRowCountToBeBetween.json index b30dbee2ef93..4caa101b609e 100644 --- a/great_expectations/expectations/core/schemas/ExpectTableRowCountToBeBetween.json +++ b/great_expectations/expectations/core/schemas/ExpectTableRowCountToBeBetween.json @@ -1,6 +1,6 @@ { "title": "Expect table row count to be between", - "description": "Expect the number of rows to be between two values.\n\nExpectTableRowCountToBeBetween is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n min_value (int or None): The minimum number of rows, inclusive.\n max_value (int or None): The maximum number of rows, inclusive.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. 
For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive.\n * If min_value is None, then max_value is treated as an upper bound, and the number of acceptable rows has no minimum.\n * If max_value is None, then min_value is treated as a lower bound, and the number of acceptable rows has no maximum.\n\nSee Also:\n [ExpectTableRowCountToEqual](https://greatexpectations.io/expectations/expect_table_row_count_to_equal)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Volume\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableRowCountToBeBetween(\n 
min_value=1,\n max_value=4\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectTableRowCountToBeBetween(\n max_value=2\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the number of rows to be between two values.\n\nExpectTableRowCountToBeBetween is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n min_value (int or None): The minimum number of rows, inclusive.\n max_value (int or None): The maximum number of rows, inclusive.\n strict_min (boolean): If True, the row count must be strictly larger than min_value, default=False\n strict_max (boolean): If True, the row count must be strictly smaller than max_value, default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound, and the number of acceptable rows has no minimum.\n * If max_value is None, then min_value is treated as a lower bound, and the number of acceptable rows has no maximum.\n\nSee Also:\n [ExpectTableRowCountToEqual](https://greatexpectations.io/expectations/expect_table_row_count_to_equal)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Volume\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableRowCountToBeBetween(\n min_value=1,\n max_value=4\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": 
{},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectTableRowCountToBeBetween(\n max_value=2\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { "id": { @@ -98,6 +98,18 @@ } ] }, + "strict_min": { + "title": "Strict Min", + "description": "If True, the row count must be strictly larger than min_value, default=False", + "default": false, + "type": "boolean" + }, + "strict_max": { + "title": "Strict Max", + "description": "If True, the row count must be strictly smaller than max_value, default=False", + "default": false, + "type": "boolean" + }, "row_condition": { "title": "Row Condition", "type": "string" @@ -170,7 +182,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectTableRowCountToEqual.json b/great_expectations/expectations/core/schemas/ExpectTableRowCountToEqual.json index 7635b1bb75b5..3851d57f451d 100644 --- a/great_expectations/expectations/core/schemas/ExpectTableRowCountToEqual.json +++ b/great_expectations/expectations/core/schemas/ExpectTableRowCountToEqual.json @@ -1,6 +1,6 @@ { "title": "Expect table row count to equal", - "description": "Expect the number of rows to equal a value.\n\nExpectTableRowCountToEqual is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n value (int): The expected number of rows.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. 
For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectTableRowCountToBeBetween](https://greatexpectations.io/expectations/expect_table_row_count_to_be_between)\n\nSupported Datasources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Volume\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableRowCountToEqual(\n value=3\n )\n\n Output:\n {\n \"exception_info\": {\n 
\"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectTableRowCountToEqual(\n value=2\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the number of rows to equal a value.\n\nExpectTableRowCountToEqual is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n value (int): The expected number of rows.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectTableRowCountToBeBetween](https://greatexpectations.io/expectations/expect_table_row_count_to_be_between)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Volume\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableRowCountToEqual(\n value=3\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectTableRowCountToEqual(\n value=2\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": false\n }", "type": "object", "properties": { 
"id": { @@ -150,7 +150,6 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", "BigQuery", "Snowflake", "Databricks (SQL)" diff --git a/great_expectations/expectations/core/schemas/ExpectTableRowCountToEqualOtherTable.json b/great_expectations/expectations/core/schemas/ExpectTableRowCountToEqualOtherTable.json index 82acd69bf6db..f87333f35f9c 100644 --- a/great_expectations/expectations/core/schemas/ExpectTableRowCountToEqualOtherTable.json +++ b/great_expectations/expectations/core/schemas/ExpectTableRowCountToEqualOtherTable.json @@ -1,6 +1,6 @@ { "title": "Expect table row count to equal other table", - "description": "Expect the number of rows to equal the number in another table within the same database.\n\nExpectTableRowCountToEqualOtherTable is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n other_table_name (str): The name of the other table. Other table must be located within the same database.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectTableRowCountToBeBetween](https://greatexpectations.io/expectations/expect_table_row_count_to_be_between)\n [ExpectTableRowCountToEqual](https://greatexpectations.io/expectations/expect_table_row_count_to_equal)\n\nSupported Datasources:\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Redshift](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Category:\n Volume\n\nExample Data:\n test_table\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\n test_table_two\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\n test_table_three\n test test2\n 0 1.00 2\n 1 2.30 5\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableRowCountToEqualOtherTable(\n other_table_name=test_table_two\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectTableRowCountToEqualOtherTable(\n other_table_name=test_table_three\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2\n },\n \"meta\": {},\n \"success\": false\n }", + "description": "Expect the number of rows to equal the number in 
another table within the same database.\n\nExpectTableRowCountToEqualOtherTable is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n other_table_name (str): The name of the other table. Other table must be located within the same database.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nSee Also:\n [ExpectTableRowCountToBeBetween](https://greatexpectations.io/expectations/expect_table_row_count_to_be_between)\n [ExpectTableRowCountToEqual](https://greatexpectations.io/expectations/expect_table_row_count_to_equal)\n\nSupported Data Sources:\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Volume\n\nExample Data:\n test_table\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\n test_table_two\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\n test_table_three\n test test2\n 0 1.00 2\n 1 2.30 5\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableRowCountToEqualOtherTable(\n other_table_name=test_table_two\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectTableRowCountToEqualOtherTable(\n other_table_name=test_table_three\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 2\n },\n \"meta\": {},\n 
\"success\": false\n }", "type": "object", "properties": { "id": { @@ -142,8 +142,8 @@ "PostgreSQL", "MySQL", "MSSQL", - "Redshift", - "Databricks (SQL)" + "Databricks (SQL)", + "Snowflake" ] } } diff --git a/great_expectations/expectations/core/schemas/UnexpectedRowsExpectation.json b/great_expectations/expectations/core/schemas/UnexpectedRowsExpectation.json index 67720ac0a527..ed5510c0f387 100644 --- a/great_expectations/expectations/core/schemas/UnexpectedRowsExpectation.json +++ b/great_expectations/expectations/core/schemas/UnexpectedRowsExpectation.json @@ -1,6 +1,6 @@ { "title": "Custom Expectation with SQL", - "description": "This Expectation will fail validation if the query returns one or more rows. The WHERE clause defines the fail criteria.\n\nUnexpectedRowsExpectations facilitate the execution of SQL or Spark-SQL queries as the core logic for an Expectation. UnexpectedRowsExpectations must implement a `_validate(...)` method containing logic for determining whether data returned by the executed query is successfully validated. 
One is written by default, but can be overridden.\n\nA successful validation is one where the unexpected_rows_query returns no rows.\n\nUnexpectedRowsExpectation is a [Batch Expectation](https://docs.greatexpectations.io/docs/guides/expectations/creating_custom_expectations/how_to_create_custom_batch_expectations).\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n unexpected_rows_query (str): A SQL or Spark-SQL query to be executed for validation.\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\nSupported Datasources:\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)", + "description": "This Expectation will fail validation if the query returns one or more rows. The WHERE clause defines the fail criteria.\n\nUnexpectedRowsExpectations facilitate the execution of SQL or Spark-SQL queries as the core logic for an Expectation. UnexpectedRowsExpectations must implement a `_validate(...)` method containing logic for determining whether data returned by the executed query is successfully validated. 
One is written by default, but can be overridden.\n\nA successful validation is one where the unexpected_rows_query returns no rows.\n\nUnexpectedRowsExpectation is a [Batch Expectation](https://docs.greatexpectations.io/docs/guides/expectations/creating_custom_expectations/how_to_create_custom_batch_expectations).\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n unexpected_rows_query (str): A SQL or Spark-SQL query to be executed for validation.\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\nSupported Data Sources:\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n SQL", "type": "object", "properties": { "id": { @@ -93,7 +93,9 @@ "data_quality_issues": { "title": "Data Quality Issues", "type": "array", - "const": [] + "const": [ + "SQL" + ] }, "short_description": { "title": "Short Description", @@ -104,9 +106,12 @@ "title": "Supported Data Sources", "type": "array", "const": [ + "Spark", "PostgreSQL", + "BigQuery", "Snowflake", - "SQLite" + "MySQL", + "Databricks (SQL)" ] } } diff --git a/great_expectations/expectations/core/unexpected_rows_expectation.py b/great_expectations/expectations/core/unexpected_rows_expectation.py index 34b7f6d080d1..98e375062654 100644 --- a/great_expectations/expectations/core/unexpected_rows_expectation.py +++ 
b/great_expectations/expectations/core/unexpected_rows_expectation.py @@ -2,21 +2,34 @@ import logging from string import Formatter -from typing import TYPE_CHECKING, Any, ClassVar, Dict, Tuple, Type, Union +from typing import TYPE_CHECKING, Any, ClassVar, Dict, Optional, Tuple, Type, Union from great_expectations.compatibility import pydantic from great_expectations.compatibility.typing_extensions import override -from great_expectations.expectations.expectation import BatchExpectation +from great_expectations.expectations.expectation import ( + BatchExpectation, + render_suite_parameter_string, +) +from great_expectations.expectations.metadata_types import DataQualityIssues +from great_expectations.render import ( + AtomicDiagnosticRendererType, + RenderedAtomicContent, + renderedAtomicValueSchema, +) +from great_expectations.render.components import LegacyRendererType, RenderedStringTemplateContent +from great_expectations.render.renderer.renderer import renderer from great_expectations.render.renderer_configuration import ( CodeBlock, CodeBlockLanguage, RendererConfiguration, RendererValueType, ) +from great_expectations.render.util import substitute_none_for_missing if TYPE_CHECKING: from great_expectations.core import ExpectationValidationResult from great_expectations.execution_engine import ExecutionEngine + from great_expectations.expectations.expectation_configuration import ExpectationConfiguration logger = logging.getLogger(__name__) @@ -28,10 +41,14 @@ ) UNEXPECTED_ROWS_QUERY_DESCRIPTION = "A SQL or Spark-SQL query to be executed for validation." 
SUPPORTED_DATA_SOURCES = [ + "Spark", "PostgreSQL", + "BigQuery", "Snowflake", - "SQLite", + "MySQL", + "Databricks (SQL)", ] +DATA_QUALITY_ISSUES = [DataQualityIssues.SQL.value] class UnexpectedRowsExpectation(BatchExpectation): @@ -57,10 +74,16 @@ class UnexpectedRowsExpectation(BatchExpectation): Returns: An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result) - Supported Datasources: + Supported Data Sources: [{SUPPORTED_DATA_SOURCES[0]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[1]}](https://docs.greatexpectations.io/docs/application_integration_support/) [{SUPPORTED_DATA_SOURCES[2]}](https://docs.greatexpectations.io/docs/application_integration_support/) + [{SUPPORTED_DATA_SOURCES[3]}](https://docs.greatexpectations.io/docs/application_integration_support/) + [{SUPPORTED_DATA_SOURCES[4]}](https://docs.greatexpectations.io/docs/application_integration_support/) + [{SUPPORTED_DATA_SOURCES[5]}](https://docs.greatexpectations.io/docs/application_integration_support/) + + Data Quality Issues: + {DATA_QUALITY_ISSUES[0]} """ unexpected_rows_query: str = pydantic.Field(description=UNEXPECTED_ROWS_QUERY_DESCRIPTION) @@ -103,7 +126,7 @@ def schema_extra(schema: Dict[str, Any], model: Type[UnexpectedRowsExpectation]) "data_quality_issues": { "title": "Data Quality Issues", "type": "array", - "const": [], + "const": DATA_QUALITY_ISSUES, }, "short_description": { "title": "Short Description", @@ -133,6 +156,86 @@ def _prescriptive_template( ) return renderer_configuration + @classmethod + @renderer(renderer_type=LegacyRendererType.PRESCRIPTIVE) + @render_suite_parameter_string + @override + def _prescriptive_renderer( + cls, + configuration: Optional[ExpectationConfiguration] = None, + result: Optional[ExpectationValidationResult] = None, + runtime_configuration: Optional[dict] = None, + **kwargs, + ) -> list[RenderedStringTemplateContent]: + runtime_configuration = 
runtime_configuration or {} + styling = runtime_configuration.get("styling") + params = substitute_none_for_missing( + configuration.kwargs, # type: ignore[union-attr] # FIXME CoP + ["unexpected_rows_query"], + ) + + template_str = "Unexpected rows query: $unexpected_rows_query" + + return [ + RenderedStringTemplateContent( + content_block_type="string_template", + string_template={ + "template": template_str, + "params": params, + "styling": styling, + }, + ) + ] + + @classmethod + @renderer(renderer_type=AtomicDiagnosticRendererType.OBSERVED_VALUE) + @override + def _atomic_diagnostic_observed_value( + cls, + configuration: Optional[ExpectationConfiguration] = None, + result: Optional[ExpectationValidationResult] = None, + runtime_configuration: Optional[dict] = None, + ) -> RenderedAtomicContent: + renderer_configuration: RendererConfiguration = RendererConfiguration( + configuration=configuration, + result=result, + runtime_configuration=runtime_configuration, + ) + + unexpected_row_count = ( + result.get("result").get("observed_value") if result is not None else None + ) + + template_str = "" + if isinstance(unexpected_row_count, (int, float)): + renderer_configuration.add_param( + name="observed_value", + param_type=RendererValueType.NUMBER, + value=unexpected_row_count, + ) + + template_str = "$observed_value unexpected " + if unexpected_row_count == 1: + template_str += "row" + else: + template_str += "rows" + + renderer_configuration.template_str = template_str + + value_obj = renderedAtomicValueSchema.load( + { + "template": renderer_configuration.template_str, + "params": renderer_configuration.params.dict(), + "meta_notes": renderer_configuration.meta_notes, + "schema": {"type": "com.superconductive.rendered.string"}, + } + ) + return RenderedAtomicContent( + name=AtomicDiagnosticRendererType.OBSERVED_VALUE, + value=value_obj, + value_type="StringValueType", + ) + @override def _validate( self, @@ -142,15 +245,10 @@ def _validate( ) -> 
Union[ExpectationValidationResult, dict]: metric_value = metrics["unexpected_rows_query.table"] unexpected_row_count = metrics["unexpected_rows_query.row_count"] - observed_value = f"{unexpected_row_count} unexpected " - if unexpected_row_count == 1: - observed_value += "row" - else: - observed_value += "rows" return { "success": unexpected_row_count == 0, "result": { - "observed_value": observed_value, + "observed_value": unexpected_row_count, "details": {"unexpected_rows": metric_value}, }, } diff --git a/great_expectations/expectations/expectation.py b/great_expectations/expectations/expectation.py index 8fa34188c697..053f178413cc 100644 --- a/great_expectations/expectations/expectation.py +++ b/great_expectations/expectations/expectation.py @@ -100,7 +100,7 @@ num_to_str, ) from great_expectations.util import camel_to_snake -from great_expectations.validator.computed_metric import MetricValue # noqa: TCH001 +from great_expectations.validator.computed_metric import MetricValue # noqa: TCH001 # FIXME CoP from great_expectations.validator.metric_configuration import MetricConfiguration if TYPE_CHECKING: @@ -120,7 +120,7 @@ T = TypeVar("T", List[RenderedStringTemplateContent], RenderedAtomicContent) -def render_suite_parameter_string(render_func: Callable[P, T]) -> Callable[P, T]: # noqa: C901 +def render_suite_parameter_string(render_func: Callable[P, T]) -> Callable[P, T]: # noqa: C901 # FIXME CoP """Decorator for Expectation classes that renders suite parameters as strings. allows Expectations that use Suite Parameters to render the values @@ -133,7 +133,7 @@ def render_suite_parameter_string(render_func: Callable[P, T]) -> Callable[P, T] GreatExpectationsError: If runtime_configuration with suite_parameters is not provided. 
""" - def inner_func(*args: P.args, **kwargs: P.kwargs) -> T: # noqa: C901 - too complex + def inner_func(*args: P.args, **kwargs: P.kwargs) -> T: # noqa: C901 # too complex rendered_string_template = render_func(*args, **kwargs) current_expectation_params: list = [] app_template_str = "\n - $eval_param = $eval_param_value (at time of validation)." @@ -145,8 +145,8 @@ def inner_func(*args: P.args, **kwargs: P.kwargs) -> T: # noqa: C901 - too comp key = get_suite_parameter_key(value) current_expectation_params.append(key) - # if expectation configuration has no eval params, then don't look for the values in runtime_configuration # noqa: E501 - # isinstance check should be removed upon implementation of RenderedAtomicContent suite parameter support # noqa: E501 + # if expectation configuration has no eval params, then don't look for the values in runtime_configuration # noqa: E501 # FIXME CoP + # isinstance check should be removed upon implementation of RenderedAtomicContent suite parameter support # noqa: E501 # FIXME CoP if current_expectation_params and not isinstance( rendered_string_template, RenderedAtomicContent ): @@ -156,13 +156,13 @@ def inner_func(*args: P.args, **kwargs: P.kwargs) -> T: # noqa: C901 - too comp styling = runtime_configuration.get("styling") for key, val in eval_params.items(): for param in current_expectation_params: - # "key in param" condition allows for eval param values to be rendered if arithmetic is present # noqa: E501 + # "key in param" condition allows for eval param values to be rendered if arithmetic is present # noqa: E501 # FIXME CoP if key == param or key in param: app_params = {} app_params["eval_param"] = key app_params["eval_param_value"] = val rendered_content = RenderedStringTemplateContent( - **{ # type: ignore[arg-type] + **{ # type: ignore[arg-type] # FIXME CoP "content_block_type": "string_template", "string_template": { "template": app_template_str, @@ -173,9 +173,9 @@ def inner_func(*args: P.args, **kwargs: 
P.kwargs) -> T: # noqa: C901 - too comp ) rendered_string_template.append(rendered_content) else: - raise GreatExpectationsError( # noqa: TRY003 + raise GreatExpectationsError( # noqa: TRY003 # FIXME CoP f"""GX was not able to render the value of suite parameters. - Expectation {render_func} had suite parameters set, but they were not passed in.""" # noqa: E501 + Expectation {render_func} had suite parameters set, but they were not passed in.""" # noqa: E501 # FIXME CoP ) return rendered_string_template @@ -190,10 +190,10 @@ def param_method(param_name: str) -> Callable: If a helper method is decorated with @param_method(param_name="") and the param attribute does not exist, the method will return either the input RendererConfiguration or None depending on the declared return type. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if not param_name: # If param_name was passed as an empty string - raise RendererConfigurationError( # noqa: TRY003 + raise RendererConfigurationError( # noqa: TRY003 # FIXME CoP "Method decorated with @param_method must be passed an existing param_name." ) @@ -206,7 +206,7 @@ def wrapper( return_type: Type = param_func.__annotations__["return"] except KeyError: method_name: str = getattr(param_func, "__name__", repr(param_func)) - raise RendererConfigurationError( # noqa: TRY003 + raise RendererConfigurationError( # noqa: TRY003 # FIXME CoP "Methods decorated with @param_method must have an annotated return " f"type, but method {method_name} does not." 
) @@ -214,13 +214,13 @@ def wrapper( if hasattr(renderer_configuration.params, param_name): if getattr(renderer_configuration.params, param_name, None): return_obj = param_func(renderer_configuration=renderer_configuration) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if return_type is RendererConfiguration: return_obj = renderer_configuration else: return_obj = None else: - raise RendererConfigurationError( # noqa: TRY003 + raise RendererConfigurationError( # noqa: TRY003 # FIXME CoP f"RendererConfiguration.param does not have a param called {param_name}. " f'Use RendererConfiguration.add_param() with name="{param_name}" to add it.' ) @@ -238,7 +238,7 @@ class MetaExpectation(ModelMetaclass): Any class inheriting from Expectation will be registered based on the value of the "expectation_type" class attribute, or, if that is not set, by snake-casing the name of the class. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def __new__(cls, clsname, bases, attrs): newclass = super().__new__(cls, clsname, bases, attrs) @@ -281,10 +281,8 @@ class Expectation(pydantic.BaseModel, metaclass=MetaExpectation): In some cases, subclasses of Expectation, such as ColumnMapExpectation will already have correct implementations that may simply be inherited. - Additionally, they *may* provide implementations of: - 1. `validate_configuration`, which should raise an error if the configuration - will not be usable for the Expectation - 2. Data Docs rendering methods decorated with the @renderer decorator. See the + Additionally, they *may* provide implementations of Data Docs rendering methods + decorated with the @renderer decorator. 
""" class Config: @@ -385,8 +383,8 @@ def __eq__(self, other: object) -> bool: @pydantic.validator("result_format") def _validate_result_format(cls, result_format: ResultFormat | dict) -> ResultFormat | dict: if isinstance(result_format, dict) and "result_format" not in result_format: - raise ValueError( # noqa: TRY003 - "If configuring result format with a dictionary, the key 'result_format' must be present." # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "If configuring result format with a dictionary, the key 'result_format' must be present." # noqa: E501 # FIXME CoP ) return result_format @@ -401,7 +399,7 @@ def register_save_callback(self, save_callback: Callable[[Expectation], Expectat def save(self): """Save the current state of this Expectation.""" if not self._save_callback: - raise RuntimeError( # noqa: TRY003 + raise RuntimeError( # noqa: TRY003 # FIXME CoP "Expectation must be added to ExpectationSuite before it can be saved." ) if self._include_rendered_content: @@ -427,7 +425,7 @@ def render(self) -> None: """ Renders content using the atomic prescriptive renderer for each expectation configuration associated with this ExpectationSuite to ExpectationConfiguration.rendered_content. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP from great_expectations.render.renderer.inline_renderer import InlineRenderer inline_renderer = InlineRenderer(render_object=self.configuration) @@ -452,7 +450,7 @@ def _prescriptive_failed( ) -> RenderedAtomicContent: """ Default rendering function that is utilized by GX Cloud Front-end if an implemented atomic renderer fails - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP renderer_configuration: RendererConfiguration = RendererConfiguration( configuration=configuration, result=result, @@ -504,7 +502,7 @@ def _prescriptive_template( elif renderer_configuration.expectation_type: template_str = "$expectation_type" else: - raise ValueError("RendererConfiguration does not contain an expectation_type.") # noqa: TRY003 + raise ValueError("RendererConfiguration does not contain an expectation_type.") # noqa: TRY003 # FIXME CoP add_param_args = ( ( @@ -530,11 +528,11 @@ def _atomic_prescriptive_template( """ Template function that contains the logic that is shared by AtomicPrescriptiveRendererType.SUMMARY and LegacyRendererType.PRESCRIPTIVE. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # deprecated-v0.15.43 warnings.warn( - "The method _atomic_prescriptive_template is deprecated as of v0.15.43 and will be removed in v0.18. " # noqa: E501 - "Please refer to Expectation method _prescriptive_template for the latest renderer template pattern.", # noqa: E501 + "The method _atomic_prescriptive_template is deprecated as of v0.15.43 and will be removed in v0.18. 
" # noqa: E501 # FIXME CoP + "Please refer to Expectation method _prescriptive_template for the latest renderer template pattern.", # noqa: E501 # FIXME CoP DeprecationWarning, ) renderer_configuration: RendererConfiguration = RendererConfiguration( @@ -601,7 +599,7 @@ def _prescriptive_renderer( ) return [ RenderedStringTemplateContent( - **{ # type: ignore[arg-type] + **{ # type: ignore[arg-type] # FIXME CoP "content_block_type": "string_template", "styling": {"parent": {"classes": ["alert", "alert-warning"]}}, "string_template": { @@ -624,7 +622,7 @@ def _prescriptive_renderer( @classmethod @renderer(renderer_type=LegacyDiagnosticRendererType.META_PROPERTIES) - def _diagnostic_meta_properties_renderer( # noqa: C901 + def _diagnostic_meta_properties_renderer( # noqa: C901 # FIXME CoP cls, result: Optional[ExpectationValidationResult] = None, ) -> Union[list, List[str], List[list]]: @@ -647,7 +645,7 @@ def _diagnostic_meta_properties_renderer( # noqa: C901 | | must be exactly 4 columns | 4 | 1 | Here the custom column will be added in data docs. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if not result: return [] @@ -694,12 +692,12 @@ def _diagnostic_status_icon_renderer( raised_exception = result.exception_info["raised_exception"] else: for k, v in result.exception_info.items(): - # TODO JT: This accounts for a dictionary of type {"metric_id": ExceptionInfo} path defined in # noqa: E501 + # TODO JT: This accounts for a dictionary of type {"metric_id": ExceptionInfo} path defined in # noqa: E501 # FIXME CoP # validator._resolve_suite_level_graph_and_process_metric_evaluation_errors raised_exception = v["raised_exception"] if raised_exception: return RenderedStringTemplateContent( - **{ # type: ignore[arg-type] + **{ # type: ignore[arg-type] # FIXME CoP "content_block_type": "string_template", "string_template": { "template": "$icon", @@ -722,7 +720,7 @@ def _diagnostic_status_icon_renderer( if result.success: return RenderedStringTemplateContent( - **{ # type: ignore[arg-type] + **{ # type: ignore[arg-type] # FIXME CoP "content_block_type": "string_template", "string_template": { "template": "$icon", @@ -745,7 +743,7 @@ def _diagnostic_status_icon_renderer( ) else: return RenderedStringTemplateContent( - **{ # type: ignore[arg-type] + **{ # type: ignore[arg-type] # FIXME CoP "content_block_type": "string_template", "string_template": { "template": "$icon", @@ -784,12 +782,12 @@ def _diagnostic_unexpected_statement_renderer( exception["exception_traceback"] = result.exception_info["exception_traceback"] else: for k, v in result.exception_info.items(): - # TODO JT: This accounts for a dictionary of type {"metric_id": ExceptionInfo} path defined in # noqa: E501 + # TODO JT: This accounts for a dictionary of type {"metric_id": ExceptionInfo} path defined in # noqa: E501 # FIXME CoP # validator._resolve_suite_level_graph_and_process_metric_evaluation_errors exception["raised_exception"] = v["raised_exception"] exception["exception_message"] = v["exception_message"] exception["exception_traceback"] = 
v["exception_traceback"] - # This only pulls the first exception message and traceback from a list of exceptions to render in the data docs. # noqa: E501 + # This only pulls the first exception message and traceback from a list of exceptions to render in the data docs. # noqa: E501 # FIXME CoP break if exception["raised_exception"]: @@ -803,7 +801,7 @@ def _diagnostic_unexpected_statement_renderer( expectation_type = None exception_message = RenderedStringTemplateContent( - **{ # type: ignore[arg-type] + **{ # type: ignore[arg-type] # FIXME CoP "content_block_type": "string_template", "string_template": { "template": exception_message_template_str, @@ -824,11 +822,11 @@ def _diagnostic_unexpected_statement_renderer( ) exception_traceback_collapse = CollapseContent( - **{ # type: ignore[arg-type] + **{ # type: ignore[arg-type] # FIXME CoP "collapse_toggle_link": "Show exception traceback...", "collapse": [ RenderedStringTemplateContent( - **{ # type: ignore[arg-type] + **{ # type: ignore[arg-type] # FIXME CoP "content_block_type": "string_template", "string_template": { "template": exception["exception_traceback"], @@ -858,7 +856,7 @@ def _diagnostic_unexpected_statement_renderer( return [ RenderedStringTemplateContent( - **{ # type: ignore[arg-type] + **{ # type: ignore[arg-type] # FIXME CoP "content_block_type": "string_template", "string_template": { "template": template_str, @@ -876,7 +874,7 @@ def _diagnostic_unexpected_statement_renderer( @classmethod @renderer(renderer_type=LegacyDiagnosticRendererType.UNEXPECTED_TABLE) - def _diagnostic_unexpected_table_renderer( # noqa: C901, PLR0912 + def _diagnostic_unexpected_table_renderer( # noqa: C901, PLR0912 # FIXME CoP cls, configuration: Optional[ExpectationConfiguration] = None, result: Optional[ExpectationValidationResult] = None, @@ -944,7 +942,7 @@ def _diagnostic_unexpected_table_renderer( # noqa: C901, PLR0912 sampled_values_set.add(string_unexpected_value) unexpected_table_content_block = 
RenderedTableContent( - **{ # type: ignore[arg-type] + **{ # type: ignore[arg-type] # FIXME CoP "content_block_type": "table", "table": table_rows, "header_row": header_row, @@ -957,11 +955,11 @@ def _diagnostic_unexpected_table_renderer( # noqa: C901, PLR0912 if not isinstance(query, str): query = str(query) query_info = CollapseContent( - **{ # type: ignore[arg-type] + **{ # type: ignore[arg-type] # FIXME CoP "collapse_toggle_link": "To retrieve all unexpected values...", "collapse": [ RenderedStringTemplateContent( - **{ # type: ignore[arg-type] + **{ # type: ignore[arg-type] # FIXME CoP "content_block_type": "string_template", "string_template": { "template": query, @@ -1146,7 +1144,7 @@ def metrics_validate( metric_configuration: MetricConfiguration provided_metrics: Dict[str, MetricValue] = { metric_name: metrics[metric_configuration.id] - for metric_name, metric_configuration in validation_dependencies.metric_configurations.items() # noqa: E501 + for metric_name, metric_configuration in validation_dependencies.metric_configurations.items() # noqa: E501 # FIXME CoP } expectation_validation_result: Union[ExpectationValidationResult, dict] = self._validate( @@ -1173,7 +1171,7 @@ def _build_evr( **kwargs: dict, ) -> ExpectationValidationResult: """_build_evr is a lightweight convenience wrapper handling cases where an Expectation implementor - fails to return an EVR but returns the necessary components in a dictionary.""" # noqa: E501 + fails to return an EVR but returns the necessary components in a dictionary.""" # noqa: E501 # FIXME CoP configuration = self.configuration evr: ExpectationValidationResult @@ -1182,7 +1180,7 @@ def _build_evr( evr = ExpectationValidationResult(**raw_response) evr.expectation_config = configuration else: - raise GreatExpectationsError("Unable to build EVR") # noqa: TRY003 + raise GreatExpectationsError("Unable to build EVR") # noqa: TRY003 # FIXME CoP else: raw_response_dict: dict = raw_response.to_json_dict() evr = 
ExpectationValidationResult(**raw_response_dict) @@ -1194,7 +1192,7 @@ def get_validation_dependencies( execution_engine: Optional[ExecutionEngine] = None, runtime_configuration: Optional[dict] = None, ) -> ValidationDependencies: - """Returns the result format and metrics required to validate this Expectation using the provided result format.""" # noqa: E501 + """Returns the result format and metrics required to validate this Expectation using the provided result format.""" # noqa: E501 # FIXME CoP from great_expectations.validator.validator import ValidationDependencies runtime_configuration = self._get_runtime_kwargs( @@ -1220,7 +1218,7 @@ def _get_domain_kwargs(self) -> Dict[str, Optional[str]]: } missing_kwargs: Union[set, Set[str]] = set(self.domain_keys) - set(domain_kwargs.keys()) if missing_kwargs: - raise InvalidExpectationKwargsError(f"Missing domain kwargs: {list(missing_kwargs)}") # noqa: TRY003 + raise InvalidExpectationKwargsError(f"Missing domain kwargs: {list(missing_kwargs)}") # noqa: TRY003 # FIXME CoP return domain_kwargs def _get_success_kwargs(self) -> Dict[str, Any]: @@ -1270,7 +1268,6 @@ def _get_result_format( result_format = configuration_result_format return result_format - @public_api def validate_configuration( self, configuration: Optional[ExpectationConfiguration] = None ) -> None: @@ -1298,7 +1295,7 @@ def validate_( runtime_configuration: The runtime configuration for the Expectation. 
Returns: An ExpectationValidationResult object - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP configuration = deepcopy(self.configuration) # issue warnings if necessary @@ -1360,7 +1357,7 @@ def __copy__(self): return self.copy(update={"id": None}, deep=True) @public_api - def run_diagnostics( # noqa: PLR0913 + def run_diagnostics( # noqa: PLR0913 # FIXME CoP self, raise_exceptions_for_backends: bool = False, ignore_suppress: bool = False, @@ -1401,7 +1398,7 @@ def run_diagnostics( # noqa: PLR0913 Returns: An Expectation Diagnostics report object - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP from great_expectations.core.expectation_diagnostics.expectation_doctor import ( ExpectationDoctor, ) @@ -1434,7 +1431,7 @@ def print_diagnostic_checklist( show_failed_tests (bool): If true, failing tests will be printed. backends: list of backends to pass to run_diagnostics show_debug_messages (bool): If true, create a logger and pass to run_diagnostics - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP from great_expectations.core.expectation_diagnostics.expectation_doctor import ( ExpectationDoctor, ) @@ -1454,7 +1451,7 @@ def _warn_if_result_format_config_in_runtime_configuration( """ if runtime_configuration and runtime_configuration.get("result_format"): warnings.warn( - "`result_format` configured at the Validator-level will not be persisted. Please add the configuration to your Checkpoint config or checkpoint_run() method instead.", # noqa: E501 + "`result_format` configured at the Validator-level will not be persisted. Please add the configuration to your Checkpoint config or checkpoint_run() method instead.", # noqa: E501 # FIXME CoP UserWarning, ) @@ -1467,7 +1464,7 @@ def _warn_if_result_format_config_in_expectation_configuration( if configuration.kwargs.get("result_format"): warnings.warn( - "`result_format` configured at the Expectation-level will not be persisted. 
Please add the configuration to your Checkpoint config or checkpoint_run() method instead.", # noqa: E501 + "`result_format` configured at the Expectation-level will not be persisted. Please add the configuration to your Checkpoint config or checkpoint_run() method instead.", # noqa: E501 # FIXME CoP UserWarning, ) @@ -1478,7 +1475,7 @@ def _add_array_params( renderer_configuration: RendererConfiguration, ) -> RendererConfiguration: if not param_prefix: - raise RendererConfigurationError("Array param_prefix must be a non-empty string.") # noqa: TRY003 + raise RendererConfigurationError("Array param_prefix must be a non-empty string.") # noqa: TRY003 # FIXME CoP @param_method(param_name=array_param_name) def _add_params( @@ -1509,7 +1506,7 @@ def _get_array_string( renderer_configuration: RendererConfiguration, ) -> str: if not param_prefix: - raise RendererConfigurationError("Array param_prefix must be a non-empty string.") # noqa: TRY003 + raise RendererConfigurationError("Array param_prefix must be a non-empty string.") # noqa: TRY003 # FIXME CoP @param_method(param_name=array_param_name) def _get_string(renderer_configuration: RendererConfiguration) -> str: @@ -1570,16 +1567,13 @@ class BatchExpectation(Expectation, ABC): BatchExpectations must implement a `_validate(...)` method containing logic for determining whether the Expectation is successfully validated. - BatchExpectations may optionally provide implementations of `validate_configuration`, - which should raise an error if the configuration will not be usable for the Expectation. - Raises: InvalidExpectationConfigurationError: The configuration does not contain the values required by the Expectation. Args: domain_keys (tuple): A tuple of the keys used to determine the domain of the expectation. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP batch_id: Union[str, None] = None @@ -1635,7 +1629,7 @@ def get_validation_dependencies( return validation_dependencies - def _validate_metric_value_between( # noqa: C901, PLR0912 + def _validate_metric_value_between( # noqa: C901, PLR0912 # FIXME CoP self, metric_name, metrics: Dict, @@ -1662,18 +1656,18 @@ def _validate_metric_value_between( # noqa: C901, PLR0912 try: min_value = parse(min_value) except TypeError: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""Could not parse "min_value" of {min_value} (of type "{type(min_value)!s}) into datetime \ -representation.""" # noqa: E501 +representation.""" # noqa: E501 # FIXME CoP ) if isinstance(max_value, str): try: max_value = parse(max_value) except TypeError: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""Could not parse "max_value" of {max_value} (of type "{type(max_value)!s}) into datetime \ -representation.""" # noqa: E501 +representation.""" # noqa: E501 # FIXME CoP ) if isinstance(min_value, datetime.datetime) or isinstance(max_value, datetime.datetime): @@ -1681,9 +1675,9 @@ def _validate_metric_value_between( # noqa: C901, PLR0912 try: metric_value = parse(metric_value) except TypeError: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""Could not parse "metric_value" of {metric_value} (of type "{type(metric_value)!s}) into datetime \ -representation.""" # noqa: E501 +representation.""" # noqa: E501 # FIXME CoP ) if isinstance(min_value, datetime.date) or isinstance(max_value, datetime.date): @@ -1691,9 +1685,9 @@ def _validate_metric_value_between( # noqa: C901, PLR0912 try: metric_value = parse(metric_value).date() except TypeError: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""Could not parse "metric_value" of {metric_value} (of type "{type(metric_value)!s}) into datetime \ -representation.""" # noqa: 
E501 +representation.""" # noqa: E501 # FIXME CoP ) # Checking if mean lies between thresholds @@ -1725,13 +1719,9 @@ class QueryExpectation(BatchExpectation, ABC): QueryExpectations must implement a `_validate(...)` method containing logic for determining whether data returned by the executed query is successfully validated. - Query Expectations may optionally provide implementations of: - - 1. `validate_configuration`, which should raise an error if the configuration will not be usable for the Expectation. - - 2. Data Docs rendering methods decorated with the @renderer decorator. + Query Expectations may optionally provide implementations of Data Docs rendering methods decorated with the @renderer decorator. - QueryExpectations may optionally define a `query` attribute + QueryExpectations may optionally define a `query` attribute. Doing so precludes the need to pass a query into the Expectation. This default will be overridden if a query is passed in. @@ -1747,7 +1737,7 @@ class QueryExpectation(BatchExpectation, ABC): --Documentation-- - https://docs.greatexpectations.io/docs/guides/expectations/creating_custom_expectations/how_to_create_custom_query_expectations - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_keys: ClassVar[Tuple] = ("batch_id",) @@ -1778,7 +1768,7 @@ def validate_configuration( raise InvalidExpectationConfigurationError(str(e)) try: if not isinstance(query, str): - raise TypeError(f"'query' must be a string, but your query is type: {type(query)}") # noqa: TRY003, TRY301 + raise TypeError(f"'query' must be a string, but your query is type: {type(query)}") # noqa: TRY003, TRY301 # FIXME CoP parsed_query: Set[str] = { x for x in re.split(", |\\(|\n|\\)| |/", query) @@ -1787,12 +1777,12 @@ def validate_configuration( assert "{batch}" in parsed_query, ( "Your query appears to not be parameterized for a data asset. 
" "By not parameterizing your query with `{batch}`, " - "you may not be validating against your intended data asset, or the expectation may fail." # noqa: E501 + "you may not be validating against your intended data asset, or the expectation may fail." # noqa: E501 # FIXME CoP ) assert all(re.match("{.*?}", x) for x in parsed_query), ( "Your query appears to have hard-coded references to your data. " "By not parameterizing your query with `{batch}`, {col}, etc., " - "you may not be validating against your intended data asset, or the expectation may fail." # noqa: E501 + "you may not be validating against your intended data asset, or the expectation may fail." # noqa: E501 # FIXME CoP ) except (TypeError, AssertionError) as e: warnings.warn(str(e), UserWarning) @@ -1817,7 +1807,7 @@ class ColumnAggregateExpectation(BatchExpectation, ABC): Raises: InvalidExpectationConfigurationError: If no `column` is specified - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP column: StrictStr = Field(min_length=1, description=COLUMN_DESCRIPTION) row_condition: Union[str, None] = None @@ -1858,10 +1848,6 @@ class ColumnMapExpectation(BatchExpectation, ABC): ColumnMapExpectations must implement a `_validate(...)` method containing logic for determining whether the Expectation is successfully validated. - ColumnMapExpectations may optionally provide implementations of `validate_configuration`, - which should raise an error if the configuration will not be usable for the Expectation. By default, - the `validate_configuration` method will return an error if `column` is missing from the configuration. - Raises: InvalidExpectationConfigurationError: If `column` is missing from configuration. Args: @@ -1869,7 +1855,7 @@ class ColumnMapExpectation(BatchExpectation, ABC): expectation. success_keys (tuple): A tuple of the keys used to determine the success of the expectation. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP column: StrictStr = Field(min_length=1, description=COLUMN_DESCRIPTION) mostly: MostlyField = 1 @@ -1922,10 +1908,10 @@ def get_validation_dependencies( ) assert isinstance( self.map_metric, str - ), "ColumnMapExpectation must override get_validation_dependencies or declare exactly one map_metric" # noqa: E501 + ), "ColumnMapExpectation must override get_validation_dependencies or declare exactly one map_metric" # noqa: E501 # FIXME CoP assert ( self.metric_dependencies == tuple() - ), "ColumnMapExpectation must be configured using map_metric, and cannot have metric_dependencies declared." # noqa: E501 + ), "ColumnMapExpectation must be configured using map_metric, and cannot have metric_dependencies declared." # noqa: E501 # FIXME CoP metric_kwargs: dict @@ -2124,10 +2110,6 @@ class ColumnPairMapExpectation(BatchExpectation, ABC): ColumnPairMapExpectations must implement a `_validate(...)` method containing logic for determining whether the Expectation is successfully validated. - ColumnPairMapExpectations may optionally provide implementations of `validate_configuration`, - which should raise an error if the configuration will not be usable for the Expectation. By default, - the `validate_configuration` method will return an error if `column_A` and `column_B` are missing from the configuration. - Raises: InvalidExpectationConfigurationError: If `column_A` and `column_B` parameters are missing from the configuration. @@ -2136,7 +2118,7 @@ class ColumnPairMapExpectation(BatchExpectation, ABC): expectation. success_keys (tuple): A tuple of the keys used to determine the success of the expectation. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP column_A: StrictStr = Field(min_length=1, description=COLUMN_A_DESCRIPTION) column_B: StrictStr = Field(min_length=1, description=COLUMN_B_DESCRIPTION) @@ -2190,10 +2172,10 @@ def get_validation_dependencies( ) assert isinstance( self.map_metric, str - ), "ColumnPairMapExpectation must override get_validation_dependencies or declare exactly one map_metric" # noqa: E501 + ), "ColumnPairMapExpectation must override get_validation_dependencies or declare exactly one map_metric" # noqa: E501 # FIXME CoP assert ( self.metric_dependencies == tuple() - ), "ColumnPairMapExpectation must be configured using map_metric, and cannot have metric_dependencies declared." # noqa: E501 + ), "ColumnPairMapExpectation must be configured using map_metric, and cannot have metric_dependencies declared." # noqa: E501 # FIXME CoP metric_kwargs: dict configuration = self.configuration @@ -2381,10 +2363,6 @@ class MulticolumnMapExpectation(BatchExpectation, ABC): MulticolumnMapExpectations must implement a `_validate(...)` method containing logic for determining whether the Expectation is successfully validated. - MulticolumnMapExpectations may optionally provide implementations of `validate_configuration`, - which should raise an error if the configuration will not be usable for the Expectation. By default, - the `validate_configuration` method will return an error if `column_list` is missing from the configuration. - Raises: InvalidExpectationConfigurationError: If `column_list` is missing from configuration. @@ -2393,7 +2371,7 @@ class MulticolumnMapExpectation(BatchExpectation, ABC): expectation. success_keys (tuple): A tuple of the keys used to determine the success of the expectation. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP column_list: List[StrictStr] = pydantic.Field(description=COLUMN_LIST_DESCRIPTION) mostly: MostlyField = 1 @@ -2431,6 +2409,13 @@ def schema_extra(schema: Dict[str, Any], model: Type[MulticolumnMapExpectation]) } ) + @pydantic.validator("column_list") + def _validate_column_list(cls, v: List[str]) -> List[str]: + min_length = 2 + if len(v) < min_length: + raise ValueError("column_list must contain at least two columns.") # noqa: TRY003 # Error message swallowed by Pydantic + return v + @classmethod @override def is_abstract(cls) -> bool: @@ -2448,10 +2433,10 @@ def get_validation_dependencies( ) assert isinstance( self.map_metric, str - ), "MulticolumnMapExpectation must override get_validation_dependencies or declare exactly one map_metric" # noqa: E501 + ), "MulticolumnMapExpectation must override get_validation_dependencies or declare exactly one map_metric" # noqa: E501 # FIXME CoP assert ( self.metric_dependencies == tuple() - ), "MulticolumnMapExpectation must be configured using map_metric, and cannot have metric_dependencies declared." # noqa: E501 + ), "MulticolumnMapExpectation must be configured using map_metric, and cannot have metric_dependencies declared." # noqa: E501 # FIXME CoP # convenient name for updates configuration = self.configuration @@ -2663,7 +2648,7 @@ def __new__( ) -def _format_map_output( # noqa: C901, PLR0912, PLR0913, PLR0915 +def _format_map_output( # noqa: C901, PLR0912, PLR0913, PLR0915 # FIXME CoP result_format: dict, success: bool, element_count: Optional[int] = None, @@ -2684,11 +2669,11 @@ def _format_map_output( # noqa: C901, PLR0912, PLR0913, PLR0915 See :ref:`result_format` for more information. This function handles the logic for mapping those fields for column_map_expectations. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if element_count is None: element_count = 0 - # NB: unexpected_count parameter is explicit some implementing classes may limit the length of unexpected_list # noqa: E501 + # NB: unexpected_count parameter is explicit some implementing classes may limit the length of unexpected_list # noqa: E501 # FIXME CoP # Incrementally add to result and return when all values for the specified level are present return_obj: Dict[str, Any] = {"success": success} @@ -2810,7 +2795,7 @@ def _format_map_output( # noqa: C901, PLR0912, PLR0913, PLR0915 if result_format["result_format"] == ResultFormat.COMPLETE: return return_obj - raise ValueError(f"Unknown result_format {result_format['result_format']}.") # noqa: TRY003 + raise ValueError(f"Unknown result_format {result_format['result_format']}.") # noqa: TRY003 # FIXME CoP def _validate_dependencies_against_available_metrics( @@ -2825,11 +2810,11 @@ def _validate_dependencies_against_available_metrics( Raises: InvalidExpectationConfigurationError: If a validation dependency is not available as a Metric. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP for metric_config in validation_dependencies: if metric_config.id not in metrics: - raise InvalidExpectationConfigurationError( # noqa: TRY003 - f"Metric {metric_config.id} is not available for validation of configuration. Please check your configuration." # noqa: E501 + raise InvalidExpectationConfigurationError( # noqa: TRY003 # FIXME CoP + f"Metric {metric_config.id} is not available for validation of configuration. Please check your configuration." # noqa: E501 # FIXME CoP ) @@ -2855,11 +2840,11 @@ def add_values_with_json_schema_from_list_in_params( """ Utility function used in _atomic_prescriptive_template() to take list values from a given params dict key, convert each value to a dict with JSON schema type info, then add it to params_with_json_schema (dict). 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # deprecated-v0.15.43 warnings.warn( - "The method add_values_with_json_schema_from_list_in_params is deprecated as of v0.15.43 and will be removed in " # noqa: E501 - "v0.18. Please refer to Expectation method _prescriptive_template for the latest renderer template pattern.", # noqa: E501 + "The method add_values_with_json_schema_from_list_in_params is deprecated as of v0.15.43 and will be removed in " # noqa: E501 # FIXME CoP + "v0.18. Please refer to Expectation method _prescriptive_template for the latest renderer template pattern.", # noqa: E501 # FIXME CoP DeprecationWarning, ) target_list = params.get(param_key_with_list) diff --git a/great_expectations/expectations/expectation_configuration.py b/great_expectations/expectations/expectation_configuration.py index ceb68e25130e..b1012852360f 100644 --- a/great_expectations/expectations/expectation_configuration.py +++ b/great_expectations/expectations/expectation_configuration.py @@ -33,8 +33,8 @@ from great_expectations.render import RenderedAtomicContent, RenderedAtomicContentSchema from great_expectations.types import SerializableDictDot from great_expectations.util import ( - convert_to_json_serializable, # noqa: TID251 - ensure_json_serializable, # noqa: TID251 + convert_to_json_serializable, # noqa: TID251 # FIXME CoP + ensure_json_serializable, # noqa: TID251 # FIXME CoP ) if TYPE_CHECKING: @@ -47,7 +47,7 @@ def parse_result_format(result_format: Union[str, dict]) -> dict: """This is a simple helper utility that can be used to parse a string result_format into the dict format used internally by great_expectations. 
It is not necessary but allows shorthand for result_format in cases where - there is no need to specify a custom partial_unexpected_count.""" # noqa: E501 + there is no need to specify a custom partial_unexpected_count.""" # noqa: E501 # FIXME CoP if isinstance(result_format, str): result_format = { "result_format": result_format, @@ -56,7 +56,7 @@ def parse_result_format(result_format: Union[str, dict]) -> dict: } else: if "include_unexpected_rows" in result_format and "result_format" not in result_format: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "When using `include_unexpected_rows`, `result_format` must be explicitly specified" ) @@ -117,14 +117,14 @@ class ExpectationConfiguration(SerializableDictDot): InvalidExpectationConfigurationError: If `kwargs` arg is not a dict. InvalidExpectationKwargsError: If domain kwargs are missing. ValueError: If a `domain_type` cannot be determined. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP runtime_kwargs: ClassVar[tuple[str, ...]] = ( "result_format", "catch_exceptions", ) - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, type: str, kwargs: dict, @@ -137,10 +137,10 @@ def __init__( # noqa: PLR0913 rendered_content: Optional[List[RenderedAtomicContent]] = None, ) -> None: if not isinstance(type, str): - raise InvalidExpectationConfigurationError("expectation_type must be a string") # noqa: TRY003 + raise InvalidExpectationConfigurationError("expectation_type must be a string") # noqa: TRY003 # FIXME CoP self._type = type if not isinstance(kwargs, dict): - raise InvalidExpectationConfigurationError( # noqa: TRY003 + raise InvalidExpectationConfigurationError( # noqa: TRY003 # FIXME CoP "expectation configuration kwargs must be a dict." 
) self._kwargs = kwargs @@ -278,7 +278,7 @@ def get_domain_kwargs(self) -> dict: } missing_kwargs = set(domain_keys) - set(domain_kwargs.keys()) if missing_kwargs: - raise InvalidExpectationKwargsError(f"Missing domain kwargs: {list(missing_kwargs)}") # noqa: TRY003 + raise InvalidExpectationKwargsError(f"Missing domain kwargs: {list(missing_kwargs)}") # noqa: TRY003 # FIXME CoP return domain_kwargs @@ -349,7 +349,7 @@ def isEquivalentTo( other: Union[dict, ExpectationConfiguration], match_type: str = "success", ) -> bool: - """ExpectationConfiguration equivalence does not include meta, and relies on *equivalence* of kwargs.""" # noqa: E501 + """ExpectationConfiguration equivalence does not include meta, and relies on *equivalence* of kwargs.""" # noqa: E501 # FIXME CoP if not isinstance(other, self.__class__): if isinstance(other, dict): try: @@ -357,7 +357,7 @@ def isEquivalentTo( other = expectationConfigurationSchema.load(other) except ValidationError: logger.debug( - "Unable to evaluate equivalence of ExpectationConfiguration object with dict because " # noqa: E501 + "Unable to evaluate equivalence of ExpectationConfiguration object with dict because " # noqa: E501 # FIXME CoP "dict other could not be instantiated as an ExpectationConfiguration" ) return NotImplemented @@ -427,11 +427,11 @@ def to_json_dict(self) -> Dict[str, JSONValues]: A JSON-serializable dict representation of this ExpectationConfiguration. 
""" myself = expectationConfigurationSchema.dump(self) - # NOTE - JPC - 20191031: migrate to expectation-specific schemas that subclass result with properly-typed # noqa: E501 + # NOTE - JPC - 20191031: migrate to expectation-specific schemas that subclass result with properly-typed # noqa: E501 # FIXME CoP # schemas to get serialization all-the-way down via dump myself["kwargs"] = convert_to_json_serializable(myself["kwargs"]) - # Post dump hook removes this value if null so we need to ensure applicability before conversion # noqa: E501 + # Post dump hook removes this value if null so we need to ensure applicability before conversion # noqa: E501 # FIXME CoP if "expectation_context" in myself: myself["expectation_context"] = convert_to_json_serializable( myself["expectation_context"] @@ -475,8 +475,8 @@ def get_domain_type(self) -> MetricDomainTypes: if "column_list" in self.kwargs: return MetricDomainTypes.MULTICOLUMN - raise ValueError( # noqa: TRY003 - 'Unable to determine "domain_type" of this "ExpectationConfiguration" object from "kwargs" and heuristics.' # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + 'Unable to determine "domain_type" of this "ExpectationConfiguration" object from "kwargs" and heuristics.' 
# noqa: E501 # FIXME CoP ) def _get_expectation_class_defaults(self) -> dict[str, Any]: @@ -536,7 +536,7 @@ def convert_result_to_serializable(self, data, **kwargs): @post_dump def clean_null_attrs(self, data: dict, **kwargs: dict) -> dict: """Removes the attributes in ExpectationConfigurationSchema.REMOVE_KEYS_IF_NONE during serialization if - their values are None.""" # noqa: E501 + their values are None.""" # noqa: E501 # FIXME CoP data = copy.deepcopy(data) for key in ExpectationConfigurationSchema.REMOVE_KEYS_IF_NONE: if key in data and data[key] is None: diff --git a/great_expectations/expectations/metadata_types.py b/great_expectations/expectations/metadata_types.py new file mode 100644 index 000000000000..551ff9434869 --- /dev/null +++ b/great_expectations/expectations/metadata_types.py @@ -0,0 +1,13 @@ +from enum import Enum + + +class DataQualityIssues(str, Enum): + """Data quality issues addressed by Core Expectations.""" + + VOLUME = "Volume" + SCHEMA = "Schema" + COMPLETENESS = "Completeness" + UNIQUENESS = "Uniqueness" + NUMERIC = "Numeric" + VALIDITY = "Validity" + SQL = "SQL" diff --git a/great_expectations/expectations/metrics/__init__.py b/great_expectations/expectations/metrics/__init__.py index 9ae9a87ef896..ba1457dd3491 100644 --- a/great_expectations/expectations/metrics/__init__.py +++ b/great_expectations/expectations/metrics/__init__.py @@ -4,7 +4,7 @@ ) from .column_aggregate_metric_provider import ( ColumnAggregateMetricProvider, - ColumnMetricProvider, # This class name is being deprecated (use "ColumnAggregateMetricProvider" going forward). # noqa: E501 + ColumnMetricProvider, # This class name is being deprecated (use "ColumnAggregateMetricProvider" going forward). 
# noqa: E501 # FIXME CoP column_aggregate_partial, column_aggregate_value, ) diff --git a/great_expectations/expectations/metrics/column_aggregate_metric_provider.py b/great_expectations/expectations/metrics/column_aggregate_metric_provider.py index 50009490f9b7..9d491bb16c5a 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metric_provider.py +++ b/great_expectations/expectations/metrics/column_aggregate_metric_provider.py @@ -55,14 +55,14 @@ def column_aggregate_value( Returns: An annotated metric_function which will be called with a simplified signature. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_type: MetricDomainTypes = MetricDomainTypes.COLUMN if issubclass(engine, PandasExecutionEngine): def wrapper(metric_fn: Callable): @metric_value(engine=PandasExecutionEngine) @wraps(metric_fn) - def inner_func( # noqa: PLR0913 + def inner_func( # noqa: PLR0913 # FIXME CoP cls, execution_engine: PandasExecutionEngine, metric_domain_kwargs: dict, @@ -99,10 +99,10 @@ def inner_func( # noqa: PLR0913 return wrapper else: - raise ValueError("column_aggregate_value decorator only supports PandasExecutionEngine") # noqa: TRY003, TRY004 + raise ValueError("column_aggregate_value decorator only supports PandasExecutionEngine") # noqa: TRY003, TRY004 # FIXME CoP -def column_aggregate_partial(engine: Type[ExecutionEngine], **kwargs): # noqa: C901 +def column_aggregate_partial(engine: Type[ExecutionEngine], **kwargs): # noqa: C901 # FIXME CoP """Provides engine-specific support for authoring a metric_fn with a simplified signature. A column_aggregate_partial must provide an aggregate function; it will be executed with the specified engine @@ -119,7 +119,7 @@ def column_aggregate_partial(engine: Type[ExecutionEngine], **kwargs): # noqa: Returns: An annotated metric_function which will be called with a simplified signature. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP partial_fn_type: MetricPartialFunctionTypes = MetricPartialFunctionTypes.AGGREGATE_FN domain_type: MetricDomainTypes = MetricDomainTypes.COLUMN if issubclass(engine, SqlAlchemyExecutionEngine): @@ -131,7 +131,7 @@ def wrapper(metric_fn: Callable): domain_type=domain_type, ) @wraps(metric_fn) - def inner_func( # noqa: PLR0913 + def inner_func( # noqa: PLR0913 # FIXME CoP cls, execution_engine: SqlAlchemyExecutionEngine, metric_domain_kwargs: dict, @@ -153,7 +153,7 @@ def inner_func( # noqa: PLR0913 metric_domain_kwargs ) else: - # We do not copy here because if compute domain is different, it will be copied by get_compute_domain # noqa: E501 + # We do not copy here because if compute domain is different, it will be copied by get_compute_domain # noqa: E501 # FIXME CoP compute_domain_kwargs = metric_domain_kwargs ( selectable, @@ -193,7 +193,7 @@ def wrapper(metric_fn: Callable): domain_type=domain_type, ) @wraps(metric_fn) - def inner_func( # noqa: PLR0913 + def inner_func( # noqa: PLR0913 # FIXME CoP cls, execution_engine: SparkDFExecutionEngine, metric_domain_kwargs: dict, @@ -215,7 +215,7 @@ def inner_func( # noqa: PLR0913 metric_domain_kwargs ) else: - # We do not copy here because if compute domain is different, it will be copied by get_compute_domain # noqa: E501 + # We do not copy here because if compute domain is different, it will be copied by get_compute_domain # noqa: E501 # FIXME CoP compute_domain_kwargs = metric_domain_kwargs ( @@ -244,7 +244,7 @@ def inner_func( # noqa: PLR0913 return wrapper else: - raise ValueError("Unsupported engine for column_aggregate_partial") # noqa: TRY003, TRY004 + raise ValueError("Unsupported engine for column_aggregate_partial") # noqa: TRY003, TRY004 # FIXME CoP class ColumnAggregateMetricProvider(TableMetricProvider): diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_bootstrapped_ks_test_p_value.py 
b/great_expectations/expectations/metrics/column_aggregate_metrics/column_bootstrapped_ks_test_p_value.py index c8903eb559f2..ad4626af6d34 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_bootstrapped_ks_test_p_value.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_bootstrapped_ks_test_p_value.py @@ -28,7 +28,7 @@ class ColumnBootstrappedKSTestPValue(ColumnAggregateMetricProvider): value_keys = ("partition_object", "p", "bootstrap_sample", "bootstrap_sample_size") @column_aggregate_value(engine=PandasExecutionEngine) - def _pandas( # noqa: C901 + def _pandas( # noqa: C901 # FIXME CoP cls, column, partition_object=None, @@ -38,14 +38,14 @@ def _pandas( # noqa: C901 **kwargs, ): if not is_valid_continuous_partition_object(partition_object): - raise ValueError("Invalid continuous partition object.") # noqa: TRY003 + raise ValueError("Invalid continuous partition object.") # noqa: TRY003 # FIXME CoP - # TODO: consider changing this into a check that tail_weights does not exist exclusively, by moving this check into is_valid_continuous_partition_object # noqa: E501 + # TODO: consider changing this into a check that tail_weights does not exist exclusively, by moving this check into is_valid_continuous_partition_object # noqa: E501 # FIXME CoP if (partition_object["bins"][0] == -np.inf) or (partition_object["bins"][-1] == np.inf): - raise ValueError("Partition endpoints must be finite.") # noqa: TRY003 + raise ValueError("Partition endpoints must be finite.") # noqa: TRY003 # FIXME CoP if "tail_weights" in partition_object and np.sum(partition_object["tail_weights"]) > 0: - raise ValueError("Partition cannot have tail weights -- endpoints must be finite.") # noqa: TRY003 + raise ValueError("Partition cannot have tail weights -- endpoints must be finite.") # noqa: TRY003 # FIXME CoP test_cdf = np.append(np.array([0]), np.cumsum(partition_object["weights"])) @@ -56,10 +56,10 @@ def estimated_cdf(x): 
bootstrap_samples = 1000 if bootstrap_sample_size is None: - # Sampling too many elements (or not bootstrapping) will make the test too sensitive to the fact that we've # noqa: E501 + # Sampling too many elements (or not bootstrapping) will make the test too sensitive to the fact that we've # noqa: E501 # FIXME CoP # compressed via a partition. - # Sampling too few elements will make the test insensitive to significant differences, especially # noqa: E501 + # Sampling too few elements will make the test insensitive to significant differences, especially # noqa: E501 # FIXME CoP # for nonoverlapping ranges. bootstrap_sample_size = len(partition_object["weights"]) * 2 diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_distinct_values.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_distinct_values.py index bafaaac49823..cc2b106f53fe 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_distinct_values.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_distinct_values.py @@ -49,7 +49,7 @@ def _sqlalchemy( Past implementations of column.distinct_values depended on column.value_counts. This was causing performance issues due to the complex query used in column.value_counts and subsequent in-memory operations. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP selectable: sqlalchemy.Selectable accessor_domain_kwargs: Dict[str, str] ( @@ -62,14 +62,14 @@ def _sqlalchemy( distinct_values: List[sqlalchemy.Row] if hasattr(column, "is_not"): - distinct_values = execution_engine.execute_query( # type: ignore[assignment] - sa.select(column).where(column.is_not(None)).distinct().select_from(selectable) # type: ignore[arg-type] + distinct_values = execution_engine.execute_query( # type: ignore[assignment] # FIXME CoP + sa.select(column).where(column.is_not(None)).distinct().select_from(selectable) # type: ignore[arg-type] # FIXME CoP ).fetchall() else: - distinct_values = execution_engine.execute_query( # type: ignore[assignment] - sa.select(column).where(column.isnot(None)).distinct().select_from(selectable) # type: ignore[arg-type] + distinct_values = execution_engine.execute_query( # type: ignore[assignment] # FIXME CoP + sa.select(column).where(column.isnot(None)).distinct().select_from(selectable) # type: ignore[arg-type] # FIXME CoP ).fetchall() - # Vectorized operation is not faster here due to overhead of converting to and from numpy array # noqa: E501 + # Vectorized operation is not faster here due to overhead of converting to and from numpy array # noqa: E501 # FIXME CoP return {row[0] for row in distinct_values} @metric_value(engine=SparkDFExecutionEngine) @@ -83,7 +83,7 @@ def _spark( Past implementations of column.distinct_values depended on column.value_counts. This was causing performance issues due to the complex query used in column.value_counts and subsequent in-memory operations. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP df: pyspark.DataFrame accessor_domain_kwargs: Dict[str, str] ( @@ -119,7 +119,7 @@ def _sqlalchemy( Past implementations of column.distinct_values.count depended on column.value_counts and column.distinct_values. This was causing performance issues due to the complex query used in column.value_counts and subsequent in-memory operations. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return sa.func.count(sa.distinct(column)) @column_aggregate_partial(engine=SparkDFExecutionEngine) # type: ignore[misc] # untyped-decorator @@ -132,7 +132,7 @@ def _spark( Past implementations of column.distinct_values.count depended on column.value_counts and column.distinct_values. This was causing performance issues due to the complex query used in column.value_counts and subsequent in-memory operations. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return F.countDistinct(column) diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_histogram.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_histogram.py index 8f874b02d5e8..2f7c1b6eee3a 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_histogram.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_histogram.py @@ -22,7 +22,7 @@ ColumnAggregateMetricProvider, ) from great_expectations.expectations.metrics.metric_provider import metric_value -from great_expectations.util import convert_to_json_serializable # noqa: TID251 +from great_expectations.util import convert_to_json_serializable # noqa: TID251 # FIXME CoP if TYPE_CHECKING: import pandas as pd @@ -68,7 +68,7 @@ def _sqlalchemy( Args: column: the name of the column for which to get the histogram bins: tuple of bin edges for which to get histogram values; *must* be tuple to support caching - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP selectable, _, accessor_domain_kwargs = execution_engine.get_compute_domain( domain_kwargs=metric_domain_kwargs, domain_type=MetricDomainTypes.COLUMN ) @@ -99,7 +99,7 @@ def _sqlalchemy( == get_sql_dialect_floating_point_infinity_value(schema="api_cast", negative=False) ) ): - # Single-valued column data are modeled using "impulse" (or "sample") distributions (on open interval). 
# noqa: E501 + # Single-valued column data are modeled using "impulse" (or "sample") distributions (on open interval). # noqa: E501 # FIXME CoP case_conditions.append( sa.func.sum( sa.case( @@ -117,14 +117,14 @@ def _sqlalchemy( query = ( sa.select(*case_conditions) .where( - sa.column(column) != None, # noqa: E711 + sa.column(column) != None, # noqa: E711 # FIXME CoP ) - .select_from(selectable) # type: ignore[arg-type] + .select_from(selectable) # type: ignore[arg-type] # FIXME CoP ) - # Run the data through convert_to_json_serializable to ensure we do not have Decimal types # noqa: E501 + # Run the data through convert_to_json_serializable to ensure we do not have Decimal types # noqa: E501 # FIXME CoP return convert_to_json_serializable( - list(execution_engine.execute_query(query).fetchone()) # type: ignore[arg-type] + list(execution_engine.execute_query(query).fetchone()) # type: ignore[arg-type] # FIXME CoP ) idx = 0 @@ -199,16 +199,16 @@ def _sqlalchemy( query = ( sa.select(*case_conditions) .where( - sa.column(column) != None, # noqa: E711 + sa.column(column) != None, # noqa: E711 # FIXME CoP ) - .select_from(selectable) # type: ignore[arg-type] + .select_from(selectable) # type: ignore[arg-type] # FIXME CoP ) # Run the data through convert_to_json_serializable to ensure we do not have Decimal types - return convert_to_json_serializable(list(execution_engine.execute_query(query).fetchone())) # type: ignore[arg-type] + return convert_to_json_serializable(list(execution_engine.execute_query(query).fetchone())) # type: ignore[arg-type] # FIXME CoP @metric_value(engine=SparkDFExecutionEngine) - def _spark( # noqa: C901 + def _spark( # noqa: C901 # FIXME CoP cls, execution_engine: SparkDFExecutionEngine, metric_domain_kwargs: dict, diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_mean.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_mean.py index 34f880e709d7..28b097198558 100644 --- 
a/great_expectations/expectations/metrics/column_aggregate_metrics/column_mean.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_mean.py @@ -44,7 +44,7 @@ def _spark(cls, column, _table, _column_name, **kwargs): types.FloatType, types.LongType, ): - raise TypeError( # noqa: TRY003 - f"Expected numeric column type for function mean(). Recieved type: {column_data_type}" # noqa: E501 + raise TypeError( # noqa: TRY003 # FIXME CoP + f"Expected numeric column type for function mean(). Recieved type: {column_data_type}" # noqa: E501 # FIXME CoP ) return F.mean(column) diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_median.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_median.py index 0ffd6da68e08..ef48fbeaa8f7 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_median.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_median.py @@ -55,7 +55,7 @@ def _sqlalchemy( accessor_domain_kwargs, ) = execution_engine.get_compute_domain(metric_domain_kwargs, MetricDomainTypes.COLUMN) column_name = accessor_domain_kwargs["column"] - column = sa.column(column_name) # type: ignore[var-annotated] + column = sa.column(column_name) # type: ignore[var-annotated] # FIXME CoP """SqlAlchemy Median Implementation""" nonnull_count = metrics.get("column_values.nonnull.count") if not nonnull_count: @@ -64,10 +64,10 @@ def _sqlalchemy( element_values = execution_engine.execute_query( sa.select(column) .order_by(column) - .where(column != None) # noqa: E711 + .where(column != None) # noqa: E711 # FIXME CoP .offset(max(nonnull_count // 2 - 1, 0)) .limit(2) - .select_from(selectable) # type: ignore[arg-type] + .select_from(selectable) # type: ignore[arg-type] # FIXME CoP ) column_values = list(element_values.fetchall()) @@ -83,7 +83,7 @@ def _sqlalchemy( ) / 2.0 ) # Average center values - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME 
CoP # An odd number of column values, we can just take the center value if len(column_values) == 1: column_median = column_values[0][0] # The only value diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_parameterized_distribution_ks_test_p_value.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_parameterized_distribution_ks_test_p_value.py index cf12ff55cfc5..0c5bae917d8b 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_parameterized_distribution_ks_test_p_value.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_parameterized_distribution_ks_test_p_value.py @@ -26,7 +26,7 @@ class ColumnParameterizedDistributionKSTestPValue(ColumnAggregateMetricProvider) @column_aggregate_value(engine=PandasExecutionEngine) def _pandas(cls, column, distribution, p_value=0.05, params=None, **kwargs): if p_value <= 0 or p_value >= 1: - raise ValueError("p_value must be between 0 and 1 exclusive") # noqa: TRY003 + raise ValueError("p_value must be between 0 and 1 exclusive") # noqa: TRY003 # FIXME CoP validate_distribution_parameters(distribution=distribution, params=params) diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_partition.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_partition.py index 79119c3e6386..70dd4a9f103c 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_partition.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_partition.py @@ -126,7 +126,7 @@ def _get_evaluation_dependencies( }, ) else: - raise ValueError("Invalid parameter for bins argument") # noqa: TRY003 + raise ValueError("Invalid parameter for bins argument") # noqa: TRY003 # FIXME CoP return dependencies @@ -199,10 +199,10 @@ def _get_column_partition_using_metrics( max_as_float_ = max_ if ( - iqr < 1.0e-10 # noqa: PLR2004 + iqr < 1.0e-10 # noqa: 
PLR2004 # FIXME CoP ): # Consider IQR 0 and do not use variance-based estimator n_bins = int(np.ceil(sturges)) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if nonnull_count == 0: n_bins = 0 else: @@ -219,7 +219,7 @@ def _get_column_partition_using_metrics( max_=max_, ) else: - raise ValueError("Invalid parameter for bins argument") # noqa: TRY003 + raise ValueError("Invalid parameter for bins argument") # noqa: TRY003 # FIXME CoP return result_bins diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_proportion_of_unique_values.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_proportion_of_unique_values.py index 827b3a475d01..b2be17bd8d19 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_proportion_of_unique_values.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_proportion_of_unique_values.py @@ -32,7 +32,7 @@ def unique_proportion(_metrics): f"column_values.nonnull.{SummarizationMetricNameSuffixes.UNEXPECTED_COUNT.value}" ) - # Ensuring that we do not divide by 0, returning 0 if all values are nulls (we only consider non-nulls unique values) # noqa: E501 + # Ensuring that we do not divide by 0, returning 0 if all values are nulls (we only consider non-nulls unique values) # noqa: E501 # FIXME CoP if total_values > 0 and total_values != null_count: return unique_values / (total_values - null_count) else: diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_quantile_values.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_quantile_values.py index f7c6500d7ac9..4c1826e611c1 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_quantile_values.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_quantile_values.py @@ -44,15 +44,15 @@ def _pandas(cls, column, quantiles, allow_relative_error, **kwargs): 
allow_relative_error = "nearest" if allow_relative_error not in interpolation_options: - raise ValueError( # noqa: TRY003 - f"If specified for pandas, allow_relative_error must be one an allowed value for the 'interpolation'" # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + f"If specified for pandas, allow_relative_error must be one an allowed value for the 'interpolation'" # noqa: E501 # FIXME CoP f"parameter of .quantile() (one of {interpolation_options})" ) return column.quantile(quantiles, interpolation=allow_relative_error).tolist() @metric_value(engine=SqlAlchemyExecutionEngine) - def _sqlalchemy( # noqa: C901, PLR0911 + def _sqlalchemy( # noqa: C901, PLR0911 # FIXME CoP cls, execution_engine: SqlAlchemyExecutionEngine, metric_domain_kwargs: dict, @@ -68,7 +68,7 @@ def _sqlalchemy( # noqa: C901, PLR0911 metric_domain_kwargs, domain_type=MetricDomainTypes.COLUMN ) column_name = accessor_domain_kwargs["column"] - column = sa.column(column_name) # type: ignore[var-annotated] + column = sa.column(column_name) # type: ignore[var-annotated] # FIXME CoP dialect_name = execution_engine.dialect_name quantiles = metric_value_kwargs["quantiles"] allow_relative_error = metric_value_kwargs.get("allow_relative_error", False) @@ -96,7 +96,7 @@ def _sqlalchemy( # noqa: C901, PLR0911 ) elif dialect_name.lower() == GXSqlDialect.CLICKHOUSE: return _get_column_quantiles_clickhouse( - column=column, # type: ignore[arg-type] + column=column, # type: ignore[arg-type] # FIXME CoP quantiles=quantiles, selectable=selectable, execution_engine=execution_engine, @@ -176,11 +176,11 @@ def _spark( or allow_relative_error < 0.0 or allow_relative_error > 1.0 ): - raise ValueError( # noqa: TRY003 - "SparkDFExecutionEngine requires relative error to be False or to be a float between 0 and 1." # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "SparkDFExecutionEngine requires relative error to be False or to be a float between 0 and 1." 
# noqa: E501 # FIXME CoP ) - return df.approxQuantile(column, list(quantiles), allow_relative_error) # type: ignore[attr-defined] + return df.approxQuantile(column, list(quantiles), allow_relative_error) # type: ignore[attr-defined] # FIXME CoP def _get_column_quantiles_mssql( @@ -188,20 +188,20 @@ def _get_column_quantiles_mssql( ) -> list: # mssql requires over(), so we add an empty over() clause selects: list[sqlalchemy.WithinGroup] = [ - sa.func.percentile_disc(quantile).within_group(column.asc()).over() # type: ignore[misc] + sa.func.percentile_disc(quantile).within_group(column.asc()).over() # type: ignore[misc] # FIXME CoP for quantile in quantiles ] quantiles_query: sqlalchemy.Select = sa.select(*selects).select_from(selectable) try: quantiles_results = execution_engine.execute_query(quantiles_query).fetchone() - return list(quantiles_results) # type: ignore[arg-type] + return list(quantiles_results) # type: ignore[arg-type] # FIXME CoP except sqlalchemy.ProgrammingError as pe: exception_message: str = "An SQL syntax Exception occurred." exception_traceback: str = traceback.format_exc() exception_message += f'{type(pe).__name__}: "{pe!s}". Traceback: "{exception_traceback}".' 
- logger.error(exception_message) # noqa: TRY400 - raise pe # noqa: TRY201 + logger.error(exception_message) # noqa: TRY400 # FIXME CoP + raise pe # noqa: TRY201 # FIXME CoP def _get_column_quantiles_bigquery( @@ -209,27 +209,27 @@ def _get_column_quantiles_bigquery( ) -> list: # BigQuery does not support "WITHIN", so we need a special case for it selects: list[sqlalchemy.WithinGroup] = [ - sa.func.percentile_disc(column, quantile).over() # type: ignore[misc] + sa.func.percentile_disc(column, quantile).over() # type: ignore[misc] # FIXME CoP for quantile in quantiles ] quantiles_query: sqlalchemy.Select = sa.select(*selects).select_from(selectable) try: quantiles_results = execution_engine.execute_query(quantiles_query).fetchone() - return list(quantiles_results) # type: ignore[arg-type] + return list(quantiles_results) # type: ignore[arg-type] # FIXME CoP except sqlalchemy.ProgrammingError as pe: exception_message: str = "An SQL syntax Exception occurred." exception_traceback: str = traceback.format_exc() exception_message += f'{type(pe).__name__}: "{pe!s}". Traceback: "{exception_traceback}".' - logger.error(exception_message) # noqa: TRY400 - raise pe # noqa: TRY201 + logger.error(exception_message) # noqa: TRY400 # FIXME CoP + raise pe # noqa: TRY201 # FIXME CoP def _get_column_quantiles_mysql( column, quantiles: Iterable, selectable, execution_engine: SqlAlchemyExecutionEngine ) -> list: # MySQL does not support "percentile_disc", so we implement it as a compound query. - # Please see https://stackoverflow.com/questions/19770026/calculate-percentile-value-using-mysql for reference. # noqa: E501 + # Please see https://stackoverflow.com/questions/19770026/calculate-percentile-value-using-mysql for reference. 
# noqa: E501 # FIXME CoP percent_rank_query: sqlalchemy.CTE = ( sa.select( column, @@ -247,7 +247,7 @@ def _get_column_quantiles_mysql( for idx, quantile in enumerate(quantiles): # pymysql cannot handle conversion of numpy float64 to float; convert just in case if np.issubdtype(type(quantile), np.double): - quantile = float(quantile) # noqa: PLW2901 + quantile = float(quantile) # noqa: PLW2901 # FIXME CoP quantile_column: sqlalchemy.Label = ( sa.func.first_value(column) .over( @@ -262,20 +262,20 @@ def _get_column_quantiles_mysql( ) .label(f"q_{idx}") ) - selects.append(quantile_column) # type: ignore[arg-type] + selects.append(quantile_column) # type: ignore[arg-type] # FIXME CoP quantiles_query: sqlalchemy.Select = ( sa.select(*selects).distinct().order_by(percent_rank_query.columns.p.desc()) ) try: quantiles_results = execution_engine.execute_query(quantiles_query).fetchone() - return list(quantiles_results) # type: ignore[arg-type] + return list(quantiles_results) # type: ignore[arg-type] # FIXME CoP except sqlalchemy.ProgrammingError as pe: exception_message: str = "An SQL syntax Exception occurred." exception_traceback: str = traceback.format_exc() exception_message += f'{type(pe).__name__}: "{pe!s}". Traceback: "{exception_traceback}".' - logger.error(exception_message) # noqa: TRY400 - raise pe # noqa: TRY201 + logger.error(exception_message) # noqa: TRY400 # FIXME CoP + raise pe # noqa: TRY201 # FIXME CoP def _get_column_quantiles_trino( @@ -288,13 +288,13 @@ def _get_column_quantiles_trino( try: quantiles_results = execution_engine.execute_query(quantiles_query).fetchone() - return list(quantiles_results)[0] # type: ignore[arg-type] + return list(quantiles_results)[0] # type: ignore[arg-type] # FIXME CoP except (sqlalchemy.ProgrammingError, trino.trinoexceptions.TrinoUserError) as pe: exception_message: str = "An SQL syntax Exception occurred." exception_traceback: str = traceback.format_exc() exception_message += f'{type(pe).__name__}: "{pe!s}". 
Traceback: "{exception_traceback}".' - logger.error(exception_message) # noqa: TRY400 - raise pe # noqa: TRY201 + logger.error(exception_message) # noqa: TRY400 # FIXME CoP + raise pe # noqa: TRY201 # FIXME CoP def _get_column_quantiles_clickhouse( @@ -303,7 +303,7 @@ def _get_column_quantiles_clickhouse( quantiles_list = list(quantiles) sql_approx: str = f"quantilesExact({', '.join([str(x) for x in quantiles_list])})({column})" selects_approx: list[sqlalchemy.TextClause] = [sa.text(sql_approx)] - quantiles_query: sqlalchemy.Select = sa.select(selects_approx).select_from(selectable) # type: ignore[call-overload] + quantiles_query: sqlalchemy.Select = sa.select(selects_approx).select_from(selectable) # type: ignore[call-overload] # FIXME CoP try: quantiles_results = execution_engine.execute(quantiles_query).fetchone()[0] return quantiles_results @@ -312,8 +312,8 @@ def _get_column_quantiles_clickhouse( exception_message: str = "An SQL syntax Exception occurred." exception_traceback: str = traceback.format_exc() exception_message += f'{type(pe).__name__}: "{pe!s}". Traceback: "{exception_traceback}".' - logger.error(exception_message) # noqa: TRY400 - raise pe # noqa: TRY201 + logger.error(exception_message) # noqa: TRY400 # FIXME CoP + raise pe # noqa: TRY201 # FIXME CoP def _get_column_quantiles_sqlite( @@ -328,7 +328,7 @@ def _get_column_quantiles_sqlite( "execution_engine.execute_query()" as the number of partitions in the "quantiles" parameter (albeit, typically, only a few). However, this is the only mechanism available for SQLite at the present time (11/17/2021), because the analytical processing is not a very strongly represented capability of the SQLite database management system. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP offsets: list[int] = [quantile * table_row_count - 1 for quantile in quantiles] quantile_queries: list[sqlalchemy.Select] = [ sa.select(column).order_by(column.asc()).offset(offset).limit(1).select_from(selectable) @@ -342,15 +342,15 @@ def _get_column_quantiles_sqlite( ] return list( itertools.chain.from_iterable( - [list(quantile_result) for quantile_result in quantiles_results] # type: ignore[arg-type] + [list(quantile_result) for quantile_result in quantiles_results] # type: ignore[arg-type] # FIXME CoP ) ) except sqlalchemy.ProgrammingError as pe: exception_message: str = "An SQL syntax Exception occurred." exception_traceback: str = traceback.format_exc() exception_message += f'{type(pe).__name__}: "{pe!s}". Traceback: "{exception_traceback}".' - logger.error(exception_message) # noqa: TRY400 - raise pe # noqa: TRY201 + logger.error(exception_message) # noqa: TRY400 # FIXME CoP + raise pe # noqa: TRY201 # FIXME CoP def _get_column_quantiles_athena( @@ -364,21 +364,21 @@ def _get_column_quantiles_athena( quantiles_query_approx: sqlalchemy.Select = sa.select(*selects_approx).select_from(selectable) try: quantiles_results = execution_engine.execute_query(quantiles_query_approx).fetchone() - # the ast literal eval is needed because the method is returning a json string and not a dict # noqa: E501 - results = ast.literal_eval(quantiles_results[0]) # type: ignore[index] + # the ast literal eval is needed because the method is returning a json string and not a dict # noqa: E501 # FIXME CoP + results = ast.literal_eval(quantiles_results[0]) # type: ignore[index] # FIXME CoP return results except sqlalchemy.ProgrammingError as pe: exception_message: str = "An SQL syntax Exception occurred." exception_traceback: str = traceback.format_exc() exception_message += f'{type(pe).__name__}: "{pe!s}". Traceback: "{exception_traceback}".' 
- logger.error(exception_message) # noqa: TRY400 - raise pe # noqa: TRY201 + logger.error(exception_message) # noqa: TRY400 # FIXME CoP + raise pe # noqa: TRY201 # FIXME CoP -# Support for computing the quantiles column for PostGreSQL and Redshift is included in the same method as that for # noqa: E501 -# the generic sqlalchemy compatible DBMS engine, because users often use the postgresql driver to connect to Redshift # noqa: E501 +# Support for computing the quantiles column for PostGreSQL and Redshift is included in the same method as that for # noqa: E501 # FIXME CoP +# the generic sqlalchemy compatible DBMS engine, because users often use the postgresql driver to connect to Redshift # noqa: E501 # FIXME CoP # The key functional difference is that Redshift does not support the aggregate function -# "percentile_disc", but does support the approximate percentile_disc or percentile_cont function version instead.``` # noqa: E501 +# "percentile_disc", but does support the approximate percentile_disc or percentile_cont function version instead.``` # noqa: E501 # FIXME CoP def _get_column_quantiles_generic_sqlalchemy( column, quantiles: Iterable, @@ -393,12 +393,12 @@ def _get_column_quantiles_generic_sqlalchemy( try: quantiles_results = execution_engine.execute_query(quantiles_query).fetchone() - return list(quantiles_results) # type: ignore[arg-type] + return list(quantiles_results) # type: ignore[arg-type] # FIXME CoP except sqlalchemy.ProgrammingError: - # ProgrammingError: (psycopg2.errors.SyntaxError) Aggregate function "percentile_disc" is not supported; # noqa: E501 + # ProgrammingError: (psycopg2.errors.SyntaxError) Aggregate function "percentile_disc" is not supported; # noqa: E501 # FIXME CoP # use approximate percentile_disc or percentile_cont instead. if attempt_allowing_relative_error(execution_engine.dialect): - # Redshift does not have a percentile_disc method, but does support an approximate version. 
# noqa: E501 + # Redshift does not have a percentile_disc method, but does support an approximate version. # noqa: E501 # FIXME CoP sql_approx: str = get_approximate_percentile_disc_sql( selects=selects, sql_engine_dialect=execution_engine.dialect ) @@ -411,22 +411,22 @@ def _get_column_quantiles_generic_sqlalchemy( quantiles_results = execution_engine.execute_query( quantiles_query_approx ).fetchone() - return list(quantiles_results) # type: ignore[arg-type] + return list(quantiles_results) # type: ignore[arg-type] # FIXME CoP except sqlalchemy.ProgrammingError as pe: exception_message: str = "An SQL syntax Exception occurred." exception_traceback: str = traceback.format_exc() exception_message += ( f'{type(pe).__name__}: "{pe!s}". Traceback: "{exception_traceback}".' ) - logger.error(exception_message) # noqa: TRY400 - raise pe # noqa: TRY201 + logger.error(exception_message) # noqa: TRY400 # FIXME CoP + raise pe # noqa: TRY201 # FIXME CoP else: - raise ValueError( # noqa: TRY003 - f'The SQL engine dialect "{execution_engine.dialect!s}" does not support computing quantiles ' # noqa: E501 - "without approximation error; set allow_relative_error to True to allow approximate quantiles." # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + f'The SQL engine dialect "{execution_engine.dialect!s}" does not support computing quantiles ' # noqa: E501 # FIXME CoP + "without approximation error; set allow_relative_error to True to allow approximate quantiles." # noqa: E501 # FIXME CoP ) else: - raise ValueError( # noqa: TRY003 - f'The SQL engine dialect "{execution_engine.dialect!s}" does not support computing quantiles with ' # noqa: E501 - "approximation error; set allow_relative_error to False to disable approximate quantiles." 
# noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + f'The SQL engine dialect "{execution_engine.dialect!s}" does not support computing quantiles with ' # noqa: E501 # FIXME CoP + "approximation error; set allow_relative_error to False to disable approximate quantiles." # noqa: E501 # FIXME CoP ) diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_standard_deviation.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_standard_deviation.py index 0690807cdd22..7279876db8d6 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_standard_deviation.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_standard_deviation.py @@ -77,7 +77,7 @@ def _get_evaluation_dependencies( runtime_configuration: Optional[dict] = None, ): """Returns a dictionary of given metric names and their corresponding configuration, specifying the metric - types and their respective domains""" # noqa: E501 + types and their respective domains""" # noqa: E501 # FIXME CoP dependencies: dict = super()._get_evaluation_dependencies( metric=metric, configuration=configuration, diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_value_counts.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_value_counts.py index ca3d5ba85237..98a066f55f84 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_value_counts.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_value_counts.py @@ -41,9 +41,9 @@ def _pandas( ) if sort not in ["value", "count", "none"]: - raise ValueError("sort must be either 'value', 'count', or 'none'") # noqa: TRY003 + raise ValueError("sort must be either 'value', 'count', or 'none'") # noqa: TRY003 # FIXME CoP if collate is not None: - raise ValueError("collate parameter is not supported in PandasDataset") # noqa: TRY003 + raise ValueError("collate 
parameter is not supported in PandasDataset") # noqa: TRY003 # FIXME CoP df: pd.DataFrame accessor_domain_kwargs: Dict[str, str] @@ -57,7 +57,7 @@ def _pandas( try: counts.sort_index(inplace=True) except TypeError: - # Having values of multiple types in a object dtype column (e.g., strings and floats) # noqa: E501 + # Having values of multiple types in a object dtype column (e.g., strings and floats) # noqa: E501 # FIXME CoP # raises a TypeError when the sorting method performs comparisons. # Related to the noqa E721 below: numpy / pandas implements equality, see https://github.com/astral-sh/ruff/issues/9570 if df[column].dtype == object: @@ -83,9 +83,9 @@ def _sqlalchemy( ) if sort not in ["value", "count", "none"]: - raise ValueError("sort must be either 'value', 'count', or 'none'") # noqa: TRY003 + raise ValueError("sort must be either 'value', 'count', or 'none'") # noqa: TRY003 # FIXME CoP if collate is not None: - raise ValueError("collate parameter is not supported in PandasDataset") # noqa: TRY003 + raise ValueError("collate parameter is not supported in PandasDataset") # noqa: TRY003 # FIXME CoP selectable: sqlalchemy.Selectable accessor_domain_kwargs: Dict[str, str] @@ -124,10 +124,10 @@ def _sqlalchemy( query = query.order_by(sa.column(column)) elif sort == "count": query = query.order_by(sa.column("count").desc()) - results: List[sqlalchemy.Row] = execution_engine.execute_query( # type: ignore[assignment] - query.select_from(selectable) # type: ignore[arg-type] + results: List[sqlalchemy.Row] = execution_engine.execute_query( # type: ignore[assignment] # FIXME CoP + query.select_from(selectable) # type: ignore[arg-type] # FIXME CoP ).fetchall() - # Numpy does not always infer the correct DataTypes for SqlAlchemy Row, so we cannot use vectorized approach. # noqa: E501 + # Numpy does not always infer the correct DataTypes for SqlAlchemy Row, so we cannot use vectorized approach. 
# noqa: E501 # FIXME CoP series = pd.Series( data=[row[1] for row in results], index=pd.Index(data=[row[0] for row in results], name="value"), @@ -149,9 +149,9 @@ def _spark( ) if sort not in ["value", "count", "none"]: - raise ValueError("sort must be either 'value', 'count', or 'none'") # noqa: TRY003 + raise ValueError("sort must be either 'value', 'count', or 'none'") # noqa: TRY003 # FIXME CoP if collate is not None: - raise ValueError("collate parameter is not supported in SparkDFDataset") # noqa: TRY003 + raise ValueError("collate parameter is not supported in SparkDFDataset") # noqa: TRY003 # FIXME CoP df: pyspark.DataFrame accessor_domain_kwargs: Dict[str, str] @@ -171,7 +171,7 @@ def _spark( value_counts: List[pyspark.Row] = value_counts_df.collect() - # Numpy does not always infer the correct DataTypes for Spark df, so we cannot use vectorized approach. # noqa: E501 + # Numpy does not always infer the correct DataTypes for Spark df, so we cannot use vectorized approach. # noqa: E501 # FIXME CoP values: Iterable[Any] counts: Iterable[int] if len(value_counts) > 0: diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_values_between_count.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_values_between_count.py index 231e1b7ab1d2..a44e07cb4d66 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_values_between_count.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_values_between_count.py @@ -30,7 +30,7 @@ class ColumnValuesBetweenCount(MetricProvider): ) @metric_value(engine=PandasExecutionEngine) - def _pandas( # noqa: C901, PLR0912 + def _pandas( # noqa: C901, PLR0912 # FIXME CoP cls, execution_engine: PandasExecutionEngine, metric_domain_kwargs: dict, @@ -43,10 +43,10 @@ def _pandas( # noqa: C901, PLR0912 strict_min = metric_value_kwargs.get("strict_min") strict_max = metric_value_kwargs.get("strict_max") if min_value is None and 
max_value is None: - raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 + raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 # FIXME CoP if min_value is not None and max_value is not None and min_value > max_value: - raise ValueError("min_value cannot be greater than max_value") # noqa: TRY003 + raise ValueError("min_value cannot be greater than max_value") # noqa: TRY003 # FIXME CoP ( df, @@ -79,12 +79,12 @@ def _pandas( # noqa: C901, PLR0912 else: series = min_value <= val else: - raise ValueError("unable to parse domain and value kwargs") # noqa: TRY003 + raise ValueError("unable to parse domain and value kwargs") # noqa: TRY003 # FIXME CoP return np.count_nonzero(series) @metric_value(engine=SqlAlchemyExecutionEngine) - def _sqlalchemy( # noqa: C901, PLR0912 + def _sqlalchemy( # noqa: C901, PLR0912 # FIXME CoP cls, execution_engine: SqlAlchemyExecutionEngine, metric_domain_kwargs: dict, @@ -97,10 +97,10 @@ def _sqlalchemy( # noqa: C901, PLR0912 strict_min = metric_value_kwargs.get("strict_min") strict_max = metric_value_kwargs.get("strict_max") if min_value is not None and max_value is not None and min_value > max_value: - raise ValueError("min_value cannot be greater than max_value") # noqa: TRY003 + raise ValueError("min_value cannot be greater than max_value") # noqa: TRY003 # FIXME CoP if min_value is None and max_value is None: - raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 + raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 # FIXME CoP dialect_name = execution_engine.engine.dialect.name.lower() if ( @@ -154,7 +154,7 @@ def _sqlalchemy( # noqa: C901, PLR0912 ) = execution_engine.get_compute_domain( domain_kwargs=metric_domain_kwargs, domain_type=MetricDomainTypes.COLUMN ) - column = sa.column(accessor_domain_kwargs["column"]) # type: ignore[var-annotated] + column = sa.column(accessor_domain_kwargs["column"]) # type: ignore[var-annotated] 
# FIXME CoP if min_value is None: if strict_max: @@ -168,7 +168,7 @@ def _sqlalchemy( # noqa: C901, PLR0912 else: condition = column >= min_value - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if strict_min and strict_max: condition = sa.and_(column > min_value, column < max_value) elif strict_min: @@ -179,11 +179,11 @@ def _sqlalchemy( # noqa: C901, PLR0912 condition = sa.and_(column >= min_value, column <= max_value) return execution_engine.execute_query( - sa.select(sa.func.count()).select_from(selectable).where(condition) # type: ignore[arg-type] + sa.select(sa.func.count()).select_from(selectable).where(condition) # type: ignore[arg-type] # FIXME CoP ).scalar() @metric_value(engine=SparkDFExecutionEngine) - def _spark( # noqa: C901, PLR0912 + def _spark( # noqa: C901, PLR0912 # FIXME CoP cls, execution_engine: SparkDFExecutionEngine, metric_domain_kwargs: dict, @@ -196,10 +196,10 @@ def _spark( # noqa: C901, PLR0912 strict_min = metric_value_kwargs.get("strict_min") strict_max = metric_value_kwargs.get("strict_max") if min_value is not None and max_value is not None and min_value > max_value: - raise ValueError("min_value cannot be greater than max_value") # noqa: TRY003 + raise ValueError("min_value cannot be greater than max_value") # noqa: TRY003 # FIXME CoP if min_value is None and max_value is None: - raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 + raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 # FIXME CoP ( df, @@ -211,10 +211,10 @@ def _spark( # noqa: C901, PLR0912 column = df[accessor_domain_kwargs["column"]] if min_value is not None and max_value is not None and min_value > max_value: - raise ValueError("min_value cannot be greater than max_value") # noqa: TRY003 + raise ValueError("min_value cannot be greater than max_value") # noqa: TRY003 # FIXME CoP if min_value is None and max_value is None: - raise ValueError("min_value and max_value cannot both be None") # noqa: 
TRY003 + raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 # FIXME CoP if min_value is None: if strict_max: @@ -228,7 +228,7 @@ def _spark( # noqa: C901, PLR0912 else: condition = column >= min_value - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if strict_min and strict_max: condition = (column > min_value) & (column < max_value) elif strict_min: diff --git a/great_expectations/expectations/metrics/column_map_metrics/column_value_lengths.py b/great_expectations/expectations/metrics/column_map_metrics/column_value_lengths.py index c887d6ce9557..45f52b78baf0 100644 --- a/great_expectations/expectations/metrics/column_map_metrics/column_value_lengths.py +++ b/great_expectations/expectations/metrics/column_map_metrics/column_value_lengths.py @@ -19,7 +19,7 @@ column_condition_partial, column_function_partial, ) -from great_expectations.util import pandas_series_between_inclusive +from great_expectations.util import pandas_series_between from great_expectations.validator.metric_configuration import MetricConfiguration if TYPE_CHECKING: @@ -71,7 +71,7 @@ def _get_evaluation_dependencies( if ( metric.metric_name - == f"column_values.value_length.equals.{MetricPartialFunctionTypeSuffixes.CONDITION.value}" # noqa: E501 + == f"column_values.value_length.equals.{MetricPartialFunctionTypeSuffixes.CONDITION.value}" # noqa: E501 # FIXME CoP ): dependencies[ f"column_values.value_length.{MetricPartialFunctionTypeSuffixes.MAP.value}" @@ -107,7 +107,7 @@ def _spark_function(cls, column, **kwargs): return F.length(column) @column_condition_partial(engine=PandasExecutionEngine) - def _pandas( # noqa: C901 + def _pandas( # noqa: C901 # FIXME CoP cls, column, _metrics, @@ -123,15 +123,34 @@ def _pandas( # noqa: C901 metric_series = None if min_value is not None and max_value is not None: + # the word "strict" can be taken as a synonym for the word "exclusive" if strict_min and strict_max: - metric_series = column_lengths.between(min_value, 
max_value, inclusive=False) + metric_series = pandas_series_between( + series=column_lengths, + min_value=min_value, + max_value=max_value, + inclusive="neither", + ) elif strict_min and not strict_max: - metric_series = (column_lengths > min_value) & (column_lengths <= max_value) + metric_series = pandas_series_between( + series=column_lengths, + min_value=min_value, + max_value=max_value, + inclusive="right", + ) elif not strict_min and strict_max: - metric_series = (column_lengths >= min_value) & (column_lengths < max_value) + metric_series = pandas_series_between( + series=column_lengths, + min_value=min_value, + max_value=max_value, + inclusive="left", + ) elif not strict_min and not strict_max: - metric_series = pandas_series_between_inclusive( - series=column_lengths, min_value=min_value, max_value=max_value + metric_series = pandas_series_between( + series=column_lengths, + min_value=min_value, + max_value=max_value, + inclusive="both", ) elif min_value is None and max_value is not None: if strict_max: @@ -145,7 +164,7 @@ def _pandas( # noqa: C901 metric_series = column_lengths >= min_value else: - raise ValueError("Invalid configuration") # noqa: TRY003 + raise ValueError("Invalid configuration") # noqa: TRY003 # FIXME CoP return metric_series @@ -165,18 +184,18 @@ def _sqlalchemy( ) if min_value is None and max_value is None: - raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 + raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 # FIXME CoP # Assert that min_value and max_value are integers try: if min_value is not None and not float(min_value).is_integer(): - raise ValueError("min_value and max_value must be integers") # noqa: TRY003, TRY301 + raise ValueError("min_value and max_value must be integers") # noqa: TRY003, TRY301 # FIXME CoP if max_value is not None and not float(max_value).is_integer(): - raise ValueError("min_value and max_value must be integers") # noqa: TRY003, TRY301 + raise 
ValueError("min_value and max_value must be integers") # noqa: TRY003, TRY301 # FIXME CoP except ValueError: - raise ValueError("min_value and max_value must be integers") # noqa: TRY003 + raise ValueError("min_value and max_value must be integers") # noqa: TRY003 # FIXME CoP if min_value is not None and max_value is not None: return sa.and_( @@ -206,18 +225,18 @@ def _spark( ) if min_value is None and max_value is None: - raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 + raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 # FIXME CoP # Assert that min_value and max_value are integers try: if min_value is not None and not float(min_value).is_integer(): - raise ValueError("min_value and max_value must be integers") # noqa: TRY003, TRY301 + raise ValueError("min_value and max_value must be integers") # noqa: TRY003, TRY301 # FIXME CoP if max_value is not None and not float(max_value).is_integer(): - raise ValueError("min_value and max_value must be integers") # noqa: TRY003, TRY301 + raise ValueError("min_value and max_value must be integers") # noqa: TRY003, TRY301 # FIXME CoP except ValueError: - raise ValueError("min_value and max_value must be integers") # noqa: TRY003 + raise ValueError("min_value and max_value must be integers") # noqa: TRY003 # FIXME CoP if min_value is not None and max_value is not None: return (column_lengths >= min_value) & (column_lengths <= max_value) @@ -246,7 +265,7 @@ def _get_evaluation_dependencies( if ( metric.metric_name - == f"column_values.value_length.between.{MetricPartialFunctionTypeSuffixes.CONDITION.value}" # noqa: E501 + == f"column_values.value_length.between.{MetricPartialFunctionTypeSuffixes.CONDITION.value}" # noqa: E501 # FIXME CoP ): dependencies[ f"column_values.value_length.{MetricPartialFunctionTypeSuffixes.MAP.value}" diff --git a/great_expectations/expectations/metrics/column_map_metrics/column_values_between.py 
b/great_expectations/expectations/metrics/column_map_metrics/column_values_between.py index 3d349100d810..a2b5b75d5b5b 100644 --- a/great_expectations/expectations/metrics/column_map_metrics/column_values_between.py +++ b/great_expectations/expectations/metrics/column_map_metrics/column_values_between.py @@ -29,7 +29,7 @@ class ColumnValuesBetween(ColumnMapMetricProvider): ) @column_condition_partial(engine=PandasExecutionEngine) - def _pandas( # noqa: C901 + def _pandas( # noqa: C901 # FIXME CoP cls, column, min_value=None, @@ -39,12 +39,12 @@ def _pandas( # noqa: C901 **kwargs, ): if min_value is None and max_value is None: - raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 + raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 # FIXME CoP temp_column = column if min_value is not None and max_value is not None and min_value > max_value: - raise ValueError("min_value cannot be greater than max_value") # noqa: TRY003 + raise ValueError("min_value cannot be greater than max_value") # noqa: TRY003 # FIXME CoP # Use a vectorized approach for native numpy dtypes if column.dtype in [int, float]: @@ -60,22 +60,22 @@ def _pandas( # noqa: C901 return cls._pandas_vectorized(temp_column, min_value, max_value, strict_min, strict_max) - def is_between(val): # noqa: C901, PLR0911, PLR0912 - # TODO Might be worth explicitly defining comparisons between types (for example, between strings and ints). # noqa: E501 - # Ensure types can be compared since some types in Python 3 cannot be logically compared. # noqa: E501 + def is_between(val): # noqa: C901, PLR0911, PLR0912 # FIXME CoP + # TODO Might be worth explicitly defining comparisons between types (for example, between strings and ints). # noqa: E501 # FIXME CoP + # Ensure types can be compared since some types in Python 3 cannot be logically compared. 
# noqa: E501 # FIXME CoP # print type(val), type(min_value), type(max_value), val, min_value, max_value if type(val) is None: return False if min_value is not None and max_value is not None: - # Type of column values is either string or specific rich type (or "None"). In all cases, type of # noqa: E501 - # column must match type of constant being compared to column value (otherwise, error is raised). # noqa: E501 + # Type of column values is either string or specific rich type (or "None"). In all cases, type of # noqa: E501 # FIXME CoP + # column must match type of constant being compared to column value (otherwise, error is raised). # noqa: E501 # FIXME CoP if (isinstance(val, str) != isinstance(min_value, str)) or ( isinstance(val, str) != isinstance(max_value, str) ): - raise TypeError( # noqa: TRY003 - "Column values, min_value, and max_value must either be None or of the same type." # noqa: E501 + raise TypeError( # noqa: TRY003 # FIXME CoP + "Column values, min_value, and max_value must either be None or of the same type." # noqa: E501 # FIXME CoP ) if strict_min and strict_max: @@ -90,11 +90,11 @@ def is_between(val): # noqa: C901, PLR0911, PLR0912 return (val >= min_value) and (val <= max_value) elif min_value is None and max_value is not None: - # Type of column values is either string or specific rich type (or "None"). In all cases, type of # noqa: E501 - # column must match type of constant being compared to column value (otherwise, error is raised). # noqa: E501 + # Type of column values is either string or specific rich type (or "None"). In all cases, type of # noqa: E501 # FIXME CoP + # column must match type of constant being compared to column value (otherwise, error is raised). # noqa: E501 # FIXME CoP if isinstance(val, str) != isinstance(max_value, str): - raise TypeError( # noqa: TRY003 - "Column values, min_value, and max_value must either be None or of the same type." 
# noqa: E501 + raise TypeError( # noqa: TRY003 # FIXME CoP + "Column values, min_value, and max_value must either be None or of the same type." # noqa: E501 # FIXME CoP ) if strict_max: @@ -103,11 +103,11 @@ def is_between(val): # noqa: C901, PLR0911, PLR0912 return val <= max_value elif min_value is not None and max_value is None: - # Type of column values is either string or specific rich type (or "None"). In all cases, type of # noqa: E501 - # column must match type of constant being compared to column value (otherwise, error is raised). # noqa: E501 + # Type of column values is either string or specific rich type (or "None"). In all cases, type of # noqa: E501 # FIXME CoP + # column must match type of constant being compared to column value (otherwise, error is raised). # noqa: E501 # FIXME CoP if isinstance(val, str) != isinstance(min_value, str): - raise TypeError( # noqa: TRY003 - "Column values, min_value, and max_value must either be None or of the same type." # noqa: E501 + raise TypeError( # noqa: TRY003 # FIXME CoP + "Column values, min_value, and max_value must either be None or of the same type." 
# noqa: E501 # FIXME CoP ) if strict_min: @@ -121,7 +121,7 @@ def is_between(val): # noqa: C901, PLR0911, PLR0912 return temp_column.map(is_between) @classmethod - def _pandas_vectorized( # noqa: C901, PLR0911 + def _pandas_vectorized( # noqa: C901, PLR0911 # FIXME CoP cls, column: pd.Series, min_value: Optional[Union[int, float, datetime.datetime]], @@ -130,7 +130,7 @@ def _pandas_vectorized( # noqa: C901, PLR0911 strict_max: bool, ): if min_value is None and max_value is None: - raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 + raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 # FIXME CoP if min_value is None: if strict_max: @@ -154,7 +154,7 @@ def _pandas_vectorized( # noqa: C901, PLR0911 return (min_value <= column) & (column <= max_value) @column_condition_partial(engine=SqlAlchemyExecutionEngine) - def _sqlalchemy( # noqa: C901, PLR0911 + def _sqlalchemy( # noqa: C901, PLR0911 # FIXME CoP cls, column, min_value=None, @@ -164,10 +164,10 @@ def _sqlalchemy( # noqa: C901, PLR0911 **kwargs, ): if min_value is not None and max_value is not None and min_value > max_value: - raise ValueError("min_value cannot be greater than max_value") # noqa: TRY003 + raise ValueError("min_value cannot be greater than max_value") # noqa: TRY003 # FIXME CoP if min_value is None and max_value is None: - raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 + raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 # FIXME CoP if min_value is None: if strict_max: @@ -206,7 +206,7 @@ def _sqlalchemy( # noqa: C901, PLR0911 ) @column_condition_partial(engine=SparkDFExecutionEngine) - def _spark( # noqa: C901, PLR0911 + def _spark( # noqa: C901, PLR0911 # FIXME CoP cls, column, min_value=None, @@ -216,10 +216,10 @@ def _spark( # noqa: C901, PLR0911 **kwargs, ): if min_value is not None and max_value is not None and min_value > max_value: - raise ValueError("min_value cannot be 
greater than max_value") # noqa: TRY003 + raise ValueError("min_value cannot be greater than max_value") # noqa: TRY003 # FIXME CoP if min_value is None and max_value is None: - raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 + raise ValueError("min_value and max_value cannot both be None") # noqa: TRY003 # FIXME CoP if min_value is None: if strict_max: diff --git a/great_expectations/expectations/metrics/column_map_metrics/column_values_dateutil_parseable.py b/great_expectations/expectations/metrics/column_map_metrics/column_values_dateutil_parseable.py index 8e994bdb6c7a..8e755b911a01 100644 --- a/great_expectations/expectations/metrics/column_map_metrics/column_values_dateutil_parseable.py +++ b/great_expectations/expectations/metrics/column_map_metrics/column_values_dateutil_parseable.py @@ -16,9 +16,9 @@ class ColumnValuesDateutilParseable(ColumnMapMetricProvider): def _pandas(cls, column, **kwargs): def is_parseable(val): try: - if type(val) != str: # noqa: E721 - raise TypeError( # noqa: TRY003 - "Values passed to expect_column_values_to_be_dateutil_parseable must be of type string.\nIf you want to validate a column of dates or timestamps, please call the expectation before converting from string format." # noqa: E501 + if type(val) != str: # noqa: E721 # FIXME CoP + raise TypeError( # noqa: TRY003 # FIXME CoP + "Values passed to expect_column_values_to_be_dateutil_parseable must be of type string.\nIf you want to validate a column of dates or timestamps, please call the expectation before converting from string format." 
# noqa: E501 # FIXME CoP ) parse(val) diff --git a/great_expectations/expectations/metrics/column_map_metrics/column_values_decreasing.py b/great_expectations/expectations/metrics/column_map_metrics/column_values_decreasing.py index ef21daca6017..82b79465385c 100644 --- a/great_expectations/expectations/metrics/column_map_metrics/column_values_decreasing.py +++ b/great_expectations/expectations/metrics/column_map_metrics/column_values_decreasing.py @@ -65,7 +65,7 @@ def _spark( pyspark.types.IntegerType, ), ): - # if column is any type that could have NA values, remove them (not filtered by .isNotNull()) # noqa: E501 + # if column is any type that could have NA values, remove them (not filtered by .isNotNull()) # noqa: E501 # FIXME CoP compute_domain_kwargs = execution_engine.add_column_row_condition( metric_domain_kwargs, filter_null=cls.filter_column_isnull, diff --git a/great_expectations/expectations/metrics/column_map_metrics/column_values_in_set.py b/great_expectations/expectations/metrics/column_map_metrics/column_values_in_set.py index 6f1065ed94ac..a42cff2b3c20 100644 --- a/great_expectations/expectations/metrics/column_map_metrics/column_values_in_set.py +++ b/great_expectations/expectations/metrics/column_map_metrics/column_values_in_set.py @@ -16,9 +16,9 @@ ) try: - import sqlalchemy as sa # noqa: TID251 + import sqlalchemy as sa # noqa: TID251 # FIXME CoP except ImportError: - sa = None # type: ignore[assignment] + sa = None # type: ignore[assignment] # FIXME CoP class ColumnValuesInSet(ColumnMapMetricProvider): diff --git a/great_expectations/expectations/metrics/column_map_metrics/column_values_in_type_list.py b/great_expectations/expectations/metrics/column_map_metrics/column_values_in_type_list.py index e3b97c816a6f..d9bdfe169d55 100644 --- a/great_expectations/expectations/metrics/column_map_metrics/column_values_in_type_list.py +++ b/great_expectations/expectations/metrics/column_map_metrics/column_values_in_type_list.py @@ -18,7 +18,7 @@ class 
ColumnValuesInTypeList(ColumnMapMetricProvider): condition_value_keys = ("type_list",) @column_condition_partial(engine=PandasExecutionEngine) - def _pandas(cls, column, type_list, **kwargs): # noqa: C901 - too complex + def _pandas(cls, column, type_list, **kwargs): # noqa: C901 # too complex comp_types = [] for type_ in type_list: try: @@ -43,6 +43,6 @@ def _pandas(cls, column, type_list, **kwargs): # noqa: C901 - too complex comp_types.extend(native_type) if len(comp_types) < 1: - raise ValueError(f"No recognized numpy/python type in list: {type_list}") # noqa: TRY003 + raise ValueError(f"No recognized numpy/python type in list: {type_list}") # noqa: TRY003 # FIXME CoP return column.map(lambda x: isinstance(x, tuple(comp_types))) diff --git a/great_expectations/expectations/metrics/column_map_metrics/column_values_increasing.py b/great_expectations/expectations/metrics/column_map_metrics/column_values_increasing.py index 4cc02cd36a6d..d33fb8d62583 100644 --- a/great_expectations/expectations/metrics/column_map_metrics/column_values_increasing.py +++ b/great_expectations/expectations/metrics/column_map_metrics/column_values_increasing.py @@ -66,7 +66,7 @@ def _spark( pyspark.types.IntegerType, ), ): - # if column is any type that could have NA values, remove them (not filtered by .isNotNull()) # noqa: E501 + # if column is any type that could have NA values, remove them (not filtered by .isNotNull()) # noqa: E501 # FIXME CoP compute_domain_kwargs = execution_engine.add_column_row_condition( metric_domain_kwargs, filter_null=cls.filter_column_isnull, diff --git a/great_expectations/expectations/metrics/column_map_metrics/column_values_match_json_schema.py b/great_expectations/expectations/metrics/column_map_metrics/column_values_match_json_schema.py index 25ab28f5fea3..bde8edc28762 100644 --- a/great_expectations/expectations/metrics/column_map_metrics/column_values_match_json_schema.py +++ 
b/great_expectations/expectations/metrics/column_map_metrics/column_values_match_json_schema.py @@ -14,7 +14,7 @@ ColumnMapMetricProvider, column_condition_partial, ) -from great_expectations.util import convert_to_json_serializable # noqa: TID251 +from great_expectations.util import convert_to_json_serializable # noqa: TID251 # FIXME CoP class ColumnValuesMatchJsonSchema(ColumnMapMetricProvider): @@ -41,7 +41,7 @@ def matches_json_schema(val): @column_condition_partial(engine=SparkDFExecutionEngine) def _spark(cls, column, json_schema, **kwargs): - # This step insures that Spark UDF defined can be pickled; otherwise, pickle serialization exceptions may occur. # noqa: E501 + # This step insures that Spark UDF defined can be pickled; otherwise, pickle serialization exceptions may occur. # noqa: E501 # FIXME CoP json_schema = convert_to_json_serializable(data=json_schema) def matches_json_schema(val): diff --git a/great_expectations/expectations/metrics/column_map_metrics/column_values_match_like_pattern_list.py b/great_expectations/expectations/metrics/column_map_metrics/column_values_match_like_pattern_list.py index 1c1564e70168..9fcdf0847d83 100644 --- a/great_expectations/expectations/metrics/column_map_metrics/column_values_match_like_pattern_list.py +++ b/great_expectations/expectations/metrics/column_map_metrics/column_values_match_like_pattern_list.py @@ -27,10 +27,10 @@ def _sqlalchemy(cls, column, like_pattern_list, match_on, _dialect, **kwargs): match_on = "any" if match_on not in ["any", "all"]: - raise ValueError("match_on must be any or all") # noqa: TRY003 + raise ValueError("match_on must be any or all") # noqa: TRY003 # FIXME CoP if len(like_pattern_list) == 0: - raise ValueError("At least one like_pattern must be supplied in the like_pattern_list.") # noqa: TRY003 + raise ValueError("At least one like_pattern must be supplied in the like_pattern_list.") # noqa: TRY003 # FIXME CoP like_pattern_expression = get_dialect_like_pattern_expression( column, 
_dialect, like_pattern_list[0] diff --git a/great_expectations/expectations/metrics/column_map_metrics/column_values_match_regex_list.py b/great_expectations/expectations/metrics/column_map_metrics/column_values_match_regex_list.py index 870a79a7343f..361f1b0883d4 100644 --- a/great_expectations/expectations/metrics/column_map_metrics/column_values_match_regex_list.py +++ b/great_expectations/expectations/metrics/column_map_metrics/column_values_match_regex_list.py @@ -39,17 +39,17 @@ def _pandas(cls, column, regex_list, match_on, **kwargs): elif match_on == "all": result = regex_match_df.all(axis="columns") else: - raise ValueError("match_on must be either 'any' or 'all'") # noqa: TRY003 + raise ValueError("match_on must be either 'any' or 'all'") # noqa: TRY003 # FIXME CoP return result @column_condition_partial(engine=SqlAlchemyExecutionEngine) def _sqlalchemy(cls, column, regex_list, match_on, _dialect, **kwargs): if match_on not in ["any", "all"]: - raise ValueError("match_on must be any or all") # noqa: TRY003 + raise ValueError("match_on must be any or all") # noqa: TRY003 # FIXME CoP if len(regex_list) == 0: - raise ValueError("At least one regex must be supplied in the regex_list.") # noqa: TRY003 + raise ValueError("At least one regex must be supplied in the regex_list.") # noqa: TRY003 # FIXME CoP regex_expression = get_dialect_regex_expression(column, regex_list[0], _dialect) if regex_expression is None: @@ -74,4 +74,4 @@ def _spark(cls, column, regex_list, match_on, **kwargs): formatted_regex_list = [f"(?={regex})" for regex in regex_list] return column.rlike("".join(formatted_regex_list)) else: - raise ValueError("match_on must be either 'any' or 'all'") # noqa: TRY003 + raise ValueError("match_on must be either 'any' or 'all'") # noqa: TRY003 # FIXME CoP diff --git a/great_expectations/expectations/metrics/column_map_metrics/column_values_match_strftime_format.py 
b/great_expectations/expectations/metrics/column_map_metrics/column_values_match_strftime_format.py index 4fd08b3c2333..5653d613da2a 100644 --- a/great_expectations/expectations/metrics/column_map_metrics/column_values_match_strftime_format.py +++ b/great_expectations/expectations/metrics/column_map_metrics/column_values_match_strftime_format.py @@ -22,11 +22,11 @@ class ColumnValuesMatchStrftimeFormat(ColumnMapMetricProvider): def _pandas(cls, column, strftime_format, **kwargs): def is_parseable_by_format(val): try: - datetime.strptime(val, strftime_format) # noqa: DTZ007 + datetime.strptime(val, strftime_format) # noqa: DTZ007 # FIXME CoP return True except TypeError: - raise TypeError( # noqa: TRY003 - "Values passed to expect_column_values_to_match_strftime_format must be of type string.\nIf you want to validate a column of dates or timestamps, please call the expectation before converting from string format." # noqa: E501 + raise TypeError( # noqa: TRY003 # FIXME CoP + "Values passed to expect_column_values_to_match_strftime_format must be of type string.\nIf you want to validate a column of dates or timestamps, please call the expectation before converting from string format." # noqa: E501 # FIXME CoP ) except ValueError: return False @@ -35,25 +35,25 @@ def is_parseable_by_format(val): @column_condition_partial(engine=SparkDFExecutionEngine) def _spark(cls, column, strftime_format, **kwargs): - # Below is a simple validation that the provided format can both format and parse a datetime object. # noqa: E501 + # Below is a simple validation that the provided format can both format and parse a datetime object. # noqa: E501 # FIXME CoP # %D is an example of a format that can format but not parse, e.g. 
try: - datetime.strptime( # noqa: DTZ007 - datetime.strftime(datetime.now(), strftime_format), # noqa: DTZ005 + datetime.strptime( # noqa: DTZ007 # FIXME CoP + datetime.strftime(datetime.now(), strftime_format), # noqa: DTZ005 # FIXME CoP strftime_format, ) except ValueError as e: - raise ValueError(f"Unable to use provided strftime_format: {e!s}") # noqa: TRY003 + raise ValueError(f"Unable to use provided strftime_format: {e!s}") # noqa: TRY003 # FIXME CoP def is_parseable_by_format(val): if val is None: return False try: - datetime.strptime(val, strftime_format) # noqa: DTZ007 + datetime.strptime(val, strftime_format) # noqa: DTZ007 # FIXME CoP return True except TypeError: - raise TypeError( # noqa: TRY003 - "Values passed to expect_column_values_to_match_strftime_format must be of type string.\nIf you want to validate a column of dates or timestamps, please call the expectation before converting from string format." # noqa: E501 + raise TypeError( # noqa: TRY003 # FIXME CoP + "Values passed to expect_column_values_to_match_strftime_format must be of type string.\nIf you want to validate a column of dates or timestamps, please call the expectation before converting from string format." 
# noqa: E501 # FIXME CoP ) except ValueError: return False diff --git a/great_expectations/expectations/metrics/column_map_metrics/column_values_non_null.py b/great_expectations/expectations/metrics/column_map_metrics/column_values_non_null.py index 5124daaec33b..adc32ab16101 100644 --- a/great_expectations/expectations/metrics/column_map_metrics/column_values_non_null.py +++ b/great_expectations/expectations/metrics/column_map_metrics/column_values_non_null.py @@ -38,7 +38,7 @@ def _pandas(cls, column, **kwargs): @column_condition_partial(engine=SqlAlchemyExecutionEngine) def _sqlalchemy(cls, column, **kwargs): - return column != None # noqa: E711 + return column != None # noqa: E711 # FIXME CoP @column_condition_partial(engine=SparkDFExecutionEngine) def _spark(cls, column, **kwargs): diff --git a/great_expectations/expectations/metrics/column_map_metrics/column_values_not_match_like_pattern_list.py b/great_expectations/expectations/metrics/column_map_metrics/column_values_not_match_like_pattern_list.py index fc99c5b69035..34ac36f1aee8 100644 --- a/great_expectations/expectations/metrics/column_map_metrics/column_values_not_match_like_pattern_list.py +++ b/great_expectations/expectations/metrics/column_map_metrics/column_values_not_match_like_pattern_list.py @@ -24,7 +24,7 @@ class ColumnValuesNotMatchLikePatternList(ColumnMapMetricProvider): @column_condition_partial(engine=SqlAlchemyExecutionEngine) def _sqlalchemy(cls, column, like_pattern_list, _dialect, **kwargs): if len(like_pattern_list) == 0: - raise ValueError("At least one like_pattern must be supplied in the like_pattern_list.") # noqa: TRY003 + raise ValueError("At least one like_pattern must be supplied in the like_pattern_list.") # noqa: TRY003 # FIXME CoP like_pattern_expression = get_dialect_like_pattern_expression( column, _dialect, like_pattern_list[0], positive=False diff --git a/great_expectations/expectations/metrics/column_map_metrics/column_values_not_match_regex_list.py 
b/great_expectations/expectations/metrics/column_map_metrics/column_values_not_match_regex_list.py index 2b37d880a1c6..efae8989b911 100644 --- a/great_expectations/expectations/metrics/column_map_metrics/column_values_not_match_regex_list.py +++ b/great_expectations/expectations/metrics/column_map_metrics/column_values_not_match_regex_list.py @@ -35,7 +35,7 @@ def _pandas(cls, column, regex_list, **kwargs): @column_condition_partial(engine=SqlAlchemyExecutionEngine) def _sqlalchemy(cls, column, regex_list, _dialect, **kwargs): if len(regex_list) == 0: - raise ValueError("At least one regex must be supplied in the regex_list.") # noqa: TRY003 + raise ValueError("At least one regex must be supplied in the regex_list.") # noqa: TRY003 # FIXME CoP regex_expression = get_dialect_regex_expression( column, regex_list[0], _dialect, positive=False diff --git a/great_expectations/expectations/metrics/column_map_metrics/column_values_null.py b/great_expectations/expectations/metrics/column_map_metrics/column_values_null.py index 247895e9ee6c..e5e2afd89d0d 100644 --- a/great_expectations/expectations/metrics/column_map_metrics/column_values_null.py +++ b/great_expectations/expectations/metrics/column_map_metrics/column_values_null.py @@ -38,7 +38,7 @@ def _pandas(cls, column, **kwargs): @column_condition_partial(engine=SqlAlchemyExecutionEngine) def _sqlalchemy(cls, column, **kwargs): - return column == None # noqa: E711 + return column == None # noqa: E711 # FIXME CoP @column_condition_partial(engine=SparkDFExecutionEngine) def _spark(cls, column, **kwargs): diff --git a/great_expectations/expectations/metrics/column_map_metrics/column_values_of_type.py b/great_expectations/expectations/metrics/column_map_metrics/column_values_of_type.py index f73f13e62b18..4ce99de6dd3f 100644 --- a/great_expectations/expectations/metrics/column_map_metrics/column_values_of_type.py +++ b/great_expectations/expectations/metrics/column_map_metrics/column_values_of_type.py @@ -42,6 +42,6 @@ def 
_pandas(cls, column, type_, **kwargs): comp_types.extend(native_type) if len(comp_types) < 1: - raise ValueError(f"Unrecognized numpy/python type: {type_}") # noqa: TRY003 + raise ValueError(f"Unrecognized numpy/python type: {type_}") # noqa: TRY003 # FIXME CoP return column.map(lambda x: isinstance(x, tuple(comp_types))) diff --git a/great_expectations/expectations/metrics/column_map_metrics/column_values_unique.py b/great_expectations/expectations/metrics/column_map_metrics/column_values_unique.py index 13477909cccb..10ca47fe9457 100644 --- a/great_expectations/expectations/metrics/column_map_metrics/column_values_unique.py +++ b/great_expectations/expectations/metrics/column_map_metrics/column_values_unique.py @@ -45,9 +45,9 @@ def _pandas(cls, column, **kwargs): ) def _sqlalchemy_window(cls, column, _table, **kwargs): # Will - 20210126 - # This is a special case that needs to be handled for mysql, where you cannot refer to a temp_table # noqa: E501 - # more than once in the same query. So instead of passing dup_query as-is, a second temp_table is created with # noqa: E501 - # the column we will be performing the expectation on, and the query is performed against it. # noqa: E501 + # This is a special case that needs to be handled for mysql, where you cannot refer to a temp_table # noqa: E501 # FIXME CoP + # more than once in the same query. So instead of passing dup_query as-is, a second temp_table is created with # noqa: E501 # FIXME CoP + # the column we will be performing the expectation on, and the query is performed against it. 
# noqa: E501 # FIXME CoP dialect = kwargs.get("_dialect") sql_engine = kwargs.get("_sqlalchemy_engine") execution_engine = kwargs.get("_execution_engine") @@ -60,7 +60,7 @@ def _sqlalchemy_window(cls, column, _table, **kwargs): dialect_name = "" if sql_engine and dialect and dialect_name == "mysql": temp_table_name = generate_temporary_table_name() - temp_table_stmt = f"CREATE TEMPORARY TABLE {temp_table_name} AS SELECT tmp.{column.name} FROM {_table} tmp" # noqa: E501 + temp_table_stmt = f"CREATE TEMPORARY TABLE {temp_table_name} AS SELECT tmp.{column.name} FROM {_table} tmp" # noqa: E501 # FIXME CoP execution_engine.execute_query_in_transaction(sa.text(temp_table_stmt)) dup_query = ( sa.select(column) diff --git a/great_expectations/expectations/metrics/column_map_metrics/column_values_z_score.py b/great_expectations/expectations/metrics/column_map_metrics/column_values_z_score.py index 08c7deec3b93..86b3e8db30fe 100644 --- a/great_expectations/expectations/metrics/column_map_metrics/column_values_z_score.py +++ b/great_expectations/expectations/metrics/column_map_metrics/column_values_z_score.py @@ -48,7 +48,7 @@ def _pandas_function(self, column, _metrics, **kwargs): try: return (column - mean) / std_dev except TypeError: - raise (TypeError("Cannot complete Z-score calculations on a non-numerical column.")) # noqa: TRY003 + raise (TypeError("Cannot complete Z-score calculations on a non-numerical column.")) # noqa: TRY003 # FIXME CoP @column_condition_partial(engine=PandasExecutionEngine) # type: ignore[misc] # untyped-decorator def _pandas_condition(cls, column, _metrics, threshold, double_sided, **kwargs) -> pd.Series: @@ -63,7 +63,7 @@ def _pandas_condition(cls, column, _metrics, threshold, double_sided, **kwargs) under_threshold = z_score < threshold return under_threshold except TypeError: - raise (TypeError("Cannot check if a string lies under a numerical threshold")) # noqa: TRY003 + raise (TypeError("Cannot check if a string lies under a numerical 
threshold")) # noqa: TRY003 # FIXME CoP @column_function_partial(engine=SqlAlchemyExecutionEngine) def _sqlalchemy_function(cls, column, _metrics, _dialect, **kwargs): @@ -112,7 +112,7 @@ def _get_evaluation_dependencies( runtime_configuration: Optional[dict] = None, ): """Returns a dictionary of given metric names and their corresponding configuration, specifying the metric - types and their respective domains""" # noqa: E501 + types and their respective domains""" # noqa: E501 # FIXME CoP dependencies: dict = super()._get_evaluation_dependencies( metric=metric, configuration=configuration, @@ -122,7 +122,7 @@ def _get_evaluation_dependencies( if ( metric.metric_name - == f"column_values.z_score.under_threshold.{MetricPartialFunctionTypeSuffixes.CONDITION.value}" # noqa: E501 + == f"column_values.z_score.under_threshold.{MetricPartialFunctionTypeSuffixes.CONDITION.value}" # noqa: E501 # FIXME CoP ): dependencies[f"column_values.z_score.{MetricPartialFunctionTypeSuffixes.MAP.value}"] = ( MetricConfiguration( diff --git a/great_expectations/expectations/metrics/column_pair_map_metrics/column_pair_values_equal.py b/great_expectations/expectations/metrics/column_pair_map_metrics/column_pair_values_equal.py index 634f8b9602e5..16ea42e36bfa 100644 --- a/great_expectations/expectations/metrics/column_pair_map_metrics/column_pair_values_equal.py +++ b/great_expectations/expectations/metrics/column_pair_map_metrics/column_pair_values_equal.py @@ -35,7 +35,7 @@ def _pandas(cls, column_A, column_B, **kwargs): def _sqlalchemy(cls, column_A, column_B, **kwargs): row_wise_cond = sa.and_( column_A == column_B, - sa.not_(sa.or_(column_A == None, column_B == None)), # noqa: E711 + sa.not_(sa.or_(column_A == None, column_B == None)), # noqa: E711 # FIXME CoP ) return row_wise_cond diff --git a/great_expectations/expectations/metrics/column_pair_map_metrics/column_pair_values_greater.py b/great_expectations/expectations/metrics/column_pair_map_metrics/column_pair_values_greater.py 
index 432ccf51ea7b..1c8daf21c256 100644 --- a/great_expectations/expectations/metrics/column_pair_map_metrics/column_pair_values_greater.py +++ b/great_expectations/expectations/metrics/column_pair_map_metrics/column_pair_values_greater.py @@ -44,7 +44,7 @@ def _sqlalchemy(cls, column_A, column_B, **kwargs): if or_equal: return sa.or_( column_A >= column_B, - sa.and_(column_A == None, column_B == None), # noqa: E711 + sa.and_(column_A == None, column_B == None), # noqa: E711 # FIXME CoP ) else: return column_A > column_B diff --git a/great_expectations/expectations/metrics/map_metric_provider/__init__.py b/great_expectations/expectations/metrics/map_metric_provider/__init__.py index 9bff47c4d36c..ab9a0d59c97f 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/__init__.py +++ b/great_expectations/expectations/metrics/map_metric_provider/__init__.py @@ -10,24 +10,24 @@ from great_expectations.expectations.metrics.map_metric_provider.column_map_metric_provider import ( ColumnMapMetricProvider, ) -from great_expectations.expectations.metrics.map_metric_provider.column_pair_condition_partial import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.column_pair_condition_partial import ( # noqa: E501 # FIXME CoP column_pair_condition_partial, ) -from great_expectations.expectations.metrics.map_metric_provider.column_pair_function_partial import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.column_pair_function_partial import ( # noqa: E501 # FIXME CoP column_pair_function_partial, ) -from great_expectations.expectations.metrics.map_metric_provider.column_pair_map_metric_provider import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.column_pair_map_metric_provider import ( # noqa: E501 # FIXME CoP ColumnPairMapMetricProvider, ) from great_expectations.expectations.metrics.map_metric_provider.map_metric_provider import ( MapMetricProvider, ) -from 
great_expectations.expectations.metrics.map_metric_provider.multicolumn_condition_partial import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.multicolumn_condition_partial import ( # noqa: E501 # FIXME CoP multicolumn_condition_partial, ) -from great_expectations.expectations.metrics.map_metric_provider.multicolumn_function_partial import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.multicolumn_function_partial import ( # noqa: E501 # FIXME CoP multicolumn_function_partial, ) -from great_expectations.expectations.metrics.map_metric_provider.multicolumn_map_metric_provider import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.multicolumn_map_metric_provider import ( # noqa: E501 # FIXME CoP MulticolumnMapMetricProvider, ) diff --git a/great_expectations/expectations/metrics/map_metric_provider/column_condition_partial.py b/great_expectations/expectations/metrics/map_metric_provider/column_condition_partial.py index 17046a1c337b..0dcca47dce4e 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/column_condition_partial.py +++ b/great_expectations/expectations/metrics/map_metric_provider/column_condition_partial.py @@ -38,7 +38,7 @@ from great_expectations.compatibility import sqlalchemy -def column_condition_partial( # noqa: C901, PLR0915 +def column_condition_partial( # noqa: C901, PLR0915 # FIXME CoP engine: Type[ExecutionEngine], partial_fn_type: Optional[MetricPartialFunctionTypes] = None, **kwargs, @@ -58,7 +58,7 @@ def column_condition_partial( # noqa: C901, PLR0915 Returns: An annotated metric_function which will be called with a simplified signature. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_type = MetricDomainTypes.COLUMN if issubclass(engine, PandasExecutionEngine): if partial_fn_type is None: @@ -66,9 +66,9 @@ def column_condition_partial( # noqa: C901, PLR0915 partial_fn_type = MetricPartialFunctionTypes(partial_fn_type) if partial_fn_type not in [MetricPartialFunctionTypes.MAP_CONDITION_SERIES]: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""PandasExecutionEngine only supports "{MetricPartialFunctionTypes.MAP_CONDITION_SERIES.value}" for \ -"column_condition_partial" "partial_fn_type" property.""" # noqa: E501 +"column_condition_partial" "partial_fn_type" property.""" # noqa: E501 # FIXME CoP ) def wrapper(metric_fn: Callable): @@ -81,7 +81,7 @@ def wrapper(metric_fn: Callable): **kwargs, ) @wraps(metric_fn) - def inner_func( # noqa: PLR0913 + def inner_func( # noqa: PLR0913 # FIXME CoP cls, execution_engine: PandasExecutionEngine, metric_domain_kwargs: dict, @@ -135,9 +135,9 @@ def inner_func( # noqa: PLR0913 MetricPartialFunctionTypes.MAP_CONDITION_FN, MetricPartialFunctionTypes.WINDOW_CONDITION_FN, ]: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""SqlAlchemyExecutionEngine only supports "{MetricPartialFunctionTypes.MAP_CONDITION_FN.value}" and \ -"{MetricPartialFunctionTypes.WINDOW_CONDITION_FN.value}" for "column_condition_partial" "partial_fn_type" property.""" # noqa: E501 +"{MetricPartialFunctionTypes.WINDOW_CONDITION_FN.value}" for "column_condition_partial" "partial_fn_type" property.""" # noqa: E501 # FIXME CoP ) def wrapper(metric_fn: Callable): @@ -150,7 +150,7 @@ def wrapper(metric_fn: Callable): **kwargs, ) @wraps(metric_fn) - def inner_func( # noqa: PLR0913 + def inner_func( # noqa: PLR0913 # FIXME CoP cls, execution_engine: SqlAlchemyExecutionEngine, metric_domain_kwargs: dict, @@ -179,7 +179,7 @@ def inner_func( # noqa: PLR0913 if dialect is None: # Trino if hasattr(sqlalchemy_engine, "dialect"): - 
dialect = sqlalchemy_engine.dialect # type: ignore[assignment] + dialect = sqlalchemy_engine.dialect # type: ignore[assignment] # FIXME CoP expected_condition = metric_fn( cls, @@ -196,7 +196,7 @@ def inner_func( # noqa: PLR0913 "filter_column_isnull", getattr(cls, "filter_column_isnull", True) ) if filter_column_isnull: - # If we "filter" (ignore) nulls then we allow null as part of our new expected condition # noqa: E501 + # If we "filter" (ignore) nulls then we allow null as part of our new expected condition # noqa: E501 # FIXME CoP unexpected_condition = sa.and_( sa.not_(sa.column(column_name).is_(None)), sa.not_(expected_condition), @@ -222,9 +222,9 @@ def inner_func( # noqa: PLR0913 MetricPartialFunctionTypes.MAP_CONDITION_FN, MetricPartialFunctionTypes.WINDOW_CONDITION_FN, ]: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""SparkDFExecutionEngine only supports "{MetricPartialFunctionTypes.MAP_CONDITION_FN.value}" and \ -"{MetricPartialFunctionTypes.WINDOW_CONDITION_FN.value}" for "column_condition_partial" "partial_fn_type" property.""" # noqa: E501 +"{MetricPartialFunctionTypes.WINDOW_CONDITION_FN.value}" for "column_condition_partial" "partial_fn_type" property.""" # noqa: E501 # FIXME CoP ) def wrapper(metric_fn: Callable): @@ -235,7 +235,7 @@ def wrapper(metric_fn: Callable): **kwargs, ) @wraps(metric_fn) - def inner_func( # noqa: PLR0913 + def inner_func( # noqa: PLR0913 # FIXME CoP cls, execution_engine: SparkDFExecutionEngine, metric_domain_kwargs: dict, @@ -277,7 +277,7 @@ def inner_func( # noqa: PLR0913 compute_domain_kwargs, column_name=column_name ) unexpected_condition = ~expected_condition - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if filter_column_isnull: unexpected_condition = column.isNotNull() & ~expected_condition else: @@ -293,6 +293,6 @@ def inner_func( # noqa: PLR0913 return wrapper else: - raise ValueError( # noqa: TRY003, TRY004 + raise ValueError( # noqa: TRY003, TRY004 # FIXME 
CoP 'Unsupported engine for "column_condition_partial" metric function decorator.' ) diff --git a/great_expectations/expectations/metrics/map_metric_provider/column_function_partial.py b/great_expectations/expectations/metrics/map_metric_provider/column_function_partial.py index 7b658f44fe8d..17335873cefe 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/column_function_partial.py +++ b/great_expectations/expectations/metrics/map_metric_provider/column_function_partial.py @@ -36,7 +36,7 @@ from great_expectations.compatibility import sqlalchemy -def column_function_partial( # noqa: C901, PLR0915 +def column_function_partial( # noqa: C901, PLR0915 # FIXME CoP engine: Type[ExecutionEngine], partial_fn_type: Optional[MetricPartialFunctionTypes] = None, **kwargs, @@ -54,7 +54,7 @@ def column_function_partial( # noqa: C901, PLR0915 Returns: An annotated metric_function which will be called with a simplified signature. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_type = MetricDomainTypes.COLUMN if issubclass(engine, PandasExecutionEngine): if partial_fn_type is None: @@ -62,9 +62,9 @@ def column_function_partial( # noqa: C901, PLR0915 partial_fn_type = MetricPartialFunctionTypes(partial_fn_type) if partial_fn_type != MetricPartialFunctionTypes.MAP_SERIES: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""PandasExecutionEngine only supports "{MetricPartialFunctionTypes.MAP_SERIES.value}" for \ -"column_function_partial" "partial_fn_type" property.""" # noqa: E501 +"column_function_partial" "partial_fn_type" property.""" # noqa: E501 # FIXME CoP ) def wrapper(metric_fn: Callable): @@ -75,7 +75,7 @@ def wrapper(metric_fn: Callable): **kwargs, ) @wraps(metric_fn) - def inner_func( # noqa: PLR0913 + def inner_func( # noqa: PLR0913 # FIXME CoP cls, execution_engine: PandasExecutionEngine, metric_domain_kwargs: dict, @@ -122,9 +122,9 @@ def inner_func( # noqa: PLR0913 partial_fn_type = 
MetricPartialFunctionTypes(partial_fn_type) if partial_fn_type not in [MetricPartialFunctionTypes.MAP_FN]: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""SqlAlchemyExecutionEngine only supports "{MetricPartialFunctionTypes.MAP_FN.value}" for \ -"column_function_partial" "partial_fn_type" property.""" # noqa: E501 +"column_function_partial" "partial_fn_type" property.""" # noqa: E501 # FIXME CoP ) def wrapper(metric_fn: Callable): @@ -135,7 +135,7 @@ def wrapper(metric_fn: Callable): **kwargs, ) @wraps(metric_fn) - def inner_func( # noqa: PLR0913 + def inner_func( # noqa: PLR0913 # FIXME CoP cls, execution_engine: SqlAlchemyExecutionEngine, metric_domain_kwargs: dict, @@ -157,7 +157,7 @@ def inner_func( # noqa: PLR0913 metric_domain_kwargs ) else: - # We do not copy here because if compute domain is different, it will be copied by get_compute_domain # noqa: E501 + # We do not copy here because if compute domain is different, it will be copied by get_compute_domain # noqa: E501 # FIXME CoP compute_domain_kwargs = metric_domain_kwargs ( @@ -194,9 +194,9 @@ def inner_func( # noqa: PLR0913 MetricPartialFunctionTypes.MAP_FN, MetricPartialFunctionTypes.WINDOW_FN, ]: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""SparkDFExecutionEngine only supports "{MetricPartialFunctionTypes.MAP_FN.value}" and \ -"{MetricPartialFunctionTypes.WINDOW_FN.value}" for "column_function_partial" "partial_fn_type" property.""" # noqa: E501 +"{MetricPartialFunctionTypes.WINDOW_FN.value}" for "column_function_partial" "partial_fn_type" property.""" # noqa: E501 # FIXME CoP ) def wrapper(metric_fn: Callable): @@ -207,7 +207,7 @@ def wrapper(metric_fn: Callable): **kwargs, ) @wraps(metric_fn) - def inner_func( # noqa: PLR0913 + def inner_func( # noqa: PLR0913 # FIXME CoP cls, execution_engine: SparkDFExecutionEngine, metric_domain_kwargs: dict, @@ -229,7 +229,7 @@ def inner_func( # noqa: PLR0913 metric_domain_kwargs ) 
else: - # We do not copy here because if compute domain is different, it will be copied by get_compute_domain # noqa: E501 + # We do not copy here because if compute domain is different, it will be copied by get_compute_domain # noqa: E501 # FIXME CoP compute_domain_kwargs = metric_domain_kwargs ( @@ -257,6 +257,6 @@ def inner_func( # noqa: PLR0913 return wrapper else: - raise ValueError( # noqa: TRY003, TRY004 + raise ValueError( # noqa: TRY003, TRY004 # FIXME CoP 'Unsupported engine for "column_function_partial" metric function decorator.' ) diff --git a/great_expectations/expectations/metrics/map_metric_provider/column_map_condition_auxilliary_methods.py b/great_expectations/expectations/metrics/map_metric_provider/column_map_condition_auxilliary_methods.py index 6023943d644a..e013b4b7282d 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/column_map_condition_auxilliary_methods.py +++ b/great_expectations/expectations/metrics/map_metric_provider/column_map_condition_auxilliary_methods.py @@ -26,7 +26,7 @@ from great_expectations.compatibility.pyspark import functions as F from great_expectations.compatibility.sqlalchemy import sqlalchemy as sa from great_expectations.execution_engine.sqlalchemy_dialect import GXSqlDialect -from great_expectations.expectations.metrics.map_metric_provider.is_sqlalchemy_metric_selectable import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.is_sqlalchemy_metric_selectable import ( # noqa: E501 # FIXME CoP _is_sqlalchemy_metric_selectable, ) @@ -47,7 +47,7 @@ def _pandas_column_map_condition_values( metrics: Dict[str, Any], **kwargs, ) -> list[dict]: - """Return values from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 + """Return values from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 # FIXME CoP ( boolean_mapped_unexpected_values, compute_domain_kwargs, @@ -55,10 +55,10 @@ 
def _pandas_column_map_condition_values( ) = metrics["unexpected_condition"] if "column" not in accessor_domain_kwargs: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP """No "column" found in provided metric_domain_kwargs, but it is required for a column map metric (_pandas_column_map_condition_values). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) accessor_domain_kwargs = get_dbms_compatible_metric_domain_kwargs( @@ -84,7 +84,7 @@ def _pandas_column_map_condition_values( domain_values = df[column_name] domain_values = domain_values[ - boolean_mapped_unexpected_values == True # noqa: E712 + boolean_mapped_unexpected_values == True # noqa: E712 # FIXME CoP ] result_format = metric_value_kwargs["result_format"] @@ -112,10 +112,10 @@ def _pandas_column_map_condition_value_counts( ) = metrics["unexpected_condition"] if "column" not in accessor_domain_kwargs: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP """No "column" found in provided metric_domain_kwargs, but it is required for a column map metric (_pandas_column_map_condition_value_counts). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) accessor_domain_kwargs = get_dbms_compatible_metric_domain_kwargs( @@ -153,7 +153,7 @@ def _pandas_column_map_condition_value_counts( pass if not value_counts: - raise gx_exceptions.MetricComputationError("Unable to compute value counts") # noqa: TRY003 + raise gx_exceptions.MetricComputationError("Unable to compute value counts") # noqa: TRY003 # FIXME CoP if result_format["result_format"] == "COMPLETE": return value_counts @@ -172,16 +172,16 @@ def _sqlalchemy_column_map_condition_values( """ Particularly for the purpose of finding unexpected values, returns all the metric values which do not meet an expected Expectation condition for ColumnMapExpectation Expectations. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP unexpected_condition, compute_domain_kwargs, accessor_domain_kwargs = metrics[ "unexpected_condition" ] if "column" not in accessor_domain_kwargs: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP """No "column" found in provided metric_domain_kwargs, but it is required for a column map metric (_sqlalchemy_column_map_condition_values). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) accessor_domain_kwargs = get_dbms_compatible_metric_domain_kwargs( @@ -193,12 +193,12 @@ def _sqlalchemy_column_map_condition_values( selectable = execution_engine.get_domain_records(domain_kwargs=compute_domain_kwargs) - query = sa.select(sa.column(column_name).label("unexpected_values")).where(unexpected_condition) # type: ignore[var-annotated] + query = sa.select(sa.column(column_name).label("unexpected_values")).where(unexpected_condition) # type: ignore[var-annotated] # FIXME CoP if not _is_sqlalchemy_metric_selectable(map_metric_provider=cls): if hasattr(selectable, "subquery"): query = query.select_from(selectable.subquery()) else: - query = query.select_from(selectable) # type: ignore[arg-type] + query = query.select_from(selectable) # type: ignore[arg-type] # FIXME CoP result_format = metric_value_kwargs["result_format"] @@ -231,16 +231,16 @@ def _sqlalchemy_column_map_condition_value_counts( """ Returns value counts for all the metric values which do not meet an expected Expectation condition for instances of ColumnMapExpectation. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP unexpected_condition, compute_domain_kwargs, accessor_domain_kwargs = metrics[ "unexpected_condition" ] if "column" not in accessor_domain_kwargs: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP """No "column" found in provided metric_domain_kwargs, but it is required for a column map metric (_sqlalchemy_column_map_condition_value_counts). 
-""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) accessor_domain_kwargs = get_dbms_compatible_metric_domain_kwargs( @@ -250,13 +250,13 @@ def _sqlalchemy_column_map_condition_value_counts( column_name: Union[str, sqlalchemy.quoted_name] = accessor_domain_kwargs["column"] - column: sa.Column = sa.column(column_name) # type: ignore[assignment] + column: sa.Column = sa.column(column_name) # type: ignore[assignment] # FIXME CoP selectable = execution_engine.get_domain_records(domain_kwargs=compute_domain_kwargs) query = sa.select(column, sa.func.count(column)).where(unexpected_condition).group_by(column) if not _is_sqlalchemy_metric_selectable(map_metric_provider=cls): - query = query.select_from(selectable) # type: ignore[arg-type] + query = query.select_from(selectable) # type: ignore[arg-type] # FIXME CoP return execution_engine.execute_query(query).fetchall() @@ -269,16 +269,16 @@ def _spark_column_map_condition_values( metrics: Dict[str, Any], **kwargs, ) -> list[dict]: - """Return values from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 + """Return values from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 # FIXME CoP unexpected_condition, compute_domain_kwargs, accessor_domain_kwargs = metrics[ "unexpected_condition" ] if "column" not in accessor_domain_kwargs: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP """No "column" found in provided metric_domain_kwargs, but it is required for a column map metric (_spark_column_map_condition_values). 
-""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) accessor_domain_kwargs = get_dbms_compatible_metric_domain_kwargs( @@ -292,7 +292,7 @@ def _spark_column_map_condition_values( # withColumn is required to transform window functions returned by some metrics to boolean mask data = df.withColumn("__unexpected", unexpected_condition) - filtered = data.filter(F.col("__unexpected") == True).drop( # noqa: E712 + filtered = data.filter(F.col("__unexpected") == True).drop( # noqa: E712 # FIXME CoP F.col("__unexpected") ) @@ -322,10 +322,10 @@ def _spark_column_map_condition_value_counts( ] if "column" not in accessor_domain_kwargs: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP """No "column" found in provided metric_domain_kwargs, but it is required for a column map metric (_spark_column_map_condition_value_counts). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) accessor_domain_kwargs = get_dbms_compatible_metric_domain_kwargs( @@ -339,7 +339,7 @@ def _spark_column_map_condition_value_counts( # withColumn is required to transform window functions returned by some metrics to boolean mask data = df.withColumn("__unexpected", unexpected_condition) - filtered = data.filter(F.col("__unexpected") == True).drop( # noqa: E712 + filtered = data.filter(F.col("__unexpected") == True).drop( # noqa: E712 # FIXME CoP F.col("__unexpected") ) diff --git a/great_expectations/expectations/metrics/map_metric_provider/column_map_metric_provider.py b/great_expectations/expectations/metrics/map_metric_provider/column_map_metric_provider.py index 8c724cd9512d..0e4192581d35 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/column_map_metric_provider.py +++ b/great_expectations/expectations/metrics/map_metric_provider/column_map_metric_provider.py @@ -25,7 +25,7 @@ class ColumnMapMetricProvider(MapMetricProvider): ---Documentation--- - 
https://docs.greatexpectations.io/docs/guides/expectations/creating_custom_expectations/how_to_create_custom_column_map_expectations - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP condition_domain_keys = ( "batch_id", diff --git a/great_expectations/expectations/metrics/map_metric_provider/column_pair_condition_partial.py b/great_expectations/expectations/metrics/map_metric_provider/column_pair_condition_partial.py index 6bbdef7c2a86..8b524f5644eb 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/column_pair_condition_partial.py +++ b/great_expectations/expectations/metrics/map_metric_provider/column_pair_condition_partial.py @@ -37,7 +37,7 @@ from great_expectations.compatibility import sqlalchemy -def column_pair_condition_partial( # noqa: C901 - 16 +def column_pair_condition_partial( # noqa: C901 # 16 engine: Type[ExecutionEngine], partial_fn_type: Optional[MetricPartialFunctionTypes] = None, **kwargs, @@ -57,7 +57,7 @@ def column_pair_condition_partial( # noqa: C901 - 16 Returns: An annotated metric_function which will be called with a simplified signature. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_type = MetricDomainTypes.COLUMN_PAIR if issubclass(engine, PandasExecutionEngine): if partial_fn_type is None: @@ -65,9 +65,9 @@ def column_pair_condition_partial( # noqa: C901 - 16 partial_fn_type = MetricPartialFunctionTypes(partial_fn_type) if partial_fn_type not in [MetricPartialFunctionTypes.MAP_CONDITION_SERIES]: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""PandasExecutionEngine only supports "{MetricPartialFunctionTypes.MAP_CONDITION_SERIES.value}" for \ -"column_pair_function_partial" "partial_fn_type" property.""" # noqa: E501 +"column_pair_function_partial" "partial_fn_type" property.""" # noqa: E501 # FIXME CoP ) def wrapper(metric_fn: Callable): @@ -80,7 +80,7 @@ def wrapper(metric_fn: Callable): **kwargs, ) @wraps(metric_fn) - def inner_func( # noqa: PLR0913 + def inner_func( # noqa: PLR0913 # FIXME CoP cls, execution_engine: PandasExecutionEngine, metric_domain_kwargs: dict, @@ -132,10 +132,10 @@ def inner_func( # noqa: PLR0913 MetricPartialFunctionTypes.MAP_CONDITION_FN, MetricPartialFunctionTypes.WINDOW_CONDITION_FN, ]: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""SqlAlchemyExecutionEngine only supports "{MetricPartialFunctionTypes.MAP_CONDITION_FN.value}" and \ "{MetricPartialFunctionTypes.WINDOW_CONDITION_FN.value}" for "column_pair_condition_partial" "partial_fn_type" property. 
-""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) def wrapper(metric_fn: Callable): @@ -148,7 +148,7 @@ def wrapper(metric_fn: Callable): **kwargs, ) @wraps(metric_fn) - def inner_func( # noqa: PLR0913 + def inner_func( # noqa: PLR0913 # FIXME CoP cls, execution_engine: SqlAlchemyExecutionEngine, metric_domain_kwargs: dict, @@ -208,10 +208,10 @@ def inner_func( # noqa: PLR0913 MetricPartialFunctionTypes.MAP_CONDITION_FN, MetricPartialFunctionTypes.WINDOW_CONDITION_FN, ]: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""SparkDFExecutionEngine only supports "{MetricPartialFunctionTypes.MAP_CONDITION_FN.value}" and \ "{MetricPartialFunctionTypes.WINDOW_CONDITION_FN.value}" for "column_pair_condition_partial" "partial_fn_type" property. -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) def wrapper(metric_fn: Callable): @@ -222,7 +222,7 @@ def wrapper(metric_fn: Callable): **kwargs, ) @wraps(metric_fn) - def inner_func( # noqa: PLR0913 + def inner_func( # noqa: PLR0913 # FIXME CoP cls, execution_engine: SparkDFExecutionEngine, metric_domain_kwargs: dict, @@ -266,6 +266,6 @@ def inner_func( # noqa: PLR0913 return wrapper else: - raise ValueError( # noqa: TRY003, TRY004 + raise ValueError( # noqa: TRY003, TRY004 # FIXME CoP 'Unsupported engine for "column_pair_condition_partial" metric function decorator.' 
) diff --git a/great_expectations/expectations/metrics/map_metric_provider/column_pair_function_partial.py b/great_expectations/expectations/metrics/map_metric_provider/column_pair_function_partial.py index 2c7949cb7e7e..12f2ba114a70 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/column_pair_function_partial.py +++ b/great_expectations/expectations/metrics/map_metric_provider/column_pair_function_partial.py @@ -30,7 +30,7 @@ logger = logging.getLogger(__name__) -def column_pair_function_partial( # noqa: C901 - 16 +def column_pair_function_partial( # noqa: C901 # 16 engine: Type[ExecutionEngine], partial_fn_type: MetricPartialFunctionTypes | None = None, **kwargs, @@ -47,7 +47,7 @@ def column_pair_function_partial( # noqa: C901 - 16 Returns: An annotated metric_function which will be called with a simplified signature. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_type = MetricDomainTypes.COLUMN_PAIR if issubclass(engine, PandasExecutionEngine): if partial_fn_type is None: @@ -55,9 +55,9 @@ def column_pair_function_partial( # noqa: C901 - 16 partial_fn_type = MetricPartialFunctionTypes(partial_fn_type) if partial_fn_type != MetricPartialFunctionTypes.MAP_SERIES: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""PandasExecutionEngine only supports "{MetricPartialFunctionTypes.MAP_SERIES.value}" for \ -"column_pair_function_partial" "partial_fn_type" property.""" # noqa: E501 +"column_pair_function_partial" "partial_fn_type" property.""" # noqa: E501 # FIXME CoP ) def wrapper(metric_fn: Callable): @@ -70,7 +70,7 @@ def wrapper(metric_fn: Callable): **kwargs, ) @wraps(metric_fn) - def inner_func( # noqa: PLR0913 + def inner_func( # noqa: PLR0913 # FIXME CoP cls, execution_engine: PandasExecutionEngine, metric_domain_kwargs: dict, @@ -115,9 +115,9 @@ def inner_func( # noqa: PLR0913 partial_fn_type = MetricPartialFunctionTypes(partial_fn_type) if partial_fn_type != MetricPartialFunctionTypes.MAP_FN: 
- raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""SqlAlchemyExecutionEngine only supports "{MetricPartialFunctionTypes.MAP_FN.value}" for \ -"column_pair_function_partial" "partial_fn_type" property.""" # noqa: E501 +"column_pair_function_partial" "partial_fn_type" property.""" # noqa: E501 # FIXME CoP ) def wrapper(metric_fn: Callable): @@ -130,7 +130,7 @@ def wrapper(metric_fn: Callable): **kwargs, ) @wraps(metric_fn) - def inner_func( # noqa: PLR0913 + def inner_func( # noqa: PLR0913 # FIXME CoP cls, execution_engine: SqlAlchemyExecutionEngine, metric_domain_kwargs: dict, @@ -179,9 +179,9 @@ def inner_func( # noqa: PLR0913 partial_fn_type = MetricPartialFunctionTypes(partial_fn_type) if partial_fn_type != MetricPartialFunctionTypes.MAP_FN: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""SparkDFExecutionEngine only supports "{MetricPartialFunctionTypes.MAP_FN.value}" for \ -"column_pair_function_partial" "partial_fn_type" property.""" # noqa: E501 +"column_pair_function_partial" "partial_fn_type" property.""" # noqa: E501 # FIXME CoP ) def wrapper(metric_fn: Callable): @@ -192,7 +192,7 @@ def wrapper(metric_fn: Callable): **kwargs, ) @wraps(metric_fn) - def inner_func( # noqa: PLR0913 + def inner_func( # noqa: PLR0913 # FIXME CoP cls, execution_engine: SparkDFExecutionEngine, metric_domain_kwargs: dict, @@ -236,6 +236,6 @@ def inner_func( # noqa: PLR0913 return wrapper else: - raise ValueError( # noqa: TRY003, TRY004 + raise ValueError( # noqa: TRY003, TRY004 # FIXME CoP 'Unsupported engine for "column_pair_function_partial" metric function decorator.' 
) diff --git a/great_expectations/expectations/metrics/map_metric_provider/column_pair_map_condition_auxilliary_methods.py b/great_expectations/expectations/metrics/map_metric_provider/column_pair_map_condition_auxilliary_methods.py index ae061aa13c66..1de24f5259c3 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/column_pair_map_condition_auxilliary_methods.py +++ b/great_expectations/expectations/metrics/map_metric_provider/column_pair_map_condition_auxilliary_methods.py @@ -11,7 +11,7 @@ from great_expectations.compatibility.pyspark import functions as F from great_expectations.compatibility.sqlalchemy import sqlalchemy as sa -from great_expectations.expectations.metrics.map_metric_provider.is_sqlalchemy_metric_selectable import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.is_sqlalchemy_metric_selectable import ( # noqa: E501 # FIXME CoP _is_sqlalchemy_metric_selectable, ) from great_expectations.expectations.metrics.util import ( @@ -42,7 +42,7 @@ def _pandas_column_pair_map_condition_values( metrics: Dict[str, Any], **kwargs, ) -> list[tuple[Any, Any]]: - """Return values from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 + """Return values from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 # FIXME CoP ( boolean_mapped_unexpected_values, compute_domain_kwargs, @@ -57,15 +57,15 @@ def _pandas_column_pair_map_condition_values( """ In order to invoke the "ignore_row_if" filtering logic, "execution_engine.get_domain_records()" must be supplied with all of the available "domain_kwargs" keys. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_kwargs = dict(**compute_domain_kwargs, **accessor_domain_kwargs) df = execution_engine.get_domain_records(domain_kwargs=domain_kwargs) if not ("column_A" in domain_kwargs and "column_B" in domain_kwargs): - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP """No "column_A" and "column_B" found in provided metric_domain_kwargs, but it is required for a column pair map metric (_pandas_column_pair_map_condition_values). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) # noinspection PyPep8Naming @@ -81,7 +81,7 @@ def _pandas_column_pair_map_condition_values( domain_values = df[column_names] domain_values = domain_values[ - boolean_mapped_unexpected_values == True # noqa: E712 + boolean_mapped_unexpected_values == True # noqa: E712 # FIXME CoP ] result_format = metric_value_kwargs["result_format"] @@ -107,7 +107,7 @@ def _pandas_column_pair_map_condition_filtered_row_count( metrics: Dict[str, Any], **kwargs, ) -> int: - """Return record counts from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 + """Return record counts from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 # FIXME CoP _, compute_domain_kwargs, accessor_domain_kwargs = metrics["unexpected_condition"] accessor_domain_kwargs = get_dbms_compatible_metric_domain_kwargs( @@ -118,15 +118,15 @@ def _pandas_column_pair_map_condition_filtered_row_count( """ In order to invoke the "ignore_row_if" filtering logic, "execution_engine.get_domain_records()" must be supplied with all of the available "domain_kwargs" keys. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_kwargs = dict(**compute_domain_kwargs, **accessor_domain_kwargs) df = execution_engine.get_domain_records(domain_kwargs=domain_kwargs) if not ("column_A" in domain_kwargs and "column_B" in domain_kwargs): - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP """No "column_A" and "column_B" found in provided metric_domain_kwargs, but it is required for a column pair map metric (_pandas_column_pair_map_condition_filtered_row_count). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) return df.shape[0] @@ -140,7 +140,7 @@ def _sqlalchemy_column_pair_map_condition_values( metrics: Dict[str, Any], **kwargs, ) -> list[tuple[Any, Any]]: - """Return values from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 + """Return values from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 # FIXME CoP ( boolean_mapped_unexpected_values, compute_domain_kwargs, @@ -155,7 +155,7 @@ def _sqlalchemy_column_pair_map_condition_values( """ In order to invoke the "ignore_row_if" filtering logic, "execution_engine.get_domain_records()" must be supplied with all of the available "domain_kwargs" keys. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_kwargs = dict(**compute_domain_kwargs, **accessor_domain_kwargs) selectable = execution_engine.get_domain_records(domain_kwargs=domain_kwargs) @@ -164,13 +164,13 @@ def _sqlalchemy_column_pair_map_condition_values( # noinspection PyPep8Naming column_B_name = accessor_domain_kwargs["column_B"] - query = sa.select( # type: ignore[var-annotated] + query = sa.select( # type: ignore[var-annotated] # FIXME CoP sa.column(column_A_name).label("unexpected_values_A"), sa.column(column_B_name).label("unexpected_values_B"), ).where(boolean_mapped_unexpected_values) if not _is_sqlalchemy_metric_selectable(map_metric_provider=cls): - selectable = get_sqlalchemy_selectable(selectable) # type: ignore[arg-type] - query = query.select_from(selectable) # type: ignore[arg-type] + selectable = get_sqlalchemy_selectable(selectable) # type: ignore[arg-type] # FIXME CoP + query = query.select_from(selectable) # type: ignore[arg-type] # FIXME CoP result_format = metric_value_kwargs["result_format"] if result_format["result_format"] != "COMPLETE": @@ -192,7 +192,7 @@ def _sqlalchemy_column_pair_map_condition_filtered_row_count( metrics: Dict[str, Any], **kwargs, ) -> Any | None: - """Return record counts from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 + """Return record counts from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 # FIXME CoP _, compute_domain_kwargs, accessor_domain_kwargs = metrics["unexpected_condition"] accessor_domain_kwargs = get_dbms_compatible_metric_domain_kwargs( @@ -203,12 +203,12 @@ def _sqlalchemy_column_pair_map_condition_filtered_row_count( """ In order to invoke the "ignore_row_if" filtering logic, "execution_engine.get_domain_records()" must be supplied with all of the available "domain_kwargs" keys. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_kwargs = dict(**compute_domain_kwargs, **accessor_domain_kwargs) selectable = execution_engine.get_domain_records(domain_kwargs=domain_kwargs) return execution_engine.execute_query( - sa.select(sa.func.count()).select_from(selectable) # type: ignore[arg-type] + sa.select(sa.func.count()).select_from(selectable) # type: ignore[arg-type] # FIXME CoP ).scalar() @@ -220,7 +220,7 @@ def _spark_column_pair_map_condition_values( metrics: Dict[str, Any], **kwargs, ) -> list[tuple[Any, Any]]: - """Return values from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 + """Return values from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 # FIXME CoP ( unexpected_condition, compute_domain_kwargs, @@ -235,7 +235,7 @@ def _spark_column_pair_map_condition_values( """ In order to invoke the "ignore_row_if" filtering logic, "execution_engine.get_domain_records()" must be supplied with all of the available "domain_kwargs" keys. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_kwargs = dict(**compute_domain_kwargs, **accessor_domain_kwargs) df = execution_engine.get_domain_records(domain_kwargs=domain_kwargs) @@ -246,7 +246,7 @@ def _spark_column_pair_map_condition_values( # withColumn is required to transform window functions returned by some metrics to boolean mask data = df.withColumn("__unexpected", unexpected_condition) - filtered = data.filter(F.col("__unexpected") == True).drop( # noqa: E712 + filtered = data.filter(F.col("__unexpected") == True).drop( # noqa: E712 # FIXME CoP F.col("__unexpected") ) @@ -279,7 +279,7 @@ def _spark_column_pair_map_condition_filtered_row_count( metrics: Dict[str, Any], **kwargs, ) -> int: - """Return record counts from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 + """Return record counts from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 # FIXME CoP _, compute_domain_kwargs, accessor_domain_kwargs = metrics["unexpected_condition"] accessor_domain_kwargs = get_dbms_compatible_metric_domain_kwargs( @@ -290,7 +290,7 @@ def _spark_column_pair_map_condition_filtered_row_count( """ In order to invoke the "ignore_row_if" filtering logic, "execution_engine.get_domain_records()" must be supplied with all of the available "domain_kwargs" keys. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_kwargs = dict(**compute_domain_kwargs, **accessor_domain_kwargs) df = execution_engine.get_domain_records(domain_kwargs=domain_kwargs) diff --git a/great_expectations/expectations/metrics/map_metric_provider/column_pair_map_metric_provider.py b/great_expectations/expectations/metrics/map_metric_provider/column_pair_map_metric_provider.py index ef02e45ee108..bec2d16e6269 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/column_pair_map_metric_provider.py +++ b/great_expectations/expectations/metrics/map_metric_provider/column_pair_map_metric_provider.py @@ -27,7 +27,7 @@ class ColumnPairMapMetricProvider(MapMetricProvider): keys of `column_A` and `column_B`. `expect_column_pair_values_to_be_equal` is an example of an Expectation that uses this metric. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP condition_domain_keys: Tuple[str, ...] = ( "batch_id", diff --git a/great_expectations/expectations/metrics/map_metric_provider/is_sqlalchemy_metric_selectable.py b/great_expectations/expectations/metrics/map_metric_provider/is_sqlalchemy_metric_selectable.py index a8a280f23171..042ea24d4fc6 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/is_sqlalchemy_metric_selectable.py +++ b/great_expectations/expectations/metrics/map_metric_provider/is_sqlalchemy_metric_selectable.py @@ -25,7 +25,7 @@ def _is_sqlalchemy_metric_selectable( :return: boolean indicating whether or not the returned value of a method implementing the metric resolves all columns -- hence the caller must not use "select_from" clause as part of its own SQLAlchemy query; otherwise an unwanted selectable (e.g., table) will be added to "FROM", leading to duplicated and/or erroneous results. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # noinspection PyUnresolvedReferences return ( hasattr(map_metric_provider, "condition_metric_name") diff --git a/great_expectations/expectations/metrics/map_metric_provider/map_condition_auxilliary_methods.py b/great_expectations/expectations/metrics/map_metric_provider/map_condition_auxilliary_methods.py index 7199105c87aa..8d32a8660bf4 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/map_condition_auxilliary_methods.py +++ b/great_expectations/expectations/metrics/map_metric_provider/map_condition_auxilliary_methods.py @@ -24,7 +24,7 @@ SummarizationMetricNameSuffixes, ) from great_expectations.execution_engine.sqlalchemy_dialect import GXSqlDialect -from great_expectations.expectations.metrics.map_metric_provider.is_sqlalchemy_metric_selectable import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.is_sqlalchemy_metric_selectable import ( # noqa: E501 # FIXME CoP _is_sqlalchemy_metric_selectable, ) from great_expectations.expectations.metrics.util import ( @@ -35,7 +35,7 @@ sql_statement_with_post_compile_to_string, ) from great_expectations.util import ( - convert_to_json_serializable, # noqa: TID251 + convert_to_json_serializable, # noqa: TID251 # FIXME CoP generate_temporary_table_name, get_sqlalchemy_selectable, ) @@ -87,7 +87,7 @@ def _pandas_map_condition_index( """ In order to invoke the "ignore_row_if" filtering logic, "execution_engine.get_domain_records()" must be supplied with all of the available "domain_kwargs" keys. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_kwargs = dict(**compute_domain_kwargs, **accessor_domain_kwargs) domain_records_df: pd.DataFrame = execution_engine.get_domain_records( domain_kwargs=domain_kwargs @@ -153,7 +153,7 @@ def _pandas_map_condition_query( Requires `unexpected_index_column_names` to be part of `result_format` dict to specify primary_key columns to return, along with column the Expectation is run on. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP result_format: dict = metric_value_kwargs["result_format"] # We will not return map_condition_query if return_unexpected_index_query = False @@ -177,7 +177,7 @@ def _pandas_map_condition_query( """ In order to invoke the "ignore_row_if" filtering logic, "execution_engine.get_domain_records()" must be supplied with all of the available "domain_kwargs" keys. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_kwargs = dict(**compute_domain_kwargs, **accessor_domain_kwargs) domain_records_df: pd.DataFrame = execution_engine.get_domain_records( domain_kwargs=domain_kwargs @@ -205,7 +205,7 @@ def _pandas_map_condition_rows( metrics: Dict[str, Any], **kwargs, ) -> pd.DataFrame: - """Return values from the specified domain (ignoring the column constraint) that match the map-style metric in the metrics dictionary.""" # noqa: E501 + """Return values from the specified domain (ignoring the column constraint) that match the map-style metric in the metrics dictionary.""" # noqa: E501 # FIXME CoP ( boolean_mapped_unexpected_values, compute_domain_kwargs, @@ -220,7 +220,7 @@ def _pandas_map_condition_rows( """ In order to invoke the "ignore_row_if" filtering logic, "execution_engine.get_domain_records()" must be supplied with all of the available "domain_kwargs" keys. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_kwargs = dict(**compute_domain_kwargs, **accessor_domain_kwargs) df = execution_engine.get_domain_records(domain_kwargs=domain_kwargs) @@ -291,12 +291,12 @@ def _sqlalchemy_map_condition_unexpected_count_value( """ In order to invoke the "ignore_row_if" filtering logic, "execution_engine.get_domain_records()" must be supplied with all of the available "domain_kwargs" keys. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_kwargs = dict(**compute_domain_kwargs, **accessor_domain_kwargs) selectable = execution_engine.get_domain_records(domain_kwargs=domain_kwargs) - # The integral values are cast to SQL Numeric in order to avoid a bug in AWS Redshift (converted to integer later). # noqa: E501 - count_case_statement: List[sqlalchemy.Label] = sa.case( # type: ignore[assignment] + # The integral values are cast to SQL Numeric in order to avoid a bug in AWS Redshift (converted to integer later). # noqa: E501 # FIXME CoP + count_case_statement: List[sqlalchemy.Label] = sa.case( # type: ignore[assignment] # FIXME CoP ( unexpected_condition, sa.sql.expression.cast(1, sa.Numeric), @@ -304,10 +304,10 @@ def _sqlalchemy_map_condition_unexpected_count_value( else_=sa.sql.expression.cast(0, sa.Numeric), ).label("condition") - count_selectable: sqlalchemy.Select = sa.select(count_case_statement) # type: ignore[call-overload] + count_selectable: sqlalchemy.Select = sa.select(count_case_statement) # type: ignore[call-overload] # FIXME CoP if not _is_sqlalchemy_metric_selectable(map_metric_provider=cls): - selectable = get_sqlalchemy_selectable(selectable) # type: ignore[arg-type] - count_selectable = count_selectable.select_from(selectable) # type: ignore[arg-type] + selectable = get_sqlalchemy_selectable(selectable) # type: ignore[arg-type] # FIXME CoP + count_selectable = count_selectable.select_from(selectable) # type: ignore[arg-type] # FIXME CoP try: if execution_engine.dialect_name == GXSqlDialect.MSSQL: @@ 
-328,22 +328,22 @@ def _sqlalchemy_map_condition_unexpected_count_value( metrics=metrics, ) inner_case_query: sqlalchemy.Insert = temp_table_obj.insert().from_select( - [count_case_statement], # type: ignore[list-item] + [count_case_statement], # type: ignore[list-item] # FIXME CoP count_selectable, ) - execution_engine.execute_query_in_transaction(inner_case_query) # type: ignore[arg-type] + execution_engine.execute_query_in_transaction(inner_case_query) # type: ignore[arg-type] # FIXME CoP - count_selectable = temp_table_obj # type: ignore[assignment] + count_selectable = temp_table_obj # type: ignore[assignment] # FIXME CoP - count_selectable = get_sqlalchemy_selectable(count_selectable) # type: ignore[assignment] + count_selectable = get_sqlalchemy_selectable(count_selectable) # type: ignore[assignment] # FIXME CoP unexpected_count_query: sqlalchemy.Select = ( - sa.select( # type: ignore[assignment] + sa.select( # type: ignore[assignment] # FIXME CoP sa.func.sum(sa.column("condition")).label("unexpected_count"), ) - .select_from(count_selectable) # type: ignore[arg-type] + .select_from(count_selectable) # type: ignore[arg-type] # FIXME CoP .alias("UnexpectedCountSubquery") ) - unexpected_count: Union[float, int] = execution_engine.execute_query( # type: ignore[assignment] + unexpected_count: Union[float, int] = execution_engine.execute_query( # type: ignore[assignment] # FIXME CoP sa.select( unexpected_count_query.c[ f"{SummarizationMetricNameSuffixes.UNEXPECTED_COUNT.value}" @@ -375,23 +375,23 @@ def _sqlalchemy_map_condition_rows( """ Returns all rows of the metric values which do not meet an expected Expectation condition for instances of ColumnMapExpectation. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP unexpected_condition, compute_domain_kwargs, accessor_domain_kwargs = metrics[ "unexpected_condition" ] """ In order to invoke the "ignore_row_if" filtering logic, "execution_engine.get_domain_records()" must be supplied with all of the available "domain_kwargs" keys. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_kwargs = dict(**compute_domain_kwargs, **accessor_domain_kwargs) selectable = execution_engine.get_domain_records(domain_kwargs=domain_kwargs) table_columns: list[str] = metrics["table.columns"] - column_selector = [sa.column(column_name) for column_name in table_columns] # type: ignore[var-annotated] + column_selector = [sa.column(column_name) for column_name in table_columns] # type: ignore[var-annotated] # FIXME CoP query = sa.select(*column_selector).where(unexpected_condition) if not _is_sqlalchemy_metric_selectable(map_metric_provider=cls): - selectable = get_sqlalchemy_selectable(selectable) # type: ignore[arg-type] - query = query.select_from(selectable) # type: ignore[arg-type] + selectable = get_sqlalchemy_selectable(selectable) # type: ignore[arg-type] # FIXME CoP + query = query.select_from(selectable) # type: ignore[arg-type] # FIXME CoP result_format = metric_value_kwargs["result_format"] if result_format["result_format"] != "COMPLETE": @@ -404,7 +404,7 @@ def _sqlalchemy_map_condition_rows( raise gx_exceptions.InvalidMetricAccessorDomainKwargsKeyError(message=exception_message) -def _sqlalchemy_map_condition_query( # noqa: C901 - too complex +def _sqlalchemy_map_condition_query( # noqa: C901 # too complex cls, execution_engine: SqlAlchemyExecutionEngine, metric_domain_kwargs: Dict, @@ -418,7 +418,7 @@ def _sqlalchemy_map_condition_query( # noqa: C901 - too complex Requires `unexpected_index_column_names` to be part of `result_format` dict to specify primary_key columns to return, along with column the Expectation is run on. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ( unexpected_condition, _compute_domain_kwargs, @@ -461,16 +461,16 @@ def _sqlalchemy_map_condition_query( # noqa: C901 - too complex for column_name in unexpected_index_column_names: if column_name not in all_table_columns: raise gx_exceptions.InvalidMetricAccessorDomainKwargsKeyError( - message=f'Error: The unexpected_index_column: "{column_name}" in does not exist in SQL Table. ' # noqa: E501 + message=f'Error: The unexpected_index_column: "{column_name}" in does not exist in SQL Table. ' # noqa: E501 # FIXME CoP f"Please check your configuration and try again." ) - column_selector.append(sa.column(column_name)) # type: ignore[arg-type] + column_selector.append(sa.column(column_name)) # type: ignore[arg-type] # FIXME CoP for column_name in domain_column_name_list: - column_selector.append(sa.column(column_name)) # type: ignore[arg-type] + column_selector.append(sa.column(column_name)) # type: ignore[arg-type] # FIXME CoP - unexpected_condition_query_with_selected_columns: sa.select = sa.select(*column_selector).where( # type: ignore[valid-type] + unexpected_condition_query_with_selected_columns: sa.select = sa.select(*column_selector).where( # type: ignore[valid-type] # FIXME CoP unexpected_condition ) source_table_and_schema: sa.Table = get_sqlalchemy_source_table_and_schema(execution_engine) @@ -478,8 +478,8 @@ def _sqlalchemy_map_condition_query( # noqa: C901 - too complex source_table_and_schema_as_selectable: Union[sa.Table, sa.Select] = get_sqlalchemy_selectable( source_table_and_schema ) - final_select_statement: sa.select = ( # type: ignore[valid-type] - unexpected_condition_query_with_selected_columns.select_from( # type: ignore[attr-defined] + final_select_statement: sa.select = ( # type: ignore[valid-type] # FIXME CoP + unexpected_condition_query_with_selected_columns.select_from( # type: ignore[attr-defined] # FIXME CoP source_table_and_schema_as_selectable ) ) @@ -490,7 +490,7 @@ def 
_sqlalchemy_map_condition_query( # noqa: C901 - too complex return query_as_string -def _sqlalchemy_map_condition_index( # noqa: C901 - too complex +def _sqlalchemy_map_condition_index( # noqa: C901 # too complex cls, execution_engine: SqlAlchemyExecutionEngine, metric_domain_kwargs: Dict, @@ -504,7 +504,7 @@ def _sqlalchemy_map_condition_index( # noqa: C901 - too complex Requires `unexpected_index_column_names` to be part of `result_format` dict to specify primary_key columns to return. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ( unexpected_condition, compute_domain_kwargs, @@ -544,30 +544,30 @@ def _sqlalchemy_map_condition_index( # noqa: C901 - too complex for column_name in unexpected_index_column_names: if column_name not in all_table_columns: raise gx_exceptions.InvalidMetricAccessorDomainKwargsKeyError( - message=f'Error: The unexpected_index_column: "{column_name}" in does not exist in SQL Table. ' # noqa: E501 + message=f'Error: The unexpected_index_column: "{column_name}" in does not exist in SQL Table. ' # noqa: E501 # FIXME CoP f"Please check your configuration and try again." 
) - column_selector.append(sa.column(column_name)) # type: ignore[arg-type] + column_selector.append(sa.column(column_name)) # type: ignore[arg-type] # FIXME CoP # the last column we SELECT is the column the Expectation is being run on for column_name in domain_column_name_list: - column_selector.append(sa.column(column_name)) # type: ignore[arg-type] + column_selector.append(sa.column(column_name)) # type: ignore[arg-type] # FIXME CoP domain_records_as_selectable: sa.sql.Selectable = execution_engine.get_domain_records( domain_kwargs=domain_kwargs ) - unexpected_condition_query_with_selected_columns: sa.select = sa.select(*column_selector).where( # type: ignore[valid-type] + unexpected_condition_query_with_selected_columns: sa.select = sa.select(*column_selector).where( # type: ignore[valid-type] # FIXME CoP unexpected_condition ) if not _is_sqlalchemy_metric_selectable(map_metric_provider=cls): - domain_records_as_selectable = get_sqlalchemy_selectable(domain_records_as_selectable) # type: ignore[arg-type] + domain_records_as_selectable = get_sqlalchemy_selectable(domain_records_as_selectable) # type: ignore[arg-type] # FIXME CoP - # since SQL tables can be **very** large, truncate query_result values at 20, or at `partial_unexpected_count` # noqa: E501 - final_query: sa.select = unexpected_condition_query_with_selected_columns.select_from( # type: ignore[valid-type,attr-defined] + # since SQL tables can be **very** large, truncate query_result values at 20, or at `partial_unexpected_count` # noqa: E501 # FIXME CoP + final_query: sa.select = unexpected_condition_query_with_selected_columns.select_from( # type: ignore[valid-type,attr-defined] # FIXME CoP domain_records_as_selectable ).limit(result_format["partial_unexpected_count"]) - query_result: List[sqlalchemy.Row] = execution_engine.execute_query(final_query).fetchall() # type: ignore[assignment] + query_result: List[sqlalchemy.Row] = execution_engine.execute_query(final_query).fetchall() # type: 
ignore[assignment] # FIXME CoP exclude_unexpected_values: bool = result_format.get("exclude_unexpected_values", False) @@ -612,13 +612,13 @@ def _spark_map_condition_unexpected_count_value( """ In order to invoke the "ignore_row_if" filtering logic, "execution_engine.get_domain_records()" must be supplied with all of the available "domain_kwargs" keys. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_kwargs = dict(**compute_domain_kwargs, **accessor_domain_kwargs) df = execution_engine.get_domain_records(domain_kwargs=domain_kwargs) # withColumn is required to transform window functions returned by some metrics to boolean mask data = df.withColumn("__unexpected", unexpected_condition) - filtered = data.filter(F.col("__unexpected") == True).drop( # noqa: E712 + filtered = data.filter(F.col("__unexpected") == True).drop( # noqa: E712 # FIXME CoP F.col("__unexpected") ) @@ -639,13 +639,13 @@ def _spark_map_condition_rows( """ In order to invoke the "ignore_row_if" filtering logic, "execution_engine.get_domain_records()" must be supplied with all of the available "domain_kwargs" keys. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_kwargs = dict(**compute_domain_kwargs, **accessor_domain_kwargs) df = execution_engine.get_domain_records(domain_kwargs=domain_kwargs) # withColumn is required to transform window functions returned by some metrics to boolean mask data = df.withColumn("__unexpected", unexpected_condition) - filtered = data.filter(F.col("__unexpected") == True).drop( # noqa: E712 + filtered = data.filter(F.col("__unexpected") == True).drop( # noqa: E712 # FIXME CoP F.col("__unexpected") ) @@ -658,7 +658,7 @@ def _spark_map_condition_rows( return filtered.limit(limit).collect() -def _spark_map_condition_index( # noqa: C901 - too complex +def _spark_map_condition_index( # noqa: C901 # too complex cls, execution_engine: SparkDFExecutionEngine, metric_domain_kwargs: Dict, @@ -672,7 +672,7 @@ def _spark_map_condition_index( # noqa: C901 - too complex Requires `unexpected_index_column_names` to be part of `result_format` dict to specify primary_key columns to return. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ( unexpected_condition, compute_domain_kwargs, @@ -712,12 +712,12 @@ def _spark_map_condition_index( # noqa: C901 - too complex result_format = metric_value_kwargs["result_format"] if not result_format.get("unexpected_index_column_names"): raise gx_exceptions.MetricResolutionError( - message="unexpected_indices cannot be returned without 'unexpected_index_column_names'. Please check your configuration.", # noqa: E501 + message="unexpected_indices cannot be returned without 'unexpected_index_column_names'. 
Please check your configuration.", # noqa: E501 # FIXME CoP failed_metrics=["unexpected_index_list"], ) # withColumn is required to transform window functions returned by some metrics to boolean mask data = df.withColumn("__unexpected", unexpected_condition) - filtered = data.filter(F.col("__unexpected") == True).drop( # noqa: E712 + filtered = data.filter(F.col("__unexpected") == True).drop( # noqa: E712 # FIXME CoP F.col("__unexpected") ) exclude_unexpected_values: bool = result_format.get("exclude_unexpected_values", False) @@ -729,8 +729,8 @@ def _spark_map_condition_index( # noqa: C901 - too complex # check that column name is in row for col_name in columns_to_keep: if col_name not in filtered.columns: - raise gx_exceptions.InvalidMetricAccessorDomainKwargsKeyError( # noqa: TRY003 - f"Error: The unexpected_index_column '{col_name}' does not exist in Spark DataFrame. Please check your configuration and try again." # noqa: E501 + raise gx_exceptions.InvalidMetricAccessorDomainKwargsKeyError( # noqa: TRY003 # FIXME CoP + f"Error: The unexpected_index_column '{col_name}' does not exist in Spark DataFrame. Please check your configuration and try again." # noqa: E501 # FIXME CoP ) if result_format["result_format"] != "COMPLETE": @@ -765,7 +765,7 @@ def _spark_map_condition_query( df.filter(F.expr( [unexpected_condition] )) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP result_format: dict = metric_value_kwargs["result_format"] # We will not return map_condition_query if return_unexpected_index_query = False return_unexpected_index_query: Optional[bool] = result_format.get( @@ -780,7 +780,7 @@ def _spark_map_condition_query( _, ) = metrics.get("unexpected_condition", (None, None, None)) - # unexpected_condition is an F.column object, meaning the str representation is wrapped in Column<> syntax. # noqa: E501 + # unexpected_condition is an F.column object, meaning the str representation is wrapped in Column<> syntax. 
# noqa: E501 # FIXME CoP # like Column<'[unexpected_expression]'> unexpected_condition_as_string: str = str(unexpected_condition) unexpected_condition_filtered: str = unexpected_condition_as_string.replace( diff --git a/great_expectations/expectations/metrics/map_metric_provider/map_metric_provider.py b/great_expectations/expectations/metrics/map_metric_provider/map_metric_provider.py index e6932252c178..4e047849e84b 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/map_metric_provider.py +++ b/great_expectations/expectations/metrics/map_metric_provider/map_metric_provider.py @@ -20,7 +20,7 @@ SparkDFExecutionEngine, SqlAlchemyExecutionEngine, ) -from great_expectations.expectations.metrics.map_metric_provider.column_map_condition_auxilliary_methods import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.column_map_condition_auxilliary_methods import ( # noqa: E501 # FIXME CoP _pandas_column_map_condition_value_counts, _pandas_column_map_condition_values, _spark_column_map_condition_value_counts, @@ -28,7 +28,7 @@ _sqlalchemy_column_map_condition_value_counts, _sqlalchemy_column_map_condition_values, ) -from great_expectations.expectations.metrics.map_metric_provider.column_pair_map_condition_auxilliary_methods import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.column_pair_map_condition_auxilliary_methods import ( # noqa: E501 # FIXME CoP _pandas_column_pair_map_condition_filtered_row_count, _pandas_column_pair_map_condition_values, _spark_column_pair_map_condition_filtered_row_count, @@ -36,10 +36,10 @@ _sqlalchemy_column_pair_map_condition_filtered_row_count, _sqlalchemy_column_pair_map_condition_values, ) -from great_expectations.expectations.metrics.map_metric_provider.is_sqlalchemy_metric_selectable import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.is_sqlalchemy_metric_selectable import ( # noqa: E501 # FIXME CoP 
_is_sqlalchemy_metric_selectable, ) -from great_expectations.expectations.metrics.map_metric_provider.map_condition_auxilliary_methods import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.map_condition_auxilliary_methods import ( # noqa: E501 # FIXME CoP _pandas_map_condition_index, _pandas_map_condition_query, _pandas_map_condition_rows, @@ -55,7 +55,7 @@ _sqlalchemy_map_condition_unexpected_count_aggregate_fn, _sqlalchemy_map_condition_unexpected_count_value, ) -from great_expectations.expectations.metrics.map_metric_provider.multicolumn_map_condition_auxilliary_methods import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.multicolumn_map_condition_auxilliary_methods import ( # noqa: E501 # FIXME CoP _pandas_multicolumn_map_condition_filtered_row_count, _pandas_multicolumn_map_condition_values, _spark_multicolumn_map_condition_filtered_row_count, @@ -84,7 +84,7 @@ class MapMetricProvider(MetricProvider): """The base class for defining metrics that are evaluated for every row. An example of a map metric is `column_values.null` (which is implemented as a `ColumnMapMetricProvider`, a subclass of `MapMetricProvider`). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP condition_domain_keys: tuple[str, ...] = ( "batch_id", @@ -103,7 +103,7 @@ class MapMetricProvider(MetricProvider): filter_column_isnull = True @classmethod - def _register_metric_functions(cls): # noqa: C901, PLR0912, PLR0915 + def _register_metric_functions(cls): # noqa: C901, PLR0912, PLR0915 # FIXME CoP if not (hasattr(cls, "function_metric_name") or hasattr(cls, "condition_metric_name")): return @@ -119,8 +119,8 @@ def _register_metric_functions(cls): # noqa: C901, PLR0912, PLR0915 engine = candidate_metric_fn.metric_engine if not issubclass(engine, ExecutionEngine): - raise ValueError( # noqa: TRY003, TRY004 - "Metric functions must be defined with an ExecutionEngine as part of registration." 
# noqa: E501 + raise ValueError( # noqa: TRY003, TRY004 # FIXME CoP + "Metric functions must be defined with an ExecutionEngine as part of registration." # noqa: E501 # FIXME CoP ) if metric_fn_type in [ @@ -129,9 +129,9 @@ def _register_metric_functions(cls): # noqa: C901, PLR0912, PLR0915 MetricPartialFunctionTypes.WINDOW_CONDITION_FN, ]: if not hasattr(cls, "condition_metric_name"): - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP """A "MapMetricProvider" must have a "condition_metric_name" to have a decorated \ -"column_condition_partial" method.""" # noqa: E501 +"column_condition_partial" method.""" # noqa: E501 # FIXME CoP ) condition_provider = candidate_metric_fn @@ -288,7 +288,7 @@ def _register_metric_functions(cls): # noqa: C901, PLR0912, PLR0915 metric_fn_type=MetricFunctionTypes.VALUE, ) if metric_fn_type == MetricPartialFunctionTypes.MAP_CONDITION_FN: - # Documentation in "MetricProvider._register_metric_functions()" explains registration protocol. # noqa: E501 + # Documentation in "MetricProvider._register_metric_functions()" explains registration protocol. # noqa: E501 # FIXME CoP if domain_type == MetricDomainTypes.COLUMN: register_metric( metric_name=f"{metric_name}.{SummarizationMetricNameSuffixes.UNEXPECTED_COUNT.value}.{MetricPartialFunctionTypes.AGGREGATE_FN.metric_suffix}", @@ -423,7 +423,7 @@ def _register_metric_functions(cls): # noqa: C901, PLR0912, PLR0915 metric_fn_type=MetricFunctionTypes.VALUE, ) if metric_fn_type == MetricPartialFunctionTypes.MAP_CONDITION_FN: - # Documentation in "MetricProvider._register_metric_functions()" explains registration protocol. # noqa: E501 + # Documentation in "MetricProvider._register_metric_functions()" explains registration protocol. 
# noqa: E501 # FIXME CoP if domain_type == MetricDomainTypes.COLUMN: register_metric( metric_name=f"{metric_name}.{SummarizationMetricNameSuffixes.UNEXPECTED_COUNT.value}.{MetricPartialFunctionTypes.AGGREGATE_FN.metric_suffix}", @@ -526,9 +526,9 @@ def _register_metric_functions(cls): # noqa: C901, PLR0912, PLR0915 MetricPartialFunctionTypes.WINDOW_FN, ]: if not hasattr(cls, "function_metric_name"): - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP """A "MapMetricProvider" must have a "function_metric_name" to have a decorated \ -"column_function_partial" method.""" # noqa: E501 +"column_function_partial" method.""" # noqa: E501 # FIXME CoP ) map_function_provider = candidate_metric_fn @@ -565,7 +565,7 @@ def _get_evaluation_dependencies( metric_suffix: str = f".{SummarizationMetricNameSuffixes.UNEXPECTED_COUNT.value}" - # Documentation in "MetricProvider._register_metric_functions()" explains registration/dependency protocol. # noqa: E501 + # Documentation in "MetricProvider._register_metric_functions()" explains registration/dependency protocol. # noqa: E501 # FIXME CoP if metric_name.endswith(metric_suffix): has_aggregate_fn: bool = False @@ -592,7 +592,7 @@ def _get_evaluation_dependencies( metric_value_kwargs=base_metric_value_kwargs, ) - # MapMetric uses "condition" metric to build "unexpected_count.aggregate_fn" and other listed metrics as well. # noqa: E501 + # MapMetric uses "condition" metric to build "unexpected_count.aggregate_fn" and other listed metrics as well. # noqa: E501 # FIXME CoP unexpected_condition_dependent_metric_name_suffixes: list[str] = list( filter( lambda element: metric_name.endswith(element), @@ -625,7 +625,7 @@ def is_sqlalchemy_metric_selectable( # deprecated-v0.16.1 warnings.warn( "MapMetricProvider.is_sqlalchemy_metric_selectable is deprecated." 
- "You can use the great_expectations.expectations.metrics.map_metric_provider.is_sqlalchemy_metric_selectable._is_sqlalchemy_metric_selectable function, but please note that it is not considered part of the public API, and could change in the future.", # noqa: E501 + "You can use the great_expectations.expectations.metrics.map_metric_provider.is_sqlalchemy_metric_selectable._is_sqlalchemy_metric_selectable function, but please note that it is not considered part of the public API, and could change in the future.", # noqa: E501 # FIXME CoP DeprecationWarning, ) diff --git a/great_expectations/expectations/metrics/map_metric_provider/multicolumn_condition_partial.py b/great_expectations/expectations/metrics/map_metric_provider/multicolumn_condition_partial.py index 098968e28ef0..da66fd79dbd2 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/multicolumn_condition_partial.py +++ b/great_expectations/expectations/metrics/map_metric_provider/multicolumn_condition_partial.py @@ -39,7 +39,7 @@ from great_expectations.compatibility import sqlalchemy -def multicolumn_condition_partial( # noqa: C901 - 16 +def multicolumn_condition_partial( # noqa: C901 # 16 engine: Type[ExecutionEngine], partial_fn_type: Optional[MetricPartialFunctionTypes] = None, **kwargs, @@ -59,7 +59,7 @@ def multicolumn_condition_partial( # noqa: C901 - 16 Returns: An annotated metric_function which will be called with a simplified signature. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_type = MetricDomainTypes.MULTICOLUMN if issubclass(engine, PandasExecutionEngine): if partial_fn_type is None: @@ -67,9 +67,9 @@ def multicolumn_condition_partial( # noqa: C901 - 16 partial_fn_type = MetricPartialFunctionTypes(partial_fn_type) if partial_fn_type not in [MetricPartialFunctionTypes.MAP_CONDITION_SERIES]: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""PandasExecutionEngine only supports "{MetricPartialFunctionTypes.MAP_CONDITION_SERIES.value}" for \ -"multicolumn_condition_partial" "partial_fn_type" property.""" # noqa: E501 +"multicolumn_condition_partial" "partial_fn_type" property.""" # noqa: E501 # FIXME CoP ) def wrapper(metric_fn: Callable): @@ -82,7 +82,7 @@ def wrapper(metric_fn: Callable): **kwargs, ) @wraps(metric_fn) - def inner_func( # noqa: PLR0913 + def inner_func( # noqa: PLR0913 # FIXME CoP cls, execution_engine: PandasExecutionEngine, metric_domain_kwargs: dict, @@ -132,10 +132,10 @@ def inner_func( # noqa: PLR0913 MetricPartialFunctionTypes.MAP_CONDITION_FN, MetricPartialFunctionTypes.WINDOW_CONDITION_FN, ]: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""SqlAlchemyExecutionEngine only supports "{MetricPartialFunctionTypes.MAP_CONDITION_FN.value}" and \ "{MetricPartialFunctionTypes.WINDOW_CONDITION_FN.value}" for "multicolumn_condition_partial" "partial_fn_type" property. 
-""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) def wrapper(metric_fn: Callable): @@ -148,7 +148,7 @@ def wrapper(metric_fn: Callable): **kwargs, ) @wraps(metric_fn) - def inner_func( # noqa: PLR0913 + def inner_func( # noqa: PLR0913 # FIXME CoP cls, execution_engine: SqlAlchemyExecutionEngine, metric_domain_kwargs: dict, @@ -175,7 +175,7 @@ def inner_func( # noqa: PLR0913 sqlalchemy_engine: sqlalchemy.Engine = execution_engine.engine - column_selector = [sa.column(column_name) for column_name in column_list] # type: ignore[var-annotated] + column_selector = [sa.column(column_name) for column_name in column_list] # type: ignore[var-annotated] # FIXME CoP dialect = execution_engine.dialect_module expected_condition = metric_fn( cls, @@ -207,10 +207,10 @@ def inner_func( # noqa: PLR0913 MetricPartialFunctionTypes.MAP_CONDITION_FN, MetricPartialFunctionTypes.WINDOW_CONDITION_FN, ]: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""SparkDFExecutionEngine only supports "{MetricPartialFunctionTypes.MAP_CONDITION_FN.value}" and \ "{MetricPartialFunctionTypes.WINDOW_CONDITION_FN.value}" for "multicolumn_condition_partial" "partial_fn_type" property. -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) def wrapper(metric_fn: Callable): @@ -221,7 +221,7 @@ def wrapper(metric_fn: Callable): **kwargs, ) @wraps(metric_fn) - def inner_func( # noqa: PLR0913 + def inner_func( # noqa: PLR0913 # FIXME CoP cls, execution_engine: SparkDFExecutionEngine, metric_domain_kwargs: dict, @@ -263,6 +263,6 @@ def inner_func( # noqa: PLR0913 return wrapper else: - raise ValueError( # noqa: TRY003, TRY004 + raise ValueError( # noqa: TRY003, TRY004 # FIXME CoP 'Unsupported engine for "multicolumn_condition_partial" metric function decorator.' 
) diff --git a/great_expectations/expectations/metrics/map_metric_provider/multicolumn_function_partial.py b/great_expectations/expectations/metrics/map_metric_provider/multicolumn_function_partial.py index 8034d25102bd..ff7d1103fd26 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/multicolumn_function_partial.py +++ b/great_expectations/expectations/metrics/map_metric_provider/multicolumn_function_partial.py @@ -39,7 +39,7 @@ from great_expectations.compatibility import sqlalchemy -def multicolumn_function_partial( # noqa: C901 - 16 +def multicolumn_function_partial( # noqa: C901 # 16 engine: Type[ExecutionEngine], partial_fn_type: Optional[MetricPartialFunctionTypes] = None, **kwargs, @@ -56,7 +56,7 @@ def multicolumn_function_partial( # noqa: C901 - 16 Returns: An annotated metric_function which will be called with a simplified signature. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_type = MetricDomainTypes.MULTICOLUMN if issubclass(engine, PandasExecutionEngine): if partial_fn_type is None: @@ -64,9 +64,9 @@ def multicolumn_function_partial( # noqa: C901 - 16 partial_fn_type__typed = MetricPartialFunctionTypes(partial_fn_type) if partial_fn_type != MetricPartialFunctionTypes.MAP_SERIES: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""PandasExecutionEngine only supports "{MetricPartialFunctionTypes.MAP_SERIES.value}" for \ -"multicolumn_function_partial" "partial_fn_type" property.""" # noqa: E501 +"multicolumn_function_partial" "partial_fn_type" property.""" # noqa: E501 # FIXME CoP ) def wrapper(metric_fn: Callable): @@ -77,7 +77,7 @@ def wrapper(metric_fn: Callable): **kwargs, ) @wraps(metric_fn) - def inner_func( # noqa: PLR0913 + def inner_func( # noqa: PLR0913 # FIXME CoP cls, execution_engine: PandasExecutionEngine, metric_domain_kwargs: dict, @@ -120,9 +120,9 @@ def inner_func( # noqa: PLR0913 partial_fn_type = MetricPartialFunctionTypes(partial_fn_type) if partial_fn_type != 
MetricPartialFunctionTypes.MAP_FN: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""SqlAlchemyExecutionEngine only supports "{MetricPartialFunctionTypes.MAP_FN.value}" for \ -"multicolumn_function_partial" "partial_fn_type" property.""" # noqa: E501 +"multicolumn_function_partial" "partial_fn_type" property.""" # noqa: E501 # FIXME CoP ) def wrapper(metric_fn: Callable): @@ -135,7 +135,7 @@ def wrapper(metric_fn: Callable): **kwargs, ) @wraps(metric_fn) - def inner_func( # noqa: PLR0913 + def inner_func( # noqa: PLR0913 # FIXME CoP cls, execution_engine: SqlAlchemyExecutionEngine, metric_domain_kwargs: dict, @@ -164,7 +164,7 @@ def inner_func( # noqa: PLR0913 sqlalchemy_engine: sqlalchemy.Engine = execution_engine.engine - column_selector = [sa.column(column_name) for column_name in column_list] # type: ignore[var-annotated] + column_selector = [sa.column(column_name) for column_name in column_list] # type: ignore[var-annotated] # FIXME CoP dialect = execution_engine.dialect_module multicolumn_function = metric_fn( cls, @@ -193,9 +193,9 @@ def inner_func( # noqa: PLR0913 partial_fn_type = MetricPartialFunctionTypes(partial_fn_type) if partial_fn_type != MetricPartialFunctionTypes.MAP_FN: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""SparkDFExecutionEngine only supports "{MetricPartialFunctionTypes.MAP_FN.value}" for \ -"multicolumn_function_partial" "partial_fn_type" property.""" # noqa: E501 +"multicolumn_function_partial" "partial_fn_type" property.""" # noqa: E501 # FIXME CoP ) def wrapper(metric_fn: Callable): @@ -206,7 +206,7 @@ def wrapper(metric_fn: Callable): **kwargs, ) @wraps(metric_fn) - def inner_func( # noqa: PLR0913 + def inner_func( # noqa: PLR0913 # FIXME CoP cls, execution_engine: SparkDFExecutionEngine, metric_domain_kwargs: dict, @@ -248,6 +248,6 @@ def inner_func( # noqa: PLR0913 return wrapper else: - raise ValueError( # noqa: TRY003, TRY004 + raise ValueError( # noqa: 
TRY003, TRY004 # FIXME CoP 'Unsupported engine for "multicolumn_function_partial" metric function decorator.' ) diff --git a/great_expectations/expectations/metrics/map_metric_provider/multicolumn_map_condition_auxilliary_methods.py b/great_expectations/expectations/metrics/map_metric_provider/multicolumn_map_condition_auxilliary_methods.py index c78fecc85ad6..338abb2b4fb0 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/multicolumn_map_condition_auxilliary_methods.py +++ b/great_expectations/expectations/metrics/map_metric_provider/multicolumn_map_condition_auxilliary_methods.py @@ -11,7 +11,7 @@ from great_expectations.compatibility.pyspark import functions as F from great_expectations.compatibility.sqlalchemy import sqlalchemy as sa -from great_expectations.expectations.metrics.map_metric_provider.is_sqlalchemy_metric_selectable import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.is_sqlalchemy_metric_selectable import ( # noqa: E501 # FIXME CoP _is_sqlalchemy_metric_selectable, ) from great_expectations.expectations.metrics.util import ( @@ -42,7 +42,7 @@ def _pandas_multicolumn_map_condition_values( metrics: Dict[str, Any], **kwargs, ) -> list[dict]: - """Return values from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 + """Return values from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 # FIXME CoP ( boolean_mapped_unexpected_values, compute_domain_kwargs, @@ -57,15 +57,15 @@ def _pandas_multicolumn_map_condition_values( """ In order to invoke the "ignore_row_if" filtering logic, "execution_engine.get_domain_records()" must be supplied with all of the available "domain_kwargs" keys. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_kwargs = dict(**compute_domain_kwargs, **accessor_domain_kwargs) df = execution_engine.get_domain_records(domain_kwargs=domain_kwargs) if "column_list" not in accessor_domain_kwargs: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP """No "column_list" found in provided metric_domain_kwargs, but it is required for a multicolumn map metric (_pandas_multicolumn_map_condition_values). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) column_list: List[Union[str, sqlalchemy.quoted_name]] = accessor_domain_kwargs["column_list"] @@ -73,7 +73,7 @@ def _pandas_multicolumn_map_condition_values( domain_values = df[column_list] domain_values = domain_values[ - boolean_mapped_unexpected_values == True # noqa: E712 + boolean_mapped_unexpected_values == True # noqa: E712 # FIXME CoP ] result_format = metric_value_kwargs["result_format"] @@ -93,7 +93,7 @@ def _pandas_multicolumn_map_condition_filtered_row_count( metrics: Dict[str, Any], **kwargs, ) -> int: - """Return record counts from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 + """Return record counts from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 # FIXME CoP _, compute_domain_kwargs, accessor_domain_kwargs = metrics["unexpected_condition"] accessor_domain_kwargs = get_dbms_compatible_metric_domain_kwargs( @@ -104,15 +104,15 @@ def _pandas_multicolumn_map_condition_filtered_row_count( """ In order to invoke the "ignore_row_if" filtering logic, "execution_engine.get_domain_records()" must be supplied with all of the available "domain_kwargs" keys. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_kwargs = dict(**compute_domain_kwargs, **accessor_domain_kwargs) df = execution_engine.get_domain_records(domain_kwargs=domain_kwargs) if "column_list" not in accessor_domain_kwargs: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP """No "column_list" found in provided metric_domain_kwargs, but it is required for a multicolumn map metric (_pandas_multicolumn_map_condition_filtered_row_count). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) return df.shape[0] @@ -126,7 +126,7 @@ def _sqlalchemy_multicolumn_map_condition_values( metrics: Dict[str, Any], **kwargs, ) -> list[dict]: - """Return values from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 + """Return values from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 # FIXME CoP ( boolean_mapped_unexpected_values, compute_domain_kwargs, @@ -141,24 +141,24 @@ def _sqlalchemy_multicolumn_map_condition_values( """ In order to invoke the "ignore_row_if" filtering logic, "execution_engine.get_domain_records()" must be supplied with all of the available "domain_kwargs" keys. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_kwargs = dict(**compute_domain_kwargs, **accessor_domain_kwargs) selectable = execution_engine.get_domain_records(domain_kwargs=domain_kwargs) if "column_list" not in accessor_domain_kwargs: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP """No "column_list" found in provided metric_domain_kwargs, but it is required for a multicolumn map metric (_sqlalchemy_multicolumn_map_condition_values). 
-""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) column_list: List[Union[str, sqlalchemy.quoted_name]] = accessor_domain_kwargs["column_list"] - column_selector = [sa.column(column_name) for column_name in column_list] # type: ignore[var-annotated] + column_selector = [sa.column(column_name) for column_name in column_list] # type: ignore[var-annotated] # FIXME CoP query = sa.select(*column_selector).where(boolean_mapped_unexpected_values) if not _is_sqlalchemy_metric_selectable(map_metric_provider=cls): - selectable = get_sqlalchemy_selectable(selectable) # type: ignore[arg-type] - query = query.select_from(selectable) # type: ignore[arg-type] + selectable = get_sqlalchemy_selectable(selectable) # type: ignore[arg-type] # FIXME CoP + query = query.select_from(selectable) # type: ignore[arg-type] # FIXME CoP result_format = metric_value_kwargs["result_format"] if result_format["result_format"] != "COMPLETE": @@ -178,7 +178,7 @@ def _sqlalchemy_multicolumn_map_condition_filtered_row_count( metrics: Dict[str, Any], **kwargs, ) -> Any | None: - """Return record counts from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 + """Return record counts from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 # FIXME CoP _, compute_domain_kwargs, accessor_domain_kwargs = metrics["unexpected_condition"] accessor_domain_kwargs = get_dbms_compatible_metric_domain_kwargs( @@ -189,21 +189,21 @@ def _sqlalchemy_multicolumn_map_condition_filtered_row_count( """ In order to invoke the "ignore_row_if" filtering logic, "execution_engine.get_domain_records()" must be supplied with all of the available "domain_kwargs" keys. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_kwargs = dict(**compute_domain_kwargs, **accessor_domain_kwargs) selectable = execution_engine.get_domain_records(domain_kwargs=domain_kwargs) if "column_list" not in accessor_domain_kwargs: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP """No "column_list" found in provided metric_domain_kwargs, but it is required for a multicolumn map metric (_sqlalchemy_multicolumn_map_condition_filtered_row_count). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) - selectable = get_sqlalchemy_selectable(selectable) # type: ignore[arg-type] + selectable = get_sqlalchemy_selectable(selectable) # type: ignore[arg-type] # FIXME CoP return execution_engine.execute_query( - sa.select(sa.func.count()).select_from(selectable) # type: ignore[arg-type] + sa.select(sa.func.count()).select_from(selectable) # type: ignore[arg-type] # FIXME CoP ).scalar() @@ -215,7 +215,7 @@ def _spark_multicolumn_map_condition_values( metrics: Dict[str, Any], **kwargs, ) -> list[dict]: - """Return values from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 + """Return values from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 # FIXME CoP ( unexpected_condition, compute_domain_kwargs, @@ -230,22 +230,22 @@ def _spark_multicolumn_map_condition_values( """ In order to invoke the "ignore_row_if" filtering logic, "execution_engine.get_domain_records()" must be supplied with all of the available "domain_kwargs" keys. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_kwargs = dict(**compute_domain_kwargs, **accessor_domain_kwargs) df = execution_engine.get_domain_records(domain_kwargs=domain_kwargs) if "column_list" not in accessor_domain_kwargs: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP """No "column_list" found in provided metric_domain_kwargs, but it is required for a multicolumn map metric (_spark_multicolumn_map_condition_values). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) column_list: List[Union[str, sqlalchemy.quoted_name]] = accessor_domain_kwargs["column_list"] # withColumn is required to transform window functions returned by some metrics to boolean mask data = df.withColumn("__unexpected", unexpected_condition) - filtered = data.filter(F.col("__unexpected") == True).drop( # noqa: E712 + filtered = data.filter(F.col("__unexpected") == True).drop( # noqa: E712 # FIXME CoP F.col("__unexpected") ) @@ -271,7 +271,7 @@ def _spark_multicolumn_map_condition_filtered_row_count( metrics: Dict[str, Any], **kwargs, ) -> int: - """Return record counts from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 + """Return record counts from the specified domain that match the map-style metric in the metrics dictionary.""" # noqa: E501 # FIXME CoP _, compute_domain_kwargs, accessor_domain_kwargs = metrics["unexpected_condition"] accessor_domain_kwargs = get_dbms_compatible_metric_domain_kwargs( @@ -282,15 +282,15 @@ def _spark_multicolumn_map_condition_filtered_row_count( """ In order to invoke the "ignore_row_if" filtering logic, "execution_engine.get_domain_records()" must be supplied with all of the available "domain_kwargs" keys. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_kwargs = dict(**compute_domain_kwargs, **accessor_domain_kwargs) df = execution_engine.get_domain_records(domain_kwargs=domain_kwargs) if "column_list" not in accessor_domain_kwargs: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP """No "column_list" found in provided metric_domain_kwargs, but it is required for a multicolumn map metric (_spark_multicolumn_map_condition_filtered_row_count). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) return df.count() diff --git a/great_expectations/expectations/metrics/map_metric_provider/multicolumn_map_metric_provider.py b/great_expectations/expectations/metrics/map_metric_provider/multicolumn_map_metric_provider.py index 5d9751e1550b..588f77abca83 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/multicolumn_map_metric_provider.py +++ b/great_expectations/expectations/metrics/map_metric_provider/multicolumn_map_metric_provider.py @@ -23,7 +23,7 @@ class MulticolumnMapMetricProvider(MapMetricProvider): domain key `column_list`. `expect_compound_columns_to_be_unique` is an example of an Expectation that uses this metric. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP condition_domain_keys: Tuple[str, ...] = ( "batch_id", diff --git a/great_expectations/expectations/metrics/meta_metric_provider.py b/great_expectations/expectations/metrics/meta_metric_provider.py index 32438f630a5e..069ba243b8ef 100644 --- a/great_expectations/expectations/metrics/meta_metric_provider.py +++ b/great_expectations/expectations/metrics/meta_metric_provider.py @@ -25,7 +25,7 @@ class DeprecatedMetaMetricProvider(MetaMetricProvider): Support isinstance and issubclass checks. """ - # TODO: All logging/warning directives should be placed into a common module to be imported as needed. # noqa: E501 + # TODO: All logging/warning directives should be placed into a common module to be imported as needed. 
# noqa: E501 # FIXME CoP # deprecated-v0.13.12 warnings.simplefilter("default", category=DeprecationWarning) @@ -45,7 +45,7 @@ def new(cls, *args, **kwargs): warnings.warn( f"""{cls.__name__} has been renamed to {alias} -- the alias {cls.__name__} is \ deprecated as of v0.13.12 and will be removed in v0.16. -""", # noqa: E501 +""", # noqa: E501 # FIXME CoP DeprecationWarning, stacklevel=2, ) @@ -65,13 +65,13 @@ def new(cls, *args, **kwargs): warnings.warn( f"""{b.__name__} has been renamed to {alias.__name__} -- the alias {b.__name__} is deprecated \ as of v0.13.12 and will be removed in v0.16. -""", # noqa: E501 +""", # noqa: E501 # FIXME CoP DeprecationWarning, stacklevel=2, ) # Avoid duplicate base classes. - b = alias or b # noqa: PLW2901 + b = alias or b # noqa: PLW2901 # FIXME CoP if b not in fixed_bases: fixed_bases.append(b) diff --git a/great_expectations/expectations/metrics/metric_provider.py b/great_expectations/expectations/metrics/metric_provider.py index 568b5e031958..5779269f7914 100644 --- a/great_expectations/expectations/metrics/metric_provider.py +++ b/great_expectations/expectations/metrics/metric_provider.py @@ -59,9 +59,9 @@ def wrapper(metric_fn: Callable[P, T]) -> Callable[P, T]: def inner_func(*args: P.args, **kwargs: P.kwargs): return metric_fn(*args, **kwargs) - inner_func.metric_engine = engine # type: ignore[attr-defined] - inner_func.metric_fn_type = MetricFunctionTypes(metric_fn_type) # type: ignore[attr-defined] - inner_func.metric_definition_kwargs = kwargs # type: ignore[attr-defined] + inner_func.metric_engine = engine # type: ignore[attr-defined] # FIXME CoP + inner_func.metric_fn_type = MetricFunctionTypes(metric_fn_type) # type: ignore[attr-defined] # FIXME CoP + inner_func.metric_definition_kwargs = kwargs # type: ignore[attr-defined] # FIXME CoP return inner_func return wrapper @@ -98,12 +98,12 @@ def wrapper(metric_fn: Callable[P, T]) -> Callable[P, T]: def inner_func(*args: P.args, **kwargs: P.kwargs): return 
metric_fn(*args, **kwargs) - inner_func.metric_engine = engine # type: ignore[attr-defined] - inner_func.metric_fn_type = MetricPartialFunctionTypes( # type: ignore[attr-defined] + inner_func.metric_engine = engine # type: ignore[attr-defined] # FIXME CoP + inner_func.metric_fn_type = MetricPartialFunctionTypes( # type: ignore[attr-defined] # FIXME CoP partial_fn_type ) # raises ValueError if unknown type - inner_func.domain_type = MetricDomainTypes(domain_type) # type: ignore[attr-defined] - inner_func.metric_definition_kwargs = kwargs # type: ignore[attr-defined] + inner_func.domain_type = MetricDomainTypes(domain_type) # type: ignore[attr-defined] # FIXME CoP + inner_func.metric_definition_kwargs = kwargs # type: ignore[attr-defined] # FIXME CoP return inner_func return wrapper @@ -134,14 +134,14 @@ class MetricProvider(metaclass=MetaMetricProvider): 1. Data Docs rendering methods decorated with the @renderer decorator. See the guide "How to create renderers for custom expectations" for more information. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_keys: Tuple[str, ...] = tuple() value_keys: Tuple[str, ...] 
= tuple() default_kwarg_values: dict = {} @classmethod - def _register_metric_functions(cls) -> None: # noqa: C901 - too complex + def _register_metric_functions(cls) -> None: # noqa: C901 # too complex metric_name = getattr(cls, "metric_name", None) if not metric_name: # No metric name has been defined @@ -158,7 +158,7 @@ def _register_metric_functions(cls) -> None: # noqa: C901 - too complex if engine := getattr(attr_obj, "metric_engine", None): if not issubclass(engine, ExecutionEngine): - raise ValueError("metric functions must be defined with an Execution Engine") # noqa: TRY003 + raise ValueError("metric functions must be defined with an Execution Engine") # noqa: TRY003 # FIXME CoP metric_fn = attr_obj metric_definition_kwargs = getattr(metric_fn, "metric_definition_kwargs", {}) @@ -196,15 +196,15 @@ def _register_metric_functions(cls) -> None: # noqa: C901 - too complex of "resolved_metric_dependencies_by_metric_name" using previously declared "metric_partial_fn" key (as described above), composes full metric execution configuration structure, and adds this configuration to list of metrics to be resolved as one bundle (specifics pertaining to "ExecutionEngine" subclasses). 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if metric_fn_type not in [ MetricFunctionTypes.VALUE, MetricPartialFunctionTypes.AGGREGATE_FN, ]: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f"""Basic metric implementations (defined by specifying "metric_name" class variable) only \ support "{MetricFunctionTypes.VALUE.value}" and "{MetricPartialFunctionTypes.AGGREGATE_FN.value}" for "metric_value" \ -"metric_fn_type" property.""" # noqa: E501 +"metric_fn_type" property.""" # noqa: E501 # FIXME CoP ) if metric_fn_type == MetricFunctionTypes.VALUE: diff --git a/great_expectations/expectations/metrics/multicolumn_map_metrics/compound_columns_unique.py b/great_expectations/expectations/metrics/multicolumn_map_metrics/compound_columns_unique.py index 79ab924360ff..4c4184ee5340 100644 --- a/great_expectations/expectations/metrics/multicolumn_map_metrics/compound_columns_unique.py +++ b/great_expectations/expectations/metrics/multicolumn_map_metrics/compound_columns_unique.py @@ -18,10 +18,10 @@ from great_expectations.expectations.metrics.map_metric_provider import ( MulticolumnMapMetricProvider, ) -from great_expectations.expectations.metrics.map_metric_provider.multicolumn_condition_partial import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.multicolumn_condition_partial import ( # noqa: E501 # FIXME CoP multicolumn_condition_partial, ) -from great_expectations.expectations.metrics.map_metric_provider.multicolumn_function_partial import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.multicolumn_function_partial import ( # noqa: E501 # FIXME CoP multicolumn_function_partial, ) from great_expectations.validator.validation_graph import MetricConfiguration @@ -38,7 +38,7 @@ class CompoundColumnsUnique(MulticolumnMapMetricProvider): implementation, which combines the "map" and "condition" parts in a single step, the support for "SqlAlchemyExecutionEngine" is more 
detailed. Thus, the "map" and "condition" parts for "SqlAlchemyExecutionEngine" are handled separately, with the "condition" part relying on the "map" part as a metric dependency. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP function_metric_name = "compound_columns.count" # pre-requisite "map" style metric condition_metric_name = ( @@ -76,12 +76,12 @@ def _sqlalchemy_function(self, column_list, **kwargs): 3 2 3 1 The fourth column, "_num_rows", holds the value of the "map" function -- the number of rows the group occurs in. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - # Needed as keys (hence, string valued) to access "ColumnElement" objects contained within the "FROM" clauses. # noqa: E501 + # Needed as keys (hence, string valued) to access "ColumnElement" objects contained within the "FROM" clauses. # noqa: E501 # FIXME CoP column_names = kwargs.get("_column_names") - # Need all columns of the table for the purposes of reporting entire rows satisfying unexpected condition logic. # noqa: E501 + # Need all columns of the table for the purposes of reporting entire rows satisfying unexpected condition logic. # noqa: E501 # FIXME CoP table_columns = kwargs.get("_table_columns") table = kwargs.get( @@ -89,10 +89,10 @@ def _sqlalchemy_function(self, column_list, **kwargs): ) # Note that here, "table" is of the "sqlalchemy.sql.selectable.Subquery" type. # Filipe - 20231114 - # This is a special case that needs to be handled for mysql, where you cannot refer to a temp_table # noqa: E501 - # more than once in the same query. The solution to this is to perform our operation without the need # noqa: E501 - # for a sub query. We can do this by using the window function count, to get the number of duplicate # noqa: E501 - # rows by over partition by the compound unique columns. 
This will give a table which has the same # noqa: E501 + # This is a special case that needs to be handled for mysql, where you cannot refer to a temp_table # noqa: E501 # FIXME CoP + # more than once in the same query. The solution to this is to perform our operation without the need # noqa: E501 # FIXME CoP + # for a sub query. We can do this by using the window function count, to get the number of duplicate # noqa: E501 # FIXME CoP + # rows by over partition by the compound unique columns. This will give a table which has the same # noqa: E501 # FIXME CoP # number of rows as the original table, but with an additional column _num_rows column. dialect = kwargs.get("_dialect") try: @@ -115,18 +115,18 @@ def _sqlalchemy_function(self, column_list, **kwargs): ) return original_table_clause - # Step-1: Obtain the SQLAlchemy "FromClause" version of the original "table" for the purposes of gaining the # noqa: E501 - # "FromClause.c" attribute, which is a namespace of all the columns contained within the "FROM" clause (these # noqa: E501 + # Step-1: Obtain the SQLAlchemy "FromClause" version of the original "table" for the purposes of gaining the # noqa: E501 # FIXME CoP + # "FromClause.c" attribute, which is a namespace of all the columns contained within the "FROM" clause (these # noqa: E501 # FIXME CoP # elements are themselves subclasses of the SQLAlchemy "ColumnElement" class). table_columns_selector = [sa.column(column_name) for column_name in table_columns] original_table_clause = ( sa.select(*table_columns_selector).select_from(table).alias("original_table_clause") ) - # Step-2: "SELECT FROM" the original table, represented by the "FromClause" object, querying all columns of the # noqa: E501 - # table and the count of occurrences of distinct "compound" (i.e., group, as specified by "column_list") values. 
# noqa: E501 + # Step-2: "SELECT FROM" the original table, represented by the "FromClause" object, querying all columns of the # noqa: E501 # FIXME CoP + # table and the count of occurrences of distinct "compound" (i.e., group, as specified by "column_list") values. # noqa: E501 # FIXME CoP # Give this aggregated group count a distinctive label. - # Give the resulting sub-query a unique alias in order to disambiguate column names in subsequent queries. # noqa: E501 + # Give the resulting sub-query a unique alias in order to disambiguate column names in subsequent queries. # noqa: E501 # FIXME CoP count_selector = column_list + [sa.func.count().label("_num_rows")] group_count_query = ( sa.select(*count_selector) @@ -135,11 +135,11 @@ def _sqlalchemy_function(self, column_list, **kwargs): .alias("group_counts_subquery") ) - # The above "group_count_query", if executed, will produce the result set containing the number of rows that # noqa: E501 - # equals the number of distinct values of the group -- unique grouping (e.g., as in a multi-column primary key). # noqa: E501 - # Hence, in order for the "_num_rows" column values to provide an entry for each row of the original table, the # noqa: E501 - # "SELECT FROM" of "group_count_query" must undergo an "INNER JOIN" operation with the "original_table_clause" # noqa: E501 - # object, whereby all table columns in the two "FromClause" objects must match, respectively, as the conditions. # noqa: E501 + # The above "group_count_query", if executed, will produce the result set containing the number of rows that # noqa: E501 # FIXME CoP + # equals the number of distinct values of the group -- unique grouping (e.g., as in a multi-column primary key). 
# noqa: E501 # FIXME CoP + # Hence, in order for the "_num_rows" column values to provide an entry for each row of the original table, the # noqa: E501 # FIXME CoP + # "SELECT FROM" of "group_count_query" must undergo an "INNER JOIN" operation with the "original_table_clause" # noqa: E501 # FIXME CoP + # object, whereby all table columns in the two "FromClause" objects must match, respectively, as the conditions. # noqa: E501 # FIXME CoP conditions = sa.and_( *(group_count_query.c[name] == original_table_clause.c[name] for name in column_names) ) @@ -157,7 +157,7 @@ def _sqlalchemy_function(self, column_list, **kwargs): .alias("records_with_grouped_column_counts_subquery") ) - # The returned SQLAlchemy "FromClause" "compound_columns_count_query" object realizes the "map" metric function. # noqa: E501 + # The returned SQLAlchemy "FromClause" "compound_columns_count_query" object realizes the "map" metric function. # noqa: E501 # FIXME CoP return compound_columns_count_query @multicolumn_condition_partial(engine=SqlAlchemyExecutionEngine) @@ -172,7 +172,7 @@ def _sqlalchemy_condition(cls, column_list, **kwargs): Other than boolean operations, column access, argument of filtering, and limiting the size of the result set, this "row_wise_cond", serving as the main component of the unexpected condition logic, carries along with it the entire object hierarchy, making any encapsulating query ready for execution against the database engine. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP metrics = kwargs.get("_metrics") compound_columns_count_query, _, _ = metrics[ @@ -180,7 +180,7 @@ def _sqlalchemy_condition(cls, column_list, **kwargs): ] # noinspection PyProtectedMember - row_wise_cond = compound_columns_count_query.c._num_rows < 2 # noqa: PLR2004 + row_wise_cond = compound_columns_count_query.c._num_rows < 2 # noqa: PLR2004 # FIXME CoP return row_wise_cond @@ -204,7 +204,7 @@ def _get_evaluation_dependencies( """ Returns a dictionary of given metric names and their corresponding configuration, specifying the metric types and their respective domains. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP dependencies: dict = super()._get_evaluation_dependencies( metric=metric, diff --git a/great_expectations/expectations/metrics/multicolumn_map_metrics/multicolumn_sum_equal.py b/great_expectations/expectations/metrics/multicolumn_map_metrics/multicolumn_sum_equal.py index 2522a7a717d2..0f7b608dd23b 100644 --- a/great_expectations/expectations/metrics/multicolumn_map_metrics/multicolumn_sum_equal.py +++ b/great_expectations/expectations/metrics/multicolumn_map_metrics/multicolumn_sum_equal.py @@ -9,7 +9,7 @@ from great_expectations.expectations.metrics.map_metric_provider import ( MulticolumnMapMetricProvider, ) -from great_expectations.expectations.metrics.map_metric_provider.multicolumn_condition_partial import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.multicolumn_condition_partial import ( # noqa: E501 # FIXME CoP multicolumn_condition_partial, ) diff --git a/great_expectations/expectations/metrics/multicolumn_map_metrics/select_column_values_unique_within_record.py b/great_expectations/expectations/metrics/multicolumn_map_metrics/select_column_values_unique_within_record.py index 3efd80697adc..438926aafdcb 100644 --- a/great_expectations/expectations/metrics/multicolumn_map_metrics/select_column_values_unique_within_record.py +++ 
b/great_expectations/expectations/metrics/multicolumn_map_metrics/select_column_values_unique_within_record.py @@ -13,7 +13,7 @@ from great_expectations.expectations.metrics.map_metric_provider import ( MulticolumnMapMetricProvider, ) -from great_expectations.expectations.metrics.map_metric_provider.multicolumn_condition_partial import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.multicolumn_condition_partial import ( # noqa: E501 # FIXME CoP multicolumn_condition_partial, ) @@ -44,15 +44,15 @@ def _sqlalchemy(cls, column_list, **kwargs): cost is O(num_columns^2). However, until a more efficient implementation compatible with SQLAlchemy is available, this is the only feasible mechanism under the current architecture, where map metric providers must return a condition. Nevertheless, SQL query length limit is 1GB (sufficient for most practical scenarios). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP num_columns = len(column_list) # An arbitrary "num_columns" value used for issuing an explanatory message as a warning. - if num_columns > 100: # noqa: PLR2004 + if num_columns > 100: # noqa: PLR2004 # FIXME CoP logger.warning( f"""Batch data with {num_columns} columns is detected. Computing the "{cls.condition_metric_name}" \ metric for wide tables using SQLAlchemy leads to long WHERE clauses for the underlying database engine to process. 
-""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) conditions = sa.or_( @@ -60,8 +60,8 @@ def _sqlalchemy(cls, column_list, **kwargs): sa.or_( column_list[idx_src] == column_list[idx_dest], sa.and_( - column_list[idx_src] == None, # noqa: E711 - column_list[idx_dest] == None, # noqa: E711 + column_list[idx_src] == None, # noqa: E711 # FIXME CoP + column_list[idx_dest] == None, # noqa: E711 # FIXME CoP ), ) for idx_src in range(num_columns - 1) diff --git a/great_expectations/expectations/metrics/query_metric_provider.py b/great_expectations/expectations/metrics/query_metric_provider.py index b2367cdb23ba..8a73334038fe 100644 --- a/great_expectations/expectations/metrics/query_metric_provider.py +++ b/great_expectations/expectations/metrics/query_metric_provider.py @@ -69,9 +69,9 @@ def _get_query_from_metric_value_kwargs(cls, metric_value_kwargs: dict) -> str: query_param ) if not query: - raise ValueError(f"Must provide `{query_param}` to `{cls.__name__}` metric.") # noqa: TRY003 + raise ValueError(f"Must provide `{query_param}` to `{cls.__name__}` metric.") # noqa: TRY003 # FIXME CoP if not isinstance(query, str): - raise TypeError(f"`{query_param}` must be provided as a string.") # noqa: TRY003 + raise TypeError(f"`{query_param}` must be provided as a string.") # noqa: TRY003 # FIXME CoP return query diff --git a/great_expectations/expectations/metrics/query_metrics/query_column.py b/great_expectations/expectations/metrics/query_metrics/query_column.py index 516548cba18f..6514cacd88a5 100644 --- a/great_expectations/expectations/metrics/query_metrics/query_column.py +++ b/great_expectations/expectations/metrics/query_metrics/query_column.py @@ -42,7 +42,7 @@ def _sqlalchemy( if column: query_parameters = QueryParameters(column=column) else: - raise ValueError("`column` must be provided.") # noqa: TRY003 + raise ValueError("`column` must be provided.") # noqa: TRY003 # FIXME CoP substituted_batch_subquery = ( 
cls._get_substituted_batch_subquery_from_query_and_batch_selectable( query=query, diff --git a/great_expectations/expectations/metrics/query_metrics/query_column_pair.py b/great_expectations/expectations/metrics/query_metrics/query_column_pair.py index 98bff70b83dd..3240dabdd4f8 100644 --- a/great_expectations/expectations/metrics/query_metrics/query_column_pair.py +++ b/great_expectations/expectations/metrics/query_metrics/query_column_pair.py @@ -46,7 +46,7 @@ def _sqlalchemy( column_B=column_B, ) else: - raise ValueError("Both `column_A` and `column_B` must be provided.") # noqa: TRY003 + raise ValueError("Both `column_A` and `column_B` must be provided.") # noqa: TRY003 # FIXME CoP substituted_batch_subquery = ( cls._get_substituted_batch_subquery_from_query_and_batch_selectable( query=query, diff --git a/great_expectations/expectations/metrics/query_metrics/query_multiple_columns.py b/great_expectations/expectations/metrics/query_metrics/query_multiple_columns.py index a59a391b2711..5e7a6fe08397 100644 --- a/great_expectations/expectations/metrics/query_metrics/query_multiple_columns.py +++ b/great_expectations/expectations/metrics/query_metrics/query_multiple_columns.py @@ -40,7 +40,7 @@ def _sqlalchemy( query = cls._get_query_from_metric_value_kwargs(metric_value_kwargs) columns = metric_value_kwargs.get("columns") if not isinstance(columns, list): - raise TypeError("Columns must be supplied as a list") # noqa: TRY003 + raise TypeError("Columns must be supplied as a list") # noqa: TRY003 # FIXME CoP substituted_batch_subquery = ( cls._get_substituted_batch_subquery_from_query_and_batch_selectable( @@ -67,7 +67,7 @@ def _spark( query = cls._get_query_from_metric_value_kwargs(metric_value_kwargs) if not isinstance(query, str): - raise TypeError("Query must be supplied as a string") # noqa: TRY003 + raise TypeError("Query must be supplied as a string") # noqa: TRY003 # FIXME CoP df: pyspark.DataFrame df, _, _ = execution_engine.get_compute_domain( @@ -78,7 
+78,7 @@ def _spark( columns = metric_value_kwargs.get("columns") if not isinstance(columns, list): - raise TypeError("Columns must be supplied as a list") # noqa: TRY003 + raise TypeError("Columns must be supplied as a list") # noqa: TRY003 # FIXME CoP query = query.format( **{f"col_{i}": entry for i, entry in enumerate(columns, 1)}, diff --git a/great_expectations/expectations/metrics/query_metrics/query_template_values.py b/great_expectations/expectations/metrics/query_metrics/query_template_values.py index dc3046e451db..83f46843201b 100644 --- a/great_expectations/expectations/metrics/query_metrics/query_template_values.py +++ b/great_expectations/expectations/metrics/query_metrics/query_template_values.py @@ -58,20 +58,20 @@ def _sqlalchemy( template_dict = metric_value_kwargs.get("template_dict") if not isinstance(template_dict, dict): - raise TypeError("template_dict supplied by the expectation must be a dict") # noqa: TRY003 + raise TypeError("template_dict supplied by the expectation must be a dict") # noqa: TRY003 # FIXME CoP if isinstance(selectable, sa.Table): query = cls.get_query(query, template_dict, selectable) elif isinstance( selectable, get_sqlalchemy_subquery_type() - ): # Specifying a runtime query in a RuntimeBatchRequest returns the active batch as a Subquery; sectioning # noqa: E501 + ): # Specifying a runtime query in a RuntimeBatchRequest returns the active batch as a Subquery; sectioning # noqa: E501 # FIXME CoP # the active batch off w/ parentheses ensures flow of operations doesn't break query = cls.get_query(query, template_dict, f"({selectable})") elif isinstance( selectable, sa.sql.Select - ): # Specifying a row_condition returns the active batch as a Select object, requiring compilation & # noqa: E501 + ): # Specifying a row_condition returns the active batch as a Select object, requiring compilation & # noqa: E501 # FIXME CoP # aliasing when formatting the parameterized query query = cls.get_query( query, @@ -90,7 +90,7 @@ def 
_sqlalchemy( if hasattr(e, "_query_id"): # query_id removed because it duplicates the validation_results e._query_id = None - raise e # noqa: TRY201 + raise e # noqa: TRY201 # FIXME CoP if isinstance(result, Sequence): return [element._asdict() for element in result] @@ -116,9 +116,9 @@ def _spark( df.createOrReplaceTempView("tmp_view") template_dict = metric_value_kwargs.get("template_dict") if not isinstance(query, str): - raise TypeError("template_dict supplied by the expectation must be a dict") # noqa: TRY003 + raise TypeError("template_dict supplied by the expectation must be a dict") # noqa: TRY003 # FIXME CoP if not isinstance(template_dict, dict): - raise TypeError("template_dict supplied by the expectation must be a dict") # noqa: TRY003 + raise TypeError("template_dict supplied by the expectation must be a dict") # noqa: TRY003 # FIXME CoP query = query.format(**template_dict, batch="tmp_view") diff --git a/great_expectations/expectations/metrics/table_metric_provider.py b/great_expectations/expectations/metrics/table_metric_provider.py index 30598ff14f54..35b444fe79ab 100644 --- a/great_expectations/expectations/metrics/table_metric_provider.py +++ b/great_expectations/expectations/metrics/table_metric_provider.py @@ -25,7 +25,7 @@ class TableMetricProvider(MetricProvider): ---Documentation--- - https://docs.greatexpectations.io/docs/guides/expectations/custom_expectations_lp - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_keys: Tuple[str, ...] 
= ( "batch_id", diff --git a/great_expectations/expectations/metrics/table_metrics/table_column_count.py b/great_expectations/expectations/metrics/table_metrics/table_column_count.py index 44a454b23633..d8894d4cf72a 100644 --- a/great_expectations/expectations/metrics/table_metrics/table_column_count.py +++ b/great_expectations/expectations/metrics/table_metrics/table_column_count.py @@ -34,7 +34,7 @@ def _pandas( runtime_configuration: dict, ): columns = metrics.get("table.columns") - return len(columns) # type: ignore[arg-type] + return len(columns) # type: ignore[arg-type] # FIXME CoP @metric_value(engine=SqlAlchemyExecutionEngine) def _sqlalchemy( @@ -46,7 +46,7 @@ def _sqlalchemy( runtime_configuration: dict, ): columns = metrics.get("table.columns") - return len(columns) # type: ignore[arg-type] + return len(columns) # type: ignore[arg-type] # FIXME CoP @metric_value(engine=SparkDFExecutionEngine) def _spark( @@ -58,7 +58,7 @@ def _spark( runtime_configuration: dict, ): columns = metrics.get("table.columns") - return len(columns) # type: ignore[arg-type] + return len(columns) # type: ignore[arg-type] # FIXME CoP @classmethod @override diff --git a/great_expectations/expectations/metrics/table_metrics/table_column_types.py b/great_expectations/expectations/metrics/table_metrics/table_column_types.py index 729487d4a443..a6abcdcb2e1e 100644 --- a/great_expectations/expectations/metrics/table_metrics/table_column_types.py +++ b/great_expectations/expectations/metrics/table_metrics/table_column_types.py @@ -53,8 +53,8 @@ def _sqlalchemy( if execution_engine.batch_manager.active_batch_data_id is not None: batch_id = execution_engine.batch_manager.active_batch_data_id else: - raise GreatExpectationsError( # noqa: TRY003 - "batch_id could not be determined from domain kwargs and no active_batch_data is loaded into the " # noqa: E501 + raise GreatExpectationsError( # noqa: TRY003 # FIXME CoP + "batch_id could not be determined from domain kwargs and no 
active_batch_data is loaded into the " # noqa: E501 # FIXME CoP "execution engine" ) @@ -63,8 +63,8 @@ def _sqlalchemy( execution_engine.batch_manager.batch_data_cache.get(batch_id), ) if batch_data is None: - raise GreatExpectationsError( # noqa: TRY003 - "the requested batch is not available; please load the batch into the execution engine." # noqa: E501 + raise GreatExpectationsError( # noqa: TRY003 # FIXME CoP + "the requested batch is not available; please load the batch into the execution engine." # noqa: E501 # FIXME CoP ) return _get_sqlalchemy_column_metadata(execution_engine, batch_data) @@ -92,12 +92,12 @@ def _get_sqlalchemy_column_metadata( ): table_selectable: str | sqlalchemy.TextClause - if sqlalchemy.Table and isinstance(batch_data.selectable, sqlalchemy.Table): # type: ignore[truthy-function] + if sqlalchemy.Table and isinstance(batch_data.selectable, sqlalchemy.Table): # type: ignore[truthy-function] # FIXME CoP table_selectable = batch_data.source_table_name or batch_data.selectable.name schema_name = batch_data.source_schema_name or batch_data.selectable.schema # if custom query was passed in - elif sqlalchemy.TextClause and isinstance(batch_data.selectable, sqlalchemy.TextClause): # type: ignore[truthy-function] + elif sqlalchemy.TextClause and isinstance(batch_data.selectable, sqlalchemy.TextClause): # type: ignore[truthy-function] # FIXME CoP table_selectable = batch_data.selectable schema_name = None else: @@ -106,12 +106,12 @@ def _get_sqlalchemy_column_metadata( return get_sqlalchemy_column_metadata( execution_engine=execution_engine, - table_selectable=table_selectable, # type: ignore[arg-type] + table_selectable=table_selectable, # type: ignore[arg-type] # FIXME CoP schema_name=schema_name, ) -def _get_spark_column_metadata(field, parent_name="", include_nested=True): # noqa: C901 - too complex +def _get_spark_column_metadata(field, parent_name="", include_nested=True): # noqa: C901 # too complex cols = [] if parent_name != "": 
parent_name = f"{parent_name}." @@ -146,6 +146,6 @@ def _get_spark_column_metadata(field, parent_name="", include_nested=True): # n include_nested=include_nested, ) else: - raise ValueError("unrecognized field type") # noqa: TRY003 + raise ValueError("unrecognized field type") # noqa: TRY003 # FIXME CoP return cols diff --git a/great_expectations/expectations/metrics/table_metrics/table_head.py b/great_expectations/expectations/metrics/table_metrics/table_head.py index 8ede588a472e..159da352adf9 100644 --- a/great_expectations/expectations/metrics/table_metrics/table_head.py +++ b/great_expectations/expectations/metrics/table_metrics/table_head.py @@ -75,7 +75,7 @@ def _sqlalchemy( if metric_value_kwargs["fetch_all"]: limit = None - selectable = sa.select("*").select_from(selectable).limit(limit).selectable # type: ignore[assignment,arg-type] + selectable = sa.select("*").select_from(selectable).limit(limit).selectable # type: ignore[assignment,arg-type] # FIXME CoP try: with execution_engine.get_connection() as con: @@ -90,7 +90,7 @@ def _sqlalchemy( MetricConfiguration("table.columns", metric_domain_kwargs) ) df = pd.DataFrame(columns=columns) - return df # type: ignore[return-value] + return df # type: ignore[return-value] # FIXME CoP @metric_value(engine=SparkDFExecutionEngine) def _spark( @@ -119,6 +119,6 @@ def _spark( rows = df.head(n=df.count() + n_rows) rows = [element.asDict() for element in rows] - df = pd.DataFrame(data=rows) # type: ignore[assignment] + df = pd.DataFrame(data=rows) # type: ignore[assignment] # FIXME CoP - return df # type: ignore[return-value] + return df # type: ignore[return-value] # FIXME CoP diff --git a/great_expectations/expectations/metrics/util.py b/great_expectations/expectations/metrics/util.py index d8c9e4e75c1f..c809dc6dd363 100644 --- a/great_expectations/expectations/metrics/util.py +++ b/great_expectations/expectations/metrics/util.py @@ -30,8 +30,8 @@ ) from great_expectations.compatibility.typing_extensions import 
override from great_expectations.execution_engine import ( - PandasExecutionEngine, # noqa: TCH001 - SqlAlchemyExecutionEngine, # noqa: TCH001 + PandasExecutionEngine, # noqa: TCH001 # FIXME CoP + SqlAlchemyExecutionEngine, # noqa: TCH001 # FIXME CoP ) from great_expectations.execution_engine.sqlalchemy_batch_data import ( SqlAlchemyBatchData, @@ -42,10 +42,10 @@ from great_expectations.execution_engine.util import check_sql_engine_dialect try: - import psycopg2 # noqa: F401 - import sqlalchemy.dialects.postgresql.psycopg2 as sqlalchemy_psycopg2 # noqa: TID251 + import psycopg2 # noqa: F401 # FIXME CoP + import sqlalchemy.dialects.postgresql.psycopg2 as sqlalchemy_psycopg2 # noqa: TID251 # FIXME CoP except (ImportError, KeyError): - sqlalchemy_psycopg2 = None # type: ignore[assignment] + sqlalchemy_psycopg2 = None # type: ignore[assignment] # FIXME CoP try: import snowflake @@ -70,12 +70,10 @@ try: import databricks.sqlalchemy as sqla_databricks except (ImportError, AttributeError): - sqla_databricks = None # type: ignore[assignment] + sqla_databricks = None # type: ignore[assignment] # FIXME CoP _BIGQUERY_MODULE_NAME = "sqlalchemy_bigquery" -from great_expectations.compatibility import bigquery as sqla_bigquery -from great_expectations.compatibility.bigquery import bigquery_types_tuple if TYPE_CHECKING: import pandas as pd @@ -105,14 +103,14 @@ def _is_databricks_dialect(dialect: ModuleType | sa.Dialect | Type[sa.Dialect]) return True if hasattr(dialect, "DatabricksDialect"): return True - if issubclass(dialect, sqla_databricks.DatabricksDialect): # type: ignore[arg-type] + if issubclass(dialect, sqla_databricks.DatabricksDialect): # type: ignore[arg-type] # FIXME CoP return True except Exception: pass return False -def get_dialect_regex_expression( # noqa: C901, PLR0911, PLR0912, PLR0915 +def get_dialect_regex_expression( # noqa: C901, PLR0911, PLR0912, PLR0915 # FIXME CoP column: sa.Column, regex: str, dialect: ModuleType | Type[sa.Dialect] | sa.Dialect, @@ 
-120,7 +118,7 @@ def get_dialect_regex_expression( # noqa: C901, PLR0911, PLR0912, PLR0915 ) -> sa.SQLColumnExpression | None: try: # postgres - if issubclass(dialect.dialect, sa.dialects.postgresql.dialect): # type: ignore[union-attr] + if issubclass(dialect.dialect, sa.dialects.postgresql.dialect): # type: ignore[union-attr] # FIXME CoP if positive: return sqlalchemy.BinaryExpression( column, sqlalchemy.literal(regex), sqlalchemy.custom_op("~") @@ -143,7 +141,7 @@ def get_dialect_regex_expression( # noqa: C901, PLR0911, PLR0912, PLR0915 # noinspection PyUnresolvedReferences try: if hasattr(dialect, "RedshiftDialect") or ( - aws.redshiftdialect and issubclass(dialect.dialect, aws.redshiftdialect.RedshiftDialect) # type: ignore[union-attr] + aws.redshiftdialect and issubclass(dialect.dialect, aws.redshiftdialect.RedshiftDialect) # type: ignore[union-attr] # FIXME CoP ): if positive: return sqlalchemy.BinaryExpression( @@ -160,7 +158,7 @@ def get_dialect_regex_expression( # noqa: C901, PLR0911, PLR0912, PLR0915 try: # MySQL - if issubclass(dialect.dialect, sa.dialects.mysql.dialect): # type: ignore[union-attr] + if issubclass(dialect.dialect, sa.dialects.mysql.dialect): # type: ignore[union-attr] # FIXME CoP if positive: return sqlalchemy.BinaryExpression( column, sqlalchemy.literal(regex), sqlalchemy.custom_op("REGEXP") @@ -177,7 +175,7 @@ def get_dialect_regex_expression( # noqa: C901, PLR0911, PLR0912, PLR0915 try: # Snowflake if issubclass( - dialect.dialect, # type: ignore[union-attr] + dialect.dialect, # type: ignore[union-attr] # FIXME CoP snowflake.sqlalchemy.snowdialect.SnowflakeDialect, ): if positive: @@ -208,7 +206,7 @@ def get_dialect_regex_expression( # noqa: C901, PLR0911, PLR0912, PLR0915 TypeError, ): # TypeError can occur if the driver was not installed and so is None logger.debug( - "Unable to load BigQueryDialect dialect while running get_dialect_regex_expression in expectations.metrics.util", # noqa: E501 + "Unable to load BigQueryDialect 
dialect while running get_dialect_regex_expression in expectations.metrics.util", # noqa: E501 # FIXME CoP exc_info=True, ) pass @@ -259,7 +257,7 @@ def get_dialect_regex_expression( # noqa: C901, PLR0911, PLR0912, PLR0915 try: # Teradata - if issubclass(dialect.dialect, teradatasqlalchemy.dialect.TeradataDialect): # type: ignore[union-attr] + if issubclass(dialect.dialect, teradatasqlalchemy.dialect.TeradataDialect): # type: ignore[union-attr] # FIXME CoP if positive: return ( sa.func.REGEXP_SIMILAR( @@ -280,7 +278,7 @@ def get_dialect_regex_expression( # noqa: C901, PLR0911, PLR0912, PLR0915 try: # sqlite # regex_match for sqlite introduced in sqlalchemy v1.4 - if issubclass(dialect.dialect, sa.dialects.sqlite.dialect) and version.parse( # type: ignore[union-attr] + if issubclass(dialect.dialect, sa.dialects.sqlite.dialect) and version.parse( # type: ignore[union-attr] # FIXME CoP sa.__version__ ) >= version.parse("1.4"): if positive: @@ -299,50 +297,6 @@ def get_dialect_regex_expression( # noqa: C901, PLR0911, PLR0912, PLR0915 return None -def _get_dialect_type_module( - dialect: ModuleType | Type[sa.Dialect] | sa.Dialect | None = None, -) -> ModuleType | Type[sa.Dialect] | sa.Dialect: - if dialect is None: - logger.warning("No sqlalchemy dialect found; relying in top-level sqlalchemy types.") - return sa - - # Redshift does not (yet) export types to top level; only recognize base SA types - # noinspection PyUnresolvedReferences - if aws.redshiftdialect and isinstance( - dialect, - aws.redshiftdialect.RedshiftDialect, - ): - return dialect.sa - - # Bigquery works with newer versions, but use a patch if we had to define bigquery_types_tuple - try: - if ( - isinstance( - dialect, - sqla_bigquery.BigQueryDialect, # type: ignore[attr-defined] - ) - and bigquery_types_tuple is not None - ): - return bigquery_types_tuple - except (TypeError, AttributeError): - pass - - # Teradata types module - try: - if ( - issubclass( - dialect, # type: ignore[arg-type] - 
teradatasqlalchemy.dialect.TeradataDialect, - ) - and teradatatypes is not None - ): - return teradatatypes - except (TypeError, AttributeError): - pass - - return dialect - - def attempt_allowing_relative_error(dialect): # noinspection PyUnresolvedReferences detected_redshift: bool = aws.redshiftdialect and check_sql_engine_dialect( @@ -406,7 +360,7 @@ def __getitem__(self, key: Any) -> Any: return item -def get_sqlalchemy_column_metadata( +def get_sqlalchemy_column_metadata( # noqa: C901, PLR0912 # FIXME CoP execution_engine: SqlAlchemyExecutionEngine, table_selectable: sqlalchemy.Select, schema_name: Optional[str] = None, @@ -418,7 +372,7 @@ def get_sqlalchemy_column_metadata( inspector = execution_engine.get_inspector() try: # if a custom query was passed - if sqlalchemy.TextClause and isinstance(table_selectable, sqlalchemy.TextClause): # type: ignore[truthy-function] + if sqlalchemy.TextClause and isinstance(table_selectable, sqlalchemy.TextClause): # type: ignore[truthy-function] # FIXME CoP if hasattr(table_selectable, "selected_columns"): # New in version 1.4. 
columns = table_selectable.selected_columns.columns @@ -427,11 +381,11 @@ def get_sqlalchemy_column_metadata( # We must explicitly create a subquery columns = table_selectable.columns().subquery().columns else: - # TODO: remove cast to a string once [this](https://github.com/snowflakedb/snowflake-sqlalchemy/issues/157) issue is resovled # noqa: E501 + # TODO: remove cast to a string once [this](https://github.com/snowflakedb/snowflake-sqlalchemy/issues/157) issue is resovled # noqa: E501 # FIXME CoP table_name = str(table_selectable) if execution_engine.dialect_name == GXSqlDialect.SNOWFLAKE: table_name = table_name.lower() - columns = inspector.get_columns( # type: ignore[assignment] + columns = inspector.get_columns( # type: ignore[assignment] # FIXME CoP table_name=table_name, schema=schema_name, ) @@ -451,7 +405,7 @@ def get_sqlalchemy_column_metadata( sqlalchemy_engine=engine, ) - # Use fallback because for mssql and trino reflection mechanisms do not throw an error but return an empty list # noqa: E501 + # Use fallback because for mssql and trino reflection mechanisms do not throw an error but return an empty list # noqa: E501 # FIXME CoP if len(columns) == 0: columns = column_reflection_fallback( selectable=table_selectable, @@ -460,12 +414,26 @@ def get_sqlalchemy_column_metadata( ) dialect_name = execution_engine.dialect.name - if dialect_name == GXSqlDialect.SNOWFLAKE: - return [ - # TODO: SmartColumn should know the dialect and do lookups based on that - CaseInsensitiveNameDict(column) - for column in columns - ] + if dialect_name in [ + GXSqlDialect.DATABRICKS, + GXSqlDialect.POSTGRESQL, + GXSqlDialect.SNOWFLAKE, + ]: + # WARNING: Do not alter columns in place, as they are cached on the inspector + columns_copy = [column.copy() for column in columns] + for column in columns_copy: + if column.get("type"): + # When using column_reflection_fallback, we might not be able to + # extract the column type, and only have the column name + column["type"] = 
column["type"].compile(dialect=execution_engine.dialect) + if dialect_name == GXSqlDialect.SNOWFLAKE: + return [ + # TODO: SmartColumn should know the dialect and do lookups based on that + CaseInsensitiveNameDict(column) + for column in columns_copy + ] + else: + return columns_copy return columns except AttributeError as e: @@ -473,7 +441,7 @@ def get_sqlalchemy_column_metadata( return None -def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 +def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 # FIXME CoP selectable: sqlalchemy.Select, dialect: sqlalchemy.Dialect, sqlalchemy_engine: sqlalchemy.Engine, @@ -492,7 +460,7 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 if dialect.name.lower() == "mssql": # Get column names and types from the database # Reference: https://dataedo.com/kb/query/sql-server/list-table-columns-in-database - tables_table_clause: sqlalchemy.TableClause = sa.table( # type: ignore[assignment] + tables_table_clause: sqlalchemy.TableClause = sa.table( # type: ignore[assignment] # FIXME CoP "tables", sa.column("object_id"), sa.column("schema_id"), @@ -500,7 +468,7 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 schema="sys", ).alias("sys_tables_table_clause") tables_table_query: sqlalchemy.Select = ( - sa.select( # type: ignore[assignment] + sa.select( # type: ignore[assignment] # FIXME CoP tables_table_clause.columns.object_id.label("object_id"), sa.func.schema_name(tables_table_clause.columns.schema_id).label("schema_name"), tables_table_clause.columns.name.label("table_name"), @@ -508,7 +476,7 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 .select_from(tables_table_clause) .alias("sys_tables_table_subquery") ) - columns_table_clause: sqlalchemy.TableClause = sa.table( # type: ignore[assignment] + columns_table_clause: sqlalchemy.TableClause = sa.table( # type: ignore[assignment] # FIXME CoP "columns", sa.column("object_id"), sa.column("user_type_id"), @@ -519,7 
+487,7 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 schema="sys", ).alias("sys_columns_table_clause") columns_table_query: sqlalchemy.Select = ( - sa.select( # type: ignore[assignment] + sa.select( # type: ignore[assignment] # FIXME CoP columns_table_clause.columns.object_id.label("object_id"), columns_table_clause.columns.user_type_id.label("user_type_id"), columns_table_clause.columns.column_id.label("column_id"), @@ -530,24 +498,24 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 .select_from(columns_table_clause) .alias("sys_columns_table_subquery") ) - types_table_clause: sqlalchemy.TableClause = sa.table( # type: ignore[assignment] + types_table_clause: sqlalchemy.TableClause = sa.table( # type: ignore[assignment] # FIXME CoP "types", sa.column("user_type_id"), sa.column("name"), schema="sys", ).alias("sys_types_table_clause") types_table_query: sqlalchemy.Select = ( - sa.select( # type: ignore[assignment] + sa.select( # type: ignore[assignment] # FIXME CoP types_table_clause.columns.user_type_id.label("user_type_id"), types_table_clause.columns.name.label("column_data_type"), ) .select_from(types_table_clause) .alias("sys_types_table_subquery") ) - inner_join_conditions: sqlalchemy.BinaryExpression = sa.and_( # type: ignore[assignment] + inner_join_conditions: sqlalchemy.BinaryExpression = sa.and_( # type: ignore[assignment] # FIXME CoP *(tables_table_query.c.object_id == columns_table_query.c.object_id,) ) - outer_join_conditions: sqlalchemy.BinaryExpression = sa.and_( # type: ignore[assignment] + outer_join_conditions: sqlalchemy.BinaryExpression = sa.and_( # type: ignore[assignment] # FIXME CoP *( columns_table_query.columns.user_type_id == types_table_query.columns.user_type_id, @@ -564,7 +532,7 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 columns_table_query.c.column_precision, ) .select_from( - tables_table_query.join( # type: ignore[call-arg,arg-type] + tables_table_query.join( # type: 
ignore[call-arg,arg-type] # FIXME CoP right=columns_table_query, onclause=inner_join_conditions, isouter=False, @@ -574,26 +542,25 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 isouter=True, ) ) - .where(tables_table_query.c.table_name == selectable.name) # type: ignore[attr-defined] + .where(tables_table_query.c.table_name == selectable.name) # type: ignore[attr-defined] # FIXME CoP .order_by( tables_table_query.c.schema_name.asc(), tables_table_query.c.table_name.asc(), columns_table_query.c.column_id.asc(), ) ) - col_info_tuples_list: List[tuple] = connection.execute(col_info_query).fetchall() # type: ignore[assignment] - # type_module = _get_dialect_type_module(dialect=dialect) + col_info_tuples_list: List[tuple] = connection.execute(col_info_query).fetchall() # type: ignore[assignment] # FIXME CoP col_info_dict_list = [ { "name": column_name, # "type": getattr(type_module, column_data_type.upper())(), "type": column_data_type.upper(), } - for schema_name, table_name, column_id, column_name, column_data_type, column_max_length, column_precision in col_info_tuples_list # noqa: E501 + for schema_name, table_name, column_id, column_name, column_data_type, column_max_length, column_precision in col_info_tuples_list # noqa: E501 # FIXME CoP ] elif dialect.name.lower() == "trino": try: - table_name = selectable.name # type: ignore[attr-defined] + table_name = selectable.name # type: ignore[attr-defined] # FIXME CoP except AttributeError: table_name = selectable if str(table_name).lower().startswith("select"): @@ -609,7 +576,7 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 schema="information_schema", ) tables_table_query = ( - sa.select( # type: ignore[assignment] + sa.select( # type: ignore[assignment] # FIXME CoP sa.column("table_schema").label("schema_name"), sa.column("table_name").label("table_name"), ) @@ -622,7 +589,7 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 schema="information_schema", ) 
columns_table_query = ( - sa.select( # type: ignore[assignment] + sa.select( # type: ignore[assignment] # FIXME CoP sa.column("column_name").label("column_name"), sa.column("table_name").label("table_name"), sa.column("table_schema").label("schema_name"), @@ -638,14 +605,14 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 ) ) col_info_query = ( - sa.select( # type: ignore[assignment] + sa.select( # type: ignore[assignment] # FIXME CoP tables_table_query.c.schema_name, tables_table_query.c.table_name, columns_table_query.c.column_name, columns_table_query.c.column_data_type, ) .select_from( - tables_table_query.join( # type: ignore[call-arg,arg-type] + tables_table_query.join( # type: ignore[call-arg,arg-type] # FIXME CoP right=columns_table_query, onclause=conditions, isouter=False ) ) @@ -667,10 +634,9 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 # in sqlalchemy > 2.0.0 this is a Subquery, which we need to convert into a Selectable if not col_info_query.supports_execution: - col_info_query = sa.select(col_info_query) # type: ignore[call-overload] + col_info_query = sa.select(col_info_query) # type: ignore[call-overload] # FIXME CoP - col_info_tuples_list = connection.execute(col_info_query).fetchall() # type: ignore[assignment] - # type_module = _get_dialect_type_module(dialect=dialect) + col_info_tuples_list = connection.execute(col_info_query).fetchall() # type: ignore[assignment] # FIXME CoP col_info_dict_list = [ { "name": column_name, @@ -680,21 +646,21 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 ] else: # if a custom query was passed - if sqlalchemy.TextClause and isinstance(selectable, sqlalchemy.TextClause): # type: ignore[truthy-function] + if sqlalchemy.TextClause and isinstance(selectable, sqlalchemy.TextClause): # type: ignore[truthy-function] # FIXME CoP query: sqlalchemy.TextClause = selectable - elif sqlalchemy.Table and isinstance(selectable, sqlalchemy.Table): # type: 
ignore[truthy-function] + elif sqlalchemy.Table and isinstance(selectable, sqlalchemy.Table): # type: ignore[truthy-function] # FIXME CoP query = sa.select(sa.text("*")).select_from(selectable).limit(1) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP # noinspection PyUnresolvedReferences if dialect.name.lower() == GXSqlDialect.REDSHIFT: # Redshift needs temp tables to be declared as text - query = sa.select(sa.text("*")).select_from(sa.text(selectable)).limit(1) # type: ignore[assignment,arg-type] + query = sa.select(sa.text("*")).select_from(sa.text(selectable)).limit(1) # type: ignore[assignment,arg-type] # FIXME CoP else: - query = sa.select(sa.text("*")).select_from(sa.text(selectable)).limit(1) # type: ignore[assignment,arg-type] + query = sa.select(sa.text("*")).select_from(sa.text(selectable)).limit(1) # type: ignore[assignment,arg-type] # FIXME CoP result_object = connection.execute(query) # noinspection PyProtectedMember - col_names: List[str] = result_object._metadata.keys # type: ignore[assignment] + col_names: List[str] = result_object._metadata.keys # type: ignore[assignment] # FIXME CoP col_info_dict_list = [{"name": col_name} for col_name in col_names] return col_info_dict_list @@ -713,7 +679,7 @@ def get_dbms_compatible_metric_domain_kwargs( Returns: metric_domain_kwargs: Updated "metric_domain_kwargs" dictionary with quoted column names, where appropriate. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP column_names: List[str | sqlalchemy.quoted_name] if "column" in metric_domain_kwargs: column_name: str | sqlalchemy.quoted_name = get_dbms_compatible_column_names( @@ -766,7 +732,7 @@ def get_dbms_compatible_column_names( def get_dbms_compatible_column_names( column_names: List[str] | str, batch_columns_list: Sequence[str | sqlalchemy.quoted_name], - error_message_template: str = 'Error: The column "{column_name:s}" in BatchData does not exist.', # noqa: E501 + error_message_template: str = 'Error: The column "{column_name:s}" in BatchData does not exist.', # noqa: E501 # FIXME CoP ) -> List[str | sqlalchemy.quoted_name] | str | sqlalchemy.quoted_name: """ Case non-sensitivity is expressed in upper case by common DBMS backends and in lower case by SQLAlchemy, with any @@ -784,7 +750,7 @@ def get_dbms_compatible_column_names( Returns: Single property-typed column name object or list of property-typed column name objects (depending on input). 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP normalized_typed_batch_columns_mappings: List[Tuple[str, str | sqlalchemy.quoted_name]] = ( _verify_column_names_exist_and_get_normalized_typed_column_names_map( column_names=column_names, @@ -807,7 +773,7 @@ def get_dbms_compatible_column_names( def verify_column_names_exist( column_names: List[str] | str, batch_columns_list: List[str | sqlalchemy.quoted_name], - error_message_template: str = 'Error: The column "{column_name:s}" in BatchData does not exist.', # noqa: E501 + error_message_template: str = 'Error: The column "{column_name:s}" in BatchData does not exist.', # noqa: E501 # FIXME CoP ) -> None: _ = _verify_column_names_exist_and_get_normalized_typed_column_names_map( column_names=column_names, @@ -817,10 +783,10 @@ def verify_column_names_exist( ) -def _verify_column_names_exist_and_get_normalized_typed_column_names_map( # noqa: C901 +def _verify_column_names_exist_and_get_normalized_typed_column_names_map( # noqa: C901 # FIXME CoP column_names: List[str] | str, batch_columns_list: Sequence[str | sqlalchemy.quoted_name], - error_message_template: str = 'Error: The column "{column_name:s}" in BatchData does not exist.', # noqa: E501 + error_message_template: str = 'Error: The column "{column_name:s}" in BatchData does not exist.', # noqa: E501 # FIXME CoP verify_only: bool = False, ) -> List[Tuple[str, str | sqlalchemy.quoted_name]] | None: """ @@ -834,7 +800,7 @@ def _verify_column_names_exist_and_get_normalized_typed_column_names_map( # noq Returns: List of tuples having mapping from string-valued column name to typed column name; None if "verify_only" is set. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP column_names_list: List[str] if isinstance(column_names, list): column_names_list = column_names @@ -847,7 +813,7 @@ def _get_normalized_column_name_mapping_if_exists( typed_column_name_cursor: str | sqlalchemy.quoted_name for typed_column_name_cursor in batch_columns_list: if ( - (type(typed_column_name_cursor) == str) # noqa: E721 + (type(typed_column_name_cursor) == str) # noqa: E721 # FIXME CoP and (column_name.casefold() == typed_column_name_cursor.casefold()) ) or (column_name == str(typed_column_name_cursor)): return column_name, typed_column_name_cursor @@ -874,7 +840,7 @@ def _get_normalized_column_name_mapping_if_exists( raise gx_exceptions.InvalidMetricAccessorDomainKwargsKeyError( message=error_message_template.format(column_name=column_name) ) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if not verify_only: normalized_batch_columns_mappings.append(normalized_column_name_mapping) @@ -886,7 +852,7 @@ def parse_value_set(value_set: Iterable) -> list: return parsed_value_set -def get_dialect_like_pattern_expression( # noqa: C901, PLR0912, PLR0915 +def get_dialect_like_pattern_expression( # noqa: C901, PLR0912, PLR0915 # FIXME CoP column: sa.Column, dialect: ModuleType, like_pattern: str, positive: bool = True ) -> sa.BinaryExpression | None: dialect_supported: bool = False @@ -985,7 +951,7 @@ def get_dialect_like_pattern_expression( # noqa: C901, PLR0912, PLR0915 return None -def validate_distribution_parameters( # noqa: C901, PLR0912, PLR0915 +def validate_distribution_parameters( # noqa: C901, PLR0912, PLR0915 # FIXME CoP distribution, params ): """Ensures that necessary parameters for a distribution are present and that all parameters are sensical. @@ -1007,14 +973,14 @@ def validate_distribution_parameters( # noqa: C901, PLR0912, PLR0915 ValueError: \ With an informative description, usually when necessary parameters are omitted or are invalid. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP norm_msg = "norm distributions require 0 parameters and optionally 'mean', 'std_dev'." - beta_msg = "beta distributions require 2 positive parameters 'alpha', 'beta' and optionally 'loc', 'scale'." # noqa: E501 + beta_msg = "beta distributions require 2 positive parameters 'alpha', 'beta' and optionally 'loc', 'scale'." # noqa: E501 # FIXME CoP gamma_msg = ( "gamma distributions require 1 positive parameter 'alpha' and optionally 'loc','scale'." ) - # poisson_msg = "poisson distributions require 1 positive parameter 'lambda' and optionally 'loc'." # noqa: E501 + # poisson_msg = "poisson distributions require 1 positive parameter 'lambda' and optionally 'loc'." # noqa: E501 # FIXME CoP uniform_msg = "uniform distributions require 0 parameters and optionally 'loc', 'scale'." chi2_msg = "chi2 distributions require 1 positive parameter 'df' and optionally 'loc', 'scale'." expon_msg = "expon distributions require 0 parameters and optionally 'loc', 'scale'." 
@@ -1028,20 +994,20 @@ def validate_distribution_parameters( # noqa: C901, PLR0912, PLR0915 "chi2", "expon", ]: - raise AttributeError(f"Unsupported distribution provided: {distribution}") # noqa: TRY003 + raise AttributeError(f"Unsupported distribution provided: {distribution}") # noqa: TRY003 # FIXME CoP if isinstance(params, dict): # `params` is a dictionary if params.get("std_dev", 1) <= 0 or params.get("scale", 1) <= 0: - raise ValueError("std_dev and scale must be positive.") # noqa: TRY003 + raise ValueError("std_dev and scale must be positive.") # noqa: TRY003 # FIXME CoP # alpha and beta are required and positive if distribution == "beta" and (params.get("alpha", -1) <= 0 or params.get("beta", -1) <= 0): - raise ValueError(f"Invalid parameters: {beta_msg}") # noqa: TRY003 + raise ValueError(f"Invalid parameters: {beta_msg}") # noqa: TRY003 # FIXME CoP # alpha is required and positive elif distribution == "gamma" and params.get("alpha", -1) <= 0: - raise ValueError(f"Invalid parameters: {gamma_msg}") # noqa: TRY003 + raise ValueError(f"Invalid parameters: {gamma_msg}") # noqa: TRY003 # FIXME CoP # lambda is a required and positive # elif distribution == 'poisson' and params.get('lambda', -1) <= 0: @@ -1049,37 +1015,37 @@ def validate_distribution_parameters( # noqa: C901, PLR0912, PLR0915 # df is necessary and required to be positive elif distribution == "chi2" and params.get("df", -1) <= 0: - raise ValueError(f"Invalid parameters: {chi2_msg}:") # noqa: TRY003 + raise ValueError(f"Invalid parameters: {chi2_msg}:") # noqa: TRY003 # FIXME CoP elif isinstance(params, (tuple, list)): scale = None # `params` is a tuple or a list if distribution == "beta": - if len(params) < 2: # noqa: PLR2004 - raise ValueError(f"Missing required parameters: {beta_msg}") # noqa: TRY003 + if len(params) < 2: # noqa: PLR2004 # FIXME CoP + raise ValueError(f"Missing required parameters: {beta_msg}") # noqa: TRY003 # FIXME CoP if params[0] <= 0 or params[1] <= 0: - raise 
ValueError(f"Invalid parameters: {beta_msg}") # noqa: TRY003 - if len(params) == 4: # noqa: PLR2004 + raise ValueError(f"Invalid parameters: {beta_msg}") # noqa: TRY003 # FIXME CoP + if len(params) == 4: # noqa: PLR2004 # FIXME CoP scale = params[3] - elif len(params) > 4: # noqa: PLR2004 - raise ValueError(f"Too many parameters provided: {beta_msg}") # noqa: TRY003 + elif len(params) > 4: # noqa: PLR2004 # FIXME CoP + raise ValueError(f"Too many parameters provided: {beta_msg}") # noqa: TRY003 # FIXME CoP elif distribution == "norm": - if len(params) > 2: # noqa: PLR2004 - raise ValueError(f"Too many parameters provided: {norm_msg}") # noqa: TRY003 - if len(params) == 2: # noqa: PLR2004 + if len(params) > 2: # noqa: PLR2004 # FIXME CoP + raise ValueError(f"Too many parameters provided: {norm_msg}") # noqa: TRY003 # FIXME CoP + if len(params) == 2: # noqa: PLR2004 # FIXME CoP scale = params[1] elif distribution == "gamma": if len(params) < 1: - raise ValueError(f"Missing required parameters: {gamma_msg}") # noqa: TRY003 - if len(params) == 3: # noqa: PLR2004 + raise ValueError(f"Missing required parameters: {gamma_msg}") # noqa: TRY003 # FIXME CoP + if len(params) == 3: # noqa: PLR2004 # FIXME CoP scale = params[2] - if len(params) > 3: # noqa: PLR2004 - raise ValueError(f"Too many parameters provided: {gamma_msg}") # noqa: TRY003 + if len(params) > 3: # noqa: PLR2004 # FIXME CoP + raise ValueError(f"Too many parameters provided: {gamma_msg}") # noqa: TRY003 # FIXME CoP elif params[0] <= 0: - raise ValueError(f"Invalid parameters: {gamma_msg}") # noqa: TRY003 + raise ValueError(f"Invalid parameters: {gamma_msg}") # noqa: TRY003 # FIXME CoP # elif distribution == 'poisson': # if len(params) < 1: @@ -1090,33 +1056,33 @@ def validate_distribution_parameters( # noqa: C901, PLR0912, PLR0915 # raise ValueError("Invalid parameters: %s" %poisson_msg) elif distribution == "uniform": - if len(params) == 2: # noqa: PLR2004 + if len(params) == 2: # noqa: PLR2004 # FIXME CoP 
scale = params[1] - if len(params) > 2: # noqa: PLR2004 - raise ValueError(f"Too many arguments provided: {uniform_msg}") # noqa: TRY003 + if len(params) > 2: # noqa: PLR2004 # FIXME CoP + raise ValueError(f"Too many arguments provided: {uniform_msg}") # noqa: TRY003 # FIXME CoP elif distribution == "chi2": if len(params) < 1: - raise ValueError(f"Missing required parameters: {chi2_msg}") # noqa: TRY003 - elif len(params) == 3: # noqa: PLR2004 + raise ValueError(f"Missing required parameters: {chi2_msg}") # noqa: TRY003 # FIXME CoP + elif len(params) == 3: # noqa: PLR2004 # FIXME CoP scale = params[2] - elif len(params) > 3: # noqa: PLR2004 - raise ValueError(f"Too many arguments provided: {chi2_msg}") # noqa: TRY003 + elif len(params) > 3: # noqa: PLR2004 # FIXME CoP + raise ValueError(f"Too many arguments provided: {chi2_msg}") # noqa: TRY003 # FIXME CoP if params[0] <= 0: - raise ValueError(f"Invalid parameters: {chi2_msg}") # noqa: TRY003 + raise ValueError(f"Invalid parameters: {chi2_msg}") # noqa: TRY003 # FIXME CoP elif distribution == "expon": - if len(params) == 2: # noqa: PLR2004 + if len(params) == 2: # noqa: PLR2004 # FIXME CoP scale = params[1] - if len(params) > 2: # noqa: PLR2004 - raise ValueError(f"Too many arguments provided: {expon_msg}") # noqa: TRY003 + if len(params) > 2: # noqa: PLR2004 # FIXME CoP + raise ValueError(f"Too many arguments provided: {expon_msg}") # noqa: TRY003 # FIXME CoP if scale is not None and scale <= 0: - raise ValueError("std_dev and scale must be positive.") # noqa: TRY003 + raise ValueError("std_dev and scale must be positive.") # noqa: TRY003 # FIXME CoP else: - raise ValueError( # noqa: TRY003, TRY004 - "params must be a dict or list, or use great_expectations.dataset.util.infer_distribution_parameters(data, distribution)" # noqa: E501 + raise ValueError( # noqa: TRY003, TRY004 # FIXME CoP + "params must be a dict or list, or use great_expectations.dataset.util.infer_distribution_parameters(data, distribution)" # 
noqa: E501 # FIXME CoP ) @@ -1136,7 +1102,7 @@ def _scipy_distribution_positional_args_from_dict(distribution, params): Raises: AttributeError: \ If an unsupported distribution is provided. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP params["loc"] = params.get("loc", 0) if "scale" not in params: @@ -1163,7 +1129,7 @@ def is_valid_continuous_partition_object(partition_object): :param partition_object: The partition_object to evaluate :return: Boolean - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if ( (partition_object is None) or ("weights" not in partition_object) @@ -1172,17 +1138,17 @@ def is_valid_continuous_partition_object(partition_object): return False if "tail_weights" in partition_object: - if len(partition_object["tail_weights"]) != 2: # noqa: PLR2004 + if len(partition_object["tail_weights"]) != 2: # noqa: PLR2004 # FIXME CoP return False comb_weights = partition_object["tail_weights"] + partition_object["weights"] else: comb_weights = partition_object["weights"] - ## TODO: Consider adding this check to migrate to the tail_weights structure of partition objects # noqa: E501 + ## TODO: Consider adding this check to migrate to the tail_weights structure of partition objects # noqa: E501 # FIXME CoP # if (partition_object['bins'][0] == -np.inf) or (partition_object['bins'][-1] == np.inf): # return False - # Expect one more bin edge than weight; all bin edges should be monotonically increasing; weights should sum to one # noqa: E501 + # Expect one more bin edge than weight; all bin edges should be monotonically increasing; weights should sum to one # noqa: E501 # FIXME CoP return ( (len(partition_object["bins"]) == (len(partition_object["weights"]) + 1)) and np.all(np.diff(partition_object["bins"]) > 0) @@ -1208,8 +1174,8 @@ def sql_statement_with_post_compile_to_string( Returns: String representation of select_statement - """ # noqa: E501 - sqlalchemy_connection: sa.engine.base.Connection = engine.engine # type: ignore[assignment] + """ # noqa: 
E501 # FIXME CoP + sqlalchemy_connection: sa.engine.base.Connection = engine.engine # type: ignore[assignment] # FIXME CoP compiled = select_statement.compile( sqlalchemy_connection, compile_kwargs={"render_postcompile": True}, @@ -1218,7 +1184,7 @@ def sql_statement_with_post_compile_to_string( dialect_name: str = engine.dialect_name if dialect_name in ["sqlite", "trino", "mssql"]: - params = (repr(compiled.params[name]) for name in compiled.positiontup) # type: ignore[union-attr] + params = (repr(compiled.params[name]) for name in compiled.positiontup) # type: ignore[union-attr] # FIXME CoP query_as_string = re.sub(r"\?", lambda m: next(params), str(compiled)) else: @@ -1242,7 +1208,7 @@ def get_sqlalchemy_source_table_and_schema( engine (SqlAlchemyExecutionEngine): Engine that is currently being used to calculate the Metrics Returns: SqlAlchemy Table that is the source table and schema. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP assert isinstance( engine.batch_manager.active_batch_data, SqlAlchemyBatchData ), "`active_batch_data` not SqlAlchemyBatchData" @@ -1259,7 +1225,7 @@ def get_sqlalchemy_source_table_and_schema( return engine.batch_manager.active_batch_data.selectable -def get_unexpected_indices_for_multiple_pandas_named_indices( # noqa: C901 +def get_unexpected_indices_for_multiple_pandas_named_indices( # noqa: C901 # FIXME CoP domain_records_df: pd.DataFrame, unexpected_index_column_names: List[str], expectation_domain_column_list: List[str], @@ -1275,10 +1241,10 @@ def get_unexpected_indices_for_multiple_pandas_named_indices( # noqa: C901 Returns: List of Dicts that contain ID/PK values - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if not expectation_domain_column_list: raise gx_exceptions.MetricResolutionError( - message="Error: The list of domain columns is currently empty. Please check your configuration.", # noqa: E501 + message="Error: The list of domain columns is currently empty. 
Please check your configuration.", # noqa: E501 # FIXME CoP failed_metrics=["unexpected_index_list"], ) @@ -1339,7 +1305,7 @@ def get_unexpected_indices_for_single_pandas_named_index( Returns: List of Dicts that contain ID/PK values - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if not expectation_domain_column_list: return [] unexpected_index_values_by_named_index: List[int | str] = list(domain_records_df.index) @@ -1349,7 +1315,7 @@ def get_unexpected_indices_for_single_pandas_named_index( and unexpected_index_column_names[0] == domain_records_df.index.name ): raise gx_exceptions.MetricResolutionError( - message=f"Error: The column {unexpected_index_column_names[0] if unexpected_index_column_names else ''} does not exist in the named indices. Please check your configuration", # noqa: E501 + message=f"Error: The column {unexpected_index_column_names[0] if unexpected_index_column_names else ''} does not exist in the named indices. Please check your configuration", # noqa: E501 # FIXME CoP failed_metrics=["unexpected_index_list"], ) @@ -1371,7 +1337,7 @@ def get_unexpected_indices_for_single_pandas_named_index( return unexpected_index_list -def compute_unexpected_pandas_indices( # noqa: C901 +def compute_unexpected_pandas_indices( # noqa: C901 # FIXME CoP domain_records_df: pd.DataFrame, expectation_domain_column_list: List[str], result_format: Dict[str, Any], @@ -1392,7 +1358,7 @@ def compute_unexpected_pandas_indices( # noqa: C901 Returns: list of unexpected_index_list values. It can either be a list of dicts or a list of numbers (if using default index). 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP unexpected_index_column_names: List[str] unexpected_index_list: UnexpectedIndexList exclude_unexpected_values: bool = result_format.get("exclude_unexpected_values", False) @@ -1434,10 +1400,10 @@ def compute_unexpected_pandas_indices( # noqa: C901 } for index in unexpected_indices: for column_name in unexpected_index_column_names: - column_name = get_dbms_compatible_column_names( # noqa: PLW2901 + column_name = get_dbms_compatible_column_names( # noqa: PLW2901 # FIXME CoP column_names=column_name, batch_columns_list=metrics["table.columns"], - error_message_template='Error: The unexpected_index_column "{column_name:s}" does not exist in Dataframe. Please check your configuration and try again.', # noqa: E501 + error_message_template='Error: The unexpected_index_column "{column_name:s}" does not exist in Dataframe. Please check your configuration and try again.', # noqa: E501 # FIXME CoP ) primary_key_dict_list[column_name].append( domain_records_df.at[index, column_name] @@ -1455,10 +1421,10 @@ def compute_unexpected_pandas_indices( # noqa: C901 index, domain_column_name ] for column_name in unexpected_index_column_names: - column_name = get_dbms_compatible_column_names( # noqa: PLW2901 + column_name = get_dbms_compatible_column_names( # noqa: PLW2901 # FIXME CoP column_names=column_name, batch_columns_list=metrics["table.columns"], - error_message_template='Error: The unexpected_index_column "{column_name:s}" does not exist in Dataframe. Please check your configuration and try again.', # noqa: E501 + error_message_template='Error: The unexpected_index_column "{column_name:s}" does not exist in Dataframe. 
Please check your configuration and try again.', # noqa: E501 # FIXME CoP ) primary_key_dict[column_name] = domain_records_df.at[index, column_name] unexpected_index_list.append(primary_key_dict) diff --git a/great_expectations/expectations/model_field_types.py b/great_expectations/expectations/model_field_types.py index a63b8e7ada4e..d619536f877e 100644 --- a/great_expectations/expectations/model_field_types.py +++ b/great_expectations/expectations/model_field_types.py @@ -11,7 +11,7 @@ ) MostlyField = Annotated[ - float, + Union[float, SuiteParameterDict], pydantic.Field( description=MOSTLY_DESCRIPTION, ge=0.0, @@ -22,7 +22,7 @@ ] ValueSetField = Annotated[ - Union[Sequence, set, SuiteParameterDict, None], + Union[Sequence, set, SuiteParameterDict], pydantic.Field( title="Value Set", description=VALUE_SET_DESCRIPTION, diff --git a/great_expectations/expectations/regex_based_column_map_expectation.py b/great_expectations/expectations/regex_based_column_map_expectation.py index 4b3d37eacc50..e733053af1e8 100644 --- a/great_expectations/expectations/regex_based_column_map_expectation.py +++ b/great_expectations/expectations/regex_based_column_map_expectation.py @@ -74,7 +74,7 @@ class RegexColumnMapMetricProvider(ColumnMapMetricProvider): ---Documentation--- - https://docs.greatexpectations.io/docs/guides/expectations/creating_custom_expectations/how_to_create_custom_regex_based_column_map_expectations - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP condition_value_keys = () @@ -121,7 +121,7 @@ class RegexBasedColumnMapExpectation(ColumnMapExpectation, ABC): regex_ (str): A valid regex pattern. semantic_type_name_plural (optional[str]): The plural form of a semantic type being validated by a regex pattern. map_metric (str): The name of an ephemeral metric, as returned by `register_metric(...)`. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP @staticmethod def register_metric( @@ -136,7 +136,7 @@ def register_metric( Returns: map_metric: The constructed name of the ephemeral metric. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP regex_snake_name: str = camel_to_snake(regex_camel_name) map_metric: str = "column_values.match_" + regex_snake_name + "_regex" @@ -164,7 +164,7 @@ def validate_configuration( Raises: InvalidExpectationConfigurationError: If no `regex` or `column` specified, or if `mostly` parameter incorrectly defined. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP super().validate_configuration(configuration) try: assert ( @@ -193,14 +193,14 @@ def _question_renderer(cls, configuration, result=None, runtime_configuration=No if mostly == 1 or mostly is None: if semantic_type_name_plural is not None: - return f'Are all values in column "{column}" valid {semantic_type_name_plural}, as judged by matching the regular expression {regex}?' # noqa: E501 + return f'Are all values in column "{column}" valid {semantic_type_name_plural}, as judged by matching the regular expression {regex}?' # noqa: E501 # FIXME CoP else: return f'Do all values in column "{column}" match the regular expression {regex}?' - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if semantic_type_name_plural is not None: - return f'Are at least {mostly * 100}% of values in column "{column}" valid {semantic_type_name_plural}, as judged by matching the regular expression {regex}?' # noqa: E501 + return f'Are at least {mostly * 100}% of values in column "{column}" valid {semantic_type_name_plural}, as judged by matching the regular expression {regex}?' # noqa: E501 # FIXME CoP else: - return f'Do at least {mostly * 100}% of values in column "{column}" match the regular expression {regex}?' # noqa: E501 + return f'Do at least {mostly * 100}% of values in column "{column}" match the regular expression {regex}?' 
# noqa: E501 # FIXME CoP @classmethod @renderer(renderer_type=LegacyRendererType.ANSWER) @@ -213,19 +213,19 @@ def _answer_renderer(cls, configuration=None, result=None, runtime_configuration if result.success: if mostly == 1 or mostly is None: if semantic_type_name_plural is not None: - return f'All values in column "{column}" are valid {semantic_type_name_plural}, as judged by matching the regular expression {regex}.' # noqa: E501 + return f'All values in column "{column}" are valid {semantic_type_name_plural}, as judged by matching the regular expression {regex}.' # noqa: E501 # FIXME CoP else: return f'All values in column "{column}" match the regular expression {regex}.' - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if semantic_type_name_plural is not None: - return f'At least {mostly * 100}% of values in column "{column}" are valid {semantic_type_name_plural}, as judged by matching the regular expression {regex}.' # noqa: E501 + return f'At least {mostly * 100}% of values in column "{column}" are valid {semantic_type_name_plural}, as judged by matching the regular expression {regex}.' # noqa: E501 # FIXME CoP else: - return f'At least {mostly * 100}% of values in column "{column}" match the regular expression {regex}.' # noqa: E501 - else: # noqa: PLR5501 + return f'At least {mostly * 100}% of values in column "{column}" match the regular expression {regex}.' # noqa: E501 # FIXME CoP + else: # noqa: PLR5501 # FIXME CoP if semantic_type_name_plural is not None: - return f' Less than {mostly * 100}% of values in column "{column}" are valid {semantic_type_name_plural}, as judged by matching the regular expression {regex}.' # noqa: E501 + return f' Less than {mostly * 100}% of values in column "{column}" are valid {semantic_type_name_plural}, as judged by matching the regular expression {regex}.' # noqa: E501 # FIXME CoP else: - return f'Less than {mostly * 100}% of values in column "{column}" match the regular expression {regex}.' 
# noqa: E501 + return f'Less than {mostly * 100}% of values in column "{column}" match the regular expression {regex}.' # noqa: E501 # FIXME CoP @override @classmethod @@ -289,7 +289,7 @@ def _prescriptive_renderer( template_str = "values must match this regular expression: $regex" if params["mostly"] is not None: params["mostly_pct"] = num_to_str(params["mostly"] * 100, no_scientific=True) - # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".") # noqa: E501 + # params["mostly_pct"] = "{:.14f}".format(params["mostly"]*100).rstrip("0").rstrip(".") # noqa: E501 # FIXME CoP template_str += ", at least $mostly_pct % of the time." else: template_str += "." diff --git a/great_expectations/expectations/registry.py b/great_expectations/expectations/registry.py index 04dba6b69ad8..c2f140a2d906 100644 --- a/great_expectations/expectations/registry.py +++ b/great_expectations/expectations/registry.py @@ -65,7 +65,7 @@ def register_renderer( renderer_fn: Callable[..., Union[RenderedAtomicContent, RenderedContent]], ): # noinspection PyUnresolvedReferences - renderer_name = renderer_fn._renderer_type # type: ignore[attr-defined] + renderer_name = renderer_fn._renderer_type # type: ignore[attr-defined] # FIXME CoP if object_name not in _registered_renderers: logger.debug(f"Registering {renderer_name} for expectation_type {object_name}.") _registered_renderers[object_name] = {renderer_name: (parent_class, renderer_fn)} @@ -77,7 +77,7 @@ def register_renderer( renderer_fn, ): logger.info( - f"Multiple declarations of {renderer_name} renderer for expectation_type {object_name} " # noqa: E501 + f"Multiple declarations of {renderer_name} renderer for expectation_type {object_name} " # noqa: E501 # FIXME CoP f"found." ) return @@ -105,7 +105,7 @@ def get_renderer_names(expectation_or_metric_type: str) -> List[str]: Returns: A list of renderer names for the Expectation or Metric. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return list(_registered_renderers.get(expectation_or_metric_type, {}).keys()) @@ -121,7 +121,7 @@ def get_renderer_names_with_renderer_types( Returns: A list of renderer names for the given prefixes and Expectation or Metric. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return [ renderer_name for renderer_name in get_renderer_names( @@ -172,7 +172,7 @@ def register_core_metrics() -> None: # Implicitly calls MetaMetricProvider.__new__ as Metrics are loaded from metrics.__init__.py # As __new__ calls upon register_metric this import builds our core registry - from great_expectations.expectations import metrics # noqa: F401 + from great_expectations.expectations import metrics # noqa: F401 # FIXME CoP after_count = len(_registered_metrics) @@ -198,7 +198,7 @@ def register_core_expectations() -> None: # Implicitly calls MetaExpectation.__new__ as Expectations are loaded from core.__init__.py # As __new__ calls upon register_expectation, this import builds our core registry - from great_expectations.expectations import core # noqa: F401 + from great_expectations.expectations import core # noqa: F401 # FIXME CoP after_count = len(_registered_expectations) @@ -216,7 +216,7 @@ def _add_response_key(res, key, value): return res -def register_metric( # noqa: PLR0913 +def register_metric( # noqa: PLR0913 # FIXME CoP metric_name: str, metric_domain_keys: Tuple[str, ...], metric_value_keys: Tuple[str, ...], @@ -239,34 +239,34 @@ def register_metric( # noqa: PLR0913 Returns: A dictionary containing warnings thrown during registration if applicable, and the success status of registration. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP res: dict = {} execution_engine_name = execution_engine.__name__ logger.debug(f"Registering metric: {metric_name}") if metric_provider is not None and metric_fn_type is not None: - metric_provider.metric_fn_type = metric_fn_type # type: ignore[attr-defined] + metric_provider.metric_fn_type = metric_fn_type # type: ignore[attr-defined] # FIXME CoP if metric_name in _registered_metrics: metric_definition = _registered_metrics[metric_name] current_domain_keys = metric_definition.get("metric_domain_keys", set()) if set(current_domain_keys) != set(metric_domain_keys): logger.warning( - f"metric {metric_name} is being registered with different metric_domain_keys; overwriting metric_domain_keys" # noqa: E501 + f"metric {metric_name} is being registered with different metric_domain_keys; overwriting metric_domain_keys" # noqa: E501 # FIXME CoP ) _add_response_key( res, "warning", - f"metric {metric_name} is being registered with different metric_domain_keys; overwriting metric_domain_keys", # noqa: E501 + f"metric {metric_name} is being registered with different metric_domain_keys; overwriting metric_domain_keys", # noqa: E501 # FIXME CoP ) current_value_keys = metric_definition.get("metric_value_keys", set()) if set(current_value_keys) != set(metric_value_keys): logger.warning( - f"metric {metric_name} is being registered with different metric_value_keys; overwriting metric_value_keys" # noqa: E501 + f"metric {metric_name} is being registered with different metric_value_keys; overwriting metric_value_keys" # noqa: E501 # FIXME CoP ) _add_response_key( res, "warning", - f"metric {metric_name} is being registered with different metric_value_keys; overwriting metric_value_keys", # noqa: E501 + f"metric {metric_name} is being registered with different metric_value_keys; overwriting metric_value_keys", # noqa: E501 # FIXME CoP ) providers = metric_definition.get("providers", {}) @@ -274,22 +274,22 @@ def register_metric( # 
noqa: PLR0913 _current_provider_cls, current_provider_fn = providers[execution_engine_name] if current_provider_fn != metric_provider: logger.warning( - f"metric {metric_name} is being registered with different metric_provider; overwriting metric_provider" # noqa: E501 + f"metric {metric_name} is being registered with different metric_provider; overwriting metric_provider" # noqa: E501 # FIXME CoP ) _add_response_key( res, "warning", - f"metric {metric_name} is being registered with different metric_provider; overwriting metric_provider", # noqa: E501 + f"metric {metric_name} is being registered with different metric_provider; overwriting metric_provider", # noqa: E501 # FIXME CoP ) providers[execution_engine_name] = metric_class, metric_provider else: logger.info( - f"Multiple declarations of metric {metric_name} for engine {execution_engine_name}." # noqa: E501 + f"Multiple declarations of metric {metric_name} for engine {execution_engine_name}." # noqa: E501 # FIXME CoP ) _add_response_key( res, "info", - f"Multiple declarations of metric {metric_name} for engine {execution_engine_name}.", # noqa: E501 + f"Multiple declarations of metric {metric_name} for engine {execution_engine_name}.", # noqa: E501 # FIXME CoP ) else: providers[execution_engine_name] = metric_class, metric_provider @@ -314,7 +314,7 @@ def get_metric_provider( metric_definition = _registered_metrics[metric_name] return metric_definition["providers"][type(execution_engine).__name__] except KeyError: - raise gx_exceptions.MetricProviderError( # noqa: TRY003 + raise gx_exceptions.MetricProviderError( # noqa: TRY003 # FIXME CoP f"No provider found for {metric_name} using {type(execution_engine).__name__}" ) @@ -329,7 +329,7 @@ def get_metric_function_type( ] return getattr(provider_fn, "metric_fn_type", None) except KeyError: - raise gx_exceptions.MetricProviderError( # noqa: TRY003 + raise gx_exceptions.MetricProviderError( # noqa: TRY003 # FIXME CoP f"No provider found for {metric_name} using 
{type(execution_engine).__name__}" ) @@ -342,7 +342,7 @@ def get_metric_kwargs( try: metric_definition = _registered_metrics.get(metric_name) if metric_definition is None: - raise gx_exceptions.MetricProviderError(f"No definition found for {metric_name}") # noqa: TRY003 + raise gx_exceptions.MetricProviderError(f"No definition found for {metric_name}") # noqa: TRY003 # FIXME CoP default_kwarg_values = metric_definition["default_kwarg_values"] metric_kwargs = { "metric_domain_keys": metric_definition["metric_domain_keys"], @@ -377,7 +377,7 @@ def get_metric_kwargs( metric_kwargs["metric_value_kwargs"] = metric_value_kwargs return metric_kwargs except KeyError: - raise gx_exceptions.MetricProviderError(f"Incomplete definition found for {metric_name}") # noqa: TRY003 + raise gx_exceptions.MetricProviderError(f"Incomplete definition found for {metric_name}") # noqa: TRY003 # FIXME CoP def get_domain_metrics_dict_by_name( @@ -393,7 +393,7 @@ def get_domain_metrics_dict_by_name( def get_expectation_impl(expectation_name: str) -> Type[Expectation]: expectation: Type[Expectation] | None = _registered_expectations.get(expectation_name) if not expectation: - raise gx_exceptions.ExpectationNotFoundError(f"{expectation_name} not found") # noqa: TRY003 + raise gx_exceptions.ExpectationNotFoundError(f"{expectation_name} not found") # noqa: TRY003 # FIXME CoP return expectation diff --git a/great_expectations/expectations/row_conditions.py b/great_expectations/expectations/row_conditions.py index 2e9d7f564a3e..8d7438e95c86 100644 --- a/great_expectations/expectations/row_conditions.py +++ b/great_expectations/expectations/row_conditions.py @@ -23,7 +23,7 @@ from great_expectations.compatibility.sqlalchemy import sqlalchemy as sa from great_expectations.compatibility.typing_extensions import override from great_expectations.types import SerializableDictDot -from great_expectations.util import convert_to_json_serializable # noqa: TID251 +from great_expectations.util import 
convert_to_json_serializable # noqa: TID251 # FIXME CoP if TYPE_CHECKING: from great_expectations.compatibility import pyspark, sqlalchemy @@ -116,11 +116,11 @@ def _parse_great_expectations_condition(row_condition: str): try: return condition.parseString(row_condition) except ParseException: - raise ConditionParserError(f"unable to parse condition: {row_condition}") # noqa: TRY003 + raise ConditionParserError(f"unable to parse condition: {row_condition}") # noqa: TRY003 # FIXME CoP # noinspection PyUnresolvedReferences -def parse_condition_to_spark( # type: ignore[return] # return or raise exists for all branches # noqa: C901, PLR0911 +def parse_condition_to_spark( # type: ignore[return] # return or raise exists for all branches # noqa: C901, PLR0911 # FIXME CoP row_condition: str, ) -> pyspark.Column: parsed = _parse_great_expectations_condition(row_condition) @@ -129,7 +129,7 @@ def parse_condition_to_spark( # type: ignore[return] # return or raise exists f if parsed["op"] == "==": return F.col(column) == parsed["condition_value"] else: - raise ConditionParserError( # noqa: TRY003 + raise ConditionParserError( # noqa: TRY003 # FIXME CoP f"Invalid operator: {parsed['op']} for string literal spark condition." 
) elif "fnumber" in parsed: @@ -151,7 +151,7 @@ def parse_condition_to_spark( # type: ignore[return] # return or raise exists f elif "notnull" in parsed and parsed["notnull"] is True: return F.col(column).isNotNull() else: - raise ConditionParserError(f"unrecognized column condition: {row_condition}") # noqa: TRY003 + raise ConditionParserError(f"unrecognized column condition: {row_condition}") # noqa: TRY003 # FIXME CoP def generate_condition_by_operator(column, op, value): @@ -188,4 +188,4 @@ def parse_condition_to_sqlalchemy( elif "notnull" in parsed and parsed["notnull"] is True: return sa.not_(sa.column(column).is_(None)) else: - raise ConditionParserError(f"unrecognized column condition: {row_condition}") # noqa: TRY003 + raise ConditionParserError(f"unrecognized column condition: {row_condition}") # noqa: TRY003 # FIXME CoP diff --git a/great_expectations/expectations/set_based_column_map_expectation.py b/great_expectations/expectations/set_based_column_map_expectation.py index da49481fce0d..e92d8bb545ba 100644 --- a/great_expectations/expectations/set_based_column_map_expectation.py +++ b/great_expectations/expectations/set_based_column_map_expectation.py @@ -69,7 +69,7 @@ class SetColumnMapMetricProvider(ColumnMapMetricProvider): Constructed by the `register_metric(...)` function during Expectation execution. domain_keys (tuple): A tuple of the keys used to determine the domain of the metric. condition_value_keys (tuple): A tuple of the keys used to determine the value of the metric. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP condition_value_keys = () @@ -112,7 +112,7 @@ class SetBasedColumnMapExpectation(ColumnMapExpectation, ABC): ---Documentation--- - https://docs.greatexpectations.io/docs/guides/expectations/creating_custom_expectations/how_to_create_custom_set_based_column_map_expectations - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP @staticmethod def register_metric( @@ -127,7 +127,7 @@ def register_metric( Returns: map_metric: The constructed name of the ephemeral metric. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP set_snake_name = camel_to_snake(set_camel_name) map_metric: str = "column_values.match_" + set_snake_name + "_set" @@ -154,7 +154,7 @@ def validate_configuration( Raises: InvalidExpectationConfigurationError: If no `set_` or `column` specified, or if `mostly` parameter incorrectly defined. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP super().validate_configuration(configuration) try: assert ( @@ -189,11 +189,11 @@ def _question_renderer(cls, configuration, result=None, runtime_configuration=No return f'Are all values in column "{column}" in {set_semantic_name}: {set_!s}?' else: return f'Are all values in column "{column}" in the set {set_!s}?' - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if set_semantic_name is not None: - return f'Are at least {mostly * 100}% of values in column "{column}" in {set_semantic_name}: {set_!s}?' # noqa: E501 + return f'Are at least {mostly * 100}% of values in column "{column}" in {set_semantic_name}: {set_!s}?' # noqa: E501 # FIXME CoP else: - return f'Are at least {mostly * 100}% of values in column "{column}" in the set {set_!s}?' # noqa: E501 + return f'Are at least {mostly * 100}% of values in column "{column}" in the set {set_!s}?' 
# noqa: E501 # FIXME CoP @classmethod @renderer(renderer_type=LegacyRendererType.ANSWER) @@ -209,16 +209,16 @@ def _answer_renderer(cls, configuration=None, result=None, runtime_configuration return f'All values in column "{column}" are in {set_semantic_name}: {set_!s}.' else: return f'All values in column "{column}" are in the set {set_!s}.' - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if set_semantic_name is not None: - return f'At least {mostly * 100}% of values in column "{column}" are in {set_semantic_name}: {set_!s}.' # noqa: E501 + return f'At least {mostly * 100}% of values in column "{column}" are in {set_semantic_name}: {set_!s}.' # noqa: E501 # FIXME CoP else: - return f'At least {mostly * 100}% of values in column "{column}" are in the set {set!s}.' # noqa: E501 - else: # noqa: PLR5501 + return f'At least {mostly * 100}% of values in column "{column}" are in the set {set!s}.' # noqa: E501 # FIXME CoP + else: # noqa: PLR5501 # FIXME CoP if set_semantic_name is not None: - return f' Less than {mostly * 100}% of values in column "{column}" are in {set_semantic_name}: {set_!s}.' # noqa: E501 + return f' Less than {mostly * 100}% of values in column "{column}" are in {set_semantic_name}: {set_!s}.' # noqa: E501 # FIXME CoP else: - return f'Less than {mostly * 100}% of values in column "{column}" are in the set {set_!s}.' # noqa: E501 + return f'Less than {mostly * 100}% of values in column "{column}" are in the set {set_!s}.' 
# noqa: E501 # FIXME CoP @classmethod def _prescriptive_template( diff --git a/great_expectations/experimental/metric_repository/metric_list_metric_retriever.py b/great_expectations/experimental/metric_repository/metric_list_metric_retriever.py index 9ea8f49df326..eedd76f0e124 100644 --- a/great_expectations/experimental/metric_repository/metric_list_metric_retriever.py +++ b/great_expectations/experimental/metric_repository/metric_list_metric_retriever.py @@ -38,7 +38,7 @@ def get_metrics( metrics_result: List[Metric] = [] if not metric_list: - raise ValueError("metric_list cannot be empty") # noqa: TRY003 + raise ValueError("metric_list cannot be empty") # noqa: TRY003 # FIXME CoP self._check_valid_metric_types(metric_list) @@ -111,7 +111,7 @@ def _get_non_numeric_column_metrics( Returns: Sequence[Metric]: List of metrics for non-numeric columns. """ - # currently only the null-count is supported. If more metrics are added, this set will need to be updated. # noqa: E501 + # currently only the null-count is supported. If more metrics are added, this set will need to be updated. # noqa: E501 # FIXME CoP column_metric_names = {MetricTypes.COLUMN_NULL_COUNT} metrics: list[Metric] = [] metrics_list_as_set = set(metrics_list) @@ -191,7 +191,7 @@ def _get_timestamp_column_metrics( return metrics # Note: Timestamps are returned as strings for Snowflake, this may need to be adjusted - # when we support other datasources. For example in Pandas, timestamps can be returned as Timestamp(). # noqa: E501 + # when we support other datasources. For example in Pandas, timestamps can be returned as Timestamp(). # noqa: E501 # FIXME CoP return self._get_column_metrics( batch_request=batch_request, column_list=column_list, @@ -228,7 +228,7 @@ def _check_valid_metric_types(self, metric_list: List[MetricTypes]) -> bool: Returns: bool: True if all the metric types in the list are valid, False otherwise. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return all(metric in MetricTypes for metric in metric_list) def _column_metrics_in_metric_list(self, metric_list: List[MetricTypes]) -> bool: @@ -239,7 +239,7 @@ def _column_metrics_in_metric_list(self, metric_list: List[MetricTypes]) -> bool Returns: bool: True if any column metrics are present in the metric list, False otherwise. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP column_metrics: List[MetricTypes] = [ MetricTypes.COLUMN_MIN, MetricTypes.COLUMN_MAX, diff --git a/great_expectations/experimental/metric_repository/metric_retriever.py b/great_expectations/experimental/metric_repository/metric_retriever.py index 89def6bb601c..f1dd5779cb06 100644 --- a/great_expectations/experimental/metric_repository/metric_retriever.py +++ b/great_expectations/experimental/metric_repository/metric_retriever.py @@ -39,7 +39,7 @@ class MetricRetriever(abc.ABC): """A MetricRetriever is responsible for retrieving metrics for a batch of data. It is an ABC that contains base logic and methods share by both the ColumnDescriptiveMetricsMetricReceiver and MetricListMetricRetriver. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def __init__(self, context: AbstractDataContext): self._context = context @@ -48,6 +48,11 @@ def __init__(self, context: AbstractDataContext): def get_validator(self, batch_request: BatchRequest) -> Validator: if self._validator is None: self._validator = self._context.get_validator(batch_request=batch_request) + + if isinstance(self._validator.active_batch, Batch): + if self._validator.active_batch.data_asset.name != batch_request.data_asset_name: + self._validator = self._context.get_validator(batch_request=batch_request) + return self._validator @abc.abstractmethod @@ -128,7 +133,7 @@ def _compute_metrics( ) assert isinstance( validator.active_batch, Batch - ), f"validator.active_batch is type {type(validator.active_batch).__name__} instead of type {Batch.__name__}" # noqa: E501 + ), f"validator.active_batch is type {type(validator.active_batch).__name__} instead of type {Batch.__name__}" # noqa: E501 # FIXME CoP batch_id = validator.active_batch.id return batch_id, computed_metrics, aborted_metrics @@ -183,7 +188,7 @@ def _get_column_names_for_semantic_types( ) assert isinstance( validator.active_batch, Batch - ), f"validator.active_batch is type {type(validator.active_batch).__name__} instead of type {Batch.__name__}" # noqa: E501 + ), f"validator.active_batch is type {type(validator.active_batch).__name__} instead of type {Batch.__name__}" # noqa: E501 # FIXME CoP batch_id = validator.active_batch.id column_names = domain_builder.get_effective_column_names( validator=validator, @@ -302,7 +307,7 @@ def _get_table_column_types(self, batch_request: BatchRequest) -> Metric: aborted_metrics=aborted_metrics, ) raw_column_types: list[dict[str, Any]] = value - # If type is not found, don't add empty type field. This can happen if our db introspection fails. # noqa: E501 + # If type is not found, don't add empty type field. This can happen if our db introspection fails. 
# noqa: E501 # FIXME CoP column_types_converted_to_str: list[dict[str, str]] = [] for raw_column_type in raw_column_types: if raw_column_type.get("type"): diff --git a/great_expectations/experimental/metric_repository/metrics.py b/great_expectations/experimental/metric_repository/metrics.py index d607628018f6..9b49ad2f5fe7 100644 --- a/great_expectations/experimental/metric_repository/metrics.py +++ b/great_expectations/experimental/metric_repository/metrics.py @@ -35,7 +35,7 @@ class MetricTypes(str, enum.Enum, metaclass=MetricTypesMeta): """Represents Metric types in OSS that are used for ColumnDescriptiveMetrics and MetricRepository. More Metric types will be added in the future. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # Table metrics TABLE_COLUMNS = "table.columns" @@ -114,7 +114,7 @@ def _get_properties(cls): return properties @override - def dict( # noqa: PLR0913 + def dict( # noqa: PLR0913 # FIXME CoP self, *, include: AbstractSetIntStr | MappingIntStrAny | None = None, @@ -127,7 +127,7 @@ def dict( # noqa: PLR0913 ) -> Dict[str, Any]: """Override the dict function to include @property fields, in pydandic v2 we can use computed_field. https://docs.pydantic.dev/latest/usage/computed_fields/ - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP attribs = super().dict( include=include, exclude=exclude, @@ -195,7 +195,7 @@ def metric_type(self) -> str: # Metrics with parameters (aka metric_value_kwargs) # This is where the concrete metric types are defined that # bring together a domain type, value type and any parameters (aka metric_value_kwargs) -# If a metric has parameters, it should be defined here. If it doesn't, you can use the generic types above, for # noqa: E501 +# If a metric has parameters, it should be defined here. If it doesn't, you can use the generic types above, for # noqa: E501 # FIXME CoP # example, ColumnMetric[float] or TableMetric[list[str]]. 
# TODO: Add metrics here for all Column Descriptive Metrics @@ -205,7 +205,7 @@ def metric_type(self) -> str: class ColumnQuantileValuesMetric(ColumnMetric[List[float]]): quantiles: List[float] = Field(description="Quantiles to compute") allow_relative_error: Union[float, str] = Field( - description="Relative error interpolation type (pandas) or limit (e.g. spark) depending on data source" # noqa: E501 + description="Relative error interpolation type (pandas) or limit (e.g. spark) depending on data source" # noqa: E501 # FIXME CoP ) @property diff --git a/great_expectations/experimental/rule_based_profiler/attributed_resolved_metrics.py b/great_expectations/experimental/rule_based_profiler/attributed_resolved_metrics.py index 5c2ac56a7a22..065d3495d5d4 100644 --- a/great_expectations/experimental/rule_based_profiler/attributed_resolved_metrics.py +++ b/great_expectations/experimental/rule_based_profiler/attributed_resolved_metrics.py @@ -11,7 +11,7 @@ from great_expectations.compatibility.typing_extensions import override from great_expectations.types import SerializableDictDot from great_expectations.util import ( - convert_to_json_serializable, # noqa: TID251 + convert_to_json_serializable, # noqa: TID251 # FIXME CoP deep_filter_properties_iterable, ) @@ -25,17 +25,17 @@ logger = logging.getLogger(__name__) -def _condition_metric_values(metric_values: MetricValues) -> MetricValues: # noqa: C901 - def _detect_illegal_array_type_or_shape(values: MetricValues) -> bool: # noqa: C901 - # Pandas "DataFrame" and "Series" are illegal as candidates for conversion into "numpy.ndarray" type. # noqa: E501 +def _condition_metric_values(metric_values: MetricValues) -> MetricValues: # noqa: C901 # FIXME CoP + def _detect_illegal_array_type_or_shape(values: MetricValues) -> bool: # noqa: C901 # FIXME CoP + # Pandas "DataFrame" and "Series" are illegal as candidates for conversion into "numpy.ndarray" type. 
# noqa: E501 # FIXME CoP if isinstance( values, deep_filter_properties_iterable( properties=( pd.DataFrame, pd.Series, - sqlalchemy.Row if sqlalchemy.Row else None, # type: ignore[truthy-function] - pyspark.Row if pyspark.Row else None, # type: ignore[truthy-function] + sqlalchemy.Row if sqlalchemy.Row else None, # type: ignore[truthy-function] # FIXME CoP + pyspark.Row if pyspark.Row else None, # type: ignore[truthy-function] # FIXME CoP set, ) ), @@ -48,7 +48,7 @@ def _detect_illegal_array_type_or_shape(values: MetricValues) -> bool: # noqa: values = deep_filter_properties_iterable(properties=values) if values: values_iterator: Iterator - # Components of different lengths cannot be packaged into "numpy.ndarray" type (due to undefined shape). # noqa: E501 + # Components of different lengths cannot be packaged into "numpy.ndarray" type (due to undefined shape). # noqa: E501 # FIXME CoP if all(isinstance(value, (list, tuple)) for value in values): values_iterator = iter(values) first_value_length: int = len(next(values_iterator)) @@ -59,7 +59,7 @@ def _detect_illegal_array_type_or_shape(values: MetricValues) -> bool: # noqa: ): return True - # Components of different types cannot be packaged into "numpy.ndarray" type (due to type mismatch). # noqa: E501 + # Components of different types cannot be packaged into "numpy.ndarray" type (due to type mismatch). # noqa: E501 # FIXME CoP values_iterator = iter(values) first_value_type: type = type(next(values_iterator)) current_type: type @@ -89,7 +89,7 @@ class AttributedResolvedMetrics(SerializableDictDot): In order to gather results pertaining to diverse MetricConfiguration directives, computed metrics are augmented with uniquely identifiable attribution object so that receivers can filter them from overall resolved metrics. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP batch_ids: Optional[List[str]] = None metric_attributes: Optional[Attributes] = None @@ -101,7 +101,7 @@ def get_conditioned_attributed_metric_values_from_attributed_metric_values( ) -> Dict[str, MetricValues]: """ Converts "attributed_metric_values" to Numpy array for each "batch_id" key (recursively, wherever possible). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if attributed_metric_values is None: return {} @@ -118,7 +118,7 @@ def get_conditioned_metric_values_from_attributed_metric_values( ) -> Optional[MetricValues]: """ Converts all "attributed_metric_values" as list (together) to Numpy array (recursively, wherever possible). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if attributed_metric_values is None: return None @@ -158,7 +158,7 @@ def conditioned_attributed_metric_values(self) -> Dict[str, MetricValues]: if self.attributed_metric_values is None: return {} - return AttributedResolvedMetrics.get_conditioned_attributed_metric_values_from_attributed_metric_values( # noqa: E501 + return AttributedResolvedMetrics.get_conditioned_attributed_metric_values_from_attributed_metric_values( # noqa: E501 # FIXME CoP attributed_metric_values=self.attributed_metric_values ) diff --git a/great_expectations/experimental/rule_based_profiler/builder.py b/great_expectations/experimental/rule_based_profiler/builder.py index f798b38f1975..237de615e1cb 100644 --- a/great_expectations/experimental/rule_based_profiler/builder.py +++ b/great_expectations/experimental/rule_based_profiler/builder.py @@ -15,7 +15,7 @@ ) from great_expectations.types import SerializableDictDot, safe_deep_copy from great_expectations.util import ( - convert_to_json_serializable, # noqa: TID251 + convert_to_json_serializable, # noqa: TID251 # FIXME CoP deep_filter_properties_iterable, ) @@ -50,7 +50,7 @@ def __init__( """ Full getter/setter accessors for "batch_request" and "batch_list" are for configuring Builder dynamically. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP @property def batch_list(self) -> Optional[List[Batch]]: @@ -83,7 +83,7 @@ def set_batch_list_if_null_batch_request( """ If "batch_request" is already set on "Builder" object, then it is not overwritten. However, if "batch_request" is absent, then "batch_list" is accepted to support scenarios, where "Validator" already loaded "Batch" objects. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if self.batch_request is None: self.set_batch_data( batch_list=batch_list, @@ -104,8 +104,8 @@ def set_batch_data( ) ) if num_supplied_batch_specification_args > 1: - raise ProfilerConfigurationError( # noqa: TRY003 - f'Please pass at most one of "batch_list" and "batch_request" arguments (you passed {num_supplied_batch_specification_args} arguments).' # noqa: E501 + raise ProfilerConfigurationError( # noqa: TRY003 # FIXME CoP + f'Please pass at most one of "batch_list" and "batch_request" arguments (you passed {num_supplied_batch_specification_args} arguments).' # noqa: E501 # FIXME CoP ) if batch_list is None: @@ -132,7 +132,7 @@ def to_json_dict(self) -> dict: reference implementation in the "SerializableDictDot" class itself. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP dict_obj: dict = self.to_dict() serializeable_dict: dict = convert_to_json_serializable(data=dict_obj) return serializeable_dict @@ -157,7 +157,7 @@ def __repr__(self) -> str: implementation in the "SerializableDictDot" class. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP json_dict: dict = self.to_json_dict() deep_filter_properties_iterable( properties=json_dict, @@ -173,5 +173,5 @@ def __str__(self) -> str: implementation in the "SerializableDictDot" class. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return self.__repr__() diff --git a/great_expectations/experimental/rule_based_profiler/config/base.py b/great_expectations/experimental/rule_based_profiler/config/base.py index dc91e94138a4..0b3883419ff6 100644 --- a/great_expectations/experimental/rule_based_profiler/config/base.py +++ b/great_expectations/experimental/rule_based_profiler/config/base.py @@ -19,7 +19,7 @@ ) from great_expectations.types import DictDot, SerializableDictDot from great_expectations.util import ( - convert_to_json_serializable, # noqa: TID251 + convert_to_json_serializable, # noqa: TID251 # FIXME CoP deep_filter_properties_iterable, filter_properties_dict, ) @@ -45,7 +45,7 @@ class NotNullSchema(Schema): Reference: https://marshmallow.readthedocs.io/en/stable/extending.html - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # noinspection PyUnusedLocal @post_load @@ -62,7 +62,7 @@ def make_config(self, data: dict, **kwargs) -> Type[DictDot]: Raises: NotImplementedError: If the subclass inheriting NotNullSchema fails to define a __config_class__ - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if not hasattr(self, "__config_class__"): raise NotImplementedError( "The subclass extending NotNullSchema must define its own custom __config_class__" @@ -88,7 +88,7 @@ def remove_nulls_and_keep_unknowns(self, output: dict, original: DictDot, **kwar Returns: A cleaned dictionary that has no null values - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # noinspection PyArgumentList for ( # noqa: SIM118 # 
DictDot does not inherit from dict. key @@ -119,7 +119,7 @@ def __init__( for k, v in kwargs.items(): setattr(self, k, v) logger.debug( - f'Setting unknown kwarg ({k}, {v}) provided to constructor as argument in "{self.__class__.__name__}".', # noqa: E501 + f'Setting unknown kwarg ({k}, {v}) provided to constructor as argument in "{self.__class__.__name__}".', # noqa: E501 # FIXME CoP ) @override @@ -130,7 +130,7 @@ def to_json_dict(self) -> dict: reference implementation in the "SerializableDictDot" class itself. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP dict_obj: dict = self.to_dict() serializeable_dict: dict = convert_to_json_serializable(data=dict_obj) return serializeable_dict @@ -143,7 +143,7 @@ def __repr__(self) -> str: implementation in the "SerializableDictDot" class. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP json_dict: dict = self.to_json_dict() deep_filter_properties_iterable( properties=json_dict, @@ -165,7 +165,7 @@ def __str__(self) -> str: implementation in the "SerializableDictDot" class. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return self.__repr__() @@ -205,7 +205,7 @@ def __init__( for k, v in kwargs.items(): setattr(self, k, v) logger.debug( - f'Setting unknown kwarg ({k}, {v}) provided to constructor as argument in "{ self.__class__.__name__}".', # noqa: E501 + f'Setting unknown kwarg ({k}, {v}) provided to constructor as argument in "{ self.__class__.__name__}".', # noqa: E501 # FIXME CoP ) @override @@ -216,7 +216,7 @@ def to_json_dict(self) -> dict: reference implementation in the "SerializableDictDot" class itself. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP dict_obj: dict = self.to_dict() serializeable_dict: dict = convert_to_json_serializable(data=dict_obj) return serializeable_dict @@ -229,7 +229,7 @@ def __repr__(self) -> str: implementation in the "SerializableDictDot" class. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP json_dict: dict = self.to_json_dict() deep_filter_properties_iterable( properties=json_dict, @@ -251,7 +251,7 @@ def __str__(self) -> str: implementation in the "SerializableDictDot" class. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return self.__repr__() @@ -307,7 +307,7 @@ def __init__( for k, v in kwargs.items(): setattr(self, k, v) logger.debug( - f'Setting unknown kwarg ({k}, {v}) provided to constructor as argument in "{self.__class__.__name__}".' 
# noqa: E501 + f'Setting unknown kwarg ({k}, {v}) provided to constructor as argument in "{self.__class__.__name__}".' # noqa: E501 # FIXME CoP ) @override @@ -318,7 +318,7 @@ def to_json_dict(self) -> dict: reference implementation in the "SerializableDictDot" class itself. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP dict_obj: dict = self.to_dict() serializeable_dict: dict = convert_to_json_serializable(data=dict_obj) return serializeable_dict @@ -331,7 +331,7 @@ def __repr__(self) -> str: implementation in the "SerializableDictDot" class. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP json_dict: dict = self.to_json_dict() deep_filter_properties_iterable( properties=json_dict, @@ -353,7 +353,7 @@ def __str__(self) -> str: implementation in the "SerializableDictDot" class. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return self.__repr__() @@ -420,7 +420,7 @@ def to_json_dict(self) -> dict: reference implementation in the "SerializableDictDot" class itself. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP dict_obj: dict = self.to_dict() serializeable_dict: dict = convert_to_json_serializable(data=dict_obj) return serializeable_dict @@ -433,7 +433,7 @@ def __repr__(self) -> str: implementation in the "SerializableDictDot" class. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP json_dict: dict = self.to_json_dict() deep_filter_properties_iterable( properties=json_dict, @@ -455,7 +455,7 @@ def __str__(self) -> str: implementation in the "SerializableDictDot" class. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return self.__repr__() @@ -499,7 +499,7 @@ class Meta: class RuleBasedProfilerConfig(AbstractConfig, BaseYamlConfig): - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, name: str, config_version: float, @@ -536,7 +536,7 @@ def from_commented_map(cls, commented_map: CommentedMap): # type: ignore[overri return config except ValidationError: - logger.error( # noqa: TRY400 + logger.error( # noqa: TRY400 # FIXME CoP "Encountered errors during loading config. See ValidationError for more details." ) raise @@ -559,7 +559,7 @@ def to_json_dict(self) -> dict: reference implementation in the "SerializableDictDot" class itself. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP dict_obj: dict = self.to_dict() serializeable_dict: dict = convert_to_json_serializable(data=dict_obj) return serializeable_dict @@ -572,7 +572,7 @@ def __repr__(self) -> str: implementation in the "SerializableDictDot" class. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP json_dict: dict = self.to_json_dict() deep_filter_properties_iterable( properties=json_dict, @@ -594,7 +594,7 @@ def __str__(self) -> str: implementation in the "SerializableDictDot" class. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return self.__repr__() @classmethod @@ -617,7 +617,7 @@ def resolve_config_using_acceptable_arguments( Returns: An instance of RuleBasedProfilerConfig that represents the reconciled profiler. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP effective_variables: Optional[ParameterContainer] = profiler.reconcile_profiler_variables( variables=variables, ) @@ -686,7 +686,7 @@ class RuleBasedProfilerConfigSchema(AbstractConfigSchema): """ Schema classes for configurations which extend from BaseYamlConfig must extend top-level Marshmallow Schema class. Schema classes for their constituent configurations which extend DictDot leve must extend NotNullSchema class. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP class Meta: unknown = INCLUDE @@ -714,7 +714,7 @@ class Meta: allow_none=False, validate=lambda x: x == 1.0, error_messages={ - "invalid": "config version is not supported; it must be 1.0 per the current version of Great Expectations" # noqa: E501 + "invalid": "config version is not supported; it must be 1.0 per the current version of Great Expectations" # noqa: E501 # FIXME CoP }, ) module_name = fields.String( diff --git a/great_expectations/experimental/rule_based_profiler/domain_builder/__init__.py b/great_expectations/experimental/rule_based_profiler/domain_builder/__init__.py index 2758ef1bc6bb..9443a67925ad 100644 --- a/great_expectations/experimental/rule_based_profiler/domain_builder/__init__.py +++ b/great_expectations/experimental/rule_based_profiler/domain_builder/__init__.py @@ -1,21 +1,21 @@ from great_expectations.experimental.rule_based_profiler.domain_builder.domain_builder import ( # isort:skip DomainBuilder, ) -from great_expectations.experimental.rule_based_profiler.domain_builder.table_domain_builder import ( # isort:skip # noqa: E501 +from great_expectations.experimental.rule_based_profiler.domain_builder.table_domain_builder import ( # isort:skip # noqa: E501 # FIXME CoP TableDomainBuilder, ) -from great_expectations.experimental.rule_based_profiler.domain_builder.column_domain_builder import ( # isort:skip # noqa: E501 +from great_expectations.experimental.rule_based_profiler.domain_builder.column_domain_builder import ( # isort:skip # noqa: E501 # FIXME CoP ColumnDomainBuilder, ) -from great_expectations.experimental.rule_based_profiler.domain_builder.column_pair_domain_builder import ( # isort:skip # noqa: E501 +from great_expectations.experimental.rule_based_profiler.domain_builder.column_pair_domain_builder import ( # isort:skip # noqa: E501 # FIXME CoP ColumnPairDomainBuilder, ) -from great_expectations.experimental.rule_based_profiler.domain_builder.multi_column_domain_builder import ( 
# isort:skip # noqa: E501 +from great_expectations.experimental.rule_based_profiler.domain_builder.multi_column_domain_builder import ( # isort:skip # noqa: E501 # FIXME CoP MultiColumnDomainBuilder, ) -from great_expectations.experimental.rule_based_profiler.domain_builder.categorical_column_domain_builder import ( # isort:skip # noqa: E501 +from great_expectations.experimental.rule_based_profiler.domain_builder.categorical_column_domain_builder import ( # isort:skip # noqa: E501 # FIXME CoP CategoricalColumnDomainBuilder, ) -from great_expectations.experimental.rule_based_profiler.domain_builder.map_metric_column_domain_builder import ( # noqa: E501 +from great_expectations.experimental.rule_based_profiler.domain_builder.map_metric_column_domain_builder import ( # noqa: E501 # FIXME CoP MapMetricColumnDomainBuilder, ) diff --git a/great_expectations/experimental/rule_based_profiler/domain_builder/categorical_column_domain_builder.py b/great_expectations/experimental/rule_based_profiler/domain_builder/categorical_column_domain_builder.py index 3193d8a90a87..dc022abb36e6 100644 --- a/great_expectations/experimental/rule_based_profiler/domain_builder/categorical_column_domain_builder.py +++ b/great_expectations/experimental/rule_based_profiler/domain_builder/categorical_column_domain_builder.py @@ -53,7 +53,7 @@ class CategoricalColumnDomainBuilder(ColumnDomainBuilder): } cardinality_limit_modes: Type[CardinalityLimitMode] = CardinalityLimitMode - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, include_column_names: Optional[Union[str, Optional[List[str]]]] = None, exclude_column_names: Optional[Union[str, Optional[List[str]]]] = None, @@ -110,7 +110,7 @@ def __init__( # noqa: PLR0913 max_proportion_unique: proportion of unique values for a custom cardinality limit to use when filtering columns. 
data_context: AbstractDataContext associated with this DomainBuilder - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if exclude_column_names is None: exclude_column_names = [ "id", @@ -216,7 +216,7 @@ def _get_domains( Returns: List of domains that match the desired cardinality. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP batch_ids: Optional[List[str]] = self.get_batch_ids(variables=variables) validator: Optional[Validator] = self.get_validator(variables=variables) @@ -227,7 +227,7 @@ def _get_domains( variables=variables, ) - # Obtain cardinality_limit_mode from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain cardinality_limit_mode from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP cardinality_limit_mode: Optional[Union[str, CardinalityLimitMode, dict]] = ( get_parameter_value_and_validate_return_type( domain=None, @@ -238,7 +238,7 @@ def _get_domains( ) ) - # Obtain max_unique_values from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain max_unique_values from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP max_unique_values: Optional[int] = get_parameter_value_and_validate_return_type( domain=None, parameter_reference=self.max_unique_values, @@ -247,7 +247,7 @@ def _get_domains( parameters=None, ) - # Obtain max_proportion_unique from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain max_proportion_unique from "rule state" (i.e., variables and parameters); from instance variable otherwise. 
# noqa: E501 # FIXME CoP max_proportion_unique: Optional[float] = get_parameter_value_and_validate_return_type( domain=None, parameter_reference=self.max_proportion_unique, @@ -268,7 +268,7 @@ def _get_domains( max_proportion_unique=max_proportion_unique, ) - # Obtain allowed_semantic_types_passthrough from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain allowed_semantic_types_passthrough from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP allowed_semantic_types_passthrough: Union[ str, SemanticDomainTypes, List[Union[str, SemanticDomainTypes]] ] = get_parameter_value_and_validate_return_type( @@ -384,7 +384,7 @@ def _column_names_meeting_cardinality_limit( Returns: List of column names meeting cardinality. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP column_name: str resolved_metrics: Dict[Tuple[str, str, str], MetricValue] metric_value: MetricValue diff --git a/great_expectations/experimental/rule_based_profiler/domain_builder/column_domain_builder.py b/great_expectations/experimental/rule_based_profiler/domain_builder/column_domain_builder.py index c15932cbbf90..c108a100658c 100644 --- a/great_expectations/experimental/rule_based_profiler/domain_builder/column_domain_builder.py +++ b/great_expectations/experimental/rule_based_profiler/domain_builder/column_domain_builder.py @@ -23,10 +23,10 @@ get_parameter_value_and_validate_return_type, ) from great_expectations.experimental.rule_based_profiler.parameter_container import ( - ParameterContainer, # noqa: TCH001 + ParameterContainer, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.semantic_type_filter import ( - SemanticTypeFilter, # noqa: TCH001 + SemanticTypeFilter, # noqa: TCH001 # FIXME CoP ) from great_expectations.validator.metric_configuration import MetricConfiguration @@ -40,14 +40,14 @@ class ColumnDomainBuilder(DomainBuilder): """ This 
DomainBuilder emits "Domain" object for every column in table and can serve as parent of other column-focused DomainBuilder implementations. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP exclude_field_names: ClassVar[Set[str]] = DomainBuilder.exclude_field_names | { "table_column_names", "semantic_type_filter", } - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, include_column_names: Optional[Union[str, Optional[List[str]]]] = None, exclude_column_names: Optional[Union[str, Optional[List[str]]]] = None, @@ -82,7 +82,7 @@ def __init__( # noqa: PLR0913 Inclusion/Exclusion Logic: (include_column_names|table_columns - exclude_column_names) + (include_semantic_types - exclude_semantic_types) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP super().__init__(data_context=data_context) self._include_column_names = include_column_names @@ -109,7 +109,7 @@ def domain_type(self) -> MetricDomainTypes: """ All DomainBuilder classes, whose "domain_type" property equals "MetricDomainTypes.COLUMN", must extend present class (ColumnDomainBuilder) in order to provide full getter/setter accessor for relevant properties (as overrides). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP @property def include_column_names(self) -> Optional[Union[str, Optional[List[str]]]]: @@ -197,12 +197,12 @@ def get_table_column_names( ) -> List[str]: """ This method returns all column names available (i.e., prior to any inclusions/exclusions filtering is applied). 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if self._table_column_names: return self._table_column_names if batch_ids is None: - batch_ids: List[str] = self.get_batch_ids(variables=variables) # type: ignore[no-redef] + batch_ids: List[str] = self.get_batch_ids(variables=variables) # type: ignore[no-redef] # FIXME CoP if validator is None: validator = self.get_validator(variables=variables) @@ -212,7 +212,7 @@ def get_table_column_names( metric_name="table.columns", metric_domain_kwargs={ # active_batch_id - "batch_id": batch_ids[-1], # type: ignore[index] + "batch_id": batch_ids[-1], # type: ignore[index] # FIXME CoP }, metric_value_kwargs={ "include_nested": False, @@ -223,7 +223,7 @@ def get_table_column_names( return self._table_column_names - def get_filtered_column_names( # noqa: C901 + def get_filtered_column_names( # noqa: C901 # FIXME CoP self, column_names: List[str], batch_ids: Optional[List[str]] = None, @@ -232,7 +232,7 @@ def get_filtered_column_names( # noqa: C901 ) -> List[str]: """ This method returns list of column names, filtered according to directives supplied via instance attributes. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP include_column_names: List[str] = cast( List[str], self._resolve_list_type_property( @@ -303,7 +303,7 @@ def get_filtered_column_names( # noqa: C901 ) ) - # Obtain semantic_type_filter_module_name from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain semantic_type_filter_module_name from "rule state" (i.e., variables and parameters); from instance variable otherwise. 
# noqa: E501 # FIXME CoP semantic_type_filter_module_name: Optional[str] = ( get_parameter_value_and_validate_return_type( domain=None, @@ -314,9 +314,9 @@ def get_filtered_column_names( # noqa: C901 ) ) if semantic_type_filter_module_name is None: - semantic_type_filter_module_name = "great_expectations.experimental.rule_based_profiler.helpers.simple_semantic_type_filter" # noqa: E501 + semantic_type_filter_module_name = "great_expectations.experimental.rule_based_profiler.helpers.simple_semantic_type_filter" # noqa: E501 # FIXME CoP - # Obtain semantic_type_filter_class_name from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain semantic_type_filter_class_name from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP semantic_type_filter_class_name: Optional[str] = ( get_parameter_value_and_validate_return_type( domain=None, @@ -358,7 +358,7 @@ def get_filtered_column_names( # noqa: C901 if include_semantic_types: filtered_column_names = list( filter( - lambda candidate_column_name: self.semantic_type_filter.table_column_name_to_inferred_semantic_domain_type_map[ # type: ignore[union-attr,arg-type] # noqa: E501 + lambda candidate_column_name: self.semantic_type_filter.table_column_name_to_inferred_semantic_domain_type_map[ # type: ignore[union-attr,arg-type] # noqa: E501 # FIXME CoP candidate_column_name ] in include_semantic_types, @@ -381,7 +381,7 @@ def get_filtered_column_names( # noqa: C901 if exclude_semantic_types: filtered_column_names = list( filter( - lambda candidate_column_name: self.semantic_type_filter.table_column_name_to_inferred_semantic_domain_type_map[ # type: ignore[union-attr,arg-type] # lambda missing type details # noqa: E501 + lambda candidate_column_name: self.semantic_type_filter.table_column_name_to_inferred_semantic_domain_type_map[ # type: ignore[union-attr,arg-type] # lambda missing type details # noqa: E501 # FIXME CoP 
candidate_column_name ] not in exclude_semantic_types, @@ -399,9 +399,9 @@ def get_effective_column_names( ) -> List[str]: """ This method applies multiple directives to obtain columns to be included as part of returned "Domain" objects. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if batch_ids is None: - batch_ids: List[str] = self.get_batch_ids(variables=variables) # type: ignore[no-redef] + batch_ids: List[str] = self.get_batch_ids(variables=variables) # type: ignore[no-redef] # FIXME CoP if validator is None: validator = self.get_validator(variables=variables) @@ -438,7 +438,7 @@ def _get_domains( Returns: List of domains that match the desired columns and filtering criteria. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP batch_ids: List[str] = self.get_batch_ids(variables=variables) # type: ignore[assignment] # could be None validator: Validator = self.get_validator(variables=variables) # type: ignore[assignment] # could be None @@ -469,15 +469,15 @@ def _resolve_list_type_property( property_value = [] elif isinstance(property_value, str): property_value = [property_value] - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if not isinstance(property_value, property_value_type): - raise ValueError( # noqa: TRY003 - f'Unrecognized "{property_name}" directive -- must be "{property_value_type}" (or string).' # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + f'Unrecognized "{property_name}" directive -- must be "{property_value_type}" (or string).' # noqa: E501 # FIXME CoP ) property_cursor: type property_value = [ - # Obtain property from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain property from "rule state" (i.e., variables and parameters); from instance variable otherwise. 
# noqa: E501 # FIXME CoP get_parameter_value_and_validate_return_type( domain=None, parameter_reference=property_cursor, diff --git a/great_expectations/experimental/rule_based_profiler/domain_builder/column_pair_domain_builder.py b/great_expectations/experimental/rule_based_profiler/domain_builder/column_pair_domain_builder.py index ea9726784ebf..06c2ce2fb128 100644 --- a/great_expectations/experimental/rule_based_profiler/domain_builder/column_pair_domain_builder.py +++ b/great_expectations/experimental/rule_based_profiler/domain_builder/column_pair_domain_builder.py @@ -12,7 +12,7 @@ from great_expectations.experimental.rule_based_profiler.domain_builder import ColumnDomainBuilder from great_expectations.experimental.rule_based_profiler.exceptions import ProfilerExecutionError from great_expectations.experimental.rule_based_profiler.parameter_container import ( - ParameterContainer, # noqa: TCH001 + ParameterContainer, # noqa: TCH001 # FIXME CoP ) if TYPE_CHECKING: @@ -25,7 +25,7 @@ class ColumnPairDomainBuilder(ColumnDomainBuilder): """ This DomainBuilder uses "include_column_names" property of its parent class to specify "column_A" and "column_B" (order-preserving). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP exclude_field_names: ClassVar[Set[str]] = ColumnDomainBuilder.exclude_field_names | { "exclude_column_names", @@ -80,7 +80,7 @@ def _get_domains( Returns: List of domains that match the desired tolerance limits. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP batch_ids: List[str] = self.get_batch_ids(variables=variables) # type: ignore[assignment] # could be None validator: Validator = self.get_validator(variables=variables) # type: ignore[assignment] # could be None @@ -92,12 +92,12 @@ def _get_domains( ) if not ( - effective_column_names and (len(effective_column_names) == 2) # noqa: PLR2004 + effective_column_names and (len(effective_column_names) == 2) # noqa: PLR2004 # FIXME CoP ): raise ProfilerExecutionError( message=f"""Error: Columns specified for {self.__class__.__name__} in sorted order must correspond to \ "column_A" and "column_B" (in this exact order). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) domain_kwargs: Dict[str, str] = dict( @@ -112,7 +112,7 @@ def _get_domains( column_name: str semantic_types_by_column_name: Dict[str, SemanticDomainTypes] = { - column_name: self.semantic_type_filter.table_column_name_to_inferred_semantic_domain_type_map[ # type: ignore[union-attr] # could be None # noqa: E501 + column_name: self.semantic_type_filter.table_column_name_to_inferred_semantic_domain_type_map[ # type: ignore[union-attr] # could be None # noqa: E501 # FIXME CoP column_name ] for column_name in effective_column_names diff --git a/great_expectations/experimental/rule_based_profiler/domain_builder/domain_builder.py b/great_expectations/experimental/rule_based_profiler/domain_builder/domain_builder.py index 4dfd6ceee874..9b67e29452c8 100644 --- a/great_expectations/experimental/rule_based_profiler/domain_builder/domain_builder.py +++ b/great_expectations/experimental/rule_based_profiler/domain_builder/domain_builder.py @@ -3,10 +3,10 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union -from great_expectations.core.batch import Batch, BatchRequestBase # noqa: TCH001 -from great_expectations.core.domain import Domain # noqa: TCH001 +from great_expectations.core.batch import Batch, 
BatchRequestBase # noqa: TCH001 # FIXME CoP +from great_expectations.core.domain import Domain # noqa: TCH001 # FIXME CoP from great_expectations.core.metric_domain_types import ( - MetricDomainTypes, # noqa: TCH001 + MetricDomainTypes, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.builder import Builder from great_expectations.experimental.rule_based_profiler.helpers.util import ( @@ -19,9 +19,9 @@ get_validator as get_validator_using_batch_list_or_batch_request, ) from great_expectations.experimental.rule_based_profiler.parameter_container import ( - ParameterContainer, # noqa: TCH001 + ParameterContainer, # noqa: TCH001 # FIXME CoP ) -from great_expectations.validator.computed_metric import MetricValue # noqa: TCH001 +from great_expectations.validator.computed_metric import MetricValue # noqa: TCH001 # FIXME CoP from great_expectations.validator.metric_configuration import MetricConfiguration if TYPE_CHECKING: @@ -67,7 +67,7 @@ def get_domains( Note: Please do not overwrite the public "get_domains()" method. If a child class needs to check parameters, then please do so in its implementation of the (private) "_get_domains()" method, or in a utility method. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP self.set_batch_list_if_null_batch_request( batch_list=batch_list, batch_request=batch_request, diff --git a/great_expectations/experimental/rule_based_profiler/domain_builder/map_metric_column_domain_builder.py b/great_expectations/experimental/rule_based_profiler/domain_builder/map_metric_column_domain_builder.py index b75c12b515fe..90d843663020 100644 --- a/great_expectations/experimental/rule_based_profiler/domain_builder/map_metric_column_domain_builder.py +++ b/great_expectations/experimental/rule_based_profiler/domain_builder/map_metric_column_domain_builder.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Union -from great_expectations.core.domain import Domain, SemanticDomainTypes # noqa: TCH001 +from great_expectations.core.domain import Domain, SemanticDomainTypes # noqa: TCH001 # FIXME CoP from great_expectations.core.metric_function_types import ( SummarizationMetricNameSuffixes, ) @@ -14,9 +14,9 @@ get_resolved_metrics_by_key, ) from great_expectations.experimental.rule_based_profiler.parameter_container import ( - ParameterContainer, # noqa: TCH001 + ParameterContainer, # noqa: TCH001 # FIXME CoP ) -from great_expectations.validator.computed_metric import MetricValue # noqa: TCH001 +from great_expectations.validator.computed_metric import MetricValue # noqa: TCH001 # FIXME CoP from great_expectations.validator.metric_configuration import MetricConfiguration if TYPE_CHECKING: @@ -31,7 +31,7 @@ class MapMetricColumnDomainBuilder(ColumnDomainBuilder): This DomainBuilder uses relative tolerance of specified map metric to identify domains. 
""" - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, map_metric_name: str, include_column_names: Optional[Union[str, Optional[List[str]]]] = None, @@ -98,7 +98,7 @@ def __init__( # noqa: PLR0913 However, if "max_unexpected_ratio" is eased to above 0.2, then the tolerances will be met and Domain emitted. Alternatively, if "min_max_unexpected_values_proportion" is lowered to 0.66, Domain will also be emitted. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP super().__init__( include_column_names=include_column_names, exclude_column_names=exclude_column_names, @@ -147,8 +147,8 @@ def _get_domains( Returns: List of domains that match the desired tolerance limits. - """ # noqa: E501 - # Obtain map_metric_name from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + """ # noqa: E501 # FIXME CoP + # Obtain map_metric_name from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP map_metric_name: str = get_parameter_value_and_validate_return_type( domain=None, parameter_reference=self.map_metric_name, @@ -157,7 +157,7 @@ def _get_domains( parameters=None, ) - # Obtain max_unexpected_values from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain max_unexpected_values from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP max_unexpected_values: int = get_parameter_value_and_validate_return_type( domain=None, parameter_reference=self.max_unexpected_values, @@ -166,7 +166,7 @@ def _get_domains( parameters=None, ) - # Obtain max_unexpected_ratio from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain max_unexpected_ratio from "rule state" (i.e., variables and parameters); from instance variable otherwise. 
# noqa: E501 # FIXME CoP max_unexpected_ratio: Optional[float] = get_parameter_value_and_validate_return_type( domain=None, parameter_reference=self.max_unexpected_ratio, @@ -175,7 +175,7 @@ def _get_domains( parameters=None, ) - # Obtain min_max_unexpected_values_proportion from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain min_max_unexpected_values_proportion from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP min_max_unexpected_values_proportion: float = get_parameter_value_and_validate_return_type( domain=None, parameter_reference=self.min_max_unexpected_values_proportion, @@ -205,7 +205,7 @@ def _get_domains( 1.0 * sum(table_row_counts.values()) / num_batch_ids ) + NP_EPSILON - # If no "max_unexpected_ratio" is given, compute it based on average number of records across all Batch objects. # noqa: E501 + # If no "max_unexpected_ratio" is given, compute it based on average number of records across all Batch objects. # noqa: E501 # FIXME CoP if max_unexpected_ratio is None: max_unexpected_ratio = max_unexpected_values / mean_table_row_count_as_float @@ -255,7 +255,7 @@ def _generate_metric_configurations( Dictionary of the form { column_name: List[MetricConfiguration], } - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP column_name: str batch_id: str metric_configurations: Dict[str, List[MetricConfiguration]] = { @@ -276,7 +276,7 @@ def _generate_metric_configurations( return metric_configurations @staticmethod - def _get_column_names_satisfying_tolerance_limits( # noqa: PLR0913 + def _get_column_names_satisfying_tolerance_limits( # noqa: PLR0913 # FIXME CoP validator: Validator, num_batch_ids: int, metric_configurations_by_column_name: Dict[str, List[MetricConfiguration]], @@ -298,7 +298,7 @@ def _get_column_names_satisfying_tolerance_limits( # noqa: PLR0913 Returns: List of column names satisfying tolerance limits. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP column_name: str resolved_metrics: Dict[Tuple[str, str, str], MetricValue] @@ -326,7 +326,7 @@ def _get_column_names_satisfying_tolerance_limits( # noqa: PLR0913 metric_value_ratio <= max_unexpected_ratio for metric_value_ratio in metric_value_ratios ] - for column_name, metric_value_ratios in intra_batch_unexpected_ratios_by_column_name.items() # noqa: E501 + for column_name, metric_value_ratios in intra_batch_unexpected_ratios_by_column_name.items() # noqa: E501 # FIXME CoP } inter_batch_adherence_by_column_name: Dict[str, float] = { @@ -339,7 +339,7 @@ def _get_column_names_satisfying_tolerance_limits( # noqa: PLR0913 inter_batch_unexpected_values_proportion: float candidate_column_names: List[str] = [ column_name - for column_name, inter_batch_unexpected_values_proportion in inter_batch_adherence_by_column_name.items() # noqa: E501 + for column_name, inter_batch_unexpected_values_proportion in inter_batch_adherence_by_column_name.items() # noqa: E501 # FIXME CoP if inter_batch_unexpected_values_proportion >= min_max_unexpected_values_proportion ] diff --git a/great_expectations/experimental/rule_based_profiler/domain_builder/multi_column_domain_builder.py b/great_expectations/experimental/rule_based_profiler/domain_builder/multi_column_domain_builder.py index be320c054440..b68fdb2a2593 100644 --- a/great_expectations/experimental/rule_based_profiler/domain_builder/multi_column_domain_builder.py +++ b/great_expectations/experimental/rule_based_profiler/domain_builder/multi_column_domain_builder.py @@ -12,7 +12,7 @@ from great_expectations.experimental.rule_based_profiler.domain_builder import ColumnDomainBuilder from great_expectations.experimental.rule_based_profiler.exceptions import ProfilerExecutionError from great_expectations.experimental.rule_based_profiler.parameter_container import ( - ParameterContainer, # noqa: TCH001 + ParameterContainer, # noqa: TCH001 # FIXME CoP ) if TYPE_CHECKING: @@ -25,7 +25,7 
@@ class MultiColumnDomainBuilder(ColumnDomainBuilder): """ This DomainBuilder uses "include_column_names" property of its parent class to specify "column_list" (order-non-preserving). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP exclude_field_names: ClassVar[Set[str]] = ColumnDomainBuilder.exclude_field_names | { "exclude_column_names", @@ -80,7 +80,7 @@ def _get_domains( Returns: List of domains that match the desired tolerance limits. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP batch_ids: List[str] = self.get_batch_ids(variables=variables) # type: ignore[assignment] # could be None validator: Validator = self.get_validator(variables=variables) # type: ignore[assignment] # could be None @@ -98,7 +98,7 @@ def _get_domains( column_name: str semantic_types_by_column_name: Dict[str, SemanticDomainTypes] = { - column_name: self.semantic_type_filter.table_column_name_to_inferred_semantic_domain_type_map[ # type: ignore[union-attr] # could be None # noqa: E501 + column_name: self.semantic_type_filter.table_column_name_to_inferred_semantic_domain_type_map[ # type: ignore[union-attr] # could be None # noqa: E501 # FIXME CoP column_name ] for column_name in effective_column_names diff --git a/great_expectations/experimental/rule_based_profiler/domain_builder/table_domain_builder.py b/great_expectations/experimental/rule_based_profiler/domain_builder/table_domain_builder.py index d103b5d3f2e9..9ab64279deef 100644 --- a/great_expectations/experimental/rule_based_profiler/domain_builder/table_domain_builder.py +++ b/great_expectations/experimental/rule_based_profiler/domain_builder/table_domain_builder.py @@ -41,7 +41,7 @@ def domain_type(self) -> MetricDomainTypes: Note that for appropriate use-cases, it should be readily possible to build a multi-batch implementation, where a separate Domain object is emitted for each individual Batch (using its respective batch_id). (This is future work.) 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP @override def _get_domains( @@ -52,7 +52,7 @@ def _get_domains( ) -> List[Domain]: other_table_name: Optional[str] try: - # Obtain table from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain table from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP other_table_name = get_parameter_value_and_validate_return_type( domain=None, parameter_reference=f"{VARIABLES_KEY}table", diff --git a/great_expectations/experimental/rule_based_profiler/estimators/bootstrap_numeric_range_estimator.py b/great_expectations/experimental/rule_based_profiler/estimators/bootstrap_numeric_range_estimator.py index 7e72439f4804..667380a577ba 100644 --- a/great_expectations/experimental/rule_based_profiler/estimators/bootstrap_numeric_range_estimator.py +++ b/great_expectations/experimental/rule_based_profiler/estimators/bootstrap_numeric_range_estimator.py @@ -19,7 +19,7 @@ import numpy as np from great_expectations.core.domain import Domain - from great_expectations.experimental.rule_based_profiler.estimators.numeric_range_estimation_result import ( # noqa: E501 + from great_expectations.experimental.rule_based_profiler.estimators.numeric_range_estimation_result import ( # noqa: E501 # FIXME CoP NumericRangeEstimationResult, ) from great_expectations.experimental.rule_based_profiler.parameter_container import ( @@ -41,7 +41,7 @@ class BootstrapNumericRangeEstimator(NumericRangeEstimator): Implements the "bootstrapped" estimation of parameter values from data. (Please refer to "https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.bootstrap.html" for details.) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def __init__( self, @@ -65,7 +65,7 @@ def _get_numeric_range_estimate( fuzzy=False, ): raise ProfilerExecutionError( - message=f'Estimator "{self.__class__.__name__}" does not support DateTime/TimeStamp data types.' 
# noqa: E501 + message=f'Estimator "{self.__class__.__name__}" does not support DateTime/TimeStamp data types.' # noqa: E501 # FIXME CoP ) false_positive_rate: np.float64 = get_false_positive_rate_from_rule_state( @@ -75,7 +75,7 @@ def _get_numeric_range_estimate( parameters=parameters, ) - # Obtain n_resamples override from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain n_resamples override from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP n_resamples: Optional[int] = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=self.configuration.n_resamples, @@ -86,7 +86,7 @@ def _get_numeric_range_estimate( if n_resamples is None: n_resamples = DEFAULT_BOOTSTRAP_NUM_RESAMPLES - # Obtain random_seed override from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain random_seed override from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP random_seed: Optional[int] = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=self.configuration.random_seed, @@ -95,7 +95,7 @@ def _get_numeric_range_estimate( parameters=parameters, ) - quantile_statistic_interpolation_method: str = get_quantile_statistic_interpolation_method_from_rule_state( # noqa: E501 + quantile_statistic_interpolation_method: str = get_quantile_statistic_interpolation_method_from_rule_state( # noqa: E501 # FIXME CoP quantile_statistic_interpolation_method=self.configuration.quantile_statistic_interpolation_method, round_decimals=self.configuration.round_decimals, domain=domain, @@ -107,7 +107,7 @@ def _get_numeric_range_estimate( DEFAULT_BOOTSTRAP_QUANTILE_STATISTIC_INTERPOLATION_METHOD ) - # Obtain quantile_bias_correction override from "rule state" (i.e., variables and parameters); from instance variable otherwise. 
# noqa: E501 + # Obtain quantile_bias_correction override from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP quantile_bias_correction: Optional[bool] = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=self.configuration.quantile_bias_correction, @@ -118,7 +118,7 @@ def _get_numeric_range_estimate( if quantile_bias_correction is None: quantile_bias_correction = False - # Obtain quantile_bias_std_error_ratio_threshold override from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain quantile_bias_std_error_ratio_threshold override from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP quantile_bias_std_error_ratio_threshold: Optional[float] = ( get_parameter_value_and_validate_return_type( domain=domain, diff --git a/great_expectations/experimental/rule_based_profiler/estimators/exact_numeric_range_estimator.py b/great_expectations/experimental/rule_based_profiler/estimators/exact_numeric_range_estimator.py index 1cce9e97901a..d03180238c7e 100644 --- a/great_expectations/experimental/rule_based_profiler/estimators/exact_numeric_range_estimator.py +++ b/great_expectations/experimental/rule_based_profiler/estimators/exact_numeric_range_estimator.py @@ -6,9 +6,9 @@ import numpy as np from great_expectations.compatibility.typing_extensions import override -from great_expectations.core.domain import Domain # noqa: TCH001 -from great_expectations.experimental.rule_based_profiler.estimators.numeric_range_estimation_result import ( # noqa: E501 - NumericRangeEstimationResult, # noqa: TCH001 +from great_expectations.core.domain import Domain # noqa: TCH001 # FIXME CoP +from great_expectations.experimental.rule_based_profiler.estimators.numeric_range_estimation_result import ( # noqa: E501 # FIXME CoP + NumericRangeEstimationResult, # noqa: TCH001 # FIXME CoP ) from 
great_expectations.experimental.rule_based_profiler.estimators.numeric_range_estimator import ( NumericRangeEstimator, @@ -18,7 +18,7 @@ datetime_semantic_domain_type, ) from great_expectations.experimental.rule_based_profiler.parameter_container import ( - ParameterContainer, # noqa: TCH001 + ParameterContainer, # noqa: TCH001 # FIXME CoP ) from great_expectations.util import convert_ndarray_to_datetime_dtype_best_effort diff --git a/great_expectations/experimental/rule_based_profiler/estimators/kde_numeric_range_estimator.py b/great_expectations/experimental/rule_based_profiler/estimators/kde_numeric_range_estimator.py index 8604f84f1a5a..c6e5bb5a2163 100644 --- a/great_expectations/experimental/rule_based_profiler/estimators/kde_numeric_range_estimator.py +++ b/great_expectations/experimental/rule_based_profiler/estimators/kde_numeric_range_estimator.py @@ -19,7 +19,7 @@ import numpy as np from great_expectations.core.domain import Domain - from great_expectations.experimental.rule_based_profiler.estimators.numeric_range_estimation_result import ( # noqa: E501 + from great_expectations.experimental.rule_based_profiler.estimators.numeric_range_estimation_result import ( # noqa: E501 # FIXME CoP NumericRangeEstimationResult, ) from great_expectations.experimental.rule_based_profiler.parameter_container import ( @@ -40,7 +40,7 @@ class KdeNumericRangeEstimator(NumericRangeEstimator): Implements the "kde" (kernel density estimation) estimation of parameter values from data. (Please refer to "https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gaussian_kde.html" for details.) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def __init__( self, @@ -64,7 +64,7 @@ def _get_numeric_range_estimate( fuzzy=False, ): raise ProfilerExecutionError( - message=f'Estimator "{self.__class__.__name__}" does not support DateTime/TimeStamp data types.' # noqa: E501 + message=f'Estimator "{self.__class__.__name__}" does not support DateTime/TimeStamp data types.' 
# noqa: E501 # FIXME CoP ) false_positive_rate: np.float64 = get_false_positive_rate_from_rule_state( @@ -74,7 +74,7 @@ def _get_numeric_range_estimate( parameters=parameters, ) - # Obtain n_resamples override from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain n_resamples override from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP n_resamples: Optional[int] = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=self.configuration.n_resamples, @@ -85,7 +85,7 @@ def _get_numeric_range_estimate( if n_resamples is None: n_resamples = DEFAULT_KDE_NUM_RESAMPLES - # Obtain random_seed override from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain random_seed override from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP random_seed: Optional[int] = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=self.configuration.random_seed, @@ -94,7 +94,7 @@ def _get_numeric_range_estimate( parameters=parameters, ) - quantile_statistic_interpolation_method: str = get_quantile_statistic_interpolation_method_from_rule_state( # noqa: E501 + quantile_statistic_interpolation_method: str = get_quantile_statistic_interpolation_method_from_rule_state( # noqa: E501 # FIXME CoP quantile_statistic_interpolation_method=self.configuration.quantile_statistic_interpolation_method, round_decimals=self.configuration.round_decimals, domain=domain, @@ -106,7 +106,7 @@ def _get_numeric_range_estimate( DEFAULT_KDE_QUANTILE_STATISTIC_INTERPOLATION_METHOD ) - # Obtain bw_method override from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain bw_method override from "rule state" (i.e., variables and parameters); from instance variable otherwise. 
# noqa: E501 # FIXME CoP bw_method: Optional[Union[str, float, Callable]] = ( get_parameter_value_and_validate_return_type( domain=domain, diff --git a/great_expectations/experimental/rule_based_profiler/estimators/numeric_range_estimation_result.py b/great_expectations/experimental/rule_based_profiler/estimators/numeric_range_estimation_result.py index cb34fece1236..c01afe143391 100644 --- a/great_expectations/experimental/rule_based_profiler/estimators/numeric_range_estimation_result.py +++ b/great_expectations/experimental/rule_based_profiler/estimators/numeric_range_estimation_result.py @@ -5,7 +5,7 @@ from great_expectations.compatibility.typing_extensions import override from great_expectations.types import DictDot -from great_expectations.util import convert_to_json_serializable # noqa: TID251 +from great_expectations.util import convert_to_json_serializable # noqa: TID251 # FIXME CoP if TYPE_CHECKING: import numpy as np @@ -24,7 +24,7 @@ class NumericRangeEstimationResult(DictDot): In particular, "estimation_histogram" is "numpy.ndarray" of shape [2, NUM_HISTOGRAM_BINS + 1], containing [0] "histogram": (integer array of dimension [NUM_HISTOGRAM_BINS + 1] padded with 0 at right edge) histogram values; [1] "bin_edges": (float array of dimension [NUM_HISTOGRAM_BINS + 1]) binning edges. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP estimation_histogram: np.ndarray value_range: Union[np.ndarray, List[np.float64]] diff --git a/great_expectations/experimental/rule_based_profiler/estimators/numeric_range_estimator.py b/great_expectations/experimental/rule_based_profiler/estimators/numeric_range_estimator.py index d7c205548a52..ca036b6cafff 100644 --- a/great_expectations/experimental/rule_based_profiler/estimators/numeric_range_estimator.py +++ b/great_expectations/experimental/rule_based_profiler/estimators/numeric_range_estimator.py @@ -6,13 +6,13 @@ from great_expectations.compatibility.typing_extensions import override from great_expectations.types import SerializableDictDot -from great_expectations.util import convert_to_json_serializable # noqa: TID251 +from great_expectations.util import convert_to_json_serializable # noqa: TID251 # FIXME CoP if TYPE_CHECKING: import numpy as np from great_expectations.core.domain import Domain - from great_expectations.experimental.rule_based_profiler.estimators.numeric_range_estimation_result import ( # noqa: E501 + from great_expectations.experimental.rule_based_profiler.estimators.numeric_range_estimation_result import ( # noqa: E501 # FIXME CoP NumericRangeEstimationResult, ) from great_expectations.experimental.rule_based_profiler.parameter_container import ( @@ -28,7 +28,7 @@ class NumericRangeEstimator(ABC, SerializableDictDot): """ Parent class that incorporates the "get_numeric_range_estimate()" interface method, requiring all subclasses to implement the "_get_numeric_range_estimate()" method (for encapsulation reasons, the former calls the latter). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def __init__( self, @@ -41,7 +41,7 @@ def __init__( "bootstrap", "exact" (default - deterministic, incorporating entire observed value range), or "kde" (kernel density estimation). configuration: attributes needed for the estimation algorithm (subject of the inherited class) to operate. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP self._name = name self._configuration = configuration @@ -74,7 +74,7 @@ def get_numeric_range_estimate( Returns: "NumericRangeEstimationResult" object, containing computed "value_range" and "estimation_histogram" details. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return self._get_numeric_range_estimate( metric_values=metric_values, domain=domain, diff --git a/great_expectations/experimental/rule_based_profiler/estimators/quantiles_numeric_range_estimator.py b/great_expectations/experimental/rule_based_profiler/estimators/quantiles_numeric_range_estimator.py index 9403964bd17d..6f5c4aed673f 100644 --- a/great_expectations/experimental/rule_based_profiler/estimators/quantiles_numeric_range_estimator.py +++ b/great_expectations/experimental/rule_based_profiler/estimators/quantiles_numeric_range_estimator.py @@ -4,9 +4,9 @@ from typing import TYPE_CHECKING, Dict, Final, Optional from great_expectations.compatibility.typing_extensions import override -from great_expectations.core.domain import Domain # noqa: TCH001 -from great_expectations.experimental.rule_based_profiler.estimators.numeric_range_estimation_result import ( # noqa: E501 - NumericRangeEstimationResult, # noqa: TCH001 +from great_expectations.core.domain import Domain # noqa: TCH001 # FIXME CoP +from great_expectations.experimental.rule_based_profiler.estimators.numeric_range_estimation_result import ( # noqa: E501 # FIXME CoP + NumericRangeEstimationResult, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.estimators.numeric_range_estimator import ( NumericRangeEstimator, @@ -18,9 +18,9 @@ get_quantile_statistic_interpolation_method_from_rule_state, ) from great_expectations.experimental.rule_based_profiler.parameter_container import ( - ParameterContainer, # noqa: TCH001 + ParameterContainer, # noqa: TCH001 # FIXME CoP ) -from great_expectations.types.attributes import Attributes # noqa: TCH001 +from 
great_expectations.types.attributes import Attributes # noqa: TCH001 # FIXME CoP from great_expectations.util import convert_ndarray_to_datetime_dtype_best_effort if TYPE_CHECKING: @@ -39,7 +39,7 @@ class QuantilesNumericRangeEstimator(NumericRangeEstimator): This nonparameteric estimator calculates quantiles given a MetricValues vector of length N, the q-th quantile of the vector is the value q of the way from the minimum to the maximum in a sorted copy of the MetricValues. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def __init__( self, @@ -64,7 +64,7 @@ def _get_numeric_range_estimate( variables=variables, parameters=parameters, ) - quantile_statistic_interpolation_method: str = get_quantile_statistic_interpolation_method_from_rule_state( # noqa: E501 + quantile_statistic_interpolation_method: str = get_quantile_statistic_interpolation_method_from_rule_state( # noqa: E501 # FIXME CoP quantile_statistic_interpolation_method=self.configuration.quantile_statistic_interpolation_method, # type: ignore[union-attr] # configuration could be None round_decimals=self.configuration.round_decimals, # type: ignore[union-attr] # configuration could be None domain=domain, diff --git a/great_expectations/experimental/rule_based_profiler/expectation_configuration_builder/__init__.py b/great_expectations/experimental/rule_based_profiler/expectation_configuration_builder/__init__.py index 958f274dc507..4438e941e9ea 100644 --- a/great_expectations/experimental/rule_based_profiler/expectation_configuration_builder/__init__.py +++ b/great_expectations/experimental/rule_based_profiler/expectation_configuration_builder/__init__.py @@ -1,7 +1,7 @@ -from great_expectations.experimental.rule_based_profiler.expectation_configuration_builder.expectation_configuration_builder import ( # isort:skip # noqa: E501 +from great_expectations.experimental.rule_based_profiler.expectation_configuration_builder.expectation_configuration_builder import ( # isort:skip # noqa: E501 # FIXME CoP 
ExpectationConfigurationBuilder, init_rule_expectation_configuration_builders, ) -from great_expectations.experimental.rule_based_profiler.expectation_configuration_builder.default_expectation_configuration_builder import ( # isort:skip # noqa: E501 +from great_expectations.experimental.rule_based_profiler.expectation_configuration_builder.default_expectation_configuration_builder import ( # isort:skip # noqa: E501 # FIXME CoP DefaultExpectationConfigurationBuilder, ) diff --git a/great_expectations/experimental/rule_based_profiler/expectation_configuration_builder/default_expectation_configuration_builder.py b/great_expectations/experimental/rule_based_profiler/expectation_configuration_builder/default_expectation_configuration_builder.py index 260497856cce..5f34a6d4d785 100644 --- a/great_expectations/experimental/rule_based_profiler/expectation_configuration_builder/default_expectation_configuration_builder.py +++ b/great_expectations/experimental/rule_based_profiler/expectation_configuration_builder/default_expectation_configuration_builder.py @@ -18,12 +18,12 @@ from pyparsing import Optional as ppOptional import great_expectations.exceptions as gx_exceptions -from great_expectations.core.domain import Domain # noqa: TCH001 +from great_expectations.core.domain import Domain # noqa: TCH001 # FIXME CoP from great_expectations.expectations.expectation_configuration import ( ExpectationConfiguration, ) from great_expectations.experimental.rule_based_profiler.config import ( - ParameterBuilderConfig, # noqa: TCH001 + ParameterBuilderConfig, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.exceptions import ProfilerExecutionError from great_expectations.experimental.rule_based_profiler.expectation_configuration_builder import ( @@ -33,7 +33,7 @@ get_parameter_value_and_validate_return_type, ) from great_expectations.experimental.rule_based_profiler.parameter_container import ( - ParameterContainer, # noqa: TCH001 + 
ParameterContainer, # noqa: TCH001 # FIXME CoP ) if TYPE_CHECKING: @@ -68,7 +68,7 @@ class DefaultExpectationConfigurationBuilder(ExpectationConfigurationBuilder): parameter_name-to-parameter_fully_qualified_parameter_name map (name-value pairs supplied in the kwargs dictionary). ExpectationConfigurations can be optionally filtered if a supplied condition is met. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP exclude_field_names: ClassVar[Set[str]] = ( ExpectationConfigurationBuilder.exclude_field_names @@ -97,7 +97,7 @@ def __init__( These "ParameterBuilder" configurations help build kwargs needed for this "ExpectationConfigurationBuilder" data_context: AbstractDataContext associated with this ExpectationConfigurationBuilder kwargs: additional arguments - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP super().__init__( expectation_type=expectation_type, @@ -115,14 +115,14 @@ def __init__( raise ProfilerExecutionError( message=f"""Argument "{meta}" in "{self.__class__.__name__}" must be of type "dictionary" \ (value of type "{type(meta)!s}" was encountered). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) if condition and (not isinstance(condition, str)): raise ProfilerExecutionError( message=f"""Argument "{condition}" in "{self.__class__.__name__}" must be of type "string" \ (value of type "{type(condition)!s}" was encountered). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) self._condition = condition @@ -159,12 +159,12 @@ def _parse_condition(self) -> ParseResults: Applicability: To be used as part of configuration (e.g., YAML-based files or text strings). Extendability: Readily extensible to include "slice" and other standard accessors (as long as no dynamic elements). 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP try: return expr.parseString(self._condition) except ParseException: - raise ExpectationConfigurationConditionParserError( # noqa: TRY003 + raise ExpectationConfigurationConditionParserError( # noqa: TRY003 # FIXME CoP f'Unable to parse Expectation Configuration Condition: "{self._condition}".' ) @@ -214,7 +214,7 @@ def _substitute_parameters_and_variables( Returns: ParseResults: a ParseResults object identical to the one returned by self._parse_condition except with substituted parameters and variables. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP idx: int token: Union[str, ParseResults] for idx, token in enumerate(term_list): @@ -266,7 +266,7 @@ def _build_binary_list( Returns: ParseResults: a ParseResults object with all terms evaluated except for binary operations. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP idx: int token: Union[str, list] for idx, token in enumerate(substituted_term_list): @@ -301,7 +301,7 @@ def _build_boolean_result( Returns: bool: a boolean representing the evaluation of the entire provided condition. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP idx: int token: Union[str, list] for idx, token in enumerate(binary_list): @@ -346,7 +346,7 @@ def _build_expectation_configuration( parameters: Optional[Dict[str, ParameterContainer]] = None, runtime_configuration: Optional[dict] = None, ) -> Optional[ExpectationConfiguration]: - """Returns either and ExpectationConfiguration object or None depending on evaluation of condition""" # noqa: E501 + """Returns either and ExpectationConfiguration object or None depending on evaluation of condition""" # noqa: E501 # FIXME CoP parameter_name: str fully_qualified_parameter_name: str expectation_kwargs: Dict[str, Any] = { diff --git a/great_expectations/experimental/rule_based_profiler/expectation_configuration_builder/expectation_configuration_builder.py b/great_expectations/experimental/rule_based_profiler/expectation_configuration_builder/expectation_configuration_builder.py index 20aa7333556f..fc181a2c14dc 100644 --- a/great_expectations/experimental/rule_based_profiler/expectation_configuration_builder/expectation_configuration_builder.py +++ b/great_expectations/experimental/rule_based_profiler/expectation_configuration_builder/expectation_configuration_builder.py @@ -4,23 +4,23 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING, ClassVar, Dict, List, Optional, Set, Union -from great_expectations.core.batch import Batch, BatchRequestBase # noqa: TCH001 -from great_expectations.core.domain import Domain # noqa: TCH001 +from great_expectations.core.batch import Batch, BatchRequestBase # noqa: TCH001 # FIXME CoP +from great_expectations.core.domain import Domain # noqa: TCH001 # FIXME CoP from great_expectations.data_context.util import instantiate_class_from_config from great_expectations.expectations.expectation_configuration import ( - ExpectationConfiguration, # noqa: TCH001 + ExpectationConfiguration, # noqa: TCH001 # FIXME CoP ) from great_expectations.expectations.registry import 
get_expectation_impl from great_expectations.experimental.rule_based_profiler.builder import Builder from great_expectations.experimental.rule_based_profiler.config import ( - ParameterBuilderConfig, # noqa: TCH001 + ParameterBuilderConfig, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.parameter_builder import ( ParameterBuilder, init_rule_parameter_builders, ) from great_expectations.experimental.rule_based_profiler.parameter_container import ( - ParameterContainer, # noqa: TCH001 + ParameterContainer, # noqa: TCH001 # FIXME CoP ) if TYPE_CHECKING: @@ -54,7 +54,7 @@ def __init__( These "ParameterBuilder" configurations help build kwargs needed for this "ExpectationConfigurationBuilder" data_context: AbstractDataContext associated with this ExpectationConfigurationBuilder kwargs: additional arguments - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP super().__init__(data_context=data_context) @@ -69,14 +69,14 @@ def __init__( Since ExpectationConfigurationBuilderConfigSchema allows arbitrary fields (as ExpectationConfiguration kwargs) to be provided, they must be all converted to public property accessors and/or public fields in order for all provisions by Builder, SerializableDictDot, and DictDot to operate properly in compliance with their interfaces. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP for k, v in kwargs.items(): setattr(self, k, v) logger.debug( - f'Setting unknown kwarg ({k}, {v}) provided to constructor as argument in "{self.__class__.__name__}".' # noqa: E501 + f'Setting unknown kwarg ({k}, {v}) provided to constructor as argument in "{self.__class__.__name__}".' # noqa: E501 # FIXME CoP ) - def build_expectation_configuration( # noqa: PLR0913 + def build_expectation_configuration( # noqa: PLR0913 # FIXME CoP self, domain: Domain, variables: Optional[ParameterContainer] = None, @@ -96,7 +96,7 @@ def build_expectation_configuration( # noqa: PLR0913 Returns: ExpectationConfiguration object. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP self.resolve_validation_dependencies( domain=domain, variables=variables, @@ -118,13 +118,13 @@ def _roundtrip_config_through_expectation( ) -> ExpectationConfiguration: """ Utilize Pydantic validaton and type coercion to ensure the final expectation configuration is valid. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP expectation_cls = get_expectation_impl(config.type) kwargs = {**config.kwargs, **domain.domain_kwargs} expectation = expectation_cls(**kwargs, meta=config.meta) return expectation.configuration - def resolve_validation_dependencies( # noqa: PLR0913 + def resolve_validation_dependencies( # noqa: PLR0913 # FIXME CoP self, domain: Domain, variables: Optional[ParameterContainer] = None, @@ -191,12 +191,12 @@ def init_expectation_configuration_builder( expectation_configuration_builder_config.to_dict() ) - expectation_configuration_builder: ExpectationConfigurationBuilder = instantiate_class_from_config( # noqa: E501 + expectation_configuration_builder: ExpectationConfigurationBuilder = instantiate_class_from_config( # noqa: E501 # FIXME CoP config=expectation_configuration_builder_config, runtime_environment={"data_context": data_context}, config_defaults={ "class_name": "DefaultExpectationConfigurationBuilder", - "module_name": "great_expectations.rule_based_profiler.expectation_configuration_builder", # noqa: E501 + "module_name": "great_expectations.rule_based_profiler.expectation_configuration_builder", # noqa: E501 # FIXME CoP }, ) return expectation_configuration_builder diff --git a/great_expectations/experimental/rule_based_profiler/helpers/cardinality_checker.py b/great_expectations/experimental/rule_based_profiler/helpers/cardinality_checker.py index c3b4f25f3c97..2cc01c61eca8 100644 --- a/great_expectations/experimental/rule_based_profiler/helpers/cardinality_checker.py +++ b/great_expectations/experimental/rule_based_profiler/helpers/cardinality_checker.py @@ -10,7 +10,7 @@ 
ProfilerConfigurationError, ) from great_expectations.types import SerializableDictDot -from great_expectations.util import convert_to_json_serializable # noqa: TID251 +from great_expectations.util import convert_to_json_serializable # noqa: TID251 # FIXME CoP @dataclass(frozen=True) @@ -136,7 +136,7 @@ def cardinality_limit_mode( def cardinality_within_limit( self, - metric_value: Union[int, float], # noqa: PYI041 + metric_value: Union[int, float], # noqa: PYI041 # FIXME CoP ) -> bool: """Determine if the cardinality is within configured limit. @@ -157,20 +157,20 @@ def cardinality_within_limit( if isinstance(self._cardinality_limit_mode, RelativeCardinalityLimit): return float(metric_value) <= self._cardinality_limit_mode.max_proportion_unique - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP f'Unknown "cardinality_limit_mode" mode "{self._cardinality_limit_mode}" encountered.' ) @staticmethod - def _validate_metric_value(metric_value: Union[int, float]) -> None: # noqa: PYI041 + def _validate_metric_value(metric_value: Union[int, float]) -> None: # noqa: PYI041 # FIXME CoP if not isinstance(metric_value, (int, float)): - raise ProfilerConfigurationError( # noqa: TRY003 - f"Value of measured cardinality must be of type int or float, you provided {type(metric_value)}" # noqa: E501 + raise ProfilerConfigurationError( # noqa: TRY003 # FIXME CoP + f"Value of measured cardinality must be of type int or float, you provided {type(metric_value)}" # noqa: E501 # FIXME CoP ) if metric_value < 0.00: - raise ProfilerConfigurationError( # noqa: TRY003 - f"Value of cardinality (number of rows or percent unique) should be greater than 0.00, your value is {metric_value}" # noqa: E501 + raise ProfilerConfigurationError( # noqa: TRY003 # FIXME CoP + f"Value of cardinality (number of rows or percent unique) should be greater than 0.00, your value is {metric_value}" # noqa: E501 # FIXME CoP ) @staticmethod @@ -181,8 +181,8 @@ def 
_to_cardinality_limit_mode( try: return CardinalityLimitMode[cardinality_limit_mode.upper()].value except KeyError: - raise ProfilerConfigurationError( # noqa: TRY003 - f"Please specify a supported cardinality mode. Supported cardinality modes are {[member.name for member in CardinalityLimitMode]}" # noqa: E501 + raise ProfilerConfigurationError( # noqa: TRY003 # FIXME CoP + f"Please specify a supported cardinality mode. Supported cardinality modes are {[member.name for member in CardinalityLimitMode]}" # noqa: E501 # FIXME CoP ) elif isinstance(cardinality_limit_mode, dict): validate_input_parameters( @@ -207,8 +207,8 @@ def _to_cardinality_limit_mode( ], ) except (KeyError, ValueError): - raise ProfilerConfigurationError( # noqa: TRY003 - f"Please specify a supported cardinality mode. Supported cardinality modes are {[member.name for member in CardinalityLimitMode]}" # noqa: E501 + raise ProfilerConfigurationError( # noqa: TRY003 # FIXME CoP + f"Please specify a supported cardinality mode. 
Supported cardinality modes are {[member.name for member in CardinalityLimitMode]}" # noqa: E501 # FIXME CoP ) else: return cast(CardinalityLimitMode, cardinality_limit_mode).value @@ -235,14 +235,14 @@ def _convert_to_cardinality_limit_mode( else: assert ( max_proportion_unique is not None - ), "Guaranteed to have `max_proportion_unique` due to prior call to `validate_input_parameters`" # noqa: E501 + ), "Guaranteed to have `max_proportion_unique` due to prior call to `validate_input_parameters`" # noqa: E501 # FIXME CoP return RelativeCardinalityLimit( name=f"CUSTOM_REL_{max_proportion_unique}", max_proportion_unique=max_proportion_unique, ) -def validate_input_parameters( # noqa: C901 +def validate_input_parameters( # noqa: C901 # FIXME CoP cardinality_limit_mode: str | CardinalityLimitMode | dict | None = None, max_unique_values: int | None = None, max_proportion_unique: float | None = None, @@ -257,33 +257,33 @@ def validate_input_parameters( # noqa: C901 ) ) if num_supplied_params != required_num_supplied_params: - raise ProfilerConfigurationError( # noqa: TRY003 - f"Please pass ONE of the following parameters: cardinality_limit_mode, max_unique_values, max_proportion_unique, you passed {num_supplied_params} parameters." # noqa: E501 + raise ProfilerConfigurationError( # noqa: TRY003 # FIXME CoP + f"Please pass ONE of the following parameters: cardinality_limit_mode, max_unique_values, max_proportion_unique, you passed {num_supplied_params} parameters." 
# noqa: E501 # FIXME CoP ) if cardinality_limit_mode is not None: if not isinstance(cardinality_limit_mode, (str, CardinalityLimitMode, dict)): - raise ProfilerConfigurationError( # noqa: TRY003 - f"Please specify a supported cardinality limit type, supported classes are {','.join(CardinalityChecker.SUPPORTED_LIMIT_MODE_CLASS_NAMES)} and supported strings are {','.join(CardinalityChecker.SUPPORTED_CARDINALITY_LIMIT_MODE_STRINGS)}" # noqa: E501 + raise ProfilerConfigurationError( # noqa: TRY003 # FIXME CoP + f"Please specify a supported cardinality limit type, supported classes are {','.join(CardinalityChecker.SUPPORTED_LIMIT_MODE_CLASS_NAMES)} and supported strings are {','.join(CardinalityChecker.SUPPORTED_CARDINALITY_LIMIT_MODE_STRINGS)}" # noqa: E501 # FIXME CoP ) - if required_num_supplied_params == 2: # noqa: PLR2004 + if required_num_supplied_params == 2: # noqa: PLR2004 # FIXME CoP try: assert isinstance(cardinality_limit_mode, str) return CardinalityLimitMode[cardinality_limit_mode.upper()].value except KeyError: - raise ProfilerConfigurationError( # noqa: TRY003 - f"Please specify a supported cardinality mode. Supported cardinality modes are {[member.name for member in CardinalityLimitMode]}" # noqa: E501 + raise ProfilerConfigurationError( # noqa: TRY003 # FIXME CoP + f"Please specify a supported cardinality mode. 
Supported cardinality modes are {[member.name for member in CardinalityLimitMode]}" # noqa: E501 # FIXME CoP ) if max_unique_values is not None: if not isinstance(max_unique_values, int): - raise ProfilerConfigurationError( # noqa: TRY003 + raise ProfilerConfigurationError( # noqa: TRY003 # FIXME CoP f"Please specify an int, you specified a {type(max_unique_values)}" ) if max_proportion_unique is not None: if not isinstance(max_proportion_unique, (float, int)): - raise ProfilerConfigurationError( # noqa: TRY003 + raise ProfilerConfigurationError( # noqa: TRY003 # FIXME CoP f"Please specify a float or int, you specified a {type(max_proportion_unique)}" ) diff --git a/great_expectations/experimental/rule_based_profiler/helpers/configuration_reconciliation.py b/great_expectations/experimental/rule_based_profiler/helpers/configuration_reconciliation.py index ba08e52ad0de..af80ab43a8ea 100644 --- a/great_expectations/experimental/rule_based_profiler/helpers/configuration_reconciliation.py +++ b/great_expectations/experimental/rule_based_profiler/helpers/configuration_reconciliation.py @@ -11,7 +11,7 @@ convert_variables_to_dict, ) from great_expectations.types import SerializableDictDot -from great_expectations.util import convert_to_json_serializable # noqa: TID251 +from great_expectations.util import convert_to_json_serializable # noqa: TID251 # FIXME CoP if TYPE_CHECKING: from great_expectations.experimental.rule_based_profiler.parameter_container import ( @@ -70,7 +70,7 @@ def reconcile_rule_variables( :param variables_config: variables configuration override, supplied in dictionary (configuration) form :param reconciliation_strategy: one of update, nested_update, or overwrite ways of reconciling overwrites :return: reconciled variables configuration, returned in dictionary (configuration) form - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP effective_variables_config: dict = convert_variables_to_dict(variables=variables) if variables_config: if 
reconciliation_strategy == ReconciliationStrategy.NESTED_UPDATE: diff --git a/great_expectations/experimental/rule_based_profiler/helpers/runtime_environment.py b/great_expectations/experimental/rule_based_profiler/helpers/runtime_environment.py index 2698db973e05..87eac5831c36 100644 --- a/great_expectations/experimental/rule_based_profiler/helpers/runtime_environment.py +++ b/great_expectations/experimental/rule_based_profiler/helpers/runtime_environment.py @@ -10,7 +10,7 @@ convert_variables_to_dict, ) from great_expectations.types import SerializableDictDot -from great_expectations.util import convert_to_json_serializable # noqa: TID251 +from great_expectations.util import convert_to_json_serializable # noqa: TID251 # FIXME CoP if TYPE_CHECKING: from great_expectations.experimental.rule_based_profiler.rule import Rule @@ -68,8 +68,8 @@ def build_variables_directives( This method makes best-effort attempt to identify directives, supplied in "kwargs", as "variables", referenced by components of "Rule" objects, identified by respective "rule_name" property as indicated, and return each of these directives as part of dedicated "RuntimeEnvironmentVariablesDirectives" typed object for every "rule_name" (string). - """ # noqa: E501 - # Implementation relies on assumption that "kwargs" contains "variables"-level arguments/directives only. # noqa: E501 + """ # noqa: E501 # FIXME CoP + # Implementation relies on assumption that "kwargs" contains "variables"-level arguments/directives only. # noqa: E501 # FIXME CoP directives: Dict[ str, Dict[str, Any] ] # key is "rule_name"; value is "variables" in corresponding "Rule" object @@ -82,7 +82,7 @@ def build_variables_directives( if rule.name in kwargs: rule_variables_configs.update(kwargs[rule.name]) - # Since "exact_estimation" is True, "estimator" value of "exact" must be set on "variables" of every "Rule". 
# noqa: E501 + # Since "exact_estimation" is True, "estimator" value of "exact" must be set on "variables" of every "Rule". # noqa: E501 # FIXME CoP rule_variables_configs.update( { "estimator": "exact", @@ -93,7 +93,7 @@ def build_variables_directives( else: directives = kwargs - # Convert "kwargs" ("dict"-typed) directives into interpretable "RuntimeEnvironmentVariablesDirectives" "Enum" type. # noqa: E501 + # Convert "kwargs" ("dict"-typed) directives into interpretable "RuntimeEnvironmentVariablesDirectives" "Enum" type. # noqa: E501 # FIXME CoP rule_name: str return [ RuntimeEnvironmentVariablesDirectives( @@ -111,14 +111,14 @@ def build_domain_type_directives( This method makes best-effort attempt to identify directives, supplied in "kwargs", as supported properties, corresponnding to "DomainBuilder" classes, associated with every "MetricDomainTypes", and return each of these directives as part of dedicated "RuntimeEnvironmentDomainTypeDirectives" typed object for every "MetricDomainTypes". - """ # noqa: E501 - # Implementation relies on assumption that "kwargs" contains "Domain"-level arguments/directives only. # noqa: E501 + """ # noqa: E501 # FIXME CoP + # Implementation relies on assumption that "kwargs" contains "Domain"-level arguments/directives only. # noqa: E501 # FIXME CoP """ Currently, only "column_domain_type_directives" are supported; in the future, other "Domain" type directives could be envisioned as consideration for support (e.g., "table_domain_type_directives"). To underscore this reasoning, "domain_type_directives_list" is declared as "List" and a single "RuntimeEnvironmentDomainTypeDirectives" element is appended, instead of setting "domain_type_directives_list" to contain that element explicitly. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_type_directives_list: List[RuntimeEnvironmentDomainTypeDirectives] = [] column_domain_type_directives: RuntimeEnvironmentDomainTypeDirectives = ( diff --git a/great_expectations/experimental/rule_based_profiler/helpers/simple_semantic_type_filter.py b/great_expectations/experimental/rule_based_profiler/helpers/simple_semantic_type_filter.py index a0915c79c465..fc2a7ad87968 100644 --- a/great_expectations/experimental/rule_based_profiler/helpers/simple_semantic_type_filter.py +++ b/great_expectations/experimental/rule_based_profiler/helpers/simple_semantic_type_filter.py @@ -39,7 +39,7 @@ def _is_sequence_of(sequence: Sequence, type_: Type[T]) -> TypeGuard[Sequence[T] class SimpleSemanticTypeFilter(SemanticTypeFilter): """ This class provides default implementation methods, any of which can be overwritten with different mechanisms. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def __init__( self, @@ -87,11 +87,11 @@ def parse_semantic_domain_type_argument( SemanticDomainTypes(semantic_type.lower()) for semantic_type in semantic_types ] - raise ValueError( # noqa: TRY003 - "All elements in semantic_types list must be either of str or SemanticDomainTypes type." # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "All elements in semantic_types list must be either of str or SemanticDomainTypes type." 
# noqa: E501 # FIXME CoP ) - raise ValueError("Unrecognized semantic_types directive.") # noqa: TRY003 + raise ValueError("Unrecognized semantic_types directive.") # noqa: TRY003 # FIXME CoP def _build_table_column_name_to_inferred_semantic_domain_type_map( self, @@ -134,11 +134,11 @@ def _build_table_column_name_to_inferred_semantic_domain_type_map( } @staticmethod - def _infer_semantic_domain_type_from_table_column_type( # noqa: C901 + def _infer_semantic_domain_type_from_table_column_type( # noqa: C901 # FIXME CoP column_types_dict_list: List[Dict[str, Any]], column_name: str, ) -> InferredSemanticDomainType: - # Note: As of Python 3.8, specifying argument type in Lambda functions is not supported by Lambda syntax. # noqa: E501 + # Note: As of Python 3.8, specifying argument type in Lambda functions is not supported by Lambda syntax. # noqa: E501 # FIXME CoP column_types_dict_list = list( filter( lambda column_type_dict: column_name == column_type_dict["name"] @@ -153,7 +153,7 @@ def _infer_semantic_domain_type_from_table_column_type( # noqa: C901 raise ProfilerExecutionError( message=f"""Error: {len(column_types_dict_list)} columns were found while obtaining semantic type \ information. Please ensure that the specified column name refers to exactly one column. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ) column_type: str = str(column_types_dict_list[0]["type"]).upper() diff --git a/great_expectations/experimental/rule_based_profiler/helpers/util.py b/great_expectations/experimental/rule_based_profiler/helpers/util.py index 0b0a4e638f52..61ae92917e15 100644 --- a/great_expectations/experimental/rule_based_profiler/helpers/util.py +++ b/great_expectations/experimental/rule_based_profiler/helpers/util.py @@ -41,9 +41,9 @@ SemanticDomainTypes, ) from great_expectations.core.metric_domain_types import ( - MetricDomainTypes, # noqa: TCH001 + MetricDomainTypes, # noqa: TCH001 # FIXME CoP ) -from great_expectations.experimental.rule_based_profiler.estimators.numeric_range_estimation_result import ( # noqa: E501 +from great_expectations.experimental.rule_based_profiler.estimators.numeric_range_estimation_result import ( # noqa: E501 # FIXME CoP NUM_HISTOGRAM_BINS, NumericRangeEstimationResult, ) @@ -62,9 +62,9 @@ convert_ndarray_float_to_datetime_dtype, convert_ndarray_to_datetime_dtype_best_effort, ) -from great_expectations.validator.computed_metric import MetricValue # noqa: TCH001 +from great_expectations.validator.computed_metric import MetricValue # noqa: TCH001 # FIXME CoP from great_expectations.validator.metric_configuration import ( - MetricConfiguration, # noqa: TCH001 + MetricConfiguration, # noqa: TCH001 # FIXME CoP ) if TYPE_CHECKING: @@ -98,7 +98,7 @@ NP_RANDOM_GENERATOR: Final = np.random.default_rng() -def get_validator( # noqa: PLR0913 +def get_validator( # noqa: PLR0913 # FIXME CoP purpose: str, *, data_context: Optional[AbstractDataContext] = None, @@ -134,7 +134,7 @@ def get_validator( # noqa: PLR0913 if num_batches == 0: raise ProfilerExecutionError( message=f"""{__name__}.get_validator() must utilize at least one Batch ({num_batches} are available). 
-""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) validator = get_validator_with_expectation_suite( @@ -153,7 +153,7 @@ def get_validator( # noqa: PLR0913 return validator -def get_batch_ids( # noqa: PLR0913 +def get_batch_ids( # noqa: PLR0913 # FIXME CoP data_context: Optional[AbstractDataContext] = None, batch_list: Optional[List[Batch]] = None, batch_request: Optional[Union[str, BatchRequestBase, dict]] = None, @@ -181,19 +181,19 @@ def get_batch_ids( # noqa: PLR0913 num_batch_ids: int = len(batch_ids) if limit is not None: - # No need to verify that type of "limit" is "integer", because static type checking already ascertains this. # noqa: E501 + # No need to verify that type of "limit" is "integer", because static type checking already ascertains this. # noqa: E501 # FIXME CoP if not (0 <= limit <= num_batch_ids): raise ProfilerExecutionError( message=f"""{__name__}.get_batch_ids() allows integer limit values between 0 and {num_batch_ids} \ ({limit} was requested). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) batch_ids = batch_ids[-limit:] if num_batch_ids == 0: raise ProfilerExecutionError( message=f"""{__name__}.get_batch_ids() must return at least one batch_id ({num_batch_ids} were retrieved). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) return batch_ids @@ -208,7 +208,7 @@ def build_batch_request( if batch_request is None: return None - # Obtain BatchRequest from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain BatchRequest from "rule state" (i.e., variables and parameters); from instance variable otherwise. 
# noqa: E501 # FIXME CoP effective_batch_request: Optional[Union[BatchRequestBase, dict]] = ( get_parameter_value_and_validate_return_type( domain=domain, @@ -232,7 +232,7 @@ def build_metric_domain_kwargs( variables: Optional[ParameterContainer] = None, parameters: Optional[Dict[str, ParameterContainer]] = None, ): - # Obtain domain kwargs from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain domain kwargs from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP metric_domain_kwargs = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=metric_domain_kwargs, @@ -261,7 +261,7 @@ def get_parameter_value_and_validate_return_type( """ This method allows for the parameter_reference to be specified as an object (literal, dict, any typed object, etc.) or as a fully-qualified parameter name. In either case, it can optionally validate the type of the return value. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if isinstance(parameter_reference, dict): parameter_reference = safe_deep_copy(data=parameter_reference) @@ -277,7 +277,7 @@ def get_parameter_value_and_validate_return_type( raise ProfilerExecutionError( message=f"""Argument "{parameter_reference}" must be of type "{expected_return_type!s}" \ (value of type "{type(parameter_reference)!s}" was encountered). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) return parameter_reference @@ -293,7 +293,7 @@ def get_parameter_value( This method allows for the parameter_reference to be specified as an object (literal, dict, any typed object, etc.) or as a fully-qualified parameter name. Moreover, if the parameter_reference argument is an object of type "dict", it will recursively detect values using the fully-qualified parameter name format and evaluate them accordingly. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if isinstance(parameter_reference, dict): for key, value in parameter_reference.items(): parameter_reference[key] = get_parameter_value( @@ -357,13 +357,13 @@ def get_resolved_metrics_by_key( Dictionary of the form { "my_key": Dict[Tuple[str, str, str], MetricValue], } - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP key: str metric_configuration: MetricConfiguration metric_configurations_for_key: List[MetricConfiguration] - # Step 1: Gather "MetricConfiguration" objects corresponding to all possible key values/combinations. # noqa: E501 - # and compute all metric values (resolve "MetricConfiguration" objects ) using a single method call. # noqa: E501 + # Step 1: Gather "MetricConfiguration" objects corresponding to all possible key values/combinations. # noqa: E501 # FIXME CoP + # and compute all metric values (resolve "MetricConfiguration" objects ) using a single method call. # noqa: E501 # FIXME CoP resolved_metrics: _MetricsDict resolved_metrics, _ = validator.compute_metrics( metric_configurations=[ @@ -375,14 +375,14 @@ def get_resolved_metrics_by_key( min_graph_edges_pbar_enable=0, ) - # Step 2: Gather "MetricConfiguration" ID values for each key (one element per batch_id in every list). # noqa: E501 + # Step 2: Gather "MetricConfiguration" ID values for each key (one element per batch_id in every list). # noqa: E501 # FIXME CoP metric_configuration_ids_by_key: Dict[str, List[Tuple[str, str, str]]] = { key: [metric_configuration.id for metric_configuration in metric_configurations_for_key] for key, metric_configurations_for_key in metric_configurations_by_key.items() } metric_configuration_ids: List[Tuple[str, str, str]] - # Step 3: Obtain flattened list of "MetricConfiguration" ID values across all key values/combinations. # noqa: E501 + # Step 3: Obtain flattened list of "MetricConfiguration" ID values across all key values/combinations. 
# noqa: E501 # FIXME CoP metric_configuration_ids_all_keys: List[Tuple[str, str, str]] = list( itertools.chain( *[ @@ -392,7 +392,7 @@ def get_resolved_metrics_by_key( ) ) - # Step 4: Retain only those metric computation results that both, correspond to "MetricConfiguration" objects of # noqa: E501 + # Step 4: Retain only those metric computation results that both, correspond to "MetricConfiguration" objects of # noqa: E501 # FIXME CoP # interest (reflecting specified key values/combinations). metric_configuration_id: Tuple[str, str, str] metric_value: Any @@ -444,7 +444,7 @@ def build_domains_from_column_names( :param domain_type: type of Domain objects (same "domain_type" must be applicable to all Domain objects returned) :param table_column_name_to_inferred_semantic_domain_type_map: map from column name to inferred semantic type :return: list of resulting Domain objects - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP column_name: str domains: List[Domain] = [ Domain( @@ -506,7 +506,7 @@ def integer_semantic_domain_type(domain: Domain) -> bool: Returns: Boolean value indicating whether or not specified "Domain" is inferred to denote "integer" values - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP inferred_semantic_domain_type: Dict[str, SemanticDomainTypes] = domain.details.get( INFERRED_SEMANTIC_TYPE_KEY @@ -535,7 +535,7 @@ def datetime_semantic_domain_type(domain: Domain) -> bool: Returns: Boolean value indicating whether or not specified "Domain" is inferred as "SemanticDomainTypes.DATETIME" - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP inferred_semantic_domain_type: Dict[str, SemanticDomainTypes] = domain.details.get( INFERRED_SEMANTIC_TYPE_KEY @@ -556,11 +556,11 @@ def get_false_positive_rate_from_rule_state( ) -> Union[float, np.float64]: """ This method obtains false_positive_rate from "rule state" (i.e., variables and parameters) and validates the result. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if false_positive_rate is None: return 5.0e-2 - # Obtain false_positive_rate from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain false_positive_rate from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP false_positive_rate = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=false_positive_rate, @@ -569,23 +569,23 @@ def get_false_positive_rate_from_rule_state( parameters=parameters, ) if not (0.0 <= false_positive_rate <= 1.0): - raise ProfilerExecutionError( # noqa: TRY003 + raise ProfilerExecutionError( # noqa: TRY003 # FIXME CoP f"""false_positive_rate must be a positive decimal number between 0 and 1 inclusive [0, 1], but \ {false_positive_rate} was provided. -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) elif false_positive_rate <= NP_EPSILON: warnings.warn( f"""You have chosen a false_positive_rate of {false_positive_rate}, which is too close to 0. A \ false_positive_rate of {NP_EPSILON} has been selected instead. -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) false_positive_rate = np.float64(NP_EPSILON) elif false_positive_rate >= (1.0 - NP_EPSILON): warnings.warn( f"""You have chosen a false_positive_rate of {false_positive_rate}, which is too close to 1. A \ false_positive_rate of {1.0 - NP_EPSILON} has been selected instead. -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) false_positive_rate = np.float64(1.0 - NP_EPSILON) @@ -602,8 +602,8 @@ def get_quantile_statistic_interpolation_method_from_rule_state( """ This method obtains quantile_statistic_interpolation_method from "rule state" (i.e., variables and parameters) and validates the result. - """ # noqa: E501 - # Obtain quantile_statistic_interpolation_method directive from "rule state" (i.e., variables and parameters); from instance variable otherwise. 
# noqa: E501 + """ # noqa: E501 # FIXME CoP + # Obtain quantile_statistic_interpolation_method directive from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP quantile_statistic_interpolation_method = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=quantile_statistic_interpolation_method, @@ -618,7 +618,7 @@ def get_quantile_statistic_interpolation_method_from_rule_state( raise ProfilerExecutionError( message=f"""The directive "quantile_statistic_interpolation_method" can be only one of \ {RECOGNIZED_QUANTILE_STATISTIC_INTERPOLATION_METHODS} ("{quantile_statistic_interpolation_method}" was detected). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) if quantile_statistic_interpolation_method == "auto": @@ -654,7 +654,7 @@ def compute_quantiles( ) -def compute_kde_quantiles_point_estimate( # noqa: PLR0913 +def compute_kde_quantiles_point_estimate( # noqa: PLR0913 # FIXME CoP metric_values: np.ndarray, false_positive_rate: np.float64, n_resamples: int, @@ -685,7 +685,7 @@ def compute_kde_quantiles_point_estimate( # noqa: PLR0913 https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gaussian_kde.html random_seed: An optional random_seed to pass to "np.random.Generator(np.random.PCG64(random_seed))" for making probabilistic sampling deterministic. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP lower_quantile_pct: float = false_positive_rate / 2.0 upper_quantile_pct: float = 1.0 - (false_positive_rate / 2.0) @@ -722,7 +722,7 @@ def compute_kde_quantiles_point_estimate( # noqa: PLR0913 ) -def compute_bootstrap_quantiles_point_estimate( # noqa: PLR0913 +def compute_bootstrap_quantiles_point_estimate( # noqa: PLR0913 # FIXME CoP metric_values: np.ndarray, false_positive_rate: np.float64, n_resamples: int, @@ -784,7 +784,7 @@ def compute_bootstrap_quantiles_point_estimate( # noqa: PLR0913 bootstrap sampling technique (see https://en.wikipedia.org/wiki/Bootstrapping_(statistics) for background) for computing the stopping criterion, expressed as the optimal number of bootstrap samples, needed to achieve a maximum probability that the value of the statistic of interest will be minimally deviating from its actual (ideal) value. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP lower_quantile_pct: float = false_positive_rate / 2.0 upper_quantile_pct: float = 1.0 - false_positive_rate / 2.0 @@ -851,7 +851,7 @@ def build_numeric_range_estimation_result( Returns: Structured "NumericRangeEstimationResult" object, containing histogram and value_range attributes. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP ndarray_is_datetime_type: bool metric_values_converted: np.ndarray ( @@ -863,7 +863,7 @@ def build_numeric_range_estimation_result( bin_edges: np.ndarray if ndarray_is_datetime_type: histogram = np.histogram(a=metric_values_converted, bins=NUM_HISTOGRAM_BINS) - # Use "UTC" TimeZone normalization in "bin_edges" when "metric_values" consists of "datetime.datetime" objects. # noqa: E501 + # Use "UTC" TimeZone normalization in "bin_edges" when "metric_values" consists of "datetime.datetime" objects. 
# noqa: E501 # FIXME CoP bin_edges = convert_ndarray_float_to_datetime_dtype(data=histogram[1]) else: histogram = np.histogram(a=metric_values, bins=NUM_HISTOGRAM_BINS) @@ -885,7 +885,7 @@ def build_numeric_range_estimation_result( ) -def _determine_quantile_bias_corrected_point_estimate( # noqa: PLR0913 +def _determine_quantile_bias_corrected_point_estimate( # noqa: PLR0913 # FIXME CoP bootstraps: np.ndarray, quantile_pct: float, quantile_statistic_interpolation_method: str, @@ -905,7 +905,7 @@ def _determine_quantile_bias_corrected_point_estimate( # noqa: PLR0913 # Bias / Standard Error > 0.25 is a rule of thumb for when to apply bias correction. # See: - # Efron, B., & Tibshirani, R. J. (1993). Estimates of bias. An Introduction to the Bootstrap (pp. 128). # noqa: E501 + # Efron, B., & Tibshirani, R. J. (1993). Estimates of bias. An Introduction to the Bootstrap (pp. 128). # noqa: E501 # FIXME CoP # Springer Science and Business Media Dordrecht. DOI 10.1007/978-1-4899-4541-9 quantile_bias_corrected_point_estimate: np.float64 @@ -934,7 +934,7 @@ def convert_metric_values_to_float_dtype_best_effort( Return: Boolean flag -- True, if conversion of original "np.ndarray" to "datetime.datetime" occurred; False, otherwise. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP original_ndarray_is_datetime_type: bool conversion_ndarray_to_datetime_type_performed: bool metric_values_converted: np.ndaarray @@ -961,7 +961,7 @@ def convert_metric_values_to_float_dtype_best_effort( return ndarray_is_datetime_type, metric_values_converted -def get_validator_with_expectation_suite( # noqa: PLR0913 +def get_validator_with_expectation_suite( # noqa: PLR0913 # FIXME CoP data_context: AbstractDataContext, batch_list: Optional[List[Batch]] = None, batch_request: Optional[Union[BatchRequestBase, dict]] = None, @@ -974,7 +974,7 @@ def get_validator_with_expectation_suite( # noqa: PLR0913 Instantiates and returns "Validator" using "data_context", "batch_list" or "batch_request", and other information. Use "expectation_suite" if provided; otherwise, if "expectation_suite_name" is specified, then create "ExpectationSuite" from it. Otherwise, generate temporary "expectation_suite_name" using supplied "component_name". - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP assert expectation_suite is None or isinstance(expectation_suite, ExpectationSuite) assert expectation_suite_name is None or isinstance(expectation_suite_name, str) @@ -995,7 +995,7 @@ def get_validator_with_expectation_suite( # noqa: PLR0913 return validator -def get_or_create_expectation_suite( # noqa: C901 +def get_or_create_expectation_suite( # noqa: C901 # FIXME CoP data_context: Optional[AbstractDataContext], expectation_suite: Optional[ExpectationSuite] = None, expectation_suite_name: Optional[str] = None, @@ -1005,14 +1005,14 @@ def get_or_create_expectation_suite( # noqa: C901 """ Use "expectation_suite" if provided. If not, then if "expectation_suite_name" is specified, then create "ExpectationSuite" from it. Otherwise, generate temporary "expectation_suite_name" using supplied "component_name". 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP generate_temp_expectation_suite_name: bool create_expectation_suite: bool if expectation_suite is not None and expectation_suite_name is not None: if expectation_suite.name != expectation_suite_name: - raise ValueError( # noqa: TRY003 - 'Mutually inconsistent "expectation_suite" and "expectation_suite_name" were specified.' # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + 'Mutually inconsistent "expectation_suite" and "expectation_suite_name" were specified.' # noqa: E501 # FIXME CoP ) return expectation_suite @@ -1030,7 +1030,7 @@ def get_or_create_expectation_suite( # noqa: C901 if not component_name: component_name = "test" - expectation_suite_name = f"{TEMPORARY_EXPECTATION_SUITE_NAME_PREFIX}.{component_name}.{TEMPORARY_EXPECTATION_SUITE_NAME_STEM}.{str(uuid.uuid4())[:8]}" # noqa: E501 + expectation_suite_name = f"{TEMPORARY_EXPECTATION_SUITE_NAME_PREFIX}.{component_name}.{TEMPORARY_EXPECTATION_SUITE_NAME_STEM}.{str(uuid.uuid4())[:8]}" # noqa: E501 # FIXME CoP if create_expectation_suite: if persist and data_context: @@ -1069,7 +1069,7 @@ def sanitize_parameter_name( Returns: string-valued sanitized concatenation of "name" and MD5-digest of "suffix" arguments. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if suffix: suffix = hashlib.md5(suffix.encode("utf-8")).hexdigest() name = f"{name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}{suffix}" diff --git a/great_expectations/experimental/rule_based_profiler/parameter_builder/__init__.py b/great_expectations/experimental/rule_based_profiler/parameter_builder/__init__.py index ccee6a9fad43..e3c75cc40985 100644 --- a/great_expectations/experimental/rule_based_profiler/parameter_builder/__init__.py +++ b/great_expectations/experimental/rule_based_profiler/parameter_builder/__init__.py @@ -1,37 +1,37 @@ -from great_expectations.experimental.rule_based_profiler.parameter_builder.parameter_builder import ( # isort:skip # noqa: E501 +from great_expectations.experimental.rule_based_profiler.parameter_builder.parameter_builder import ( # isort:skip # noqa: E501 # FIXME CoP ParameterBuilder, init_rule_parameter_builders, ) -from great_expectations.experimental.rule_based_profiler.parameter_builder.metric_multi_batch_parameter_builder import ( # isort:skip # noqa: E501 +from great_expectations.experimental.rule_based_profiler.parameter_builder.metric_multi_batch_parameter_builder import ( # isort:skip # noqa: E501 # FIXME CoP MetricMultiBatchParameterBuilder, ) -from great_expectations.experimental.rule_based_profiler.parameter_builder.metric_single_batch_parameter_builder import ( # isort:skip # noqa: E501 +from great_expectations.experimental.rule_based_profiler.parameter_builder.metric_single_batch_parameter_builder import ( # isort:skip # noqa: E501 # FIXME CoP MetricSingleBatchParameterBuilder, ) -from great_expectations.experimental.rule_based_profiler.parameter_builder.numeric_metric_range_multi_batch_parameter_builder import ( # isort:skip # noqa: E501 +from great_expectations.experimental.rule_based_profiler.parameter_builder.numeric_metric_range_multi_batch_parameter_builder import ( # isort:skip # noqa: E501 # FIXME CoP 
NumericMetricRangeMultiBatchParameterBuilder, ) -from great_expectations.experimental.rule_based_profiler.parameter_builder.mean_unexpected_map_metric_multi_batch_parameter_builder import ( # isort:skip # noqa: E501 +from great_expectations.experimental.rule_based_profiler.parameter_builder.mean_unexpected_map_metric_multi_batch_parameter_builder import ( # isort:skip # noqa: E501 # FIXME CoP MeanUnexpectedMapMetricMultiBatchParameterBuilder, ) -from great_expectations.experimental.rule_based_profiler.parameter_builder.mean_table_columns_set_match_multi_batch_parameter_builder import ( # isort:skip # noqa: E501 +from great_expectations.experimental.rule_based_profiler.parameter_builder.mean_table_columns_set_match_multi_batch_parameter_builder import ( # isort:skip # noqa: E501 # FIXME CoP MeanTableColumnsSetMatchMultiBatchParameterBuilder, ) -from great_expectations.experimental.rule_based_profiler.parameter_builder.unexpected_count_statistics_multi_batch_parameter_builder import ( # isort:skip # noqa: E501 +from great_expectations.experimental.rule_based_profiler.parameter_builder.unexpected_count_statistics_multi_batch_parameter_builder import ( # isort:skip # noqa: E501 # FIXME CoP UnexpectedCountStatisticsMultiBatchParameterBuilder, ) -from great_expectations.experimental.rule_based_profiler.parameter_builder.regex_pattern_string_parameter_builder import ( # isort:skip # noqa: E501 +from great_expectations.experimental.rule_based_profiler.parameter_builder.regex_pattern_string_parameter_builder import ( # isort:skip # noqa: E501 # FIXME CoP RegexPatternStringParameterBuilder, ) -from great_expectations.experimental.rule_based_profiler.parameter_builder.simple_date_format_string_parameter_builder import ( # isort:skip # noqa: E501 +from great_expectations.experimental.rule_based_profiler.parameter_builder.simple_date_format_string_parameter_builder import ( # isort:skip # noqa: E501 # FIXME CoP SimpleDateFormatStringParameterBuilder, ) -from 
great_expectations.experimental.rule_based_profiler.parameter_builder.value_set_multi_batch_parameter_builder import ( # isort:skip # noqa: E501 +from great_expectations.experimental.rule_based_profiler.parameter_builder.value_set_multi_batch_parameter_builder import ( # isort:skip # noqa: E501 # FIXME CoP ValueSetMultiBatchParameterBuilder, ) -from great_expectations.experimental.rule_based_profiler.parameter_builder.value_counts_single_batch_parameter_builder import ( # isort:skip # noqa: E501 +from great_expectations.experimental.rule_based_profiler.parameter_builder.value_counts_single_batch_parameter_builder import ( # isort:skip # noqa: E501 # FIXME CoP ValueCountsSingleBatchParameterBuilder, ) -from great_expectations.experimental.rule_based_profiler.parameter_builder.histogram_single_batch_parameter_builder import ( # isort:skip # noqa: E501 +from great_expectations.experimental.rule_based_profiler.parameter_builder.histogram_single_batch_parameter_builder import ( # isort:skip # noqa: E501 # FIXME CoP HistogramSingleBatchParameterBuilder, ) diff --git a/great_expectations/experimental/rule_based_profiler/parameter_builder/histogram_single_batch_parameter_builder.py b/great_expectations/experimental/rule_based_profiler/parameter_builder/histogram_single_batch_parameter_builder.py index a15b348891c1..24c0ad185519 100644 --- a/great_expectations/experimental/rule_based_profiler/parameter_builder/histogram_single_batch_parameter_builder.py +++ b/great_expectations/experimental/rule_based_profiler/parameter_builder/histogram_single_batch_parameter_builder.py @@ -5,7 +5,7 @@ import numpy as np from great_expectations.compatibility.typing_extensions import override -from great_expectations.core.domain import Domain # noqa: TCH001 +from great_expectations.core.domain import Domain # noqa: TCH001 # FIXME CoP from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.experimental.rule_based_profiler.config import 
ParameterBuilderConfig from great_expectations.experimental.rule_based_profiler.exceptions import ProfilerExecutionError @@ -26,7 +26,7 @@ ParameterNode, ) from great_expectations.types.attributes import Attributes -from great_expectations.validator.computed_metric import MetricValue # noqa: TCH001 +from great_expectations.validator.computed_metric import MetricValue # noqa: TCH001 # FIXME CoP if TYPE_CHECKING: from great_expectations.data_context.data_context.abstract_data_context import ( @@ -52,7 +52,7 @@ class HistogramSingleBatchParameterBuilder(MetricSingleBatchParameterBuilder): } ) - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, name: str, bins: str = "uniform", @@ -74,7 +74,7 @@ def __init__( # noqa: PLR0913 ParameterBuilder objects' outputs available (as fully-qualified parameter names) is pre-requisite. These "ParameterBuilder" configurations help build parameters needed for this "ParameterBuilder". data_context: AbstractDataContext associated with this ParameterBuilder - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP self._column_partition_metric_single_batch_parameter_builder_config = ( ParameterBuilderConfig( @@ -125,10 +125,10 @@ def _build_parameters( Returns: Attributes object, containing computed parameter values and parameter computation details metadata. - """ # noqa: E501 - fully_qualified_column_partition_metric_single_batch_parameter_builder_name: str = f"{RAW_PARAMETER_KEY}{self._column_partition_metric_single_batch_parameter_builder_config.name}" # noqa: E501 - # Obtain "column.partition" from "rule state" (i.e., variables and parameters); from instance variable otherwise. 
# noqa: E501 - column_partition_parameter_node: ParameterNode = get_parameter_value_and_validate_return_type( # noqa: E501 + """ # noqa: E501 # FIXME CoP + fully_qualified_column_partition_metric_single_batch_parameter_builder_name: str = f"{RAW_PARAMETER_KEY}{self._column_partition_metric_single_batch_parameter_builder_config.name}" # noqa: E501 # FIXME CoP + # Obtain "column.partition" from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP + column_partition_parameter_node: ParameterNode = get_parameter_value_and_validate_return_type( # noqa: E501 # FIXME CoP domain=domain, parameter_reference=fully_qualified_column_partition_metric_single_batch_parameter_builder_name, expected_return_type=None, @@ -140,7 +140,7 @@ def _build_parameters( ] if domain.domain_type == MetricDomainTypes.COLUMN and "." in domain.domain_kwargs["column"]: - raise ProfilerExecutionError( # noqa: TRY003 + raise ProfilerExecutionError( # noqa: TRY003 # FIXME CoP "Column names cannot contain '.' when computing the histogram metric." ) @@ -149,17 +149,17 @@ def _build_parameters( message=f"""Partitioning values for {self.__class__.__name__} by \ {self._column_partition_metric_single_batch_parameter_builder_config.name} into bins encountered empty or non-existent \ elements. -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) if not _is_iterable_of_numeric_dtypes(bins): raise ProfilerExecutionError( message=f"""Partitioning values for {self.__class__.__name__} by \ {self._column_partition_metric_single_batch_parameter_builder_config.name} did not yield bins of supported data type. -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) - # Only unique "bins" are necessary (hence, "n_bins" is potentially lowered to fit data distribution). # noqa: E501 + # Only unique "bins" are necessary (hence, "n_bins" is potentially lowered to fit data distribution). 
# noqa: E501 # FIXME CoP bins = sorted(set(bins)) column_values_nonnull_count_metric_single_batch_parameter_builder = ( @@ -183,8 +183,8 @@ def _build_parameters( batch_request=self.batch_request, runtime_configuration=runtime_configuration, ) - # Obtain "column_values.nonnull.count" from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 - column_values_nonnull_count_parameter_node: ParameterNode = get_parameter_value_and_validate_return_type( # noqa: E501 + # Obtain "column_values.nonnull.count" from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP + column_values_nonnull_count_parameter_node: ParameterNode = get_parameter_value_and_validate_return_type( # noqa: E501 # FIXME CoP domain=domain, parameter_reference=column_values_nonnull_count_metric_single_batch_parameter_builder.raw_fully_qualified_parameter_name, expected_return_type=None, diff --git a/great_expectations/experimental/rule_based_profiler/parameter_builder/mean_table_columns_set_match_multi_batch_parameter_builder.py b/great_expectations/experimental/rule_based_profiler/parameter_builder/mean_table_columns_set_match_multi_batch_parameter_builder.py index e261fd396f8a..93d12c875162 100644 --- a/great_expectations/experimental/rule_based_profiler/parameter_builder/mean_table_columns_set_match_multi_batch_parameter_builder.py +++ b/great_expectations/experimental/rule_based_profiler/parameter_builder/mean_table_columns_set_match_multi_batch_parameter_builder.py @@ -5,9 +5,9 @@ import numpy as np from great_expectations.compatibility.typing_extensions import override -from great_expectations.core.domain import Domain # noqa: TCH001 +from great_expectations.core.domain import Domain # noqa: TCH001 # FIXME CoP from great_expectations.experimental.rule_based_profiler.config import ( - ParameterBuilderConfig, # noqa: TCH001 + ParameterBuilderConfig, # noqa: TCH001 # FIXME CoP ) from 
great_expectations.experimental.rule_based_profiler.helpers.util import ( get_parameter_value_and_validate_return_type, @@ -22,7 +22,7 @@ ParameterNode, ) from great_expectations.types.attributes import Attributes -from great_expectations.validator.computed_metric import MetricValue # noqa: TCH001 +from great_expectations.validator.computed_metric import MetricValue # noqa: TCH001 # FIXME CoP if TYPE_CHECKING: from great_expectations.data_context.data_context.abstract_data_context import ( @@ -41,7 +41,7 @@ class MeanTableColumnsSetMatchMultiBatchParameterBuilder(MetricMultiBatchParamet Step-2: Compute set union operation of column lists from Step-1 over all Batch objects (gives maximum column set). Step-3: Assign match scores: if column set of a Batch equals overall (maximum) column set, give it 1; 0 otherwise. Step-4: Compute mean value of match scores as "success_ratio" (divide sum of scores by number of Batch objects). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP exclude_field_names: ClassVar[Set[str]] = ( MetricMultiBatchParameterBuilder.exclude_field_names @@ -73,7 +73,7 @@ def __init__( ParameterBuilder objects' outputs available (as fully-qualified parameter names) is pre-requisite. These "ParameterBuilder" configurations help build parameters needed for this "ParameterBuilder". data_context: AbstractDataContext associated with this ParameterBuilder - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP super().__init__( name=name, metric_name="table.columns", @@ -99,7 +99,7 @@ def _build_parameters( Returns: Attributes object, containing computed parameter values and parameter computation details metadata. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # Compute "table.columns" metric value for each Batch object. 
super().build_parameters( domain=domain, @@ -138,7 +138,7 @@ def _build_parameters( 1 if one_batch_table_columns_names_set == multi_batch_table_columns_names_as_set else 0 - for one_batch_table_columns_names_set in multi_batch_table_columns_names_sets_as_list # noqa: E501 + for one_batch_table_columns_names_set in multi_batch_table_columns_names_sets_as_list # noqa: E501 # FIXME CoP ] ) ) diff --git a/great_expectations/experimental/rule_based_profiler/parameter_builder/mean_unexpected_map_metric_multi_batch_parameter_builder.py b/great_expectations/experimental/rule_based_profiler/parameter_builder/mean_unexpected_map_metric_multi_batch_parameter_builder.py index 3d01b12bf0ac..bc85fd4ee36e 100644 --- a/great_expectations/experimental/rule_based_profiler/parameter_builder/mean_unexpected_map_metric_multi_batch_parameter_builder.py +++ b/great_expectations/experimental/rule_based_profiler/parameter_builder/mean_unexpected_map_metric_multi_batch_parameter_builder.py @@ -4,19 +4,19 @@ import numpy as np -from great_expectations.core.domain import Domain # noqa: TCH001 +from great_expectations.core.domain import Domain # noqa: TCH001 # FIXME CoP from great_expectations.core.metric_function_types import ( SummarizationMetricNameSuffixes, ) from great_expectations.experimental.rule_based_profiler.config import ( - ParameterBuilderConfig, # noqa: TCH001 + ParameterBuilderConfig, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.helpers.util import ( NP_EPSILON, get_parameter_value_and_validate_return_type, ) from great_expectations.experimental.rule_based_profiler.metric_computation_result import ( - MetricValues, # noqa: TCH001 + MetricValues, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.parameter_builder import ( MetricMultiBatchParameterBuilder, @@ -39,7 +39,7 @@ class MeanUnexpectedMapMetricMultiBatchParameterBuilder(MetricMultiBatchParameterBuilder): """ Compute mean unexpected count ratio 
(as a fraction) of specified map-style metric across every Batch of data given. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP exclude_field_names: ClassVar[Set[str]] = ( MetricMultiBatchParameterBuilder.exclude_field_names @@ -52,7 +52,7 @@ class MeanUnexpectedMapMetricMultiBatchParameterBuilder(MetricMultiBatchParamete } ) - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, name: str, map_metric_name: str, @@ -78,7 +78,7 @@ def __init__( # noqa: PLR0913 ParameterBuilder objects' outputs available (as fully-qualified parameter names) is pre-requisite. These "ParameterBuilder" configurations help build parameters needed for this "ParameterBuilder". data_context: AbstractDataContext associated with this ParameterBuilder - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP super().__init__( name=name, metric_name=f"{map_metric_name}.{SummarizationMetricNameSuffixes.UNEXPECTED_COUNT.value}", @@ -119,8 +119,8 @@ def _build_parameters( Returns: Attributes object, containing computed parameter values and parameter computation details metadata. - """ # noqa: E501 - # Obtain total_count_parameter_builder_name from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + """ # noqa: E501 # FIXME CoP + # Obtain total_count_parameter_builder_name from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP total_count_parameter_builder_name: str = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=self.total_count_parameter_builder_name, @@ -132,7 +132,7 @@ def _build_parameters( fully_qualified_total_count_parameter_builder_name: str = ( f"{RAW_PARAMETER_KEY}{total_count_parameter_builder_name}" ) - # Obtain total_count from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain total_count from "rule state" (i.e., variables and parameters); from instance variable otherwise. 
# noqa: E501 # FIXME CoP total_count_parameter_node: ParameterNode = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=fully_qualified_total_count_parameter_builder_name, @@ -144,7 +144,7 @@ def _build_parameters( FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY ] - # Obtain null_count_parameter_builder_name from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain null_count_parameter_builder_name from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP null_count_parameter_builder_name: Optional[str] = ( get_parameter_value_and_validate_return_type( domain=domain, @@ -169,7 +169,7 @@ def _build_parameters( fully_qualified_null_count_parameter_builder_name: str = ( f"{RAW_PARAMETER_KEY}{null_count_parameter_builder_name}" ) - # Obtain null_count from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain null_count from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP null_count_parameter_node: ParameterNode = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=fully_qualified_null_count_parameter_builder_name, @@ -181,7 +181,7 @@ def _build_parameters( nonnull_count_values: np.ndarray = total_count_values - null_count_values - # Compute "unexpected_count" corresponding to "map_metric_name" (given as argument to this "ParameterBuilder"). # noqa: E501 + # Compute "unexpected_count" corresponding to "map_metric_name" (given as argument to this "ParameterBuilder"). # noqa: E501 # FIXME CoP super().build_parameters( domain=domain, variables=variables, @@ -190,7 +190,7 @@ def _build_parameters( runtime_configuration=runtime_configuration, ) - # Retrieve "unexpected_count" corresponding to "map_metric_name" (given as argument to this "ParameterBuilder"). 
# noqa: E501 + # Retrieve "unexpected_count" corresponding to "map_metric_name" (given as argument to this "ParameterBuilder"). # noqa: E501 # FIXME CoP parameter_node: ParameterNode = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=self.raw_fully_qualified_parameter_name, diff --git a/great_expectations/experimental/rule_based_profiler/parameter_builder/metric_multi_batch_parameter_builder.py b/great_expectations/experimental/rule_based_profiler/parameter_builder/metric_multi_batch_parameter_builder.py index 199696cbfc9e..0de776d83a4b 100644 --- a/great_expectations/experimental/rule_based_profiler/parameter_builder/metric_multi_batch_parameter_builder.py +++ b/great_expectations/experimental/rule_based_profiler/parameter_builder/metric_multi_batch_parameter_builder.py @@ -4,16 +4,16 @@ import numpy as np -from great_expectations.core.domain import Domain # noqa: TCH001 +from great_expectations.core.domain import Domain # noqa: TCH001 # FIXME CoP from great_expectations.experimental.rule_based_profiler.config import ( - ParameterBuilderConfig, # noqa: TCH001 + ParameterBuilderConfig, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.helpers.util import ( get_parameter_value_and_validate_return_type, ) from great_expectations.experimental.rule_based_profiler.metric_computation_result import ( - MetricComputationDetails, # noqa: TCH001 - MetricComputationResult, # noqa: TCH001 + MetricComputationDetails, # noqa: TCH001 # FIXME CoP + MetricComputationResult, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.parameter_builder import ParameterBuilder from great_expectations.experimental.rule_based_profiler.parameter_container import ( @@ -34,9 +34,9 @@ class MetricMultiBatchParameterBuilder(ParameterBuilder): """ A Single/Multi-Batch implementation for obtaining a resolved (evaluated) metric, using domain_kwargs, value_kwargs, and metric_name as arguments. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, name: str, metric_name: str, @@ -75,7 +75,7 @@ def __init__( # noqa: PLR0913 ParameterBuilder objects' outputs available (as fully-qualified parameter names) is pre-requisite. These "ParameterBuilder" configurations help build parameters needed for this "ParameterBuilder". data_context: AbstractDataContext associated with this ParameterBuilder - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP super().__init__( name=name, suite_parameter_builder_configs=suite_parameter_builder_configs, @@ -141,8 +141,8 @@ def _build_parameters( Returns: Attributes object, containing computed parameter values and parameter computation details metadata. - """ # noqa: E501 - # Obtain single_batch_mode from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + """ # noqa: E501 # FIXME CoP + # Obtain single_batch_mode from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP single_batch_mode: bool = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=self.single_batch_mode, @@ -167,7 +167,7 @@ def _build_parameters( ) details: MetricComputationDetails = metric_computation_result.details - # Obtain reduce_scalar_metric from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain reduce_scalar_metric from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP reduce_scalar_metric: bool = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=self.reduce_scalar_metric, @@ -177,7 +177,7 @@ def _build_parameters( ) if len(metric_computation_result.attributed_resolved_metrics) == 1: - # As a simplification, apply reduction to scalar in case of one-dimensional metric (for convenience). 
# noqa: E501 + # As a simplification, apply reduction to scalar in case of one-dimensional metric (for convenience). # noqa: E501 # FIXME CoP if ( reduce_scalar_metric and isinstance( @@ -197,10 +197,10 @@ def _build_parameters( ): return Attributes( { - FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY: metric_computation_result.attributed_resolved_metrics[ # noqa: E501 + FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY: metric_computation_result.attributed_resolved_metrics[ # noqa: E501 # FIXME CoP 0 ].conditioned_metric_values[:, 0], - FULLY_QUALIFIED_PARAMETER_NAME_ATTRIBUTED_VALUE_KEY: metric_computation_result.attributed_resolved_metrics[ # noqa: E501 + FULLY_QUALIFIED_PARAMETER_NAME_ATTRIBUTED_VALUE_KEY: metric_computation_result.attributed_resolved_metrics[ # noqa: E501 # FIXME CoP 0 ].conditioned_attributed_metric_values, FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY: details, @@ -209,10 +209,10 @@ def _build_parameters( return Attributes( { - FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY: metric_computation_result.attributed_resolved_metrics[ # noqa: E501 + FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY: metric_computation_result.attributed_resolved_metrics[ # noqa: E501 # FIXME CoP 0 ].conditioned_metric_values, - FULLY_QUALIFIED_PARAMETER_NAME_ATTRIBUTED_VALUE_KEY: metric_computation_result.attributed_resolved_metrics[ # noqa: E501 + FULLY_QUALIFIED_PARAMETER_NAME_ATTRIBUTED_VALUE_KEY: metric_computation_result.attributed_resolved_metrics[ # noqa: E501 # FIXME CoP 0 ].conditioned_attributed_metric_values, FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY: details, @@ -221,8 +221,8 @@ def _build_parameters( return Attributes( { - FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY: metric_computation_result.attributed_resolved_metrics, # noqa: E501 - FULLY_QUALIFIED_PARAMETER_NAME_ATTRIBUTED_VALUE_KEY: metric_computation_result.attributed_resolved_metrics, # noqa: E501 + FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY: metric_computation_result.attributed_resolved_metrics, # noqa: E501 # FIXME CoP + 
FULLY_QUALIFIED_PARAMETER_NAME_ATTRIBUTED_VALUE_KEY: metric_computation_result.attributed_resolved_metrics, # noqa: E501 # FIXME CoP FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY: details, } ) diff --git a/great_expectations/experimental/rule_based_profiler/parameter_builder/metric_single_batch_parameter_builder.py b/great_expectations/experimental/rule_based_profiler/parameter_builder/metric_single_batch_parameter_builder.py index c8df3df656a8..9b64d51d1567 100644 --- a/great_expectations/experimental/rule_based_profiler/parameter_builder/metric_single_batch_parameter_builder.py +++ b/great_expectations/experimental/rule_based_profiler/parameter_builder/metric_single_batch_parameter_builder.py @@ -3,9 +3,9 @@ from typing import TYPE_CHECKING, ClassVar, Dict, List, Optional, Set, Union from great_expectations.compatibility.typing_extensions import override -from great_expectations.core.domain import Domain # noqa: TCH001 +from great_expectations.core.domain import Domain # noqa: TCH001 # FIXME CoP from great_expectations.experimental.rule_based_profiler.config import ( - ParameterBuilderConfig, # noqa: TCH001 + ParameterBuilderConfig, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.helpers.util import ( get_parameter_value_and_validate_return_type, @@ -31,7 +31,7 @@ class MetricSingleBatchParameterBuilder(MetricMultiBatchParameterBuilder): """ A Single-Batch-only implementation for obtaining a resolved (evaluated) metric, using domain_kwargs, value_kwargs, and metric_name as arguments. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP exclude_field_names: ClassVar[Set[str]] = ( MetricMultiBatchParameterBuilder.exclude_field_names @@ -40,7 +40,7 @@ class MetricSingleBatchParameterBuilder(MetricMultiBatchParameterBuilder): } ) - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, name: str, metric_name: Optional[str] = None, @@ -68,7 +68,7 @@ def __init__( # noqa: PLR0913 ParameterBuilder objects' outputs available (as fully-qualified parameter names) is pre-requisite. These "ParameterBuilder" configurations help build parameters needed for this "ParameterBuilder". data_context: AbstractDataContext associated with this ParameterBuilder - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP super().__init__( name=name, metric_name=metric_name, # type: ignore[arg-type] # could be None @@ -95,7 +95,7 @@ def _build_parameters( Returns: Attributes object, containing computed parameter values and parameter computation details metadata. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # Compute metric value for one Batch object (expressed as list of Batch objects). 
super().build_parameters( domain=domain, diff --git a/great_expectations/experimental/rule_based_profiler/parameter_builder/numeric_metric_range_multi_batch_parameter_builder.py b/great_expectations/experimental/rule_based_profiler/parameter_builder/numeric_metric_range_multi_batch_parameter_builder.py index a28d777c58fb..c69330a30256 100644 --- a/great_expectations/experimental/rule_based_profiler/parameter_builder/numeric_metric_range_multi_batch_parameter_builder.py +++ b/great_expectations/experimental/rule_based_profiler/parameter_builder/numeric_metric_range_multi_batch_parameter_builder.py @@ -19,27 +19,27 @@ import numpy as np -from great_expectations.core.domain import Domain # noqa: TCH001 +from great_expectations.core.domain import Domain # noqa: TCH001 # FIXME CoP from great_expectations.experimental.rule_based_profiler.config import ( - ParameterBuilderConfig, # noqa: TCH001 + ParameterBuilderConfig, # noqa: TCH001 # FIXME CoP ) -from great_expectations.experimental.rule_based_profiler.estimators.bootstrap_numeric_range_estimator import ( # noqa: E501 +from great_expectations.experimental.rule_based_profiler.estimators.bootstrap_numeric_range_estimator import ( # noqa: E501 # FIXME CoP BootstrapNumericRangeEstimator, ) -from great_expectations.experimental.rule_based_profiler.estimators.exact_numeric_range_estimator import ( # noqa: E501 +from great_expectations.experimental.rule_based_profiler.estimators.exact_numeric_range_estimator import ( # noqa: E501 # FIXME CoP ExactNumericRangeEstimator, ) -from great_expectations.experimental.rule_based_profiler.estimators.kde_numeric_range_estimator import ( # noqa: E501 +from great_expectations.experimental.rule_based_profiler.estimators.kde_numeric_range_estimator import ( # noqa: E501 # FIXME CoP KdeNumericRangeEstimator, ) -from great_expectations.experimental.rule_based_profiler.estimators.numeric_range_estimation_result import ( # noqa: E501 +from 
great_expectations.experimental.rule_based_profiler.estimators.numeric_range_estimation_result import ( # noqa: E501 # FIXME CoP NUM_HISTOGRAM_BINS, NumericRangeEstimationResult, ) from great_expectations.experimental.rule_based_profiler.estimators.numeric_range_estimator import ( - NumericRangeEstimator, # noqa: TCH001 + NumericRangeEstimator, # noqa: TCH001 # FIXME CoP ) -from great_expectations.experimental.rule_based_profiler.estimators.quantiles_numeric_range_estimator import ( # noqa: E501 +from great_expectations.experimental.rule_based_profiler.estimators.quantiles_numeric_range_estimator import ( # noqa: E501 # FIXME CoP QuantilesNumericRangeEstimator, ) from great_expectations.experimental.rule_based_profiler.exceptions import ProfilerExecutionError @@ -51,7 +51,7 @@ integer_semantic_domain_type, ) from great_expectations.experimental.rule_based_profiler.metric_computation_result import ( - MetricValues, # noqa: TCH001 + MetricValues, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.parameter_builder import ( MetricMultiBatchParameterBuilder, @@ -98,7 +98,7 @@ class NumericMetricRangeMultiBatchParameterBuilder(MetricMultiBatchParameterBuil * exact -- uses the minimum and maximum observations for range boundaries. * bootstrap -- a statistical resampling technique (see "https://en.wikipedia.org/wiki/Bootstrapping_(statistics)"). * kde -- a statistical technique that fits a gaussian to the distribution and resamples from it. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP RECOGNIZED_SAMPLING_METHOD_NAMES: set = { "bootstrap", @@ -123,7 +123,7 @@ class NumericMetricRangeMultiBatchParameterBuilder(MetricMultiBatchParameterBuil } ) - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, name: str, metric_name: Optional[str] = None, @@ -192,7 +192,7 @@ def __init__( # noqa: PLR0913 ParameterBuilder objects' outputs available (as fully-qualified parameter names) is pre-requisite. 
These "ParameterBuilder" configurations help build parameters needed for this "ParameterBuilder". data_context: AbstractDataContext associated with this ParameterBuilder - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP super().__init__( name=name, metric_name=metric_name, @@ -238,18 +238,18 @@ def __init__( # noqa: PLR0913 "lower_bound": None, "upper_bound": None, } - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if not isinstance(truncate_values, str): truncate_values_keys: set = set(truncate_values.keys()) if ( not truncate_values_keys - <= NumericMetricRangeMultiBatchParameterBuilder.RECOGNIZED_TRUNCATE_DISTRIBUTION_KEYS # noqa: E501 + <= NumericMetricRangeMultiBatchParameterBuilder.RECOGNIZED_TRUNCATE_DISTRIBUTION_KEYS # noqa: E501 # FIXME CoP ): raise ProfilerExecutionError( message=f"""Unrecognized truncate_values key(s) in {self.__class__.__name__}: "{truncate_values_keys - NumericMetricRangeMultiBatchParameterBuilder.RECOGNIZED_TRUNCATE_DISTRIBUTION_KEYS!s}" \ detected. -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) self._truncate_values = truncate_values @@ -329,10 +329,10 @@ def _build_parameters( 8. Compute [low, high] for the desired metric using the chosen estimator method. 9. Return [low, high] for the desired metric as estimated by the specified sampling method. 10. Set up the arguments and call build_parameter_container() to store the parameter as part of "rule state". - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP parameter_reference: str if self.metric_multi_batch_parameter_builder_name: - # Obtain metric_multi_batch_parameter_builder_name from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain metric_multi_batch_parameter_builder_name from "rule state" (i.e., variables and parameters); from instance variable otherwise. 
# noqa: E501 # FIXME CoP metric_multi_batch_parameter_builder_name: str = ( get_parameter_value_and_validate_return_type( domain=domain, @@ -367,7 +367,7 @@ def _build_parameters( round_decimals: int if ( self.metric_name - not in NumericMetricRangeMultiBatchParameterBuilder.METRIC_NAMES_EXEMPT_FROM_VALUE_ROUNDING # noqa: E501 + not in NumericMetricRangeMultiBatchParameterBuilder.METRIC_NAMES_EXEMPT_FROM_VALUE_ROUNDING # noqa: E501 # FIXME CoP and integer_semantic_domain_type(domain=domain) ): round_decimals = 0 @@ -401,7 +401,7 @@ def _build_parameters( parameter_node[FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY] ) - # Obtain include_estimator_samples_histogram_in_details from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain include_estimator_samples_histogram_in_details from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP include_estimator_samples_histogram_in_details: bool = ( get_parameter_value_and_validate_return_type( domain=domain, @@ -431,8 +431,8 @@ def _build_numeric_range_estimator( ) -> NumericRangeEstimator: """ Determines "estimator" name and returns appropriate configured "NumericRangeEstimator" subclass instance. - """ # noqa: E501 - # Obtain estimator directive from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + """ # noqa: E501 # FIXME CoP + # Obtain estimator directive from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP estimator: str = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=self.estimator, @@ -447,7 +447,7 @@ def _build_numeric_range_estimator( raise ProfilerExecutionError( message=f"""The directive "estimator" for {self.__class__.__name__} can be only one of {NumericMetricRangeMultiBatchParameterBuilder.RECOGNIZED_SAMPLING_METHOD_NAMES} ("{estimator}" was detected). 
-""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) if estimator == "exact": @@ -459,22 +459,22 @@ def _build_numeric_range_estimator( { "false_positive_rate": self.false_positive_rate, "round_decimals": round_decimals, - "quantile_statistic_interpolation_method": self.quantile_statistic_interpolation_method, # noqa: E501 + "quantile_statistic_interpolation_method": self.quantile_statistic_interpolation_method, # noqa: E501 # FIXME CoP } ) ) - # Since complex numerical calculations do not support DateTime/TimeStamp data types, use "quantiles" estimator. # noqa: E501 + # Since complex numerical calculations do not support DateTime/TimeStamp data types, use "quantiles" estimator. # noqa: E501 # FIXME CoP if datetime_semantic_domain_type(domain=domain): logger.info( - f'Estimator "{estimator}" does not support DateTime/TimeStamp data types (downgrading to "quantiles").' # noqa: E501 + f'Estimator "{estimator}" does not support DateTime/TimeStamp data types (downgrading to "quantiles").' # noqa: E501 # FIXME CoP ) return QuantilesNumericRangeEstimator( configuration=Attributes( { "false_positive_rate": self.false_positive_rate, "round_decimals": round_decimals, - "quantile_statistic_interpolation_method": self.quantile_statistic_interpolation_method, # noqa: E501 + "quantile_statistic_interpolation_method": self.quantile_statistic_interpolation_method, # noqa: E501 # FIXME CoP } ) ) @@ -487,9 +487,9 @@ def _build_numeric_range_estimator( "round_decimals": round_decimals, "n_resamples": self.n_resamples, "random_seed": self.random_seed, - "quantile_statistic_interpolation_method": self.quantile_statistic_interpolation_method, # noqa: E501 + "quantile_statistic_interpolation_method": self.quantile_statistic_interpolation_method, # noqa: E501 # FIXME CoP "quantile_bias_correction": self.quantile_bias_correction, - "quantile_bias_std_error_ratio_threshold": self.quantile_bias_std_error_ratio_threshold, # noqa: E501 + "quantile_bias_std_error_ratio_threshold": 
self.quantile_bias_std_error_ratio_threshold, # noqa: E501 # FIXME CoP } ) ) @@ -502,7 +502,7 @@ def _build_numeric_range_estimator( "round_decimals": round_decimals, "n_resamples": self.n_resamples, "random_seed": self.random_seed, - "quantile_statistic_interpolation_method": self.quantile_statistic_interpolation_method, # noqa: E501 + "quantile_statistic_interpolation_method": self.quantile_statistic_interpolation_method, # noqa: E501 # FIXME CoP "bw_method": self.bw_method, } ) @@ -510,7 +510,7 @@ def _build_numeric_range_estimator( return ExactNumericRangeEstimator() - def _estimate_metric_value_range( # noqa: C901, PLR0912, PLR0913, PLR0915 + def _estimate_metric_value_range( # noqa: C901, PLR0912, PLR0913, PLR0915 # FIXME CoP self, metric_values: np.ndarray, numeric_range_estimator: NumericRangeEstimator, @@ -525,7 +525,7 @@ def _estimate_metric_value_range( # noqa: C901, PLR0912, PLR0913, PLR0915 metric, whose values are being estimated. Thus, for each element in the "R^m" hypercube, an "N"-dimensional vector of sample measurements is constructed and given to the estimator to apply its specific algorithm for computing the range of values in this vector. Estimator algorithms differ based on their use of data samples. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP truncate_values: Dict[str, Number] = self._get_truncate_values_using_heuristics( metric_values=metric_values, domain=domain, @@ -538,10 +538,10 @@ def _estimate_metric_value_range( # noqa: C901, PLR0912, PLR0913, PLR0915 min_value: Number max_value: Number - # Outer-most dimension is data samples (e.g., one per Batch); the rest are dimensions of the actual metric. # noqa: E501 + # Outer-most dimension is data samples (e.g., one per Batch); the rest are dimensions of the actual metric. # noqa: E501 # FIXME CoP metric_value_shape: tuple = metric_values.shape[1:] - # Generate all permutations of indexes for accessing every element of the multi-dimensional metric. 
# noqa: E501 + # Generate all permutations of indexes for accessing every element of the multi-dimensional metric. # noqa: E501 # FIXME CoP metric_value_shape_idx: int axes: List[np.ndarray] = [ np.indices(dimensions=(metric_value_shape_idx,))[0] @@ -549,19 +549,19 @@ def _estimate_metric_value_range( # noqa: C901, PLR0912, PLR0913, PLR0915 ] metric_value_indices: List[tuple] = list(itertools.product(*tuple(axes))) - # Generate all permutations of indexes for accessing estimates of every element of the multi-dimensional metric. # noqa: E501 - # Prefixing multi-dimensional index with "(slice(None, None, None),)" is equivalent to "[:,]" access. # noqa: E501 + # Generate all permutations of indexes for accessing estimates of every element of the multi-dimensional metric. # noqa: E501 # FIXME CoP + # Prefixing multi-dimensional index with "(slice(None, None, None),)" is equivalent to "[:,]" access. # noqa: E501 # FIXME CoP metric_value_idx: tuple metric_value_vector_indices: List[tuple] = [ (slice(None, None, None),) + metric_value_idx for metric_value_idx in metric_value_indices ] - # Initialize value range estimate for multi-dimensional metric to all trivial values (to be updated in situ). # noqa: E501 - # Since range includes min and max values, value range estimate contains 2-element least-significant dimension. # noqa: E501 + # Initialize value range estimate for multi-dimensional metric to all trivial values (to be updated in situ). # noqa: E501 # FIXME CoP + # Since range includes min and max values, value range estimate contains 2-element least-significant dimension. # noqa: E501 # FIXME CoP metric_value_range_shape: tuple = metric_value_shape + (2,) - # Initialize observed_values for multi-dimensional metric to all trivial values (to be updated in situ). # noqa: E501 - # Return values of "numpy.histogram()", histogram and bin edges, are packaged in least-significant dimensions. 
# noqa: E501 + # Initialize observed_values for multi-dimensional metric to all trivial values (to be updated in situ). # noqa: E501 # FIXME CoP + # Return values of "numpy.histogram()", histogram and bin edges, are packaged in least-significant dimensions. # noqa: E501 # FIXME CoP estimation_histogram_shape: tuple = metric_value_shape + ( 2, NUM_HISTOGRAM_BINS + 1, @@ -593,14 +593,14 @@ def _estimate_metric_value_range( # noqa: C901, PLR0912, PLR0913, PLR0915 metric_value_range = np.zeros(shape=metric_value_range_shape) estimation_histogram = np.empty(shape=estimation_histogram_shape) - # Traverse indices of sample vectors corresponding to every element of multi-dimensional metric. # noqa: E501 + # Traverse indices of sample vectors corresponding to every element of multi-dimensional metric. # noqa: E501 # FIXME CoP metric_value_vector: np.ndarray metric_value_range_min_idx: tuple metric_value_range_max_idx: tuple metric_value_estimation_histogram_idx: tuple numeric_range_estimation_result: NumericRangeEstimationResult for metric_value_idx in metric_value_vector_indices: - # Obtain "N"-element-long vector of samples for each element of multi-dimensional metric. # noqa: E501 + # Obtain "N"-element-long vector of samples for each element of multi-dimensional metric. # noqa: E501 # FIXME CoP metric_value_vector = metric_values[metric_value_idx] if not datetime_detected and np.all( np.isclose(metric_value_vector, metric_value_vector[0]) @@ -612,7 +612,7 @@ def _estimate_metric_value_range( # noqa: C901, PLR0912, PLR0913, PLR0915 max_value=metric_value_vector[0], ) else: - # Compute low and high estimates for vector of samples for given element of multi-dimensional metric. # noqa: E501 + # Compute low and high estimates for vector of samples for given element of multi-dimensional metric. 
# noqa: E501 # FIXME CoP numeric_range_estimation_result = ( numeric_range_estimator.get_numeric_range_estimate( metric_values=metric_value_vector, @@ -631,7 +631,7 @@ def _estimate_metric_value_range( # noqa: C901, PLR0912, PLR0913, PLR0915 max_value = min(np.float64(max_value), np.float64(upper_bound)) # Obtain index of metric element (by discarding "N"-element samples dimension). - metric_value_idx = metric_value_idx[1:] # noqa: PLW2901 + metric_value_idx = metric_value_idx[1:] # noqa: PLW2901 # FIXME CoP # Compute indices for metric value range min and max estimates. metric_value_range_min_idx = metric_value_idx + ( @@ -644,11 +644,11 @@ def _estimate_metric_value_range( # noqa: C901, PLR0912, PLR0913, PLR0915 # Compute index for metric value estimation histogram. metric_value_estimation_histogram_idx = metric_value_idx - # Store computed min and max value estimates into allocated range estimate for multi-dimensional metric. # noqa: E501 + # Store computed min and max value estimates into allocated range estimate for multi-dimensional metric. # noqa: E501 # FIXME CoP if datetime_detected: metric_value_range[metric_value_range_min_idx] = min_value metric_value_range[metric_value_range_max_idx] = max_value - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if round_decimals is None: metric_value_range[metric_value_range_min_idx] = np.float64(min_value) metric_value_range[metric_value_range_max_idx] = np.float64(max_value) @@ -660,12 +660,12 @@ def _estimate_metric_value_range( # noqa: C901, PLR0912, PLR0913, PLR0915 np.float64(max_value), round_decimals ) - # Store computed estimation_histogram into allocated range estimate for multi-dimensional metric. # noqa: E501 + # Store computed estimation_histogram into allocated range estimate for multi-dimensional metric. 
# noqa: E501 # FIXME CoP estimation_histogram[metric_value_estimation_histogram_idx] = ( numeric_range_estimation_result.estimation_histogram ) - # As a simplification, apply reduction to scalar in case of one-dimensional metric (for convenience). # noqa: E501 + # As a simplification, apply reduction to scalar in case of one-dimensional metric (for convenience). # noqa: E501 # FIXME CoP if metric_value_range.shape[0] == 1: metric_value_range = metric_value_range[0] estimation_histogram = estimation_histogram[0] @@ -716,7 +716,7 @@ def _get_truncate_values_using_heuristics( variables: Optional[ParameterContainer] = None, parameters: Optional[Dict[str, ParameterContainer]] = None, ) -> Dict[str, Union[Optional[int], Optional[float]]]: - # Obtain truncate_values directive from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain truncate_values directive from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP truncate_values: Dict[str, Optional[Number]] = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=self.truncate_values, @@ -737,7 +737,7 @@ def _get_truncate_values_using_heuristics( raise ProfilerExecutionError( message=f"""The directive "truncate_values" for {self.__class__.__name__} must specify the [lower_bound, upper_bound] closed interval, where either boundary is a numeric value (or None). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) lower_bound: Optional[Number] = truncate_values.get("lower_bound") @@ -762,7 +762,7 @@ def _get_round_decimals_using_heuristics( variables: Optional[ParameterContainer] = None, parameters: Optional[Dict[str, ParameterContainer]] = None, ) -> int: - # Obtain round_decimals directive from "rule state" (i.e., variables and parameters); from instance variable otherwise. 
# noqa: E501 + # Obtain round_decimals directive from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP round_decimals: Optional[int] = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=self.round_decimals, @@ -776,7 +776,7 @@ def _get_round_decimals_using_heuristics( raise ProfilerExecutionError( message=f"""The directive "round_decimals" for {self.__class__.__name__} can be 0 or a positive integer, or must be omitted (or set to None). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) if np.issubdtype(metric_values.dtype, np.integer): diff --git a/great_expectations/experimental/rule_based_profiler/parameter_builder/parameter_builder.py b/great_expectations/experimental/rule_based_profiler/parameter_builder/parameter_builder.py index 8eb5553b14dc..52d925d6298c 100644 --- a/great_expectations/experimental/rule_based_profiler/parameter_builder/parameter_builder.py +++ b/great_expectations/experimental/rule_based_profiler/parameter_builder/parameter_builder.py @@ -23,15 +23,15 @@ import numpy as np import pandas as pd -from great_expectations.core.batch import Batch, BatchRequestBase # noqa: TCH001 -from great_expectations.core.domain import Domain # noqa: TCH001 +from great_expectations.core.batch import Batch, BatchRequestBase # noqa: TCH001 # FIXME CoP +from great_expectations.core.domain import Domain # noqa: TCH001 # FIXME CoP from great_expectations.data_context.util import instantiate_class_from_config from great_expectations.experimental.rule_based_profiler.attributed_resolved_metrics import ( AttributedResolvedMetrics, ) from great_expectations.experimental.rule_based_profiler.builder import Builder from great_expectations.experimental.rule_based_profiler.config import ( - ParameterBuilderConfig, # noqa: TCH001 + ParameterBuilderConfig, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.exceptions import ProfilerExecutionError from 
great_expectations.experimental.rule_based_profiler.helpers.util import ( @@ -56,12 +56,15 @@ get_fully_qualified_parameter_names, ) from great_expectations.types.attributes import Attributes -from great_expectations.util import convert_to_json_serializable, is_parseable_date # noqa: TID251 -from great_expectations.validator.computed_metric import MetricValue # noqa: TCH001 -from great_expectations.validator.exception_info import ExceptionInfo # noqa: TCH001 +from great_expectations.util import ( + convert_to_json_serializable, # noqa: TID251 # FIXME CoP + is_parseable_date, +) +from great_expectations.validator.computed_metric import MetricValue # noqa: TCH001 # FIXME CoP +from great_expectations.validator.exception_info import ExceptionInfo # noqa: TCH001 # FIXME CoP from great_expectations.validator.metric_configuration import MetricConfiguration from great_expectations.validator.validation_graph import ( - ValidationGraph, # noqa: TCH001 + ValidationGraph, # noqa: TCH001 # FIXME CoP ) if TYPE_CHECKING: @@ -90,7 +93,7 @@ class ParameterBuilder(ABC, Builder): class_name: MetricMultiBatchParameterBuilder metric_name: column.mean ``` - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP exclude_field_names: ClassVar[Set[str]] = Builder.exclude_field_names | { "suite_parameter_builders", @@ -113,7 +116,7 @@ def __init__( ParameterBuilder objects' outputs available (as fully-qualified parameter names) is pre-requisite. These "ParameterBuilder" configurations help build parameters needed for this "ParameterBuilder". 
data_context: AbstractDataContext associated with ParameterBuilder - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP super().__init__(data_context=data_context) self._name = name @@ -125,7 +128,7 @@ def __init__( data_context=self._data_context, ) - def build_parameters( # noqa: PLR0913 + def build_parameters( # noqa: PLR0913 # FIXME CoP self, domain: Domain, variables: Optional[ParameterContainer] = None, @@ -144,7 +147,7 @@ def build_parameters( # noqa: PLR0913 batch_list: Explicit list of "Batch" objects to supply data at runtime. batch_request: Explicit batch_request used to supply data at runtime. runtime_configuration: Additional run-time settings (see "Validator.DEFAULT_RUNTIME_CONFIGURATION"). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP runtime_configuration = runtime_configuration or {} fully_qualified_parameter_names: List[str] = get_fully_qualified_parameter_names( @@ -153,7 +156,7 @@ def build_parameters( # noqa: PLR0913 parameters=parameters, ) - # recompute_existing_parameter_values: If "True", recompute value if "fully_qualified_parameter_name" exists. # noqa: E501 + # recompute_existing_parameter_values: If "True", recompute value if "fully_qualified_parameter_name" exists. # noqa: E501 # FIXME CoP recompute_existing_parameter_values: bool = runtime_configuration.get( "recompute_existing_parameter_values", False ) @@ -210,17 +213,17 @@ def resolve_evaluation_dependencies( """ This method computes ("resolves") pre-requisite ("evaluation") dependencies (i.e., results of executing other "ParameterBuilder" objects), whose output(s) are needed by specified "ParameterBuilder" object to operate. - """ # noqa: E501 - # Step-1: Check if any "suite_parameter_builders" are configured for specified "ParameterBuilder" object. # noqa: E501 + """ # noqa: E501 # FIXME CoP + # Step-1: Check if any "suite_parameter_builders" are configured for specified "ParameterBuilder" object. 
# noqa: E501 # FIXME CoP suite_parameter_builders: List[ParameterBuilder] = self.suite_parameter_builders if not suite_parameter_builders: return - # Step-2: Obtain all fully-qualified parameter names ("variables" and "parameter" keys) in namespace of "Domain" # noqa: E501 - # (fully-qualified parameter names are stored in "ParameterNode" objects of "ParameterContainer" of "Domain" # noqa: E501 - # when "ParameterBuilder.build_parameters()" is executed for "ParameterBuilder.fully_qualified_parameter_name"); # noqa: E501 - # this list contains "raw" (for internal calculations) and "JSON-serialized" fully-qualified parameter names. # noqa: E501 + # Step-2: Obtain all fully-qualified parameter names ("variables" and "parameter" keys) in namespace of "Domain" # noqa: E501 # FIXME CoP + # (fully-qualified parameter names are stored in "ParameterNode" objects of "ParameterContainer" of "Domain" # noqa: E501 # FIXME CoP + # when "ParameterBuilder.build_parameters()" is executed for "ParameterBuilder.fully_qualified_parameter_name"); # noqa: E501 # FIXME CoP + # this list contains "raw" (for internal calculations) and "JSON-serialized" fully-qualified parameter names. # noqa: E501 # FIXME CoP if fully_qualified_parameter_names is None: fully_qualified_parameter_names = get_fully_qualified_parameter_names( domain=domain, @@ -228,8 +231,8 @@ def resolve_evaluation_dependencies( parameters=parameters, ) - # Step-3: Check presence of fully-qualified parameter names of "ParameterBuilder" objects, obtained by iterating # noqa: E501 - # over evaluation dependencies. Execute "ParameterBuilder.build_parameters()" if not in "Domain" scoped list. # noqa: E501 + # Step-3: Check presence of fully-qualified parameter names of "ParameterBuilder" objects, obtained by iterating # noqa: E501 # FIXME CoP + # over evaluation dependencies. Execute "ParameterBuilder.build_parameters()" if not in "Domain" scoped list. 
# noqa: E501 # FIXME CoP suite_parameter_builder: ParameterBuilder for suite_parameter_builder in suite_parameter_builders: if ( @@ -263,7 +266,7 @@ def _build_parameters( Returns: Attributes object, containing computed parameter values and parameter computation details metadata. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP pass @property @@ -286,7 +289,7 @@ def suite_parameter_builder_configs( def raw_fully_qualified_parameter_name(self) -> str: """ This fully-qualified parameter name references "raw" "ParameterNode" output (including "Numpy" "dtype" values). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return f"{RAW_PARAMETER_KEY}{self.name}" @property @@ -329,7 +332,7 @@ def get_batch_ids( parameters=parameters, ) - def get_metrics( # noqa: C901, PLR0913 + def get_metrics( # noqa: C901, PLR0913 # FIXME CoP self, metric_name: str, metric_domain_kwargs: Optional[Union[Union[str, dict], List[Union[str, dict]]]] = None, @@ -360,11 +363,11 @@ def get_metrics( # noqa: C901, PLR0913 :return: "MetricComputationResult" object, containing both: data samples in the format "N x R^m", where "N" (most significant dimension) is the number of measurements (e.g., one per "Batch" of data), while "R^m" is the multi-dimensional metric, whose values are being estimated, and details (to be used for metadata purposes). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if not metric_name: raise ProfilerExecutionError( message=f"""Utilizing "{self.__class__.__name__}.get_metrics()" requires valid "metric_name" to be \ -specified (empty "metric_name" value detected).""" # noqa: E501 +specified (empty "metric_name" value detected).""" # noqa: E501 # FIXME CoP ) batch_ids: Optional[List[str]] = self.get_batch_ids( @@ -375,7 +378,7 @@ def get_metrics( # noqa: C901, PLR0913 ) if not batch_ids: raise ProfilerExecutionError( - message=f"Utilizing a {self.__class__.__name__} requires a non-empty list of Batch identifiers." 
# noqa: E501 + message=f"Utilizing a {self.__class__.__name__} requires a non-empty list of Batch identifiers." # noqa: E501 # FIXME CoP ) """ @@ -387,7 +390,7 @@ def get_metrics( # noqa: C901, PLR0913 All "MetricConfiguration" directives are generated by combining each metric_value_kwargs" with "metric_domain_kwargs" for all "batch_ids" (where every "metric_domain_kwargs" represents separate "batch_id"). Then, all "MetricConfiguration" objects, collected into list as container, are resolved simultaneously. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # Step-1: Gather "metric_domain_kwargs" (corresponding to "batch_ids"). @@ -414,14 +417,14 @@ def get_metrics( # noqa: C901, PLR0913 for batch_id in batch_ids ] - # Step-2: Gather "metric_value_kwargs" (caller may require same metric computed for multiple arguments). # noqa: E501 + # Step-2: Gather "metric_value_kwargs" (caller may require same metric computed for multiple arguments). # noqa: E501 # FIXME CoP if not isinstance(metric_value_kwargs, list): metric_value_kwargs = [metric_value_kwargs] value_kwargs_cursor: dict metric_value_kwargs = [ - # Obtain value kwargs from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain value kwargs from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=value_kwargs_cursor, @@ -432,7 +435,7 @@ def get_metrics( # noqa: C901, PLR0913 for value_kwargs_cursor in metric_value_kwargs ] - # Step-3: Generate "MetricConfiguration" directives for all "metric_domain_kwargs"/"metric_value_kwargs" pairs. # noqa: E501 + # Step-3: Generate "MetricConfiguration" directives for all "metric_domain_kwargs"/"metric_value_kwargs" pairs. 
# noqa: E501 # FIXME CoP domain_kwargs_cursor: dict kwargs_combinations: List[List[dict]] = [ @@ -481,7 +484,7 @@ def get_metrics( # noqa: C901, PLR0913 min_graph_edges_pbar_enable=0, ) - # Step-5: Map resolved metrics to their attributes for identification and recovery by receiver. # noqa: E501 + # Step-5: Map resolved metrics to their attributes for identification and recovery by receiver. # noqa: E501 # FIXME CoP attributed_resolved_metrics_map: Dict[str, AttributedResolvedMetrics] = {} @@ -510,11 +513,11 @@ def get_metrics( # noqa: C901, PLR0913 ) else: logger.warning( - f"{metric_configuration.id[0]} was not found in the resolved Metrics for ParameterBuilder." # noqa: E501 + f"{metric_configuration.id[0]} was not found in the resolved Metrics for ParameterBuilder." # noqa: E501 # FIXME CoP ) continue - # Step-6: Convert scalar metric values to vectors to enable uniformity of processing in subsequent operations. # noqa: E501 + # Step-6: Convert scalar metric values to vectors to enable uniformity of processing in subsequent operations. # noqa: E501 # FIXME CoP metric_attributes_id: str for ( @@ -530,11 +533,11 @@ def get_metrics( # noqa: C901, PLR0913 ): attributed_resolved_metrics.metric_values_by_batch_id = { batch_id: [resolved_metric_value] - for batch_id, resolved_metric_value in attributed_resolved_metrics.attributed_metric_values.items() # noqa: E501 + for batch_id, resolved_metric_value in attributed_resolved_metrics.attributed_metric_values.items() # noqa: E501 # FIXME CoP } attributed_resolved_metrics_map[metric_attributes_id] = attributed_resolved_metrics - # Step-7: Apply numeric/hygiene flags (e.g., "enforce_numeric_metric", "replace_nan_with_zero") to results. # noqa: E501 + # Step-7: Apply numeric/hygiene flags (e.g., "enforce_numeric_metric", "replace_nan_with_zero") to results. 
# noqa: E501 # FIXME CoP for ( metric_attributes_id, @@ -551,7 +554,7 @@ def get_metrics( # noqa: C901, PLR0913 parameters=parameters, ) - # Step-8: Build and return result to receiver (apply simplifications to cases of single "metric_value_kwargs"). # noqa: E501 + # Step-8: Build and return result to receiver (apply simplifications to cases of single "metric_value_kwargs"). # noqa: E501 # FIXME CoP details: dict = { "metric_configuration": { @@ -569,7 +572,7 @@ def get_metrics( # noqa: C901, PLR0913 ) @staticmethod - def _sanitize_metric_computation( # noqa: PLR0913 + def _sanitize_metric_computation( # noqa: PLR0913 # FIXME CoP parameter_builder: ParameterBuilder, metric_name: str, attributed_resolved_metrics: AttributedResolvedMetrics, @@ -586,8 +589,8 @@ def _sanitize_metric_computation( # noqa: PLR0913 1. If "enforce_numeric_metric" flag is set, raise an error if a non-numeric value is found in sample vectors. 2. Further, if a NaN is encountered in a sample vectors and "replace_nan_with_zero" is True, then replace those NaN values with the 0.0 floating point number; if "replace_nan_with_zero" is False, then raise an error. - """ # noqa: E501 - # Obtain enforce_numeric_metric from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + """ # noqa: E501 # FIXME CoP + # Obtain enforce_numeric_metric from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP enforce_numeric_metric = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=enforce_numeric_metric, @@ -596,7 +599,7 @@ def _sanitize_metric_computation( # noqa: PLR0913 parameters=parameters, ) - # Obtain replace_nan_with_zero from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain replace_nan_with_zero from "rule state" (i.e., variables and parameters); from instance variable otherwise. 
# noqa: E501 # FIXME CoP replace_nan_with_zero = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=replace_nan_with_zero, @@ -627,7 +630,7 @@ def _sanitize_metric_computation( # noqa: PLR0913 metric_value_shape: tuple = metric_values.shape - # Generate all permutations of indexes for accessing every element of the multi-dimensional metric. # noqa: E501 + # Generate all permutations of indexes for accessing every element of the multi-dimensional metric. # noqa: E501 # FIXME CoP metric_value_shape_idx: int axes: List[np.ndarray] = [ np.indices(dimensions=(metric_value_shape_idx,))[0] @@ -641,8 +644,8 @@ def _sanitize_metric_computation( # noqa: PLR0913 if enforce_numeric_metric: if pd.isnull(metric_value): if not replace_nan_with_zero: - raise ValueError( # noqa: TRY003 - f"""Computation of metric "{metric_name}" resulted in NaN ("not a number") value.""" # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + f"""Computation of metric "{metric_name}" resulted in NaN ("not a number") value.""" # noqa: E501 # FIXME CoP ) batch_metric_values.append(0.0) @@ -657,7 +660,7 @@ def _sanitize_metric_computation( # noqa: PLR0913 raise ProfilerExecutionError( message=f"""Applicability of {parameter_builder.__class__.__name__} is restricted to \ numeric-valued and datetime-valued metrics (value {metric_value} of type "{type(metric_value)!s}" was computed). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) else: batch_metric_values.append(metric_value) @@ -676,7 +679,7 @@ def _get_best_candidate_above_threshold( """ Helper method to calculate which candidate strings or patterns are the best match (ie. highest ratio), provided they are also above the threshold. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP best_candidate: Optional[str] = None best_ratio: float = 0.0 @@ -697,7 +700,7 @@ def _get_sorted_candidates_and_ratios( Helper method to sort all candidate strings or patterns by success ratio (how well they matched the domain). 
Returns sorted dict of candidate as key and ratio as value - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # noinspection PyTypeChecker return dict( sorted( diff --git a/great_expectations/experimental/rule_based_profiler/parameter_builder/regex_pattern_string_parameter_builder.py b/great_expectations/experimental/rule_based_profiler/parameter_builder/regex_pattern_string_parameter_builder.py index 6ff2d7c7a793..d0db663d7699 100644 --- a/great_expectations/experimental/rule_based_profiler/parameter_builder/regex_pattern_string_parameter_builder.py +++ b/great_expectations/experimental/rule_based_profiler/parameter_builder/regex_pattern_string_parameter_builder.py @@ -5,15 +5,15 @@ import pandas as pd -from great_expectations.core.domain import Domain # noqa: TCH001 +from great_expectations.core.domain import Domain # noqa: TCH001 # FIXME CoP from great_expectations.core.metric_function_types import ( SummarizationMetricNameSuffixes, ) from great_expectations.experimental.rule_based_profiler.attributed_resolved_metrics import ( - AttributedResolvedMetrics, # noqa: TCH001 + AttributedResolvedMetrics, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.config import ( - ParameterBuilderConfig, # noqa: TCH001 + ParameterBuilderConfig, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.exceptions import ProfilerExecutionError from great_expectations.experimental.rule_based_profiler.helpers.util import ( @@ -21,8 +21,8 @@ get_parameter_value_and_validate_return_type, ) from great_expectations.experimental.rule_based_profiler.metric_computation_result import ( - MetricComputationResult, # noqa: TCH001 - MetricValues, # noqa: TCH001 + MetricComputationResult, # noqa: TCH001 # FIXME CoP + MetricValues, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.parameter_builder import ParameterBuilder from great_expectations.experimental.rule_based_profiler.parameter_container 
import ( @@ -46,7 +46,7 @@ class RegexPatternStringParameterBuilder(ParameterBuilder): Detects the domain REGEX from a set of candidate REGEX strings by computing the column_values.match_regex_format.unexpected_count metric for each candidate format and returning the format that has the lowest unexpected_count ratio. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # list of candidate strings that are most commonly used # source: https://regexland.com/most-common-regular-expressions/ @@ -58,13 +58,13 @@ class RegexPatternStringParameterBuilder(ParameterBuilder): r"[A-Za-z0-9\.,;:!?()\"'%\-]+", # general text r"^\s+", # leading space r"\s+$", # trailing space - r"https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,255}\.[a-z]{2,6}\b(?:[-a-zA-Z0-9@:%_\+.~#()?&//=]*)", # Matching URL (including http(s) protocol) # noqa: E501 + r"https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,255}\.[a-z]{2,6}\b(?:[-a-zA-Z0-9@:%_\+.~#()?&//=]*)", # Matching URL (including http(s) protocol) # noqa: E501 # FIXME CoP r"<\/?(?:p|a|b|img)(?: \/)?>", # HTML tags - r"(?:25[0-5]|2[0-4]\d|[01]\d{2}|\d{1,2})(?:.(?:25[0-5]|2[0-4]\d|[01]\d{2}|\d{1,2})){3}", # IPv4 IP address # noqa: E501 - r"\b[0-9a-fA-F]{8}\b-[0-9a-fA-F]{4}-[0-5][0-9a-fA-F]{3}-[089ab][0-9a-fA-F]{3}-\b[0-9a-fA-F]{12}\b ", # UUID # noqa: E501 + r"(?:25[0-5]|2[0-4]\d|[01]\d{2}|\d{1,2})(?:.(?:25[0-5]|2[0-4]\d|[01]\d{2}|\d{1,2})){3}", # IPv4 IP address # noqa: E501 # FIXME CoP + r"\b[0-9a-fA-F]{8}\b-[0-9a-fA-F]{4}-[0-5][0-9a-fA-F]{3}-[089ab][0-9a-fA-F]{3}-\b[0-9a-fA-F]{12}\b ", # UUID # noqa: E501 # FIXME CoP } - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, name: str, metric_domain_kwargs: Optional[Union[str, dict]] = None, @@ -86,7 +86,7 @@ def __init__( # noqa: PLR0913 ParameterBuilder objects' outputs available (as fully-qualified parameter names) is pre-requisite. These "ParameterBuilder" configurations help build parameters needed for this "ParameterBuilder". 
data_context: AbstractDataContext associated with this ParameterBuilder - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP super().__init__( name=name, suite_parameter_builder_configs=suite_parameter_builder_configs, @@ -122,7 +122,7 @@ def candidate_regexes( ) -> Union[str, Union[List[str], Set[str]]]: return self._candidate_regexes - def _build_parameters( # noqa: C901 + def _build_parameters( # noqa: C901 # FIXME CoP self, domain: Domain, variables: Optional[ParameterContainer] = None, @@ -136,7 +136,7 @@ def _build_parameters( # noqa: C901 the configured threshold. Builds ParameterContainer object that holds ParameterNode objects with attribute name-value pairs and details. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP metric_computation_result: MetricComputationResult metric_computation_result = self.get_metrics( @@ -155,7 +155,7 @@ def _build_parameters( # noqa: C901 # This should never happen. if len(metric_computation_result.attributed_resolved_metrics) != 1: raise ProfilerExecutionError( - message=f'Result of metric computations for {self.__class__.__name__} must be a list with exactly 1 element of type "AttributedResolvedMetrics" ({metric_computation_result.attributed_resolved_metrics} found).' # noqa: E501 + message=f'Result of metric computations for {self.__class__.__name__} must be a list with exactly 1 element of type "AttributedResolvedMetrics" ({metric_computation_result.attributed_resolved_metrics} found).' # noqa: E501 # FIXME CoP ) attributed_resolved_metrics: AttributedResolvedMetrics @@ -171,7 +171,7 @@ def _build_parameters( # noqa: C901 message=f"Result of metric computations for {self.__class__.__name__} is empty." ) - # Now obtain 1-dimensional vector of values of computed metric (each element corresponds to a Batch ID). # noqa: E501 + # Now obtain 1-dimensional vector of values of computed metric (each element corresponds to a Batch ID). 
# noqa: E501 # FIXME CoP metric_values = metric_values[:, 0] nonnull_count: int @@ -180,7 +180,7 @@ def _build_parameters( # noqa: C901 else: nonnull_count = sum(metric_values) - # Obtain candidate_regexes from "rule state" (i.e, variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain candidate_regexes from "rule state" (i.e, variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP candidate_regexes: Union[ List[str], Set[str], @@ -213,7 +213,7 @@ def _build_parameters( # noqa: C901 match_regex_metric_value_kwargs_list.append(match_regex_metric_value_kwargs) - # Obtain resolved metrics and metadata for all metric configurations and available Batch objects simultaneously. # noqa: E501 + # Obtain resolved metrics and metadata for all metric configurations and available Batch objects simultaneously. # noqa: E501 # FIXME CoP metric_computation_result = self.get_metrics( metric_name=f"column_values.match_regex.{SummarizationMetricNameSuffixes.UNEXPECTED_COUNT.value}", metric_domain_kwargs=self.metric_domain_kwargs, @@ -230,7 +230,7 @@ def _build_parameters( # noqa: C901 regex_string_success_ratios: dict = {} for attributed_resolved_metrics in metric_computation_result.attributed_resolved_metrics: - # Now obtain 1-dimensional vector of values of computed metric (each element corresponds to a Batch ID). # noqa: E501 + # Now obtain 1-dimensional vector of values of computed metric (each element corresponds to a Batch ID). # noqa: E501 # FIXME CoP metric_values = attributed_resolved_metrics.conditioned_metric_values[:, 0] match_regex_unexpected_count: int @@ -246,7 +246,7 @@ def _build_parameters( # noqa: C901 success_ratio ) - # Obtain threshold from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain threshold from "rule state" (i.e., variables and parameters); from instance variable otherwise. 
# noqa: E501 # FIXME CoP threshold: float = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=self._threshold, diff --git a/great_expectations/experimental/rule_based_profiler/parameter_builder/simple_date_format_string_parameter_builder.py b/great_expectations/experimental/rule_based_profiler/parameter_builder/simple_date_format_string_parameter_builder.py index 9b3f52be5764..23dd3f9958df 100644 --- a/great_expectations/experimental/rule_based_profiler/parameter_builder/simple_date_format_string_parameter_builder.py +++ b/great_expectations/experimental/rule_based_profiler/parameter_builder/simple_date_format_string_parameter_builder.py @@ -3,15 +3,15 @@ import logging from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Set, Union -from great_expectations.core.domain import Domain # noqa: TCH001 +from great_expectations.core.domain import Domain # noqa: TCH001 # FIXME CoP from great_expectations.core.metric_function_types import ( SummarizationMetricNameSuffixes, ) from great_expectations.experimental.rule_based_profiler.attributed_resolved_metrics import ( - AttributedResolvedMetrics, # noqa: TCH001 + AttributedResolvedMetrics, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.config import ( - ParameterBuilderConfig, # noqa: TCH001 + ParameterBuilderConfig, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.exceptions import ProfilerExecutionError from great_expectations.experimental.rule_based_profiler.helpers.util import ( @@ -19,8 +19,8 @@ get_parameter_value_and_validate_return_type, ) from great_expectations.experimental.rule_based_profiler.metric_computation_result import ( - MetricComputationResult, # noqa: TCH001 - MetricValues, # noqa: TCH001 + MetricComputationResult, # noqa: TCH001 # FIXME CoP + MetricValues, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.parameter_builder import ParameterBuilder 
from great_expectations.experimental.rule_based_profiler.parameter_container import ( @@ -106,9 +106,9 @@ class SimpleDateFormatStringParameterBuilder(ParameterBuilder): Detects the domain date format from a set of candidate date format strings by computing the column_values.match_strftime_format.unexpected_count metric for each candidate format and returning the format that has the lowest unexpected_count ratio. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, name: str, metric_domain_kwargs: Optional[Union[str, dict]] = None, @@ -132,7 +132,7 @@ def __init__( # noqa: PLR0913 ParameterBuilder objects' outputs available (as fully-qualified parameter names) is pre-requisite. These "ParameterBuilder" configurations help build parameters needed for this "ParameterBuilder". data_context: AbstractDataContext associated with this ParameterBuilder - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP super().__init__( name=name, suite_parameter_builder_configs=suite_parameter_builder_configs, @@ -186,7 +186,7 @@ def _build_parameters( Returns: Attributes object, containing computed parameter values and parameter computation details metadata. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP metric_computation_result: MetricComputationResult metric_computation_result = self.get_metrics( @@ -205,7 +205,7 @@ def _build_parameters( # This should never happen. if len(metric_computation_result.attributed_resolved_metrics) != 1: raise ProfilerExecutionError( - message=f'Result of metric computations for {self.__class__.__name__} must be a list with exactly 1 element of type "AttributedResolvedMetrics" ({metric_computation_result.attributed_resolved_metrics} found).' # noqa: E501 + message=f'Result of metric computations for {self.__class__.__name__} must be a list with exactly 1 element of type "AttributedResolvedMetrics" ({metric_computation_result.attributed_resolved_metrics} found).' 
# noqa: E501 # FIXME CoP ) attributed_resolved_metrics: AttributedResolvedMetrics @@ -221,12 +221,12 @@ def _build_parameters( message=f"Result of metric computations for {self.__class__.__name__} is empty." ) - # Now obtain 1-dimensional vector of values of computed metric (each element corresponds to a Batch ID). # noqa: E501 + # Now obtain 1-dimensional vector of values of computed metric (each element corresponds to a Batch ID). # noqa: E501 # FIXME CoP metric_values = metric_values[:, 0] nonnull_count: int = sum(metric_values) - # Obtain candidate_strings from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain candidate_strings from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP candidate_strings: Union[ List[str], Set[str], @@ -255,7 +255,7 @@ def _build_parameters( match_strftime_metric_value_kwargs_list.append(match_strftime_metric_value_kwargs) - # Obtain resolved metrics and metadata for all metric configurations and available Batch objects simultaneously. # noqa: E501 + # Obtain resolved metrics and metadata for all metric configurations and available Batch objects simultaneously. # noqa: E501 # FIXME CoP metric_computation_result = self.get_metrics( metric_name=f"column_values.match_strftime_format.{SummarizationMetricNameSuffixes.UNEXPECTED_COUNT.value}", metric_domain_kwargs=self.metric_domain_kwargs, @@ -272,7 +272,7 @@ def _build_parameters( format_string_success_ratios: dict = {} for attributed_resolved_metrics in metric_computation_result.attributed_resolved_metrics: - # Now obtain 1-dimensional vector of values of computed metric (each element corresponds to a Batch ID). # noqa: E501 + # Now obtain 1-dimensional vector of values of computed metric (each element corresponds to a Batch ID). 
# noqa: E501 # FIXME CoP metric_values = attributed_resolved_metrics.conditioned_metric_values[:, 0] match_strftime_unexpected_count: int = sum(metric_values) @@ -283,7 +283,7 @@ def _build_parameters( attributed_resolved_metrics.metric_attributes["strftime_format"] ] = success_ratio - # Obtain threshold from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain threshold from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP threshold: float = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=self.threshold, diff --git a/great_expectations/experimental/rule_based_profiler/parameter_builder/unexpected_count_statistics_multi_batch_parameter_builder.py b/great_expectations/experimental/rule_based_profiler/parameter_builder/unexpected_count_statistics_multi_batch_parameter_builder.py index 3bc4993448d6..6aeff0f74bf4 100644 --- a/great_expectations/experimental/rule_based_profiler/parameter_builder/unexpected_count_statistics_multi_batch_parameter_builder.py +++ b/great_expectations/experimental/rule_based_profiler/parameter_builder/unexpected_count_statistics_multi_batch_parameter_builder.py @@ -5,10 +5,10 @@ import numpy as np import scipy -from great_expectations.core.domain import Domain # noqa: TCH001 +from great_expectations.core.domain import Domain # noqa: TCH001 # FIXME CoP from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.experimental.rule_based_profiler.config import ( - ParameterBuilderConfig, # noqa: TCH001 + ParameterBuilderConfig, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.exceptions import ProfilerExecutionError from great_expectations.experimental.rule_based_profiler.helpers.util import ( @@ -16,7 +16,7 @@ get_parameter_value_and_validate_return_type, ) from 
great_expectations.experimental.rule_based_profiler.metric_computation_result import ( - MetricValues, # noqa: TCH001 + MetricValues, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.parameter_builder import ( ParameterBuilder, @@ -39,7 +39,7 @@ class UnexpectedCountStatisticsMultiBatchParameterBuilder(ParameterBuilder): """ Compute specified aggregate of unexpected count fraction (e.g., of a map metric) across every Batch of data given. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP RECOGNIZED_UNEXPECTED_RATIO_AGGREGATION_METHODS: set = { "unexpected_count_fraction_values", @@ -47,7 +47,7 @@ class UnexpectedCountStatisticsMultiBatchParameterBuilder(ParameterBuilder): "multi_batch", } - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, name: str, unexpected_count_parameter_builder_name: str, @@ -73,7 +73,7 @@ def __init__( # noqa: PLR0913 ParameterBuilder objects' outputs available (as fully-qualified parameter names) is pre-requisite. These "ParameterBuilder" configurations help build parameters needed for this "ParameterBuilder". data_context: AbstractDataContext associated with this ParameterBuilder - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP super().__init__( name=name, suite_parameter_builder_configs=suite_parameter_builder_configs, @@ -123,14 +123,14 @@ def _build_parameters( # PLR0915, PLR0915 Returns: Attributes object, containing computed parameter values and parameter computation details metadata. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if domain.domain_type == MetricDomainTypes.COLUMN and "." in domain.domain_kwargs["column"]: - raise ProfilerExecutionError( # noqa: TRY003 - "Column names cannot contain '.' when computing parameters for unexpected count statistics." # noqa: E501 + raise ProfilerExecutionError( # noqa: TRY003 # FIXME CoP + "Column names cannot contain '.' when computing parameters for unexpected count statistics." 
# noqa: E501 # FIXME CoP ) - # Obtain unexpected_count_parameter_builder_name from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain unexpected_count_parameter_builder_name from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP unexpected_count_parameter_builder_name: Optional[str] = ( get_parameter_value_and_validate_return_type( domain=domain, @@ -144,7 +144,7 @@ def _build_parameters( # PLR0915, PLR0915 fully_qualified_unexpected_count_parameter_builder_name: str = ( f"{RAW_PARAMETER_KEY}{unexpected_count_parameter_builder_name}" ) - # Obtain unexpected_count from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain unexpected_count from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP unexpected_count_parameter_node: ParameterNode = ( get_parameter_value_and_validate_return_type( domain=domain, @@ -158,7 +158,7 @@ def _build_parameters( # PLR0915, PLR0915 FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY ] - # Obtain total_count_parameter_builder_name from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain total_count_parameter_builder_name from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP total_count_parameter_builder_name: str = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=self.total_count_parameter_builder_name, @@ -170,7 +170,7 @@ def _build_parameters( # PLR0915, PLR0915 fully_qualified_total_count_parameter_builder_name: str = ( f"{RAW_PARAMETER_KEY}{total_count_parameter_builder_name}" ) - # Obtain total_count from "rule state" (i.e., variables and parameters); from instance variable otherwise. 
# noqa: E501 + # Obtain total_count from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP total_count_parameter_node: ParameterNode = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=fully_qualified_total_count_parameter_builder_name, @@ -186,7 +186,7 @@ def _build_parameters( # PLR0915, PLR0915 total_count_values + NP_EPSILON ) - # Obtain mode from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain mode from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP mode: str = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=self.mode, @@ -196,12 +196,12 @@ def _build_parameters( # PLR0915, PLR0915 ) if mode and ( mode - not in UnexpectedCountStatisticsMultiBatchParameterBuilder.RECOGNIZED_UNEXPECTED_RATIO_AGGREGATION_METHODS # noqa: E501 + not in UnexpectedCountStatisticsMultiBatchParameterBuilder.RECOGNIZED_UNEXPECTED_RATIO_AGGREGATION_METHODS # noqa: E501 # FIXME CoP ): raise ProfilerExecutionError( message=f"""The directive "mode" can only be one of \ {UnexpectedCountStatisticsMultiBatchParameterBuilder.RECOGNIZED_UNEXPECTED_RATIO_AGGREGATION_METHODS}, or must be omitted (or set to None); however, "{mode}" was detected. -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) result: Union[np.float64, Dict[str, Union[np.float64, np.ndarray]]] @@ -226,7 +226,7 @@ def _build_parameters( # PLR0915, PLR0915 ) result["error_rate"] = np.float64(0.0) elif mode == "multi_batch": - # Obtain max_error_rate directive from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 + # Obtain max_error_rate directive from "rule state" (i.e., variables and parameters); from instance variable otherwise. 
# noqa: E501 # FIXME CoP max_error_rate: float = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=self.max_error_rate, @@ -259,7 +259,7 @@ def _build_parameters( # PLR0915, PLR0915 ) -def _standardize_mostly_for_single_batch( # noqa: PLR0911 +def _standardize_mostly_for_single_batch( # noqa: PLR0911 # FIXME CoP expectation_type: str, mostly: np.float64 ) -> np.float64: """ @@ -269,10 +269,10 @@ def _standardize_mostly_for_single_batch( # noqa: PLR0911 if mostly >= 1.0: return np.float64(1.0) - if mostly >= 0.99: # noqa: PLR2004 + if mostly >= 0.99: # noqa: PLR2004 # FIXME CoP return np.float64(0.99) - if mostly >= 0.975: # noqa: PLR2004 + if mostly >= 0.975: # noqa: PLR2004 # FIXME CoP return np.float64(0.975) return mostly @@ -281,7 +281,7 @@ def _standardize_mostly_for_single_batch( # noqa: PLR0911 if mostly >= 1.0: return np.float64(1.0) - if mostly >= 0.99: # noqa: PLR2004 + if mostly >= 0.99: # noqa: PLR2004 # FIXME CoP return np.float64(0.99) # round down to nearest 0.025 @@ -293,7 +293,7 @@ def _multi_batch_cost_function(x: np.float64, a: np.ndarray) -> np.float64: Mean (per-Batch) Hamming distance -- loss only when expectation validation fails; no change otherwise. Expectation validation fails when candidate unexpected_count_fraction x is less than observed array element value. Once optimal unexpected_count_fraction is computed, mostly becomes its complement (1.0 - unexpected_count_fraction). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return np.mean(x < a) @@ -304,16 +304,16 @@ def _compute_multi_batch_min_unexpected_count_fraction( """ Use constrained optimization algorithm to compute minimum value of x ("unexpected_count_fraction") under constraint that _cost_function() of variable x given array a must be less than or equal to "max_error_rate" constant. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - # Define objective function to be minimized (minimum "unexpected_count_fraction" is desired to maximize "mostly"). 
# noqa: E501 + # Define objective function to be minimized (minimum "unexpected_count_fraction" is desired to maximize "mostly"). # noqa: E501 # FIXME CoP def _objective_function(x: np.float64) -> np.float64: return x[0] # Sort array in ascending order sorted_a: np.ndarray = np.sort(a, axis=None) - # Define constraint function reflecting penalty incurred by lowering "unexpected_count_fraction" (raising "mostly"). # noqa: E501 + # Define constraint function reflecting penalty incurred by lowering "unexpected_count_fraction" (raising "mostly"). # noqa: E501 # FIXME CoP def _constraint_function(x: np.float64) -> np.float64: return np.float64(_multi_batch_cost_function(x=x[0], a=sorted_a) - max_error_rate) diff --git a/great_expectations/experimental/rule_based_profiler/parameter_builder/value_counts_single_batch_parameter_builder.py b/great_expectations/experimental/rule_based_profiler/parameter_builder/value_counts_single_batch_parameter_builder.py index 6ea01713a3d2..ac5f71311d0d 100644 --- a/great_expectations/experimental/rule_based_profiler/parameter_builder/value_counts_single_batch_parameter_builder.py +++ b/great_expectations/experimental/rule_based_profiler/parameter_builder/value_counts_single_batch_parameter_builder.py @@ -5,7 +5,7 @@ import numpy as np from great_expectations.compatibility.typing_extensions import override -from great_expectations.core.domain import Domain # noqa: TCH001 +from great_expectations.core.domain import Domain # noqa: TCH001 # FIXME CoP from great_expectations.experimental.rule_based_profiler.config import ParameterBuilderConfig from great_expectations.experimental.rule_based_profiler.helpers.util import ( NP_EPSILON, @@ -64,7 +64,7 @@ def __init__( ParameterBuilder objects' outputs available (as fully-qualified parameter names) is pre-requisite. These "ParameterBuilder" configurations help build parameters needed for this "ParameterBuilder". 
data_context: AbstractDataContext associated with this ParameterBuilder - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP self._column_value_counts_metric_single_batch_parameter_builder_config = ( ParameterBuilderConfig( module_name="great_expectations.rule_based_profiler.parameter_builder", @@ -127,10 +127,10 @@ def _build_parameters( Returns: Attributes object, containing computed parameter values and parameter computation details metadata. - """ # noqa: E501 - fully_qualified_column_values_nonnull_count_metric_parameter_builder_name: str = f"{RAW_PARAMETER_KEY}{self._column_values_nonnull_count_metric_single_batch_parameter_builder_config.name}" # noqa: E501 - # Obtain "column_values.nonnull.count" from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 - column_values_nonnull_count_parameter_node: ParameterNode = get_parameter_value_and_validate_return_type( # noqa: E501 + """ # noqa: E501 # FIXME CoP + fully_qualified_column_values_nonnull_count_metric_parameter_builder_name: str = f"{RAW_PARAMETER_KEY}{self._column_values_nonnull_count_metric_single_batch_parameter_builder_config.name}" # noqa: E501 # FIXME CoP + # Obtain "column_values.nonnull.count" from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP + column_values_nonnull_count_parameter_node: ParameterNode = get_parameter_value_and_validate_return_type( # noqa: E501 # FIXME CoP domain=domain, parameter_reference=fully_qualified_column_values_nonnull_count_metric_parameter_builder_name, expected_return_type=None, @@ -138,9 +138,9 @@ def _build_parameters( parameters=parameters, ) - fully_qualified_column_value_counts_metric_single_batch_parameter_builder_name: str = f"{RAW_PARAMETER_KEY}{self._column_value_counts_metric_single_batch_parameter_builder_config.name}" # noqa: E501 - # Obtain "column.value_counts" from "rule state" (i.e., variables and parameters); from instance variable otherwise. 
# noqa: E501 - column_value_counts_parameter_node: ParameterNode = get_parameter_value_and_validate_return_type( # noqa: E501 + fully_qualified_column_value_counts_metric_single_batch_parameter_builder_name: str = f"{RAW_PARAMETER_KEY}{self._column_value_counts_metric_single_batch_parameter_builder_config.name}" # noqa: E501 # FIXME CoP + # Obtain "column.value_counts" from "rule state" (i.e., variables and parameters); from instance variable otherwise. # noqa: E501 # FIXME CoP + column_value_counts_parameter_node: ParameterNode = get_parameter_value_and_validate_return_type( # noqa: E501 # FIXME CoP domain=domain, parameter_reference=fully_qualified_column_value_counts_metric_single_batch_parameter_builder_name, expected_return_type=None, diff --git a/great_expectations/experimental/rule_based_profiler/parameter_builder/value_set_multi_batch_parameter_builder.py b/great_expectations/experimental/rule_based_profiler/parameter_builder/value_set_multi_batch_parameter_builder.py index dac38c220f79..138f9ad89281 100644 --- a/great_expectations/experimental/rule_based_profiler/parameter_builder/value_set_multi_batch_parameter_builder.py +++ b/great_expectations/experimental/rule_based_profiler/parameter_builder/value_set_multi_batch_parameter_builder.py @@ -16,19 +16,19 @@ import numpy as np from great_expectations.compatibility.typing_extensions import override -from great_expectations.core.domain import Domain # noqa: TCH001 +from great_expectations.core.domain import Domain # noqa: TCH001 # FIXME CoP from great_expectations.experimental.rule_based_profiler.attributed_resolved_metrics import ( AttributedResolvedMetrics, ) from great_expectations.experimental.rule_based_profiler.config import ( - ParameterBuilderConfig, # noqa: TCH001 + ParameterBuilderConfig, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.helpers.util import ( datetime_semantic_domain_type, get_parameter_value_and_validate_return_type, ) from 
great_expectations.experimental.rule_based_profiler.metric_computation_result import ( - MetricValues, # noqa: TCH001 + MetricValues, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.parameter_builder import ( MetricMultiBatchParameterBuilder, @@ -98,7 +98,7 @@ def __init__( ParameterBuilder objects' outputs available (as fully-qualified parameter names) is pre-requisite. These "ParameterBuilder" configurations help build parameters needed for this "ParameterBuilder". data_context: AbstractDataContext associated with this ParameterBuilder - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP super().__init__( name=name, metric_name="column.distinct_values", @@ -124,7 +124,7 @@ def _build_parameters( Returns: Attributes object, containing computed parameter values and parameter computation details metadata. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # Build the list of unique values for each Batch object. super().build_parameters( domain=domain, @@ -134,7 +134,7 @@ def _build_parameters( runtime_configuration=runtime_configuration, ) - # Retrieve and replace list of unique values for each Batch with set of unique values for all batches in domain. # noqa: E501 + # Retrieve and replace list of unique values for each Batch with set of unique values for all batches in domain. # noqa: E501 # FIXME CoP parameter_node: ParameterNode = get_parameter_value_and_validate_return_type( domain=domain, parameter_reference=self.raw_fully_qualified_parameter_name, @@ -200,7 +200,7 @@ def _get_unique_values_from_nested_collection_of_sets( In multi-batch data analysis, values can be empty and missing, resulting in "None" added to set. However, due to reliance on "np.ndarray", "None" gets converted to "numpy.Inf", whereas "numpy.Inf == numpy.Inf" returns False, resulting in numerous "None" elements in final set. For this reason, all "None" elements must be filtered out. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP unique_values: Set[V] = set( sorted( # type: ignore[type-var,arg-type] # lambda destroys type info? filter( diff --git a/great_expectations/experimental/rule_based_profiler/parameter_container.py b/great_expectations/experimental/rule_based_profiler/parameter_container.py index 3aa8b280ef9d..29bed1390663 100644 --- a/great_expectations/experimental/rule_based_profiler/parameter_container.py +++ b/great_expectations/experimental/rule_based_profiler/parameter_container.py @@ -20,7 +20,7 @@ from great_expectations.compatibility.typing_extensions import override from great_expectations.experimental.rule_based_profiler.exceptions import ProfilerExecutionError from great_expectations.types import SerializableDictDot, SerializableDotDict -from great_expectations.util import convert_to_json_serializable # noqa: TID251 +from great_expectations.util import convert_to_json_serializable # noqa: TID251 # FIXME CoP if TYPE_CHECKING: from great_expectations.core.domain import Domain @@ -96,12 +96,12 @@ def _parse_attribute_naming_pattern(name: str) -> ParseResults: Applicability: To be used as part of configuration (e.g., YAML-based files or text strings). Extendability: Readily extensible to include "slice" and other standard accessors (as long as no dynamic elements). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP try: return attribute_naming_pattern.parseString(name) except ParseException: - raise ParameterAttributeNameParserError( # noqa: TRY003 + raise ParameterAttributeNameParserError( # noqa: TRY003 # FIXME CoP f'Unable to parse Parameter Attribute Name: "{name}".' 
) @@ -115,7 +115,7 @@ def validate_fully_qualified_parameter_name_delimiter( raise ProfilerExecutionError( message=f"""Unable to get value for parameter name "{fully_qualified_parameter_name}" -- parameter \ names must start with {FULLY_QUALIFIED_PARAMETER_NAME_DELIMITER_CHARACTER} (e.g., "{FULLY_QUALIFIED_PARAMETER_NAME_DELIMITER_CHARACTER}{fully_qualified_parameter_name}"). -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) @@ -157,7 +157,7 @@ class ParameterNode(SerializableDotDict): Even though, typically, only the leaf nodes (characterized by having no keys of "ParameterNode" type) store parameter values and details, intermediate nodes may also have these properties. This is important for supporting the situations where multiple long fully-qualified parameter names have overlapping intermediate parts (see below). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def to_dict(self) -> dict: return convert_parameter_node_to_dictionary(source=dict(self)) # type: ignore[return-value] # could be None @@ -221,7 +221,7 @@ class ParameterContainer(SerializableDictDot): The ParameterContainer maintains a dictionary that holds references to root-level ParameterNode objects for all parameter "name spaces" applicable to the given Domain (where the first part of all fully-qualified parameter names within the same "name space" serves as the dictionary key, and the root-level ParameterNode objects are the values). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP parameter_nodes: Optional[Dict[str, ParameterNode]] = None @@ -252,7 +252,7 @@ def deep_convert_properties_iterable_to_parameter_node( if isinstance(source, dict): return _deep_convert_properties_iterable_to_parameter_node(source=ParameterNode(source)) - # Must allow for non-dictionary source types, since their internal nested structures may contain dictionaries. # noqa: E501 + # Must allow for non-dictionary source types, since their internal nested structures may contain dictionaries. 
# noqa: E501 # FIXME CoP if isinstance(source, (list, set, tuple)): data_type: type = type(source) @@ -364,7 +364,7 @@ def build_parameter_container( (and any "details") and builds the tree under a single root-level ParameterNode object for a "name space". In particular, if any ParameterNode object in the tree (starting with the root-level ParameterNode object) already exists, it is reused; in other words, ParameterNode objects are unique per part of fully-qualified parameter names. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP parameter_node: Optional[ParameterNode] fully_qualified_parameter_name: str parameter_value: Any @@ -416,12 +416,12 @@ def _build_parameter_node_tree_for_one_parameter( parameter_node: root-level ParameterNode for the sub-tree, characterized by the first parameter name in list parameter_name_as_list: list of parts of a fully-qualified parameter name of sub-tree (or sub "name space") parameter_value: value pertaining to the last part of the fully-qualified parameter name ("leaf node") - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP node: ParameterNode = parameter_node parameter_name: str for parameter_name in parameter_name_as_list[:-1]: - # This conditional is functionally equivalent to `node = node.setdefault(parameter_name, ParameterNode({})).` # noqa: E501 - # However, setdefault always evaluates its second argument which is much slower in this hot code path. # noqa: E501 + # This conditional is functionally equivalent to `node = node.setdefault(parameter_name, ParameterNode({})).` # noqa: E501 # FIXME CoP + # However, setdefault always evaluates its second argument which is much slower in this hot code path. 
# noqa: E501 # FIXME CoP if parameter_name in node: node = node[parameter_name] else: @@ -449,14 +449,14 @@ def get_parameter_value_by_fully_qualified_parameter_name( :param parameters :return: Optional[Union[Any, ParameterNode]] object corresponding to the last part of the fully-qualified parameter name supplied as argument -- a value (of type "Any") or a ParameterNode object (containing the sub-tree structure). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP validate_fully_qualified_parameter_name_delimiter( fully_qualified_parameter_name=fully_qualified_parameter_name ) - # Using "__getitem__" (bracket) notation instead of "__getattr__" (dot) notation in order to insure the # noqa: E501 - # compatibility of field names (e.g., "domain_kwargs") with user-facing syntax (as governed by the value of the # noqa: E501 - # DOMAIN_KWARGS_PARAMETER_NAME constant, which may change, requiring the same change to the field name). # noqa: E501 + # Using "__getitem__" (bracket) notation instead of "__getattr__" (dot) notation in order to insure the # noqa: E501 # FIXME CoP + # compatibility of field names (e.g., "domain_kwargs") with user-facing syntax (as governed by the value of the # noqa: E501 # FIXME CoP + # DOMAIN_KWARGS_PARAMETER_NAME constant, which may change, requiring the same change to the field name). # noqa: E501 # FIXME CoP if fully_qualified_parameter_name == DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME: if domain: # Supports the "$domain.domain_kwargs" style syntax. @@ -526,10 +526,10 @@ def _get_parameter_value_from_parameter_container( name=parameter_name_part ) if len(parsed_attribute_name) < 1: - raise KeyError( # noqa: TRY003, TRY301 + raise KeyError( # noqa: TRY003, TRY301 # FIXME CoP f"""Unable to get value for parameter name "{fully_qualified_parameter_name}": Part \ "{parameter_name_part}" in fully-qualified parameter name does not represent a valid expression. 
-""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) parent_parameter_node = return_value @@ -543,14 +543,14 @@ def _get_parameter_value_from_parameter_container( for attribute_value_accessor in parsed_attribute_name: return_value = return_value[attribute_value_accessor] except KeyError: - raise KeyError( # noqa: TRY003 + raise KeyError( # noqa: TRY003 # FIXME CoP f"""Unable to find value for parameter name "{fully_qualified_parameter_name}": Part \ "{parameter_name_part}" does not exist in fully-qualified parameter name. """ ) if attribute_value_reference not in parent_parameter_node: # type: ignore[operator] # could be None - raise KeyError( # noqa: TRY003 + raise KeyError( # noqa: TRY003 # FIXME CoP f"""Unable to find value for parameter name "{fully_qualified_parameter_name}": Part \ "{parameter_name_part}" of fully-qualified parameter name does not exist. """ @@ -634,12 +634,12 @@ def _get_parameter_node_attribute_names( attribute_name: str for attribute_name_as_list in attribute_names_as_lists: - attribute_name_as_list = ( # noqa: PLW2901 + attribute_name_as_list = ( # noqa: PLW2901 # FIXME CoP _get_parameter_name_parts_up_to_including_reserved_literal( attribute_name_as_list=attribute_name_as_list ) ) - attribute_name = f"{FULLY_QUALIFIED_PARAMETER_NAME_DELIMITER_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER.join(attribute_name_as_list[1:])}" # noqa: E501 + attribute_name = f"{FULLY_QUALIFIED_PARAMETER_NAME_DELIMITER_CHARACTER}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER.join(attribute_name_as_list[1:])}" # noqa: E501 # FIXME CoP attribute_names.add(attribute_name) return list(attribute_names) @@ -682,7 +682,7 @@ def _get_parameter_name_parts_up_to_including_reserved_literal( if not (set(attribute_name_as_list) & RESERVED_TERMINAL_LITERALS): return attribute_name_as_list - # TODO: 12/29/2022: Lexicographical order avoids collisions between regular keys and reserved literals. 
# noqa: E501 + # TODO: 12/29/2022: Lexicographical order avoids collisions between regular keys and reserved literals. # noqa: E501 # FIXME CoP reserved_terminal_literals: List[str] = list(sorted(RESERVED_TERMINAL_LITERALS)) idx: Optional[int] = None diff --git a/great_expectations/experimental/rule_based_profiler/rule/rule.py b/great_expectations/experimental/rule_based_profiler/rule/rule.py index dcb8434228c7..0c92e48beb82 100644 --- a/great_expectations/experimental/rule_based_profiler/rule/rule.py +++ b/great_expectations/experimental/rule_based_profiler/rule/rule.py @@ -13,7 +13,7 @@ expectationConfigurationBuilderConfigSchema, parameterBuilderConfigSchema, ) -from great_expectations.experimental.rule_based_profiler.helpers.configuration_reconciliation import ( # noqa: E501 +from great_expectations.experimental.rule_based_profiler.helpers.configuration_reconciliation import ( # noqa: E501 # FIXME CoP DEFAULT_RECONCILATION_DIRECTIVES, ReconciliationDirectives, reconcile_rule_variables, @@ -28,7 +28,7 @@ from great_expectations.experimental.rule_based_profiler.rule.rule_state import RuleState from great_expectations.types import SerializableDictDot from great_expectations.util import ( - convert_to_json_serializable, # noqa: TID251 + convert_to_json_serializable, # noqa: TID251 # FIXME CoP deep_filter_properties_iterable, measure_execution_time, ) @@ -39,7 +39,7 @@ from great_expectations.experimental.rule_based_profiler.domain_builder import ( DomainBuilder, ) - from great_expectations.experimental.rule_based_profiler.expectation_configuration_builder import ( # noqa: E501 + from great_expectations.experimental.rule_based_profiler.expectation_configuration_builder import ( # noqa: E501 # FIXME CoP ExpectationConfigurationBuilder, ) from great_expectations.experimental.rule_based_profiler.parameter_builder import ( @@ -65,7 +65,7 @@ def __init__( domain_builder: A Domain Builder object used to build rule data domain parameter_builders: A Parameter Builder list 
used to configure necessary rule suite parameters expectation_configuration_builders: A list of Expectation Configuration Builders - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP self._name = name if variables is None: @@ -89,7 +89,7 @@ def __init__( execution_time_property_name="rule_execution_time", pretty_print=False, ) - def run( # noqa: PLR0913 + def run( # noqa: PLR0913 # FIXME CoP self, variables: Optional[ParameterContainer] = None, batch_list: Optional[List[Batch]] = None, @@ -112,7 +112,7 @@ def run( # noqa: PLR0913 Returns: RuleState representing effect of executing Rule - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if not reconciliation_directives: reconciliation_directives = DEFAULT_RECONCILATION_DIRECTIVES @@ -194,7 +194,7 @@ def name(self, value: str) -> None: @property def variables(self) -> ParameterContainer: - # Returning a copy of the "self._variables" state variable in order to prevent write-before-read hazard. # noqa: E501 + # Returning a copy of the "self._variables" state variable in order to prevent write-before-read hazard. # noqa: E501 # FIXME CoP return copy.deepcopy(self._variables) @variables.setter @@ -260,7 +260,7 @@ def to_json_dict(self) -> dict: reference implementation in the "SerializableDictDot" class itself. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP dict_obj: dict = self.to_dict() variables_dict: Optional[Dict[str, Any]] = convert_variables_to_dict( variables=self.variables @@ -277,7 +277,7 @@ def __repr__(self) -> str: implementation in the "SerializableDictDot" class. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP json_dict: dict = self.to_json_dict() deep_filter_properties_iterable( properties=json_dict, @@ -293,7 +293,7 @@ def __str__(self) -> str: implementation in the "SerializableDictDot" class. However, the circular import dependencies, due to the location of the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules make this refactoring infeasible at the present time. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return self.__repr__() def _get_parameter_builders_as_dict(self) -> Dict[str, ParameterBuilder]: diff --git a/great_expectations/experimental/rule_based_profiler/rule/rule_output.py b/great_expectations/experimental/rule_based_profiler/rule/rule_output.py index 439e66f36664..ad82614f505e 100644 --- a/great_expectations/experimental/rule_based_profiler/rule/rule_output.py +++ b/great_expectations/experimental/rule_based_profiler/rule/rule_output.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, Dict, List, Optional from great_expectations.experimental.rule_based_profiler.expectation_configuration_builder import ( - ExpectationConfigurationBuilder, # noqa: TCH001 + ExpectationConfigurationBuilder, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.parameter_container import ( ParameterNode, @@ -24,7 +24,7 @@ class RuleOutput: """ RuleOutput provides methods for extracting useful information from RuleState using directives and application logic. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def __init__( self, @@ -33,7 +33,7 @@ def __init__( """ Args: rule_state: RuleState object represented by "Domain" objects and parameters,.computed for one Rule object. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP self._rule_state = rule_state @property diff --git a/great_expectations/experimental/rule_based_profiler/rule/rule_state.py b/great_expectations/experimental/rule_based_profiler/rule/rule_state.py index e7a12800463b..a9f24a1fb44e 100644 --- a/great_expectations/experimental/rule_based_profiler/rule/rule_state.py +++ b/great_expectations/experimental/rule_based_profiler/rule/rule_state.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, Dict, List, Optional -from great_expectations.core.domain import Domain # noqa: TCH001 +from great_expectations.core.domain import Domain # noqa: TCH001 # FIXME CoP from great_expectations.experimental.rule_based_profiler.exceptions import ( ProfilerConfigurationError, ) @@ -21,7 +21,7 @@ class RuleState: with currently loaded configuration of "Rule" components ("DomainBuilder" object, "ParameterBuilder" objects, and "ExpectationConfigurationBuilder" objects). Using "RuleState" with corresponding flags is sufficient for generating outputs for different purposes (in raw and aggregated form) from available "Domain" objects and computed parameters. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def __init__( self, @@ -37,7 +37,7 @@ def __init__( domains: List of Domain objects, which DomainBuilder of associated Rule generated. variables: attribute name/value pairs (part of state, relevant for associated Rule). parameters: Dictionary of ParameterContainer objects corresponding to all Domain objects in memory. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP self._rule = rule if domains is None: @@ -145,10 +145,10 @@ def add_domain( if not allow_duplicates and domain.id in [ domain_cursor.id for domain_cursor in self.domains ]: - raise ProfilerConfigurationError( # noqa: TRY003 + raise ProfilerConfigurationError( # noqa: TRY003 # FIXME CoP f"""Error: Domain\n{domain}\nalready exists. 
In order to add it, either pass "allow_duplicates=True" \ or call "RuleState.remove_domain_if_exists()" with Domain having ID equal to "{domain.id}" as argument first. -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) self.domains.append(domain) @@ -169,11 +169,11 @@ def initialize_parameter_container_for_domain( overwrite: bool = True, ) -> None: if not overwrite and domain.id in self.parameters: - raise ProfilerConfigurationError( # noqa: TRY003 + raise ProfilerConfigurationError( # noqa: TRY003 # FIXME CoP f"""Error: ParameterContainer for Domain\n{domain}\nalready exists. In order to overwrite it, either \ pass "overwrite=True" or call "RuleState.remove_parameter_container_from_domain()" with Domain having ID equal to \ "{domain.id}" as argument first. -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) parameter_container = ParameterContainer(parameter_nodes=None) diff --git a/great_expectations/experimental/rule_based_profiler/rule_based_profiler.py b/great_expectations/experimental/rule_based_profiler/rule_based_profiler.py index 6441f81b7abc..204d8c5fc6d3 100644 --- a/great_expectations/experimental/rule_based_profiler/rule_based_profiler.py +++ b/great_expectations/experimental/rule_based_profiler/rule_based_profiler.py @@ -46,7 +46,7 @@ ExpectationConfigurationBuilder, init_rule_expectation_configuration_builders, ) -from great_expectations.experimental.rule_based_profiler.helpers.configuration_reconciliation import ( # noqa: E501 +from great_expectations.experimental.rule_based_profiler.helpers.configuration_reconciliation import ( # noqa: E501 # FIXME CoP DEFAULT_RECONCILATION_DIRECTIVES, ReconciliationDirectives, ReconciliationStrategy, @@ -67,7 +67,7 @@ from great_expectations.experimental.rule_based_profiler.rule import Rule, RuleOutput from great_expectations.experimental.rule_based_profiler.rule.rule_state import RuleState from great_expectations.util import ( - convert_to_json_serializable, # noqa: TID251 + convert_to_json_serializable, # noqa: 
TID251 # FIXME CoP filter_properties_dict, ) from great_expectations.validator.exception_info import ExceptionInfo @@ -96,7 +96,7 @@ class BaseRuleBasedProfiler(ConfigPeer): """ BaseRuleBasedProfiler class is initialized from RuleBasedProfilerConfig typed object and contains all functionality in the form of interface methods (which can be overwritten by subclasses) and their reference implementation. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP EXPECTATION_SUCCESS_KEYS: Set[str] = { "_auto", @@ -123,7 +123,7 @@ def __init__( Defaults to False. If True, then catch exceptions and include them as part of the result object. \ For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP name: str = profiler_config.name id: Optional[str] = None if hasattr(profiler_config, "id"): @@ -237,7 +237,7 @@ def _init_rule_domain_builder( return domain_builder - def run( # noqa: PLR0913 + def run( # noqa: PLR0913 # FIXME CoP self, variables: Optional[Dict[str, Any]] = None, rules: Optional[Dict[str, Dict[str, Any]]] = None, @@ -264,7 +264,7 @@ def run( # noqa: PLR0913 Returns: A `RuleBasedProfilerResult` instance that contains the profiling output. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # Check to see if the user has disabled progress bars disable = False if self._data_context: @@ -336,7 +336,7 @@ def run( # noqa: PLR0913 rule_state.exception_traceback = exception_info self.rule_states.append(rule_state) else: - raise err # noqa: TRY201 + raise err # noqa: TRY201 # FIXME CoP return RuleBasedProfilerResult( fully_qualified_parameter_names_by_domain=self.get_fully_qualified_parameter_names_by_domain(), @@ -375,7 +375,7 @@ def get_expectation_configurations(self) -> List[ExpectationConfiguration]: """ Returns: List of ExpectationConfiguration objects, accumulated from RuleState of every Rule executed. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP expectation_configurations: List[ExpectationConfiguration] = [] rule_state: RuleState @@ -390,7 +390,7 @@ def get_fully_qualified_parameter_names_by_domain(self) -> Dict[Domain, List[str """ Returns: Dictionary of fully-qualified parameter names by Domain, accumulated from RuleState of every Rule executed. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP fully_qualified_parameter_names_by_domain: Dict[Domain, List[str]] = {} rule_state: RuleState @@ -410,7 +410,7 @@ def get_fully_qualified_parameter_names_for_domain_id(self, domain_id: str) -> L Returns: List of fully-qualified parameter names for Domain with domain_id as specified, accumulated from RuleState of corresponding Rule executed. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP rule_state: RuleState for rule_state in self.rule_states: domain: Domain = rule_state.get_domains_as_dict().get(domain_id) @@ -426,7 +426,7 @@ def get_parameter_values_for_fully_qualified_parameter_names_by_domain( """ Returns: Dictionaries of values for fully-qualified parameter names by Domain, accumulated from RuleState of every Rule executed. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP values_for_fully_qualified_parameter_names_by_domain: Dict[ Domain, Dict[str, ParameterNode] ] = {} @@ -450,20 +450,20 @@ def get_parameter_values_for_fully_qualified_parameter_names_for_domain_id( Returns: Dictionary of values for fully-qualified parameter names for Domain with domain_id as specified, accumulated from RuleState of corresponding Rule executed. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP rule_state: RuleState for rule_state in self.rule_states: domain: Domain = rule_state.get_domains_as_dict().get(domain_id) if domain is not None: rule_output = RuleOutput(rule_state=rule_state) - return rule_output.get_parameter_values_for_fully_qualified_parameter_names_for_domain_id( # noqa: E501 + return rule_output.get_parameter_values_for_fully_qualified_parameter_names_for_domain_id( # noqa: E501 # FIXME CoP domain_id=domain_id ) def add_rule(self, rule: Rule) -> None: """ Add Rule object to existing profiler object by reconciling profiler rules and updating _profiler_config. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP rules_dict: Dict[str, Dict[str, Any]] = { rule.name: rule.to_json_dict(), } @@ -484,7 +484,7 @@ def add_rule(self, rule: Rule) -> None: def reconcile_profiler_variables( self, variables: Optional[Dict[str, Any]] = None, - reconciliation_strategy: ReconciliationStrategy = DEFAULT_RECONCILATION_DIRECTIVES.variables, # noqa: E501 + reconciliation_strategy: ReconciliationStrategy = DEFAULT_RECONCILATION_DIRECTIVES.variables, # noqa: E501 # FIXME CoP ) -> Optional[ParameterContainer]: """ Profiler "variables" reconciliation involves combining the variables, instantiated from Profiler configuration @@ -496,7 +496,7 @@ def reconcile_profiler_variables( :param variables: variables overrides, supplied in dictionary (configuration) form :param reconciliation_strategy: one of update, nested_update, or overwrite ways of reconciling overwrites :return: reconciled variables in their canonical ParameterContainer object form - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP effective_variables: ParameterContainer if variables and isinstance(variables, dict): variables_configs: dict = self._reconcile_profiler_variables_as_dict( @@ -513,7 +513,7 @@ def reconcile_profiler_variables( def _reconcile_profiler_variables_as_dict( self, variables: Optional[Dict[str, Any]], - reconciliation_strategy: 
ReconciliationStrategy = DEFAULT_RECONCILATION_DIRECTIVES.variables, # noqa: E501 + reconciliation_strategy: ReconciliationStrategy = DEFAULT_RECONCILATION_DIRECTIVES.variables, # noqa: E501 # FIXME CoP ) -> dict: if variables is None: variables = {} @@ -550,7 +550,7 @@ def reconcile_profiler_rules( :param rules: rules overrides, supplied in dictionary (configuration) form for each rule name as the key :param reconciliation_directives directives for how each rule component should be overwritten :return: reconciled rules in their canonical List[Rule] object form - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP effective_rules: Dict[str, Rule] = self._reconcile_profiler_rules_as_dict( rules=rules, reconciliation_directives=reconciliation_directives, @@ -621,7 +621,7 @@ def _reconcile_rule_config( :param rule_config: configuration of an override rule candidate, supplied in dictionary (configuration) form :param reconciliation_directives directives for how each rule component should be overwritten :return: reconciled rule configuration, returned in dictionary (configuration) form - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP effective_rule_config: Dict[str, Any] if rule_name in existing_rules: rule: Rule = existing_rules[rule_name] @@ -664,7 +664,7 @@ def _reconcile_rule_config( "variables": effective_variables, "domain_builder": effective_domain_builder_config, "parameter_builders": effective_parameter_builder_configs, - "expectation_configuration_builders": effective_expectation_configuration_builder_configs, # noqa: E501 + "expectation_configuration_builders": effective_expectation_configuration_builder_configs, # noqa: E501 # FIXME CoP } else: effective_rule_config = rule_config @@ -675,7 +675,7 @@ def _reconcile_rule_config( def _reconcile_rule_domain_builder_config( domain_builder: DomainBuilder, domain_builder_config: dict, - reconciliation_strategy: ReconciliationStrategy = DEFAULT_RECONCILATION_DIRECTIVES.domain_builder, # noqa: E501 + 
reconciliation_strategy: ReconciliationStrategy = DEFAULT_RECONCILATION_DIRECTIVES.domain_builder, # noqa: E501 # FIXME CoP ) -> dict: """ Rule "domain builder" reconciliation involves combining the domain builder, instantiated from Rule configuration @@ -690,7 +690,7 @@ def _reconcile_rule_domain_builder_config( :param domain_builder_config: domain builder configuration override, supplied in dictionary (configuration) form :param reconciliation_strategy: one of update, nested_update, or overwrite ways of reconciling overwrites :return: reconciled domain builder configuration, returned in dictionary (configuration) form - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_builder_as_dict: dict = domain_builder.to_json_dict() domain_builder_as_dict["class_name"] = domain_builder.__class__.__name__ domain_builder_as_dict["module_name"] = domain_builder.__class__.__module__ @@ -720,7 +720,7 @@ def _reconcile_rule_domain_builder_config( def _reconcile_rule_parameter_builder_configs( rule: Rule, parameter_builder_configs: List[dict], - reconciliation_strategy: ReconciliationStrategy = DEFAULT_RECONCILATION_DIRECTIVES.parameter_builder, # noqa: E501 + reconciliation_strategy: ReconciliationStrategy = DEFAULT_RECONCILATION_DIRECTIVES.parameter_builder, # noqa: E501 # FIXME CoP ) -> Optional[List[dict]]: """ Rule "parameter builders" reconciliation involves combining the parameter builders, instantiated from Rule @@ -736,7 +736,7 @@ def _reconcile_rule_parameter_builder_configs( :param parameter_builder_configs: parameter builder configuration overrides, supplied in dictionary (configuration) form :param reconciliation_strategy: one of update, nested_update, or overwrite ways of reconciling overwrites :return: reconciled parameter builder configuration, returned in dictionary (configuration) form - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP parameter_builder_config: dict for parameter_builder_config in parameter_builder_configs: 
_validate_builder_override_config(builder_config=parameter_builder_config) @@ -790,7 +790,7 @@ def _reconcile_rule_parameter_builder_configs( def _reconcile_rule_expectation_configuration_builder_configs( rule: Rule, expectation_configuration_builder_configs: List[dict], - reconciliation_strategy: ReconciliationStrategy = DEFAULT_RECONCILATION_DIRECTIVES.expectation_configuration_builder, # noqa: E501 + reconciliation_strategy: ReconciliationStrategy = DEFAULT_RECONCILATION_DIRECTIVES.expectation_configuration_builder, # noqa: E501 # FIXME CoP ) -> List[dict]: """ Rule "expectation configuration builders" reconciliation involves combining the expectation configuration builders, instantiated from Rule @@ -806,7 +806,7 @@ def _reconcile_rule_expectation_configuration_builder_configs( :param expectation_configuration_builder_configs: expectation configuration builder configuration overrides, supplied in dictionary (configuration) form :param reconciliation_strategy: one of update, nested_update, or overwrite ways of reconciling overwrites :return: reconciled expectation configuration builder configuration, returned in dictionary (configuration) form - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP expectation_configuration_builder_config: dict for expectation_configuration_builder_config in expectation_configuration_builder_configs: _validate_builder_override_config( @@ -852,7 +852,7 @@ def _reconcile_rule_expectation_configuration_builder_configs( expectation_configuration_builder_config[ "expectation_type" ]: expectation_configuration_builder_config - for expectation_configuration_builder_config in expectation_configuration_builder_configs # noqa: E501 + for expectation_configuration_builder_config in expectation_configuration_builder_configs # noqa: E501 # FIXME CoP } if reconciliation_strategy == ReconciliationStrategy.NESTED_UPDATE: effective_expectation_configuration_builder_configs = nested_update( @@ -891,7 +891,7 @@ def _apply_runtime_environment( rules: 
name/(configuration-dictionary) to modify using "runtime_environment" variables_directives_list: additional/override runtime variables directives (modify "BaseRuleBasedProfiler") domain_type_directives_list: additional/override runtime domain directives (modify "BaseRuleBasedProfiler") - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP BaseRuleBasedProfiler._apply_variables_directives_runtime_environment( rules=rules, variables_directives_list=variables_directives_list, @@ -912,7 +912,7 @@ def _apply_variables_directives_runtime_environment( """ rules: name/(configuration-dictionary) to modify using "runtime_environment" variables_directives_list: additional/override runtime "variables" directives (modify "BaseRuleBasedProfiler") - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if rules is None: rules = [] @@ -923,7 +923,7 @@ def _apply_variables_directives_runtime_environment( if variables_directives_list is None: variables_directives_list = [] - # 1. Ensure that "variables_directives_list" pertains to "Rule" objects (no spurrious "Rule" names). # noqa: E501 + # 1. Ensure that "variables_directives_list" pertains to "Rule" objects (no spurrious "Rule" names). # noqa: E501 # FIXME CoP variables_directives_list = list( filter( lambda element: element.rule_name in rule_names, @@ -933,12 +933,12 @@ def _apply_variables_directives_runtime_environment( variables_directives: RuntimeEnvironmentVariablesDirectives - # 2. Now obtain "Rule" names solely pertaining to additional/override runtime "variables" directives. # noqa: E501 + # 2. Now obtain "Rule" names solely pertaining to additional/override runtime "variables" directives. # noqa: E501 # FIXME CoP rule_names = [ variables_directives.rule_name for variables_directives in variables_directives_list ] - # 3. Filter "Rule" objects to contain only those subject to additional/override runtime "variables" directives. # noqa: E501 + # 3. 
Filter "Rule" objects to contain only those subject to additional/override runtime "variables" directives. # noqa: E501 # FIXME CoP rules = list( filter( lambda element: element.name in rule_names, @@ -948,14 +948,14 @@ def _apply_variables_directives_runtime_environment( rules_as_dict: Dict[str, Rule] = {rule.name: rule for rule in rules} - # 4. Update "variables" of pertinent "Rule" objects, according to corresponding additional/override directives. # noqa: E501 + # 4. Update "variables" of pertinent "Rule" objects, according to corresponding additional/override directives. # noqa: E501 # FIXME CoP variables: Optional[Dict[str, Any]] rule_variables_configs: Optional[Dict[str, Any]] for variables_directives in variables_directives_list: variables = variables_directives.variables or {} rule = rules_as_dict[variables_directives.rule_name] rule_variables_configs = convert_variables_to_dict(variables=rule.variables) - # Filter only those additional/override directives that correspond to keys in "Rule" "variables" settings. # noqa: E501 + # Filter only those additional/override directives that correspond to keys in "Rule" "variables" settings. # noqa: E501 # FIXME CoP # noinspection PyTypeChecker variables = dict( filter( @@ -963,7 +963,7 @@ def _apply_variables_directives_runtime_environment( variables.items(), ) ) - # Update "Rule" "variables" settings with corresponding values specified by additional/override directives. # noqa: E501 + # Update "Rule" "variables" settings with corresponding values specified by additional/override directives. # noqa: E501 # FIXME CoP rule_variables_configs.update(variables) # Restore "ParameterContainer" typed object satus of "Rule" "variables" field. 
rule.variables = build_parameter_container_for_variables( @@ -978,7 +978,7 @@ def _apply_domain_type_directives_runtime_environment( """ rules: name/(configuration-dictionary) to modify using "runtime_environment" domain_type_directives_list: additional/override runtime domain directives (modify "BaseRuleBasedProfiler") - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if rules is None: rules = [] @@ -989,7 +989,7 @@ def _apply_domain_type_directives_runtime_environment( domain_rules: List[Rule] rule: Rule for domain_type_directives in domain_type_directives_list: - # 1. Ensure that Domain directives pertain to "Rule" objects with "DomainBuilder" of correct "Domain" type. # noqa: E501 + # 1. Ensure that Domain directives pertain to "Rule" objects with "DomainBuilder" of correct "Domain" type. # noqa: E501 # FIXME CoP domain_rules = [ rule for rule in rules @@ -998,19 +998,19 @@ def _apply_domain_type_directives_runtime_environment( domain_property_key: str domain_property_value: Any existing_domain_property_value: Any - # 2. Update Domain properties of pertinent "Rule" objects, according to corresponding Domain directives. # noqa: E501 + # 2. Update Domain properties of pertinent "Rule" objects, according to corresponding Domain directives. # noqa: E501 # FIXME CoP for rule in domain_rules: for ( domain_property_key, domain_property_value, ) in domain_type_directives.directives.items(): - # Use property getter/setter methods on "DomainBuilder" of "Rule" to affect override directives. # noqa: E501 + # Use property getter/setter methods on "DomainBuilder" of "Rule" to affect override directives. # noqa: E501 # FIXME CoP try: # Ensure that new directives augment (not eliminate) existing directives. 
existing_domain_property_value = getattr( rule.domain_builder, domain_property_key ) - domain_property_value = ( # noqa: PLW2901 + domain_property_value = ( # noqa: PLW2901 # FIXME CoP BaseRuleBasedProfiler._get_effective_domain_builder_property_value( dest_property_value=domain_property_value, source_property_value=existing_domain_property_value, @@ -1022,7 +1022,7 @@ def _apply_domain_type_directives_runtime_environment( domain_property_value, ) except AttributeError: - # Skip every directive that is not defined property of "DomainBuilder" object of "domain_type". # noqa: E501 + # Skip every directive that is not defined property of "DomainBuilder" object of "domain_type". # noqa: E501 # FIXME CoP pass @staticmethod @@ -1030,7 +1030,7 @@ def _get_effective_domain_builder_property_value( dest_property_value: Optional[Any] = None, source_property_value: Optional[Any] = None, ) -> Optional[Any]: - # Property values of collections types must be unique (use set for "list"/"tuple" and "update" for dictionary). # noqa: E501 + # Property values of collections types must be unique (use set for "list"/"tuple" and "update" for dictionary). 
# noqa: E501 # FIXME CoP if isinstance(dest_property_value, list) and isinstance(source_property_value, list): return list(set(dest_property_value + source_property_value)) @@ -1043,7 +1043,7 @@ def _get_effective_domain_builder_property_value( return dest_property_value @classmethod - def run_profiler( # noqa: PLR0913 + def run_profiler( # noqa: PLR0913 # FIXME CoP cls, data_context: AbstractDataContext, profiler_store: ProfilerStore, @@ -1074,7 +1074,7 @@ def run_profiler( # noqa: PLR0913 ) @classmethod - def run_profiler_on_data( # noqa: PLR0913 + def run_profiler_on_data( # noqa: PLR0913 # FIXME CoP cls, data_context: AbstractDataContext, profiler_store: ProfilerStore, @@ -1108,7 +1108,7 @@ def run_profiler_on_data( # noqa: PLR0913 ) @classmethod - def add_profiler( # noqa: PLR0913 + def add_profiler( # noqa: PLR0913 # FIXME CoP cls, data_context: AbstractDataContext, profiler_store: ProfilerStore, @@ -1131,7 +1131,7 @@ def add_profiler( # noqa: PLR0913 ) @classmethod - def update_profiler( # noqa: PLR0913 + def update_profiler( # noqa: PLR0913 # FIXME CoP cls, profiler_store: ProfilerStore, data_context: AbstractDataContext, @@ -1154,7 +1154,7 @@ def update_profiler( # noqa: PLR0913 ) @classmethod - def add_or_update_profiler( # noqa: PLR0913 + def add_or_update_profiler( # noqa: PLR0913 # FIXME CoP cls, data_context: AbstractDataContext, profiler_store: ProfilerStore, @@ -1177,7 +1177,7 @@ def add_or_update_profiler( # noqa: PLR0913 ) @classmethod - def _persist_profiler( # noqa: PLR0913 + def _persist_profiler( # noqa: PLR0913 # FIXME CoP cls, data_context: AbstractDataContext, persistence_fn: Callable, @@ -1198,7 +1198,7 @@ def _persist_profiler( # noqa: PLR0913 ) if not RuleBasedProfiler._check_validity_of_batch_requests_in_config(config=config): - raise gx_exceptions.InvalidConfigError( # noqa: TRY003 + raise gx_exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP "batch_data found in batch_request cannot be saved to ProfilerStore" ) @@ -1230,7 
+1230,7 @@ def _persist_profiler( # noqa: PLR0913 try: response = persistence_fn(key=key, value=config) except gx_exceptions.StoreBackendError as e: - raise ProfilerError(f"{e.message}; could not persist profiler") from e # noqa: TRY003 + raise ProfilerError(f"{e.message}; could not persist profiler") from e # noqa: TRY003 # FIXME CoP if isinstance(response, GXCloudResourceRef): new_profiler.id = response.id @@ -1238,7 +1238,7 @@ def _persist_profiler( # noqa: PLR0913 return new_profiler @staticmethod - def _resolve_profiler_config_for_store( # noqa: PLR0913 + def _resolve_profiler_config_for_store( # noqa: PLR0913 # FIXME CoP name: str | None = None, id: str | None = None, config_version: float | None = None, @@ -1247,8 +1247,8 @@ def _resolve_profiler_config_for_store( # noqa: PLR0913 profiler: RuleBasedProfiler | None = None, ) -> RuleBasedProfilerConfig: if not ((profiler is None) ^ all(arg is None for arg in (name, config_version, rules))): - raise TypeError( # noqa: TRY003 - "Must either pass in an existing 'profiler' or individual constructor arguments (but not both)" # noqa: E501 + raise TypeError( # noqa: TRY003 # FIXME CoP + "Must either pass in an existing 'profiler' or individual constructor arguments (but not both)" # noqa: E501 # FIXME CoP ) if profiler: @@ -1266,7 +1266,7 @@ def _resolve_profiler_config_for_store( # noqa: PLR0913 "variables": variables, } - # Roundtrip through schema validation to remove any illegal fields add/or restore any missing fields. # noqa: E501 + # Roundtrip through schema validation to remove any illegal fields add/or restore any missing fields. 
# noqa: E501 # FIXME CoP validated_config: dict = ruleBasedProfilerConfigSchema.load(config_data) profiler_config: dict = ruleBasedProfilerConfigSchema.dump(validated_config) profiler_config.pop("class_name") @@ -1318,7 +1318,7 @@ def get_profiler( key.configuration_key if isinstance(key, ConfigurationIdentifier) else key ) raise ProfilerNotFoundError( - message=f'Non-existent Profiler configuration named "{config_id}".\n\nDetails: {exc_ik}' # noqa: E501 + message=f'Non-existent Profiler configuration named "{config_id}".\n\nDetails: {exc_ik}' # noqa: E501 # FIXME CoP ) config: dict = profiler_config.to_json_dict() @@ -1354,7 +1354,7 @@ def delete_profiler( except (gx_exceptions.InvalidKeyError, KeyError) as exc_ik: config_id = key.configuration_key if isinstance(key, ConfigurationIdentifier) else key raise ProfilerNotFoundError( - message=f'Non-existent Profiler configuration named "{config_id}".\n\nDetails: {exc_ik}' # noqa: E501 + message=f'Non-existent Profiler configuration named "{config_id}".\n\nDetails: {exc_ik}' # noqa: E501 # FIXME CoP ) @staticmethod @@ -1393,7 +1393,7 @@ def config_version(self) -> float: @property def variables(self) -> Optional[ParameterContainer]: - # Returning a copy of the "self._variables" state variable in order to prevent write-before-read hazard. # noqa: E501 + # Returning a copy of the "self._variables" state variable in order to prevent write-before-read hazard. 
# noqa: E501 # FIXME CoP return copy.deepcopy(self._variables) @variables.setter @@ -1526,9 +1526,9 @@ class RuleBasedProfiler(BaseRuleBasedProfiler): expectation_completeness: Moderate --ge-feature-maturity-info-- - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, name: str, config_version: float, @@ -1562,7 +1562,7 @@ def _validate_builder_override_config(builder_config: dict) -> None: :param builder_config: candidate builder override configuration :raises: ProfilerConfigurationError - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if not all( ( isinstance(builder_config, dict), @@ -1570,6 +1570,6 @@ def _validate_builder_override_config(builder_config: dict) -> None: "module_name" in builder_config, ) ): - raise ProfilerConfigurationError( # noqa: TRY003 + raise ProfilerConfigurationError( # noqa: TRY003 # FIXME CoP 'Both "class_name" and "module_name" must be specified.' ) diff --git a/great_expectations/experimental/rule_based_profiler/rule_based_profiler_result.py b/great_expectations/experimental/rule_based_profiler/rule_based_profiler_result.py index 65d1b6f300ac..57b92a56df52 100644 --- a/great_expectations/experimental/rule_based_profiler/rule_based_profiler_result.py +++ b/great_expectations/experimental/rule_based_profiler/rule_based_profiler_result.py @@ -5,20 +5,20 @@ from great_expectations.compatibility.typing_extensions import override from great_expectations.core import ( - ExpectationSuite, # noqa: TCH001 + ExpectationSuite, # noqa: TCH001 # FIXME CoP ) -from great_expectations.core.domain import Domain # noqa: TCH001 +from great_expectations.core.domain import Domain # noqa: TCH001 # FIXME CoP from great_expectations.expectations.expectation_configuration import ( - ExpectationConfiguration, # noqa: TCH001 + ExpectationConfiguration, # noqa: TCH001 # FIXME CoP ) from great_expectations.experimental.rule_based_profiler.helpers.util import ( 
get_or_create_expectation_suite, ) from great_expectations.experimental.rule_based_profiler.parameter_container import ( - ParameterNode, # noqa: TCH001 + ParameterNode, # noqa: TCH001 # FIXME CoP ) from great_expectations.types import SerializableDictDot -from great_expectations.util import convert_to_json_serializable # noqa: TID251 +from great_expectations.util import convert_to_json_serializable # noqa: TID251 # FIXME CoP if TYPE_CHECKING: from great_expectations.alias_types import JSONValues @@ -45,7 +45,7 @@ class RuleBasedProfilerResult(SerializableDictDot): If True, then catch exceptions and include them as part of the result object. \ For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP fully_qualified_parameter_names_by_domain: Dict[Domain, List[str]] parameter_values_for_fully_qualified_parameter_names_by_domain: Optional[ @@ -63,7 +63,7 @@ def to_dict(self) -> dict: """ Returns: This `RuleBasedProfilerResult` as dictionary (JSON-serializable for `RuleBasedProfilerResult` objects). 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain: Domain fully_qualified_parameter_names: List[str] parameter_values_for_fully_qualified_parameter_names: Dict[str, ParameterNode] @@ -80,17 +80,17 @@ def to_dict(self) -> dict: data=fully_qualified_parameter_names ), } - for domain, fully_qualified_parameter_names in self.fully_qualified_parameter_names_by_domain.items() # noqa: E501 + for domain, fully_qualified_parameter_names in self.fully_qualified_parameter_names_by_domain.items() # noqa: E501 # FIXME CoP ], "parameter_values_for_fully_qualified_parameter_names_by_domain": [ { "domain_id": domain.id, "domain": domain.to_json_dict(), - "parameter_values_for_fully_qualified_parameter_names": convert_to_json_serializable( # noqa: E501 + "parameter_values_for_fully_qualified_parameter_names": convert_to_json_serializable( # noqa: E501 # FIXME CoP data=parameter_values_for_fully_qualified_parameter_names ), } - for domain, parameter_values_for_fully_qualified_parameter_names in parameter_values_for_fully_qualified_parameter_names_by_domain.items() # noqa: E501 + for domain, parameter_values_for_fully_qualified_parameter_names in parameter_values_for_fully_qualified_parameter_names_by_domain.items() # noqa: E501 # FIXME CoP ], "expectation_configurations": [ expectation_configuration.to_json_dict() diff --git a/great_expectations/profile/base.py b/great_expectations/profile/base.py index b2a42dc65b80..e87d836ce8e7 100644 --- a/great_expectations/profile/base.py +++ b/great_expectations/profile/base.py @@ -58,20 +58,20 @@ def get_basic_column_cardinality(cls, num_unique=0, pct_unique=0) -> OrderedProf Returns: The column cardinality - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if pct_unique == 1.0: cardinality = cls.UNIQUE elif num_unique == 1: cardinality = cls.ONE - elif num_unique == 2: # noqa: PLR2004 + elif num_unique == 2: # noqa: PLR2004 # FIXME CoP cardinality = cls.TWO - elif 0 < num_unique < 20: # noqa: PLR2004 + elif 0 < num_unique < 20: # noqa: 
PLR2004 # FIXME CoP cardinality = cls.VERY_FEW - elif 0 < num_unique < 60: # noqa: PLR2004 + elif 0 < num_unique < 60: # noqa: PLR2004 # FIXME CoP cardinality = cls.FEW elif num_unique is None or num_unique == 0 or pct_unique is None: cardinality = cls.NONE - elif pct_unique > 0.1: # noqa: PLR2004 + elif pct_unique > 0.1: # noqa: PLR2004 # FIXME CoP cardinality = cls.VERY_MANY else: cardinality = cls.MANY diff --git a/great_expectations/render/components.py b/great_expectations/render/components.py index b000d58fb333..ff383bc0980e 100644 --- a/great_expectations/render/components.py +++ b/great_expectations/render/components.py @@ -8,7 +8,7 @@ from marshmallow import Schema, fields, post_dump, post_load -from great_expectations.alias_types import JSONValues # noqa: TCH001 +from great_expectations.alias_types import JSONValues # noqa: TCH001 # FIXME CoP from great_expectations.compatibility.typing_extensions import override from great_expectations.render.exceptions import InvalidRenderedContentError from great_expectations.types import DictDot @@ -282,9 +282,9 @@ class RenderedTableContent(RenderedComponentContent): name and the values being a dictionary with the following form: sortable: A boolean indicating whether the column is sortable. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, table: list[RenderedContent], header: Optional[Union[RenderedContent, dict]] = None, @@ -363,7 +363,7 @@ def to_json_dict(self) -> dict[str, JSONValues]: class RenderedBootstrapTableContent(RenderedComponentContent): - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, table_data, table_columns, @@ -465,7 +465,7 @@ class RenderedStringTemplateContent(RenderedComponentContent): styling: A dictionary containing styling information. styling: A dictionary containing styling information. content_block_type: The type of content block. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def __init__( self, @@ -613,9 +613,9 @@ class CollapseContent(RenderedComponentContent): styling: A dictionary containing styling information. content_block_type: The type of content block. inline_link: Whether to include a link inline. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, collapse: Union[RenderedContent, list], collapse_toggle_link: Optional[Union[RenderedContent, dict]] = None, @@ -663,7 +663,7 @@ def to_json_dict(self) -> dict[str, JSONValues]: class RenderedDocumentContent(RenderedContent): # NOTE: JPC 20191028 - review these keys to consolidate and group - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, sections, data_asset_name=None, @@ -680,7 +680,7 @@ def __init__( # noqa: PLR0913 if not isinstance(sections, list) and all( isinstance(section, RenderedSectionContent) for section in sections ): - raise InvalidRenderedContentError( # noqa: TRY003 + raise InvalidRenderedContentError( # noqa: TRY003 # FIXME CoP "RenderedDocumentContent requires a list of RenderedSectionContent for " "sections." ) self.sections = sections @@ -722,7 +722,7 @@ def __init__(self, content_blocks, section_name=None) -> None: if not isinstance(content_blocks, list) and all( isinstance(content_block, RenderedComponentContent) for content_block in content_blocks ): - raise InvalidRenderedContentError( # noqa: TRY003 + raise InvalidRenderedContentError( # noqa: TRY003 # FIXME CoP "Rendered section content requires a list of RenderedComponentContent " "for content blocks." 
) @@ -743,7 +743,7 @@ def to_json_dict(self) -> dict[str, JSONValues]: class RenderedAtomicValue(DictDot): - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, schema: Optional[dict] = None, header: Optional[RenderedAtomicValue] = None, @@ -950,7 +950,7 @@ def make_rendered_atomic_content(self, data, **kwargs): @post_dump def clean_null_attrs(self, data: dict, **kwargs: dict) -> dict: """Removes the attributes in RenderedAtomicContentSchema.REMOVE_KEYS_IF_NONE during serialization if - their values are None.""" # noqa: E501 + their values are None.""" # noqa: E501 # FIXME CoP data = deepcopy(data) for key in RenderedAtomicContentSchema.REMOVE_KEYS_IF_NONE: if key in data and data[key] is None: diff --git a/great_expectations/render/renderer/call_to_action_renderer.py b/great_expectations/render/renderer/call_to_action_renderer.py index d9b98a31de5b..1c302e4925d9 100644 --- a/great_expectations/render/renderer/call_to_action_renderer.py +++ b/great_expectations/render/renderer/call_to_action_renderer.py @@ -45,7 +45,7 @@ def render(cls, cta_object): "template": "$icon", "params": {"icon": ""}, "tooltip": { - "content": "To disable this footer, set the show_how_to_buttons flag in your project's data_docs_sites config to false." # noqa: E501 + "content": "To disable this footer, set the show_how_to_buttons flag in your project's data_docs_sites config to false." 
# noqa: E501 # FIXME CoP }, "styling": { "params": { diff --git a/great_expectations/render/renderer/column_section_renderer.py b/great_expectations/render/renderer/column_section_renderer.py index b089d27cd707..36b46ca551b5 100644 --- a/great_expectations/render/renderer/column_section_renderer.py +++ b/great_expectations/render/renderer/column_section_renderer.py @@ -51,8 +51,8 @@ def _get_column_name(cls, ge_object): elif isinstance(candidate_object, ExpectationValidationResult): return candidate_object.expectation_config.kwargs["column"] else: - raise ValueError( # noqa: TRY003, TRY004 - "Provide a column section renderer an expectation, list of expectations, evr, or list of evrs." # noqa: E501 + raise ValueError( # noqa: TRY003, TRY004 # FIXME CoP + "Provide a column section renderer an expectation, list of expectations, evr, or list of evrs." # noqa: E501 # FIXME CoP ) except KeyError: return "Table-Level Expectations" @@ -112,12 +112,12 @@ def render(self, evrs, section_name=None, column_type=None): # type: ignore[exp An unexpected Exception occurred during data docs rendering. Because of this error, certain parts of data docs will \ not be rendered properly and/or may not appear altogether. Please use the trace, included in this message, to \ diagnose and repair the underlying issue. Detailed information follows: - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP exception_traceback = traceback.format_exc() exception_message += ( f'{type(e).__name__}: "{e!s}". Traceback: "{exception_traceback}".' ) - logger.error(exception_message) # noqa: TRY400 + logger.error(exception_message) # noqa: TRY400 # FIXME CoP # NOTE : Some render* functions return None so we filter them out populated_content_blocks = list(filter(None, content_blocks)) @@ -160,7 +160,7 @@ def _render_header(cls, evrs, column_type=None): "string_template": { "template": f"Type: {column_type}", "tooltip": { - "content": "expect_column_values_to_be_of_type
expect_column_values_to_be_in_type_list", # noqa: E501 + "content": "expect_column_values_to_be_of_type
expect_column_values_to_be_in_type_list", # noqa: E501 # FIXME CoP }, "tag": "h6", "styling": {"classes": ["mt-1", "mb-0"]}, @@ -221,7 +221,7 @@ def _render_expectation_types(cls, evrs, content_blocks) -> None: **{ "content_block_type": "bullet_list", "header": RenderedStringTemplateContent( - **{ # type: ignore[arg-type] + **{ # type: ignore[arg-type] # FIXME CoP "content_block_type": "string_template", "string_template": { "template": 'Expectation types ', diff --git a/great_expectations/render/renderer/content_block/__init__.py b/great_expectations/render/renderer/content_block/__init__.py index 7f5654520567..39773baf3ff6 100644 --- a/great_expectations/render/renderer/content_block/__init__.py +++ b/great_expectations/render/renderer/content_block/__init__.py @@ -1,4 +1,4 @@ -from great_expectations.render.renderer.content_block.validation_results_table_content_block import ( # noqa: E501 +from great_expectations.render.renderer.content_block.validation_results_table_content_block import ( # noqa: E501 # FIXME CoP ValidationResultsTableContentBlockRenderer, ) diff --git a/great_expectations/render/renderer/content_block/content_block.py b/great_expectations/render/renderer/content_block/content_block.py index 023efb8cb826..36f71da2350a 100644 --- a/great_expectations/render/renderer/content_block/content_block.py +++ b/great_expectations/render/renderer/content_block/content_block.py @@ -51,7 +51,7 @@ def render(cls, render_object: Any, **kwargs) -> Union[_rendered_component_type, An unexpected Exception occurred during data docs rendering. Because of this error, certain parts of data docs will \ not be rendered properly and/or may not appear altogether. Please use the trace, included in this message, to \ diagnose and repair the underlying issue. 
Detailed information follows: - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP runtime_configuration = { "styling": cls._get_element_styling(), @@ -88,7 +88,7 @@ def _get_content_block_fn_from_render_object( ) @classmethod - def _render_list( # noqa: C901, PLR0912 + def _render_list( # noqa: C901, PLR0912 # FIXME CoP cls, render_object: list, exception_list_content_block: bool, @@ -129,7 +129,7 @@ def _render_list( # noqa: C901, PLR0912 data_docs_exception_message + f'{type(e).__name__}: "{e!s}". Traceback: "{exception_traceback}".' ) - logger.error(exception_message) # noqa: TRY400 + logger.error(exception_message) # noqa: TRY400 # FIXME CoP if isinstance(obj_, ExpectationValidationResult): content_block_fn = cls._get_content_block_fn("_missing_content_block_fn") @@ -147,7 +147,7 @@ def _render_list( # noqa: C901, PLR0912 runtime_configuration=runtime_configuration, **kwargs, ) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if isinstance(obj_, ExpectationValidationResult): content_block_fn = ( cls._missing_content_block_fn @@ -214,7 +214,7 @@ def _render_list( # noqa: C901, PLR0912 return None @classmethod - def _render_other( # noqa: C901 + def _render_other( # noqa: C901 # FIXME CoP cls, render_object: Any, exception_list_content_block: bool, @@ -244,7 +244,7 @@ def _render_other( # noqa: C901 data_docs_exception_message + f'{type(e).__name__}: "{e!s}". Traceback: "{exception_traceback}".' 
) - logger.error(exception_message) # noqa: TRY400 + logger.error(exception_message) # noqa: TRY400 # FIXME CoP if isinstance(render_object, ExpectationValidationResult): content_block_fn = cls._get_content_block_fn("_missing_content_block_fn") @@ -260,7 +260,7 @@ def _render_other( # noqa: C901 runtime_configuration=runtime_configuration, **kwargs, ) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if isinstance(render_object, ExpectationValidationResult): content_block_fn = ( cls._missing_content_block_fn @@ -296,11 +296,11 @@ def _render_expectation_description( expectation = configuration.to_domain_obj() description = expectation.description if not description: - raise ValueError("Cannot render an expectation with no description.") # noqa: TRY003 - # If we wish to support $VAR substitution, we should use RenderedStringTemplateContent with params # noqa: E501 + raise ValueError("Cannot render an expectation with no description.") # noqa: TRY003 # FIXME CoP return [ - RenderedMarkdownContent( - markdown=description, styling=runtime_configuration.get("styling", {}) + RenderedStringTemplateContent( + string_template={"template": description}, + styling=runtime_configuration.get("styling", {}), ) ] diff --git a/great_expectations/render/renderer/content_block/expectation_string.py b/great_expectations/render/renderer/content_block/expectation_string.py index 30118c68244e..81729cb669b6 100644 --- a/great_expectations/render/renderer/content_block/expectation_string.py +++ b/great_expectations/render/renderer/content_block/expectation_string.py @@ -35,7 +35,7 @@ def _missing_content_block_fn( ) return [ RenderedStringTemplateContent( - **{ # type: ignore[arg-type] + **{ # type: ignore[arg-type] # FIXME CoP "content_block_type": "string_template", "styling": {"parent": {"classes": ["alert", "alert-warning"]}}, "string_template": { @@ -67,7 +67,7 @@ def _diagnostic_status_icon_renderer( assert result, "Must provide a result object." 
if result.exception_info["raised_exception"]: return RenderedStringTemplateContent( - **{ # type: ignore[arg-type] + **{ # type: ignore[arg-type] # FIXME CoP "content_block_type": "string_template", "string_template": { "template": "$icon", @@ -90,7 +90,7 @@ def _diagnostic_status_icon_renderer( if result.success: return RenderedStringTemplateContent( - **{ # type: ignore[arg-type] + **{ # type: ignore[arg-type] # FIXME CoP "content_block_type": "string_template", "string_template": { "template": "$icon", @@ -113,7 +113,7 @@ def _diagnostic_status_icon_renderer( ) else: return RenderedStringTemplateContent( - **{ # type: ignore[arg-type] + **{ # type: ignore[arg-type] # FIXME CoP "content_block_type": "string_template", "string_template": { "template": "$icon", diff --git a/great_expectations/render/renderer/content_block/validation_results_table_content_block.py b/great_expectations/render/renderer/content_block/validation_results_table_content_block.py index 7556ecd586c6..5e94d52ccad4 100644 --- a/great_expectations/render/renderer/content_block/validation_results_table_content_block.py +++ b/great_expectations/render/renderer/content_block/validation_results_table_content_block.py @@ -80,7 +80,7 @@ def _process_content_block(cls, content_block, has_failed_evr, render_object=Non @override @classmethod - def _get_content_block_fn( # noqa: C901, PLR0915 + def _get_content_block_fn( # noqa: C901 # FIXME CoP cls, expectation_type: str, expectation_config: ExpectationConfiguration | None = None, @@ -88,8 +88,6 @@ def _get_content_block_fn( # noqa: C901, PLR0915 content_block_fn = super()._get_content_block_fn( expectation_type=expectation_type, expectation_config=expectation_config ) - if content_block_fn == cls._render_expectation_description: - return content_block_fn expectation_string_fn = content_block_fn if expectation_string_fn is None: @@ -99,8 +97,8 @@ def _get_content_block_fn( # noqa: C901, PLR0915 if expectation_string_fn is None: expectation_string_fn = 
cls._missing_content_block_fn - # This function wraps expect_* methods from ExpectationStringRenderer to generate table classes # noqa: E501 - def row_generator_fn( # noqa: C901 + # This function wraps expect_* methods from ExpectationStringRenderer to generate table classes # noqa: E501 # FIXME CoP + def row_generator_fn( # noqa: C901 # FIXME CoP configuration=None, result=None, runtime_configuration=None, @@ -133,7 +131,7 @@ def row_generator_fn( # noqa: C901 An unexpected Exception occurred during data docs rendering. Because of this error, certain parts of data docs will \ not be rendered properly and/or may not appear altogether. Please use the trace, included in this message, to \ diagnose and repair the underlying issue. Detailed information follows: - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP try: unexpected_statement_renderer = get_renderer_impl( object_name=expectation_type, @@ -150,7 +148,7 @@ def row_generator_fn( # noqa: C901 data_docs_exception_message + f'{type(e).__name__}: "{e!s}". Traceback: "{exception_traceback}".' ) - logger.error(exception_message) # noqa: TRY400 + logger.error(exception_message) # noqa: TRY400 # FIXME CoP try: unexpected_table_renderer = get_renderer_impl( object_name=expectation_type, @@ -167,7 +165,7 @@ def row_generator_fn( # noqa: C901 data_docs_exception_message + f'{type(e).__name__}: "{e!s}". Traceback: "{exception_traceback}".' ) - logger.error(exception_message) # noqa: TRY400 + logger.error(exception_message) # noqa: TRY400 # FIXME CoP try: observed_value_renderer = get_renderer_impl( object_name=expectation_type, @@ -186,7 +184,7 @@ def row_generator_fn( # noqa: C901 data_docs_exception_message + f'{type(e).__name__}: "{e!s}". Traceback: "{exception_traceback}".' 
) - logger.error(exception_message) # noqa: TRY400 + logger.error(exception_message) # noqa: TRY400 # FIXME CoP # If the expectation has some unexpected values...: if unexpected_statement: @@ -213,14 +211,14 @@ def row_generator_fn( # noqa: C901 def _get_legacy_v2_api_style_expectation_string_fn(cls, expectation_type): legacy_expectation_string_fn = getattr(cls, expectation_type, None) if legacy_expectation_string_fn is None: - # With the V2 API, expectation rendering was implemented by defining a method with the same name as the expectation. # noqa: E501 + # With the V2 API, expectation rendering was implemented by defining a method with the same name as the expectation. # noqa: E501 # FIXME CoP # If no legacy rendering is present, return None. return None # deprecated-v0.13.28 warnings.warn( - "V2 API style custom rendering is deprecated as of v0.13.28 and is not fully supported anymore; " # noqa: E501 - "As it will be removed in v0.16, please transition to V3 API and associated rendering style", # noqa: E501 + "V2 API style custom rendering is deprecated as of v0.13.28 and is not fully supported anymore; " # noqa: E501 # FIXME CoP + "As it will be removed in v0.16, please transition to V3 API and associated rendering style", # noqa: E501 # FIXME CoP DeprecationWarning, ) @@ -230,7 +228,7 @@ def expectation_string_fn_with_legacy_translation( if runtime_configuration is None: runtime_configuration = {} - # With the V2 API, the expectation string function had a different signature; the below translates from the new signature to the legacy signature. # noqa: E501 + # With the V2 API, the expectation string function had a different signature; the below translates from the new signature to the legacy signature. 
# noqa: E501 # FIXME CoP return legacy_expectation_string_fn( expectation=configuration, styling=runtime_configuration.get("styling", None), @@ -242,9 +240,9 @@ def expectation_string_fn_with_legacy_translation( @staticmethod def _get_legacy_v2_api_observed_value(expectation_string_fn, result): if expectation_string_fn.__name__ != "expectation_string_fn_with_legacy_translation": - # If legacy V2 API style rendering is used, "expectation_string_fn" will be the method defined in the above "_get_legacy_v2_api_style_expectation_string_fn". # noqa: E501 + # If legacy V2 API style rendering is used, "expectation_string_fn" will be the method defined in the above "_get_legacy_v2_api_style_expectation_string_fn". # noqa: E501 # FIXME CoP # If this isn't the case, return None, so we don't do any legacy logic. return None - # With V2 API style rendering, the result had an "observed_value" entry that could be rendered. # noqa: E501 + # With V2 API style rendering, the result had an "observed_value" entry that could be rendered. # noqa: E501 # FIXME CoP return result["result"].get("observed_value") diff --git a/great_expectations/render/renderer/email_renderer.py b/great_expectations/render/renderer/email_renderer.py index 6e28b38ff3ac..ce9e19b1fe6f 100644 --- a/great_expectations/render/renderer/email_renderer.py +++ b/great_expectations/render/renderer/email_renderer.py @@ -38,7 +38,7 @@ def _render_validation_result(self, result: ExpectationSuiteValidationResult) -> n_checks = result.statistics["evaluated_expectations"] run_id = result.meta.get("run_id", "__no_run_id__") batch_id = result.batch_id - check_details_text = f"{n_checks_succeeded} of {n_checks} expectations were met" # noqa: E501 + check_details_text = f"{n_checks_succeeded} of {n_checks} expectations were met" # noqa: E501 # FIXME CoP status = "Success 🎉" if result.success else "Failed ❌" title = f"

{suite_name}

" @@ -65,16 +65,16 @@ def _get_report_element(self, docs_link): if "file://" in docs_link: # handle special case since the email does not render these links report_element = str( - f'

DataDocs can be found here: {docs_link}.
' # noqa: E501 + f'

DataDocs can be found here: {docs_link}.
' # noqa: E501 # FIXME CoP "(Please copy and paste link into a browser to view)

", ) else: - report_element = f'

DataDocs can be found here: {docs_link}.

' # noqa: E501 + report_element = f'

DataDocs can be found here: {docs_link}.

' # noqa: E501 # FIXME CoP except Exception as e: logger.warning( f"""EmailRenderer had a problem with generating the docs link. link used to generate the docs link is: {docs_link} and is of type: {type(docs_link)}. - Error: {e}""" # noqa: E501 + Error: {e}""" # noqa: E501 # FIXME CoP ) return else: diff --git a/great_expectations/render/renderer/inline_renderer.py b/great_expectations/render/renderer/inline_renderer.py index cb115e1dfa46..c6afb991bf22 100644 --- a/great_expectations/render/renderer/inline_renderer.py +++ b/great_expectations/render/renderer/inline_renderer.py @@ -46,8 +46,8 @@ def __init__( if isinstance(render_object, (ExpectationConfiguration, ExpectationValidationResult)): self._render_object = render_object else: - raise InlineRendererError( # noqa: TRY003 - f"InlineRenderer can only be used with an ExpectationConfiguration or ExpectationValidationResult, but {type(render_object)} was used." # noqa: E501 + raise InlineRendererError( # noqa: TRY003 # FIXME CoP + f"InlineRenderer can only be used with an ExpectationConfiguration or ExpectationValidationResult, but {type(render_object)} was used." # noqa: E501 # FIXME CoP ) def _get_atomic_rendered_content_for_object( @@ -61,7 +61,7 @@ def _get_atomic_rendered_content_for_object( Returns: A list of RenderedAtomicContent objects for a given ExpectationConfiguration or ExpectationValidationResult. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP expectation_type: str renderer_types: List[AtomicRendererType] if isinstance(render_object, ExpectationConfiguration): @@ -71,16 +71,16 @@ def _get_atomic_rendered_content_for_object( if render_object.expectation_config: expectation_type = render_object.expectation_config.type else: - raise InlineRendererError( # noqa: TRY003 - "ExpectationValidationResult passed to InlineRenderer._get_atomic_rendered_content_for_object is missing an expectation_config." 
# noqa: E501 + raise InlineRendererError( # noqa: TRY003 # FIXME CoP + "ExpectationValidationResult passed to InlineRenderer._get_atomic_rendered_content_for_object is missing an expectation_config." # noqa: E501 # FIXME CoP ) renderer_types = [ AtomicRendererType.DIAGNOSTIC, AtomicRendererType.PRESCRIPTIVE, ] else: - raise InlineRendererError( # noqa: TRY003 - f"InlineRenderer._get_atomic_rendered_content_for_object can only be used with an ExpectationConfiguration or ExpectationValidationResult, but {type(render_object)} was used." # noqa: E501 + raise InlineRendererError( # noqa: TRY003 # FIXME CoP + f"InlineRenderer._get_atomic_rendered_content_for_object can only be used with an ExpectationConfiguration or ExpectationValidationResult, but {type(render_object)} was used." # noqa: E501 # FIXME CoP ) renderer_names: List[ @@ -149,15 +149,15 @@ def _get_renderer_atomic_rendered_content( render_object=render_object, ) else: - raise InlineRendererError( # noqa: TRY003, TRY301 - f"renderer_name: {renderer_name} was not found in the registry for expectation_type: {expectation_type}" # noqa: E501 + raise InlineRendererError( # noqa: TRY003, TRY301 # FIXME CoP + f"renderer_name: {renderer_name} was not found in the registry for expectation_type: {expectation_type}" # noqa: E501 # FIXME CoP ) assert isinstance( renderer_rendered_content, RenderedAtomicContent - ), f"The renderer: {renderer_name} for expectation: {expectation_type} should return RenderedAtomicContent." # noqa: E501 + ), f"The renderer: {renderer_name} for expectation: {expectation_type} should return RenderedAtomicContent." # noqa: E501 # FIXME CoP except Exception as e: - error_message = f'Renderer "{renderer_name}" failed to render Expectation "{expectation_type} with exception message: {e!s}".' # noqa: E501 + error_message = f'Renderer "{renderer_name}" failed to render Expectation "{expectation_type} with exception message: {e!s}".' 
# noqa: E501 # FIXME CoP logger.info(error_message) failure_renderer: AtomicPrescriptiveRendererType | AtomicDiagnosticRendererType @@ -183,7 +183,7 @@ def _get_renderer_atomic_rendered_content( ) renderer_rendered_content.exception = error_message else: - raise InlineRendererError( # noqa: TRY003 + raise InlineRendererError( # noqa: TRY003 # FIXME CoP f'Renderer "{failure_renderer}" was not found in the registry.' ) diff --git a/great_expectations/render/renderer/page_renderer.py b/great_expectations/render/renderer/page_renderer.py index 91005d8f694e..080992ad7b25 100644 --- a/great_expectations/render/renderer/page_renderer.py +++ b/great_expectations/render/renderer/page_renderer.py @@ -85,8 +85,8 @@ def render( # Add datasource key to batch_kwargs if missing if "datasource" not in batch_kwargs: - # Check if expectation_suite_name follows datasource.batch_kwargs_generator.data_asset_name.suite_name pattern # noqa: E501 - if len(expectation_suite_name.split(".")) == 4: # noqa: PLR2004 + # Check if expectation_suite_name follows datasource.batch_kwargs_generator.data_asset_name.suite_name pattern # noqa: E501 # FIXME CoP + if len(expectation_suite_name.split(".")) == 4: # noqa: PLR2004 # FIXME CoP batch_kwargs["datasource"] = expectation_suite_name.split(".")[0] columns = self._group_evrs_by_column(validation_results, expectation_suite_name) @@ -278,7 +278,7 @@ def _determine_page_title( include_run_name: bool = False if ( - run_name_as_time != run_time_datetime # noqa: PLR1714 + run_name_as_time != run_time_datetime # noqa: PLR1714 # FIXME CoP and run_name_as_time != "__none__" ): include_run_name = True @@ -343,7 +343,7 @@ def _render_validation_header(cls, validation_results): + ["expectations"] + str(expectation_suite_name).split(".") ) - expectation_suite_path = f"{os.path.join(*expectation_suite_path_components)}.html" # noqa: PTH118 + expectation_suite_path = f"{os.path.join(*expectation_suite_path_components)}.html" # noqa: PTH118 # FIXME CoP # TODO: 
deprecate dual batch api support in 0.14 batch_kwargs = ( validation_results.meta.get("batch_kwargs", {}) @@ -378,7 +378,7 @@ def _render_validation_header(cls, validation_results): **{ "content_block_type": "string_template", "string_template": { - "template": "${suite_title} ${expectation_suite_name}\n ${data_asset} ${data_asset_name}\n ${status_title} ${html_success_icon} ${success}", # noqa: E501 + "template": "${suite_title} ${expectation_suite_name}\n ${data_asset} ${data_asset_name}\n ${status_title} ${html_success_icon} ${success}", # noqa: E501 # FIXME CoP "params": { "suite_title": "Expectation Suite:", "data_asset": "Data asset:", @@ -669,7 +669,7 @@ def _get_grouped_and_ordered_expectations_by_column( # Group items by column sorted_columns = sorted(list(expectations_by_column.keys())) - # only return ordered columns from expect_table_columns_to_match_ordered_list evr if they match set of column # noqa: E501 + # only return ordered columns from expect_table_columns_to_match_ordered_list evr if they match set of column # noqa: E501 # FIXME CoP # names from entire evr, else use alphabetic sort if set(sorted_columns) == set(ordered_columns): return expectations_by_column, ordered_columns @@ -816,9 +816,9 @@ def _render_expectation_suite_notes(cls, expectations: ExpectationSuite) -> Text content += [ # TODO: Leaving these two paragraphs as placeholders for later development. 
- # "This Expectation suite was first generated by {BasicDatasetProfiler} on {date}, using version {xxx} of Great Expectations.", # noqa: E501 + # "This Expectation suite was first generated by {BasicDatasetProfiler} on {date}, using version {xxx} of Great Expectations.", # noqa: E501 # FIXME CoP # "{name}, {name}, and {name} have also contributed additions and revisions.", - f"This Expectation suite currently contains {total_expectations} total Expectations across {total_columns} columns.", # noqa: E501 + f"This Expectation suite currently contains {total_expectations} total Expectations across {total_columns} columns.", # noqa: E501 # FIXME CoP ] notes = expectations.notes @@ -910,7 +910,7 @@ def __init__(self, overview_section_renderer=None, column_section_renderer=None) class_name=column_section_renderer["class_name"], ) - def render(self, validation_results): # noqa: C901, PLR0912 + def render(self, validation_results): # noqa: C901, PLR0912 # FIXME CoP run_id = validation_results.meta["run_id"] if isinstance(run_id, str): try: @@ -932,8 +932,8 @@ def render(self, validation_results): # noqa: C901, PLR0912 # add datasource key to batch_kwargs if missing if "datasource" not in batch_kwargs and "datasource" not in batch_kwargs: - # check if expectation_suite_name follows datasource.batch_kwargs_generator.data_asset_name.suite_name pattern # noqa: E501 - if len(expectation_suite_name.split(".")) == 4: # noqa: PLR2004 + # check if expectation_suite_name follows datasource.batch_kwargs_generator.data_asset_name.suite_name pattern # noqa: E501 # FIXME CoP + if len(expectation_suite_name.split(".")) == 4: # noqa: PLR2004 # FIXME CoP if "batch_kwargs" in validation_results.meta: batch_kwargs["datasource"] = expectation_suite_name.split(".")[0] else: @@ -959,7 +959,7 @@ def render(self, validation_results): # noqa: C901, PLR0912 include_run_name: bool = False if ( - run_name_as_time != run_time_datetime # noqa: PLR1714 + run_name_as_time != run_time_datetime # noqa: 
PLR1714 # FIXME CoP and run_name_as_time != "__none__" ): include_run_name = True diff --git a/great_expectations/render/renderer/profiling_results_overview_section_renderer.py b/great_expectations/render/renderer/profiling_results_overview_section_renderer.py index 083dc7054fd0..dac2f5ac6308 100644 --- a/great_expectations/render/renderer/profiling_results_overview_section_renderer.py +++ b/great_expectations/render/renderer/profiling_results_overview_section_renderer.py @@ -20,8 +20,8 @@ class ProfilingResultsOverviewSectionRenderer(Renderer): def render(cls, evrs, section_name=None): content_blocks = [] # NOTE: I don't love the way this builds content_blocks as a side effect. - # The top-level API is clean and scannable, but the function internals are counterintutitive and hard to test. # noqa: E501 - # I wonder if we can enable something like jquery chaining for this. That would be concise AND testable. # noqa: E501 + # The top-level API is clean and scannable, but the function internals are counterintutitive and hard to test. # noqa: E501 # FIXME CoP + # I wonder if we can enable something like jquery chaining for this. That would be concise AND testable. # noqa: E501 # FIXME CoP # Pressing on for now... cls._render_header(evrs, content_blocks) cls._render_dataset_info(evrs, content_blocks) @@ -94,7 +94,7 @@ def _render_dataset_info(cls, evrs, content_blocks) -> None: "Missing cells", cls._get_percentage_missing_cells_str(evrs), ], - # ["Duplicate rows", "0 (0.0%)", ], #TODO: bring back when we have an expectation for this # noqa: E501 + # ["Duplicate rows", "0 (0.0%)", ], #TODO: bring back when we have an expectation for this # noqa: E501 # FIXME CoP ] content_blocks.append( @@ -122,7 +122,7 @@ def _render_dataset_info(cls, evrs, content_blocks) -> None: @classmethod def _render_variable_types(cls, evrs, content_blocks) -> None: column_types = cls._get_column_types(evrs) - # TODO: check if we have the information to make this statement. 
Do all columns have type expectations? # noqa: E501 + # TODO: check if we have the information to make this statement. Do all columns have type expectations? # noqa: E501 # FIXME CoP column_type_counter = Counter(column_types.values()) table_rows = [ [type, str(column_type_counter[type])] @@ -254,7 +254,7 @@ def _render_warnings(cls, evrs, content_blocks): # render_warning_row( # "$column has $n ($p%) missing values", "Age", 177, 19.9, "Missing"), # render_warning_row( - # "$column has a high cardinality: $n distinct values", "Cabin", 148, None, "Warning"), # noqa: E501 + # "$column has a high cardinality: $n distinct values", "Cabin", 148, None, "Warning"), # noqa: E501 # FIXME CoP # render_warning_row( # "$column has $n ($p%) missing values", "Cabin", 687, 77.1, "Missing"), # render_warning_row( @@ -293,7 +293,7 @@ def _get_percentage_missing_cells_str(cls, evrs): if len(columns) > len(expect_column_values_to_not_be_null_evrs): warnings.warn( - "Cannot get % of missing cells - not all columns have expect_column_values_to_not_be_null expectations" # noqa: E501 + "Cannot get % of missing cells - not all columns have expect_column_values_to_not_be_null expectations" # noqa: E501 # FIXME CoP ) return "?" 
@@ -310,7 +310,7 @@ def _get_percentage_missing_cells_str(cls, evrs): ) @classmethod - def _get_column_types(cls, evrs): # noqa: C901 + def _get_column_types(cls, evrs): # noqa: C901 # FIXME CoP columns = cls._get_column_list_from_evrs(evrs) type_evrs = cls._find_all_evrs_by_type( diff --git a/great_expectations/render/renderer/renderer.py b/great_expectations/render/renderer/renderer.py index 93cad0710081..5eaab84502ee 100644 --- a/great_expectations/render/renderer/renderer.py +++ b/great_expectations/render/renderer/renderer.py @@ -27,8 +27,8 @@ def wrapper(renderer_fn: Callable[P, T]) -> Callable[P, T]: def inner_func(*args: P.args, **kwargs: P.kwargs): return renderer_fn(*args, **kwargs) - inner_func._renderer_type = renderer_type # type: ignore[attr-defined] - inner_func._renderer_definition_kwargs = kwargs # type: ignore[attr-defined] + inner_func._renderer_type = renderer_type # type: ignore[attr-defined] # FIXME CoP + inner_func._renderer_definition_kwargs = kwargs # type: ignore[attr-defined] # FIXME CoP return inner_func return wrapper @@ -44,7 +44,7 @@ def __eq__(self, other: object) -> bool: return type(self) is type(other) def serialize(self) -> dict: - # Necessary to enable proper serialization within an Action (and additionally, within a Checkpoint) # noqa: E501 + # Necessary to enable proper serialization within an Action (and additionally, within a Checkpoint) # noqa: E501 # FIXME CoP # TODO: Renderers should be ported over to Pydantic to prevent this fork in logic return { "module_name": self.__class__.__module__, @@ -91,7 +91,7 @@ def _get_column_list_from_evrs(cls, evrs): :param evrs: :return: list of columns with best effort sorting - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP evrs_ = evrs if isinstance(evrs, list) else evrs.results expect_table_columns_to_match_ordered_list_evr = cls._find_evr_by_type( @@ -115,7 +115,7 @@ def _get_column_list_from_evrs(cls, evrs): else: ordered_columns = [] - # only return ordered columns from 
expect_table_columns_to_match_ordered_list evr if they match set of column # noqa: E501 + # only return ordered columns from expect_table_columns_to_match_ordered_list evr if they match set of column # noqa: E501 # FIXME CoP # names from entire evr if set(sorted_columns) == set(ordered_columns): return ordered_columns diff --git a/great_expectations/render/renderer/site_builder.py b/great_expectations/render/renderer/site_builder.py index 244520d33c01..3cf4e66dfc7a 100644 --- a/great_expectations/render/renderer/site_builder.py +++ b/great_expectations/render/renderer/site_builder.py @@ -120,7 +120,7 @@ class SiteBuilder: class_name: DefaultJinjaIndexPageView """ - def __init__( # noqa: C901, PLR0912, PLR0913 + def __init__( # noqa: C901, PLR0912, PLR0913 # FIXME CoP self, data_context: AbstractDataContext, store_backend, @@ -148,21 +148,21 @@ def __init__( # noqa: C901, PLR0912, PLR0913 # set custom_styles_directory if present custom_styles_directory = None plugins_directory = data_context.plugins_directory - if plugins_directory and os.path.isdir( # noqa: PTH112 - os.path.join( # noqa: PTH118 + if plugins_directory and os.path.isdir( # noqa: PTH112 # FIXME CoP + os.path.join( # noqa: PTH118 # FIXME CoP plugins_directory, "custom_data_docs", "styles" ) ): - custom_styles_directory = os.path.join( # noqa: PTH118 + custom_styles_directory = os.path.join( # noqa: PTH118 # FIXME CoP plugins_directory, "custom_data_docs", "styles" ) # set custom_views_directory if present custom_views_directory = None - if plugins_directory and os.path.isdir( # noqa: PTH112 - os.path.join(plugins_directory, "custom_data_docs", "views") # noqa: PTH118 + if plugins_directory and os.path.isdir( # noqa: PTH112 # FIXME CoP + os.path.join(plugins_directory, "custom_data_docs", "views") # noqa: PTH118 # FIXME CoP ): - custom_views_directory = os.path.join( # noqa: PTH118 + custom_views_directory = os.path.join( # noqa: PTH118 # FIXME CoP plugins_directory, "custom_data_docs", "views" ) @@ 
-340,7 +340,7 @@ def get_resource_url(self, resource_identifier=None, only_if_exists=True) -> Opt class DefaultSiteSectionBuilder: - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, name, data_context: AbstractDataContext, @@ -372,7 +372,7 @@ def __init__( # noqa: PLR0913 self.cloud_mode = cloud_mode self.ge_cloud_mode = cloud_mode if renderer is None: - raise exceptions.InvalidConfigError( # noqa: TRY003 + raise exceptions.InvalidConfigError( # noqa: TRY003 # FIXME CoP "SiteSectionBuilder requires a renderer configuration " "with a class_name key." ) module_name = renderer.get("module_name") or "great_expectations.render.renderer" @@ -410,7 +410,7 @@ def __init__( # noqa: PLR0913 class_name=view["class_name"], ) - def build(self, resource_identifiers=None) -> None: # noqa: C901, PLR0912 + def build(self, resource_identifiers=None) -> None: # noqa: C901, PLR0912 # FIXME CoP source_store_keys = self.source_store.list_keys() if self.name == "validations" and self.validation_results_limit: source_store_keys = sorted( @@ -453,7 +453,7 @@ def build(self, resource_identifiers=None) -> None: # noqa: C901, PLR0912 ) else: logger.debug( - f" Rendering validation: run name: {run_name}, run time: {run_time}, suite {expectation_suite_name} for batch {resource_key.batch_identifier}" # noqa: E501 + f" Rendering validation: run name: {run_name}, run time: {run_time}, suite {expectation_suite_name} for batch {resource_key.batch_identifier}" # noqa: E501 # FIXME CoP ) try: @@ -485,16 +485,16 @@ def build(self, resource_identifiers=None) -> None: # noqa: C901, PLR0912 An unexpected Exception occurred during data docs rendering. Because of this error, certain parts of data docs will \ not be rendered properly and/or may not appear altogether. Please use the trace, included in this message, to \ diagnose and repair the underlying issue. 
Detailed information follows: - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP exception_traceback = traceback.format_exc() exception_message += ( f'{type(e).__name__}: "{e!s}". ' f'Traceback: "{exception_traceback}".' ) - logger.error(exception_message) # noqa: TRY400 + logger.error(exception_message) # noqa: TRY400 # FIXME CoP class DefaultSiteIndexBuilder: - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, name, site_name, @@ -562,7 +562,7 @@ def __init__( # noqa: PLR0913 class_name=view["class_name"], ) - def add_resource_info_to_index_links_dict( # noqa: PLR0913 + def add_resource_info_to_index_links_dict( # noqa: PLR0913 # FIXME CoP self, index_links_dict, expectation_suite_name, @@ -601,7 +601,7 @@ def add_resource_info_to_index_links_dict( # noqa: PLR0913 url_encoded_filepath = urllib.parse.quote(filepath) - expectation_suite_filepath = os.path.join( # noqa: PTH118 + expectation_suite_filepath = os.path.join( # noqa: PTH118 # FIXME CoP "expectations", *expectation_suite_name.split(".") ) expectation_suite_filepath += ".html" @@ -656,7 +656,7 @@ def get_calls_to_action(self): # ) return { - "header": "To continue exploring Great Expectations check out one of these tutorials...", # noqa: E501 + "header": "To continue exploring Great Expectations check out one of these tutorials...", # noqa: E501 # FIXME CoP "buttons": self._get_call_to_action_buttons(usage_statistics), } @@ -715,7 +715,7 @@ def build( be skipped and removed from the target store :param build_index: a flag if False, skips building the index page :return: tuple(index_page_url, index_links_dict) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # Loop over sections in the HtmlStore logger.debug("DefaultSiteIndexBuilder.build") @@ -753,12 +753,12 @@ def build( An unexpected Exception occurred during data docs rendering. Because of this error, certain parts of data docs will \ not be rendered properly and/or may not appear altogether. 
Please use the trace, included in this message, to \ diagnose and repair the underlying issue. Detailed information follows: - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP exception_traceback = traceback.format_exc() exception_message += ( f'{type(e).__name__}: "{e!s}". Traceback: "{exception_traceback}".' ) - logger.error(exception_message) # noqa: TRY400 + logger.error(exception_message) # noqa: TRY400 # FIXME CoP return self.target_store.write_index_page(viewable_content), index_links_dict @@ -877,7 +877,7 @@ def _add_profiling_to_index_links( batch_spec=batch_spec, ) except Exception: - error_msg = f"Profiling result not found: {profiling_result_key.to_tuple()!s:s} - skipping" # noqa: E501 + error_msg = f"Profiling result not found: {profiling_result_key.to_tuple()!s:s} - skipping" # noqa: E501 # FIXME CoP logger.warning(error_msg) def _add_validations_to_index_links( @@ -933,7 +933,7 @@ def _add_validations_to_index_links( batch_spec=batch_spec, ) except Exception: - error_msg = f"Validation result not found: {validation_result_key.to_tuple()!s:s} - skipping" # noqa: E501 + error_msg = f"Validation result not found: {validation_result_key.to_tuple()!s:s} - skipping" # noqa: E501 # FIXME CoP logger.warning(error_msg) diff --git a/great_expectations/render/renderer/site_index_page_renderer.py b/great_expectations/render/renderer/site_index_page_renderer.py index ce1a00a572e6..9a349554a114 100644 --- a/great_expectations/render/renderer/site_index_page_renderer.py +++ b/great_expectations/render/renderer/site_index_page_renderer.py @@ -469,9 +469,9 @@ def render(cls, index_links_dict): An unexpected Exception occurred during data docs rendering. Because of this error, certain parts of data docs will \ not be rendered properly and/or may not appear altogether. Please use the trace, included in this message, to \ diagnose and repair the underlying issue. 
Detailed information follows: - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP exception_traceback = traceback.format_exc() exception_message += ( f'{type(e).__name__}: "{e!s}". Traceback: "{exception_traceback}".' ) - logger.error(exception_message) # noqa: TRY400 + logger.error(exception_message) # noqa: TRY400 # FIXME CoP diff --git a/great_expectations/render/renderer/slack_renderer.py b/great_expectations/render/renderer/slack_renderer.py index 5a708f684274..345f73634767 100644 --- a/great_expectations/render/renderer/slack_renderer.py +++ b/great_expectations/render/renderer/slack_renderer.py @@ -134,7 +134,7 @@ def _build_footer(self) -> dict: "elements": [ { "type": "mrkdwn", - "text": f"Learn how to review validation results in Data Docs: {documentation_url}", # noqa: E501 + "text": f"Learn how to review validation results in Data Docs: {documentation_url}", # noqa: E501 # FIXME CoP } ], } @@ -149,7 +149,7 @@ def _get_report_element(self, docs_link: str) -> dict[str, Any] | None: "type": "section", "text": { "type": "mrkdwn", - "text": f"*DataDocs* can be found here: `{docs_link}` \n (Please copy and paste link into " # noqa: E501 + "text": f"*DataDocs* can be found here: `{docs_link}` \n (Please copy and paste link into " # noqa: E501 # FIXME CoP f"a browser to view)\n", }, } @@ -165,7 +165,7 @@ def _get_report_element(self, docs_link: str) -> dict[str, Any] | None: logger.warning( f"""SlackRenderer had a problem with generating the docs link. link used to generate the docs link is: {docs_link} and is of type: {type(docs_link)}. 
- Error: {e}""" # noqa: E501 + Error: {e}""" # noqa: E501 # FIXME CoP ) return else: @@ -186,14 +186,14 @@ def _build_report_element_block( else: logger.critical( f"*ERROR*: Slack is trying to provide a link to the following DataDocs: `" - f"{docs_link_key!s}`, but it is not configured under `data_docs_sites` in the " # noqa: E501 + f"{docs_link_key!s}`, but it is not configured under `data_docs_sites` in the " # noqa: E501 # FIXME CoP f"`great_expectations.yml`\n" ) report_element = { "type": "section", "text": { "type": "mrkdwn", - "text": f"*ERROR*: Slack is trying to provide a link to the following DataDocs: " # noqa: E501 + "text": f"*ERROR*: Slack is trying to provide a link to the following DataDocs: " # noqa: E501 # FIXME CoP f"`{docs_link_key!s}`, but it is not configured under " f"`data_docs_sites` in the `great_expectations.yml`\n", }, diff --git a/great_expectations/render/renderer_configuration.py b/great_expectations/render/renderer_configuration.py index de4eeb6baad8..82c79da5777c 100644 --- a/great_expectations/render/renderer_configuration.py +++ b/great_expectations/render/renderer_configuration.py @@ -32,10 +32,10 @@ from great_expectations.compatibility.pydantic import generics as pydantic_generics from great_expectations.compatibility.typing_extensions import override from great_expectations.core import ( - ExpectationValidationResult, # noqa: TCH001 + ExpectationValidationResult, # noqa: TCH001 # FIXME CoP ) from great_expectations.expectations.expectation_configuration import ( - ExpectationConfiguration, # noqa: TCH001 + ExpectationConfiguration, # noqa: TCH001 # FIXME CoP ) from great_expectations.render.exceptions import RendererConfigurationError from great_expectations.render.renderer.observed_value_renderer import ObservedValueRenderState @@ -68,14 +68,14 @@ class RendererSchema(TypedDict): class _RendererValueBase(BaseModel): """ _RendererValueBase is the base for renderer classes that need to override the default pydantic dict 
behavior. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP class Config: validate_assignment = True arbitrary_types_allowed = True @override - def dict( # noqa: PLR0913 + def dict( # noqa: PLR0913 # FIXME CoP self, include: Optional[Union[AbstractSetIntStr, MappingIntStrAny]] = None, exclude: Optional[Union[AbstractSetIntStr, MappingIntStrAny]] = None, @@ -93,7 +93,7 @@ def dict( # noqa: PLR0913 In practice this means the renderer implementer doesn't need to use .dict(by_alias=True, exclude_none=True) everywhere. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return super().dict( include=include, exclude=exclude, @@ -219,7 +219,7 @@ class RendererConfiguration(pydantic_generics.GenericModel, Generic[RendererPara """ Configuration object built for each renderer. Operations to be performed strictly on this object at the renderer implementation-level. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP configuration: Optional[ExpectationConfiguration] = Field(None, allow_mutation=False) result: Optional[ExpectationValidationResult] = Field(None, allow_mutation=False) @@ -249,7 +249,7 @@ def _validate_configuration_or_result(cls, values: dict) -> dict: if ("configuration" not in values or values["configuration"] is None) and ( "result" not in values or values["result"] is None ): - raise RendererConfigurationError( # noqa: TRY003 + raise RendererConfigurationError( # noqa: TRY003 # FIXME CoP "RendererConfiguration must be passed either configuration or result." 
) return values @@ -259,13 +259,13 @@ def __init__(self, **values) -> None: super().__init__(**values) class _RequiredRendererParamArgs(TypedDict): - """Used for building up a dictionary that is unpacked into RendererParams upon initialization.""" # noqa: E501 + """Used for building up a dictionary that is unpacked into RendererParams upon initialization.""" # noqa: E501 # FIXME CoP schema: RendererSchema value: Any class _RendererParamArgs(_RequiredRendererParamArgs, total=False): - """Used for building up a dictionary that is unpacked into RendererParams upon initialization.""" # noqa: E501 + """Used for building up a dictionary that is unpacked into RendererParams upon initialization.""" # noqa: E501 # FIXME CoP suite_parameter: Dict[str, Any] @@ -273,7 +273,7 @@ class _RendererParamBase(_RendererValueBase): """ _RendererParamBase is the base for a param that is added to RendererParams. It contains the validation logic, but it is dynamically renamed in order for the RendererParams attribute to have the same name as the param. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP renderer_schema: RendererSchema = Field(alias="schema", allow_mutation=False) value: Any = Field(allow_mutation=False) @@ -352,9 +352,6 @@ def _validate_and_set_renderer_attrs(cls, values: dict) -> dict: ].expectation_config values["expectation_type"] = expectation_configuration.type values["kwargs"] = expectation_configuration.kwargs - # description is the template_str override - if expectation_configuration.description: - values["template_str"] = expectation_configuration.description raw_configuration: ExpectationConfiguration = ( expectation_configuration.get_raw_configuration() ) @@ -543,13 +540,7 @@ def _get_row_condition_string(row_condition_str: str) -> str: @validator("template_str") def _set_template_str(cls, v: str, values: dict) -> str: - if values.get("configuration") and values["configuration"].description: - # description always overrides other template_strs - v = values["configuration"].description - elif values.get("result") and values["result"].expectation_config.description: - # description always overrides other template_strs - v = values["result"].expectation_config.description - elif values.get("_row_condition"): + if values.get("_row_condition"): row_condition_str: str = RendererConfiguration._get_row_condition_string( row_condition_str=values["_row_condition"] ) @@ -571,8 +562,8 @@ def _choose_param_type_for_value( except ValidationError: pass - raise RendererConfigurationError( # noqa: TRY003 - f"None of the param_types: {[param_type.value for param_type in param_types]} match the value: {value}" # noqa: E501 + raise RendererConfigurationError( # noqa: TRY003 # FIXME CoP + f"None of the param_types: {[param_type.value for param_type in param_types]} match the value: {value}" # noqa: E501 # FIXME CoP ) def add_param( @@ -600,7 +591,7 @@ def add_param( Returns: None - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP renderer_param: Type[BaseModel] = 
RendererConfiguration._get_renderer_value_base_model_type( name=name ) diff --git a/great_expectations/render/util.py b/great_expectations/render/util.py index 8127d6104973..a8058dad86d6 100644 --- a/great_expectations/render/util.py +++ b/great_expectations/render/util.py @@ -22,7 +22,7 @@ ctx.prec = DEFAULT_PRECISION -def num_to_str( # noqa: C901 +def num_to_str( # noqa: C901 # FIXME CoP f: float, precision: int = DEFAULT_PRECISION, use_locale: bool = False, @@ -44,14 +44,14 @@ def num_to_str( # noqa: C901 Returns: A string representation of the input float `f`, according to the desired parameters. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP assert not (use_locale and no_scientific) if precision != DEFAULT_PRECISION: local_context = decimal.Context() local_context.prec = precision else: local_context = ctx - # We cast to string; we want to avoid precision issues, but format everything as though it were a float. # noqa: E501 + # We cast to string; we want to avoid precision issues, but format everything as though it were a float. # noqa: E501 # FIXME CoP # So, if it's not already a float, we will append a decimal point to the string representation s = repr(f) if not isinstance(f, float): @@ -59,7 +59,7 @@ def num_to_str( # noqa: C901 try: d = local_context.create_decimal(s) except decimal.InvalidOperation: - raise TypeError(f"num_to_str received an invalid value: {f} of type {type(f).__name__}.") # noqa: TRY003 + raise TypeError(f"num_to_str received an invalid value: {f} of type {type(f).__name__}.") # noqa: TRY003 # FIXME CoP if no_scientific: result = format(d, "f") elif use_locale: @@ -72,7 +72,7 @@ def num_to_str( # noqa: C901 result = f"≈{result}" decimal_char = locale.localeconv().get("decimal_point") if not isinstance(decimal_char, str): - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"Expected str but got {decimal_char} which is type {type(decimal_char).__name__}." 
) if "e" not in result and "E" not in result and decimal_char in result: @@ -87,7 +87,7 @@ def ordinal(num): """Convert a number to ordinal""" # Taken from https://codereview.stackexchange.com/questions/41298/producing-ordinal-numbers/41301 # Consider a library like num2word when internationalization comes - if 10 <= num % 100 <= 20: # noqa: PLR2004 + if 10 <= num % 100 <= 20: # noqa: PLR2004 # FIXME CoP suffix = "th" else: # the second parameter is a default. @@ -99,7 +99,7 @@ def resource_key_passes_run_name_filter(resource_key, run_name_filter): if type(resource_key) == ValidationResultIdentifier: # noqa: E721 # ?? run_name = resource_key.run_id.run_name else: - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP "run_name_filter filtering is only implemented for ValidationResultResources." ) @@ -142,7 +142,7 @@ def substitute_none_for_missing( This is helpful for standardizing the input objects for rendering functions. The alternative is lots of awkward `if "some_param" not in kwargs or kwargs["some_param"] == None:` clauses in renderers. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP new_kwargs = copy.deepcopy(kwargs) for kwarg in kwarg_list: @@ -199,7 +199,7 @@ def parse_row_condition_string_pandas_engine( conditions_list: list[str] = [ condition.strip() for condition in re.split(r"AND|OR|NOT(?! 
in)|\(|\)", condition_string) - if condition != "" and condition != " " # noqa: PLR1714 + if condition != "" and condition != " " # noqa: PLR1714 # FIXME CoP ] for i, condition in enumerate(conditions_list): @@ -301,7 +301,7 @@ def build_count_table( return header_row, table_rows -def build_count_and_index_table( # noqa: C901 +def build_count_and_index_table( # noqa: C901 # FIXME CoP partial_unexpected_counts: list[dict], unexpected_index_list: list[dict], unexpected_count: int, @@ -333,8 +333,8 @@ def build_count_and_index_table( # noqa: C901 partial_unexpected_counts=partial_unexpected_counts, ) if unexpected_index_df.empty: - raise RenderingError( # noqa: TRY003 - "GX ran into an issue while building count and index table for rendering. Please check your configuration." # noqa: E501 + raise RenderingError( # noqa: TRY003 # FIXME CoP + "GX ran into an issue while building count and index table for rendering. Please check your configuration." # noqa: E501 # FIXME CoP ) # using default indices for Pandas @@ -350,7 +350,7 @@ def build_count_and_index_table( # noqa: C901 total_count += count if unexpected_value is not None and unexpected_value != "": - row_list.append(unexpected_value) # type: ignore[arg-type] + row_list.append(unexpected_value) # type: ignore[arg-type] # FIXME CoP row_list.append(count) elif unexpected_value == "": row_list.append("EMPTY") @@ -405,7 +405,7 @@ def _convert_unexpected_indices_to_df( unexpected_list: if we are using default Pandas output. 
Returns: pd.DataFrame that contains indices for unexpected values - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP domain_column_name_list: list[str] if unexpected_index_column_names: # if we have defined unexpected_index_column_names for ID/PK @@ -417,8 +417,8 @@ def _convert_unexpected_indices_to_df( set(first_unexpected_index.keys()).difference(set(unexpected_index_column_names)) ) else: - raise TypeError( # noqa: TRY003 - f"Expected dict but got {unexpected_index_list[0]} which is type {type(unexpected_index_list[0]).__name__}." # noqa: E501 + raise TypeError( # noqa: TRY003 # FIXME CoP + f"Expected dict but got {unexpected_index_list[0]} which is type {type(unexpected_index_list[0]).__name__}." # noqa: E501 # FIXME CoP ) elif unexpected_list: # if we are using default Pandas unexpected indices @@ -468,7 +468,7 @@ def truncate_list_of_indices(indices: list[int | str], max_index: int = 10) -> s Returns: string of indices that are joined using ` ` - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if len(indices) > max_index: indices = indices[:max_index] indices.append("...") diff --git a/great_expectations/render/view/view.py b/great_expectations/render/view/view.py index ccb728c4f399..bb94eee70f2f 100644 --- a/great_expectations/render/view/view.py +++ b/great_expectations/render/view/view.py @@ -129,7 +129,7 @@ def add_data_context_id_to_url( return url @contextfilter # type: ignore[misc] # untyped 3rd party decorator - def render_content_block( # noqa: C901, PLR0911, PLR0912 + def render_content_block( # noqa: C901, PLR0911, PLR0912 # FIXME CoP self, jinja_context: Any, content_block: str | list | dict | RenderedComponentContent, @@ -145,13 +145,13 @@ def render_content_block( # noqa: C901, PLR0911, PLR0912 :param content_block_id: :param render_to_markdown: Whether this method should render the markdown version instead of HTML :return: - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if isinstance(content_block, str): return content_block elif 
content_block is None: return "" elif isinstance(content_block, list): - # If the content_block item here is actually a list of content blocks then we want to recursively render # noqa: E501 + # If the content_block item here is actually a list of content blocks then we want to recursively render # noqa: E501 # FIXME CoP rendered_block = "" for idx, content_block_el in enumerate(content_block): if ( @@ -168,7 +168,7 @@ def render_content_block( # noqa: C901, PLR0911, PLR0912 idx, content_block_id=new_content_block_id, ) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if render_to_markdown: rendered_block += str(content_block_el) else: @@ -251,14 +251,14 @@ def render_styling(self, styling: Mapping) -> str: If they aren't present, they simply won't be rendered. Other dictionary keys are also allowed and ignored. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP class_list = styling.get("classes", None) if class_list is None: class_str = "" else: - if type(class_list) == str: # noqa: E721 - raise TypeError("classes must be a list, not a string.") # noqa: TRY003 + if type(class_list) == str: # noqa: E721 # FIXME CoP + raise TypeError("classes must be a list, not a string.") # noqa: TRY003 # FIXME CoP class_str = f"class=\"{' '.join(class_list)}\" " attribute_dict = styling.get("attributes", None) @@ -289,7 +289,7 @@ def render_styling(self, styling: Mapping) -> str: def render_styling_from_string_template(self, template: dict | OrderedDict) -> str: # NOTE: We should add some kind of type-checking to template - """This method is a thin wrapper use to call `render_styling` from within jinja templates.""" # noqa: E501 + """This method is a thin wrapper use to call `render_styling` from within jinja templates.""" # noqa: E501 # FIXME CoP if not isinstance(template, (dict, OrderedDict)): return template @@ -311,7 +311,7 @@ def render_markdown(self, markdown): except OSError: return markdown - def render_string_template(self, template): # noqa: C901, PLR0912 + 
def render_string_template(self, template): # noqa: C901, PLR0912 # FIXME CoP # NOTE: Using this line for debugging. This should probably be logged...? # print(template) @@ -369,7 +369,7 @@ def render_string_template(self, template): # noqa: C901, PLR0912 ) for parameter in template["params"]: - # If this param has styling that over-rides the default, skip it here and get it in the next loop. # noqa: E501 + # If this param has styling that over-rides the default, skip it here and get it in the next loop. # noqa: E501 # FIXME CoP if "params" in template["styling"]: if parameter in template["styling"]["params"]: continue @@ -467,7 +467,7 @@ def render(self, document: list, template=None, **kwargs) -> list[str] | str: Handle list as well as single document """ if isinstance(document, list): - # We need to keep this as super(DefaultMarkdownPageView, self); otherwise a wrong render will be called. # noqa: E501 + # We need to keep this as super(DefaultMarkdownPageView, self); otherwise a wrong render will be called. # noqa: E501 # FIXME CoP return [ super(DefaultMarkdownPageView, self).render(document=d, template=template, **kwargs) for d in document diff --git a/great_expectations/self_check/sqlalchemy_connection_manager.py b/great_expectations/self_check/sqlalchemy_connection_manager.py index 8f84a10a7b95..630f934582ff 100644 --- a/great_expectations/self_check/sqlalchemy_connection_manager.py +++ b/great_expectations/self_check/sqlalchemy_connection_manager.py @@ -30,7 +30,7 @@ def get_connection(self, connection_string): self._connections[connection_string] = conn except (ImportError, SQLAlchemyError) as e: print( - f'Unable to establish connection with {connection_string} -- exception "{e}" occurred.' # noqa: E501 + f'Unable to establish connection with {connection_string} -- exception "{e}" occurred.' 
# noqa: E501 # FIXME CoP ) raise diff --git a/great_expectations/self_check/util.py b/great_expectations/self_check/util.py index f4b886474495..bb80394db1a3 100644 --- a/great_expectations/self_check/util.py +++ b/great_expectations/self_check/util.py @@ -81,6 +81,7 @@ import_library_module, ) from great_expectations.validator.validator import Validator +from tests.test_utils import get_default_mssql_url SQLAlchemyError = sqlalchemy.SQLAlchemyError @@ -133,10 +134,10 @@ BIGQUERY_TYPES["GEOGRAPHY"] = GEOGRAPHY try: - import sqlalchemy.dialects.postgresql as postgresqltypes # noqa: TID251 + import sqlalchemy.dialects.postgresql as postgresqltypes # noqa: TID251 # FIXME CoP # noinspection PyPep8Naming - from sqlalchemy.dialects.postgresql import dialect as pgDialect # noqa: TID251 + from sqlalchemy.dialects.postgresql import dialect as pgDialect # noqa: TID251 # FIXME CoP POSTGRESQL_TYPES = { "TEXT": postgresqltypes.TEXT, @@ -151,15 +152,15 @@ "NUMERIC": postgresqltypes.NUMERIC, } except (ImportError, KeyError): - postgresqltypes = None # type: ignore[assignment] - pgDialect = None # type: ignore[assignment] + postgresqltypes = None # type: ignore[assignment] # FIXME CoP + pgDialect = None # type: ignore[assignment] # FIXME CoP POSTGRESQL_TYPES = {} try: - import sqlalchemy.dialects.mysql as mysqltypes # noqa: TID251 + import sqlalchemy.dialects.mysql as mysqltypes # noqa: TID251 # FIXME CoP # noinspection PyPep8Naming - from sqlalchemy.dialects.mysql import dialect as mysqlDialect # noqa: TID251 + from sqlalchemy.dialects.mysql import dialect as mysqlDialect # noqa: TID251 # FIXME CoP MYSQL_TYPES = { "TEXT": mysqltypes.TEXT, @@ -176,22 +177,22 @@ "TINYINT": mysqltypes.TINYINT, } except (ImportError, KeyError): - mysqltypes = None # type: ignore[assignment] - mysqlDialect = None # type: ignore[assignment] + mysqltypes = None # type: ignore[assignment] # FIXME CoP + mysqlDialect = None # type: ignore[assignment] # FIXME CoP MYSQL_TYPES = {} try: - # SQLAlchemy does not 
export the "INT" type for the MS SQL Server dialect; however "INT" is supported by the engine. # noqa: E501 - # Since SQLAlchemy exports the "INTEGER" type for the MS SQL Server dialect, alias "INT" to the "INTEGER" type. # noqa: E501 - import sqlalchemy.dialects.mssql as mssqltypes # noqa: TID251 + # SQLAlchemy does not export the "INT" type for the MS SQL Server dialect; however "INT" is supported by the engine. # noqa: E501 # FIXME CoP + # Since SQLAlchemy exports the "INTEGER" type for the MS SQL Server dialect, alias "INT" to the "INTEGER" type. # noqa: E501 # FIXME CoP + import sqlalchemy.dialects.mssql as mssqltypes # noqa: TID251 # FIXME CoP # noinspection PyPep8Naming - from sqlalchemy.dialects.mssql import dialect as mssqlDialect # noqa: TID251 + from sqlalchemy.dialects.mssql import dialect as mssqlDialect # noqa: TID251 # FIXME CoP try: mssqltypes.INT # type: ignore[attr-defined] # noqa: B018 # reassigning if attr not found except AttributeError: - mssqltypes.INT = mssqltypes.INTEGER # type: ignore[attr-defined] + mssqltypes.INT = mssqltypes.INTEGER # type: ignore[attr-defined] # FIXME CoP # noinspection PyUnresolvedReferences MSSQL_TYPES = { @@ -206,7 +207,7 @@ "DECIMAL": mssqltypes.DECIMAL, "FLOAT": mssqltypes.FLOAT, "IMAGE": mssqltypes.IMAGE, - "INT": mssqltypes.INT, # type: ignore[attr-defined] + "INT": mssqltypes.INT, # type: ignore[attr-defined] # FIXME CoP "INTEGER": mssqltypes.INTEGER, "MONEY": mssqltypes.MONEY, "NCHAR": mssqltypes.NCHAR, @@ -227,8 +228,8 @@ "VARCHAR": mssqltypes.VARCHAR, } except (ImportError, KeyError): - mssqltypes = None # type: ignore[assignment] - mssqlDialect = None # type: ignore[assignment] + mssqltypes = None # type: ignore[assignment] # FIXME CoP + mssqlDialect = None # type: ignore[assignment] # FIXME CoP MSSQL_TYPES = {} @@ -325,7 +326,7 @@ SNOWFLAKE_TYPES: Dict[str, Any] if snowflake.snowflakesqlalchemy and snowflake.snowflakedialect and snowflake.snowflaketypes: - # Sometimes "snowflake-sqlalchemy" fails to 
self-register in certain environments, so we do it explicitly. # noqa: E501 + # Sometimes "snowflake-sqlalchemy" fails to self-register in certain environments, so we do it explicitly. # noqa: E501 # FIXME CoP # (see https://stackoverflow.com/questions/53284762/nosuchmoduleerror-cant-load-plugin-sqlalchemy-dialectssnowflake) sqlalchemy.dialects.registry.register("snowflake", "snowflake.sqlalchemy", "dialect") @@ -440,7 +441,7 @@ def get_sqlite_connection_url(sqlite_db_path): return url -def get_test_validator_with_data( # noqa: PLR0913 +def get_test_validator_with_data( # noqa: PLR0913 # FIXME CoP execution_engine: str, data: dict, table_name: str | None = None, @@ -489,10 +490,10 @@ def get_test_validator_with_data( # noqa: PLR0913 pk_column=pk_column, ) else: - raise ValueError(f"Unknown dataset_type {execution_engine!s}") # noqa: TRY003 + raise ValueError(f"Unknown dataset_type {execution_engine!s}") # noqa: TRY003 # FIXME CoP -def _get_test_validator_with_data_pandas( # noqa: C901 +def _get_test_validator_with_data_pandas( # noqa: C901 # FIXME CoP df: pd.DataFrame, schemas: dict | None, table_name: str | None, @@ -505,8 +506,8 @@ def _get_test_validator_with_data_pandas( # noqa: C901 schema["pk_index"] = "int" pandas_schema = {} for key, value in schema.items(): - # Note, these are just names used in our internal schemas to build datasets *for internal tests* # noqa: E501 - # Further, some changes in pandas internal about how datetimes are created means to support pandas # noqa: E501 + # Note, these are just names used in our internal schemas to build datasets *for internal tests* # noqa: E501 # FIXME CoP + # Further, some changes in pandas internal about how datetimes are created means to support pandas # noqa: E501 # FIXME CoP # pre- 0.25, we need to explicitly specify when we want timezone. 
# We will use timestamp for timezone-aware (UTC only) dates in our tests @@ -518,7 +519,7 @@ def _get_test_validator_with_data_pandas( # noqa: C901 continue elif value.lower() in ["date"]: df[key] = execute_pandas_to_datetime(df[key]).dt.date - value = "object" # noqa: PLW2901 + value = "object" # noqa: PLW2901 # FIXME CoP try: type_ = np.dtype(value) except TypeError: @@ -545,7 +546,7 @@ def _get_test_validator_with_data_pandas( # noqa: C901 ) -def _get_test_validator_with_data_sqlalchemy( # noqa: PLR0913 +def _get_test_validator_with_data_sqlalchemy( # noqa: PLR0913 # FIXME CoP df: pd.DataFrame, execution_engine: str, schemas: dict | None, @@ -561,8 +562,8 @@ def _get_test_validator_with_data_sqlalchemy( # noqa: PLR0913 return None if table_name is None: - raise ExecutionEngineError( # noqa: TRY003 - "Initializing a Validator for SqlAlchemyExecutionEngine in tests requires `table_name` to be defined. Please check your configuration" # noqa: E501 + raise ExecutionEngineError( # noqa: TRY003 # FIXME CoP + "Initializing a Validator for SqlAlchemyExecutionEngine in tests requires `table_name` to be defined. Please check your configuration" # noqa: E501 # FIXME CoP ) return build_sa_validator_with_data( df=df, @@ -579,7 +580,7 @@ def _get_test_validator_with_data_sqlalchemy( # noqa: PLR0913 ) -def _get_test_validator_with_data_spark( # noqa: C901, PLR0912, PLR0915 +def _get_test_validator_with_data_spark( # noqa: C901, PLR0912, PLR0915 # FIXME CoP data: dict, schemas: dict | None, context: AbstractDataContext | None, @@ -601,7 +602,7 @@ def _get_test_validator_with_data_spark( # noqa: C901, PLR0912, PLR0915 } spark = SparkDFExecutionEngine.get_or_create_spark_session() - # We need to allow null values in some column types that do not support them natively, so we skip # noqa: E501 + # We need to allow null values in some column types that do not support them natively, so we skip # noqa: E501 # FIXME CoP # use of df in this case. 
data_reshaped = list(zip(*(v for _, v in data.items()))) # create a list of rows if schemas and "spark" in schemas: @@ -653,7 +654,7 @@ def _get_test_validator_with_data_spark( # noqa: C901, PLR0912, PLR0915 if val is None: vals.append(val) else: - vals.append(parse(val)) # type: ignore[arg-type] + vals.append(parse(val)) # type: ignore[arg-type] # FIXME CoP data[col] = vals # Do this again, now that we have done type conversion using the provided schema data_reshaped = list(zip(*(v for _, v in data.items()))) # create a list of rows @@ -697,7 +698,7 @@ def build_pandas_validator_with_data( batch_definition: Optional[LegacyBatchDefinition] = None, context: Optional[AbstractDataContext] = None, ) -> Validator: - batch = Batch(data=df, batch_definition=batch_definition) # type: ignore[arg-type] + batch = Batch(data=df, batch_definition=batch_definition) # type: ignore[arg-type] # FIXME CoP if context is None: context = build_in_memory_runtime_context() @@ -711,7 +712,7 @@ def build_pandas_validator_with_data( ) -def build_sa_validator_with_data( # noqa: C901, PLR0912, PLR0913, PLR0915 +def build_sa_validator_with_data( # noqa: C901, PLR0912, PLR0913, PLR0915 # FIXME CoP df, sa_engine_name, table_name, @@ -724,9 +725,9 @@ def build_sa_validator_with_data( # noqa: C901, PLR0912, PLR0913, PLR0915 context: Optional[AbstractDataContext] = None, pk_column: bool = False, ): - _debug = lambda x: x # noqa: E731 + _debug = lambda x: x # noqa: E731 # FIXME CoP if debug_logger: - _debug = lambda x: debug_logger.debug( # noqa: E731 + _debug = lambda x: debug_logger.debug( # noqa: E731 # FIXME CoP f"(build_sa_validator_with_data) {x}" ) @@ -740,13 +741,13 @@ def build_sa_validator_with_data( # noqa: C901, PLR0912, PLR0913, PLR0915 try: dialect_classes["postgresql"] = postgresqltypes.dialect - dialect_types["postgresql"] = POSTGRESQL_TYPES # type: ignore[assignment] + dialect_types["postgresql"] = POSTGRESQL_TYPES # type: ignore[assignment] # FIXME CoP except AttributeError: pass 
try: dialect_classes["mysql"] = mysqltypes.dialect - dialect_types["mysql"] = MYSQL_TYPES # type: ignore[assignment] + dialect_types["mysql"] = MYSQL_TYPES # type: ignore[assignment] # FIXME CoP except AttributeError: pass @@ -757,7 +758,7 @@ def build_sa_validator_with_data( # noqa: C901, PLR0912, PLR0913, PLR0915 pass try: - dialect_classes["bigquery"] = BigQueryDialect # type: ignore[assignment] + dialect_classes["bigquery"] = BigQueryDialect # type: ignore[assignment] # FIXME CoP dialect_types["bigquery"] = BIGQUERY_TYPES except AttributeError: pass @@ -800,11 +801,8 @@ def build_sa_validator_with_data( # noqa: C901, PLR0912, PLR0913, PLR0915 connection_string = f"mysql+pymysql://root@{db_hostname}/test_ci" engine = sa.create_engine(connection_string) elif sa_engine_name == "mssql": - connection_string = f"mssql+pyodbc://sa:ReallyStrongPwd1234%^&*@{db_hostname}:1433/test_ci?driver=ODBC Driver 17 for SQL Server&charset=utf8&autocommit=true" # noqa: E501 - engine = sa.create_engine( - connection_string, - # echo=True, - ) + connection_string = get_default_mssql_url() + engine = sa.create_engine(connection_string) elif sa_engine_name == "bigquery": connection_string = _get_bigquery_connection_string() engine = sa.create_engine(connection_string) @@ -827,8 +825,8 @@ def build_sa_validator_with_data( # noqa: C901, PLR0912, PLR0913, PLR0915 connection_string = None engine = None - # If "autocommit" is not desired to be on by default, then use the following pattern when explicit "autocommit" # noqa: E501 - # is desired (e.g., for temporary tables, "autocommit" is off by default, so the override option may be useful). # noqa: E501 + # If "autocommit" is not desired to be on by default, then use the following pattern when explicit "autocommit" # noqa: E501 # FIXME CoP + # is desired (e.g., for temporary tables, "autocommit" is off by default, so the override option may be useful). 
# noqa: E501 # FIXME CoP # execution_engine.execute_query(sa.text(sql_query_string).execution_options(autocommit=True)) # Add the data to the database as a new table @@ -841,7 +839,7 @@ def build_sa_validator_with_data( # noqa: C901, PLR0912, PLR0913, PLR0915 if ( schemas and sa_engine_name in schemas - and isinstance(engine.dialect, dialect_classes[sa_engine_name]) # type: ignore[union-attr] + and isinstance(engine.dialect, dialect_classes[sa_engine_name]) # type: ignore[union-attr] # FIXME CoP ): schema = schemas[sa_engine_name] if pk_column: @@ -988,12 +986,12 @@ def build_spark_validator_with_data( def build_pandas_engine( df: pd.DataFrame, ) -> PandasExecutionEngine: - batch = Batch(data=df) # type: ignore[arg-type] + batch = Batch(data=df) # type: ignore[arg-type] # FIXME CoP execution_engine = PandasExecutionEngine(batch_data_dict={batch.id: batch.data}) return execution_engine -def build_sa_execution_engine( # noqa: PLR0913 +def build_sa_execution_engine( # noqa: PLR0913 # FIXME CoP df: pd.DataFrame, sa: ModuleType, schema: Optional[str] = None, @@ -1049,7 +1047,7 @@ def build_spark_engine( ) != 1 ): - raise ValueError("Exactly one of batch_id or batch_definition must be specified.") # noqa: TRY003 + raise ValueError("Exactly one of batch_id or batch_definition must be specified.") # noqa: TRY003 # FIXME CoP if batch_id is None: batch_id = cast(LegacyBatchDefinition, batch_definition).id @@ -1063,11 +1061,11 @@ def build_spark_engine( ) for record in df.to_records(index=False) ] - schema = df.columns.tolist() # type: ignore[assignment] + schema = df.columns.tolist() # type: ignore[assignment] # FIXME CoP else: data = df - df = spark.createDataFrame(data=data, schema=schema) # type: ignore[type-var,arg-type] + df = spark.createDataFrame(data=data, schema=schema) # type: ignore[type-var,arg-type] # FIXME CoP conf: Iterable[Tuple[str, str]] = spark.sparkContext.getConf().getAll() spark_config: Dict[str, Any] = dict(conf) @@ -1249,10 +1247,10 @@ def 
candidate_test_is_on_temporary_notimplemented_list_v3_api(context, expectati # a github issue tracking adding the test with BigQuery. ### expectations_not_implemented_v3_sql.append( - "expect_column_kl_divergence_to_be_less_than" # TODO: will collect for over 60 minutes, and will not completes # noqa: E501 + "expect_column_kl_divergence_to_be_less_than" # TODO: will collect for over 60 minutes, and will not completes # noqa: E501 # FIXME CoP ) expectations_not_implemented_v3_sql.append( - "expect_column_quantile_values_to_be_between" # TODO: will run but will add about 1hr to pipeline. # noqa: E501 + "expect_column_quantile_values_to_be_between" # TODO: will run but will add about 1hr to pipeline. # noqa: E501 # FIXME CoP ) return expectation_type in expectations_not_implemented_v3_sql @@ -1290,7 +1288,7 @@ def candidate_test_is_on_temporary_notimplemented_list_v3_api(context, expectati return False -def build_test_backends_list( # noqa: C901, PLR0912, PLR0913, PLR0915 +def build_test_backends_list( # noqa: C901, PLR0912, PLR0913, PLR0915 # FIXME CoP include_pandas=True, include_spark=False, include_sqlalchemy=True, @@ -1320,7 +1318,7 @@ def build_test_backends_list( # noqa: C901, PLR0912, PLR0913, PLR0915 if not pyspark.pyspark: if raise_exceptions_for_backends is True: - raise ValueError("spark tests are requested, but pyspark is not installed") # noqa: TRY003 + raise ValueError("spark tests are requested, but pyspark is not installed") # noqa: TRY003 # FIXME CoP else: logger.warning("spark tests are requested, but pyspark is not installed") else: @@ -1331,7 +1329,7 @@ def build_test_backends_list( # noqa: C901, PLR0912, PLR0913, PLR0915 sa: Optional[ModuleType] = import_library_module(module_name="sqlalchemy") if sa is None: if raise_exceptions_for_backends is True: - raise ImportError("sqlalchemy tests are requested, but sqlalchemy in not installed") # noqa: TRY003 + raise ImportError("sqlalchemy tests are requested, but sqlalchemy in not installed") # noqa: 
TRY003 # FIXME CoP else: logger.warning("sqlalchemy tests are requested, but sqlalchemy in not installed") return test_backends @@ -1352,15 +1350,15 @@ def build_test_backends_list( # noqa: C901, PLR0912, PLR0913, PLR0915 checker = LockingConnectionCheck(sa, connection_string) if checker.is_valid() is True: test_backends += ["postgresql"] - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if raise_exceptions_for_backends is True: - raise ValueError( # noqa: TRY003 - f"backend-specific tests are requested, but unable to connect to the database at " # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + f"backend-specific tests are requested, but unable to connect to the database at " # noqa: E501 # FIXME CoP f"{connection_string}" ) else: logger.warning( - f"backend-specific tests are requested, but unable to connect to the database at " # noqa: E501 + f"backend-specific tests are requested, but unable to connect to the database at " # noqa: E501 # FIXME CoP f"{connection_string}" ) @@ -1371,7 +1369,7 @@ def build_test_backends_list( # noqa: C901, PLR0912, PLR0913, PLR0915 conn.close() except (ImportError, SQLAlchemyError): if raise_exceptions_for_backends is True: - raise ImportError( # noqa: TRY003 + raise ImportError( # noqa: TRY003 # FIXME CoP "mysql tests are requested, but unable to connect to the mysql database at " f"'mysql+pymysql://root@{db_hostname}/test_ci'" ) @@ -1384,27 +1382,21 @@ def build_test_backends_list( # noqa: C901, PLR0912, PLR0913, PLR0915 test_backends += ["mysql"] if include_mssql: - # noinspection PyUnresolvedReferences + connection_string = get_default_mssql_url() try: - engine = sa.create_engine( - f"mssql+pyodbc://sa:ReallyStrongPwd1234%^&*@{db_hostname}:1433/test_ci?" 
- "driver=ODBC Driver 17 for SQL Server&charset=utf8&autocommit=true", - # echo=True, - ) + engine = sa.create_engine(connection_string) conn = engine.connect() conn.close() except (ImportError, sa.exc.SQLAlchemyError): if raise_exceptions_for_backends is True: - raise ImportError( # noqa: TRY003 + raise ImportError( # noqa: TRY003 # FIXME CoP "mssql tests are requested, but unable to connect to the mssql database at " - f"'mssql+pyodbc://sa:ReallyStrongPwd1234%^&*@{db_hostname}:1433/test_ci?" - "driver=ODBC Driver 17 for SQL Server&charset=utf8&autocommit=true'", + f"{connection_string}", ) else: logger.warning( "mssql tests are requested, but unable to connect to the mssql database at " - f"'mssql+pyodbc://sa:ReallyStrongPwd1234%^&*@{db_hostname}:1433/test_ci?" - "driver=ODBC Driver 17 for SQL Server&charset=utf8&autocommit=true'", + f"{connection_string}", ) else: test_backends += ["mssql"] @@ -1417,7 +1409,7 @@ def build_test_backends_list( # noqa: C901, PLR0912, PLR0913, PLR0915 conn.close() except (ImportError, ValueError, sa.exc.SQLAlchemyError) as e: if raise_exceptions_for_backends is True: - raise ImportError("bigquery tests are requested, but unable to connect") from e # noqa: TRY003 + raise ImportError("bigquery tests are requested, but unable to connect") from e # noqa: TRY003 # FIXME CoP else: logger.warning(f"bigquery tests are requested, but unable to connect; {e!r}") else: @@ -1440,7 +1432,7 @@ def build_test_backends_list( # noqa: C901, PLR0912, PLR0913, PLR0915 and not aws_config_file ): if raise_exceptions_for_backends is True: - raise ImportError("AWS tests are requested, but credentials were not set up") # noqa: TRY003 + raise ImportError("AWS tests are requested, but credentials were not set up") # noqa: TRY003 # FIXME CoP else: logger.warning("AWS tests are requested, but credentials were not set up") @@ -1452,7 +1444,7 @@ def build_test_backends_list( # noqa: C901, PLR0912, PLR0913, PLR0915 conn.close() except (ImportError, ValueError, 
sa.exc.SQLAlchemyError) as e: if raise_exceptions_for_backends is True: - raise ImportError( # noqa: TRY003 + raise ImportError( # noqa: TRY003 # FIXME CoP "clickhouse tests are requested, but unable to connect" ) from e else: @@ -1468,7 +1460,7 @@ def build_test_backends_list( # noqa: C901, PLR0912, PLR0913, PLR0915 conn.close() except (ImportError, ValueError, sa.exc.SQLAlchemyError) as e: if raise_exceptions_for_backends is True: - raise ImportError("trino tests are requested, but unable to connect") from e # noqa: TRY003 + raise ImportError("trino tests are requested, but unable to connect") from e # noqa: TRY003 # FIXME CoP else: logger.warning(f"trino tests are requested, but unable to connect; {e!r}") else: @@ -1480,7 +1472,7 @@ def build_test_backends_list( # noqa: C901, PLR0912, PLR0913, PLR0915 azure_access_key: Optional[str] = os.getenv("AZURE_ACCESS_KEY") if not azure_access_key and not azure_connection_string and not azure_credential: if raise_exceptions_for_backends is True: - raise ImportError("Azure tests are requested, but credentials were not set up") # noqa: TRY003 + raise ImportError("Azure tests are requested, but credentials were not set up") # noqa: TRY003 # FIXME CoP else: logger.warning("Azure tests are requested, but credentials were not set up") test_backends += ["azure"] @@ -1493,7 +1485,7 @@ def build_test_backends_list( # noqa: C901, PLR0912, PLR0913, PLR0915 conn.close() except (ImportError, ValueError, sa.exc.SQLAlchemyError) as e: if raise_exceptions_for_backends is True: - raise ImportError("redshift tests are requested, but unable to connect") from e # noqa: TRY003 + raise ImportError("redshift tests are requested, but unable to connect") from e # noqa: TRY003 # FIXME CoP else: logger.warning(f"redshift tests are requested, but unable to connect; {e!r}") else: @@ -1507,7 +1499,7 @@ def build_test_backends_list( # noqa: C901, PLR0912, PLR0913, PLR0915 conn.close() except (ImportError, ValueError, sa.exc.SQLAlchemyError) as e: if 
raise_exceptions_for_backends is True: - raise ImportError("athena tests are requested, but unable to connect") from e # noqa: TRY003 + raise ImportError("athena tests are requested, but unable to connect") from e # noqa: TRY003 # FIXME CoP else: logger.warning(f"athena tests are requested, but unable to connect; {e!r}") else: @@ -1521,7 +1513,7 @@ def build_test_backends_list( # noqa: C901, PLR0912, PLR0913, PLR0915 conn.close() except (ImportError, ValueError, sa.exc.SQLAlchemyError) as e: if raise_exceptions_for_backends is True: - raise ImportError("snowflake tests are requested, but unable to connect") from e # noqa: TRY003 + raise ImportError("snowflake tests are requested, but unable to connect") from e # noqa: TRY003 # FIXME CoP else: logger.warning(f"snowflake tests are requested, but unable to connect; {e!r}") else: @@ -1530,7 +1522,7 @@ def build_test_backends_list( # noqa: C901, PLR0912, PLR0913, PLR0915 return test_backends -def generate_expectation_tests( # noqa: C901, PLR0912, PLR0913, PLR0915 +def generate_expectation_tests( # noqa: C901, PLR0912, PLR0913, PLR0915 # FIXME CoP expectation_type: str, test_data_cases: List[ExpectationTestDataCases], execution_engine_diagnostics: ExpectationExecutionEngineDiagnostics, @@ -1553,14 +1545,14 @@ def generate_expectation_tests( # noqa: C901, PLR0912, PLR0913, PLR0915 :param only_consider_these_backends: optional list of backends to consider :param context Instance of any child of "AbstractDataContext" class :return: list of parametrized tests with loaded validators and accessible backends - """ # noqa: E501 - _debug = lambda x: x # noqa: E731 - _error = lambda x: x # noqa: E731 + """ # noqa: E501 # FIXME CoP + _debug = lambda x: x # noqa: E731 # FIXME CoP + _error = lambda x: x # noqa: E731 # FIXME CoP if debug_logger: - _debug = lambda x: debug_logger.debug( # noqa: E731 + _debug = lambda x: debug_logger.debug( # noqa: E731 # FIXME CoP f"(generate_expectation_tests) {x}" ) - _error = lambda x: 
debug_logger.error( # noqa: E731 + _error = lambda x: debug_logger.error( # noqa: E731 # FIXME CoP f"(generate_expectation_tests) {x}" ) @@ -1627,7 +1619,7 @@ def generate_expectation_tests( # noqa: C901, PLR0912, PLR0913, PLR0915 num_test_data_cases = len(test_data_cases) for i, d in enumerate(test_data_cases, 1): _debug(f"test_data_case {i}/{num_test_data_cases}") - d = copy.deepcopy(d) # noqa: PLW2901 + d = copy.deepcopy(d) # noqa: PLW2901 # FIXME CoP titles = [] only_fors = [] suppress_test_fors = [] @@ -1637,7 +1629,7 @@ def generate_expectation_tests( # noqa: C901, PLR0912, PLR0913, PLR0915 suppress_test_fors.append(_test_case.suppress_test_for) _debug(f"titles -> {titles}") _debug( - f"only_fors -> {only_fors} suppress_test_fors -> {suppress_test_fors} only_consider_these_backends -> {only_consider_these_backends}" # noqa: E501 + f"only_fors -> {only_fors} suppress_test_fors -> {suppress_test_fors} only_consider_these_backends -> {only_consider_these_backends}" # noqa: E501 # FIXME CoP ) for c in backends: _debug(f"Getting validators with data: {c}") @@ -1647,7 +1639,7 @@ def generate_expectation_tests( # noqa: C901, PLR0912, PLR0913, PLR0915 for sup in suppress_test_fors ] only_fors_ok = [] - for i, only_for in enumerate(only_fors): # noqa: PLW2901 + for i, only_for in enumerate(only_fors): # noqa: PLW2901 # FIXME CoP if not only_for: only_fors_ok.append(True) continue @@ -1761,7 +1753,7 @@ def generate_expectation_tests( # noqa: C901, PLR0912, PLR0913, PLR0915 # print(pd.DataFrame(d.get("data_alt"))) # print() _error( - f"PROBLEM with get_test_validator_with_data in backend {c} for {expectation_type} from data AND data_alt {repr(e)[:300]}" # noqa: E501 + f"PROBLEM with get_test_validator_with_data in backend {c} for {expectation_type} from data AND data_alt {repr(e)[:300]}" # noqa: E501 # FIXME CoP ) parametrized_tests.append( { @@ -1776,11 +1768,11 @@ def generate_expectation_tests( # noqa: C901, PLR0912, PLR0913, PLR0915 else: # print("\n[[ The alternate 
data worked!! ]]\n") _debug( - f"Needed to use data_alt for backend {c}, but it worked for {expectation_type}" # noqa: E501 + f"Needed to use data_alt for backend {c}, but it worked for {expectation_type}" # noqa: E501 # FIXME CoP ) else: _error( - f"PROBLEM with get_test_validator_with_data in backend {c} for {expectation_type} from data (no data_alt to try) {repr(e)[:300]}" # noqa: E501 + f"PROBLEM with get_test_validator_with_data in backend {c} for {expectation_type} from data (no data_alt to try) {repr(e)[:300]}" # noqa: E501 # FIXME CoP ) parametrized_tests.append( { @@ -1816,7 +1808,7 @@ def generate_expectation_tests( # noqa: C901, PLR0912, PLR0913, PLR0915 return parametrized_tests -def should_we_generate_this_test( # noqa: C901, PLR0911, PLR0912, PLR0913 +def should_we_generate_this_test( # noqa: C901, PLR0911, PLR0912, PLR0913 # FIXME CoP backend: str, expectation_test_case: ExpectationTestCase, ignore_suppress: bool = False, @@ -1824,9 +1816,9 @@ def should_we_generate_this_test( # noqa: C901, PLR0911, PLR0912, PLR0913 extra_debug_info: str = "", debug_logger: Optional[logging.Logger] = None, ): - _debug = lambda x: x # noqa: E731 + _debug = lambda x: x # noqa: E731 # FIXME CoP if debug_logger: - _debug = lambda x: debug_logger.debug( # noqa: E731 + _debug = lambda x: debug_logger.debug( # noqa: E731 # FIXME CoP f"(should_we_generate_this_test) {x}" ) @@ -1840,23 +1832,23 @@ def should_we_generate_this_test( # noqa: C901, PLR0911, PLR0912, PLR0913 if backend in expectation_test_case.suppress_test_for: if ignore_suppress: _debug( - f"Should be suppressing {expectation_test_case.title} for {backend}, but ignore_suppress is True | {extra_debug_info}" # noqa: E501 + f"Should be suppressing {expectation_test_case.title} for {backend}, but ignore_suppress is True | {extra_debug_info}" # noqa: E501 # FIXME CoP ) return True else: _debug( - f"Backend {backend} is suppressed for test {expectation_test_case.title}: | {extra_debug_info}" # noqa: E501 + f"Backend 
{backend} is suppressed for test {expectation_test_case.title}: | {extra_debug_info}" # noqa: E501 # FIXME CoP ) return False if "sqlalchemy" in expectation_test_case.suppress_test_for and backend in SQL_DIALECT_NAMES: if ignore_suppress: _debug( - f"Should be suppressing {expectation_test_case.title} for sqlalchemy (including {backend}), but ignore_suppress is True | {extra_debug_info}" # noqa: E501 + f"Should be suppressing {expectation_test_case.title} for sqlalchemy (including {backend}), but ignore_suppress is True | {extra_debug_info}" # noqa: E501 # FIXME CoP ) return True else: _debug( - f"All sqlalchemy (including {backend}) is suppressed for test: {expectation_test_case.title} | {extra_debug_info}" # noqa: E501 + f"All sqlalchemy (including {backend}) is suppressed for test: {expectation_test_case.title} | {extra_debug_info}" # noqa: E501 # FIXME CoP ) return False if expectation_test_case.only_for is not None and expectation_test_case.only_for: @@ -1872,19 +1864,19 @@ def should_we_generate_this_test( # noqa: C901, PLR0911, PLR0912, PLR0913 if major == "0" and minor in ["22", "23"]: return True elif "pandas>=024" in expectation_test_case.only_for: - if (major == "0" and int(minor) >= 24) or int( # noqa: PLR2004 + if (major == "0" and int(minor) >= 24) or int( # noqa: PLR2004 # FIXME CoP major ) >= 1: return True if ignore_only_for: _debug( - f"Should normally not run test {expectation_test_case.title} for {backend}, but ignore_only_for is True | {extra_debug_info}" # noqa: E501 + f"Should normally not run test {expectation_test_case.title} for {backend}, but ignore_only_for is True | {extra_debug_info}" # noqa: E501 # FIXME CoP ) return True else: _debug( - f"Only {expectation_test_case.only_for} allowed (not {backend}) for test: {expectation_test_case.title} | {extra_debug_info}" # noqa: E501 + f"Only {expectation_test_case.only_for} allowed (not {backend}) for test: {expectation_test_case.title} | {extra_debug_info}" # noqa: E501 # FIXME CoP ) return 
False @@ -1892,7 +1884,7 @@ def should_we_generate_this_test( # noqa: C901, PLR0911, PLR0912, PLR0913 def sort_unexpected_values(test_value_list, result_value_list): - # check if value can be sorted; if so, sort so arbitrary ordering of results does not cause failure # noqa: E501 + # check if value can be sorted; if so, sort so arbitrary ordering of results does not cause failure # noqa: E501 # FIXME CoP if (isinstance(test_value_list, list)) & (len(test_value_list) >= 1): # __lt__ is not implemented for python dictionaries making sorting trickier # in our case, we will sort on the values for each key sequentially @@ -1913,7 +1905,7 @@ def sort_unexpected_values(test_value_list, result_value_list): return test_value_list, result_value_list -def evaluate_json_test_v3_api( # noqa: C901, PLR0912, PLR0913 +def evaluate_json_test_v3_api( # noqa: C901, PLR0912, PLR0913 # FIXME CoP validator: Validator, expectation_type: str, test: Dict[str, Any], @@ -1945,13 +1937,13 @@ def evaluate_json_test_v3_api( # noqa: C901, PLR0912, PLR0913 :param debug_logger: logger instance or None :param pk_column: If True, then the primary-key column has been defined in the json test data. :return: Tuple(ExpectationValidationResult, error_message, stack_trace). asserts correctness of results. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if debug_logger is not None: - _debug = lambda x: debug_logger.debug( # noqa: E731 + _debug = lambda x: debug_logger.debug( # noqa: E731 # FIXME CoP f"(evaluate_json_test_v3_api) {x}" ) else: - _debug = lambda x: x # noqa: E731 + _debug = lambda x: x # noqa: E731 # FIXME CoP expectation_suite = ExpectationSuite("json_test_suite") # noinspection PyProtectedMember @@ -1959,22 +1951,22 @@ def evaluate_json_test_v3_api( # noqa: C901, PLR0912, PLR0913 # validator.set_default_expectation_argument("result_format", "COMPLETE") if "title" not in test: - raise ValueError("Invalid test configuration detected: 'title' is required.") # noqa: TRY003 + raise ValueError("Invalid test configuration detected: 'title' is required.") # noqa: TRY003 # FIXME CoP if "exact_match_out" not in test: - raise ValueError("Invalid test configuration detected: 'exact_match_out' is required.") # noqa: TRY003 + raise ValueError("Invalid test configuration detected: 'exact_match_out' is required.") # noqa: TRY003 # FIXME CoP if "input" not in test: if "in" in test: test["input"] = test["in"] else: - raise ValueError("Invalid test configuration detected: 'input' is required.") # noqa: TRY003 + raise ValueError("Invalid test configuration detected: 'input' is required.") # noqa: TRY003 # FIXME CoP if "output" not in test: if "out" in test: test["output"] = test["out"] else: - raise ValueError("Invalid test configuration detected: 'output' is required.") # noqa: TRY003 + raise ValueError("Invalid test configuration detected: 'output' is required.") # noqa: TRY003 # FIXME CoP kwargs = copy.deepcopy(test["input"]) error_message = None @@ -2027,10 +2019,10 @@ def evaluate_json_test_v3_api( # noqa: C901, PLR0912, PLR0913 return (result, error_message, stack_trace) -def check_json_test_result( # noqa: C901, PLR0912, PLR0915 +def check_json_test_result( # noqa: C901, PLR0912, PLR0915 # FIXME CoP test, result, pk_column=False ) -> None: - # check for id_pk 
results in cases where pk_column is true and unexpected_index_list already exists # noqa: E501 + # check for id_pk results in cases where pk_column is true and unexpected_index_list already exists # noqa: E501 # FIXME CoP # this will work for testing since result_format is COMPLETE if pk_column: if not result["success"]: @@ -2090,7 +2082,7 @@ def check_json_test_result( # noqa: C901, PLR0912, PLR0915 ], rtol=RTOL, atol=ATOL, - ), f"(RTOL={RTOL}, ATOL={ATOL}) {result['result']['observed_value']} not np.allclose to {expectationValidationResultSchema.load(test['output'])['result']['observed_value']}" # noqa: E501 + ), f"(RTOL={RTOL}, ATOL={ATOL}) {result['result']['observed_value']} not np.allclose to {expectationValidationResultSchema.load(test['output'])['result']['observed_value']}" # noqa: E501 # FIXME CoP else: assert result == expectationValidationResultSchema.load( test["output"] @@ -2100,9 +2092,9 @@ def check_json_test_result( # noqa: C901, PLR0912, PLR0915 test["output"] ), f"{result} != {expectationValidationResultSchema.load(test['output'])}" else: - # Convert result to json since our tests are reading from json so cannot easily contain richer types (e.g. NaN) # noqa: E501 - # NOTE - 20191031 - JPC - we may eventually want to change these tests as we update our view on how # noqa: E501 - # representations, serializations, and objects should interact and how much of that is shown to the user. # noqa: E501 + # Convert result to json since our tests are reading from json so cannot easily contain richer types (e.g. NaN) # noqa: E501 # FIXME CoP + # NOTE - 20191031 - JPC - we may eventually want to change these tests as we update our view on how # noqa: E501 # FIXME CoP + # representations, serializations, and objects should interact and how much of that is shown to the user. 
# noqa: E501 # FIXME CoP result = result.to_json_dict() for key, value in test["output"].items(): if key == "success": @@ -2113,7 +2105,7 @@ def check_json_test_result( # noqa: C901, PLR0912, PLR0915 value, rtol=RTOL, atol=ATOL, - ), f"(RTOL={RTOL}, ATOL={ATOL}) {result['success']} not np.allclose to {value}" # noqa: E501 + ), f"(RTOL={RTOL}, ATOL={ATOL}) {result['success']} not np.allclose to {value}" # noqa: E501 # FIXME CoP except TypeError: assert result["success"] == value, f"{result['success']} != {value}" else: @@ -2124,7 +2116,7 @@ def check_json_test_result( # noqa: C901, PLR0912, PLR0915 if isinstance(value, dict): assert ( set(result["result"]["observed_value"].keys()) == set(value.keys()) - ), f"{set(result['result']['observed_value'].keys())} != {set(value.keys())}" # noqa: E501 + ), f"{set(result['result']['observed_value'].keys())} != {set(value.keys())}" # noqa: E501 # FIXME CoP for k, v in value.items(): assert np.allclose( result["result"]["observed_value"][k], @@ -2137,7 +2129,7 @@ def check_json_test_result( # noqa: C901, PLR0912, PLR0915 value, rtol=test["tolerance"], ) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if isinstance(value, dict) and "values" in value: try: assert np.allclose( @@ -2145,7 +2137,7 @@ def check_json_test_result( # noqa: C901, PLR0912, PLR0915 value["values"], rtol=RTOL, atol=ATOL, - ), f"(RTOL={RTOL}, ATOL={ATOL}) {result['result']['observed_value']['values']} not np.allclose to {value['values']}" # noqa: E501 + ), f"(RTOL={RTOL}, ATOL={ATOL}) {result['result']['observed_value']['values']} not np.allclose to {value['values']}" # noqa: E501 # FIXME CoP except TypeError as e: print(e) assert ( @@ -2157,14 +2149,14 @@ def check_json_test_result( # noqa: C901, PLR0912, PLR0915 value, rtol=RTOL, atol=ATOL, - ), f"(RTOL={RTOL}, ATOL={ATOL}) {result['result']['observed_value']} not np.allclose to {value}" # noqa: E501 + ), f"(RTOL={RTOL}, ATOL={ATOL}) {result['result']['observed_value']} not np.allclose to 
{value}" # noqa: E501 # FIXME CoP else: assert ( result["result"]["observed_value"] == value ), f"{result['result']['observed_value']} != {value}" - # NOTE: This is a key used ONLY for testing cases where an expectation is legitimately allowed to return # noqa: E501 - # any of multiple possible observed_values. expect_column_values_to_be_of_type is one such expectation. # noqa: E501 + # NOTE: This is a key used ONLY for testing cases where an expectation is legitimately allowed to return # noqa: E501 # FIXME CoP + # any of multiple possible observed_values. expect_column_values_to_be_of_type is one such expectation. # noqa: E501 # FIXME CoP elif key == "observed_value_list": assert result["result"]["observed_value"] in value @@ -2226,11 +2218,11 @@ def check_json_test_result( # noqa: C901, PLR0912, PLR0915 else: assert result["result"]["details"]["observed_cdf"]["x"][0] == value else: - raise ValueError(f"Invalid test specification: unknown key {key} in 'out'") # noqa: TRY003 + raise ValueError(f"Invalid test specification: unknown key {key} in 'out'") # noqa: TRY003 # FIXME CoP elif key == "traceback_substring": if "raised_exception" not in result["exception_info"]: - # TODO JT: This accounts for a dictionary of type {"metric_id": ExceptionInfo} path defined in # noqa: E501 + # TODO JT: This accounts for a dictionary of type {"metric_id": ExceptionInfo} path defined in # noqa: E501 # FIXME CoP # validator._resolve_suite_level_graph_and_process_metric_evaluation_errors for k, v in result["exception_info"].items(): assert v["raised_exception"], f"{v['raised_exception']}" @@ -2279,7 +2271,7 @@ def check_json_test_result( # noqa: C901, PLR0912, PLR0915 ) else: - raise ValueError(f"Invalid test specification: unknown key {key} in 'out'") # noqa: TRY003 + raise ValueError(f"Invalid test specification: unknown key {key} in 'out'") # noqa: TRY003 # FIXME CoP def generate_test_table_name( @@ -2305,7 +2297,7 @@ def generate_dataset_name_from_expectation_name( sub_index 
(Optional int): In cases where dataset is a list, the additional index is used. Returns: dataset_name - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP dataset_name: str if not sub_index: @@ -2333,10 +2325,10 @@ def _check_if_valid_dataset_name(dataset_name: str) -> str: Returns: dataset_name - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if not re.match(r"^[A-Za-z0-9_]+$", dataset_name): - raise ExecutionEngineError( # noqa: TRY003 - f"dataset_name: {dataset_name} is not valid, because it contains non-alphanumeric and _ characters." # noqa: E501 + raise ExecutionEngineError( # noqa: TRY003 # FIXME CoP + f"dataset_name: {dataset_name} is not valid, because it contains non-alphanumeric and _ characters." # noqa: E501 # FIXME CoP f"Please check your configuration." ) @@ -2344,7 +2336,7 @@ def _check_if_valid_dataset_name(dataset_name: str) -> str: # starting from the end, so that we always get the index and sub_index new_dataset_name = dataset_name[-MAX_TABLE_NAME_LENGTH:] logger.info( - f"dataset_name: '{dataset_name}' was truncated to '{new_dataset_name}' to keep within length limits." # noqa: E501 + f"dataset_name: '{dataset_name}' was truncated to '{new_dataset_name}' to keep within length limits." 
# noqa: E501 # FIXME CoP ) dataset_name = new_dataset_name @@ -2361,7 +2353,7 @@ def _create_bigquery_engine() -> sqlalchemy.Engine: def _get_bigquery_connection_string() -> str: gcp_project = os.getenv("GE_TEST_GCP_PROJECT") if not gcp_project: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "Environment Variable GE_TEST_GCP_PROJECT is required to run BigQuery expectation tests" ) @@ -2371,8 +2363,8 @@ def _get_bigquery_connection_string() -> str: def _bigquery_dataset() -> str: dataset = os.getenv("GE_TEST_BIGQUERY_DATASET") if not dataset: - raise ValueError( # noqa: TRY003 - "Environment Variable GE_TEST_BIGQUERY_DATASET is required to run BigQuery expectation tests" # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "Environment Variable GE_TEST_BIGQUERY_DATASET is required to run BigQuery expectation tests" # noqa: E501 # FIXME CoP ) return dataset @@ -2390,7 +2382,7 @@ def _create_clickhouse_engine( _get_clickhouse_connection_string(hostname=hostname, schema_name=schema_name) ) from clickhouse_sqlalchemy.exceptions import DatabaseException - from sqlalchemy import text # noqa: TID251 + from sqlalchemy import text # noqa: TID251 # FIXME CoP with engine.begin() as conn: try: @@ -2432,19 +2424,19 @@ def _create_trino_engine( # trino_password = os.getenv("GE_TEST_TRINO_PASSWORD") # if not trino_password: # raise ValueError( - # "Environment Variable GE_TEST_TRINO_PASSWORD is required to run trino expectation tests." # noqa: E501 + # "Environment Variable GE_TEST_TRINO_PASSWORD is required to run trino expectation tests." # noqa: E501 # FIXME CoP # ) # trino_account = os.getenv("GE_TEST_TRINO_ACCOUNT") # if not trino_account: # raise ValueError( - # "Environment Variable GE_TEST_TRINO_ACCOUNT is required to run trino expectation tests." # noqa: E501 + # "Environment Variable GE_TEST_TRINO_ACCOUNT is required to run trino expectation tests." 
# noqa: E501 # FIXME CoP # ) # trino_cluster = os.getenv("GE_TEST_TRINO_CLUSTER") # if not trino_cluster: # raise ValueError( - # "Environment Variable GE_TEST_TRINO_CLUSTER is required to run trino expectation tests." # noqa: E501 + # "Environment Variable GE_TEST_TRINO_CLUSTER is required to run trino expectation tests." # noqa: E501 # FIXME CoP # ) # return create_engine( @@ -2464,36 +2456,36 @@ def _get_redshift_connection_string() -> str: """ Copied get_redshift_connection_url func from tests/test_utils.py """ - host = os.environ.get("REDSHIFT_HOST") # noqa: TID251 - port = os.environ.get("REDSHIFT_PORT") # noqa: TID251 - user = os.environ.get("REDSHIFT_USERNAME") # noqa: TID251 - pswd = os.environ.get("REDSHIFT_PASSWORD") # noqa: TID251 - db = os.environ.get("REDSHIFT_DATABASE") # noqa: TID251 - ssl = os.environ.get("REDSHIFT_SSLMODE") # noqa: TID251 + host = os.environ.get("REDSHIFT_HOST") # noqa: TID251 # FIXME CoP + port = os.environ.get("REDSHIFT_PORT") # noqa: TID251 # FIXME CoP + user = os.environ.get("REDSHIFT_USERNAME") # noqa: TID251 # FIXME CoP + pswd = os.environ.get("REDSHIFT_PASSWORD") # noqa: TID251 # FIXME CoP + db = os.environ.get("REDSHIFT_DATABASE") # noqa: TID251 # FIXME CoP + ssl = os.environ.get("REDSHIFT_SSLMODE") # noqa: TID251 # FIXME CoP if not host: - raise ValueError( # noqa: TRY003 - "Environment Variable REDSHIFT_HOST is required to run integration tests against Redshift" # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "Environment Variable REDSHIFT_HOST is required to run integration tests against Redshift" # noqa: E501 # FIXME CoP ) if not port: - raise ValueError( # noqa: TRY003 - "Environment Variable REDSHIFT_PORT is required to run integration tests against Redshift" # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "Environment Variable REDSHIFT_PORT is required to run integration tests against Redshift" # noqa: E501 # FIXME CoP ) if not user: - raise ValueError( # noqa: TRY003 - "Environment 
Variable REDSHIFT_USERNAME is required to run integration tests against Redshift" # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "Environment Variable REDSHIFT_USERNAME is required to run integration tests against Redshift" # noqa: E501 # FIXME CoP ) if not pswd: - raise ValueError( # noqa: TRY003 - "Environment Variable REDSHIFT_PASSWORD is required to run integration tests against Redshift" # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "Environment Variable REDSHIFT_PASSWORD is required to run integration tests against Redshift" # noqa: E501 # FIXME CoP ) if not db: - raise ValueError( # noqa: TRY003 - "Environment Variable REDSHIFT_DATABASE is required to run integration tests against Redshift" # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "Environment Variable REDSHIFT_DATABASE is required to run integration tests against Redshift" # noqa: E501 # FIXME CoP ) if not ssl: - raise ValueError( # noqa: TRY003 - "Environment Variable REDSHIFT_SSLMODE is required to run integration tests against Redshift" # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "Environment Variable REDSHIFT_SSLMODE is required to run integration tests against Redshift" # noqa: E501 # FIXME CoP ) url = f"redshift+psycopg2://{user}:{pswd}@{host}:{port}/{db}?sslmode={ssl}" @@ -2516,13 +2508,13 @@ def _get_athena_connection_string(db_name_env_var: str = "ATHENA_DB_NAME") -> st ATHENA_STAGING_S3: Optional[str] = os.getenv("ATHENA_STAGING_S3") if not ATHENA_DB_NAME: - raise ValueError( # noqa: TRY003 - f"Environment Variable {db_name_env_var} is required to run integration tests against AWS Athena" # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + f"Environment Variable {db_name_env_var} is required to run integration tests against AWS Athena" # noqa: E501 # FIXME CoP ) if not ATHENA_STAGING_S3: - raise ValueError( # noqa: TRY003 - "Environment Variable ATHENA_STAGING_S3 is required to run integration tests against AWS Athena" 
# noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "Environment Variable ATHENA_STAGING_S3 is required to run integration tests against AWS Athena" # noqa: E501 # FIXME CoP ) url = f"awsathena+rest://@athena.us-east-1.amazonaws.com/{ATHENA_DB_NAME}?s3_staging_dir={ATHENA_STAGING_S3}" @@ -2538,13 +2530,13 @@ def _get_snowflake_connection_string() -> str: """ Copied get_snowflake_connection_url func from tests/test_utils.py """ - sfUser = os.environ.get("SNOWFLAKE_USER") # noqa: TID251 - sfPswd = os.environ.get("SNOWFLAKE_PW") # noqa: TID251 - sfAccount = os.environ.get("SNOWFLAKE_ACCOUNT") # noqa: TID251 - sfDatabase = os.environ.get("SNOWFLAKE_DATABASE") # noqa: TID251 - sfSchema = os.environ.get("SNOWFLAKE_SCHEMA", "") # noqa: TID251 - sfWarehouse = os.environ.get("SNOWFLAKE_WAREHOUSE") # noqa: TID251 - sfRole = os.environ.get("SNOWFLAKE_ROLE", "PUBLIC") # noqa: TID251 + sfUser = os.environ.get("SNOWFLAKE_USER") # noqa: TID251 # FIXME CoP + sfPswd = os.environ.get("SNOWFLAKE_PW") # noqa: TID251 # FIXME CoP + sfAccount = os.environ.get("SNOWFLAKE_ACCOUNT") # noqa: TID251 # FIXME CoP + sfDatabase = os.environ.get("SNOWFLAKE_DATABASE") # noqa: TID251 # FIXME CoP + sfSchema = os.environ.get("SNOWFLAKE_SCHEMA", "") # noqa: TID251 # FIXME CoP + sfWarehouse = os.environ.get("SNOWFLAKE_WAREHOUSE") # noqa: TID251 # FIXME CoP + sfRole = os.environ.get("SNOWFLAKE_ROLE", "PUBLIC") # noqa: TID251 # FIXME CoP url = f"snowflake://{sfUser}:{sfPswd}@{sfAccount}/{sfDatabase}/{sfSchema}?warehouse={sfWarehouse}&role={sfRole}" @@ -2558,10 +2550,10 @@ def generate_sqlite_db_path(): Returns: str: An absolute path to the ephemeral db within the created temporary directory. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP tmp_dir = str(tempfile.mkdtemp()) - abspath = os.path.abspath( # noqa: PTH100 - os.path.join( # noqa: PTH118 + abspath = os.path.abspath( # noqa: PTH100 # FIXME CoP + os.path.join( # noqa: PTH118 # FIXME CoP tmp_dir, "sqlite_db" + "".join([random.choice(string.ascii_letters + string.digits) for _ in range(8)]) diff --git a/great_expectations/types/__init__.py b/great_expectations/types/__init__.py index 0540e94c69ac..5894db4f4831 100644 --- a/great_expectations/types/__init__.py +++ b/great_expectations/types/__init__.py @@ -58,7 +58,7 @@ class MyClassA(DictDot): bar: int For more examples of usage, please see `test_dataclass_serializable_dot_dict_pattern.py` in the tests folder. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP include_field_names: ClassVar[Set[str]] = set() exclude_field_names: ClassVar[Set[str]] = set() @@ -94,11 +94,11 @@ def get(self, key, default_value=None): return self.__getitem__(item=key) return self.__dict__.get(key, default_value) - def to_raw_dict(self) -> dict: # noqa: C901 + def to_raw_dict(self) -> dict: # noqa: C901 # FIXME CoP """Convert this object into a standard dictionary, recursively. This is often convenient for serialization, and in cases where an untyped version of the object is required. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP new_dict = safe_deep_copy(data=self.__dict__) @@ -126,11 +126,11 @@ def to_raw_dict(self) -> dict: # noqa: C901 if isinstance(element, Enum): new_dict[key][i] = element.value - # Note: conversion will not work automatically if there are additional layers in between. # noqa: E501 + # Note: conversion will not work automatically if there are additional layers in between. 
# noqa: E501 # FIXME CoP return new_dict - def to_dict(self) -> dict: # noqa: C901 + def to_dict(self) -> dict: # noqa: C901 # FIXME CoP new_dict = { key: self[key] for key in self.property_names( @@ -162,7 +162,7 @@ def to_dict(self) -> dict: # noqa: C901 return new_dict - def property_names( # noqa: C901 + def property_names( # noqa: C901 # FIXME CoP self, include_keys: Optional[Set[str]] = None, exclude_keys: Optional[Set[str]] = None, @@ -175,7 +175,7 @@ def property_names( # noqa: C901 :param include_keys: inclusion list ("include only these properties, while excluding all the rest") :param exclude_keys: exclusion list ("exclude only these properties, while include all the rest") :return: property names, subject to inclusion/exclusion filtering - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if include_keys is None: include_keys = set() @@ -183,14 +183,14 @@ def property_names( # noqa: C901 exclude_keys = set() if include_keys & exclude_keys: - raise ValueError( # noqa: TRY003 - "Common keys between sets of include_keys and exclude_keys filtering directives are illegal." # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "Common keys between sets of include_keys and exclude_keys filtering directives are illegal." # noqa: E501 # FIXME CoP ) key: str # Gather private fields: - # By Python convention, properties of non-trivial length, prefixed by underscore ("_") character, are private. # noqa: E501 + # By Python convention, properties of non-trivial length, prefixed by underscore ("_") character, are private. # noqa: E501 # FIXME CoP private_fields: Set[str] = set( filter( lambda name: len(name) > 1, @@ -213,8 +213,8 @@ def assert_valid_keys(keys: Set[str], purpose: str) -> None: try: _ = self[f"_{name}"] except AttributeError: - raise ValueError( # noqa: TRY003 - f'Property "{name}", marked for {purpose} on object "{type(self)!s}", does not exist.' 
# noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + f'Property "{name}", marked for {purpose} on object "{type(self)!s}", does not exist.' # noqa: E501 # FIXME CoP ) if include_keys: @@ -243,15 +243,15 @@ def to_json_dict(self) -> Dict[str, JSONValues]: """ # TODO: 2/4/2022 - # A reference implementation can be provided, once circular import dependencies, caused by relative locations of # noqa: E501 - # the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules are resolved. # noqa: E501 + # A reference implementation can be provided, once circular import dependencies, caused by relative locations of # noqa: E501 # FIXME CoP + # the "great_expectations/types/__init__.py" and "great_expectations/core/util.py" modules are resolved. # noqa: E501 # FIXME CoP raise NotImplementedError def safe_deep_copy(data, memo=None): """ This method makes a copy of a dictionary, applying deep copy to attribute values, except for non-pickleable objects. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if isinstance(data, (pd.Series, pd.DataFrame)) or ( pyspark.pyspark and isinstance(data, pyspark.DataFrame) ): diff --git a/great_expectations/types/attributes.py b/great_expectations/types/attributes.py index 3b48ea80c9de..b083afd7e457 100644 --- a/great_expectations/types/attributes.py +++ b/great_expectations/types/attributes.py @@ -5,7 +5,7 @@ from great_expectations.compatibility.typing_extensions import override from great_expectations.core import IDDict from great_expectations.types import SerializableDotDict -from great_expectations.util import convert_to_json_serializable # noqa: TID251 +from great_expectations.util import convert_to_json_serializable # noqa: TID251 # FIXME CoP logger = logging.getLogger(__name__) diff --git a/great_expectations/types/base.py b/great_expectations/types/base.py index 343893a8600d..b7a42502c544 100644 --- a/great_expectations/types/base.py +++ b/great_expectations/types/base.py @@ -39,13 +39,13 @@ def 
__deepcopy__(self, memo): return DotDict([(copy.deepcopy(k, memo), copy.deepcopy(v, memo)) for k, v in self.items()]) # The following are required to support yaml serialization, since we do not raise - # AttributeError from __getattr__ in DotDict. We *do* raise that AttributeError when it is possible to know # noqa: E501 + # AttributeError from __getattr__ in DotDict. We *do* raise that AttributeError when it is possible to know # noqa: E501 # FIXME CoP # a given attribute is not allowed (because it's not in _allowed_keys) _yaml_merge: List = [] @classmethod def yaml_anchor(cls): - # This is required since our dotdict allows *any* access via dotNotation, blocking the normal # noqa: E501 + # This is required since our dotdict allows *any* access via dotNotation, blocking the normal # noqa: E501 # FIXME CoP # behavior of raising an AttributeError when trying to access a nonexistent function return None @@ -60,7 +60,7 @@ class SerializableDotDict(DotDict): Analogously to the way "SerializableDictDot" extends "DictDot" to provide JSON serialization, the present class, "SerializableDotDict" extends "DotDict" to provide JSON-serializable version of the "DotDict" class as well. Since "DotDict" is already YAML-serializable, "SerializableDotDict" is both YAML-serializable and JSON-serializable. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def to_json_dict(self) -> dict: raise NotImplementedError diff --git a/great_expectations/types/configurations.py b/great_expectations/types/configurations.py index 5cca7cfe5c4c..76399f9c3543 100644 --- a/great_expectations/types/configurations.py +++ b/great_expectations/types/configurations.py @@ -4,7 +4,7 @@ class ClassConfig: - """Defines information sufficient to identify a class to be (dynamically) loaded for a DataContext.""" # noqa: E501 + """Defines information sufficient to identify a class to be (dynamically) loaded for a DataContext.""" # noqa: E501 # FIXME CoP def __init__(self, class_name, module_name=None) -> None: self._class_name = class_name diff --git a/great_expectations/util.py b/great_expectations/util.py index 6c533c35e887..78b0e214bdf0 100644 --- a/great_expectations/util.py +++ b/great_expectations/util.py @@ -29,6 +29,7 @@ Callable, Dict, List, + Literal, Optional, Set, SupportsFloat, @@ -144,7 +145,7 @@ def measure_execution_time( Returns: Callable -- configured "execution_time_decorator" function. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def execution_time_decorator(func: Callable) -> Callable: @wraps(func) @@ -160,7 +161,7 @@ def compute_delta_t(*args, **kwargs) -> Any: Returns: Any (output value of original function being decorated). 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP time_begin: float = (getattr(time, method))() try: return func(*args, **kwargs) @@ -170,7 +171,7 @@ def compute_delta_t(*args, **kwargs) -> Any: if kwargs is None: kwargs = {} - execution_time_holder: type = kwargs.get( # type: ignore[assignment] + execution_time_holder: type = kwargs.get( # type: ignore[assignment] # FIXME CoP execution_time_holder_object_reference_name ) if execution_time_holder is not None and hasattr( @@ -184,11 +185,11 @@ def compute_delta_t(*args, **kwargs) -> Any: call_args: OrderedDict = bound_args.arguments print( f"""Total execution time of function {func.__name__}({dict(call_args)!s}): {delta_t} \ -seconds.""" # noqa: E501 +seconds.""" # noqa: E501 # FIXME CoP ) else: print( - f"Total execution time of function {func.__name__}(): {delta_t} seconds." # noqa: E501 + f"Total execution time of function {func.__name__}(): {delta_t} seconds." # noqa: E501 # FIXME CoP ) return compute_delta_t @@ -216,7 +217,7 @@ def verify_dynamic_loading_support(module_name: str, package_name: Optional[str] message: str = f"""No module named "{package_name + module_name}" could be found in the repository. Please \ make sure that the file, corresponding to this package and module, exists and that dynamic loading of code modules, \ templates, and assets is supported in your execution environment. This error is unrecoverable. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP raise FileNotFoundError(message) @@ -224,7 +225,7 @@ def import_library_module(module_name: str) -> Optional[ModuleType]: """ :param module_name: a fully-qualified name of a module (e.g., "great_expectations.dataset.sqlalchemy_dataset") :return: raw source code of the module (if can be retrieved) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP module_obj: Optional[ModuleType] try: @@ -242,13 +243,13 @@ def is_library_loadable(library_name: str) -> bool: def load_class(class_name: str, module_name: str) -> type: if class_name is None: - raise TypeError("class_name must not be None") # noqa: TRY003 + raise TypeError("class_name must not be None") # noqa: TRY003 # FIXME CoP if not isinstance(class_name, str): - raise TypeError("class_name must be a string") # noqa: TRY003 + raise TypeError("class_name must be a string") # noqa: TRY003 # FIXME CoP if module_name is None: - raise TypeError("module_name must not be None") # noqa: TRY003 + raise TypeError("module_name must not be None") # noqa: TRY003 # FIXME CoP if not isinstance(module_name, str): - raise TypeError("module_name must be a string") # noqa: TRY003 + raise TypeError("module_name must be a string") # noqa: TRY003 # FIXME CoP try: verify_dynamic_loading_support(module_name=module_name) except FileNotFoundError: @@ -308,7 +309,7 @@ def gen_directory_tree_str(startpath: PathStr): bbb.txt #Note: files and directories are sorted alphabetically, so that this method can be used for testing. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP output_str = "" @@ -318,7 +319,7 @@ def gen_directory_tree_str(startpath: PathStr): for root, dirs, files in tuples: level = root.replace(str(startpath), "").count(os.sep) indent = " " * 4 * level - output_str += f"{indent}{os.path.basename(root)}/\n" # noqa: PTH119 + output_str += f"{indent}{os.path.basename(root)}/\n" # noqa: PTH119 # FIXME CoP subindent = " " * 4 * (level + 1) files.sort() @@ -328,7 +329,7 @@ def gen_directory_tree_str(startpath: PathStr): return output_str -def filter_properties_dict( # noqa: C901, PLR0912, PLR0913 +def filter_properties_dict( # noqa: C901, PLR0912, PLR0913 # FIXME CoP properties: Optional[dict] = None, keep_fields: Optional[Set[str]] = None, delete_fields: Optional[Set[str]] = None, @@ -351,7 +352,7 @@ def filter_properties_dict( # noqa: C901, PLR0912, PLR0913 Returns: The (possibly) filtered properties dictionary (or None if no entries remain after filtering is performed) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if keep_fields is None: keep_fields = set() @@ -359,8 +360,8 @@ def filter_properties_dict( # noqa: C901, PLR0912, PLR0913 delete_fields = set() if keep_fields & delete_fields: - raise ValueError( # noqa: TRY003 - "Common keys between sets of keep_fields and delete_fields filtering directives are illegal." # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + "Common keys between sets of keep_fields and delete_fields filtering directives are illegal." # noqa: E501 # FIXME CoP ) if clean_falsy: @@ -370,8 +371,8 @@ def filter_properties_dict( # noqa: C901, PLR0912, PLR0913 properties = {} if not isinstance(properties, dict): - raise ValueError( # noqa: TRY003, TRY004 - f'Source "properties" must be a dictionary (illegal type "{type(properties)!s}" detected).' # noqa: E501 + raise ValueError( # noqa: TRY003, TRY004 # FIXME CoP + f'Source "properties" must be a dictionary (illegal type "{type(properties)!s}" detected).' 
# noqa: E501 # FIXME CoP ) if not inplace: @@ -501,7 +502,7 @@ def deep_filter_properties_iterable( ) -> None: ... -def deep_filter_properties_iterable( # noqa: C901, PLR0913 +def deep_filter_properties_iterable( # noqa: C901, PLR0913 # FIXME CoP properties: Union[dict, list, set, tuple, None] = None, keep_fields: Optional[Set[str]] = None, delete_fields: Optional[Set[str]] = None, @@ -546,7 +547,7 @@ def deep_filter_properties_iterable( # noqa: C901, PLR0913 # Upon unwinding the call stack, do a sanity check to ensure cleaned properties. keys_to_delete: List[str] = list( filter( - lambda k: k not in keep_fields # type: ignore[arg-type] + lambda k: k not in keep_fields # type: ignore[arg-type] # FIXME CoP and _is_to_be_removed_from_deep_filter_properties_iterable( value=properties[k], clean_nulls=clean_nulls, @@ -682,7 +683,7 @@ def convert_decimal_to_float(d: SupportsFloat) -> float: def requires_lossy_conversion(d: decimal.Decimal) -> bool: """ This method determines whether or not conversion from "decimal.Decimal" to standard "float" type cannot be lossless. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return d - decimal.Context(prec=sys.float_info.dig).create_decimal(d) != 0 @@ -710,22 +711,22 @@ def isclose( of the "atol" value (here, 8 digits as the default). However, when the "control" value is large in magnitude, the relative tolerance ("rtol") parameter carries a greater weight in the comparison assessment, because the acceptable deviation between the two quantities can be relatively larger for them to be deemed as "close enough" in this case. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if isinstance(operand_a, str) and isinstance(operand_b, str): return operand_a == operand_b if isinstance(operand_a, datetime.datetime) and isinstance(operand_b, datetime.datetime): - operand_a = operand_a.timestamp() # type: ignore[assignment] - operand_b = operand_b.timestamp() # type: ignore[assignment] + operand_a = operand_a.timestamp() # type: ignore[assignment] # FIXME CoP + operand_b = operand_b.timestamp() # type: ignore[assignment] # FIXME CoP elif isinstance(operand_a, datetime.timedelta) and isinstance(operand_b, datetime.timedelta): - operand_a = operand_a.total_seconds() # type: ignore[assignment] - operand_b = operand_b.total_seconds() # type: ignore[assignment] + operand_a = operand_a.total_seconds() # type: ignore[assignment] # FIXME CoP + operand_b = operand_b.total_seconds() # type: ignore[assignment] # FIXME CoP return cast( bool, np.isclose( - a=np.float64(operand_a), # type: ignore[arg-type] - b=np.float64(operand_b), # type: ignore[arg-type] + a=np.float64(operand_a), # type: ignore[arg-type] # FIXME CoP + b=np.float64(operand_b), # type: ignore[arg-type] # FIXME CoP rtol=rtol, atol=atol, equal_nan=equal_nan, @@ -773,7 +774,7 @@ def is_ndarray_datetime_dtype( ) -> bool: """ Determine whether or not all elements of 1-D "np.ndarray" argument are "datetime.datetime" type objects. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP value: Any result: bool = all(isinstance(value, datetime.datetime) for value in data) return result or ( @@ -795,7 +796,7 @@ def convert_ndarray_to_datetime_dtype_best_effort( Boolean flag -- True if all elements of original "data" were "datetime.datetime" type objects; False, otherwise. Boolean flag -- True, if conversion was performed; False, otherwise. Output "np.ndarray" (converted, if necessary). 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if is_ndarray_datetime_dtype(data=data, parse_strings_as_datetimes=False, fuzzy=fuzzy): return True, False, data @@ -822,7 +823,7 @@ def convert_ndarray_datetime_to_float_dtype_utc_timezone( Convert all elements of 1-D "np.ndarray" argument from "datetime.datetime" type to "timestamp" "float" type objects. Note: Conversion of "datetime.datetime" to "float" uses "UTC" TimeZone to normalize all "datetime.datetime" values. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP value: Any return np.asarray([value.replace(tzinfo=datetime.timezone.utc).timestamp() for value in data]) @@ -832,10 +833,10 @@ def convert_ndarray_float_to_datetime_dtype(data: np.ndarray) -> np.ndarray: Convert all elements of 1-D "np.ndarray" argument from "float" type to "datetime.datetime" type objects. Note: Converts to "naive" "datetime.datetime" values (assumes "UTC" TimeZone based floating point timestamps). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP value: Any return np.asarray( - [datetime.datetime.utcfromtimestamp(value) for value in data] # noqa: DTZ004 + [datetime.datetime.utcfromtimestamp(value) for value in data] # noqa: DTZ004 # FIXME CoP ) @@ -846,7 +847,7 @@ def convert_ndarray_float_to_datetime_tuple( Convert all elements of 1-D "np.ndarray" argument from "float" type to "datetime.datetime" type tuple elements. Note: Converts to "naive" "datetime.datetime" values (assumes "UTC" TimeZone based floating point timestamps). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return tuple(convert_ndarray_float_to_datetime_dtype(data=data).tolist()) @@ -855,7 +856,7 @@ def does_ndarray_contain_decimal_dtype( ) -> TypeGuard[npt.NDArray]: """ Determine whether or not all elements of 1-D "np.ndarray" argument are "decimal.Decimal" type objects. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP value: Any result: bool = any(isinstance(value, decimal.Decimal) for value in data) return result @@ -864,7 +865,7 @@ def does_ndarray_contain_decimal_dtype( def convert_ndarray_decimal_to_float_dtype(data: np.ndarray) -> np.ndarray: """ Convert all elements of N-D "np.ndarray" argument from "decimal.Decimal" type to "float" type objects. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP convert_decimal_to_float_vectorized: Callable[[np.ndarray], np.ndarray] = np.vectorize( pyfunc=convert_decimal_to_float ) @@ -876,7 +877,7 @@ def convert_pandas_series_decimal_to_float_dtype( ) -> pd.Series | None: """ Convert all elements of "pd.Series" argument from "decimal.Decimal" type to "float" type objects "pd.Series" result. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP series_data: np.ndarray = data.to_numpy() series_data_has_decimal: bool = does_ndarray_contain_decimal_dtype(data=series_data) if series_data_has_decimal: @@ -934,12 +935,12 @@ def get_sqlalchemy_selectable( also needs to be handled here, using the old equivalent method. https://docs.sqlalchemy.org/en/14/changelog/migration_14.html#change-4617 - """ # noqa: E501 - if sqlalchemy.Select and isinstance(selectable, sqlalchemy.Select): # type: ignore[truthy-function] + """ # noqa: E501 # FIXME CoP + if sqlalchemy.Select and isinstance(selectable, sqlalchemy.Select): # type: ignore[truthy-function] # FIXME CoP if version.parse(sa.__version__) >= version.parse("1.4"): - selectable = selectable.subquery() # type: ignore[assignment] + selectable = selectable.subquery() # type: ignore[assignment] # FIXME CoP else: - selectable = selectable.alias() # type: ignore[assignment] + selectable = selectable.alias() # type: ignore[assignment] # FIXME CoP return selectable @@ -949,7 +950,7 @@ def get_sqlalchemy_subquery_type(): This helper method ensures that the appropriate type is returned. 
https://docs.sqlalchemy.org/en/14/changelog/migration_14.html#change-4617 - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP try: return sa.sql.Subquery except AttributeError: @@ -970,7 +971,7 @@ def import_make_url(): """ Beginning from SQLAlchemy 1.4, make_url is accessed from sqlalchemy.engine; earlier versions must still be accessed from sqlalchemy.engine.url to avoid import errors. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if version.parse(sa.__version__) < version.parse("1.4"): make_url = sqlalchemy.url.make_url else: @@ -1018,13 +1019,26 @@ def get_trino_potential_type(type_module: ModuleType, type_: str) -> object: return potential_type -def pandas_series_between_inclusive(series: pd.Series, min_value: int, max_value: int) -> pd.Series: +Inclusive = Literal["left", "right", "neither", "both"] + + +def pandas_series_between( + series: pd.Series, min_value: int, max_value: int, inclusive: Inclusive +) -> pd.Series: """ - As of Pandas 1.3.0, the 'inclusive' arg in between() is an enum: {"left", "right", "neither", "both"} - """ # noqa: E501 + As of Pandas 1.3.0, the 'inclusive' arg in between() is a string literal: {"left", "right", "neither", "both"} + """ # noqa: E501 # FIXME CoP metric_series: pd.Series if version.parse(pd.__version__) >= version.parse("1.3.0"): - metric_series = series.between(min_value, max_value, inclusive="both") + metric_series = series.between(min_value, max_value, inclusive=inclusive) + elif inclusive == "left": + metric_series = (series >= min_value) & (series < max_value) + elif inclusive == "right": + metric_series = (series > min_value) & (series <= max_value) + elif inclusive == "neither": + metric_series = series.between(min_value, max_value, inclusive=False) # type: ignore[arg-type] # valid for pandas < 1.3 + elif inclusive == "both": + metric_series = series.between(min_value, max_value, inclusive=True) # type: ignore[arg-type] # valid for pandas < 1.3 else: metric_series = series.between(min_value, max_value) @@ 
-1049,7 +1063,7 @@ def pandas_series_between_inclusive(series: pd.Series, min_value: int, max_value ] JSONConvertable: TypeAlias = Union[ - ToDict, ToList, ToStr, ToInt, ToFloat, ToBool, ToBool, None # noqa: PYI016 + ToDict, ToList, ToStr, ToInt, ToFloat, ToBool, ToBool, None # noqa: PYI016 # FIXME CoP ] @@ -1095,7 +1109,7 @@ def convert_to_json_serializable( ) -> None: ... -def convert_to_json_serializable( # noqa: C901, PLR0911, PLR0912 +def convert_to_json_serializable( # noqa: C901, PLR0911, PLR0912 # FIXME CoP data: JSONConvertable, ) -> JSONValues: """Converts an object to one that is JSON-serializable. @@ -1140,7 +1154,7 @@ def convert_to_json_serializable( # noqa: C901, PLR0911, PLR0912 if isinstance(data, dict): new_dict = {} for key in data: - # A pandas index can be numeric, and a dict key can be numeric, but a json key must be a string # noqa: E501 + # A pandas index can be numeric, and a dict key can be numeric, but a json key must be a string # noqa: E501 # FIXME CoP new_dict[str(key)] = convert_to_json_serializable(data[key]) return new_dict @@ -1154,8 +1168,8 @@ def convert_to_json_serializable( # noqa: C901, PLR0911, PLR0912 if isinstance(data, (np.ndarray, pd.Index)): # test_obj[key] = test_obj[key].tolist() - # If we have an array or index, convert it first to a list--causing coercion to float--and then round # noqa: E501 - # to the number of digits for which the string representation will equal the float representation # noqa: E501 + # If we have an array or index, convert it first to a list--causing coercion to float--and then round # noqa: E501 # FIXME CoP + # to the number of digits for which the string representation will equal the float representation # noqa: E501 # FIXME CoP return [convert_to_json_serializable(x) for x in data.tolist()] if isinstance(data, np.int64): @@ -1164,7 +1178,7 @@ def convert_to_json_serializable( # noqa: C901, PLR0911, PLR0912 if isinstance(data, np.float64): return float(data) - if isinstance(data, 
(datetime.datetime, datetime.date)): + if isinstance(data, (datetime.datetime, datetime.date, datetime.time)): return data.isoformat() if isinstance(data, (np.datetime64)): @@ -1199,13 +1213,13 @@ def convert_to_json_serializable( # noqa: C901, PLR0911, PLR0912 return float(round(data, sys.float_info.dig)) # type: ignore[arg-type] # could be None # Note: This clause has to come after checking for np.ndarray or we get: - # `ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()` # noqa: E501 + # `ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()` # noqa: E501 # FIXME CoP if data is None: # No problem to encode json return data try: - if not isinstance(data, list) and pd.isna(data): # type: ignore[arg-type] + if not isinstance(data, list) and pd.isna(data): # type: ignore[arg-type] # FIXME CoP # pd.isna is functionally vectorized, but we only want to apply this to single objects # Hence, why we test for `not isinstance(list)` return None @@ -1221,8 +1235,8 @@ def convert_to_json_serializable( # noqa: C901, PLR0911, PLR0912 value_name = data.name or "value" return [ { - index_name: convert_to_json_serializable(idx), # type: ignore[call-overload] - value_name: convert_to_json_serializable(val), # type: ignore[dict-item] + index_name: convert_to_json_serializable(idx), # type: ignore[call-overload] # FIXME CoP + value_name: convert_to_json_serializable(val), # type: ignore[dict-item] # FIXME CoP } for idx, val in data.items() ] @@ -1230,7 +1244,7 @@ def convert_to_json_serializable( # noqa: C901, PLR0911, PLR0912 if isinstance(data, pd.DataFrame): return convert_to_json_serializable(data.to_dict(orient="records")) - if pyspark.DataFrame and isinstance(data, pyspark.DataFrame): # type: ignore[truthy-function] + if pyspark.DataFrame and isinstance(data, pyspark.DataFrame): # type: ignore[truthy-function] # FIXME CoP # using StackOverflow suggestion for converting pyspark 
df into dictionary # https://stackoverflow.com/questions/43679880/pyspark-dataframe-to-dictionary-columns-as-keys-and-list-of-column-values-ad-di return convert_to_json_serializable(dict(zip(data.schema.names, zip(*data.collect())))) @@ -1240,10 +1254,10 @@ def convert_to_json_serializable( # noqa: C901, PLR0911, PLR0912 return dict(data) # sqlalchemy text for SqlAlchemy 2 compatibility - if sqlalchemy.TextClause and isinstance(data, sqlalchemy.TextClause): # type: ignore[truthy-function] + if sqlalchemy.TextClause and isinstance(data, sqlalchemy.TextClause): # type: ignore[truthy-function] # FIXME CoP return str(data) - if Row and isinstance(data, Row): # type: ignore[truthy-function] + if Row and isinstance(data, Row): # type: ignore[truthy-function] # FIXME CoP return str(data) if isinstance(data, decimal.Decimal): @@ -1258,7 +1272,7 @@ def convert_to_json_serializable( # noqa: C901, PLR0911, PLR0912 if pyspark.types and isinstance(data, pyspark.types.StructType): return dict(data.jsonValue()) - if sqlalchemy.Connection and isinstance(data, sqlalchemy.Connection): # type: ignore[truthy-function] + if sqlalchemy.Connection and isinstance(data, sqlalchemy.Connection): # type: ignore[truthy-function] # FIXME CoP # Connection is a module, which is non-serializable. Return module name instead. return "sqlalchemy.engine.base.Connection" @@ -1269,10 +1283,10 @@ def convert_to_json_serializable( # noqa: C901, PLR0911, PLR0912 return data.pattern # Unable to serialize (unrecognized data type). 
- raise TypeError(f"{data!s} is of type {type(data).__name__} which cannot be serialized.") # noqa: TRY003 + raise TypeError(f"{data!s} is of type {type(data).__name__} which cannot be serialized.") # noqa: TRY003 # FIXME CoP -def ensure_json_serializable(data: Any) -> None: # noqa: C901, PLR0911, PLR0912 +def ensure_json_serializable(data: Any) -> None: # noqa: C901, PLR0911, PLR0912 # FIXME CoP """ Helper function to convert an object to one that is json serializable Args: @@ -1304,12 +1318,12 @@ def ensure_json_serializable(data: Any) -> None: # noqa: C901, PLR0911, PLR0912 if isinstance(data, (np.ndarray, pd.Index)): # test_obj[key] = test_obj[key].tolist() - # If we have an array or index, convert it first to a list--causing coercion to float--and then round # noqa: E501 - # to the number of digits for which the string representation will equal the float representation # noqa: E501 - _ = [ensure_json_serializable(x) for x in data.tolist()] # type: ignore[func-returns-value] + # If we have an array or index, convert it first to a list--causing coercion to float--and then round # noqa: E501 # FIXME CoP + # to the number of digits for which the string representation will equal the float representation # noqa: E501 # FIXME CoP + _ = [ensure_json_serializable(x) for x in data.tolist()] # type: ignore[func-returns-value] # FIXME CoP return - if isinstance(data, (datetime.datetime, datetime.date)): + if isinstance(data, (datetime.datetime, datetime.date, datetime.time)): return if isinstance(data, pathlib.PurePath): @@ -1328,7 +1342,7 @@ def ensure_json_serializable(data: Any) -> None: # noqa: C901, PLR0911, PLR0912 return # Note: This clause has to come after checking for np.ndarray or we get: - # `ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()` # noqa: E501 + # `ValueError: The truth value of an array with more than one element is ambiguous. 
Use a.any() or a.all()` # noqa: E501 # FIXME CoP if data is None: # No problem to encode json return @@ -1350,8 +1364,8 @@ def ensure_json_serializable(data: Any) -> None: # noqa: C901, PLR0911, PLR0912 value_name = data.name or "value" _ = [ { - index_name: ensure_json_serializable(idx), # type: ignore[func-returns-value] - value_name: ensure_json_serializable(val), # type: ignore[func-returns-value] + index_name: ensure_json_serializable(idx), # type: ignore[func-returns-value] # FIXME CoP + value_name: ensure_json_serializable(val), # type: ignore[func-returns-value] # FIXME CoP } for idx, val in data.items() ] @@ -1373,14 +1387,14 @@ def ensure_json_serializable(data: Any) -> None: # noqa: C901, PLR0911, PLR0912 if isinstance(data, RunIdentifier): return - if sqlalchemy.TextClause and isinstance(data, sqlalchemy.TextClause): # type: ignore[truthy-function] + if sqlalchemy.TextClause and isinstance(data, sqlalchemy.TextClause): # type: ignore[truthy-function] # FIXME CoP # TextClause is handled manually by convert_to_json_serializable() return - if sqlalchemy.Connection and isinstance(data, sqlalchemy.Connection): # type: ignore[truthy-function] + if sqlalchemy.Connection and isinstance(data, sqlalchemy.Connection): # type: ignore[truthy-function] # FIXME CoP # Connection module is handled manually by convert_to_json_serializable() return - raise InvalidExpectationConfigurationError( # noqa: TRY003 + raise InvalidExpectationConfigurationError( # noqa: TRY003 # FIXME CoP f"{data!s} is of type {type(data).__name__} which cannot be serialized to json" ) diff --git a/great_expectations/validator/computed_metric.py b/great_expectations/validator/computed_metric.py index 7af8fa305851..5c4de37e608d 100644 --- a/great_expectations/validator/computed_metric.py +++ b/great_expectations/validator/computed_metric.py @@ -6,7 +6,7 @@ import pandas as pd MetricValue = Union[ - Any, # Encompasses deferred-query/execution plans ("SQLAlchemy" and "Spark") conditions and 
aggregation functions. # noqa: E501 + Any, # Encompasses deferred-query/execution plans ("SQLAlchemy" and "Spark") conditions and aggregation functions. # noqa: E501 # FIXME CoP List[Any], Set[Any], Tuple[Any, ...], diff --git a/great_expectations/validator/exception_info.py b/great_expectations/validator/exception_info.py index 1cee390cbfb4..1bcb3d548384 100644 --- a/great_expectations/validator/exception_info.py +++ b/great_expectations/validator/exception_info.py @@ -5,7 +5,7 @@ from great_expectations.compatibility.typing_extensions import override from great_expectations.core import IDDict from great_expectations.types.base import SerializableDotDict -from great_expectations.util import convert_to_json_serializable # noqa: TID251 +from great_expectations.util import convert_to_json_serializable # noqa: TID251 # FIXME CoP class ExceptionInfo(SerializableDotDict): diff --git a/great_expectations/validator/metric_configuration.py b/great_expectations/validator/metric_configuration.py index c6abd5a2c913..9e5b1fbe2bfa 100644 --- a/great_expectations/validator/metric_configuration.py +++ b/great_expectations/validator/metric_configuration.py @@ -8,7 +8,7 @@ from great_expectations.core.id_dict import IDDict from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.experimental.metric_repository.metrics import MetricTypes -from great_expectations.util import convert_to_json_serializable # noqa: TID251 +from great_expectations.util import convert_to_json_serializable # noqa: TID251 # FIXME CoP class MetricConfiguration: @@ -25,7 +25,7 @@ class MetricConfiguration: metric_value_kwargs (optional[dict]): Optional kwargs that define values specific to each Metric. For instance, a Metric that partitions a column can define the method of partitioning (`uniform` bins) and the number of bins (`n_bins`) as `metric_value_kwargs`. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def __init__( self, @@ -135,7 +135,7 @@ def get_domain(self) -> Domain: }, ) - raise ValueError(f"""Domain type "{domain_type}" is not recognized.""") # noqa: TRY003 + raise ValueError(f"""Domain type "{domain_type}" is not recognized.""") # noqa: TRY003 # FIXME CoP def get_domain_type(self) -> MetricDomainTypes: """Return "domain_type" of this "MetricConfiguration" object.""" @@ -148,7 +148,7 @@ def get_domain_type(self) -> MetricDomainTypes: if "column_list" in self._metric_domain_kwargs: return MetricDomainTypes.MULTICOLUMN - # TODO: Determining "domain_type" of "MetricConfiguration" using heuristics defaults to "TABLE". # noqa: E501 + # TODO: Determining "domain_type" of "MetricConfiguration" using heuristics defaults to "TABLE". # noqa: E501 # FIXME CoP return MetricDomainTypes.TABLE @property diff --git a/great_expectations/validator/metrics_calculator.py b/great_expectations/validator/metrics_calculator.py index c45c7af1b04f..5b1ae5c32de1 100644 --- a/great_expectations/validator/metrics_calculator.py +++ b/great_expectations/validator/metrics_calculator.py @@ -130,7 +130,7 @@ def get_metrics( Returns: Return Dictionary with requested metrics resolved, with metric_name as key and computed metric as value. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP resolved_metrics: _MetricsDict resolved_metrics, _ = self.compute_metrics( metric_configurations=list(metrics.values()), @@ -159,7 +159,7 @@ def compute_metrics( Tuple of two elements, the first is a dictionary with requested metrics resolved, with unique metric ID as key and computed metric as value. The second is a dictionary of the aborted metrics information, with metric ID as key if any metrics were aborted. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP graph: ValidationGraph = self.build_metric_dependency_graph( metric_configurations=metric_configurations, runtime_configuration=runtime_configuration, @@ -191,7 +191,7 @@ def build_metric_dependency_graph( Returns: Resulting "ValidationGraph" object. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP graph: ValidationGraph = ValidationGraph(execution_engine=self._execution_engine) metric_configuration: MetricConfiguration @@ -219,7 +219,7 @@ def resolve_validation_graph_and_handle_aborted_metrics_info( Returns: Dictionary with requested metrics resolved, with unique metric ID as key and computed metric as value. Dictionary with aborted metrics information, with metric ID as key. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP resolved_metrics: _MetricsDict aborted_metrics_info: _AbortedMetricsInfoDict ( @@ -259,7 +259,7 @@ def resolve_validation_graph( Returns: Dictionary with requested metrics resolved, with unique metric ID as key and computed metric as value. Dictionary with aborted metrics information, with metric ID as key. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP resolved_metrics: _MetricsDict aborted_metrics_info: _AbortedMetricsInfoDict resolved_metrics, aborted_metrics_info = graph.resolve( diff --git a/great_expectations/validator/util.py b/great_expectations/validator/util.py index e1995a6301c6..2598b079f253 100644 --- a/great_expectations/validator/util.py +++ b/great_expectations/validator/util.py @@ -32,7 +32,7 @@ def recursively_convert_to_json_serializable( return _recursively_convert_to_json_serializable(test_obj) -def _recursively_convert_to_json_serializable( # noqa: C901, PLR0911, PLR0912 +def _recursively_convert_to_json_serializable( # noqa: C901, PLR0911, PLR0912 # FIXME CoP test_obj: Any, ) -> Any: # If it's one of our types, we pass @@ -59,7 +59,7 @@ def _recursively_convert_to_json_serializable( # noqa: C901, PLR0911, PLR0912 for key in test_obj: if key == "row_condition" and test_obj[key] is not None: ensure_row_condition_is_correct(test_obj[key]) - # A pandas index can be numeric, and a dict key can be numeric, but a json key must be a string # noqa: E501 + # A pandas index can be numeric, and a dict key can be numeric, but a json key must be a string # noqa: E501 # FIXME CoP new_dict[str(key)] = recursively_convert_to_json_serializable(test_obj[key]) return new_dict @@ -73,12 +73,12 @@ def _recursively_convert_to_json_serializable( # noqa: C901, PLR0911, PLR0912 elif isinstance(test_obj, (np.ndarray, pd.Index)): # test_obj[key] = test_obj[key].tolist() - # If we have an array or index, convert it first to a list--causing coercion to float--and then round # noqa: E501 - # to the number of digits for which the string representation will equal the float representation # noqa: E501 + # If we have an array or index, convert it first to a list--causing coercion to float--and then round # noqa: E501 # FIXME CoP + # to the number of digits for which the string representation will equal the float representation # noqa: E501 # FIXME CoP return 
[_recursively_convert_to_json_serializable(x) for x in test_obj.tolist()] # Note: This clause has to come after checking for np.ndarray or we get: - # `ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()` # noqa: E501 + # `ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()` # noqa: E501 # FIXME CoP elif test_obj is None: # No problem to encode json return test_obj @@ -125,7 +125,7 @@ def _recursively_convert_to_json_serializable( # noqa: C901, PLR0911, PLR0912 return float(test_obj) else: - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"{test_obj!s} is of type {type(test_obj).__name__} which cannot be serialized." ) @@ -143,12 +143,12 @@ def ensure_row_condition_is_correct(row_condition_string) -> None: the pandas query string """ if "'" in row_condition_string: - raise InvalidExpectationConfigurationError( # noqa: TRY003 + raise InvalidExpectationConfigurationError( # noqa: TRY003 # FIXME CoP f"{row_condition_string} cannot be serialized to json. " "Do not introduce simple quotes in configuration." "Use double quotes instead." ) if "\n" in row_condition_string: - raise InvalidExpectationConfigurationError( # noqa: TRY003 - f"{row_condition_string!r} cannot be serialized to json. Do not introduce \\n in configuration." # noqa: E501 + raise InvalidExpectationConfigurationError( # noqa: TRY003 # FIXME CoP + f"{row_condition_string!r} cannot be serialized to json. Do not introduce \\n in configuration." 
# noqa: E501 # FIXME CoP ) diff --git a/great_expectations/validator/v1_validator.py b/great_expectations/validator/v1_validator.py index ee14c87aeb20..8d33216473fc 100644 --- a/great_expectations/validator/v1_validator.py +++ b/great_expectations/validator/v1_validator.py @@ -14,7 +14,7 @@ ResultFormat, ) from great_expectations.data_context.data_context.context_factory import project_manager -from great_expectations.util import convert_to_json_serializable # noqa: TID251 +from great_expectations.util import convert_to_json_serializable # noqa: TID251 # FIXME CoP from great_expectations.validator.validator import Validator as OldValidator from great_expectations.validator.validator import calc_validation_statistics diff --git a/great_expectations/validator/validation_graph.py b/great_expectations/validator/validation_graph.py index 5ec362d5c9ad..775b9cb21657 100644 --- a/great_expectations/validator/validation_graph.py +++ b/great_expectations/validator/validation_graph.py @@ -96,16 +96,16 @@ def __eq__(self, other) -> bool: @property def edges(self) -> List[MetricEdge]: - """Returns "MetricEdge" objects, contained within this "ValidationGraph" object (as list).""" # noqa: E501 + """Returns "MetricEdge" objects, contained within this "ValidationGraph" object (as list).""" # noqa: E501 # FIXME CoP return self._edges @property def edge_ids(self) -> Set[Tuple[str, str]]: - """Returns "MetricEdge" objects, contained within this "ValidationGraph" object (as set of two-tuples).""" # noqa: E501 + """Returns "MetricEdge" objects, contained within this "ValidationGraph" object (as set of two-tuples).""" # noqa: E501 # FIXME CoP return {edge.id for edge in self._edges} def add(self, edge: MetricEdge) -> None: - """Adds supplied "MetricEdge" object to this "ValidationGraph" object (if not already present).""" # noqa: E501 + """Adds supplied "MetricEdge" object to this "ValidationGraph" object (if not already present).""" # noqa: E501 # FIXME CoP if edge.id not in 
self._edge_ids: self._edges.append(edge) self._edge_ids.add(edge.id) @@ -122,7 +122,7 @@ def build_metric_dependency_graph( Args: metric_configuration: Desired MetricConfiguration object to be resolved. runtime_configuration: Additional run-time settings (see "Validator.DEFAULT_RUNTIME_CONFIGURATION"). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP metric_impl_klass: MetricProvider metric_provider: Callable @@ -170,7 +170,7 @@ def set_metric_configuration_default_kwargs_if_absent( ) -> Tuple[MetricProvider, Callable]: """ Updates "metric_domain_kwargs" and/or "metric_value_kwargs" of "MetricConfiguration" with defualts (if needed). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP metric_impl_klass: MetricProvider metric_provider: Callable metric_impl_klass, metric_provider = get_metric_provider( @@ -211,11 +211,11 @@ def resolve( return resolved_metrics, aborted_metrics_info - def _resolve( # noqa: C901, PLR0912, PLR0915 + def _resolve( # noqa: C901, PLR0912, PLR0915 # FIXME CoP self, metrics: Dict[_MetricKey, MetricValue], runtime_configuration: Optional[dict] = None, - min_graph_edges_pbar_enable: int = 0, # Set to low number (e.g., 3) to suppress progress bar for small graphs. # noqa: E501 + min_graph_edges_pbar_enable: int = 0, # Set to low number (e.g., 3) to suppress progress bar for small graphs. # noqa: E501 # FIXME CoP show_progress_bars: bool = True, ) -> _AbortedMetricsInfoDict: if metrics is None: @@ -271,7 +271,7 @@ def _resolve( # noqa: C901, PLR0912, PLR0915 computable_metrics.add(metric) try: - # Access "ExecutionEngine.resolve_metrics()" method, to resolve missing "MetricConfiguration" objects. # noqa: E501 + # Access "ExecutionEngine.resolve_metrics()" method, to resolve missing "MetricConfiguration" objects. # noqa: E501 # FIXME CoP metrics.update( self._execution_engine.resolve_metrics( metrics_to_resolve=computable_metrics, # type: ignore[arg-type] # Metric typing needs further refinement. 
@@ -302,15 +302,15 @@ def _resolve( # noqa: C901, PLR0912, PLR0915 failed_metric_info[failed_metric.id]["exception_info"] = exception_info else: - raise err # noqa: TRY201 + raise err # noqa: TRY201 # FIXME CoP except Exception as e: if catch_exceptions: - logger.error( # noqa: TRY400 - f"""Caught exception {e!s} while trying to resolve a set of {len(ready_metrics)} metrics; aborting graph resolution.""" # noqa: E501 + logger.error( # noqa: TRY400 # FIXME CoP + f"""Caught exception {e!s} while trying to resolve a set of {len(ready_metrics)} metrics; aborting graph resolution.""" # noqa: E501 # FIXME CoP ) done = True else: - raise e # noqa: TRY201 + raise e # noqa: TRY201 # FIXME CoP if (len(ready_metrics) + len(needed_metrics) == 0) or ( len(ready_metrics) == len(aborted_metrics_info) @@ -326,7 +326,7 @@ def _parse( metrics: Dict[_MetricKey, MetricValue], ) -> Tuple[Set[MetricConfiguration], Set[MetricConfiguration]]: """Given validation graph, returns the ready and needed metrics necessary for validation using a traversal of - validation graph (a graph structure of metric ids) edges""" # noqa: E501 + validation graph (a graph structure of metric ids) edges""" # noqa: E501 # FIXME CoP unmet_dependency_ids = set() unmet_dependency = set() maybe_ready_ids = set() @@ -338,7 +338,7 @@ def _parse( if edge.left.id not in maybe_ready_ids: maybe_ready_ids.add(edge.left.id) maybe_ready.add(edge.left) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if edge.left.id not in unmet_dependency_ids: unmet_dependency_ids.add(edge.left.id) unmet_dependency.add(edge.left) @@ -372,13 +372,13 @@ def __init__( graph: ValidationGraph, ) -> None: if configuration is None: - raise ValueError( # noqa: TRY003 - """Instantiation of "ExpectationValidationGraph" requires valid "ExpectationConfiguration" object.""" # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + """Instantiation of "ExpectationValidationGraph" requires valid "ExpectationConfiguration" object.""" # noqa: 
E501 # FIXME CoP ) if graph is None: - raise ValueError( # noqa: TRY003 - """Instantiation of "ExpectationValidationGraph" requires valid "ValidationGraph" object.""" # noqa: E501 + raise ValueError( # noqa: TRY003 # FIXME CoP + """Instantiation of "ExpectationValidationGraph" requires valid "ValidationGraph" object.""" # noqa: E501 # FIXME CoP ) self._configuration = configuration diff --git a/great_expectations/validator/validator.py b/great_expectations/validator/validator.py index 91936604bf6e..a99f482f714d 100644 --- a/great_expectations/validator/validator.py +++ b/great_expectations/validator/validator.py @@ -52,7 +52,7 @@ get_expectation_impl, list_registered_expectation_implementations, ) -from great_expectations.util import convert_to_json_serializable # noqa: TID251 +from great_expectations.util import convert_to_json_serializable # noqa: TID251 # FIXME CoP from great_expectations.validator.exception_info import ExceptionInfo from great_expectations.validator.metrics_calculator import ( MetricsCalculator, @@ -90,7 +90,7 @@ @dataclass class ValidationDependencies: - # Note: Dependent "metric_name" (key) is different from "metric_name" in dependency "MetricConfiguration" (value). # noqa: E501 + # Note: Dependent "metric_name" (key) is different from "metric_name" in dependency "MetricConfiguration" (value). # noqa: E501 # FIXME CoP metric_configurations: Dict[str, MetricConfiguration] = field(default_factory=dict) result_format: Dict[str, Any] = field(default_factory=dict) @@ -99,25 +99,25 @@ def set_metric_configuration( ) -> None: """ Sets specified "MetricConfiguration" for "metric_name" to "metric_configurations" dependencies dictionary. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP self.metric_configurations[metric_name] = metric_configuration def get_metric_configuration(self, metric_name: str) -> Optional[MetricConfiguration]: """ Obtains "MetricConfiguration" for specified "metric_name" from "metric_configurations" dependencies dictionary. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return self.metric_configurations.get(metric_name) def remove_metric_configuration(self, metric_name: str) -> None: """ Removes "MetricConfiguration" for specified "metric_name" from "metric_configurations" dependencies dictionary. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP del self.metric_configurations[metric_name] def get_metric_names(self) -> List[str]: """ Returns "metric_name" keys, for which "MetricConfiguration" dependency objects have been specified. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return list(self.metric_configurations.keys()) def get_metric_configurations(self) -> List[MetricConfiguration]: @@ -139,7 +139,7 @@ class Validator: expectation_suite_name: The name of the Expectation Suite to validate. data_context: The Data Context associated with this Validator. batches: The Batches for which to validate. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP DEFAULT_RUNTIME_CONFIGURATION = { "catch_exceptions": False, @@ -148,7 +148,7 @@ class Validator: RUNTIME_KEYS = DEFAULT_RUNTIME_CONFIGURATION.keys() # noinspection PyUnusedLocal - def __init__( # noqa: PLR0913 + def __init__( # noqa: PLR0913 # FIXME CoP self, execution_engine: ExecutionEngine, interactive_evaluation: bool = True, @@ -178,10 +178,10 @@ def __init__( # noqa: PLR0913 expectation_suite_name=expectation_suite_name, ) self._default_expectation_args: Dict[str, Union[bool, str]] = copy.deepcopy( - Validator.DEFAULT_RUNTIME_CONFIGURATION # type: ignore[arg-type] + Validator.DEFAULT_RUNTIME_CONFIGURATION # type: ignore[arg-type] # FIXME CoP ) - # This special state variable tracks whether a validation run is going on, which will disable # noqa: E501 + # This special state variable tracks whether a validation run is going on, which will disable # noqa: E501 # FIXME CoP # saving expectation config objects self._active_validation: bool = False @@ -196,7 +196,7 @@ def execution_engine(self) -> ExecutionEngine: @property def 
metrics_calculator(self) -> MetricsCalculator: - """Returns the "MetricsCalculator" object being used by the Validator to handle metrics computations.""" # noqa: E501 + """Returns the "MetricsCalculator" object being used by the Validator to handle metrics computations.""" # noqa: E501 # FIXME CoP return self._metrics_calculator @property @@ -221,7 +221,7 @@ def loaded_batch_ids(self) -> List[str]: @property def active_batch_data(self) -> Optional[BatchDataUnion]: - """Getter for BatchData object from the currently-active Batch object (convenience property).""" # noqa: E501 + """Getter for BatchData object from the currently-active Batch object (convenience property).""" # noqa: E501 # FIXME CoP return self._execution_engine.batch_manager.active_batch_data @property @@ -272,12 +272,12 @@ def expectation_suite(self, value: ExpectationSuite) -> None: @property def expectation_suite_name(self) -> str: - """Gets the current expectation_suite name of this data_asset as stored in the expectations configuration.""" # noqa: E501 + """Gets the current expectation_suite name of this data_asset as stored in the expectations configuration.""" # noqa: E501 # FIXME CoP return self._expectation_suite.name @expectation_suite_name.setter def expectation_suite_name(self, name: str) -> None: - """Sets the expectation_suite name of this data_asset as stored in the expectations configuration.""" # noqa: E501 + """Sets the expectation_suite name of this data_asset as stored in the expectations configuration.""" # noqa: E501 # FIXME CoP self._expectation_suite.name = name def load_batch_list(self, batch_list: Sequence[Batch | FluentBatch]) -> None: @@ -309,7 +309,7 @@ def get_metrics( Returns: Return Dictionary with requested metrics resolved, with metric_name as key and computed metric as value. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return self._metrics_calculator.get_metrics(metrics=metrics) def compute_metrics( @@ -331,7 +331,7 @@ def compute_metrics( Tuple of two elements, the first is a dictionary with requested metrics resolved, with unique metric ID as key and computed metric as value. The second is a dictionary of the aborted metrics information, with metric ID as key if any metrics were aborted. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return self._metrics_calculator.compute_metrics( metric_configurations=metric_configurations, runtime_configuration=runtime_configuration, @@ -412,7 +412,7 @@ def _determine_progress_bars(self) -> bool: def __getattr__(self, name): if self.active_batch is None: - raise TypeError("active_batch cannot be None") # noqa: TRY003 + raise TypeError("active_batch cannot be None") # noqa: TRY003 # FIXME CoP name = name.lower() if ( name.startswith("expect_") or name == "unexpected_rows_expectation" @@ -425,9 +425,9 @@ def __getattr__(self, name): ): return getattr(self.active_batch.data.dataframe, name) else: - raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'") # noqa: TRY003 + raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'") # noqa: TRY003 # FIXME CoP - def validate_expectation(self, name: str) -> Callable: # noqa: C901, PLR0915 + def validate_expectation(self, name: str) -> Callable: # noqa: C901, PLR0915 # FIXME CoP """ Given the name of an Expectation, obtains the Class-first Expectation implementation and utilizes the expectation's validate method to obtain a validation result. 
Also adds in the runtime configuration @@ -437,13 +437,13 @@ def validate_expectation(self, name: str) -> Callable: # noqa: C901, PLR0915 Returns: The Expectation's validation result - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP expectation_impl = get_expectation_impl(name) - def inst_expectation(*args: dict, **kwargs): # noqa: C901, PLR0912 - # this is used so that exceptions are caught appropriately when they occur in expectation config # noqa: E501 + def inst_expectation(*args: dict, **kwargs): # noqa: C901, PLR0912 # FIXME CoP + # this is used so that exceptions are caught appropriately when they occur in expectation config # noqa: E501 # FIXME CoP - # TODO: JPC - THIS LOGIC DOES NOT RESPECT DEFAULTS SET BY USERS IN THE VALIDATOR VS IN THE EXPECTATION # noqa: E501 + # TODO: JPC - THIS LOGIC DOES NOT RESPECT DEFAULTS SET BY USERS IN THE VALIDATOR VS IN THE EXPECTATION # noqa: E501 # FIXME CoP # DEVREL has action to develop a new plan in coordination with MarioPod expectation_kwargs = recursively_convert_to_json_serializable(kwargs) @@ -479,7 +479,7 @@ def inst_expectation(*args: dict, **kwargs): # noqa: C901, PLR0912 ) meta = arg except IndexError: - raise InvalidExpectationConfigurationError( # noqa: TRY003 + raise InvalidExpectationConfigurationError( # noqa: TRY003 # FIXME CoP f"Invalid positional argument: {arg}" ) @@ -496,7 +496,7 @@ def inst_expectation(*args: dict, **kwargs): # noqa: C901, PLR0912 self._data_context, ) - """Given an implementation and a configuration for any Expectation, returns its validation result""" # noqa: E501 + """Given an implementation and a configuration for any Expectation, returns its validation result""" # noqa: E501 # FIXME CoP if not self.interactive_evaluation and not self._active_validation: validation_result = ExpectationValidationResult( @@ -510,7 +510,7 @@ def inst_expectation(*args: dict, **kwargs): # noqa: C901, PLR0912 runtime_configuration=basic_runtime_configuration, ) - # If validate has set active_validation 
to true, then we do not save the config to avoid # noqa: E501 + # If validate has set active_validation to true, then we do not save the config to avoid # noqa: E501 # FIXME CoP # saving updating expectation configs to the same suite during validation runs if self._active_validation is True: stored_config = configuration.get_raw_configuration() @@ -545,7 +545,7 @@ def inst_expectation(*args: dict, **kwargs): # noqa: C901, PLR0912 expectation_config=configuration, ) else: - raise err # noqa: TRY201 + raise err # noqa: TRY201 # FIXME CoP if self._include_rendered_content: validation_result.render() @@ -626,8 +626,8 @@ def graph_validate( show_progress_bars=self._determine_progress_bars(), ) except Exception as err: - # If a general Exception occurs during the execution of "ValidationGraph.resolve()", then # noqa: E501 - # all expectations in the suite are impacted, because it is impossible to attribute the failure to a metric. # noqa: E501 + # If a general Exception occurs during the execution of "ValidationGraph.resolve()", then # noqa: E501 # FIXME CoP + # all expectations in the suite are impacted, because it is impossible to attribute the failure to a metric. 
# noqa: E501 # FIXME CoP if catch_exceptions: exception_traceback: str = traceback.format_exc() evrs = self._catch_exceptions_in_failing_expectation_validations( @@ -638,7 +638,7 @@ def graph_validate( ) return evrs else: - raise err # noqa: TRY201 + raise err # noqa: TRY201 # FIXME CoP configuration: ExpectationConfiguration result: ExpectationValidationResult @@ -663,7 +663,7 @@ def graph_validate( evrs=evrs, ) else: - raise err # noqa: TRY201 + raise err # noqa: TRY201 # FIXME CoP return evrs @@ -678,8 +678,8 @@ def _generate_metric_dependency_subgraphs_for_each_expectation_configuration( List[ExpectationValidationResult], List[ExpectationConfiguration], ]: - # While evaluating expectation configurations, create sub-graph for every metric dependency and incorporate # noqa: E501 - # these sub-graphs under corresponding expectation-level sub-graph (state of ExpectationValidationGraph object). # noqa: E501 + # While evaluating expectation configurations, create sub-graph for every metric dependency and incorporate # noqa: E501 # FIXME CoP + # these sub-graphs under corresponding expectation-level sub-graph (state of ExpectationValidationGraph object). 
# noqa: E501 # FIXME CoP expectation_validation_graphs: List[ExpectationValidationGraph] = [] evrs: List[ExpectationValidationResult] = [] configuration: ExpectationConfiguration @@ -709,7 +709,7 @@ def _generate_metric_dependency_subgraphs_for_each_expectation_configuration( ) try: - expectation_validation_graph: ExpectationValidationGraph = ExpectationValidationGraph( # noqa: E501 + expectation_validation_graph: ExpectationValidationGraph = ExpectationValidationGraph( # noqa: E501 # FIXME CoP configuration=evaluated_config, graph=self._metrics_calculator.build_metric_dependency_graph( metric_configurations=validation_dependencies.get_metric_configurations(), @@ -733,7 +733,7 @@ def _generate_metric_dependency_subgraphs_for_each_expectation_configuration( ) evrs.append(result) else: - raise err # noqa: TRY201 + raise err # noqa: TRY201 # FIXME CoP return expectation_validation_graphs, evrs, processed_configurations @@ -741,7 +741,7 @@ def _generate_suite_level_graph_from_expectation_level_sub_graphs( self, expectation_validation_graphs: List[ExpectationValidationGraph], ) -> ValidationGraph: - # Collect edges from all expectation-level sub-graphs and incorporate them under common suite-level graph. # noqa: E501 + # Collect edges from all expectation-level sub-graphs and incorporate them under common suite-level graph. 
# noqa: E501 # FIXME CoP expectation_validation_graph: ExpectationValidationGraph edges: List[MetricEdge] = list( itertools.chain.from_iterable( @@ -754,7 +754,7 @@ def _generate_suite_level_graph_from_expectation_level_sub_graphs( validation_graph = ValidationGraph(execution_engine=self._execution_engine, edges=edges) return validation_graph - def _resolve_suite_level_graph_and_process_metric_evaluation_errors( # noqa: PLR0913 + def _resolve_suite_level_graph_and_process_metric_evaluation_errors( # noqa: PLR0913 # FIXME CoP self, graph: ValidationGraph, runtime_configuration: dict, @@ -767,7 +767,7 @@ def _resolve_suite_level_graph_and_process_metric_evaluation_errors( # noqa: PL List[ExpectationValidationResult], List[ExpectationConfiguration], ]: - # Resolve overall suite-level graph and process any MetricResolutionError type exceptions that might occur. # noqa: E501 + # Resolve overall suite-level graph and process any MetricResolutionError type exceptions that might occur. # noqa: E501 # FIXME CoP resolved_metrics: _MetricsDict aborted_metrics_info: _AbortedMetricsInfoDict ( @@ -779,13 +779,13 @@ def _resolve_suite_level_graph_and_process_metric_evaluation_errors( # noqa: PL min_graph_edges_pbar_enable=0, ) - # Trace MetricResolutionError occurrences to expectations relying on corresponding malfunctioning metrics. # noqa: E501 + # Trace MetricResolutionError occurrences to expectations relying on corresponding malfunctioning metrics. # noqa: E501 # FIXME CoP rejected_configurations: List[ExpectationConfiguration] = [] for expectation_validation_graph in expectation_validation_graphs: metric_exception_info: Dict[str, Union[MetricConfiguration, ExceptionInfo, int]] = ( expectation_validation_graph.get_exception_info(metric_info=aborted_metrics_info) ) - # Report all MetricResolutionError occurrences impacting expectation and append it to rejected list. 
# noqa: E501 + # Report all MetricResolutionError occurrences impacting expectation and append it to rejected list. # noqa: E501 # FIXME CoP if len(metric_exception_info) > 0: configuration = expectation_validation_graph.configuration result = ExpectationValidationResult( @@ -821,7 +821,7 @@ def _catch_exceptions_in_failing_expectation_validations( Returns: List of ExpectationValidationResult objects with unsuccessful ExpectationValidationResult objects appended - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP exception_message: str = str(exception) exception_info = ExceptionInfo( exception_traceback=exception_traceback, @@ -864,7 +864,7 @@ def remove_expectation( Raises: TypeError: Must provide either expectation_configuration or id. ValueError: No match or multiple matches found (and remove_multiple_matches=False). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP return self._expectation_suite.remove_expectation( expectation_configuration=expectation_configuration, @@ -880,7 +880,7 @@ def discard_failing_expectations(self) -> None: for item in res: config = item.expectation_config if not config: - raise ValueError( # noqa: TRY003 + raise ValueError( # noqa: TRY003 # FIXME CoP "ExpectationValidationResult does not have an expectation_config" ) self.remove_expectation( @@ -945,7 +945,7 @@ def set_default_expectation_argument(self, argument: str, value) -> None: self._default_expectation_args[argument] = value - def get_expectation_suite( # noqa: C901, PLR0912, PLR0913 + def get_expectation_suite( # noqa: C901, PLR0912, PLR0913 # FIXME CoP self, discard_failed_expectations: bool = True, discard_result_format_kwargs: bool = True, @@ -978,9 +978,9 @@ def get_expectation_suite( # noqa: C901, PLR0912, PLR0913 for expectation in expectations: # Note: This is conservative logic. - # Instead of retaining expectations IFF success==True, it discard expectations IFF success==False. 
# noqa: E501 - # In cases where expectation.success is missing or None, expectations are *retained*. # noqa: E501 - # Such a case could occur if expectations were loaded from a config file and never run. # noqa: E501 + # Instead of retaining expectations IFF success==True, it discard expectations IFF success==False. # noqa: E501 # FIXME CoP + # In cases where expectation.success is missing or None, expectations are *retained*. # noqa: E501 # FIXME CoP + # Such a case could occur if expectations were loaded from a config file and never run. # noqa: E501 # FIXME CoP if expectation.success_on_last_run is False: discards["failed_expectations"] += 1 else: @@ -992,12 +992,12 @@ def get_expectation_suite( # noqa: C901, PLR0912, PLR0913 if discards["failed_expectations"] > 0 and not suppress_warnings: message += ( - f" Omitting {discards['failed_expectations']} expectation(s) that failed when last run; set " # noqa: E501 + f" Omitting {discards['failed_expectations']} expectation(s) that failed when last run; set " # noqa: E501 # FIXME CoP "discard_failed_expectations=False to include them." ) for expectation in expectations: - # FIXME: Factor this out into a new function. The logic is duplicated in remove_expectation, # noqa: E501 + # FIXME: Factor this out into a new function. The logic is duplicated in remove_expectation, # noqa: E501 # FIXME CoP # which calls _copy_and_clean_up_expectation expectation.success_on_last_run = None @@ -1036,7 +1036,7 @@ def get_expectation_suite( # noqa: C901, PLR0912, PLR0913 logger.info(message + settings_message) return expectation_suite - def save_expectation_suite( # noqa: PLR0913 + def save_expectation_suite( # noqa: PLR0913 # FIXME CoP self, filepath: Optional[str] = None, discard_failed_expectations: bool = True, @@ -1059,7 +1059,7 @@ def save_expectation_suite( # noqa: PLR0913 Raises: ValueError: Must configure a Data Context when instantiating the Validator or pass in `filepath`. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP expectation_suite: ExpectationSuite = self.get_expectation_suite( discard_failed_expectations, discard_result_format_kwargs, @@ -1081,14 +1081,14 @@ def save_expectation_suite( # noqa: PLR0913 sort_keys=True, ) else: - raise ValueError("Unable to save config: filepath or data_context must be available.") # noqa: TRY003 + raise ValueError("Unable to save config: filepath or data_context must be available.") # noqa: TRY003 # FIXME CoP @deprecated_argument( argument_name="run_id", - message="Only the str version of this argument is deprecated. run_id should be a RunIdentifier or dict. Support will be removed in 0.16.0.", # noqa: E501 + message="Only the str version of this argument is deprecated. run_id should be a RunIdentifier or dict. Support will be removed in 0.16.0.", # noqa: E501 # FIXME CoP version="0.13.0", ) - def validate( # noqa: C901, PLR0912, PLR0913 + def validate( # noqa: C901, PLR0912, PLR0913 # FIXME CoP self, expectation_suite: str | ExpectationSuite | None = None, run_id: str | RunIdentifier | Dict[str, str] | None = None, @@ -1124,7 +1124,7 @@ def validate( # noqa: C901, PLR0912, PLR0913 GreatExpectationsError: If `expectation_suite` is a string it must point to an existing and readable file. ValidationError: If `expectation_suite` is a string, the file it points to must be valid JSON. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # noinspection PyUnusedLocal try: validation_time = datetime.datetime.now(datetime.timezone.utc).strftime( @@ -1162,13 +1162,13 @@ def validate( # noqa: C901, PLR0912, PLR0913 except ValidationError: raise except OSError: - raise GreatExpectationsError( # noqa: TRY003 - f"Unable to load expectation suite: IO error while reading {expectation_suite}" # noqa: E501 + raise GreatExpectationsError( # noqa: TRY003 # FIXME CoP + f"Unable to load expectation suite: IO error while reading {expectation_suite}" # noqa: E501 # FIXME CoP ) if not isinstance(expectation_suite, ExpectationSuite): logger.error( - "Unable to validate using the provided value for expectation suite; does it need to be " # noqa: E501 + "Unable to validate using the provided value for expectation suite; does it need to be " # noqa: E501 # FIXME CoP "loaded from a dictionary?" ) return ExpectationValidationResult(success=False) @@ -1330,7 +1330,7 @@ def test_expectation_function(self, function: Callable, *args, **kwargs) -> Call Check out :ref:`how_to_guides__creating_and_editing_expectations__how_to_create_custom_expectations` for more information. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # noinspection SpellCheckingInspection argspec = inspect.getfullargspec(function)[0][1:] @@ -1344,7 +1344,7 @@ def _parse_validation_graph( metrics: _MetricsDict, ) -> Tuple[Set[MetricConfiguration], Set[MetricConfiguration]]: """Given validation graph, returns the ready and needed metrics necessary for validation using a traversal of - validation graph (a graph structure of metric ids) edges""" # noqa: E501 + validation graph (a graph structure of metric ids) edges""" # noqa: E501 # FIXME CoP unmet_dependency_ids = set() unmet_dependency = set() maybe_ready_ids = set() @@ -1356,7 +1356,7 @@ def _parse_validation_graph( if edge.left.id not in maybe_ready_ids: maybe_ready_ids.add(edge.left.id) maybe_ready.add(edge.left) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if edge.left.id not in unmet_dependency_ids: unmet_dependency_ids.add(edge.left.id) unmet_dependency.add(edge.left) @@ -1389,11 +1389,11 @@ def _initialize_expectations( Returns: None - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # Checking type of expectation_suite. 
# Check for expectation_suite_name is already done by ExpectationSuiteIdentifier if expectation_suite and not isinstance(expectation_suite, ExpectationSuite): - raise TypeError( # noqa: TRY003 + raise TypeError( # noqa: TRY003 # FIXME CoP f"expectation_suite must be of type ExpectationSuite, not {type(expectation_suite)}" ) if expectation_suite is not None: @@ -1407,7 +1407,7 @@ def _initialize_expectations( if expectation_suite_name is not None: if self._expectation_suite.name != expectation_suite_name: logger.warning( - f"Overriding existing expectation_suite_name {self._expectation_suite.name} with new name {expectation_suite_name}" # noqa: E501 + f"Overriding existing expectation_suite_name {self._expectation_suite.name} with new name {expectation_suite_name}" # noqa: E501 # FIXME CoP ) self._expectation_suite.name = expectation_suite_name @@ -1434,7 +1434,7 @@ def _get_runtime_configuration( runtime_configuration.pop("result_format") else: runtime_configuration.update({"result_format": result_format}) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if result_format is not None: runtime_configuration.update({"result_format": result_format}) diff --git a/great_expectations/warnings.py b/great_expectations/warnings.py index cd0dc786bf13..57635cdb27a1 100644 --- a/great_expectations/warnings.py +++ b/great_expectations/warnings.py @@ -9,8 +9,8 @@ def warn_pandas_less_than_2_0_and_sqlalchemy_greater_than_or_equal_2_0() -> None: - """Warning to emit when using pandas less than v1.4.0 with sqlalchemy greater than or equal to 2.0.0.""" # noqa: E501 + """Warning to emit when using pandas less than v1.4.0 with sqlalchemy greater than or equal to 2.0.0.""" # noqa: E501 # FIXME CoP warnings.warn( - """Please be aware that pandas versions below 2.0.0 may have issues when paired with SQLAlchemy 2.0.0 and above when using pandas + sql functionality (like the pandas read_sql reader method).""", # noqa: E501 + """Please be aware that pandas versions below 2.0.0 may 
have issues when paired with SQLAlchemy 2.0.0 and above when using pandas + sql functionality (like the pandas read_sql reader method).""", # noqa: E501 # FIXME CoP UserWarning, ) diff --git a/pyproject.toml b/pyproject.toml index 2cd35a549546..dfcafaacfb63 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -464,7 +464,8 @@ filterwarnings = [ # This deprecation warning comes from getsentry/responses, a mocking utility for requests. It is a dependency in moto. "ignore:stream argument is deprecated. Use stream parameter in request directly:DeprecationWarning", # We likely won't be updating to `marhsmallow` 4, these errors should be filtered out - "error::marshmallow.warnings.RemovedInMarshmallow4Warning", + "ignore::marshmallow.warnings.RemovedInMarshmallow4Warning", + "ignore::marshmallow.warnings.ChangedInMarshmallow4Warning", # pkg_resources is deprecated as an API, but third party libraries still use it 'ignore: Deprecated call to `pkg_resources.declare_namespace\(.*\)`', diff --git a/reqs/requirements-dev-databricks.txt b/reqs/requirements-dev-databricks.txt index c51024fd7e4e..256a35c1379d 100644 --- a/reqs/requirements-dev-databricks.txt +++ b/reqs/requirements-dev-databricks.txt @@ -1 +1 @@ -databricks-sql-connector[sqlalchemy]>=3.0.0 +databricks-sqlalchemy>=1.0.0 diff --git a/reqs/requirements-dev-lite.txt b/reqs/requirements-dev-lite.txt index b293026496f4..05e67982bfe7 100644 --- a/reqs/requirements-dev-lite.txt +++ b/reqs/requirements-dev-lite.txt @@ -16,6 +16,6 @@ pytest-random-order>=1.1.1 pytest-timeout>=2.3.1 pytest-xdist>=3.6.1 requirements-parser>=0.9.0 -responses>=0.23.1 # requests mocking +responses>=0.23.1,!=0.25.5 # requests mocking - pinning due to https://github.com/getsentry/responses/issues/751 setuptools>=70.0.0 # required for python 3.12 sqlalchemy>=1.4.0 diff --git a/reqs/requirements-dev-snowflake.txt b/reqs/requirements-dev-snowflake.txt index 77f144549a96..c1ce85e04e48 100644 --- a/reqs/requirements-dev-snowflake.txt +++ 
b/reqs/requirements-dev-snowflake.txt @@ -1,4 +1,4 @@ pandas<2.2.0; python_version >= "3.9" snowflake-connector-python>=2.5.0; python_version < "3.11" snowflake-connector-python>2.9.0; python_version >= "3.11" # earlier versions fail to build on 3.11 -snowflake-sqlalchemy>=1.2.3,<1.7.0 # pinned due to breaking in 1.7 +snowflake-sqlalchemy>=1.2.3,!=1.7.0 diff --git a/reqs/requirements-dev-sqlalchemy2.txt b/reqs/requirements-dev-sqlalchemy2.txt index de989da0bbd8..cc359e5f4994 100644 --- a/reqs/requirements-dev-sqlalchemy2.txt +++ b/reqs/requirements-dev-sqlalchemy2.txt @@ -12,6 +12,6 @@ # ----------- # Tempory pins for type checking step # TODO: update these pins in their respective requirements files and remove from here -snowflake-sqlalchemy>=1.6,<1.7.0 # min version required for sqlalchemy 2.0, pinned due to breaking in 1.7 +snowflake-sqlalchemy>=1.6,!=1.7.0 # min version required for sqlalchemy 2.0 sqlalchemy>=2.0 sqlalchemy-bigquery>=1.11.0 # min version required for sqlalchemy 2.0 diff --git a/requirements.txt b/requirements.txt index 415506a27222..e076522c3473 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,7 +12,7 @@ pandas>=1.1.3,<2.2; python_version == "3.9" pandas>=1.3.0,<2.2; python_version >= "3.10" pandas<2.2; python_version >= "3.12" # analytics -posthog>=2.1.0,<3 +posthog>3,<4 # patch version updates `typing_extensions` to the needed version pydantic>=1.10.7 pyparsing>=2.4 diff --git a/scripts/check_linter_ignores.sh b/scripts/check_linter_ignores.sh new file mode 100755 index 000000000000..dbea0a2747c8 --- /dev/null +++ b/scripts/check_linter_ignores.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# NOTE: For mypy checks, explicitly including modules found in git diff (overrides any config exclusions) + +cd "$(git rev-parse --show-toplevel)" || exit 1 + +git fetch --quiet +# Get a list of modified python scripts +# https://git-scm.com/docs/git-diff#Documentation/git-diff.txt---diff-filterACDMRTUXB82308203 +diff_modules=$(\ + git diff 
--diff-filter=MA --name-only origin/develop \ + | grep -E "[great_expectations|tests]\/.+\.py$" || true \ +) +echo "Force running on files diff'd with origin/develop: $diff_modules" +# Omitting double quotes bc need to unpack the filenames +# shellcheck disable=SC2086 +python scripts/linter_ignores.py $diff_modules +echo "Make sure your branch is up to date" diff --git a/docs/checks/__init__.py b/scripts/cleanup/__init__.py similarity index 100% rename from docs/checks/__init__.py rename to scripts/cleanup/__init__.py diff --git a/scripts/cleanup/cleanup_big_query.py b/scripts/cleanup/cleanup_big_query.py new file mode 100644 index 000000000000..49715cf1c8d1 --- /dev/null +++ b/scripts/cleanup/cleanup_big_query.py @@ -0,0 +1,55 @@ +import logging +import sys + +from great_expectations.compatibility.pydantic import BaseSettings +from great_expectations.compatibility.sqlalchemy import TextClause, create_engine + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +logger.addHandler(logging.StreamHandler(sys.stdout)) + + +class BigQueryConnectionConfig(BaseSettings): + """Environment variables for BigQuery connection. + These are injected in via CI, but when running locally, you may use your own credentials. 
+ GOOGLE_APPLICATION_CREDENTIALS must be kept secret + """ + + GE_TEST_GCP_PROJECT: str + GE_TEST_BIGQUERY_DATASET: str + GOOGLE_APPLICATION_CREDENTIALS: str + + @property + def connection_string(self) -> str: + return f"bigquery://{self.GE_TEST_GCP_PROJECT}/{self.GE_TEST_BIGQUERY_DATASET}?credentials_path={self.GOOGLE_APPLICATION_CREDENTIALS}" + + +SCHEMA_FORMAT = "^test_[a-z]{10}$" + + +def cleanup_big_query(config: BigQueryConnectionConfig) -> None: + engine = create_engine(url=config.connection_string) + with engine.connect() as conn, conn.begin(): + results = conn.execute( + TextClause( + """ + SELECT 'DROP SCHEMA ' || schema_name || ' CASCADE;' + FROM INFORMATION_SCHEMA.SCHEMATA + WHERE REGEXP_CONTAINS(schema_name, :schema_format) + AND creation_time < TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 2 HOUR); + """ + ), + {"schema_format": SCHEMA_FORMAT}, + ).fetchall() + if results: + to_run = TextClause("\n".join([row[0] for row in results])) + conn.execute(to_run) + logger.info(f"Cleaned up {len(results)} BigQuery schema(s)") + else: + logger.info("No BigQuery schemas to clean up!") + engine.dispose() + + +if __name__ == "__main__": + config = BigQueryConnectionConfig() # type: ignore[call-arg] # pydantic populates from env vars + cleanup_big_query(config) diff --git a/scripts/install_mssql_odbc_driver.sh b/scripts/install_mssql_odbc_driver.sh new file mode 100755 index 000000000000..4e2ab3acd7cf --- /dev/null +++ b/scripts/install_mssql_odbc_driver.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +if ! 
[[ "18.04 20.04 22.04 24.04" == *"$(grep VERSION_ID /etc/os-release | cut -d '"' -f 2)"* ]]; +then + echo "Ubuntu $(grep VERSION_ID /etc/os-release | cut -d '"' -f 2) is not currently supported."; + exit; +fi + +# Download the package to configure the Microsoft repo +curl -sSL -O https://packages.microsoft.com/config/ubuntu/$(grep VERSION_ID /etc/os-release | cut -d '"' -f 2)/packages-microsoft-prod.deb +# Install the package +sudo dpkg -i packages-microsoft-prod.deb +# Delete the file +rm packages-microsoft-prod.deb + +sudo apt-get update +sudo ACCEPT_EULA=Y apt-get install -y msodbcsql18 diff --git a/scripts/linter_ignores.py b/scripts/linter_ignores.py new file mode 100644 index 000000000000..4f6582d4a0a4 --- /dev/null +++ b/scripts/linter_ignores.py @@ -0,0 +1,73 @@ +""" +Check for instances of linter directive ignores without comments. + +In order to keep code quality high, we want to avoid future additions of +noqa or type: ignore directives without comments explaining why they are +necessary. 
+""" + +from __future__ import annotations + +import pathlib +import re +import sys +from collections.abc import Iterator + +TYPE_IGNORE_COMMENT_REGEX: re.Pattern[str] = re.compile( + r" # type: ignore(?P\[.*?\])?(?P\s*# .*)?$" +) +NOQA_IGNORE_COMMENT_REGEX: re.Pattern[str] = re.compile( + r" # noqa: (?P.*?)*(?P\s*# .*)?$" +) + + +def get_type_ignores(path: pathlib.Path) -> Iterator[tuple[int, str]]: + """Get all type-ignores from a file.""" + with open(path) as f_in: + for lineno, line in enumerate(f_in, start=1): + match = TYPE_IGNORE_COMMENT_REGEX.search(line) + if not match: + continue + if not match.group("comment"): + yield lineno, line + + +def check_type_ignores(paths: list[pathlib.Path]) -> list[tuple[str, str, str]]: + """Collect list of type ignores without comments.""" + return [(path, lineno, ignore) for path in paths for lineno, ignore in get_type_ignores(path)] + + +def get_noqa_ignores(path: pathlib.Path) -> Iterator[tuple[int, str]]: + """Get all noqa-ignores from a file.""" + with open(path) as f_in: + for lineno, line in enumerate(f_in, start=1): + match = NOQA_IGNORE_COMMENT_REGEX.search(line) + if not match: + continue + if not match.group("comment"): + yield lineno, line + + +def check_noqa_ignores(paths: list[pathlib.Path]) -> list[tuple[str, str, str]]: + """Collect list of noqa ignores without comments.""" + return [(path, lineno, ignore) for path in paths for lineno, ignore in get_noqa_ignores(path)] + + +if __name__ == "__main__": + paths = [pathlib.Path(p) for p in sys.argv[1:]] + checks = {"type": check_type_ignores(paths), "noqa": check_noqa_ignores(paths)} + + total_errors = 0 + for key, all_ignores in checks.items(): + if all_ignores: + total_errors += len(all_ignores) + print(f"{len(all_ignores)} errors must be fixed before merging.") + print(f"Found {key} ignores without explanatory comments:") + for path, lineno, ignore in all_ignores: + print(f" {path}:{lineno}\n {ignore}") + + if total_errors: + print( + f"Found {total_errors} 
ignores without comments that need to be fixed before merging." + ) + sys.exit(1) diff --git a/tasks.py b/tasks.py index 92065236b7eb..fd8e7d861181 100644 --- a/tasks.py +++ b/tasks.py @@ -52,11 +52,6 @@ "check": _CHECK_HELP_DESC, "exclude": _EXCLUDE_HELP_DESC, "path": _PATH_HELP_DESC, - "isort": "Use `isort` to sort packages. Default behavior.", - "ruff": ( - "Use `ruff` instead of `isort` to sort imports." - " This will eventually become the default." - ), "pty": _PTY_HELP_DESC, } ) @@ -65,29 +60,18 @@ def sort( path: str = ".", check: bool = False, exclude: str | None = None, - ruff: bool = False, # isort is the current default - isort: bool = False, pty: bool = True, ): """Sort module imports.""" - if ruff and isort: - raise invoke.Exit("cannot use both `--ruff` and `--isort`", code=1) # noqa: TRY003 - if not isort: - cmds = [ - "ruff", - "check", - path, - "--select I", - "--diff" if check else "--fix", - ] - if exclude: - cmds.extend(["--extend-exclude", exclude]) - else: - cmds = ["isort", path] - if check: - cmds.append("--check-only") - if exclude: - cmds.extend(["--skip", exclude]) + cmds = [ + "ruff", + "check", + path, + "--select I", + "--diff" if check else "--fix", + ] + if exclude: + cmds.extend(["--extend-exclude", exclude]) ctx.run(" ".join(cmds), echo=True, pty=pty) @@ -775,31 +759,6 @@ def _exit_with_error_if_not_run_from_correct_dir( ) -@invoke.task( - aliases=("links",), - help={"skip_external": "Skip external link checks (is slow), default is True"}, -) -def link_checker(ctx: Context, skip_external: bool = True): - """Checks the Docusaurus docs for broken links""" - import docs.checks.docs_link_checker as checker - - path = pathlib.Path("docs/docusaurus/docs") - docs_root = pathlib.Path("docs/docusaurus/docs") - static_root = pathlib.Path("docs/docusaurus/static") - site_prefix: str = "docs" - static_prefix: str = "static" - - code, message = checker.scan_docs( - path=path, - docs_root=docs_root, - static_root=static_root, - 
site_prefix=site_prefix, - static_prefix=static_prefix, - skip_external=skip_external, - ) - raise invoke.Exit(message, code) - - @invoke.task( aliases=("automerge",), ) @@ -1003,15 +962,19 @@ def _get_marker_dependencies(markers: str | Sequence[str]) -> list[TestDependenc "markers": "Optional marker to install dependencies for. Can be specified multiple times.", "requirements_dev": "Short name of `requirements-dev-*.txt` file to install, e.g. test, spark, cloud, etc. Can be specified multiple times.", # noqa: E501 "constraints": "Optional flag to install dependencies with constraints, default True", + "gx_install": "Install the local version of Great Expectations.", + "editable_install": "Install an editable local version of Great Expectations.", + "force_reinstall": "Force re-installation of dependencies.", }, ) -def deps( +def deps( # noqa: C901 - too complex ctx: Context, markers: list[str], requirements_dev: list[str], constraints: bool = True, gx_install: bool = False, editable_install: bool = False, + force_reinstall: bool = False, ): """ Install dependencies for development and testing. 
@@ -1034,6 +997,9 @@ def deps( elif gx_install: cmds.append(".") + if force_reinstall: + cmds.append("--force-reinstall") + req_files: list[str] = ["requirements.txt"] for test_deps in _get_marker_dependencies(markers): diff --git a/tests/actions/test_core_actions.py b/tests/actions/test_core_actions.py index 566acead683f..620774985730 100644 --- a/tests/actions/test_core_actions.py +++ b/tests/actions/test_core_actions.py @@ -5,7 +5,7 @@ from contextlib import contextmanager from datetime import datetime, timezone from types import ModuleType -from typing import TYPE_CHECKING, Iterator +from typing import TYPE_CHECKING, Iterator, Literal from unittest import mock import pytest @@ -23,6 +23,7 @@ SlackNotificationAction, SNSNotificationAction, UpdateDataDocsAction, + ValidationAction, ) from great_expectations.checkpoint.checkpoint import Checkpoint, CheckpointResult from great_expectations.core.batch import IDDict, LegacyBatchDefinition @@ -45,6 +46,7 @@ GXCloudIdentifier, ValidationResultIdentifier, ) +from great_expectations.exceptions.exceptions import ValidationActionAlreadyRegisteredError from great_expectations.util import is_library_loadable if TYPE_CHECKING: @@ -59,6 +61,7 @@ SUITE_B: str = "suite_b" BATCH_ID_A: str = "my_datasource-my_first_asset" BATCH_ID_B: str = "my_datasource-my_second_asset" +utc_datetime = datetime.fromisoformat("2024-04-01T20:51:18.077262").replace(tzinfo=timezone.utc) @pytest.fixture @@ -495,7 +498,7 @@ def test_run_emits_events(self, checkpoint_result: CheckpointResult, mocker: Moc "payload": { "severity": "critical", "source": "Great Expectations", - "summary": f"Great Expectations Checkpoint {checkpoint_name} has succeeded", # noqa: E501 + "summary": f"Great Expectations Checkpoint {checkpoint_name} has succeeded", # noqa: E501 # FIXME CoP }, "routing_key": "test", } @@ -507,7 +510,7 @@ def test_run_emits_events(self, checkpoint_result: CheckpointResult, mocker: Moc "payload": { "severity": "critical", "source": "Great 
Expectations", - "summary": f"Great Expectations Checkpoint {checkpoint_name} has failed", # noqa: E501 + "summary": f"Great Expectations Checkpoint {checkpoint_name} has failed", # noqa: E501 # FIXME CoP }, "routing_key": "test", } @@ -564,14 +567,14 @@ def test_run(self, checkpoint_result: CheckpointResult, mocked_posthog): "type": "section", "text": { "type": "mrkdwn", - "text": f"*Asset*: __no_data_asset_name__ *Expectation Suite*: {SUITE_A}", # noqa: E501 + "text": f"*Asset*: __no_data_asset_name__ *Expectation Suite*: {SUITE_A}", # noqa: E501 # FIXME CoP }, }, { "type": "section", "text": { "type": "mrkdwn", - "text": f"*Asset*: __no_data_asset_name__ *Expectation Suite*: {SUITE_B}", # noqa: E501 + "text": f"*Asset*: __no_data_asset_name__ *Expectation Suite*: {SUITE_B}", # noqa: E501 # FIXME CoP }, }, {"type": "divider"}, @@ -883,3 +886,12 @@ def test_run_with_cloud( validation_identifier_a: {}, validation_identifier_b: {}, } + + +class TestCustomActions: + @pytest.mark.unit + def test_custom_action_shadows_existing_type(self): + with pytest.raises(ValidationActionAlreadyRegisteredError): + + class CustomSlackAction(ValidationAction): + type: Literal["slack"] = "slack" # Shadows existing value diff --git a/tests/analytics/test_analytics.py b/tests/analytics/test_analytics.py index 5970674c9d0c..f38fb663855d 100644 --- a/tests/analytics/test_analytics.py +++ b/tests/analytics/test_analytics.py @@ -97,6 +97,7 @@ def test_ephemeral_context_init(monkeypatch): organization_id=None, oss_id=mock.ANY, user_id=None, + user_agent_str=None, ) mock_submit.assert_called_once_with( mock.ANY, @@ -106,13 +107,39 @@ def test_ephemeral_context_init(monkeypatch): "oss_id": mock.ANY, "service": "gx-core", "gx_version": mock.ANY, + "user_agent_str": None, }, groups={"data_context": mock.ANY}, ) +@pytest.mark.unit +def test_ephemeral_context_init_with_optional_fields(monkeypatch): + monkeypatch.setattr(ENV_CONFIG, "gx_analytics_enabled", True) # Enable usage stats + + with 
mock.patch("posthog.capture") as mock_submit: + user_agent_str = "test / x.x.x" + _ = gx.get_context(mode="ephemeral", user_agent_str=user_agent_str) + + mock_submit.assert_called_once_with( + mock.ANY, + "data_context.initialized", + { + "data_context_id": mock.ANY, + "oss_id": mock.ANY, + "service": "gx-core", + "gx_version": mock.ANY, + "user_agent_str": user_agent_str, + }, + groups={"data_context": mock.ANY}, + ) + + +@pytest.mark.parametrize("user_agent_str", [None, "test / x.x.x"]) @pytest.mark.cloud -def test_cloud_context_init(cloud_api_fake, cloud_details, monkeypatch): +def test_cloud_context_init( + user_agent_str: Optional[str], cloud_api_fake, cloud_details, monkeypatch +): monkeypatch.setattr(ENV_CONFIG, "gx_analytics_enabled", True) # Enable usage stats with ( @@ -126,6 +153,7 @@ def test_cloud_context_init(cloud_api_fake, cloud_details, monkeypatch): cloud_organization_id=cloud_details.org_id, cloud_base_url=cloud_details.base_url, cloud_mode=True, + user_agent_str=user_agent_str, ) mock_init.assert_called_once_with( @@ -135,6 +163,7 @@ def test_cloud_context_init(cloud_api_fake, cloud_details, monkeypatch): organization_id=UUID(cloud_details.org_id), oss_id=mock.ANY, cloud_mode=True, + user_agent_str=user_agent_str, ) mock_submit.assert_called_once_with( mock.ANY, @@ -144,20 +173,22 @@ def test_cloud_context_init(cloud_api_fake, cloud_details, monkeypatch): "oss_id": mock.ANY, "service": "gx-core", "gx_version": mock.ANY, + "user_agent_str": mock.ANY, }, groups={"data_context": mock.ANY}, ) @pytest.mark.parametrize( - ("environment_variable", "constructor_variable", "expected_value"), + ("environment_variable", "constructor_variable", "expected_value", "user_agent_str"), [ - (False, None, False), - (False, False, False), - (False, True, True), # enabling in config overrides environment variable - (True, None, True), - (True, False, False), - (True, True, True), + (False, None, False, None), + (False, False, False, None), + (False, True, True, 
None), # enabling in config overrides environment variable + (True, None, True, None), + (True, False, False, None), + (True, True, True, None), + (True, True, True, "some user agent string"), ], ) @pytest.mark.unit @@ -165,6 +196,7 @@ def test_analytics_enabled_on_load( environment_variable: bool, constructor_variable: Optional[bool], expected_value: bool, + user_agent_str: Optional[str], monkeypatch, ): monkeypatch.setattr(ENV_CONFIG, "gx_analytics_enabled", environment_variable) @@ -179,6 +211,7 @@ def test_analytics_enabled_on_load( gx.get_context( mode="ephemeral", project_config=project_config, + user_agent_str=user_agent_str, ) mock_init.assert_called_with( @@ -187,6 +220,35 @@ def test_analytics_enabled_on_load( organization_id=mock.ANY, oss_id=mock.ANY, user_id=mock.ANY, + user_agent_str=user_agent_str, + ) + + +@pytest.mark.unit +@pytest.mark.parametrize("user_agent_str", [None, "some user agent string"]) +def test_analytics_enabled_on_load__filesystem( + user_agent_str: Optional[str], + tmp_path, + monkeypatch, +): + monkeypatch.setattr(ENV_CONFIG, "gx_analytics_enabled", True) + + with mock.patch( + "great_expectations.data_context.data_context.abstract_data_context.init_analytics" + ) as mock_init: + gx.get_context( + mode="file", + project_root_dir=tmp_path, + user_agent_str=user_agent_str, + ) + + mock_init.assert_called_with( + enable=True, + data_context_id=mock.ANY, + organization_id=mock.ANY, + oss_id=mock.ANY, + user_id=mock.ANY, + user_agent_str=user_agent_str, ) @@ -223,4 +285,35 @@ def test_analytics_enabled_after_setting_explicitly( organization_id=mock.ANY, oss_id=mock.ANY, user_id=mock.ANY, + user_agent_str=mock.ANY, + ) + + +@pytest.mark.parametrize("initial_user_agent_str", [None, "old user agent string"]) +@pytest.mark.parametrize("new_user_agent_str", [None, "new user agent string"]) +@pytest.mark.unit +def test_user_agent_str_after_setting_explicitly( + initial_user_agent_str: Optional[str], + new_user_agent_str: Optional[str], + 
monkeypatch, +): + monkeypatch.setattr(ENV_CONFIG, "gx_analytics_enabled", True) + + with mock.patch( + "great_expectations.data_context.data_context.abstract_data_context.init_analytics" + ) as mock_init: + context = gx.get_context( + mode="ephemeral", + user_agent_str=initial_user_agent_str, + ) + + context.set_user_agent_str(new_user_agent_str) + + mock_init.assert_called_with( + enable=True, + data_context_id=mock.ANY, + organization_id=mock.ANY, + oss_id=mock.ANY, + user_id=mock.ANY, + user_agent_str=new_user_agent_str, ) diff --git a/tests/analytics/test_events.py b/tests/analytics/test_events.py index 484d6930f8b5..5782b4100b01 100644 --- a/tests/analytics/test_events.py +++ b/tests/analytics/test_events.py @@ -1,5 +1,6 @@ import pytest +from great_expectations.analytics.base_event import Event from great_expectations.analytics.events import ( ActionInfo, CheckpointCreatedEvent, @@ -99,11 +100,17 @@ ], ) @pytest.mark.unit -def test_event_properties(event, expected_properties): +def test_event_properties(event: Event, expected_properties: dict): actual_properties = event.properties() # Assert that base properties are present - for base_property in ("data_context_id", "oss_id", "service", "gx_version"): + for base_property in ( + "data_context_id", + "oss_id", + "service", + "gx_version", + "user_agent_str", + ): assert base_property in actual_properties actual_properties.pop(base_property) diff --git a/tests/build_index_page.py b/tests/build_index_page.py index e6b35ba4a78f..7aa99c666562 100644 --- a/tests/build_index_page.py +++ b/tests/build_index_page.py @@ -1,7 +1,7 @@ import glob -json_files = glob.glob("tests/**/output/**/*.json", recursive=True) # noqa: PTH207 -html_files = glob.glob("tests/**/output/**/*.html", recursive=True) # noqa: PTH207 +json_files = glob.glob("tests/**/output/**/*.json", recursive=True) # noqa: PTH207 # FIXME CoP +html_files = glob.glob("tests/**/output/**/*.html", recursive=True) # noqa: PTH207 # FIXME CoP html_list = "" for f_ 
in html_files: diff --git a/tests/checkpoint/cloud_config.py b/tests/checkpoint/cloud_config.py index cf54087880f7..6247768e79c2 100644 --- a/tests/checkpoint/cloud_config.py +++ b/tests/checkpoint/cloud_config.py @@ -18,7 +18,7 @@ def store_get(self, key): # key is a 3-tuple with the form # (GXCloudRESTResource, cloud_id as a string uuid, name as string) # For example: - # (, '731dc2a5-45d8-4827-9118-39b77c5cd413', 'my_checkpoint') # noqa: E501 + # (, '731dc2a5-45d8-4827-9118-39b77c5cd413', 'my_checkpoint') # noqa: E501 # FIXME CoP type_ = key[0] if type_ == GXCloudRESTResource.CHECKPOINT: return {"data": _checkpoint_config(data_file_name, with_slack)} @@ -44,7 +44,7 @@ def store_set(self, key, value, **kwargs): "created_by_id": "934e0898-6a5c-4ffd-9125-89381a46d191", "organization_id": org_id, "validation_result": { - "display_url": f"{base_url}{org_id}/?validationResultId=2e13ecc3-eaaa-444b-b30d-2f616f80ae35", # noqa: E501 + "display_url": f"{base_url}{org_id}/?validationResultId=2e13ecc3-eaaa-444b-b30d-2f616f80ae35", # noqa: E501 # FIXME CoP }, } } @@ -256,7 +256,7 @@ def _datasource(data_dir): "taxi_data": { "batch_identifiers": ["runtime_batch_identifier_name"], "class_name": "Asset", - "module_name": "great_expectations.datasource.data_connector.asset", # noqa: E501 + "module_name": "great_expectations.datasource.data_connector.asset", # noqa: E501 # FIXME CoP } }, "class_name": "RuntimeDataConnector", diff --git a/tests/checkpoint/conftest.py b/tests/checkpoint/conftest.py index f7350f18b20a..b94c5967ac55 100644 --- a/tests/checkpoint/conftest.py +++ b/tests/checkpoint/conftest.py @@ -77,7 +77,7 @@ def fluent_batch_request(batch_request_as_dict: Dict[str, str]) -> FluentBatchRe def titanic_pandas_data_context_stats_enabled_and_expectation_suite_with_one_expectation( titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_empty_store_stats_enabled, ): - context = 
titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 + context = titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 # FIXME CoP # create expectation suite suite = context.suites.add(ExpectationSuite("my_expectation_suite")) expectation = gxe.ExpectColumnValuesToBeBetween( @@ -92,10 +92,10 @@ def titanic_pandas_data_context_stats_enabled_and_expectation_suite_with_one_exp @pytest.fixture -def titanic_data_context_with_fluent_pandas_datasources_stats_enabled_and_expectation_suite_with_one_expectation( # noqa: E501 +def titanic_data_context_with_fluent_pandas_datasources_stats_enabled_and_expectation_suite_with_one_expectation( # noqa: E501 # FIXME CoP titanic_data_context_with_fluent_pandas_datasources_with_checkpoints_v1_with_empty_store_stats_enabled, ): - context = titanic_data_context_with_fluent_pandas_datasources_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 + context = titanic_data_context_with_fluent_pandas_datasources_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 # FIXME CoP datasource_name = "my_pandas_filesystem_datasource" datasource = context.data_sources.get(datasource_name=datasource_name) @@ -130,10 +130,10 @@ def titanic_data_context_with_fluent_pandas_datasources_stats_enabled_and_expect @pytest.fixture -def titanic_data_context_with_fluent_pandas_and_spark_datasources_stats_enabled_and_expectation_suite_with_one_expectation( # noqa: E501 +def titanic_data_context_with_fluent_pandas_and_spark_datasources_stats_enabled_and_expectation_suite_with_one_expectation( # noqa: E501 # FIXME CoP titanic_data_context_with_fluent_pandas_and_spark_datasources_with_checkpoints_v1_with_empty_store_stats_enabled, ): - context = titanic_data_context_with_fluent_pandas_and_spark_datasources_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 + context = 
titanic_data_context_with_fluent_pandas_and_spark_datasources_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 # FIXME CoP datasource_name = "my_pandas_filesystem_datasource" datasource = context.data_sources.get(datasource_name=datasource_name) diff --git a/tests/checkpoint/test_checkpoint.py b/tests/checkpoint/test_checkpoint.py index 0f4184c015ac..6115ad52cc94 100644 --- a/tests/checkpoint/test_checkpoint.py +++ b/tests/checkpoint/test_checkpoint.py @@ -3,7 +3,7 @@ import json import pathlib import uuid -from typing import TYPE_CHECKING, List, Type +from typing import TYPE_CHECKING, Generator, List, Literal, Type from unittest import mock import pandas as pd @@ -17,11 +17,10 @@ MicrosoftTeamsNotificationAction, SlackNotificationAction, UpdateDataDocsAction, - ValidationAction, ) +from great_expectations.checkpoint.actions import ValidationAction from great_expectations.checkpoint.checkpoint import ( Checkpoint, - CheckpointAction, CheckpointResult, ) from great_expectations.compatibility.pydantic import ValidationError @@ -60,6 +59,7 @@ ) from great_expectations.exceptions.exceptions import ( CheckpointNotFoundError, + ValidationActionRegistryRetrievalError, ValidationDefinitionNotFoundError, ) from great_expectations.exceptions.resource_freshness import ResourceFreshnessAggregateError @@ -258,7 +258,7 @@ def validation_definitions( "notify_on": "all", "renderer": { "class_name": "MicrosoftTeamsRenderer", - "module_name": "great_expectations.render.renderer.microsoft_teams_renderer", # noqa: E501 + "module_name": "great_expectations.render.renderer.microsoft_teams_renderer", # noqa: E501 # FIXME CoP }, "teams_webhook": "teams_webhook", "type": "microsoft", @@ -311,7 +311,7 @@ def test_checkpoint_serialization( def test_checkpoint_filesystem_round_trip_adds_ids( self, tmp_path: pathlib.Path, - actions: list[CheckpointAction], + actions: list[ValidationAction], ): with working_directory(tmp_path): context = gx.get_context(mode="file") @@ -327,7 
+327,7 @@ def test_checkpoint_filesystem_round_trip_adds_ids( cp_name = "my_checkpoint" ds = context.data_sources.add_pandas(ds_name) - asset = ds.add_csv_asset(asset_name, "my_file.csv") # type: ignore[arg-type] + asset = ds.add_csv_asset(asset_name, "my_file.csv") # type: ignore[arg-type] # FIXME CoP bc1 = asset.add_batch_definition(batch_definition_name_1) suite1 = context.suites.add(ExpectationSuite(suite_name_1)) @@ -381,7 +381,7 @@ def test_checkpoint_filesystem_round_trip_adds_ids( "notify_on": "all", "renderer": { "class_name": "MicrosoftTeamsRenderer", - "module_name": "great_expectations.render.renderer.microsoft_teams_renderer", # noqa: E501 + "module_name": "great_expectations.render.renderer.microsoft_teams_renderer", # noqa: E501 # FIXME CoP }, "teams_webhook": "teams_webhook", "type": "microsoft", @@ -406,7 +406,7 @@ def test_checkpoint_filesystem_round_trip_adds_ids( assert cp.validation_definitions[1].batch_definition.name == batch_definition_name_2 assert cp.validation_definitions[1].suite.name == suite_name_2 - # Check that all validation_definitions and nested suites have been assigned IDs during serialization # noqa: E501 + # Check that all validation_definitions and nested suites have been assigned IDs during serialization # noqa: E501 # FIXME CoP self._assert_valid_uuid(id=cp.validation_definitions[0].id) self._assert_valid_uuid(id=cp.validation_definitions[1].id) self._assert_valid_uuid(id=cp.validation_definitions[0].suite.id) @@ -421,6 +421,8 @@ def _assert_valid_uuid(self, id: str | None) -> None: except ValueError: pytest.fail(f"{id} is not a valid UUID.") + +class TestCheckpointDeserialization: @pytest.mark.parametrize( "serialized_checkpoint, expected_error", [ @@ -465,16 +467,22 @@ def test_checkpoint_deserialization_failure( assert expected_error in str(e.value) - @pytest.mark.unit - def test_checkpoint_deserialization_with_actions(self, mocker: MockerFixture): - # Arrange + @pytest.fixture + def _set_context(self, mocker: 
MockerFixture) -> Generator[None, None, None]: context = mocker.Mock(spec=AbstractDataContext) context.validation_definition_store.get.return_value = mocker.Mock( spec=ValidationDefinition ) + set_context(context) + yield + set_context(None) - # Act + @pytest.mark.unit + def test_checkpoint_deserialization_with_actions_success( + self, _set_context: Generator[None, None, None] + ): + # Arrange serialized_checkpoint = { "actions": [ {"name": "my_docs_action", "site_names": [], "type": "update_data_docs"}, @@ -488,6 +496,8 @@ def test_checkpoint_deserialization_with_actions(self, mocker: MockerFixture): {"id": "3fb9ce09-a8fb-44d6-8abd-7d699443f6a1", "name": "my_validation_def"} ], } + + # Act checkpoint = Checkpoint.parse_obj(serialized_checkpoint) # Assert @@ -496,6 +506,69 @@ def test_checkpoint_deserialization_with_actions(self, mocker: MockerFixture): assert isinstance(checkpoint.actions[1], SlackNotificationAction) assert isinstance(checkpoint.actions[2], MicrosoftTeamsNotificationAction) + @pytest.mark.parametrize( + "action_config, expected_error", + [ + pytest.param( + {"name": "my_docs_action", "site_names": []}, + ValidationActionRegistryRetrievalError, + id="no_type", + ), + pytest.param( + {"name": "my_custom_action", "type": "not_registered"}, + ValidationActionRegistryRetrievalError, + id="not_registered", + ), + ], + ) + @pytest.mark.unit + def test_checkpoint_deserialization_with_actions_failure( + self, + _set_context: Generator[None, None, None], + action_config: dict, + expected_error: Type[Exception], + ): + # Arrange + serialized_checkpoint = { + "actions": [ + action_config, + ], + "id": "e7d1f462-821b-429c-8086-cca80eeea5e9", + "name": "my_checkpoint", + "validation_definitions": [ + {"id": "3fb9ce09-a8fb-44d6-8abd-7d699443f6a1", "name": "my_validation_def"} + ], + } + + # Act & Assert + with pytest.raises(expected_error): + Checkpoint.parse_obj(serialized_checkpoint) + + @pytest.mark.unit + def 
test_checkpoint_deserialization_with_custom_validation_action( + self, _set_context: Generator[None, None, None] + ): + # Arrange + class CustomAction(ValidationAction): + type: Literal["custom"] = "custom" + + serialized_checkpoint = { + "actions": [ + {"name": "my_custom_action", "type": "custom"}, + ], + "id": "e7d1f462-821b-429c-8086-cca80eeea5e9", + "name": "my_checkpoint", + "validation_definitions": [ + {"id": "3fb9ce09-a8fb-44d6-8abd-7d699443f6a1", "name": "my_validation_def"} + ], + } + + # Act + checkpoint = Checkpoint.parse_obj(serialized_checkpoint) + + # Assert + assert isinstance(checkpoint.actions[0], CustomAction) + class TestCheckpointResult: suite_name: str = "my_suite" @@ -657,7 +730,7 @@ def test_checkpoint_sorts_actions(self, validation_definition: ValidationDefinit ) data_docs_action = UpdateDataDocsAction(name="my_docs_action") - actions: List[CheckpointAction] = [slack_action, teams_action, data_docs_action] + actions: List[ValidationAction] = [slack_action, teams_action, data_docs_action] validation_definitions = [validation_definition] checkpoint = Checkpoint( @@ -688,7 +761,7 @@ def test_checkpoint_run_passes_through_runtime_params( batch_parameters=batch_parameters, expectation_parameters=expectation_parameters ) - validation_definition.run.assert_called_with( # type: ignore[attr-defined] + validation_definition.run.assert_called_with( # type: ignore[attr-defined] # FIXME CoP checkpoint_id=checkpoint_id, batch_parameters=batch_parameters, expectation_parameters=expectation_parameters, @@ -804,7 +877,7 @@ def test_result_describe(self, mocker: MockerFixture): assert actual == expected def _build_file_backed_checkpoint( - self, tmp_path: pathlib.Path, actions: list[CheckpointAction] | None = None + self, tmp_path: pathlib.Path, actions: list[ValidationAction] | None = None ) -> Checkpoint: actions = actions or [] with working_directory(tmp_path): diff --git a/tests/checkpoint/test_checkpoint_id_pk.py 
b/tests/checkpoint/test_checkpoint_id_pk.py index 5d35930f62d2..3978af47d1f4 100644 --- a/tests/checkpoint/test_checkpoint_id_pk.py +++ b/tests/checkpoint/test_checkpoint_id_pk.py @@ -44,7 +44,7 @@ def data_context_with_connection_to_metrics_db( ], It is used by tests for unexpected_index_list (ID/Primary Key). - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP project_path = tmp_path / "test_configuration" context = gx.get_context(mode="file", project_root_dir=project_path) @@ -206,7 +206,7 @@ def test_sql_result_format_in_checkpoint_pk_defined_one_expectation_complete_out @pytest.mark.filesystem -def test_sql_result_format_in_checkpoint_pk_defined_column_pair_expectation_complete_output_with_query( # noqa: E501 +def test_sql_result_format_in_checkpoint_pk_defined_column_pair_expectation_complete_output_with_query( # noqa: E501 # FIXME CoP data_context_with_connection_to_metrics_db: FileDataContext, expect_column_pair_values_to_be_equal: gxe.ExpectColumnPairValuesToBeEqual, ): @@ -241,7 +241,7 @@ def test_sql_result_format_in_checkpoint_pk_defined_column_pair_expectation_comp unexpected_index_query = evrs[0]["results"][0]["result"]["unexpected_index_query"] assert ( unexpected_index_query - == "SELECT pk_1, ordered_item, received_item \nFROM column_pairs \nWHERE NOT (ordered_item = received_item AND NOT (ordered_item IS NULL OR received_item IS NULL));" # noqa: E501 + == "SELECT pk_1, ordered_item, received_item \nFROM column_pairs \nWHERE NOT (ordered_item = received_item AND NOT (ordered_item IS NULL OR received_item IS NULL));" # noqa: E501 # FIXME CoP ) @@ -278,7 +278,7 @@ def test_sql_result_format_in_checkpoint_pk_defined_column_pair_expectation_summ @pytest.mark.filesystem -def test_sql_result_format_in_checkpoint_pk_defined_multi_column_sum_expectation_complete_output_with_query( # noqa: E501 +def test_sql_result_format_in_checkpoint_pk_defined_multi_column_sum_expectation_complete_output_with_query( # noqa: E501 # FIXME CoP 
data_context_with_connection_to_metrics_db: FileDataContext, expect_multicolumn_sum_to_equal: gxe.ExpectMulticolumnSumToEqual, ): @@ -554,5 +554,5 @@ def test_sql_complete_output_no_id_pk_fallback( # query does not contain id_pk column assert ( unexpected_index_query - == "SELECT animals \nFROM animal_names \nWHERE animals IS NOT NULL AND (animals NOT IN ('cat', 'fish', 'dog'));" # noqa: E501 + == "SELECT animals \nFROM animal_names \nWHERE animals IS NOT NULL AND (animals NOT IN ('cat', 'fish', 'dog'));" # noqa: E501 # FIXME CoP ) diff --git a/tests/conftest.py b/tests/conftest.py index b27f07e83f7b..92eec175d816 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -164,7 +164,7 @@ def spark_warehouse_session(tmp_path_factory): def pytest_configure(config): config.addinivalue_line( "markers", - "smoketest: mark test as smoketest--it does not have useful assertions but may produce side effects " # noqa: E501 + "smoketest: mark test as smoketest--it does not have useful assertions but may produce side effects " # noqa: E501 # FIXME CoP "that require manual inspection.", ) config.addinivalue_line( @@ -279,7 +279,7 @@ def pytest_addoption(parser): parser.addoption( "--performance-tests", action="store_true", - help="If set, run performance tests (which might also require additional arguments like --bigquery)", # noqa: E501 + help="If set, run performance tests (which might also require additional arguments like --bigquery)", # noqa: E501 # FIXME CoP ) @@ -292,12 +292,12 @@ def build_test_backends_list_v3_api(metafunc): # adding deprecation warnings if metafunc.config.getoption("--no-postgresql"): warnings.warn( - "--no-sqlalchemy is deprecated as of v0.14 in favor of the --postgresql flag. It will be removed in v0.16. Please adjust your tests accordingly", # noqa: E501 + "--no-sqlalchemy is deprecated as of v0.14 in favor of the --postgresql flag. It will be removed in v0.16. 
Please adjust your tests accordingly", # noqa: E501 # FIXME CoP DeprecationWarning, ) if metafunc.config.getoption("--no-spark"): warnings.warn( - "--no-spark is deprecated as of v0.14 in favor of the --spark flag. It will be removed in v0.16. Please adjust your tests accordingly.", # noqa: E501 + "--no-spark is deprecated as of v0.14 in favor of the --spark flag. It will be removed in v0.16. Please adjust your tests accordingly.", # noqa: E501 # FIXME CoP DeprecationWarning, ) include_pandas: bool = True @@ -494,7 +494,7 @@ def sa(test_backends): def spark_session(test_backends) -> pyspark.SparkSession: from great_expectations.compatibility import pyspark - if pyspark.SparkSession: # type: ignore[truthy-function] + if pyspark.SparkSession: # type: ignore[truthy-function] # FIXME CoP return SparkDFExecutionEngine.get_or_create_spark_session() raise ValueError("spark tests are requested, but pyspark is not installed") @@ -504,7 +504,7 @@ def spark_session(test_backends) -> pyspark.SparkSession: def spark_connect_session(test_backends): from great_expectations.compatibility import pyspark - if pyspark.SparkConnectSession: # type: ignore[truthy-function] + if pyspark.SparkConnectSession: # type: ignore[truthy-function] # FIXME CoP spark_connect_session = pyspark.SparkSession.builder.remote( "sc://localhost:15002" ).getOrCreate() @@ -575,8 +575,8 @@ def spark_df_taxi_data_schema(spark_session): @pytest.fixture def spark_session_v012(test_backends): try: - import pyspark # noqa: F401 - from pyspark.sql import SparkSession # noqa: F401 + import pyspark # noqa: F401 # FIXME CoP + from pyspark.sql import SparkSession # noqa: F401 # FIXME CoP return SparkDFExecutionEngine.get_or_create_spark_session() except ImportError: @@ -612,29 +612,29 @@ def empty_data_context( project_path.mkdir() project_path = str(project_path) context = gx.get_context(mode="file", project_root_dir=project_path) - context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 - 
asset_config_path = os.path.join(context_path, "expectations") # noqa: PTH118 - os.makedirs(asset_config_path, exist_ok=True) # noqa: PTH103 + context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP + asset_config_path = os.path.join(context_path, "expectations") # noqa: PTH118 # FIXME CoP + os.makedirs(asset_config_path, exist_ok=True) # noqa: PTH103 # FIXME CoP assert context.list_datasources() == [] project_manager.set_project(context) return context @pytest.fixture -def titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_empty_store_stats_enabled( # noqa: E501 +def titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_empty_store_stats_enabled( # noqa: E501 # FIXME CoP tmp_path_factory, monkeypatch, ): project_path: str = str(tmp_path_factory.mktemp("titanic_data_context_013")) - context_path: str = os.path.join( # noqa: PTH118 + context_path: str = os.path.join( # noqa: PTH118 # FIXME CoP project_path, FileDataContext.GX_DIR ) - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "expectations"), # noqa: PTH118 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "expectations"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "plugins"), # noqa: PTH118 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "plugins"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) shutil.copy( @@ -651,8 +651,8 @@ def titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_em ), pathlib.Path(context_path) / "plugins" / "extended_checkpoint.py", ) - data_path: str = os.path.join(context_path, "..", "data", "titanic") # noqa: PTH118 - os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 + data_path: str = os.path.join(context_path, "..", "data", "titanic") # noqa: PTH118 # FIXME CoP + os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 # FIXME CoP 
shutil.copy( file_relative_path( __file__, @@ -663,15 +663,15 @@ def titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_em ) ), ), - str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 + str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 # FIXME CoP ) shutil.copy( file_relative_path( __file__, - os.path.join("test_sets", "Titanic.csv"), # noqa: PTH118 + os.path.join("test_sets", "Titanic.csv"), # noqa: PTH118 # FIXME CoP ), str( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP context_path, "..", "data", "titanic", "Titanic_19120414_1313.csv" ) ), @@ -679,10 +679,10 @@ def titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_em shutil.copy( file_relative_path( __file__, - os.path.join("test_sets", "Titanic.csv"), # noqa: PTH118 + os.path.join("test_sets", "Titanic.csv"), # noqa: PTH118 # FIXME CoP ), str( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP context_path, "..", "data", "titanic", "Titanic_19120414_1313" ) ), @@ -690,10 +690,10 @@ def titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_em shutil.copy( file_relative_path( __file__, - os.path.join("test_sets", "Titanic.csv"), # noqa: PTH118 + os.path.join("test_sets", "Titanic.csv"), # noqa: PTH118 # FIXME CoP ), str( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP context_path, "..", "data", "titanic", "Titanic_1911.csv" ) ), @@ -701,10 +701,10 @@ def titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_em shutil.copy( file_relative_path( __file__, - os.path.join("test_sets", "Titanic.csv"), # noqa: PTH118 + os.path.join("test_sets", "Titanic.csv"), # noqa: PTH118 # FIXME CoP ), str( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP context_path, "..", "data", "titanic", "Titanic_1912.csv" ) ), @@ -719,30 +719,30 @@ def 
titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_em @pytest.fixture -def titanic_v013_multi_datasource_pandas_data_context_with_checkpoints_v1_with_empty_store_stats_enabled( # noqa: E501 +def titanic_v013_multi_datasource_pandas_data_context_with_checkpoints_v1_with_empty_store_stats_enabled( # noqa: E501 # FIXME CoP titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_empty_store_stats_enabled, tmp_path_factory, monkeypatch, ): - context = titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 + context = titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 # FIXME CoP project_manager.set_project(context) return context @pytest.fixture -def titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled( # noqa: E501 +def titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled( # noqa: E501 # FIXME CoP sa, - titanic_v013_multi_datasource_pandas_data_context_with_checkpoints_v1_with_empty_store_stats_enabled: AbstractDataContext, # noqa: E501 + titanic_v013_multi_datasource_pandas_data_context_with_checkpoints_v1_with_empty_store_stats_enabled: AbstractDataContext, # noqa: E501 # FIXME CoP tmp_path_factory, test_backends, monkeypatch, ): - context = titanic_v013_multi_datasource_pandas_data_context_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 + context = titanic_v013_multi_datasource_pandas_data_context_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 # FIXME CoP project_dir = context.root_directory assert isinstance(project_dir, str) - data_path: str = os.path.join(project_dir, "..", "data", "titanic") # noqa: PTH118 + data_path: str = os.path.join(project_dir, "..", "data", "titanic") # noqa: PTH118 # FIXME CoP if ( any(dbms 
in test_backends for dbms in ["postgresql", "sqlite", "mysql", "mssql"]) @@ -751,9 +751,9 @@ def titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_co ): db_fixture_file_path: str = file_relative_path( __file__, - os.path.join("test_sets", "titanic_sql_test_cases.db"), # noqa: PTH118 + os.path.join("test_sets", "titanic_sql_test_cases.db"), # noqa: PTH118 # FIXME CoP ) - db_file_path: str = os.path.join( # noqa: PTH118 + db_file_path: str = os.path.join( # noqa: PTH118 # FIXME CoP data_path, "titanic_sql_test_cases.db", ) @@ -771,7 +771,7 @@ def titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_co @pytest.fixture -def titanic_v013_multi_datasource_multi_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled( # noqa: E501 +def titanic_v013_multi_datasource_multi_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled( # noqa: E501 # FIXME CoP sa, spark_session, titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled, @@ -779,7 +779,7 @@ def titanic_v013_multi_datasource_multi_execution_engine_data_context_with_check test_backends, monkeypatch, ): - context = titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 + context = titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 # FIXME CoP project_manager.set_project(context) return context @@ -790,13 +790,13 @@ def deterministic_asset_data_connector_context( monkeypatch, ): project_path = str(tmp_path_factory.mktemp("titanic_data_context")) - context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "expectations"), # noqa: PTH118 + context_path = os.path.join(project_path, 
FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "expectations"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) - data_path = os.path.join(context_path, "..", "data", "titanic") # noqa: PTH118 - os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 + data_path = os.path.join(context_path, "..", "data", "titanic") # noqa: PTH118 # FIXME CoP + os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 # FIXME CoP shutil.copy( file_relative_path( __file__, @@ -807,12 +807,12 @@ def deterministic_asset_data_connector_context( ) ), ), - str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 + str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 # FIXME CoP ) shutil.copy( file_relative_path(__file__, "./test_sets/Titanic.csv"), str( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP context_path, "..", "data", "titanic", "Titanic_19120414_1313.csv" ) ), @@ -820,7 +820,7 @@ def deterministic_asset_data_connector_context( shutil.copy( file_relative_path(__file__, "./test_sets/Titanic.csv"), str( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP context_path, "..", "data", "titanic", "Titanic_1911.csv" ) ), @@ -828,7 +828,7 @@ def deterministic_asset_data_connector_context( shutil.copy( file_relative_path(__file__, "./test_sets/Titanic.csv"), str( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP context_path, "..", "data", "titanic", "Titanic_1912.csv" ) ), @@ -842,20 +842,20 @@ def deterministic_asset_data_connector_context( @pytest.fixture -def titanic_data_context_with_fluent_pandas_datasources_with_checkpoints_v1_with_empty_store_stats_enabled( # noqa: E501 +def titanic_data_context_with_fluent_pandas_datasources_with_checkpoints_v1_with_empty_store_stats_enabled( # noqa: E501 # FIXME CoP tmp_path_factory, monkeypatch, ): project_path: str = 
str(tmp_path_factory.mktemp("titanic_data_context_013")) - context_path: str = os.path.join( # noqa: PTH118 + context_path: str = os.path.join( # noqa: PTH118 # FIXME CoP project_path, FileDataContext.GX_DIR ) - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "expectations"), # noqa: PTH118 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "expectations"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) - data_path: str = os.path.join(context_path, "..", "data", "titanic") # noqa: PTH118 - os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 + data_path: str = os.path.join(context_path, "..", "data", "titanic") # noqa: PTH118 # FIXME CoP + os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 # FIXME CoP shutil.copy( file_relative_path( __file__, @@ -866,10 +866,10 @@ def titanic_data_context_with_fluent_pandas_datasources_with_checkpoints_v1_with ) ), ), - str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 + str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 # FIXME CoP ) - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "plugins"), # noqa: PTH118 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "plugins"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) shutil.copy( @@ -889,10 +889,10 @@ def titanic_data_context_with_fluent_pandas_datasources_with_checkpoints_v1_with shutil.copy( file_relative_path( __file__, - os.path.join("test_sets", "Titanic.csv"), # noqa: PTH118 + os.path.join("test_sets", "Titanic.csv"), # noqa: PTH118 # FIXME CoP ), str( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP context_path, "..", "data", "titanic", "Titanic_19120414_1313.csv" ) ), @@ -900,10 +900,10 @@ def titanic_data_context_with_fluent_pandas_datasources_with_checkpoints_v1_with shutil.copy( file_relative_path( __file__, - os.path.join("test_sets", "Titanic.csv"), # noqa: PTH118 + os.path.join("test_sets", 
"Titanic.csv"), # noqa: PTH118 # FIXME CoP ), str( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP context_path, "..", "data", "titanic", "Titanic_19120414_1313" ) ), @@ -911,10 +911,10 @@ def titanic_data_context_with_fluent_pandas_datasources_with_checkpoints_v1_with shutil.copy( file_relative_path( __file__, - os.path.join("test_sets", "Titanic.csv"), # noqa: PTH118 + os.path.join("test_sets", "Titanic.csv"), # noqa: PTH118 # FIXME CoP ), str( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP context_path, "..", "data", "titanic", "Titanic_1911.csv" ) ), @@ -922,10 +922,10 @@ def titanic_data_context_with_fluent_pandas_datasources_with_checkpoints_v1_with shutil.copy( file_relative_path( __file__, - os.path.join("test_sets", "Titanic.csv"), # noqa: PTH118 + os.path.join("test_sets", "Titanic.csv"), # noqa: PTH118 # FIXME CoP ), str( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP context_path, "..", "data", "titanic", "Titanic_1912.csv" ) ), @@ -976,12 +976,12 @@ def titanic_data_context_with_fluent_pandas_datasources_with_checkpoints_v1_with @pytest.fixture -def titanic_data_context_with_fluent_pandas_and_spark_datasources_with_checkpoints_v1_with_empty_store_stats_enabled( # noqa: E501 +def titanic_data_context_with_fluent_pandas_and_spark_datasources_with_checkpoints_v1_with_empty_store_stats_enabled( # noqa: E501 # FIXME CoP titanic_data_context_with_fluent_pandas_datasources_with_checkpoints_v1_with_empty_store_stats_enabled, spark_df_from_pandas_df, spark_session, ): - context = titanic_data_context_with_fluent_pandas_datasources_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 + context = titanic_data_context_with_fluent_pandas_datasources_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 # FIXME CoP context_path: str = context.root_directory path_to_folder_containing_csv_files = pathlib.Path( context_path, @@ -1031,12 +1031,12 @@ def 
titanic_data_context_with_fluent_pandas_and_spark_datasources_with_checkpoin @pytest.fixture -def titanic_data_context_with_fluent_pandas_and_sqlite_datasources_with_checkpoints_v1_with_empty_store_stats_enabled( # noqa: E501 +def titanic_data_context_with_fluent_pandas_and_sqlite_datasources_with_checkpoints_v1_with_empty_store_stats_enabled( # noqa: E501 # FIXME CoP titanic_data_context_with_fluent_pandas_datasources_with_checkpoints_v1_with_empty_store_stats_enabled, db_file, sa, ): - context = titanic_data_context_with_fluent_pandas_datasources_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 + context = titanic_data_context_with_fluent_pandas_datasources_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 # FIXME CoP datasource_name = "my_sqlite_datasource" connection_string = f"sqlite:///{db_file}" @@ -1067,11 +1067,11 @@ def empty_context_with_checkpoint(empty_data_context): root_dir = empty_data_context.root_directory fixture_name = "my_checkpoint.yml" fixture_path = file_relative_path(__file__, f"./data_context/fixtures/contexts/{fixture_name}") - checkpoints_file = os.path.join( # noqa: PTH118 + checkpoints_file = os.path.join( # noqa: PTH118 # FIXME CoP root_dir, "checkpoints", fixture_name ) shutil.copy(fixture_path, checkpoints_file) - assert os.path.isfile(checkpoints_file) # noqa: PTH113 + assert os.path.isfile(checkpoints_file) # noqa: PTH113 # FIXME CoP project_manager.set_project(context) return context @@ -1080,9 +1080,9 @@ def empty_context_with_checkpoint(empty_data_context): def empty_data_context_stats_enabled(tmp_path_factory, monkeypatch): project_path = str(tmp_path_factory.mktemp("empty_data_context")) context = gx.get_context(mode="file", project_root_dir=project_path) - context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 - asset_config_path = os.path.join(context_path, "expectations") # noqa: PTH118 - os.makedirs(asset_config_path, exist_ok=True) # noqa: PTH103 + context_path = 
os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP + asset_config_path = os.path.join(context_path, "expectations") # noqa: PTH118 # FIXME CoP + os.makedirs(asset_config_path, exist_ok=True) # noqa: PTH103 # FIXME CoP project_manager.set_project(context) return context @@ -1090,28 +1090,28 @@ def empty_data_context_stats_enabled(tmp_path_factory, monkeypatch): @pytest.fixture def titanic_data_context(tmp_path_factory) -> FileDataContext: project_path = str(tmp_path_factory.mktemp("titanic_data_context")) - context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "expectations"), # noqa: PTH118 + context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "expectations"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "checkpoints"), # noqa: PTH118 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "checkpoints"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) - data_path = os.path.join(context_path, "..", "data") # noqa: PTH118 - os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 + data_path = os.path.join(context_path, "..", "data") # noqa: PTH118 # FIXME CoP + os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 # FIXME CoP titanic_yml_path = file_relative_path( __file__, "./test_fixtures/great_expectations_v013_titanic.yml" ) shutil.copy( titanic_yml_path, - str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 + str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 # FIXME CoP ) titanic_csv_path = file_relative_path(__file__, "./test_sets/Titanic.csv") shutil.copy( titanic_csv_path, - str(os.path.join(context_path, "..", "data", "Titanic.csv")), # noqa: PTH118 + str(os.path.join(context_path, "..", 
"data", "Titanic.csv")), # noqa: PTH118 # FIXME CoP ) context = get_context(context_root_dir=context_path) project_manager.set_project(context) @@ -1121,28 +1121,28 @@ def titanic_data_context(tmp_path_factory) -> FileDataContext: @pytest.fixture def titanic_data_context_no_data_docs_no_checkpoint_store(tmp_path_factory): project_path = str(tmp_path_factory.mktemp("titanic_data_context")) - context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "expectations"), # noqa: PTH118 + context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "expectations"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "checkpoints"), # noqa: PTH118 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "checkpoints"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) - data_path = os.path.join(context_path, "..", "data") # noqa: PTH118 - os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 + data_path = os.path.join(context_path, "..", "data") # noqa: PTH118 # FIXME CoP + os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 # FIXME CoP titanic_yml_path = file_relative_path( __file__, "./test_fixtures/great_expectations_titanic_pre_v013_no_data_docs.yml" ) shutil.copy( titanic_yml_path, - str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 + str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 # FIXME CoP ) titanic_csv_path = file_relative_path(__file__, "./test_sets/Titanic.csv") shutil.copy( titanic_csv_path, - str(os.path.join(context_path, "..", "data", "Titanic.csv")), # noqa: PTH118 + str(os.path.join(context_path, "..", "data", "Titanic.csv")), # noqa: PTH118 # FIXME CoP ) context = get_context(context_root_dir=context_path) 
project_manager.set_project(context) @@ -1152,28 +1152,28 @@ def titanic_data_context_no_data_docs_no_checkpoint_store(tmp_path_factory): @pytest.fixture def titanic_data_context_no_data_docs(tmp_path_factory): project_path = str(tmp_path_factory.mktemp("titanic_data_context")) - context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "expectations"), # noqa: PTH118 + context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "expectations"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "checkpoints"), # noqa: PTH118 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "checkpoints"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) - data_path = os.path.join(context_path, "..", "data") # noqa: PTH118 - os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 + data_path = os.path.join(context_path, "..", "data") # noqa: PTH118 # FIXME CoP + os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 # FIXME CoP titanic_yml_path = file_relative_path( __file__, "./test_fixtures/great_expectations_titanic_no_data_docs.yml" ) shutil.copy( titanic_yml_path, - str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 + str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 # FIXME CoP ) titanic_csv_path = file_relative_path(__file__, "./test_sets/Titanic.csv") shutil.copy( titanic_csv_path, - str(os.path.join(context_path, "..", "data", "Titanic.csv")), # noqa: PTH118 + str(os.path.join(context_path, "..", "data", "Titanic.csv")), # noqa: PTH118 # FIXME CoP ) context = get_context(context_root_dir=context_path) project_manager.set_project(context) @@ -1183,28 +1183,28 @@ def titanic_data_context_no_data_docs(tmp_path_factory): @pytest.fixture def 
titanic_data_context_stats_enabled(tmp_path_factory, monkeypatch): project_path = str(tmp_path_factory.mktemp("titanic_data_context")) - context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "expectations"), # noqa: PTH118 + context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "expectations"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "checkpoints"), # noqa: PTH118 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "checkpoints"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) - data_path = os.path.join(context_path, "..", "data") # noqa: PTH118 - os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 + data_path = os.path.join(context_path, "..", "data") # noqa: PTH118 # FIXME CoP + os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 # FIXME CoP titanic_yml_path = file_relative_path( __file__, "./test_fixtures/great_expectations_v013_titanic.yml" ) shutil.copy( titanic_yml_path, - str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 + str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 # FIXME CoP ) titanic_csv_path = file_relative_path(__file__, "./test_sets/Titanic.csv") shutil.copy( titanic_csv_path, - str(os.path.join(context_path, "..", "data", "Titanic.csv")), # noqa: PTH118 + str(os.path.join(context_path, "..", "data", "Titanic.csv")), # noqa: PTH118 # FIXME CoP ) context = get_context(context_root_dir=context_path) project_manager.set_project(context) @@ -1214,28 +1214,28 @@ def titanic_data_context_stats_enabled(tmp_path_factory, monkeypatch): @pytest.fixture def titanic_data_context_stats_enabled_config_version_2(tmp_path_factory, monkeypatch): project_path = 
str(tmp_path_factory.mktemp("titanic_data_context")) - context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "expectations"), # noqa: PTH118 + context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "expectations"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "checkpoints"), # noqa: PTH118 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "checkpoints"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) - data_path = os.path.join(context_path, "..", "data") # noqa: PTH118 - os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 + data_path = os.path.join(context_path, "..", "data") # noqa: PTH118 # FIXME CoP + os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 # FIXME CoP titanic_yml_path = file_relative_path( __file__, "./test_fixtures/great_expectations_titanic.yml" ) shutil.copy( titanic_yml_path, - str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 + str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 # FIXME CoP ) titanic_csv_path = file_relative_path(__file__, "./test_sets/Titanic.csv") shutil.copy( titanic_csv_path, - str(os.path.join(context_path, "..", "data", "Titanic.csv")), # noqa: PTH118 + str(os.path.join(context_path, "..", "data", "Titanic.csv")), # noqa: PTH118 # FIXME CoP ) context = get_context(context_root_dir=context_path) project_manager.set_project(context) @@ -1245,28 +1245,28 @@ def titanic_data_context_stats_enabled_config_version_2(tmp_path_factory, monkey @pytest.fixture def titanic_data_context_stats_enabled_config_version_3(tmp_path_factory, monkeypatch): project_path = str(tmp_path_factory.mktemp("titanic_data_context")) - context_path = os.path.join(project_path, FileDataContext.GX_DIR) 
# noqa: PTH118 - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "expectations"), # noqa: PTH118 + context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "expectations"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "checkpoints"), # noqa: PTH118 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "checkpoints"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) - data_path = os.path.join(context_path, "..", "data") # noqa: PTH118 - os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 + data_path = os.path.join(context_path, "..", "data") # noqa: PTH118 # FIXME CoP + os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 # FIXME CoP titanic_yml_path = file_relative_path( __file__, "./test_fixtures/great_expectations_v013_upgraded_titanic.yml" ) shutil.copy( titanic_yml_path, - str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 + str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 # FIXME CoP ) titanic_csv_path = file_relative_path(__file__, "./test_sets/Titanic.csv") shutil.copy( titanic_csv_path, - str(os.path.join(context_path, "..", "data", "Titanic.csv")), # noqa: PTH118 + str(os.path.join(context_path, "..", "data", "Titanic.csv")), # noqa: PTH118 # FIXME CoP ) context = get_context(context_root_dir=context_path) project_manager.set_project(context) @@ -1276,7 +1276,7 @@ def titanic_data_context_stats_enabled_config_version_3(tmp_path_factory, monkey @pytest.fixture(scope="module") def titanic_spark_db(tmp_path_factory, spark_warehouse_session): try: - from pyspark.sql import DataFrame # noqa: TCH002 + from pyspark.sql import DataFrame # noqa: TCH002 # FIXME CoP except ImportError: raise ValueError("spark tests are requested, but pyspark is not installed") @@ -1284,7 +1284,7 @@ def 
titanic_spark_db(tmp_path_factory, spark_warehouse_session): titanic_csv_path: str = file_relative_path(__file__, "./test_sets/Titanic.csv") project_path: str = str(tmp_path_factory.mktemp("data")) project_dataset_path: str = str( - os.path.join(project_path, "Titanic.csv") # noqa: PTH118 + os.path.join(project_path, "Titanic.csv") # noqa: PTH118 # FIXME CoP ) shutil.copy(titanic_csv_path, project_dataset_path) @@ -1385,26 +1385,26 @@ def data_context_parameterized_expectation_suite(tmp_path_factory): created with gx.get_context() """ project_path = str(tmp_path_factory.mktemp("data_context")) - context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 - asset_config_path = os.path.join(context_path, "expectations") # noqa: PTH118 + context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP + asset_config_path = os.path.join(context_path, "expectations") # noqa: PTH118 # FIXME CoP fixture_dir = file_relative_path(__file__, "./test_fixtures") - os.makedirs( # noqa: PTH103 - os.path.join(asset_config_path, "my_dag_node"), # noqa: PTH118 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(asset_config_path, "my_dag_node"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) shutil.copy( - os.path.join(fixture_dir, "great_expectations_v013_basic.yml"), # noqa: PTH118 - str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 + os.path.join(fixture_dir, "great_expectations_v013_basic.yml"), # noqa: PTH118 # FIXME CoP + str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 # FIXME CoP ) shutil.copy( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP fixture_dir, "expectation_suites/parameterized_expectation_suite_fixture.json", ), - os.path.join(asset_config_path, "my_dag_node", "default.json"), # noqa: PTH118 + os.path.join(asset_config_path, "my_dag_node", "default.json"), # noqa: PTH118 # FIXME CoP ) - os.makedirs( # noqa: PTH103 - os.path.join(context_path, 
"plugins"), # noqa: PTH118 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "plugins"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) return get_context(context_root_dir=context_path, cloud_mode=False) @@ -1476,7 +1476,7 @@ def evr_success(): @pytest.fixture -def sqlite_view_engine(test_backends) -> Engine: # type: ignore[return] +def sqlite_view_engine(test_backends) -> Engine: # type: ignore[return] # FIXME CoP # Create a small in-memory engine with two views, one of which is temporary if "sqlite" in test_backends: try: @@ -1501,7 +1501,7 @@ def sqlite_view_engine(test_backends) -> Engine: # type: ignore[return] ) return sqlite_engine except ImportError: - sa = None # type: ignore[assignment] + sa = None # type: ignore[assignment] # FIXME CoP else: pytest.skip("SqlAlchemy tests disabled; not testing views") @@ -1515,7 +1515,7 @@ def expectation_suite_identifier(): def test_folder_connection_path_csv(tmp_path_factory): df1 = pd.DataFrame({"col_1": [1, 2, 3, 4, 5], "col_2": ["a", "b", "c", "d", "e"]}) path = str(tmp_path_factory.mktemp("test_folder_connection_path_csv")) - df1.to_csv(path_or_buf=os.path.join(path, "test.csv"), index=False) # noqa: PTH118 + df1.to_csv(path_or_buf=os.path.join(path, "test.csv"), index=False) # noqa: PTH118 # FIXME CoP return str(path) @@ -1530,7 +1530,7 @@ def test_db_connection_string(tmp_path_factory, test_backends): import sqlalchemy as sa basepath = str(tmp_path_factory.mktemp("db_context")) - path = os.path.join(basepath, "test.db") # noqa: PTH118 + path = os.path.join(basepath, "test.db") # noqa: PTH118 # FIXME CoP engine = sa.create_engine("sqlite:///" + str(path)) add_dataframe_to_db(df=df1, name="table_1", con=engine, index=True) add_dataframe_to_db(df=df2, name="table_2", con=engine, index=True, schema="main") @@ -1546,7 +1546,7 @@ def test_df(tmp_path_factory): def generate_ascending_list_of_datetimes( k, start_date=datetime.date(2020, 1, 1), end_date=datetime.date(2020, 12, 31) ): - start_time = 
datetime.datetime(start_date.year, start_date.month, start_date.day) # noqa: DTZ001 + start_time = datetime.datetime(start_date.year, start_date.month, start_date.day) # noqa: DTZ001 # FIXME CoP days_between_dates = (end_date - start_date).total_seconds() datetime_list = [ @@ -1593,7 +1593,7 @@ def generate_ascending_list_of_datetimes( def sqlite_connection_string() -> str: db_file_path: str = file_relative_path( __file__, - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP "test_sets", "test_cases_for_sql_data_connector.db" ), ) @@ -1634,7 +1634,7 @@ def fds_data_context( def db_file(): return file_relative_path( __file__, - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP "test_sets", "test_cases_for_sql_data_connector.db" ), ) @@ -1759,7 +1759,7 @@ def ge_cloud_config_e2e() -> GXCloudConfig: GXCloudEnvironmentVariable.ACCESS_TOKEN, ) cloud_config = GXCloudConfig( - base_url=base_url, # type: ignore[arg-type] + base_url=base_url, # type: ignore[arg-type] # FIXME CoP organization_id=organization_id, access_token=access_token, ) @@ -1772,7 +1772,7 @@ def ge_cloud_config_e2e() -> GXCloudConfig: return_value=[], ) def empty_base_data_context_in_cloud_mode( - mock_list_keys: MagicMock, # Avoid making a call to Cloud backend during datasource instantiation # noqa: E501 + mock_list_keys: MagicMock, # Avoid making a call to Cloud backend during datasource instantiation # noqa: E501 # FIXME CoP tmp_path: pathlib.Path, empty_ge_cloud_data_context_config: DataContextConfig, ge_cloud_config: GXCloudConfig, @@ -1797,7 +1797,7 @@ def empty_data_context_in_cloud_mode( ge_cloud_config: GXCloudConfig, empty_ge_cloud_data_context_config: DataContextConfig, ): - """This fixture is a DataContext in cloud mode that mocks calls to the cloud backend during setup so that it can be instantiated in tests.""" # noqa: E501 + """This fixture is a DataContext in cloud mode that mocks calls to the cloud backend during setup so that it can be 
instantiated in tests.""" # noqa: E501 # FIXME CoP project_path = tmp_path / "empty_data_context" project_path.mkdir(exist_ok=True) @@ -1892,7 +1892,7 @@ def empty_base_data_context_in_cloud_mode_custom_base_url( ) -> CloudDataContext: project_path = tmp_path / "empty_data_context" project_path.mkdir() - project_path = str(project_path) # type: ignore[assignment] + project_path = str(project_path) # type: ignore[assignment] # FIXME CoP custom_base_url: str = "https://some_url.org/" custom_ge_cloud_config = copy.deepcopy(ge_cloud_config) @@ -1922,7 +1922,7 @@ def cloud_data_context_with_datasource_pandas_engine( return context -# TODO: AJB 20210525 This fixture is not yet used but may be helpful to generate batches for unit tests of multibatch # noqa: E501 +# TODO: AJB 20210525 This fixture is not yet used but may be helpful to generate batches for unit tests of multibatch # noqa: E501 # FIXME CoP # workflows. It should probably be extended to add different column types / data. @pytest.fixture def multibatch_generic_csv_generator(): @@ -1938,7 +1938,7 @@ def _multibatch_generic_csv_generator( ) -> List[str]: data_path = pathlib.Path(data_path) if start_date is None: - start_date = datetime.datetime(2000, 1, 1) # noqa: DTZ001 + start_date = datetime.datetime(2000, 1, 1) # noqa: DTZ001 # FIXME CoP file_list = [] category_strings = { @@ -1950,22 +1950,22 @@ def _multibatch_generic_csv_generator( 5: "category5", 6: "category6", } - for batch_num in range(num_event_batches): # type: ignore[arg-type] + for batch_num in range(num_event_batches): # type: ignore[arg-type] # FIXME CoP # generate a dataframe with multiple column types batch_start_date = start_date + datetime.timedelta( - days=(batch_num * num_events_per_batch) # type: ignore[operator] + days=(batch_num * num_events_per_batch) # type: ignore[operator] # FIXME CoP ) # TODO: AJB 20210416 Add more column types df = pd.DataFrame( { "event_date": [ (batch_start_date + datetime.timedelta(days=i)).strftime("%Y-%m-%d") 
- for i in range(num_events_per_batch) # type: ignore[arg-type] + for i in range(num_events_per_batch) # type: ignore[arg-type] # FIXME CoP ], - "batch_num": [batch_num + 1 for _ in range(num_events_per_batch)], # type: ignore[arg-type] + "batch_num": [batch_num + 1 for _ in range(num_events_per_batch)], # type: ignore[arg-type] # FIXME CoP "string_cardinality_3": [ category_strings[i % 3] - for i in range(num_events_per_batch) # type: ignore[arg-type] + for i in range(num_events_per_batch) # type: ignore[arg-type] # FIXME CoP ], } ) diff --git a/tests/core/factory/test_checkpoint_factory.py b/tests/core/factory/test_checkpoint_factory.py index dab205a33b8b..a7ffa93da60c 100644 --- a/tests/core/factory/test_checkpoint_factory.py +++ b/tests/core/factory/test_checkpoint_factory.py @@ -1,5 +1,6 @@ import re from unittest import mock +from unittest.mock import ANY from unittest.mock import ANY as ANY_TEST_ARG import pytest @@ -247,7 +248,7 @@ def test_checkpoint_factory_all(context_fixture_name: str, request: pytest.Fixtu # Arrange ds = context.data_sources.add_pandas("my_datasource") - asset = ds.add_csv_asset("my_asset", "data.csv") # type: ignore[arg-type] + asset = ds.add_csv_asset("my_asset", "data.csv") # type: ignore[arg-type] # FIXME CoP batch_def = asset.add_batch_definition("my_batch_definition") suite = context.suites.add(ExpectationSuite(name="my_suite")) @@ -292,7 +293,7 @@ def test_checkpoint_factory_all_with_bad_config( # Arrange context: AbstractDataContext = in_memory_runtime_context ds = context.data_sources.add_pandas("my_datasource") - asset = ds.add_csv_asset("my_asset", "data.csv") # type: ignore[arg-type] + asset = ds.add_csv_asset("my_asset", "data.csv") # type: ignore[arg-type] # FIXME CoP batch_def = asset.add_batch_definition("my_batch_definition") suite = context.suites.add(ExpectationSuite(name="my_suite")) @@ -415,3 +416,316 @@ def _test_checkpoint_factory_delete_emits_event(self, context): mock_submit.assert_called_once_with( 
event=CheckpointDeletedEvent(checkpoint_id=checkpoint.id) ) + + +class TestCheckpointFactoryAddOrUpdate: + CHECKPOINT_NAME = "checkpoint A" + + @pytest.mark.filesystem + def test_add_empty_new_checkpoint__filesystem(self, empty_data_context): + return self._test_add_empty_new_checkpoint(empty_data_context) + + @pytest.mark.cloud + def test_add_empty_new_checkpoint__cloud(self, empty_cloud_context_fluent): + return self._test_add_empty_new_checkpoint(empty_cloud_context_fluent) + + @pytest.mark.unit + def test_add_empty_new_checkpoint__ephemeral(self, ephemeral_context_with_defaults): + return self._test_add_empty_new_checkpoint(ephemeral_context_with_defaults) + + def _test_add_empty_new_checkpoint(self, context: AbstractDataContext): + # arrange + checkpoint = Checkpoint(name=self.CHECKPOINT_NAME, validation_definitions=[]) + + # act + created_checkpoint = context.checkpoints.add_or_update(checkpoint=checkpoint) + + # assert + assert created_checkpoint.id + context.checkpoints.get(self.CHECKPOINT_NAME) + + @pytest.mark.filesystem + def test_add_new_checkpoint_with_validations__filesystem(self, empty_data_context): + return self._test_add_new_checkpoint_with_validations(empty_data_context) + + @pytest.mark.cloud + def test_add_new_checkpoint_with_validations__cloud(self, empty_cloud_context_fluent): + return self._test_add_new_checkpoint_with_validations(empty_cloud_context_fluent) + + @pytest.mark.unit + def test_add_new_checkpoint_with_validations__ephemeral(self, ephemeral_context_with_defaults): + return self._test_add_new_checkpoint_with_validations(ephemeral_context_with_defaults) + + def _test_add_new_checkpoint_with_validations(self, context: AbstractDataContext): + # arrange + batch_def = ( + context.data_sources.add_pandas("data source A") + .add_dataframe_asset("asset A") + .add_batch_definition_whole_dataframe("batch def A") + ) + + validation_definitions = [ + ValidationDefinition( + name="val def A", + data=batch_def, + 
suite=ExpectationSuite(name="suite A"), + ), + ValidationDefinition( + name="val def B", + data=batch_def, + suite=ExpectationSuite(name="suite B"), + ), + ] + checkpoint = Checkpoint( + name=self.CHECKPOINT_NAME, validation_definitions=validation_definitions + ) + + # act + created_checkpoint = context.checkpoints.add_or_update(checkpoint=checkpoint) + + # assert + assert created_checkpoint.id + assert len(created_checkpoint.validation_definitions) == len(validation_definitions) + for val_def, created_val_def in zip( + validation_definitions, created_checkpoint.validation_definitions + ): + assert created_val_def.id + val_def_dict = val_def.dict() + val_def_dict["id"] = ANY + assert val_def_dict == created_val_def.dict() + + @pytest.mark.filesystem + def test_update_existing_checkpoint_adds_validations__filesystem(self, empty_data_context): + return self._test_update_existing_checkpoint_adds_validations(empty_data_context) + + @pytest.mark.cloud + def test_update_existing_checkpoint_adds_validations__cloud(self, empty_cloud_context_fluent): + return self._test_update_existing_checkpoint_adds_validations(empty_cloud_context_fluent) + + @pytest.mark.unit + def test_update_existing_checkpoint_adds_validations__ephemeral( + self, ephemeral_context_with_defaults + ): + return self._test_update_existing_checkpoint_adds_validations( + ephemeral_context_with_defaults + ) + + def _test_update_existing_checkpoint_adds_validations(self, context: AbstractDataContext): + # arrange + context.checkpoints.add( + checkpoint=Checkpoint(name=self.CHECKPOINT_NAME, validation_definitions=[]) + ) + + batch_def = ( + context.data_sources.add_pandas("data source A") + .add_dataframe_asset("asset A") + .add_batch_definition_whole_dataframe("batch def A") + ) + validation_definitions = [ + ValidationDefinition( + name="val def A", + data=batch_def, + suite=ExpectationSuite(name="suite A"), + ), + ValidationDefinition( + name="val def B", + data=batch_def, + 
suite=ExpectationSuite(name="suite B"), + ), + ] + checkpoint = Checkpoint( + name=self.CHECKPOINT_NAME, validation_definitions=validation_definitions + ) + + # act + created_checkpoint = context.checkpoints.add_or_update(checkpoint=checkpoint) + + # assert + assert created_checkpoint.id + assert len(checkpoint.validation_definitions) == len(validation_definitions) + for val_def, created_val_def in zip( + validation_definitions, created_checkpoint.validation_definitions + ): + assert created_val_def.id + val_def_dict = val_def.dict() + val_def_dict["id"] = ANY + assert val_def_dict == created_val_def.dict() + + @pytest.mark.filesystem + def test_update_existing_checkpoint_updates_validations__filesystem(self, empty_data_context): + return self._test_update_existing_checkpoint_updates_validations(empty_data_context) + + @pytest.mark.cloud + def test_update_existing_checkpoint_updates_validations__cloud( + self, empty_cloud_context_fluent + ): + return self._test_update_existing_checkpoint_updates_validations(empty_cloud_context_fluent) + + @pytest.mark.unit + def test_update_existing_checkpoint_updates_validations__ephemeral( + self, ephemeral_context_with_defaults + ): + return self._test_update_existing_checkpoint_updates_validations( + ephemeral_context_with_defaults + ) + + def _test_update_existing_checkpoint_updates_validations(self, context: AbstractDataContext): + # arrange + batch_def = ( + context.data_sources.add_pandas("data source A") + .add_dataframe_asset("asset A") + .add_batch_definition_whole_dataframe("batch def A") + ) + existing_suite = context.suites.add(ExpectationSuite(name="suite A")) + existing_val_def = context.validation_definitions.add( + ValidationDefinition( + name="val def A", + data=batch_def, + suite=existing_suite, + ) + ) + + context.checkpoints.add( + checkpoint=Checkpoint( + name=self.CHECKPOINT_NAME, validation_definitions=[existing_val_def] + ) + ) + new_suite_name = "suite C" + + # act + created_checkpoint = 
context.checkpoints.add_or_update( + checkpoint=Checkpoint( + name=self.CHECKPOINT_NAME, + validation_definitions=[ + ValidationDefinition( + name="val def A", + data=batch_def, + suite=ExpectationSuite(name=new_suite_name), + ), + ], + ) + ) + # assert + for val_def in created_checkpoint.validation_definitions: + assert val_def.suite.name == new_suite_name + + @pytest.mark.filesystem + def test_update_existing_checkpoint_deletes_validations__filesystem(self, empty_data_context): + return self._test_update_existing_checkpoint_deletes_validations(empty_data_context) + + @pytest.mark.cloud + def test_update_existing_checkpoint_deletes_validations__cloud( + self, empty_cloud_context_fluent + ): + return self._test_update_existing_checkpoint_deletes_validations(empty_cloud_context_fluent) + + @pytest.mark.unit + def test_update_existing_checkpoint_deletes_validations__ephemeral( + self, ephemeral_context_with_defaults + ): + return self._test_update_existing_checkpoint_deletes_validations( + ephemeral_context_with_defaults + ) + + def _test_update_existing_checkpoint_deletes_validations(self, context: AbstractDataContext): + # arrange + batch_def = ( + context.data_sources.add_pandas("data source A") + .add_dataframe_asset("asset A") + .add_batch_definition_whole_dataframe("batch def A") + ) + SUITE_NAME = "suite A" + VALIDATION_DEFINITION_NAME = "val def A" + existing_suite_1 = context.suites.add(ExpectationSuite(name=SUITE_NAME)) + existing_suite_2 = context.suites.add(ExpectationSuite(name="suite B")) + existing_val_def_1 = context.validation_definitions.add( + ValidationDefinition( + name=VALIDATION_DEFINITION_NAME, + data=batch_def, + suite=existing_suite_1, + ), + ) + existing_val_def_2 = context.validation_definitions.add( + ValidationDefinition( + name="val def B", + data=batch_def, + suite=existing_suite_2, + ) + ) + + context.checkpoints.add( + checkpoint=Checkpoint( + name=self.CHECKPOINT_NAME, + validation_definitions=[existing_val_def_1, 
existing_val_def_2], + ) + ) + + # act + created_checkpoint = context.checkpoints.add_or_update( + checkpoint=Checkpoint( + name=self.CHECKPOINT_NAME, + validation_definitions=[ + ValidationDefinition( + name="val def A", + data=batch_def, + suite=ExpectationSuite(name=SUITE_NAME), + ), + ], + ) + ) + + # assert + assert len(created_checkpoint.validation_definitions) == 1 + assert created_checkpoint.validation_definitions[0].name == VALIDATION_DEFINITION_NAME + assert created_checkpoint.validation_definitions[0].suite.name == SUITE_NAME + + @pytest.mark.filesystem + def test_add_or_update_is_idempotent__filesystem(self, empty_data_context): + return self._test_add_or_update_is_idempotent(empty_data_context) + + @pytest.mark.cloud + def test_add_or_update_is_idempotent__cloud(self, empty_cloud_context_fluent): + return self._test_add_or_update_is_idempotent(empty_cloud_context_fluent) + + @pytest.mark.unit + def test_add_or_update_is_idempotent__ephemeral(self, ephemeral_context_with_defaults): + return self._test_add_or_update_is_idempotent(ephemeral_context_with_defaults) + + def _test_add_or_update_is_idempotent(self, context: AbstractDataContext): + # arrange + batch_def = ( + context.data_sources.add_pandas("data source A") + .add_dataframe_asset("asset A") + .add_batch_definition_whole_dataframe("batch def A") + ) + SUITE_NAME = "suite A" + VALIDATION_DEFINITION_NAME = "val def A" + + # act + created_checkpoint_1 = context.checkpoints.add_or_update( + checkpoint=Checkpoint( + name=self.CHECKPOINT_NAME, + validation_definitions=[ + ValidationDefinition( + name=VALIDATION_DEFINITION_NAME, + data=batch_def, + suite=ExpectationSuite(name=SUITE_NAME), + ), + ], + ) + ) + created_checkpoint_2 = context.checkpoints.add_or_update( + checkpoint=Checkpoint( + name=self.CHECKPOINT_NAME, + validation_definitions=[ + ValidationDefinition( + name=VALIDATION_DEFINITION_NAME, + data=batch_def, + suite=ExpectationSuite(name=SUITE_NAME), + ), + ], + ) + ) + + # assert + assert 
created_checkpoint_1 == created_checkpoint_2 diff --git a/tests/core/factory/test_suite_factory.py b/tests/core/factory/test_suite_factory.py index 0b7179eac0fa..549fb9f9ed96 100644 --- a/tests/core/factory/test_suite_factory.py +++ b/tests/core/factory/test_suite_factory.py @@ -1,8 +1,12 @@ import re +from copy import copy from typing import Dict from unittest import mock +from unittest.mock import ( + ANY, + Mock, # noqa: TID251 # FIXME CoP +) from unittest.mock import ANY as ANY_TEST_ARG -from unittest.mock import Mock # noqa: TID251 import pytest from pytest_mock import MockerFixture @@ -19,6 +23,10 @@ from great_expectations.data_context.data_context.context_factory import set_context from great_expectations.data_context.store import ExpectationsStore from great_expectations.exceptions import DataContextError +from great_expectations.expectations import ( + ExpectColumnDistinctValuesToContainSet, + ExpectColumnSumToBeBetween, +) from great_expectations.types import SerializableDictDot @@ -260,7 +268,7 @@ def to_json_dict(self) -> Dict[str, JSONValues]: # type: ignore[explicit-overri # Put suite_2 into an invalid state, These BadExpectations are real Expectations since # we want them to not deserialize correctly. 
- suite_2.expectations = [BadExpectation(id=1), BadExpectation(id=2)] # type: ignore[list-item] + suite_2.expectations = [BadExpectation(id=1), BadExpectation(id=2)] # type: ignore[list-item] # FIXME CoP suite_2.save() # Act @@ -325,6 +333,263 @@ def test_suite_factory_all_with_bad_pydantic_config( assert re.match("pydantic.*ValidationError", analytics_submit_args.error_type) +class TestSuiteFactoryAddOrUpdate: + @pytest.mark.filesystem + def test_add_empty_new_suite__filesystem(self, empty_data_context): + self._test_add_empty_new_suite(empty_data_context) + + @pytest.mark.cloud + def test_add_empty_new_suite__cloud(self, empty_cloud_context_fluent): + self._test_add_empty_new_suite(empty_cloud_context_fluent) + + @pytest.mark.unit + def test_add_empty_new_suite__ephemeral(self, ephemeral_context_with_defaults): + self._test_add_empty_new_suite(ephemeral_context_with_defaults) + + def _test_add_empty_new_suite(self, context: AbstractDataContext): + # arrange + suite_name = "suite A" + suite = ExpectationSuite(name=suite_name) + + # act + created_suite = context.suites.add_or_update(suite=suite) + + # assert + assert created_suite.id + context.suites.get(suite_name) + + @pytest.mark.filesystem + def test_add_new_suite_with_expectations_filesystem(self, empty_data_context): + self._test_add_new_suite_with_expectations(empty_data_context) + + @pytest.mark.cloud + def test_add_new_suite_with_expectations__cloud(self, empty_cloud_context_fluent): + self._test_add_new_suite_with_expectations(empty_cloud_context_fluent) + + @pytest.mark.unit + def test_add_new_suite_with_expectations__ephemeral(self, ephemeral_context_with_defaults): + self._test_add_new_suite_with_expectations(ephemeral_context_with_defaults) + + def _test_add_new_suite_with_expectations(self, context: AbstractDataContext): + # arrange + suite_name = "suite A" + expectations = [ + ExpectColumnSumToBeBetween( + column="col A", + min_value=0, + max_value=10, + ), + ExpectColumnDistinctValuesToContainSet( 
+ column="col B", + value_set=["a", "b", "c"], + ), + ] + suite = ExpectationSuite( + name=suite_name, + expectations=[copy(exp) for exp in expectations], + ) + + # act + created_suite = context.suites.add_or_update(suite=suite) + + # assert + assert created_suite.id + context.suites.get(suite_name) + for exp, created_exp in zip(expectations, created_suite.expectations): + assert created_exp.id + exp.id = ANY + assert exp == created_exp + + @pytest.mark.filesystem + def test_update_existing_suite_adds_expectations__filesystem(self, empty_data_context): + self._test_update_existing_suite_adds_expectations(empty_data_context) + + @pytest.mark.cloud + def test_update_existing_suite_adds_expectations__cloud(self, empty_cloud_context_fluent): + self._test_update_existing_suite_adds_expectations(empty_cloud_context_fluent) + + @pytest.mark.unit + def test_update_existing_suite_adds_expectations__ephemeral( + self, ephemeral_context_with_defaults + ): + self._test_update_existing_suite_adds_expectations(ephemeral_context_with_defaults) + + def _test_update_existing_suite_adds_expectations(self, context: AbstractDataContext): + # arrange + suite_name = "suite A" + expectations = [ + ExpectColumnSumToBeBetween( + column="col A", + min_value=0, + max_value=10, + ), + ExpectColumnDistinctValuesToContainSet( + column="col B", + value_set=["a", "b", "c"], + ), + ] + suite = ExpectationSuite( + name=suite_name, + expectations=[copy(exp) for exp in expectations], + ) + existing_suite = context.suites.add(suite=ExpectationSuite(name=suite_name)) + + # act + updated_suite = context.suites.add_or_update(suite=suite) + + # assert + assert updated_suite.id == existing_suite.id + for exp, created_exp in zip(expectations, updated_suite.expectations): + assert created_exp.id + exp.id = ANY + assert exp == created_exp + + @pytest.mark.filesystem + def test_update_existing_suite_updates_expectations__filesystem(self, empty_data_context): + 
self._test_update_existing_suite_updates_expectations(empty_data_context) + + @pytest.mark.cloud + def test_update_existing_suite_updates_expectations__cloud(self, empty_cloud_context_fluent): + self._test_update_existing_suite_updates_expectations(empty_cloud_context_fluent) + + @pytest.mark.unit + def test_update_existing_suite_updates_expectations__ephemeral( + self, ephemeral_context_with_defaults + ): + self._test_update_existing_suite_updates_expectations(ephemeral_context_with_defaults) + + def _test_update_existing_suite_updates_expectations(self, context: AbstractDataContext): + # arrange + suite_name = "suite A" + expectations = [ + ExpectColumnSumToBeBetween( + column="col A", + min_value=0, + max_value=10, + ), + ExpectColumnDistinctValuesToContainSet( + column="col B", + value_set=["a", "b", "c"], + ), + ] + existing_suite = context.suites.add( + suite=ExpectationSuite( + name=suite_name, + expectations=[copy(exp) for exp in expectations], + ) + ) + new_col_name = "col C" + for exp in expectations: + exp.column = new_col_name + suite = ExpectationSuite( + name=suite_name, + expectations=[copy(exp) for exp in expectations], + ) + + # act + updated_suite = context.suites.add_or_update(suite=suite) + + # assert + assert updated_suite.id == existing_suite.id + for exp, created_exp in zip(expectations, updated_suite.expectations): + assert created_exp.id + exp.id = ANY + assert exp == created_exp + assert created_exp.column == new_col_name # type: ignore[attr-defined] # column exists + + for old_exp, new_exp in zip(existing_suite.expectations, updated_suite.expectations): + # expectations have been deleted and re added, not updated + assert old_exp.id != new_exp.id + + @pytest.mark.filesystem + def test_update_existing_suite_deletes_expectations__filesystem(self, empty_data_context): + self._test_update_existing_suite_deletes_expectations(empty_data_context) + + @pytest.mark.cloud + def test_update_existing_suite_deletes_expectations__cloud(self, 
empty_cloud_context_fluent): + self._test_update_existing_suite_deletes_expectations(empty_cloud_context_fluent) + + @pytest.mark.unit + def test_update_existing_suite_deletes_expectations__ephemeral( + self, ephemeral_context_with_defaults + ): + self._test_update_existing_suite_deletes_expectations(ephemeral_context_with_defaults) + + def _test_update_existing_suite_deletes_expectations(self, context: AbstractDataContext): + # arrange + suite_name = "suite A" + expectations = [ + ExpectColumnSumToBeBetween( + column="col A", + min_value=0, + max_value=10, + ), + ExpectColumnDistinctValuesToContainSet( + column="col B", + value_set=["a", "b", "c"], + ), + ] + existing_suite = context.suites.add( + suite=ExpectationSuite( + name=suite_name, + expectations=[copy(exp) for exp in expectations], + ) + ) + new_col_name = "col C" + for exp in expectations: + exp.column = new_col_name + suite = ExpectationSuite( + name=suite_name, + expectations=[], + ) + + # act + updated_suite = context.suites.add_or_update(suite=suite) + + # assert + assert updated_suite.id == existing_suite.id + assert updated_suite.expectations == [] + + @pytest.mark.filesystem + def test_add_or_update_is_idempotent__filesystem(self, empty_data_context): + self._test_add_or_update_is_idempotent(empty_data_context) + + @pytest.mark.cloud + def test_add_or_update_is_idempotent__cloud(self, empty_cloud_context_fluent): + self._test_add_or_update_is_idempotent(empty_cloud_context_fluent) + + @pytest.mark.unit + def test_add_or_update_is_idempotent__ephemeral(self, ephemeral_context_with_defaults): + self._test_add_or_update_is_idempotent(ephemeral_context_with_defaults) + + def _test_add_or_update_is_idempotent(self, context: AbstractDataContext): + # arrange + suite_name = "suite A" + expectations = [ + ExpectColumnSumToBeBetween( + column="col A", + min_value=0, + max_value=10, + ), + ExpectColumnDistinctValuesToContainSet( + column="col B", + value_set=["a", "b", "c"], + ), + ] + suite = 
ExpectationSuite( + name=suite_name, + expectations=[copy(exp) for exp in expectations], + ) + + # act + suite_1 = context.suites.add_or_update(suite=suite) + suite_2 = context.suites.add_or_update(suite=suite) + suite_3 = context.suites.add_or_update(suite=suite) + + # assert + assert suite_1 == suite_2 == suite_3 + + class TestSuiteFactoryAnalytics: @pytest.mark.filesystem def test_suite_factory_add_emits_event_filesystem(self, empty_data_context): diff --git a/tests/core/factory/test_validation_definition_factory.py b/tests/core/factory/test_validation_definition_factory.py index b7629e486877..31bc5ccb27bb 100644 --- a/tests/core/factory/test_validation_definition_factory.py +++ b/tests/core/factory/test_validation_definition_factory.py @@ -25,6 +25,7 @@ from great_expectations.data_context.data_context.cloud_data_context import ( CloudDataContext, ) +from great_expectations.data_context.data_context.ephemeral_data_context import EphemeralDataContext from great_expectations.data_context.data_context.file_data_context import ( FileDataContext, ) @@ -334,7 +335,7 @@ def test_validation_definition_factory_all( # Arrange ds = context.data_sources.add_pandas("my_datasource") - asset = ds.add_csv_asset("my_asset", "data.csv") # type: ignore[arg-type] + asset = ds.add_csv_asset("my_asset", "data.csv") # type: ignore[arg-type] # FIXME CoP suite = context.suites.add(ExpectationSuite(name="my_suite")) validation_definition_a = ValidationDefinition( name="validation definition a", @@ -375,7 +376,7 @@ def test_validation_definition_factory_all_with_bad_config( # Arrange ds = context.data_sources.add_pandas("my_datasource") - asset = ds.add_csv_asset("my_asset", "data.csv") # type: ignore[arg-type] + asset = ds.add_csv_asset("my_asset", "data.csv") # type: ignore[arg-type] # FIXME CoP suite = context.suites.add(ExpectationSuite(name="my_suite")) validation_definition_1 = ValidationDefinition( @@ -466,6 +467,207 @@ def test_validation_definition_factory_round_trip( assert 
persisted_validation_definition.json() == retrieved_validation_definition.json() +class TestValidationDefinitionFactoryAddOrUpdate: + def _build_batch_definition(self, context: AbstractDataContext): + ds = context.data_sources.add_pandas("my_datasource") + asset = ds.add_csv_asset("my_taxi_asset", pathlib.Path("data.csv")) + return asset.add_batch_definition("my_batch_definition") + + def _build_suite(self, name: str = "my_suite") -> ExpectationSuite: + return ExpectationSuite( + name=name, + expectations=[ + gxe.ExpectColumnValuesToBeBetween( + column="passenger_count", min_value=0, max_value=10 + ) + ], + ) + + @pytest.mark.filesystem + def test_add_new_validation__filesystem(self, empty_data_context: FileDataContext): + self._test_add_new_validation(empty_data_context) + + @pytest.mark.cloud + def test_add_new_validation__cloud(self, empty_cloud_context_fluent: CloudDataContext): + self._test_add_new_validation(empty_cloud_context_fluent) + + @pytest.mark.unit + def test_add_new_validation__ephemeral( + self, ephemeral_context_with_defaults: EphemeralDataContext + ): + self._test_add_new_validation(ephemeral_context_with_defaults) + + def _test_add_new_validation(self, context: AbstractDataContext): + # arrange + vd_name = "my_validation_definition" + batch_def = self._build_batch_definition(context) + suite = self._build_suite() + vd = ValidationDefinition( + name=vd_name, + data=batch_def, + suite=context.suites.add(suite), + ) + + # act + created_vd = context.validation_definitions.add_or_update(validation=vd) + + # assert + assert created_vd.id + context.validation_definitions.get(vd_name) + + @pytest.mark.filesystem + def test_add_new_validation_with_new_suite__filesystem( + self, empty_data_context: FileDataContext + ): + self._test_add_new_validation_with_new_suite(empty_data_context) + + @pytest.mark.cloud + def test_add_new_validation_with_new_suite__cloud( + self, empty_cloud_context_fluent: CloudDataContext + ): + 
self._test_add_new_validation_with_new_suite(empty_cloud_context_fluent) + + @pytest.mark.unit + def test_add_new_validation_with_new_suite__ephemeral( + self, ephemeral_context_with_defaults: EphemeralDataContext + ): + self._test_add_new_validation_with_new_suite(ephemeral_context_with_defaults) + + def _test_add_new_validation_with_new_suite(self, context: AbstractDataContext): + # arrange + vd_name = "my_validation_definition" + batch_def = self._build_batch_definition(context) + suite = self._build_suite() + vd = ValidationDefinition( + name=vd_name, + data=batch_def, + suite=suite, + ) + + # act + created_vd = context.validation_definitions.add_or_update(validation=vd) + + # assert + assert created_vd.id + context.validation_definitions.get(vd_name) + + @pytest.mark.filesystem + def test_update_existing_validation__filesystem(self, empty_data_context: FileDataContext): + self._test_update_existing_validation(empty_data_context) + + @pytest.mark.cloud + def test_update_existing_validation__cloud(self, empty_cloud_context_fluent: CloudDataContext): + self._test_update_existing_validation(empty_cloud_context_fluent) + + @pytest.mark.unit + def test_update_existing_validation__ephemeral( + self, ephemeral_context_with_defaults: EphemeralDataContext + ): + self._test_update_existing_validation(ephemeral_context_with_defaults) + + def _test_update_existing_validation(self, context: AbstractDataContext): + # arrange + vd_name = "my_validation_definition" + batch_def = self._build_batch_definition(context) + suite = self._build_suite() + vd = ValidationDefinition( + name=vd_name, + data=batch_def, + suite=context.suites.add(suite), + ) + existing_vd = context.validation_definitions.add(validation=vd) + + # act + vd.suite.expectations = [ + gxe.ExpectColumnMaxToBeBetween(column="passenger_count", min_value=0, max_value=5) + ] + + updated_vd = context.validation_definitions.add_or_update(validation=vd) + + # assert + assert updated_vd.id == existing_vd.id + assert 
len(updated_vd.suite.expectations) == 1 and isinstance( + updated_vd.suite.expectations[0], gxe.ExpectColumnMaxToBeBetween + ) + context.validation_definitions.get(vd_name) + + @pytest.mark.filesystem + def test_overwrite_existing_validation__filesystem(self, empty_data_context: FileDataContext): + self._test_overwrite_existing_validation(empty_data_context) + + @pytest.mark.cloud + def test_overwrite_existing_validation__cloud( + self, empty_cloud_context_fluent: CloudDataContext + ): + self._test_overwrite_existing_validation(empty_cloud_context_fluent) + + @pytest.mark.unit + def test_overwrite_existing_validation__ephemeral( + self, ephemeral_context_with_defaults: EphemeralDataContext + ): + self._test_overwrite_existing_validation(ephemeral_context_with_defaults) + + def _test_overwrite_existing_validation(self, context: AbstractDataContext): + # arrange + vd_name = "my_validation_definition" + batch_def = self._build_batch_definition(context) + suite = context.suites.add(self._build_suite()) + vd = ValidationDefinition( + name=vd_name, + data=batch_def, + suite=suite, + ) + existing_vd = context.validation_definitions.add(validation=vd) + + # act + new_suite = context.suites.add(self._build_suite(name="new_suite")) + new_vd = ValidationDefinition( + name=vd_name, + data=batch_def, + suite=new_suite, + ) + updated_vd = context.validation_definitions.add_or_update(validation=new_vd) + + # assert + assert updated_vd.suite.id != existing_vd.suite.id # New suite should have a different ID + assert updated_vd.data.id == existing_vd.data.id + assert updated_vd.id == existing_vd.id + context.validation_definitions.get(vd_name) + + @pytest.mark.filesystem + def test_add_or_update_is_idempotent__filesystem(self, empty_data_context: FileDataContext): + self._test_add_or_update_is_idempotent(empty_data_context) + + @pytest.mark.cloud + def test_add_or_update_is_idempotent__cloud(self, empty_cloud_context_fluent: CloudDataContext): + 
self._test_add_or_update_is_idempotent(empty_cloud_context_fluent) + + @pytest.mark.unit + def test_add_or_update_is_idempotent__ephemeral( + self, ephemeral_context_with_defaults: EphemeralDataContext + ): + self._test_add_or_update_is_idempotent(ephemeral_context_with_defaults) + + def _test_add_or_update_is_idempotent(self, context: AbstractDataContext): + # arrange + vd_name = "my_validation_definition" + batch_def = self._build_batch_definition(context) + suite = self._build_suite() + vd = ValidationDefinition( + name=vd_name, + data=batch_def, + suite=suite, + ) + + # act + vd_1 = context.validation_definitions.add_or_update(validation=vd) + vd_2 = context.validation_definitions.add_or_update(validation=vd) + vd_3 = context.validation_definitions.add_or_update(validation=vd) + + # assert + assert vd_1 == vd_2 == vd_3 + + class TestValidationDefinitionFactoryAnalytics: @pytest.mark.filesystem def test_validation_definition_factory_add_emits_event_filesystem(self, empty_data_context): diff --git a/tests/core/test__docs_decorators.py b/tests/core/test__docs_decorators.py index 696c635173dc..a67cb6d9e012 100644 --- a/tests/core/test__docs_decorators.py +++ b/tests/core/test__docs_decorators.py @@ -88,6 +88,16 @@ def my_method(self): print(f"Classes missing @public_api ->\n{pf(classes_that_need_public_api_decorator)}") assert sorted(classes_that_need_public_api_decorator.keys()) == [] + @pytest.mark.unit + def test_public_api_objects_have_docstrings(self): + """ + All objects that are decorated with @public_api should have a docstring. 
+ """ + violations = public_api_introspector.docstring_violations + assert ( + len(violations) == 0 + ), f"Public API decorated objects without docstrings: {pf(violations)}" + # @deprecated @@ -609,7 +619,7 @@ def _func_full_docstring_all_decorators_all_sections( @pytest.mark.unit def test_all_decorators_full_docstring_all_sections(): - """Makes sure that Returns and Raises are rendered correctly in the context of a full docstring.""" # noqa: E501 + """Makes sure that Returns and Raises are rendered correctly in the context of a full docstring.""" # noqa: E501 # FIXME CoP assert _func_full_docstring_all_decorators_all_sections.__doc__ == ( "--Public API--My docstring.\n" diff --git a/tests/core/test_batch.py b/tests/core/test_batch.py index db4f87553340..61a8fcabf11d 100644 --- a/tests/core/test_batch.py +++ b/tests/core/test_batch.py @@ -205,7 +205,7 @@ def test_get_batch_request_from_acceptable_arguments_block_data_connector_query( @pytest.mark.unit -def test_get_batch_request_from_acceptable_arguments_block_partitioner_sampler_batch_spec_passthrough( # noqa: E501 +def test_get_batch_request_from_acceptable_arguments_block_partitioner_sampler_batch_spec_passthrough( # noqa: E501 # FIXME CoP base_block: Dict[str, str], ): # partitioner and sampling as batch_spec_passthrough diff --git a/tests/core/test_batch_definition.py b/tests/core/test_batch_definition.py index d90f504313d5..e6f6c76ae58e 100644 --- a/tests/core/test_batch_definition.py +++ b/tests/core/test_batch_definition.py @@ -1,5 +1,6 @@ from __future__ import annotations +import pathlib import re import uuid from typing import TYPE_CHECKING, Optional @@ -27,6 +28,9 @@ import pytest_mock + from great_expectations.data_context.data_context.ephemeral_data_context import ( + EphemeralDataContext, + ) from great_expectations.datasource.fluent.batch_request import BatchRequest @@ -259,3 +263,27 @@ def test_is_fresh_fails_on_batch_definition_retrieval(in_memory_runtime_context) assert diagnostics.success is 
False assert len(diagnostics.errors) == 1 assert isinstance(diagnostics.errors[0], BatchDefinitionNotFoundError) + + +@pytest.mark.unit +def test_save(in_memory_runtime_context: EphemeralDataContext): + context = in_memory_runtime_context + + ds_name = "my_pandas_ds" + asset_name = "my_csv_asset" + batch_def_name = "my_batch_def" + + datasource = context.data_sources.add_pandas(name=ds_name) + asset = datasource.add_csv_asset(name=asset_name, filepath_or_buffer=pathlib.Path("data.csv")) + batch_definition = asset.add_batch_definition(name=batch_def_name) + + assert batch_definition.partitioner is None + + batch_definition.partitioner = FileNamePartitionerYearly(regex=re.compile("my_regex")) + batch_definition.save() + + retrieved_datasource = context.data_sources.get(name=ds_name) + retrieved_asset = retrieved_datasource.get_asset(name=asset_name) + retrieved_batch_definition = retrieved_asset.get_batch_definition(name=batch_def_name) + + assert retrieved_batch_definition.partitioner diff --git a/tests/core/test_batch_related_objects.py b/tests/core/test_batch_related_objects.py index 8e2223c808ba..d41a9eaa08fc 100644 --- a/tests/core/test_batch_related_objects.py +++ b/tests/core/test_batch_related_objects.py @@ -84,7 +84,7 @@ def test_iddict_is_hashable(): # noinspection PyBroadException,PyUnusedLocal try: # noinspection PyUnusedLocal - dictionaries_as_set: set = { # noqa: F841 + dictionaries_as_set: set = { # noqa: F841 # FIXME CoP deep_convert_properties_iterable_to_id_dict(source=data_0), deep_convert_properties_iterable_to_id_dict(source=data_1), deep_convert_properties_iterable_to_id_dict(source=data_2), diff --git a/tests/core/test_evaluation_parameters.py b/tests/core/test_evaluation_parameters.py index 4e9863c3a6bc..d5caa512833f 100644 --- a/tests/core/test_evaluation_parameters.py +++ b/tests/core/test_evaluation_parameters.py @@ -89,7 +89,7 @@ def test_parse_suite_parameter(): @pytest.mark.unit def test_parser_timing(): """We currently reuse the parser, 
clearing the stack between calls, which is about 10 times faster than not - doing so. But these operations are really quick, so this may not be necessary.""" # noqa: E501 + doing so. But these operations are really quick, so this may not be necessary.""" # noqa: E501 # FIXME CoP assert ( timeit( "parse_suite_parameter('x', {'x': 1})", @@ -110,7 +110,7 @@ def test_math_suite_paramaters(): @pytest.mark.unit def test_temporal_suite_parameters(): # allow 1 second for "now" tolerance - now = datetime.now() # noqa: DTZ005 + now = datetime.now() # noqa: DTZ005 # FIXME CoP assert ( (now - timedelta(weeks=1, seconds=3)) < dateutil.parser.parse(parse_suite_parameter("now() - timedelta(weeks=1, seconds=2)")) @@ -121,7 +121,7 @@ def test_temporal_suite_parameters(): @pytest.mark.unit def test_temporal_suite_parameters_complex(): # allow 1 second for "now" tolerance - now = datetime.now() # noqa: DTZ005 + now = datetime.now() # noqa: DTZ005 # FIXME CoP # Choosing "2*3" == 6 weeks shows we can parse an expression inside a kwarg. assert ( (now - timedelta(weeks=2 * 3, seconds=3)) @@ -173,7 +173,7 @@ def test_now_suite_parameter(): """ now() is unique in the fact that it is the only suite param built-in that has zero arity (takes no arguments). The following tests ensure that it is properly parsed and evaluated in a variety of contexts. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # By itself res = parse_suite_parameter("now()") assert dateutil.parser.parse(res), "Provided suite parameter is not dateutil-parseable" diff --git a/tests/core/test_expectation_configuration.py b/tests/core/test_expectation_configuration.py index 58e3ad6d680f..76390456dbe4 100644 --- a/tests/core/test_expectation_configuration.py +++ b/tests/core/test_expectation_configuration.py @@ -97,8 +97,8 @@ def config8(): @pytest.mark.unit def test_expectation_configuration_equality(config1, config2, config3, config4): """Equality should depend on all defined properties of a configuration object, but not on whether the *instances* - are the same.""" # noqa: E501 - assert config1 is config1 # no difference # noqa: PLR0124 + are the same.""" # noqa: E501 # FIXME CoP + assert config1 is config1 # no difference # noqa: PLR0124 # FIXME CoP assert config1 is not config2 # different instances, but same content assert config1 == config2 # different instances, but same content assert not (config1 != config2) # ne works properly diff --git a/tests/core/test_expectation_suite.py b/tests/core/test_expectation_suite.py index cdd98334f77e..71d367857192 100644 --- a/tests/core/test_expectation_suite.py +++ b/tests/core/test_expectation_suite.py @@ -4,7 +4,7 @@ from copy import copy, deepcopy from typing import Dict, Union from unittest import mock -from unittest.mock import MagicMock, Mock # noqa: TID251 +from unittest.mock import MagicMock, Mock # noqa: TID251 # FIXME CoP from uuid import UUID, uuid4 import pytest @@ -27,10 +27,15 @@ from great_expectations.data_context.data_context.context_factory import set_context from great_expectations.data_context.store.expectations_store import ExpectationsStore from great_expectations.exceptions import InvalidExpectationConfigurationError +from great_expectations.expectations import ( + ExpectColumnValuesToBeUnique, + ExpectColumnValuesToNotBeNull, +) from 
great_expectations.expectations.expectation import Expectation from great_expectations.expectations.expectation_configuration import ( ExpectationConfiguration, ) +from great_expectations.render import RenderedAtomicContent, RenderedAtomicValue @pytest.fixture @@ -152,7 +157,7 @@ def test_expectation_suite_init_overrides_expectations_dict_and_obj( """What does this test and why? The expectations param of ExpectationSuite takes a list of ExpectationConfiguration or dicts and both can be provided at the same time. We need to make sure they both show up as expectation configurations in the instantiated ExpectationSuite. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP test_expectations_input = [ expect_column_values_to_be_in_set_col_a_with_meta_dict, @@ -161,7 +166,7 @@ def test_expectation_suite_init_overrides_expectations_dict_and_obj( suite = ExpectationSuite( name=fake_expectation_suite_name, - expectations=test_expectations_input, # type: ignore[arg-type] + expectations=test_expectations_input, # type: ignore[arg-type] # FIXME CoP ) assert suite.name == fake_expectation_suite_name @@ -210,7 +215,7 @@ def __dict__(self): with pytest.raises(InvalidExpectationConfigurationError) as e: ExpectationSuite( name=fake_expectation_suite_name, - meta=test_meta, # type: ignore[arg-type] + meta=test_meta, # type: ignore[arg-type] # FIXME CoP ) assert "is of type NotSerializable which cannot be serialized to json" in str(e.value) @@ -247,7 +252,7 @@ def test_instantiate_suite_fails_with_expectations_with_ids(self, expectation): ] with pytest.raises( ValueError, - match="Expectations in parameter `expectations` must not belong to another ExpectationSuite.", # noqa: E501 + match="Expectations in parameter `expectations` must not belong to another ExpectationSuite.", # noqa: E501 # FIXME CoP ): ExpectationSuite(name=self.expectation_suite_name, expectations=expectations) @@ -394,6 +399,25 @@ def test_add_doesnt_mutate_suite_when_save_fails(self, expectation): assert 
len(suite.expectations) == 0, "Expectation must not be added to Suite." + @pytest.mark.unit + def test_add_success_when_attributes_are_identical(self): + context = Mock(spec=AbstractDataContext) + set_context(project=context) + + parameters = {"column": "passenger_count"} + expectation_a = ExpectColumnValuesToBeUnique(**parameters) + expectation_b = ExpectColumnValuesToNotBeNull(**parameters) + + suite = ExpectationSuite( + name=self.expectation_suite_name, + expectations=[expectation_a], + ) + + with mock.patch.object(ExpectationSuite, "_submit_expectation_created_event"): + suite.add_expectation(expectation=expectation_b) + + assert len(suite.expectations) == 2 + @pytest.mark.unit def test_delete_success_with_saved_suite(self, expectation): context = Mock(spec=AbstractDataContext) @@ -661,7 +685,7 @@ def test_expectation_save_callback_can_come_from_any_copy_of_a_suite( with the remote ExpectationSuite. ExpectationSuite._save_expectation (and the corresponding logic the suite uses within the ExpectationsStore) must work equivalently regardless of which Suite instance it belongs to. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # Arrange context = empty_cloud_context_fluent suite_name = "test-suite" @@ -892,7 +916,84 @@ def test_expectation_suite_equality_false( assert different_but_equivalent_suite != suite_with_single_expectation -# ### Below this line are mainly existing tests and fixtures that we are in the process of cleaning up # noqa: E501 +class TestExpectationsAreEqualish: + @pytest.mark.unit + @pytest.mark.parametrize( + "expectation_a,expectation_b", + [ + pytest.param( + gxe.ExpectColumnValuesToBeInSet( + column="a", + value_set=[1, 2, 3], + result_format="BASIC", + ), + gxe.ExpectColumnValuesToBeInSet( + column="a", + value_set=[1, 2, 3], + result_format="BASIC", + ), + id="same args passed in", + ), + pytest.param( + gxe.ExpectColumnValuesToNotBeNull(column="a", id=str(uuid4())), + gxe.ExpectColumnValuesToNotBeNull(column="a", id=str(uuid4())), + id="different ids", + ), + pytest.param( + gxe.ExpectColumnValuesToNotBeNull(column="a", rendered_content=[]), + gxe.ExpectColumnValuesToNotBeNull( + column="a", + rendered_content=[ + RenderedAtomicContent( + name="atomic.prescriptive.summary", + value=RenderedAtomicValue(template="Render this string!"), + value_type="StringValueType", + ) + ], + ), + id="different rendered_content", + ), + pytest.param( + gxe.ExpectColumnValuesToNotBeNull(column="a", notes="Note ABC"), + gxe.ExpectColumnValuesToNotBeNull(column="a", notes="Note 123"), + id="different notes", + ), + pytest.param( + gxe.ExpectColumnValuesToNotBeNull(column="a", meta={"warehouse": "theirs"}), + gxe.ExpectColumnValuesToNotBeNull(column="a", meta={"warehouse": "ours"}), + id="different meta", + ), + ], + ) + def test_equalish(self, expectation_a, expectation_b): + assert ExpectationSuite._expectations_are_equalish(expectation_a, expectation_b) + + @pytest.mark.unit + @pytest.mark.parametrize( + "expectation_a,expectation_b", + [ + pytest.param( + gxe.ExpectColumnValuesToNotBeNull(column="a"), + 
gxe.ExpectColumnValuesToBeUnique(column="b"), + id="different expectation, different args passed in", + ), + pytest.param( + gxe.ExpectColumnValuesToNotBeNull(column="a"), + gxe.ExpectColumnValuesToBeUnique(column="a"), + id="different expectation, same args passed in", + ), + pytest.param( + gxe.ExpectColumnValuesToNotBeNull(column="a"), + gxe.ExpectColumnValuesToNotBeNull(column="b"), + id="same expectation, different args passed in", + ), + ], + ) + def test_not_equalish(self, expectation_a, expectation_b): + assert not ExpectationSuite._expectations_are_equalish(expectation_a, expectation_b) + + +# ### Below this line are mainly existing tests and fixtures that we are in the process of cleaning up # noqa: E501 # FIXME CoP @pytest.fixture diff --git a/tests/core/test_expectation_validation_result.py b/tests/core/test_expectation_validation_result.py index 3e9cc8557b7e..47639d44ed60 100644 --- a/tests/core/test_expectation_validation_result.py +++ b/tests/core/test_expectation_validation_result.py @@ -2,6 +2,7 @@ import json +import pandas as pd import pytest import great_expectations.expectations as gxe @@ -26,7 +27,7 @@ def test_expectation_validation_result_describe_returns_expected_description(): column="passenger_count", min_value=0, max_value=6, - notes="Per the TLC data dictionary, this is a driver-submitted value (historically between 0 to 6)", # noqa: E501 + notes="Per the TLC data dictionary, this is a driver-submitted value (historically between 0 to 6)", # noqa: E501 # FIXME CoP ).configuration, result={ "element_count": 100000, @@ -107,7 +108,7 @@ def test_expectation_validation_result_describe_returns_expected_description_wit column="passenger_count", min_value=0, max_value=6, - notes="Per the TLC data dictionary, this is a driver-submitted value (historically between 0 to 6)", # noqa: E501 + notes="Per the TLC data dictionary, this is a driver-submitted value (historically between 0 to 6)", # noqa: E501 # FIXME CoP ).configuration, result={ 
"element_count": 100000, @@ -203,7 +204,7 @@ def test_expectation_suite_validation_result_returns_expected_shape( "expectation_config": ExpectationConfiguration( **{ "meta": {}, - "notes": "Per the TLC data dictionary, this is a driver-submitted value (historically between 0 to 6)", # noqa: E501 + "notes": "Per the TLC data dictionary, this is a driver-submitted value (historically between 0 to 6)", # noqa: E501 # FIXME CoP "id": "9f76d0b5-9d99-4ed9-a269-339b35e60490", "kwargs": { "batch_id": "default_pandas_datasource-#ephemeral_pandas_asset", @@ -403,7 +404,7 @@ def test_render_updates_rendered_content(): column="passenger_count", min_value=0, max_value=6, - notes="Per the TLC data dictionary, this is a driver-submitted value (historically between 0 to 6)", # noqa: E501 + notes="Per the TLC data dictionary, this is a driver-submitted value (historically between 0 to 6)", # noqa: E501 # FIXME CoP ).configuration, result={ "element_count": 100000, @@ -424,3 +425,71 @@ def test_render_updates_rendered_content(): evr.render() assert evr.rendered_content is not None + + +class TestSerialization: + @pytest.mark.unit + def test_expectation_validation_results_serializes(self) -> None: + evr = ExpectationValidationResult( + success=True, + expectation_config=gxe.ExpectColumnDistinctValuesToEqualSet( + column="passenger_count", + value_set=[1, 2], + ).configuration, + result={ + "details": { + "observed_value": pd.Series({"a": 1, "b": 2, "c": 4}), + } + }, + ) + + # Ensure the results are serializable. 
+ as_dict = evr.describe_dict() + from_describe_dict = json.dumps(as_dict, indent=4) + from_describe = evr.describe() + + assert from_describe_dict == from_describe + assert as_dict["result"]["details"]["observed_value"] == [ + {"index": "a", "value": 1}, + {"index": "b", "value": 2}, + {"index": "c", "value": 4}, + ] + + @pytest.mark.unit + def test_expectation_suite_validation_results_serializes(self) -> None: + svr = ExpectationSuiteValidationResult( + success=True, + statistics={ + "evaluated_expectations": 2, + "successful_expectations": 2, + "unsuccessful_expectations": 0, + "success_percent": 100.0, + }, + suite_name="whatever", + results=[ + ExpectationValidationResult( + success=True, + expectation_config=gxe.ExpectColumnDistinctValuesToEqualSet( + column="passenger_count", + value_set=[1, 2], + ).configuration, + result={ + "details": { + "observed_value": pd.Series({"a": 1, "b": 2, "c": 4}), + } + }, + ) + ], + ) + + # Ensure the results are serializable. + as_dict = svr.describe_dict() + from_describe_dict = json.dumps(as_dict, indent=4) + from_describe = svr.describe() + + assert from_describe_dict == from_describe + assert as_dict["expectations"][0]["result"]["details"]["observed_value"] == [ + {"index": "a", "value": 1}, + {"index": "b", "value": 2}, + {"index": "c", "value": 4}, + ] diff --git a/tests/core/test_serialization.py b/tests/core/test_serialization.py index 25726c80fac7..1a6259fa96d6 100644 --- a/tests/core/test_serialization.py +++ b/tests/core/test_serialization.py @@ -175,7 +175,7 @@ def test_batch_request_deepcopy(): } }, "class_name": "ConfiguredAssetFilesystemDataConnector", - "module_name": "great_expectations.datasource.data_connector.configured_asset_filesystem_data_connector", # noqa: E501 + "module_name": "great_expectations.datasource.data_connector.configured_asset_filesystem_data_connector", # noqa: E501 # FIXME CoP }, id="data_connector_with_schema", ), @@ -205,7 +205,7 @@ def test_batch_request_deepcopy(): } }, 
"class_name": "ConfiguredAssetFilesystemDataConnector", - "module_name": "great_expectations.datasource.data_connector.configured_asset_filesystem_data_connector", # noqa: E501 + "module_name": "great_expectations.datasource.data_connector.configured_asset_filesystem_data_connector", # noqa: E501 # FIXME CoP }, id="data_connector_without_schema", ), diff --git a/tests/core/test_util.py b/tests/core/test_util.py index d602de91163c..29e04f7bf391 100644 --- a/tests/core/test_util.py +++ b/tests/core/test_util.py @@ -14,7 +14,7 @@ @pytest.mark.unit def test_substitute_all_strftime_format_strings(): - now = datetime.datetime.utcnow() # noqa: DTZ003 + now = datetime.datetime.utcnow() # noqa: DTZ003 # FIXME CoP input_dict = { "month_no": "%m", diff --git a/tests/core/test_validation_definition.py b/tests/core/test_validation_definition.py index 5ec37d24ce65..f0db2e841539 100644 --- a/tests/core/test_validation_definition.py +++ b/tests/core/test_validation_definition.py @@ -63,7 +63,7 @@ ) if TYPE_CHECKING: - from unittest.mock import MagicMock # noqa: TID251 + from unittest.mock import MagicMock # noqa: TID251 # FIXME CoP from pytest_mock import MockerFixture @@ -99,7 +99,7 @@ def validation_definition(ephemeral_context: EphemeralDataContext) -> Validation context = ephemeral_context batch_definition = ( context.data_sources.add_pandas(DATA_SOURCE_NAME) - .add_csv_asset(ASSET_NAME, "taxi.csv") # type: ignore + .add_csv_asset(ASSET_NAME, "taxi.csv") # type: ignore # FIXME CoP .add_batch_definition(BATCH_DEFINITION_NAME) ) return context.validation_definitions.add( @@ -165,7 +165,7 @@ def cloud_validation_definition( context = empty_cloud_data_context batch_definition = ( empty_cloud_data_context.data_sources.add_pandas(DATA_SOURCE_NAME) - .add_csv_asset(ASSET_NAME, "taxi.csv") # type: ignore + .add_csv_asset(ASSET_NAME, "taxi.csv") # type: ignore # FIXME CoP .add_batch_definition(BATCH_DEFINITION_NAME) ) suite = context.suites.add(ExpectationSuite(name="my_suite")) diff 
--git a/tests/core/test_yaml_handler.py b/tests/core/test_yaml_handler.py index 93aef8d4f7bc..20da9a0b5da3 100644 --- a/tests/core/test_yaml_handler.py +++ b/tests/core/test_yaml_handler.py @@ -50,7 +50,7 @@ def test_load_incorrect_input(yaml_handler: YAMLHandler) -> None: @pytest.mark.filesystem def test_file_output(tmp_path: Path, yaml_handler: YAMLHandler) -> None: simplest_yaml: str = "abc: 1" - test_file: str = os.path.join(tmp_path, "out.yaml") # noqa: PTH118 + test_file: str = os.path.join(tmp_path, "out.yaml") # noqa: PTH118 # FIXME CoP out: Path = Path(test_file) data: dict = yaml_handler.load(simplest_yaml) @@ -76,7 +76,7 @@ def test_dump_default_behavior_with_no_stream_specified( @pytest.mark.unit def test_dump_stdout_specified(capsys, yaml_handler: YAMLHandler) -> None: - # ruamel documentation recommends that we specify the stream as stdout when we are using YAML to return a string. # noqa: E501 + # ruamel documentation recommends that we specify the stream as stdout when we are using YAML to return a string. 
# noqa: E501 # FIXME CoP simplest_dict: dict = dict(abc=1) yaml_handler.dump(simplest_dict, stream=sys.stdout) captured: Any = capsys.readouterr() diff --git a/tests/data_context/cloud_data_context/test_datasource_crud.py b/tests/data_context/cloud_data_context/test_datasource_crud.py index 2d877b039a96..7d0926790e4c 100644 --- a/tests/data_context/cloud_data_context/test_datasource_crud.py +++ b/tests/data_context/cloud_data_context/test_datasource_crud.py @@ -47,7 +47,7 @@ def test_cloud_context_add_datasource_with_fds( def test_cloud_context_datasource_crud_e2e() -> None: context = gx.get_context(cloud_mode=True) - datasource_name = f"OSSTestDatasource_{''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))}" # noqa: E501 + datasource_name = f"OSSTestDatasource_{''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))}" # noqa: E501 # FIXME CoP context.data_sources.add_pandas(name=datasource_name) diff --git a/tests/data_context/cloud_data_context/test_include_rendered_content.py b/tests/data_context/cloud_data_context/test_include_rendered_content.py index 89a8ee122f2f..a913fb8a1125 100644 --- a/tests/data_context/cloud_data_context/test_include_rendered_content.py +++ b/tests/data_context/cloud_data_context/test_include_rendered_content.py @@ -13,7 +13,7 @@ @pytest.mark.xfail( - reason="add_or_update not responsible for rendered content - rewrite test for new suites factory" # noqa: E501 + reason="add_or_update not responsible for rendered content - rewrite test for new suites factory" # noqa: E501 # FIXME CoP ) @pytest.mark.cloud def test_cloud_backed_data_context_expectation_validation_result_include_rendered_content( @@ -21,11 +21,11 @@ def test_cloud_backed_data_context_expectation_validation_result_include_rendere ) -> None: """ All CloudDataContexts should save an ExpectationValidationResult with rendered_content by default. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP context = empty_cloud_context_fluent df = pd.DataFrame([1, 2, 3, 4, 5]) - suite_name = f"test_suite_{''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))}" # noqa: E501 + suite_name = f"test_suite_{''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))}" # noqa: E501 # FIXME CoP data_asset = context.data_sources.pandas_default.add_dataframe_asset( name="my_dataframe_asset", diff --git a/tests/data_context/conftest.py b/tests/data_context/conftest.py index 0f06bdb97bd6..31b3409a16c6 100644 --- a/tests/data_context/conftest.py +++ b/tests/data_context/conftest.py @@ -6,7 +6,7 @@ import shutil import unittest.mock from typing import Any, Callable, Dict, Optional, Union, cast -from unittest.mock import Mock # noqa: TID251 +from unittest.mock import Mock # noqa: TID251 # FIXME CoP import pytest import requests @@ -33,10 +33,10 @@ @pytest.fixture() def data_context_without_config_variables_filepath_configured(tmp_path_factory): - # This data_context is *manually* created to have the config we want, vs created with DataContext.create # noqa: E501 + # This data_context is *manually* created to have the config we want, vs created with DataContext.create # noqa: E501 # FIXME CoP project_path = str(tmp_path_factory.mktemp("data_context")) - context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 - asset_config_path = os.path.join(context_path, "expectations") # noqa: PTH118 + context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP + asset_config_path = os.path.join(context_path, "expectations") # noqa: PTH118 # FIXME CoP create_data_context_files( context_path, @@ -52,10 +52,10 @@ def data_context_without_config_variables_filepath_configured(tmp_path_factory): def data_context_with_variables_in_config(tmp_path_factory, monkeypatch): monkeypatch.setenv("FOO", "BAR") monkeypatch.setenv("REPLACE_ME_ESCAPED_ENV", 
"ive_been_$--replaced") - # This data_context is *manually* created to have the config we want, vs created with DataContext.create # noqa: E501 + # This data_context is *manually* created to have the config we want, vs created with DataContext.create # noqa: E501 # FIXME CoP project_path = str(tmp_path_factory.mktemp("data_context")) - context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 - asset_config_path = os.path.join(context_path, "expectations") # noqa: PTH118 + context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP + asset_config_path = os.path.join(context_path, "expectations") # noqa: PTH118 # FIXME CoP create_data_context_files( context_path, @@ -74,47 +74,47 @@ def create_data_context_files( config_variables_fixture_filename=None, ): if config_variables_fixture_filename: - os.makedirs(context_path, exist_ok=True) # noqa: PTH103 - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "uncommitted"), # noqa: PTH118 + os.makedirs(context_path, exist_ok=True) # noqa: PTH103 # FIXME CoP + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "uncommitted"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) copy_relative_path( f"../test_fixtures/{config_variables_fixture_filename}", str( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP context_path, "uncommitted/config_variables.yml" ) ), ) copy_relative_path( f"../test_fixtures/{ge_config_fixture_filename}", - str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 + str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 # FIXME CoP ) else: - os.makedirs(context_path, exist_ok=True) # noqa: PTH103 + os.makedirs(context_path, exist_ok=True) # noqa: PTH103 # FIXME CoP copy_relative_path( f"../test_fixtures/{ge_config_fixture_filename}", - str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 + str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: 
PTH118 # FIXME CoP ) create_common_data_context_files(context_path, asset_config_path) def create_common_data_context_files(context_path, asset_config_path): - os.makedirs( # noqa: PTH103 - os.path.join( # noqa: PTH118 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join( # noqa: PTH118 # FIXME CoP asset_config_path, "mydatasource/mygenerator/my_dag_node" ), exist_ok=True, ) copy_relative_path( "../test_fixtures/" "expectation_suites/parameterized_expectation_suite_fixture.json", - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP asset_config_path, "mydatasource/mygenerator/my_dag_node/default.json" ), ) - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "plugins"), # noqa: PTH118 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "plugins"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) diff --git a/tests/data_context/fixtures/plugins/my_custom_non_core_ge_class.py b/tests/data_context/fixtures/plugins/my_custom_non_core_ge_class.py index e0a78b6c1b41..895e94c3010b 100644 --- a/tests/data_context/fixtures/plugins/my_custom_non_core_ge_class.py +++ b/tests/data_context/fixtures/plugins/my_custom_non_core_ge_class.py @@ -2,4 +2,4 @@ class MyCustomNonCoreGeClass: """ This class is used only for testing. E.g. ensuring appropriate usage stats messaging when using plugin functionality when the custom class is not a core GX class type. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP diff --git a/tests/data_context/store/test_configuration_store.py b/tests/data_context/store/test_configuration_store.py index 4c97180535f5..196aa26175ab 100644 --- a/tests/data_context/store/test_configuration_store.py +++ b/tests/data_context/store/test_configuration_store.py @@ -322,6 +322,6 @@ class InvalidConfigurationStore(ConfigurationStore): InvalidConfigurationStore(store_name="my_configuration_store") assert ( - "Invalid configuration: A configuration_class needs to inherit from the BaseYamlConfig class." 
# noqa: E501 + "Invalid configuration: A configuration_class needs to inherit from the BaseYamlConfig class." # noqa: E501 # FIXME CoP in str(e.value) ) diff --git a/tests/data_context/store/test_database_store_backend.py b/tests/data_context/store/test_database_store_backend.py index 49224265c4ad..d22c76bb94dd 100644 --- a/tests/data_context/store/test_database_store_backend.py +++ b/tests/data_context/store/test_database_store_backend.py @@ -160,7 +160,7 @@ def test_database_store_backend_id_initialization(caplog, sa, test_backends): Note: StoreBackend & TupleStoreBackend are abstract classes, so we will test the concrete classes that inherit from them. See also test_store_backends::test_StoreBackend_id_initialization - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if "postgresql" not in test_backends: pytest.skip("test_database_store_backend_id_initialization requires postgresql") diff --git a/tests/data_context/store/test_datasource_store_cloud_backend.py b/tests/data_context/store/test_datasource_store_cloud_backend.py index 4e21ecabba84..de58a2af1761 100644 --- a/tests/data_context/store/test_datasource_store_cloud_backend.py +++ b/tests/data_context/store/test_datasource_store_cloud_backend.py @@ -21,7 +21,7 @@ def test_datasource_store_get_by_id( """What does this test and why? The datasource store when used with a cloud backend should emit the correct request when getting a datasource. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP id = "8706d5fb-0432-47ab-943c-daa824210e99" @@ -61,7 +61,7 @@ def test_datasource_store_get_by_name( """What does this test and why? The datasource store when used with a cloud backend should emit the correct request when getting a datasource with a name. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP id = "8706d5fb-0432-47ab-943c-daa824210e99" datasource_name: str = "example_datasource_config_name" @@ -84,7 +84,7 @@ def mocked_response(*args, **kwargs): "great_expectations.data_context.store.DatasourceStore.has_key", autospec=True ) as mock_has_key, ): - # Mocking has_key so that we don't try to connect to the cloud backend to verify key existence. # noqa: E501 + # Mocking has_key so that we don't try to connect to the cloud backend to verify key existence. # noqa: E501 # FIXME CoP mock_has_key.return_value = True datasource_store_ge_cloud_backend.retrieve_by_name(name=datasource_name) @@ -107,7 +107,7 @@ def test_datasource_store_delete_by_id( """What does this test and why? The datasource store when used with a cloud backend should emit the correct request when getting a datasource. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP id: str = "example_id_normally_uuid" key = GXCloudIdentifier(resource_type=GXCloudRESTResource.DATASOURCE, id=id) @@ -142,7 +142,7 @@ def test_datasource_store_delete_by_id( ) def test_datasource_http_error_handling( datasource_store_ge_cloud_backend: DatasourceStore, - mock_http_unavailable: Dict[str, mock.Mock], # noqa: TID251 + mock_http_unavailable: Dict[str, mock.Mock], # noqa: TID251 # FIXME CoP http_verb: str, method: str, args: list, diff --git a/tests/data_context/store/test_expectations_store.py b/tests/data_context/store/test_expectations_store.py index 4bee0c64200c..f9de37c60af5 100644 --- a/tests/data_context/store/test_expectations_store.py +++ b/tests/data_context/store/test_expectations_store.py @@ -196,6 +196,24 @@ def _create_suite_config(name: str, id: str, expectations: list[dict] | None = N [ { "type": "expect_column_to_exist", + "description": None, + "id": "c8a239a6-fb80-4f51-a90e-40c38dffdf91", + "kwargs": {"column": "infinities"}, + "meta": {}, + "expectation_context": None, + "rendered_content": [], + } + ], +) + +# this should have non-null values 
+_SUITE_CONFIG_WITH_FULLY_POPULATED_EXPECTATIONS = _create_suite_config( + "my_suite_with_expectations", + "03d61d4e-003f-48e7-a3b2-f9f842384da3", + [ + { + "type": "expect_column_to_exist", + "description": "some description", "id": "c8a239a6-fb80-4f51-a90e-40c38dffdf91", "kwargs": {"column": "infinities"}, "meta": {}, @@ -229,6 +247,12 @@ def _create_suite_config(name: str, id: str, expectations: list[dict] | None = N None, id="null_result_format", ), + pytest.param( + {"data": _SUITE_CONFIG_WITH_FULLY_POPULATED_EXPECTATIONS}, + _SUITE_CONFIG_WITH_FULLY_POPULATED_EXPECTATIONS, + None, + id="null_result_format", + ), ], ) def test_gx_cloud_response_json_to_object_dict( diff --git a/tests/data_context/store/test_store_backends.py b/tests/data_context/store/test_store_backends.py index d2cb5d865f26..22d9763e16a7 100644 --- a/tests/data_context/store/test_store_backends.py +++ b/tests/data_context/store/test_store_backends.py @@ -88,7 +88,7 @@ def check_store_backend_store_backend_id_functionality( store_backend_id: Manually input store_backend_id Returns: None - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # Check that store_backend_id exists can be read assert store_backend.store_backend_id is not None store_error_uuid = "00000000-0000-0000-0000-00000000e003" @@ -122,7 +122,7 @@ def test_StoreBackend_id_initialization(tmp_path_factory, aws_credentials): Note: StoreBackend & TupleStoreBackend are abstract classes, so we will test the concrete classes that inherit from them. 
See also test_database_store_backend::test_database_store_backend_id_initialization - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # InMemoryStoreBackend # Initialize without store_backend_id and check that it is generated correctly @@ -131,8 +131,8 @@ def test_StoreBackend_id_initialization(tmp_path_factory, aws_credentials): # Create a new store with the same config and make sure it reports the same store_backend_id # in_memory_store_backend_duplicate = InMemoryStoreBackend() - # assert in_memory_store_backend.store_backend_id == in_memory_store_backend_duplicate.store_backend_id # noqa: E501 - # This is not currently implemented for the InMemoryStoreBackend, the store_backend_id is ephemeral since # noqa: E501 + # assert in_memory_store_backend.store_backend_id == in_memory_store_backend_duplicate.store_backend_id # noqa: E501 # FIXME CoP + # This is not currently implemented for the InMemoryStoreBackend, the store_backend_id is ephemeral since # noqa: E501 # FIXME CoP # there is no place to persist it. 
# TupleFilesystemStoreBackend @@ -144,7 +144,7 @@ def test_StoreBackend_id_initialization(tmp_path_factory, aws_credentials): tuple_filesystem_store_backend = TupleFilesystemStoreBackend( root_directory=project_path, - base_directory=os.path.join(project_path, path), # noqa: PTH118 + base_directory=os.path.join(project_path, path), # noqa: PTH118 # FIXME CoP ) # Check that store_backend_id is created on instantiation, before being accessed desired_directory_tree_str = f"""\ @@ -164,7 +164,7 @@ def test_StoreBackend_id_initialization(tmp_path_factory, aws_credentials): project_path_with_filepath_template = str(full_test_dir_with_file_template) tuple_filesystem_store_backend_with_filepath_template = TupleFilesystemStoreBackend( - root_directory=os.path.join(project_path, path), # noqa: PTH118 + root_directory=os.path.join(project_path, path), # noqa: PTH118 # FIXME CoP base_directory=project_path_with_filepath_template, filepath_template="my_file_{0}", ) @@ -182,7 +182,7 @@ def test_StoreBackend_id_initialization(tmp_path_factory, aws_credentials): # Create a new store with the same config and make sure it reports the same store_backend_id tuple_filesystem_store_backend_duplicate = TupleFilesystemStoreBackend( root_directory=project_path, - base_directory=os.path.join(project_path, path), # noqa: PTH118 + base_directory=os.path.join(project_path, path), # noqa: PTH118 # FIXME CoP # filepath_template="my_file_{0}", ) check_store_backend_store_backend_id_functionality( @@ -223,7 +223,7 @@ def test_StoreBackend_id_initialization(tmp_path_factory, aws_credentials): # TODO: Improve GCS Testing e.g. 
using a docker service to mock # Note: Currently there is not a great way to mock GCS so here we are just testing that a config # with unreachable bucket returns the error store backend id - # If we were to mock GCS, we would need to provide the value returned from the TupleGCSStoreBackend which # noqa: E501 + # If we were to mock GCS, we would need to provide the value returned from the TupleGCSStoreBackend which # noqa: E501 # FIXME CoP # is circumventing actually testing the store backend. bucket = "leakybucket" @@ -307,7 +307,7 @@ def test_tuple_filesystem_store_filepath_prefix_error(tmp_path_factory): with pytest.raises(StoreBackendError) as e: TupleFilesystemStoreBackend( root_directory=project_path, - base_directory=os.path.join(project_path, path), # noqa: PTH118 + base_directory=os.path.join(project_path, path), # noqa: PTH118 # FIXME CoP filepath_prefix="invalid_prefix_ends_with/", ) assert "filepath_prefix may not end with" in e.value.message @@ -315,7 +315,7 @@ def test_tuple_filesystem_store_filepath_prefix_error(tmp_path_factory): with pytest.raises(StoreBackendError) as e: TupleFilesystemStoreBackend( root_directory=project_path, - base_directory=os.path.join(project_path, path), # noqa: PTH118 + base_directory=os.path.join(project_path, path), # noqa: PTH118 # FIXME CoP filepath_prefix="invalid_prefix_ends_with\\", ) assert "filepath_prefix may not end with" in e.value.message @@ -330,7 +330,7 @@ def test_FilesystemStoreBackend_two_way_string_conversion(tmp_path_factory): my_store = TupleFilesystemStoreBackend( root_directory=project_path, - base_directory=os.path.join(project_path, path), # noqa: PTH118 + base_directory=os.path.join(project_path, path), # noqa: PTH118 # FIXME CoP filepath_template="{0}/{1}/{2}/foo-{2}-expectations.txt", ) @@ -356,7 +356,7 @@ def test_TupleFilesystemStoreBackend(tmp_path_factory): my_store = TupleFilesystemStoreBackend( root_directory=project_path, - base_directory=os.path.join(project_path, path), # noqa: PTH118 + 
base_directory=os.path.join(project_path, path), # noqa: PTH118 # FIXME CoP filepath_template="my_file_{0}", ) @@ -386,7 +386,7 @@ def test_TupleFilesystemStoreBackend(tmp_path_factory): my_store_with_base_public_path = TupleFilesystemStoreBackend( root_directory=project_path, - base_directory=os.path.join(project_path, path), # noqa: PTH118 + base_directory=os.path.join(project_path, path), # noqa: PTH118 # FIXME CoP filepath_template="my_file_{0}", base_public_path=base_public_path, ) @@ -403,7 +403,7 @@ def test_TupleFilesystemStoreBackend_get_all(tmp_path_factory): my_store = TupleFilesystemStoreBackend( root_directory=project_path, - base_directory=os.path.join(project_path, path), # noqa: PTH118 + base_directory=os.path.join(project_path, path), # noqa: PTH118 # FIXME CoP filepath_template="my_file_{0}", ) @@ -426,16 +426,16 @@ def test_TupleFilesystemStoreBackend_ignores_jupyter_notebook_checkpoints( test_dir = full_test_dir.parts[-1] project_path = str(full_test_dir) - checkpoint_dir = os.path.join(project_path, ".ipynb_checkpoints") # noqa: PTH118 - os.mkdir(checkpoint_dir) # noqa: PTH102 - assert os.path.isdir(checkpoint_dir) # noqa: PTH112 - nb_file = os.path.join(checkpoint_dir, "foo.json") # noqa: PTH118 + checkpoint_dir = os.path.join(project_path, ".ipynb_checkpoints") # noqa: PTH118 # FIXME CoP + os.mkdir(checkpoint_dir) # noqa: PTH102 # FIXME CoP + assert os.path.isdir(checkpoint_dir) # noqa: PTH112 # FIXME CoP + nb_file = os.path.join(checkpoint_dir, "foo.json") # noqa: PTH118 # FIXME CoP with open(nb_file, "w") as f: f.write("") - assert os.path.isfile(nb_file) # noqa: PTH113 + assert os.path.isfile(nb_file) # noqa: PTH113 # FIXME CoP my_store = TupleFilesystemStoreBackend( - root_directory=os.path.join(project_path, "dummy_str"), # noqa: PTH118 + root_directory=os.path.join(project_path, "dummy_str"), # noqa: PTH118 # FIXME CoP base_directory=project_path, ) @@ -492,7 +492,7 @@ def test_TupleS3StoreBackend_with_prefix(aws_credentials): obj = 
boto3.client("s3").get_object(Bucket=bucket, Key=prefix + "/my_file_AAA") assert obj["ContentType"] == "text/html; charset=utf-8" - assert obj["ContentEncoding"] == "utf-8" + assert obj["ContentEncoding"] == "utf-8,aws-chunked" my_store.set(("BBB",), "bbb") assert my_store.get(("BBB",)) == "bbb" @@ -595,7 +595,7 @@ def test_TupleS3StoreBackend_get_all(aws_credentials): @mock_s3 @pytest.mark.aws_deps -def test_tuple_s3_store_backend_slash_conditions(aws_credentials): # noqa: PLR0915 +def test_tuple_s3_store_backend_slash_conditions(aws_credentials): # noqa: PLR0915 # FIXME CoP bucket = "my_bucket" prefix = None conn = boto3.resource("s3", region_name="us-east-1") @@ -790,7 +790,7 @@ def test_TupleS3StoreBackend_with_empty_prefixes(aws_credentials): obj = boto3.client("s3").get_object(Bucket=bucket, Key=prefix + "my_file_AAA") assert my_store._build_s3_object_key(("AAA",)) == "my_file_AAA" assert obj["ContentType"] == "text/html; charset=utf-8" - assert obj["ContentEncoding"] == "utf-8" + assert obj["ContentEncoding"] == "utf-8,aws-chunked" my_store.set(("BBB",), "bbb") assert my_store.get(("BBB",)) == "bbb" @@ -816,7 +816,7 @@ def test_TupleS3StoreBackend_with_s3_put_options(aws_credentials): my_store = TupleS3StoreBackend( bucket=bucket, - # Since not all out options are supported in moto, only Metadata and StorageClass is passed here. # noqa: E501 + # Since not all out options are supported in moto, only Metadata and StorageClass is passed here. 
# noqa: E501 # FIXME CoP s3_put_options={ "Metadata": {"test": "testMetadata"}, "StorageClass": "REDUCED_REDUNDANCY", @@ -857,7 +857,7 @@ def test_TupleGCSStoreBackend_base_public_path(): This test will exercise the get_url_for_key method twice to see that we are getting the expected url, with or without base_public_path - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP bucket = "leakybucket" prefix = "this_is_a_test_prefix" project = "dummy-project" @@ -876,7 +876,7 @@ def test_TupleGCSStoreBackend_base_public_path(): ("BBB",), b"bbb", content_encoding=None, content_type="image/png" ) - run_id = RunIdentifier("my_run_id", datetime.datetime.utcnow()) # noqa: DTZ003 + run_id = RunIdentifier("my_run_id", datetime.datetime.utcnow()) # noqa: DTZ003 # FIXME CoP key = ValidationResultIdentifier( ExpectationSuiteIdentifier(name="my_suite_name"), run_id, @@ -902,7 +902,7 @@ def test_TupleGCSStoreBackend_base_public_path(): ) @pytest.mark.slow # 1.35s @pytest.mark.big -def test_TupleGCSStoreBackend(): # noqa: PLR0915 +def test_TupleGCSStoreBackend(): # noqa: PLR0915 # FIXME CoP # pytest.importorskip("google-cloud-storage") """ What does this test test and why? @@ -911,7 +911,7 @@ def test_TupleGCSStoreBackend(): # noqa: PLR0915 and assert that the store backend makes the right calls for set, get, and list. TODO : One option may be to have a GCS Store in Docker, which can be use to "actually" run these tests. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP bucket = "leakybucket" prefix = "this_is_a_test_prefix" @@ -993,7 +993,7 @@ def test_TupleGCSStoreBackend(): # noqa: PLR0915 with pytest.raises(InvalidKeyError): my_store.get(("non_existent_key",)) - run_id = RunIdentifier("my_run_id", datetime.datetime.utcnow()) # noqa: DTZ003 + run_id = RunIdentifier("my_run_id", datetime.datetime.utcnow()) # noqa: DTZ003 # FIXME CoP key = ValidationResultIdentifier( ExpectationSuiteIdentifier(name="my_suite_name"), run_id, @@ -1120,7 +1120,7 @@ def test_TupleAzureBlobStoreBackend_connection_string(): Since no package like moto exists for Azure-Blob services, we mock the Azure-blob client and assert that the store backend makes the right calls for set, get, and list. """ - connection_string = "DefaultEndpointsProtocol=https;AccountName=dummy;AccountKey=secret;EndpointSuffix=core.windows.net" # noqa: E501 + connection_string = "DefaultEndpointsProtocol=https;AccountName=dummy;AccountKey=secret;EndpointSuffix=core.windows.net" # noqa: E501 # FIXME CoP prefix = "this_is_a_test_prefix" container = "dummy-container" @@ -1539,11 +1539,11 @@ def test_InMemoryStoreBackend_add_or_update(previous_key_exists: bool): @pytest.mark.unit def test_store_backend_path_special_character_escape(): - path = "/validations/default/pandas_data_asset/20230315T205136.109084Z/default_pandas_datasource-#ephemeral_pandas_asset.html" # noqa: E501 + path = "/validations/default/pandas_data_asset/20230315T205136.109084Z/default_pandas_datasource-#ephemeral_pandas_asset.html" # noqa: E501 # FIXME CoP escaped_path = StoreBackend._url_path_escape_special_characters(path=path) assert ( escaped_path - == "/validations/default/pandas_data_asset/20230315T205136.109084Z/default_pandas_datasource-%23ephemeral_pandas_asset.html" # noqa: E501 + == "/validations/default/pandas_data_asset/20230315T205136.109084Z/default_pandas_datasource-%23ephemeral_pandas_asset.html" # noqa: E501 # FIXME CoP ) diff --git 
a/tests/data_context/store/test_v1_checkpoint_store.py b/tests/data_context/store/test_v1_checkpoint_store.py index bdaa764d5666..595709cc9add 100644 --- a/tests/data_context/store/test_v1_checkpoint_store.py +++ b/tests/data_context/store/test_v1_checkpoint_store.py @@ -188,8 +188,8 @@ def test_get_key(request, store_fixture: str): @pytest.mark.cloud def test_get_key_cloud(cloud_backed_store: CheckpointStore): key = cloud_backed_store.get_key(name="my_checkpoint") - assert key.resource_type == GXCloudRESTResource.CHECKPOINT # type: ignore[union-attr] - assert key.resource_name == "my_checkpoint" # type: ignore[union-attr] + assert key.resource_type == GXCloudRESTResource.CHECKPOINT # type: ignore[union-attr] # FIXME CoP + assert key.resource_name == "my_checkpoint" # type: ignore[union-attr] # FIXME CoP def _create_checkpoint_config(name: str, id: str) -> dict[str, Any]: diff --git a/tests/data_context/store/test_validation_definition_store.py b/tests/data_context/store/test_validation_definition_store.py index 43f82fceabf6..78be764b5d56 100644 --- a/tests/data_context/store/test_validation_definition_store.py +++ b/tests/data_context/store/test_validation_definition_store.py @@ -58,7 +58,7 @@ def validation_definition( context = in_memory_runtime_context batch_definition = ( context.data_sources.add_pandas("my_datasource") - .add_csv_asset("my_asset", "data.csv") # type: ignore[arg-type] + .add_csv_asset("my_asset", "data.csv") # type: ignore[arg-type] # FIXME CoP .add_batch_definition("my_batch_definition") ) return ValidationDefinition( @@ -133,8 +133,8 @@ def test_get_key(request, store_fixture: str): @pytest.mark.cloud def test_get_key_cloud(cloud_backed_store: ValidationDefinitionStore): key = cloud_backed_store.get_key(name="my_validation") - assert key.resource_type == GXCloudRESTResource.VALIDATION_DEFINITION # type: ignore[union-attr] - assert key.resource_name == "my_validation" # type: ignore[union-attr] + assert key.resource_type == 
GXCloudRESTResource.VALIDATION_DEFINITION # type: ignore[union-attr] # FIXME CoP + assert key.resource_name == "my_validation" # type: ignore[union-attr] # FIXME CoP _VALIDATION_ID = "a4sdfd-64c8-46cb-8f7e-03c12cea1d67" diff --git a/tests/data_context/store/test_validation_results_store.py b/tests/data_context/store/test_validation_results_store.py index 38d3431fd930..a461b61dff50 100644 --- a/tests/data_context/store/test_validation_results_store.py +++ b/tests/data_context/store/test_validation_results_store.py @@ -19,7 +19,7 @@ @freeze_time("09/26/2019 13:42:41") @mock_s3 @pytest.mark.filterwarnings( - "ignore:String run_ids are deprecated*:DeprecationWarning:great_expectations.data_context.types.resource_identifiers" # noqa: E501 + "ignore:String run_ids are deprecated*:DeprecationWarning:great_expectations.data_context.types.resource_identifiers" # noqa: E501 # FIXME CoP ) @pytest.mark.aws_deps def test_ValidationResultsStore_with_TupleS3StoreBackend(aws_credentials): @@ -30,7 +30,7 @@ def test_ValidationResultsStore_with_TupleS3StoreBackend(aws_credentials): conn = boto3.resource("s3", region_name="us-east-1") conn.create_bucket(Bucket=bucket) - # First, demonstrate that we pick up default configuration including from an S3TupleS3StoreBackend # noqa: E501 + # First, demonstrate that we pick up default configuration including from an S3TupleS3StoreBackend # noqa: E501 # FIXME CoP my_store = ValidationResultsStore( store_backend={ "class_name": "TupleS3StoreBackend", @@ -168,7 +168,7 @@ def test_ValidationResultsStore_with_InMemoryStoreBackend(): @pytest.mark.big @freeze_time("09/26/2019 13:42:41") @pytest.mark.filterwarnings( - "ignore:String run_ids are deprecated*:DeprecationWarning:great_expectations.data_context.types.resource_identifiers" # noqa: E501 + "ignore:String run_ids are deprecated*:DeprecationWarning:great_expectations.data_context.types.resource_identifiers" # noqa: E501 # FIXME CoP ) def 
test_ValidationResultsStore_with_TupleFileSystemStoreBackend(tmp_path_factory): full_test_dir = tmp_path_factory.mktemp( @@ -264,7 +264,7 @@ def test_ValidationResultsStore_with_TupleFileSystemStoreBackend(tmp_path_factor @pytest.mark.filterwarnings( - "ignore:String run_ids are deprecated*:DeprecationWarning:great_expectations.data_context.types.resource_identifiers" # noqa: E501 + "ignore:String run_ids are deprecated*:DeprecationWarning:great_expectations.data_context.types.resource_identifiers" # noqa: E501 # FIXME CoP ) @pytest.mark.big def test_ValidationResultsStore_with_DatabaseStoreBackend(sa): diff --git a/tests/data_context/test_configuration_storage.py b/tests/data_context/test_configuration_storage.py index 2ce293ed128f..1e89b171e509 100644 --- a/tests/data_context/test_configuration_storage.py +++ b/tests/data_context/test_configuration_storage.py @@ -30,28 +30,28 @@ def data_context_parameterized_expectation_suite_with_usage_statistics_enabled( created with DataContext.create() """ project_path = str(tmp_path_factory.mktemp("data_context")) - context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 - asset_config_path = os.path.join(context_path, "expectations") # noqa: PTH118 + context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP + asset_config_path = os.path.join(context_path, "expectations") # noqa: PTH118 # FIXME CoP fixture_dir = file_relative_path(__file__, "../test_fixtures") - os.makedirs( # noqa: PTH103 - os.path.join(asset_config_path, "my_dag_node"), # noqa: PTH118 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(asset_config_path, "my_dag_node"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) shutil.copy( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP fixture_dir, "great_expectations_v013_basic_with_usage_stats_enabled.yml" ), - str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 + str(os.path.join(context_path, 
FileDataContext.GX_YML)), # noqa: PTH118 # FIXME CoP ) shutil.copy( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP fixture_dir, "expectation_suites/parameterized_expectation_suite_fixture.json", ), - os.path.join(asset_config_path, "my_dag_node", "default.json"), # noqa: PTH118 + os.path.join(asset_config_path, "my_dag_node", "default.json"), # noqa: PTH118 # FIXME CoP ) - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "plugins"), # noqa: PTH118 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "plugins"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) return gx.get_context(context_root_dir=context_path) @@ -60,9 +60,9 @@ def data_context_parameterized_expectation_suite_with_usage_statistics_enabled( def test_preserve_comments_in_yml_after_adding_datasource( data_context_parameterized_expectation_suite_with_usage_statistics_enabled, ): - # Skipping this test for now, because the order of the contents of the returned CommentedMap is inconsistent. # noqa: E501 + # Skipping this test for now, because the order of the contents of the returned CommentedMap is inconsistent. 
# noqa: E501 # FIXME CoP pytest.skip("KNOWN ISSUE") - config_filepath = os.path.join( # noqa: PTH118 + config_filepath = os.path.join( # noqa: PTH118 # FIXME CoP data_context_parameterized_expectation_suite_with_usage_statistics_enabled.root_directory, FileDataContext.GX_YML, ) diff --git a/tests/data_context/test_data_context.py b/tests/data_context/test_data_context.py index 9748862d36c6..c3dce4f1ecfc 100644 --- a/tests/data_context/test_data_context.py +++ b/tests/data_context/test_data_context.py @@ -56,18 +56,18 @@ def data_context_with_bad_datasource(tmp_path_factory): It is used by test_get_batch_multiple_datasources_do_not_scan_all() """ project_path = str(tmp_path_factory.mktemp("data_context")) - context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 - asset_config_path = os.path.join(context_path, "expectations") # noqa: PTH118 + context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP + asset_config_path = os.path.join(context_path, "expectations") # noqa: PTH118 # FIXME CoP fixture_dir = file_relative_path(__file__, "../test_fixtures") - os.makedirs( # noqa: PTH103 - os.path.join(asset_config_path, "my_dag_node"), # noqa: PTH118 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(asset_config_path, "my_dag_node"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) shutil.copy( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP fixture_dir, "great_expectations_bad_datasource.yml" ), - str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 + str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 # FIXME CoP ) return get_context(context_root_dir=context_path) @@ -174,7 +174,7 @@ def test__normalize_absolute_or_relative_path(tmp_path_factory, basic_data_conte ) assert context._normalize_absolute_or_relative_path("yikes").endswith( - os.path.join(test_dir, "yikes") # noqa: PTH118 + os.path.join(test_dir, "yikes") # noqa: PTH118 # FIXME CoP ) 
assert test_dir not in context._normalize_absolute_or_relative_path("/yikes") @@ -199,11 +199,11 @@ def test_load_data_context_from_environment_variables(tmp_path, monkeypatch): shutil.copy( file_relative_path( __file__, - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP "..", "test_fixtures", "great_expectations_basic.yml" ), ), - str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 + str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 # FIXME CoP ) monkeypatch.setenv("GX_HOME", str(context_path)) assert FileDataContext.find_context_root_dir() == str(context_path) @@ -220,12 +220,12 @@ def test_data_context_create_does_not_raise_error_or_warning_if_ge_dir_exists( @pytest.fixture() def empty_context(tmp_path_factory) -> FileDataContext: project_path = str(tmp_path_factory.mktemp("data_context")) - ge_dir = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 + ge_dir = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP context = get_context(context_root_dir=ge_dir) assert isinstance(context, FileDataContext) - assert os.path.isdir(ge_dir) # noqa: PTH112 - assert os.path.isfile( # noqa: PTH113 - os.path.join(ge_dir, FileDataContext.GX_YML) # noqa: PTH118 + assert os.path.isdir(ge_dir) # noqa: PTH112 # FIXME CoP + assert os.path.isfile( # noqa: PTH113 # FIXME CoP + os.path.join(ge_dir, FileDataContext.GX_YML) # noqa: PTH118 # FIXME CoP ) return context @@ -248,12 +248,12 @@ def test_data_context_does_ge_yml_exist_returns_false_when_it_does_not_exist( ): ge_dir = empty_context.root_directory # mangle project - safe_remove(os.path.join(ge_dir, empty_context.GX_YML)) # noqa: PTH118 + safe_remove(os.path.join(ge_dir, empty_context.GX_YML)) # noqa: PTH118 # FIXME CoP assert FileDataContext.does_config_exist_on_disk(ge_dir) is False @pytest.mark.filesystem -def 
test_data_context_does_project_have_a_datasource_in_config_file_returns_true_when_it_has_a_datasource_configured_in_yml_file_on_disk( # noqa: E501 +def test_data_context_does_project_have_a_datasource_in_config_file_returns_true_when_it_has_a_datasource_configured_in_yml_file_on_disk( # noqa: E501 # FIXME CoP empty_context, ): ge_dir = empty_context.root_directory @@ -262,7 +262,7 @@ def test_data_context_does_project_have_a_datasource_in_config_file_returns_true @pytest.mark.filesystem -def test_data_context_does_project_have_a_datasource_in_config_file_returns_false_when_it_does_not_have_a_datasource_configured_in_yml_file_on_disk( # noqa: E501 +def test_data_context_does_project_have_a_datasource_in_config_file_returns_false_when_it_does_not_have_a_datasource_configured_in_yml_file_on_disk( # noqa: E501 # FIXME CoP empty_context, ): ge_dir = empty_context.root_directory @@ -270,35 +270,35 @@ def test_data_context_does_project_have_a_datasource_in_config_file_returns_fals @pytest.mark.filesystem -def test_data_context_does_project_have_a_datasource_in_config_file_returns_false_when_it_does_not_have_a_ge_yml_file( # noqa: E501 +def test_data_context_does_project_have_a_datasource_in_config_file_returns_false_when_it_does_not_have_a_ge_yml_file( # noqa: E501 # FIXME CoP empty_context, ): ge_dir = empty_context.root_directory - safe_remove(os.path.join(ge_dir, empty_context.GX_YML)) # noqa: PTH118 + safe_remove(os.path.join(ge_dir, empty_context.GX_YML)) # noqa: PTH118 # FIXME CoP assert FileDataContext._does_project_have_a_datasource_in_config_file(ge_dir) is False @pytest.mark.filesystem -def test_data_context_does_project_have_a_datasource_in_config_file_returns_false_when_it_does_not_have_a_ge_dir( # noqa: E501 +def test_data_context_does_project_have_a_datasource_in_config_file_returns_false_when_it_does_not_have_a_ge_dir( # noqa: E501 # FIXME CoP empty_context, ): ge_dir = empty_context.root_directory - safe_remove(os.path.join(ge_dir)) # noqa: PTH118 + 
safe_remove(os.path.join(ge_dir)) # noqa: PTH118 # FIXME CoP assert FileDataContext._does_project_have_a_datasource_in_config_file(ge_dir) is False @pytest.mark.filesystem -def test_data_context_does_project_have_a_datasource_in_config_file_returns_false_when_the_project_has_an_invalid_config_file( # noqa: E501 +def test_data_context_does_project_have_a_datasource_in_config_file_returns_false_when_the_project_has_an_invalid_config_file( # noqa: E501 # FIXME CoP empty_context, ): ge_dir = empty_context.root_directory - with open(os.path.join(ge_dir, FileDataContext.GX_YML), "w") as yml: # noqa: PTH118 + with open(os.path.join(ge_dir, FileDataContext.GX_YML), "w") as yml: # noqa: PTH118 # FIXME CoP yml.write("this file: is not a valid ge config") assert FileDataContext._does_project_have_a_datasource_in_config_file(ge_dir) is False @pytest.mark.filesystem -def test_data_context_is_project_initialized_returns_true_when_its_valid_context_has_one_datasource_and_one_suite( # noqa: E501 +def test_data_context_is_project_initialized_returns_true_when_its_valid_context_has_one_datasource_and_one_suite( # noqa: E501 # FIXME CoP empty_context, ): context = empty_context @@ -311,7 +311,7 @@ def test_data_context_is_project_initialized_returns_true_when_its_valid_context @pytest.mark.filesystem -def test_data_context_is_project_initialized_returns_true_when_its_valid_context_has_one_datasource_and_no_suites( # noqa: E501 +def test_data_context_is_project_initialized_returns_true_when_its_valid_context_has_one_datasource_and_no_suites( # noqa: E501 # FIXME CoP empty_context, ): context = empty_context @@ -336,7 +336,7 @@ def test_data_context_is_project_initialized_returns_false_when_config_yml_is_mi ): ge_dir = empty_context.root_directory # mangle project - safe_remove(os.path.join(ge_dir, empty_context.GX_YML)) # noqa: PTH118 + safe_remove(os.path.join(ge_dir, empty_context.GX_YML)) # noqa: PTH118 # FIXME CoP assert FileDataContext.is_project_initialized(ge_dir) is False @@ 
-348,20 +348,20 @@ def test_data_context_is_project_initialized_returns_false_when_uncommitted_dir_ ge_dir = empty_context.root_directory # mangle project shutil.rmtree( - os.path.join(ge_dir, empty_context.GX_UNCOMMITTED_DIR) # noqa: PTH118 + os.path.join(ge_dir, empty_context.GX_UNCOMMITTED_DIR) # noqa: PTH118 # FIXME CoP ) assert FileDataContext.is_project_initialized(ge_dir) is False @pytest.mark.filesystem -def test_data_context_is_project_initialized_returns_false_when_uncommitted_data_docs_dir_is_missing( # noqa: E501 +def test_data_context_is_project_initialized_returns_false_when_uncommitted_data_docs_dir_is_missing( # noqa: E501 # FIXME CoP empty_context, ): ge_dir = empty_context.root_directory # mangle project shutil.rmtree( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP ge_dir, empty_context.GX_UNCOMMITTED_DIR, "data_docs" ) ) @@ -370,13 +370,13 @@ def test_data_context_is_project_initialized_returns_false_when_uncommitted_data @pytest.mark.filesystem -def test_data_context_is_project_initialized_returns_false_when_uncommitted_validations_dir_is_missing( # noqa: E501 +def test_data_context_is_project_initialized_returns_false_when_uncommitted_validations_dir_is_missing( # noqa: E501 # FIXME CoP empty_context, ): ge_dir = empty_context.root_directory # mangle project shutil.rmtree( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP ge_dir, empty_context.GX_UNCOMMITTED_DIR, "validations" ) ) @@ -391,7 +391,7 @@ def test_data_context_is_project_initialized_returns_false_when_config_variable_ ge_dir = empty_context.root_directory # mangle project safe_remove( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP ge_dir, empty_context.GX_UNCOMMITTED_DIR, "config_variables.yml" ) ) @@ -405,7 +405,7 @@ def test_data_context_create_raises_warning_and_leaves_existing_yml_untouched( ): project_path = str(tmp_path_factory.mktemp("data_context")) gx.get_context(mode="file", 
project_root_dir=project_path) - ge_yml = os.path.join(project_path, "gx/great_expectations.yml") # noqa: PTH118 + ge_yml = os.path.join(project_path, "gx/great_expectations.yml") # noqa: PTH118 # FIXME CoP with open(ge_yml, "a") as ff: ff.write("# LOOK I WAS MODIFIED") @@ -424,15 +424,15 @@ def test_data_context_create_makes_uncommitted_dirs_when_all_are_missing( gx.get_context(mode="file", project_root_dir=project_path) # mangle the existing setup - ge_dir = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 - uncommitted_dir = os.path.join(ge_dir, "uncommitted") # noqa: PTH118 + ge_dir = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP + uncommitted_dir = os.path.join(ge_dir, "uncommitted") # noqa: PTH118 # FIXME CoP shutil.rmtree(uncommitted_dir) # re-run create to simulate onboarding gx.get_context(mode="file", project_root_dir=project_path) obs = gen_directory_tree_str(ge_dir) - assert os.path.isdir( # noqa: PTH112 + assert os.path.isdir( # noqa: PTH112 # FIXME CoP uncommitted_dir ), "No uncommitted directory created" assert ( @@ -485,7 +485,7 @@ def test_data_context_create_does_nothing_if_all_uncommitted_dirs_exist( validation_definitions/ """ project_path = str(tmp_path_factory.mktemp("stuff")) - ge_dir = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 + ge_dir = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP gx.get_context(mode="file", project_root_dir=project_path) fixture = gen_directory_tree_str(ge_dir) @@ -509,8 +509,8 @@ def test_data_context_do_all_uncommitted_dirs_exist(tmp_path_factory): .ge_store_backend_id """ project_path = str(tmp_path_factory.mktemp("stuff")) - ge_dir = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 - uncommitted_dir = os.path.join(ge_dir, "uncommitted") # noqa: PTH118 + ge_dir = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP + uncommitted_dir = os.path.join(ge_dir, "uncommitted") # 
noqa: PTH118 # FIXME CoP gx.get_context(mode="file", project_root_dir=project_path) fixture = gen_directory_tree_str(uncommitted_dir) assert fixture == expected @@ -519,8 +519,8 @@ def test_data_context_do_all_uncommitted_dirs_exist(tmp_path_factory): assert FileDataContext.all_uncommitted_directories_exist(ge_dir) # remove a few - shutil.rmtree(os.path.join(uncommitted_dir, "data_docs")) # noqa: PTH118 - shutil.rmtree(os.path.join(uncommitted_dir, "validations")) # noqa: PTH118 + shutil.rmtree(os.path.join(uncommitted_dir, "data_docs")) # noqa: PTH118 # FIXME CoP + shutil.rmtree(os.path.join(uncommitted_dir, "validations")) # noqa: PTH118 # FIXME CoP # Test that not all exist assert not FileDataContext.all_uncommitted_directories_exist(project_path) @@ -538,8 +538,8 @@ def test_data_context_create_builds_base_directories(tmp_path_factory): "checkpoints", "uncommitted", ]: - base_dir = os.path.join(project_path, context.GX_DIR, directory) # noqa: PTH118 - assert os.path.isdir(base_dir) # noqa: PTH112 + base_dir = os.path.join(project_path, context.GX_DIR, directory) # noqa: PTH118 # FIXME CoP + assert os.path.isdir(base_dir) # noqa: PTH112 # FIXME CoP @pytest.mark.filesystem @@ -548,9 +548,9 @@ def test_data_context_create_does_not_overwrite_existing_config_variables_yml( ): project_path = str(tmp_path_factory.mktemp("data_context")) gx.get_context(mode="file", project_root_dir=project_path) - ge_dir = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 - uncommitted_dir = os.path.join(ge_dir, "uncommitted") # noqa: PTH118 - config_vars_yml = os.path.join( # noqa: PTH118 + ge_dir = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP + uncommitted_dir = os.path.join(ge_dir, "uncommitted") # noqa: PTH118 # FIXME CoP + config_vars_yml = os.path.join( # noqa: PTH118 # FIXME CoP uncommitted_dir, "config_variables.yml" ) @@ -581,7 +581,7 @@ def test_scaffold_directories(tmp_path_factory): "validation_definitions", } assert set( - 
os.listdir(os.path.join(empty_directory, "uncommitted")) # noqa: PTH118 + os.listdir(os.path.join(empty_directory, "uncommitted")) # noqa: PTH118 # FIXME CoP ) == { "data_docs", "validations", @@ -592,17 +592,17 @@ def test_scaffold_directories(tmp_path_factory): def test_load_config_variables_property(basic_data_context_config, tmp_path_factory, monkeypatch): # Setup: base_path = str(tmp_path_factory.mktemp("test_load_config_variables_file")) - os.makedirs( # noqa: PTH103 - os.path.join(base_path, "uncommitted"), # noqa: PTH118 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(base_path, "uncommitted"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) with open( - os.path.join(base_path, "uncommitted", "dev_variables.yml"), # noqa: PTH118 + os.path.join(base_path, "uncommitted", "dev_variables.yml"), # noqa: PTH118 # FIXME CoP "w", ) as outfile: yaml.dump({"env": "dev"}, outfile) with open( - os.path.join(base_path, "uncommitted", "prod_variables.yml"), # noqa: PTH118 + os.path.join(base_path, "uncommitted", "prod_variables.yml"), # noqa: PTH118 # FIXME CoP "w", ) as outfile: yaml.dump({"env": "prod"}, outfile) @@ -776,7 +776,7 @@ def test_multiple_rendered_content_blocks_one_is_busted( } ), value_type="StringValueType", - exception='Renderer "atomic.prescriptive.custom_renderer_type" failed to render Expectation ' # noqa: E501 + exception='Renderer "atomic.prescriptive.custom_renderer_type" failed to render Expectation ' # noqa: E501 # FIXME CoP '"expect_sky_to_be_color with exception message: This renderer is broken!".', ), RenderedAtomicContent( diff --git a/tests/data_context/test_data_context_config_ui.py b/tests/data_context/test_data_context_config_ui.py index 6ce38a303eb3..7980c37bb3f3 100644 --- a/tests/data_context/test_data_context_config_ui.py +++ b/tests/data_context/test_data_context_config_ui.py @@ -24,7 +24,7 @@ What does this test and why? 
This file will hold various tests to ensure that the UI functions as expected when creating a DataContextConfig object. It will ensure that the appropriate defaults are used, including when the store_backend_defaults parameter is set. -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP _DEFAULT_CONFIG_VERSION: Final[float] = float( DataContextConfigDefaults.DEFAULT_CONFIG_VERSION.value @@ -42,8 +42,8 @@ def construct_data_context_config(): def _construct_data_context_config( data_context_id: str, config_version: float = _DEFAULT_CONFIG_VERSION, - expectations_store_name: str = DataContextConfigDefaults.DEFAULT_EXPECTATIONS_STORE_NAME.value, # noqa: E501 - validation_results_store_name: str = DataContextConfigDefaults.DEFAULT_VALIDATIONS_STORE_NAME.value, # noqa: E501 + expectations_store_name: str = DataContextConfigDefaults.DEFAULT_EXPECTATIONS_STORE_NAME.value, # noqa: E501 # FIXME CoP + validation_results_store_name: str = DataContextConfigDefaults.DEFAULT_VALIDATIONS_STORE_NAME.value, # noqa: E501 # FIXME CoP checkpoint_store_name: str = DataContextConfigDefaults.DEFAULT_CHECKPOINT_STORE_NAME.value, fluent_datasources: Optional[Dict] = None, plugins_directory: Optional[str] = None, @@ -306,7 +306,7 @@ def test_DataContextConfig_with_FilesystemStoreBackendDefaults_and_simple_defaul What does this test and why? Ensure that a very simple DataContextConfig setup using FilesystemStoreBackendDefaults is created accurately This test sets the root_dir parameter - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP test_root_directory = "test_root_dir" @@ -353,14 +353,14 @@ def test_DataContextConfig_with_FilesystemStoreBackendDefaults_and_simple_defaul @pytest.mark.unit -def test_DataContextConfig_with_FilesystemStoreBackendDefaults_and_simple_defaults_no_root_directory( # noqa: E501 +def test_DataContextConfig_with_FilesystemStoreBackendDefaults_and_simple_defaults_no_root_directory( # noqa: E501 # FIXME CoP construct_data_context_config, ): """ What does this test and why? 
Ensure that a very simple DataContextConfig setup using FilesystemStoreBackendDefaults is created accurately This test does not set the optional root_directory parameter - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP store_backend_defaults = FilesystemStoreBackendDefaults() data_context_config = DataContextConfig( @@ -604,7 +604,7 @@ def test_DataContextConfig_with_DatabaseStoreBackendDefaults(construct_data_cont What does this test and why? Make sure that using DatabaseStoreBackendDefaults as the store_backend_defaults applies appropriate defaults, including default_credentials getting propagated to stores and not data_docs - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP store_backend_defaults = DatabaseStoreBackendDefaults( default_credentials={ @@ -874,7 +874,7 @@ def test_override_general_defaults( """ What does this test and why? A DataContextConfig should be able to be created by passing items into the constructor that override any defaults. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP data_context_config = DataContextConfig( config_version=999, diff --git a/tests/data_context/test_data_context_config_variables.py b/tests/data_context/test_data_context_config_variables.py index e21aa6b129b4..d70c2622dd64 100644 --- a/tests/data_context/test_data_context_config_variables.py +++ b/tests/data_context/test_data_context_config_variables.py @@ -36,7 +36,7 @@ def empty_data_context_with_config_variables(monkeypatch, empty_data_context): ) shutil.copy( ge_config_path, - os.path.join(root_dir, FileDataContext.GX_YML), # noqa: PTH118 + os.path.join(root_dir, FileDataContext.GX_YML), # noqa: PTH118 # FIXME CoP ) config_variables_path = file_relative_path( __file__, @@ -44,7 +44,7 @@ def empty_data_context_with_config_variables(monkeypatch, empty_data_context): ) shutil.copy( config_variables_path, - os.path.join(root_dir, "uncommitted"), # noqa: PTH118 + os.path.join(root_dir, "uncommitted"), # noqa: PTH118 # FIXME CoP ) return 
get_context(context_root_dir=root_dir) @@ -71,8 +71,8 @@ def test_substituted_config_variables_not_written_to_file(tmp_path_factory): # with substitution variables project_path = str(tmp_path_factory.mktemp("data_context")) - context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 - asset_config_path = os.path.join(context_path, "expectations") # noqa: PTH118 + context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP + asset_config_path = os.path.join(context_path, "expectations") # noqa: PTH118 # FIXME CoP create_data_context_files( context_path, @@ -177,7 +177,7 @@ def test_substitute_config_variable(): r"prefix$ARG4.$arg0/$aRg3:${ARG4}/\$dontsub${arg0}:${aRg3}.suffix", config_variables_dict, ) - == "prefixval_of_ARG_4.val_of_arg_0/val_of_aRg_3:val_of_ARG_4/$dontsubval_of_arg_0:val_of_aRg_3.suffix" # noqa: E501 + == "prefixval_of_ARG_4.val_of_arg_0/val_of_aRg_3:val_of_ARG_4/$dontsubval_of_arg_0:val_of_aRg_3.suffix" # noqa: E501 # FIXME CoP ) @@ -186,7 +186,7 @@ def test_escape_all_config_variables(empty_data_context_with_config_variables): """ Make sure that all types of input to escape_all_config_variables are escaped properly: str, dict, OrderedDict, list Make sure that changing the escape string works as expected. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP context = empty_data_context_with_config_variables # str @@ -302,7 +302,7 @@ def test_escape_all_config_variables_skip_substitution_vars( """ What does this test and why? escape_all_config_variables(skip_if_substitution_variable=True/False) should function as documented. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP context = empty_data_context_with_config_variables # str @@ -533,7 +533,7 @@ def test_create_data_context_and_config_vars_in_code( What does this test and why? Creating a DataContext via .create(), then using .save_config_variable() to save a variable that will eventually be substituted (e.g. 
${SOME_VAR}) should result in the proper escaping of $. This is in response to issue #2196 - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP project_path = str(tmp_path_factory.mktemp("data_context")) context = gx.get_context( diff --git a/tests/data_context/test_data_context_data_docs_api.py b/tests/data_context/test_data_context_data_docs_api.py index b3d62c84c455..371285eac9c1 100644 --- a/tests/data_context/test_data_context_data_docs_api.py +++ b/tests/data_context/test_data_context_data_docs_api.py @@ -157,8 +157,8 @@ def context_with_multiple_built_sites(empty_data_context): assert obs[1]["site_url"].endswith("gx/uncommitted/data_docs/another_local_site/index.html") assert obs[1]["site_name"] == "another_local_site" for site in ["local_site", "another_local_site"]: - assert os.path.isfile( # noqa: PTH113 - os.path.join( # noqa: PTH118 + assert os.path.isfile( # noqa: PTH113 # FIXME CoP + os.path.join( # noqa: PTH118 # FIXME CoP context.root_directory, context.GX_UNCOMMITTED_DIR, "data_docs", @@ -248,14 +248,14 @@ def test_clean_data_docs_on_context_with_no_sites_raises_error( @pytest.mark.filesystem -def test_clean_data_docs_on_context_with_multiple_sites_with_no_site_name_cleans_all_sites_and_returns_true( # noqa: E501 +def test_clean_data_docs_on_context_with_multiple_sites_with_no_site_name_cleans_all_sites_and_returns_true( # noqa: E501 # FIXME CoP context_with_multiple_built_sites, ): context = context_with_multiple_built_sites assert context.clean_data_docs() is True for site in ["local_site", "another_local_site"]: - assert not os.path.isfile( # noqa: PTH113 - os.path.join( # noqa: PTH118 + assert not os.path.isfile( # noqa: PTH113 # FIXME CoP + os.path.join( # noqa: PTH118 # FIXME CoP context.root_directory, context.GX_UNCOMMITTED_DIR, "data_docs", @@ -266,19 +266,19 @@ def test_clean_data_docs_on_context_with_multiple_sites_with_no_site_name_cleans @pytest.mark.filesystem -def 
test_clean_data_docs_on_context_with_multiple_sites_with_existing_site_name_cleans_selected_site_and_returns_true( # noqa: E501 +def test_clean_data_docs_on_context_with_multiple_sites_with_existing_site_name_cleans_selected_site_and_returns_true( # noqa: E501 # FIXME CoP context_with_multiple_built_sites, ): context = context_with_multiple_built_sites assert context.clean_data_docs(site_name="another_local_site") is True - data_docs_dir = os.path.join( # noqa: PTH118 + data_docs_dir = os.path.join( # noqa: PTH118 # FIXME CoP context.root_directory, context.GX_UNCOMMITTED_DIR, "data_docs" ) - assert not os.path.isfile( # noqa: PTH113 - os.path.join(data_docs_dir, "another_local_site", "index.html") # noqa: PTH118 + assert not os.path.isfile( # noqa: PTH113 # FIXME CoP + os.path.join(data_docs_dir, "another_local_site", "index.html") # noqa: PTH118 # FIXME CoP ) - assert os.path.isfile( # noqa: PTH113 - os.path.join(data_docs_dir, "local_site", "index.html") # noqa: PTH118 + assert os.path.isfile( # noqa: PTH113 # FIXME CoP + os.path.join(data_docs_dir, "local_site", "index.html") # noqa: PTH118 # FIXME CoP ) @@ -292,7 +292,7 @@ def test_clean_data_docs_on_context_with_multiple_sites_with_non_existent_site_n @pytest.mark.filesystem -def test_existing_local_data_docs_urls_returns_url_on_project_with_no_datasources_and_a_site_configured( # noqa: E501 +def test_existing_local_data_docs_urls_returns_url_on_project_with_no_datasources_and_a_site_configured( # noqa: E501 # FIXME CoP tmp_path_factory, ): """ @@ -315,7 +315,7 @@ def test_existing_local_data_docs_urls_returns_single_url_from_customized_local_ tmp_path_factory, ): empty_directory = str(tmp_path_factory.mktemp("yo_yo")) - ge_dir = os.path.join(empty_directory, FileDataContext.GX_DIR) # noqa: PTH118 + ge_dir = os.path.join(empty_directory, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP context = get_context(context_root_dir=ge_dir) context._project_config["data_docs_sites"] = { @@ -334,10 +334,10 @@ def 
test_existing_local_data_docs_urls_returns_single_url_from_customized_local_ context = get_context(context_root_dir=ge_dir) context.build_data_docs() - expected_path = os.path.join( # noqa: PTH118 + expected_path = os.path.join( # noqa: PTH118 # FIXME CoP ge_dir, "uncommitted/data_docs/some/local/path/index.html" ) - assert os.path.isfile(expected_path) # noqa: PTH113 + assert os.path.isfile(expected_path) # noqa: PTH113 # FIXME CoP obs = context.get_docs_sites_urls() assert obs == [{"site_name": "my_rad_site", "site_url": f"file://{expected_path}"}] @@ -348,7 +348,7 @@ def test_existing_local_data_docs_urls_returns_multiple_urls_from_customized_loc tmp_path_factory, ): empty_directory = str(tmp_path_factory.mktemp("yo_yo_ma")) - ge_dir = os.path.join(empty_directory, FileDataContext.GX_DIR) # noqa: PTH118 + ge_dir = os.path.join(empty_directory, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP context = get_context(context_root_dir=ge_dir) context._project_config["data_docs_sites"] = { @@ -373,12 +373,12 @@ def test_existing_local_data_docs_urls_returns_multiple_urls_from_customized_loc context._save_project_config() context = get_context(context_root_dir=ge_dir) context.build_data_docs() - data_docs_dir = os.path.join(ge_dir, "uncommitted/data_docs/") # noqa: PTH118 + data_docs_dir = os.path.join(ge_dir, "uncommitted/data_docs/") # noqa: PTH118 # FIXME CoP - path_1 = os.path.join(data_docs_dir, "some/path/index.html") # noqa: PTH118 - path_2 = os.path.join(data_docs_dir, "another/path/index.html") # noqa: PTH118 + path_1 = os.path.join(data_docs_dir, "some/path/index.html") # noqa: PTH118 # FIXME CoP + path_2 = os.path.join(data_docs_dir, "another/path/index.html") # noqa: PTH118 # FIXME CoP for expected_path in [path_1, path_2]: - assert os.path.isfile(expected_path) # noqa: PTH113 + assert os.path.isfile(expected_path) # noqa: PTH113 # FIXME CoP obs = context.get_docs_sites_urls() @@ -397,7 +397,7 @@ def 
test_build_data_docs_skipping_index_does_not_build_index( ): # TODO What's the latest and greatest way to use configs rather than my hackery? empty_directory = str(tmp_path_factory.mktemp("empty")) - ge_dir = os.path.join(empty_directory, FileDataContext.GX_DIR) # noqa: PTH118 + ge_dir = os.path.join(empty_directory, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP context = get_context(context_root_dir=ge_dir) config = context.get_config() config.data_docs_sites = { @@ -405,7 +405,7 @@ def test_build_data_docs_skipping_index_does_not_build_index( "class_name": "SiteBuilder", "store_backend": { "class_name": "TupleFilesystemStoreBackend", - "base_directory": os.path.join( # noqa: PTH118 + "base_directory": os.path.join( # noqa: PTH118 # FIXME CoP "uncommitted", "data_docs" ), }, @@ -417,13 +417,13 @@ def test_build_data_docs_skipping_index_does_not_build_index( context._save_project_config() del context context = get_context(context_root_dir=ge_dir) - data_docs_dir = os.path.join(ge_dir, "uncommitted", "data_docs") # noqa: PTH118 - index_path = os.path.join(data_docs_dir, "index.html") # noqa: PTH118 - assert not os.path.isfile(index_path) # noqa: PTH113 + data_docs_dir = os.path.join(ge_dir, "uncommitted", "data_docs") # noqa: PTH118 # FIXME CoP + index_path = os.path.join(data_docs_dir, "index.html") # noqa: PTH118 # FIXME CoP + assert not os.path.isfile(index_path) # noqa: PTH113 # FIXME CoP context.build_data_docs(build_index=False) - assert os.path.isdir(os.path.join(data_docs_dir, "static")) # noqa: PTH112, PTH118 - assert not os.path.isfile(index_path) # noqa: PTH113 + assert os.path.isdir(os.path.join(data_docs_dir, "static")) # noqa: PTH112, PTH118 # FIXME CoP + assert not os.path.isfile(index_path) # noqa: PTH113 # FIXME CoP @pytest.mark.unit diff --git a/tests/data_context/test_data_context_ge_cloud_mode.py b/tests/data_context/test_data_context_ge_cloud_mode.py index 2e5fcc1e88b0..3fd3820a794f 100644 --- 
a/tests/data_context/test_data_context_ge_cloud_mode.py +++ b/tests/data_context/test_data_context_ge_cloud_mode.py @@ -32,7 +32,7 @@ def test_data_context_ge_cloud_mode_makes_successful_request_to_cloud_api( ge_cloud_runtime_organization_id, ge_cloud_access_token, ): - called_with_url = f"{ge_cloud_runtime_base_url}/api/v1/organizations/{ge_cloud_runtime_organization_id}/data-context-configuration" # noqa: E501 + called_with_url = f"{ge_cloud_runtime_base_url}/api/v1/organizations/{ge_cloud_runtime_organization_id}/data-context-configuration" # noqa: E501 # FIXME CoP # Ensure that the request goes through responses.get( @@ -47,7 +47,7 @@ def test_data_context_ge_cloud_mode_makes_successful_request_to_cloud_api( cloud_organization_id=ge_cloud_runtime_organization_id, cloud_access_token=ge_cloud_access_token, ) - except Exception: # Not concerned with constructor output (only evaluating interaction with requests during __init__) # noqa: E501 + except Exception: # Not concerned with constructor output (only evaluating interaction with requests during __init__) # noqa: E501 # FIXME CoP pass # Only ever called once with the endpoint URL and auth token as args diff --git a/tests/data_context/test_data_context_in_code_config.py b/tests/data_context/test_data_context_in_code_config.py index 4bfa21fbb14c..ef0333681f08 100644 --- a/tests/data_context/test_data_context_in_code_config.py +++ b/tests/data_context/test_data_context_in_code_config.py @@ -90,9 +90,7 @@ def get_store_backend_id_from_s3(bucket: str, prefix: str, key: str) -> uuid.UUI """ s3_response_object = boto3.client("s3").get_object(Bucket=bucket, Key=f"{prefix}/{key}") - ge_store_backend_id_file_contents = ( - s3_response_object["Body"].read().decode(s3_response_object.get("ContentEncoding", "utf-8")) - ) + ge_store_backend_id_file_contents = s3_response_object["Body"].read().decode("utf-8") store_backend_id_file_parser = StoreBackend.STORE_BACKEND_ID_PREFIX + pp.Word(pp.hexnums + "-") parsed_store_backend_id 
= store_backend_id_file_parser.parseString( @@ -208,7 +206,7 @@ def test_DataContext_construct_data_context_id_uses_id_of_currently_configured_e @pytest.mark.aws_deps @mock_s3 -def test_DataContext_construct_data_context_id_uses_id_stored_in_DataContextConfig_if_no_configured_expectations_store( # noqa: E501 +def test_DataContext_construct_data_context_id_uses_id_stored_in_DataContextConfig_if_no_configured_expectations_store( # noqa: E501 # FIXME CoP monkeypatch, aws_credentials ): """ @@ -262,7 +260,7 @@ def test_suppress_store_backend_id_is_true_for_inactive_stores(): Trying to read / set the store_backend_id for inactive stores should not be attempted during DataContext initialization. This test ensures that the _suppress_store_backend_id parameter is set to True for inactive stores. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP bucket = "leakybucket" expectations_store_prefix = "expectations_store_prefix" @@ -357,7 +355,7 @@ def test_inaccessible_active_bucket_warning_messages(caplog, aws_credentials): e.g. Invalid store configuration: Please check the configuration of your TupleS3StoreBackend named expectations_S3_store Active stores are those named in: "expectations_store_name", "validation_results_store_name" - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP bucket = "leakybucket" expectations_store_prefix = "expectations_store_prefix" @@ -399,13 +397,13 @@ def test_inaccessible_active_bucket_warning_messages(caplog, aws_credentials): _ = get_context(project_config=in_code_data_context_project_config) assert ( caplog.messages.count( - "Invalid store configuration: Please check the configuration of your TupleS3StoreBackend named expectations_S3_store. Exception was: \n Unable to set object in s3." # noqa: E501 + "Invalid store configuration: Please check the configuration of your TupleS3StoreBackend named expectations_S3_store. Exception was: \n Unable to set object in s3." 
# noqa: E501 # FIXME CoP ) == 1 ) assert ( caplog.messages.count( - "Invalid store configuration: Please check the configuration of your TupleS3StoreBackend named validation_results_S3_store. Exception was: \n Unable to set object in s3." # noqa: E501 + "Invalid store configuration: Please check the configuration of your TupleS3StoreBackend named validation_results_S3_store. Exception was: \n Unable to set object in s3." # noqa: E501 # FIXME CoP ) == 1 ) @@ -478,13 +476,13 @@ def test_inaccessible_inactive_bucket_no_warning_messages(caplog): _ = get_context(project_config=in_code_data_context_project_config) assert ( caplog.messages.count( - "Invalid store configuration: Please check the configuration of your TupleS3StoreBackend named expectations_S3_store" # noqa: E501 + "Invalid store configuration: Please check the configuration of your TupleS3StoreBackend named expectations_S3_store" # noqa: E501 # FIXME CoP ) == 0 ) assert ( caplog.messages.count( - "Invalid store configuration: Please check the configuration of your TupleS3StoreBackend named validation_results_S3_store" # noqa: E501 + "Invalid store configuration: Please check the configuration of your TupleS3StoreBackend named validation_results_S3_store" # noqa: E501 # FIXME CoP ) == 0 ) diff --git a/tests/data_context/test_data_context_state_management.py b/tests/data_context/test_data_context_state_management.py index d19bc6883ec2..4d6676598461 100644 --- a/tests/data_context/test_data_context_state_management.py +++ b/tests/data_context/test_data_context_state_management.py @@ -78,7 +78,7 @@ def add_or_update(self, key, value, **kwargs): class EphemeralDataContextSpy(EphemeralDataContext): """ Simply wraps around EphemeralDataContext but keeps tabs on specific method calls around state management. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def __init__( self, diff --git a/tests/data_context/test_data_context_store_configs.py b/tests/data_context/test_data_context_store_configs.py index 06e5ca6e9af7..b540737cca1a 100644 --- a/tests/data_context/test_data_context_store_configs.py +++ b/tests/data_context/test_data_context_store_configs.py @@ -14,12 +14,12 @@ @pytest.fixture(scope="function") def totally_empty_data_context(tmp_path_factory): # NOTE: This sets up a DataContext with a real path and a config saved to that path. - # Now that BaseDataContext exists, it's possible to test most DataContext methods without touching the file system. # noqa: E501 + # Now that BaseDataContext exists, it's possible to test most DataContext methods without touching the file system. # noqa: E501 # FIXME CoP # However, as of 2019/08/22, most tests still use filesystem-based fixtures. # TODO: Where appropriate, switch DataContext tests to the new method. project_root_dir = str(tmp_path_factory.mktemp("totally_empty_data_context")) - os.mkdir( # noqa: PTH102 - os.path.join(project_root_dir, FileDataContext.GX_DIR) # noqa: PTH118 + os.mkdir( # noqa: PTH102 # FIXME CoP + os.path.join(project_root_dir, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP ) config = { @@ -47,13 +47,13 @@ def totally_empty_data_context(tmp_path_factory): "data_docs_sites": {}, } with open( - os.path.join(project_root_dir, "gx/great_expectations.yml"), # noqa: PTH118 + os.path.join(project_root_dir, "gx/great_expectations.yml"), # noqa: PTH118 # FIXME CoP "w", ) as config_file: yaml.dump(config, config_file) context = gx.get_context( - context_root_dir=os.path.join( # noqa: PTH118 + context_root_dir=os.path.join( # noqa: PTH118 # FIXME CoP project_root_dir, FileDataContext.GX_DIR ) ) diff --git a/tests/data_context/test_data_context_types.py b/tests/data_context/test_data_context_types.py index eedaa3446add..1942fd3b9318 100644 --- a/tests/data_context/test_data_context_types.py +++ 
b/tests/data_context/test_data_context_types.py @@ -1,4 +1,4 @@ -from unittest.mock import Mock # noqa: TID251 +from unittest.mock import Mock # noqa: TID251 # FIXME CoP import pytest diff --git a/tests/data_context/test_data_context_utils.py b/tests/data_context/test_data_context_utils.py index 2d9b4f8d1b6b..294c149b4310 100644 --- a/tests/data_context/test_data_context_utils.py +++ b/tests/data_context/test_data_context_utils.py @@ -61,7 +61,7 @@ def test_mask_db_url__does_not_mask_config_strings(): @pytest.mark.filterwarnings( "ignore:SQLAlchemy is not installed*:UserWarning:great_expectations.data_context.util" ) -def test_password_masker_mask_db_url( # noqa: PLR0915, C901- 11 +def test_password_masker_mask_db_url( # noqa: PLR0915, C901- 11 # FIXME CoP monkeypatch, tmp_path ): """ @@ -69,7 +69,7 @@ def test_password_masker_mask_db_url( # noqa: PLR0915, C901- 11 The PasswordMasker.mask_db_url() should mask passwords consistently inruff database urls. The output of mask_db_url should be the same whether user_urlparse is set to True or False. 
This test uses database url examples from https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # PostgreSQL (if installed in test environment) # default db_hostname = os.getenv("GE_TEST_LOCAL_DB_HOSTNAME", "localhost") @@ -284,13 +284,13 @@ def test_password_masker_mask_db_url( # noqa: PLR0915, C901- 11 @pytest.mark.unit def test_sanitize_config_azure_blob_store(): - azure_url: str = "DefaultEndpointsProtocol=https;AccountName=iamname;AccountKey=i_am_account_key;EndpointSuffix=core.windows.net" # noqa: E501 + azure_url: str = "DefaultEndpointsProtocol=https;AccountName=iamname;AccountKey=i_am_account_key;EndpointSuffix=core.windows.net" # noqa: E501 # FIXME CoP assert ( PasswordMasker.mask_db_url(azure_url) - == "DefaultEndpointsProtocol=https;AccountName=iamname;AccountKey=***;EndpointSuffix=core.windows.net" # noqa: E501 + == "DefaultEndpointsProtocol=https;AccountName=iamname;AccountKey=***;EndpointSuffix=core.windows.net" # noqa: E501 # FIXME CoP ) - azure_wrong_url: str = "DefaultEndpointsProtocol=i_dont_work;AccountName=iamname;AccountKey=i_am_account_key;EndpointSuffix=core.windows.net" # noqa: E501 + azure_wrong_url: str = "DefaultEndpointsProtocol=i_dont_work;AccountName=iamname;AccountKey=i_am_account_key;EndpointSuffix=core.windows.net" # noqa: E501 # FIXME CoP with pytest.raises(StoreConfigurationError): PasswordMasker.mask_db_url(azure_wrong_url) diff --git a/tests/data_context/test_data_context_variables.py b/tests/data_context/test_data_context_variables.py index ec15ab1485d9..4438e91569b8 100644 --- a/tests/data_context/test_data_context_variables.py +++ b/tests/data_context/test_data_context_variables.py @@ -38,7 +38,7 @@ ) if TYPE_CHECKING: - from unittest.mock import MagicMock # noqa: TID251 + from unittest.mock import MagicMock # noqa: TID251 # FIXME CoP from pytest_mock import MockerFixture @@ -442,12 +442,12 @@ def test_file_data_context_variables_e2e( It is also important to note 
that in the case of $VARS syntax, we NEVER want to persist the underlying value in order to preserve sensitive information. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # Prepare updated progress_bars to set and serialize to disk updated_progress_bars: ProgressBarsConfig = copy.deepcopy(progress_bars) updated_progress_bars.globally = False - # Prepare updated plugins directory to set and serialize to disk (ensuring we hide the true value behind $VARS syntax) # noqa: E501 + # Prepare updated plugins directory to set and serialize to disk (ensuring we hide the true value behind $VARS syntax) # noqa: E501 # FIXME CoP env_var_name: str = "MY_PLUGINS_DIRECTORY" value_associated_with_env_var: str = "foo/bar/baz" monkeypatch.setenv(env_var_name, value_associated_with_env_var) @@ -474,7 +474,7 @@ def test_file_data_context_variables_e2e( @pytest.mark.cloud @pytest.mark.xfail( strict=False, - reason="GX Cloud E2E tests are failing due to new top-level `analytics` and `data_context_id` variables not yet being recognized by the server", # noqa: E501 + reason="GX Cloud E2E tests are failing due to new top-level `analytics` and `data_context_id` variables not yet being recognized by the server", # noqa: E501 # FIXME CoP ) def test_cloud_data_context_variables_successfully_hits_cloud_endpoint( cloud_data_context: CloudDataContext, @@ -498,7 +498,7 @@ def test_cloud_data_context_variables_successfully_hits_cloud_endpoint( ) @pytest.mark.xfail( strict=False, - reason="GX Cloud E2E tests are failing due to env vars not being consistently recognized by Docker; x-failing for purposes of 0.15.22 release", # noqa: E501 + reason="GX Cloud E2E tests are failing due to env vars not being consistently recognized by Docker; x-failing for purposes of 0.15.22 release", # noqa: E501 # FIXME CoP ) def test_cloud_enabled_data_context_variables_e2e( mock_save_project_config: MagicMock, @@ -517,13 +517,13 @@ def test_cloud_enabled_data_context_variables_e2e( It is also important to note that 
in the case of $VARS syntax, we NEVER want to persist the underlying value in order to preserve sensitive information. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # Prepare updated plugins directory to set and save to the Cloud backend. - # As values are persisted in the Cloud DB, we want to randomize our values each time for consistent test results # noqa: E501 - updated_plugins_dir = f"plugins_dir_{''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))}" # noqa: E501 + # As values are persisted in the Cloud DB, we want to randomize our values each time for consistent test results # noqa: E501 # FIXME CoP + updated_plugins_dir = f"plugins_dir_{''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))}" # noqa: E501 # FIXME CoP updated_data_docs_sites = data_docs_sites - new_site_name = f"docs_site_{''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))}" # noqa: E501 + new_site_name = f"docs_site_{''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))}" # noqa: E501 # FIXME CoP updated_data_docs_sites[new_site_name] = {} context = get_context(cloud_mode=True) diff --git a/tests/data_context/test_project_manager.py b/tests/data_context/test_project_manager.py index 33dd025fdd6f..c84f14339c41 100644 --- a/tests/data_context/test_project_manager.py +++ b/tests/data_context/test_project_manager.py @@ -1,4 +1,4 @@ -from unittest.mock import Mock # noqa: TID251 +from unittest.mock import Mock # noqa: TID251 # FIXME CoP import pytest diff --git a/tests/data_context/test_templates.py b/tests/data_context/test_templates.py index 80f433f3120c..08c100d4aec5 100644 --- a/tests/data_context/test_templates.py +++ b/tests/data_context/test_templates.py @@ -83,7 +83,7 @@ def project_help_comment(): # config_version refers to the syntactic version of this config file, and is used in maintaining backwards compatibility # It is auto-generated and usually does not need to be changed. 
config_version: 4 -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP return PROJECT_HELP_COMMENT @@ -94,7 +94,7 @@ def test_project_optional_config_comment_matches_default( """ What does this test and why? Make sure that the templates built on data_context.types.base.DataContextConfigDefaults match the desired default. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP assert project_optional_config_comment == templates.PROJECT_OPTIONAL_CONFIG_COMMENT @@ -104,6 +104,6 @@ def test_project_help_comment_matches_default(project_help_comment): """ What does this test and why? Make sure that the templates built on data_context.types.base.DataContextConfigDefaults match the desired default. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP assert project_help_comment == templates.PROJECT_HELP_COMMENT diff --git a/tests/datasource/conftest.py b/tests/datasource/conftest.py index 391a8fa3d43c..7f44f2d4929c 100644 --- a/tests/datasource/conftest.py +++ b/tests/datasource/conftest.py @@ -119,7 +119,7 @@ def test_cases_for_sql_data_connector_sqlite_connection_url(sa): db_file_path: str = file_relative_path( __file__, - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP "..", "test_sets", "test_cases_for_sql_data_connector.db" ), ) @@ -144,7 +144,7 @@ def test_cases_for_sql_data_connector_sqlite_execution_engine( "md5", 2, lambda x, d: hashlib.md5(str(x).encode("utf-8")).hexdigest()[-1 * d :] ) - conn: sa.engine.Connection = engine.connect() # noqa: F841 + conn: sa.engine.Connection = engine.connect() # noqa: F841 # FIXME CoP # Build a SqlAlchemyDataset using that database return SqlAlchemyExecutionEngine( diff --git a/tests/datasource/data_connector/sorters/conftest.py b/tests/datasource/data_connector/sorters/conftest.py index 81643c0a4957..9b22c066a33c 100644 --- a/tests/datasource/data_connector/sorters/conftest.py +++ b/tests/datasource/data_connector/sorters/conftest.py @@ -5,18 +5,18 @@ def periodic_table_of_elements(): # fmt: off data = [ - "Hydrogen", 
"Helium", "Lithium", "Beryllium", "Boron", "Carbon", "Nitrogen", "Oxygen", "Fluorine", "Neon", # noqa: E501 - "Sodium", "Magnesium", "Aluminum", "Silicon", "Phosphorus", "Sulfur", "Chlorine", "Argon", "Potassium", "Calcium", # noqa: E501 - "Scandium", "Titanium", "Vanadium", "Chromium", "Manganese", "Iron", "Cobalt", "Nickel", "Copper", "Zinc", # noqa: E501 - "Gallium", "Germanium", "Arsenic", "Selenium", "Bromine", "Krypton", "Rubidium", "Strontium", "Yttrium", "Zirconium", # noqa: E501 - "Niobium", "Molybdenum", "Technetium", "Ruthenium", "Rhodium", "Palladium", "Silver", "Cadmium", "Indium", "Tin", # noqa: E501 - "Antimony", "Tellurium", "Iodine", "Xenon", "Cesium", "Barium", "Lanthanum", "Cerium", "Praseodymium", "Neodymium", # noqa: E501 - "Promethium", "Samarium", "Europium", "Gadolinium", "Terbium", "Dysprosium", "Holmium", "Erbium", "Thulium", "Ytterbium", # noqa: E501 - "Lutetium", "Hafnium", "Tantalum", "Tungsten", "Rhenium", "Osmium", "Iridium", "Platinum", "Gold", "Mercury", # noqa: E501 - "Thallium", "Lead", "Bismuth", "Polonium", "Astatine", "Radon", "Francium", "Radium", "Actinium", "Thorium", # noqa: E501 - "Protactinium", "Uranium", "Neptunium", "Plutonium", "Americium", "Curium", "Berkelium", "Californium", "Einsteinium", "Fermium", # noqa: E501 - "Mendelevium", "Nobelium", "Lawrencium", "Rutherfordium", "Dubnium", "Seaborgium", "Bohrium", "Hassium", "Meitnerium", "Darmstadtium", # noqa: E501 - "Roentgenium", "Copernicium", "Nihomium", "Flerovium", "Moscovium", "Livermorium", "Tennessine", "Oganesson", # noqa: E501 + "Hydrogen", "Helium", "Lithium", "Beryllium", "Boron", "Carbon", "Nitrogen", "Oxygen", "Fluorine", "Neon", # noqa: E501 # FIXME CoP + "Sodium", "Magnesium", "Aluminum", "Silicon", "Phosphorus", "Sulfur", "Chlorine", "Argon", "Potassium", "Calcium", # noqa: E501 # FIXME CoP + "Scandium", "Titanium", "Vanadium", "Chromium", "Manganese", "Iron", "Cobalt", "Nickel", "Copper", "Zinc", # noqa: E501 # FIXME CoP + "Gallium", "Germanium", 
"Arsenic", "Selenium", "Bromine", "Krypton", "Rubidium", "Strontium", "Yttrium", "Zirconium", # noqa: E501 # FIXME CoP + "Niobium", "Molybdenum", "Technetium", "Ruthenium", "Rhodium", "Palladium", "Silver", "Cadmium", "Indium", "Tin", # noqa: E501 # FIXME CoP + "Antimony", "Tellurium", "Iodine", "Xenon", "Cesium", "Barium", "Lanthanum", "Cerium", "Praseodymium", "Neodymium", # noqa: E501 # FIXME CoP + "Promethium", "Samarium", "Europium", "Gadolinium", "Terbium", "Dysprosium", "Holmium", "Erbium", "Thulium", "Ytterbium", # noqa: E501 # FIXME CoP + "Lutetium", "Hafnium", "Tantalum", "Tungsten", "Rhenium", "Osmium", "Iridium", "Platinum", "Gold", "Mercury", # noqa: E501 # FIXME CoP + "Thallium", "Lead", "Bismuth", "Polonium", "Astatine", "Radon", "Francium", "Radium", "Actinium", "Thorium", # noqa: E501 # FIXME CoP + "Protactinium", "Uranium", "Neptunium", "Plutonium", "Americium", "Curium", "Berkelium", "Californium", "Einsteinium", "Fermium", # noqa: E501 # FIXME CoP + "Mendelevium", "Nobelium", "Lawrencium", "Rutherfordium", "Dubnium", "Seaborgium", "Bohrium", "Hassium", "Meitnerium", "Darmstadtium", # noqa: E501 # FIXME CoP + "Roentgenium", "Copernicium", "Nihomium", "Flerovium", "Moscovium", "Livermorium", "Tennessine", "Oganesson", # noqa: E501 # FIXME CoP ] # fmt: on return data diff --git a/tests/datasource/data_connector/test_batch_filter.py b/tests/datasource/data_connector/test_batch_filter.py index 6566fa52ab60..3899748e925b 100644 --- a/tests/datasource/data_connector/test_batch_filter.py +++ b/tests/datasource/data_connector/test_batch_filter.py @@ -36,7 +36,7 @@ }, slice(3, None, None), [3, 4, 5, 6, 7, 8, 9], - id="batch_slice: str (without square brackets, forward traversal at start); (start, stop=None, step=None)", # noqa: E501 + id="batch_slice: str (without square brackets, forward traversal at start); (start, stop=None, step=None)", # noqa: E501 # FIXME CoP ), pytest.param( { @@ -124,7 +124,7 @@ }, slice(None, None, 2), [0, 2, 4, 6, 8], - 
id="batch_slice: str (full forward traversal with step=2); (start=None, stop=None, step=2)", # noqa: E501 + id="batch_slice: str (full forward traversal with step=2); (start=None, stop=None, step=2)", # noqa: E501 # FIXME CoP ), pytest.param( { @@ -144,7 +144,7 @@ def test_batch_filter_parse_batch_slice( original_list: List[int] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] batch_filter_obj: BatchFilter = build_batch_filter( - data_connector_query_dict=data_connector_query_dict # type: ignore[arg-type] + data_connector_query_dict=data_connector_query_dict # type: ignore[arg-type] # FIXME CoP ) assert batch_filter_obj.index == parsed_batch_slice assert original_list[parsed_batch_slice] == sliced_list diff --git a/tests/datasource/data_connector/test_data_connector_util.py b/tests/datasource/data_connector/test_data_connector_util.py index 1dcd5635e5ea..f7502bade6e1 100644 --- a/tests/datasource/data_connector/test_data_connector_util.py +++ b/tests/datasource/data_connector/test_data_connector_util.py @@ -9,7 +9,7 @@ convert_batch_identifiers_to_data_reference_string_using_regex, map_batch_definition_to_data_reference_string_using_regex, ) -from great_expectations.datasource.fluent.data_connector.google_cloud_storage_data_connector import ( # noqa: E501 +from great_expectations.datasource.fluent.data_connector.google_cloud_storage_data_connector import ( # noqa: E501 # FIXME CoP list_gcs_keys, ) @@ -163,24 +163,24 @@ def test__invert_regex_to_data_reference_template(): assert returned == "{name}-*.csv" # From https://github.com/madisonmay/CommonRegex/blob/master/commonregex.py - date = r"(?:(?]+[^\s`!()\[\]{};:\'".,<>?\xab\xbb\u201c\u201d\u2018\u2019])?)' - email = r"([a-z0-9!#$%&'*+\/=?^_`{|.}~-]+@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)" # noqa: E501 - ip = r"(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)" # noqa: E501 - ipv6 = 
r"\s*(?!.*::.*::)(?:(?!:)|:(?=:))(?:[0-9a-f]{0,4}(?:(?<=::)|(? CallbackResult: # noqa: C901 +def put_datasource_cb(request: PreparedRequest) -> CallbackResult: # noqa: C901 # FIXME CoP LOGGER.debug(f"{request.method} {request.url}") if not request.url: raise NotImplementedError("request.url should not be empty") @@ -430,8 +430,8 @@ def get_datasources_cb( LOGGER.debug(f"{request.method} {url}") parsed_url = urllib.parse.urlparse(url) - query_params = urllib.parse.parse_qs(parsed_url.query) # type: ignore[type-var] - queried_names: Sequence[str] = query_params.get("name", []) # type: ignore[assignment] + query_params = urllib.parse.parse_qs(parsed_url.query) # type: ignore[type-var] # FIXME CoP + queried_names: Sequence[str] = query_params.get("name", []) # type: ignore[assignment] # FIXME CoP all_datasources: dict[str, dict] = _CLOUD_API_FAKE_DB["datasources"] datasources_list: list[dict] = list(all_datasources.values()) @@ -453,8 +453,8 @@ def get_expectation_suites_cb(request: PreparedRequest) -> CallbackResult: LOGGER.debug(f"{request.method} {url}") parsed_url = urllib.parse.urlparse(url) - query_params = urllib.parse.parse_qs(parsed_url.query) # type: ignore[type-var] - queried_names: Sequence[str] = query_params.get("name", []) # type: ignore[assignment] + query_params = urllib.parse.parse_qs(parsed_url.query) # type: ignore[type-var] # FIXME CoP + queried_names: Sequence[str] = query_params.get("name", []) # type: ignore[assignment] # FIXME CoP exp_suites: dict[str, dict] = _CLOUD_API_FAKE_DB["expectation_suites"] exp_suite_list: list[dict] = [d["data"] for d in exp_suites.values()] @@ -475,7 +475,7 @@ def get_expectation_suite_by_id_cb( LOGGER.debug(f"{request.method} {url}") parsed_url = urllib.parse.urlparse(url) - expectation_id: str = parsed_url.path.split("/")[-1] # type: ignore[arg-type,assignment] + expectation_id: str = parsed_url.path.split("/")[-1] # type: ignore[arg-type,assignment] # FIXME CoP expectation_suite: dict | None = 
_CLOUD_API_FAKE_DB["expectation_suites"].get(expectation_id) if expectation_suite: @@ -538,7 +538,7 @@ def put_expectation_suites_cb(request: PreparedRequest) -> CallbackResult: payload: dict = json.loads(request.body) parsed_url = urllib.parse.urlparse(request.url) - suite_id: str = parsed_url.path.split("/")[-1] # type: ignore[arg-type,assignment] + suite_id: str = parsed_url.path.split("/")[-1] # type: ignore[arg-type,assignment] # FIXME CoP name = payload["data"]["name"] @@ -578,7 +578,7 @@ def put_expectation_suites_cb(request: PreparedRequest) -> CallbackResult: def delete_expectation_suites_cb(request: PreparedRequest) -> CallbackResult: parsed_url = urllib.parse.urlparse(request.url) - suite_id: str = parsed_url.path.split("/")[-1] # type: ignore[arg-type,assignment] + suite_id: str = parsed_url.path.split("/")[-1] # type: ignore[arg-type,assignment] # FIXME CoP exp_suites: dict[str, dict] = _CLOUD_API_FAKE_DB["expectation_suites"] old_suite = exp_suites.pop(suite_id, None) if not old_suite: @@ -605,8 +605,8 @@ def get_checkpoints_cb(requests: PreparedRequest) -> CallbackResult: LOGGER.debug(f"{requests.method} {url}") parsed_url = urllib.parse.urlparse(url) - query_params = urllib.parse.parse_qs(parsed_url.query) # type: ignore[type-var] - queried_names: Sequence[str] = query_params.get("name", []) # type: ignore[assignment] + query_params = urllib.parse.parse_qs(parsed_url.query) # type: ignore[type-var] # FIXME CoP + queried_names: Sequence[str] = query_params.get("name", []) # type: ignore[assignment] # FIXME CoP checkpoints: dict[str, dict] = _CLOUD_API_FAKE_DB["checkpoints"] checkpoint_list: list[dict] = list(checkpoints.values()) @@ -625,10 +625,11 @@ def get_checkpoint_by_id_cb(request: PreparedRequest) -> CallbackResult: LOGGER.debug(f"{request.method} {url}") parsed_url = urllib.parse.urlparse(url) - checkpoint_id: str = parsed_url.path.split("/")[-1] # type: ignore[arg-type,assignment] + checkpoint_id: str = parsed_url.path.split("/")[-1] # type: 
ignore[arg-type,assignment] # FIXME CoP if checkpoint := _CLOUD_API_FAKE_DB["checkpoints"].get(checkpoint_id): - result = CallbackResult(200, headers=DEFAULT_HEADERS, body=json.dumps(checkpoint)) + resp_body = json.dumps({"data": checkpoint}) + result = CallbackResult(200, headers=DEFAULT_HEADERS, body=resp_body) else: result = CallbackResult( 404, @@ -684,6 +685,37 @@ def post_checkpoints_cb(request: PreparedRequest) -> CallbackResult: return result +def put_checkpoint_cb(request: PreparedRequest) -> CallbackResult: + if not request.body: + raise NotImplementedError("Handling missing body") + + payload: dict = json.loads(request.body) + name = payload["data"]["name"] + + checkpoints: dict[str, dict] = _CLOUD_API_FAKE_DB["checkpoints"] + checkpoint_names: set[str] = _CLOUD_API_FAKE_DB["CHECKPOINT_NAMES"] + + if name not in checkpoint_names: + result = CallbackResult( + 404, + headers=DEFAULT_HEADERS, + body=ErrorPayloadSchema( + errors=[ + { + "code": "404", + "detail": f"Checkpoint '{name}' not found", + "source": None, + } + ] + ).json(), + ) + else: + checkpoint_id = payload["data"]["id"] + checkpoints[checkpoint_id] = payload["data"] + result = CallbackResult(200, headers=DEFAULT_HEADERS, body=json.dumps(payload)) + return result + + def delete_checkpoint_by_id_cb( request: PreparedRequest, ) -> CallbackResult: @@ -714,8 +746,8 @@ def delete_checkpoint_by_name_cb( LOGGER.debug(f"{request.method} {url}") parsed_url = urllib.parse.urlparse(url) - query_params = urllib.parse.parse_qs(parsed_url.query) # type: ignore[type-var] - queried_names: list[str] = query_params.get("name", []) # type: ignore[assignment] + query_params = urllib.parse.parse_qs(parsed_url.query) # type: ignore[type-var] # FIXME CoP + queried_names: list[str] = query_params.get("name", []) # type: ignore[assignment] # FIXME CoP if not queried_names: raise ValueError("Must provide checkpoint name for deletion.") @@ -952,6 +984,11 @@ def gx_cloud_api_fake_ctx( 
re.compile(urllib.parse.urljoin(org_url_base_V1, f"checkpoints/{UUID_REGEX}")), get_checkpoint_by_id_cb, ) + resp_mocker.add_callback( + responses.PUT, + re.compile(urllib.parse.urljoin(org_url_base_V1, f"checkpoints/{UUID_REGEX}")), + put_checkpoint_cb, + ) resp_mocker.add_callback( responses.POST, urllib.parse.urljoin(org_url_base_V1, "validation-results"), diff --git a/tests/datasource/fluent/conftest.py b/tests/datasource/fluent/conftest.py index 1187e2f7ff20..03e465a8896f 100644 --- a/tests/datasource/fluent/conftest.py +++ b/tests/datasource/fluent/conftest.py @@ -95,18 +95,18 @@ def sqlachemy_execution_engine_mock_cls( partitioner_query_response: An optional list of dictionaries. Each dictionary is a row returned from the partitioner query. The keys are the column names and the value is the column values, eg: [{'year': 2021, 'month': 1}, {'year': 2021, 'month': 2}] - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP class MockSqlAlchemyExecutionEngine(SqlAlchemyExecutionEngine): def __init__(self, create_temp_table: bool = True, *args, **kwargs): - # We should likely let the user pass in an engine. In a SqlAlchemyExecutionEngine used in # noqa: E501 + # We should likely let the user pass in an engine. 
In a SqlAlchemyExecutionEngine used in # noqa: E501 # FIXME CoP # non-mocked code the engine property is of the type: # from sqlalchemy.engine import Engine as SaEngine - self.engine = MockSaEngine(dialect=Dialect(dialect)) # type: ignore[assignment] + self.engine = MockSaEngine(dialect=Dialect(dialect)) # type: ignore[assignment] # FIXME CoP self._create_temp_table = create_temp_table @override - def get_batch_data_and_markers( # type: ignore[override] + def get_batch_data_and_markers( # type: ignore[override] # FIXME CoP self, batch_spec: SqlAlchemyDatasourceBatchSpec ) -> tuple[BatchData, BatchMarkers]: validate_batch_spec(batch_spec) @@ -121,7 +121,7 @@ def __init__(self, attributes): # We know that partitioner_query_response is non-empty because of validation # at the top of the outer function. # In some cases, such as in the datetime partitioners, - # a dictionary is returned our from out partitioner query with the key as the parameter_name. # noqa: E501 + # a dictionary is returned our from out partitioner query with the key as the parameter_name. # noqa: E501 # FIXME CoP # Otherwise, a list of values is returned. 
if isinstance(partitioner_query_response[0], dict): return [Row(row_dict) for row_dict in partitioner_query_response] @@ -135,7 +135,7 @@ def __init__(self, *args, **kwargs): pass @override - def get_batch_data_and_markers(self, batch_spec) -> tuple[BatchData, BatchMarkers]: # type: ignore[override] + def get_batch_data_and_markers(self, batch_spec) -> tuple[BatchData, BatchMarkers]: # type: ignore[override] # FIXME CoP return BatchData(self), BatchMarkers(ge_load_time=None) @@ -466,9 +466,9 @@ def _source( dialect=dialect, partitioner_query_response=partitioner_response, ) - original_override = PostgresDatasource.execution_engine_override # type: ignore[misc] + original_override = PostgresDatasource.execution_engine_override # type: ignore[misc] # FIXME CoP try: - PostgresDatasource.execution_engine_override = execution_eng_cls # type: ignore[misc] + PostgresDatasource.execution_engine_override = execution_eng_cls # type: ignore[misc] # FIXME CoP postgres_datasource = PostgresDatasource( name="my_datasource", connection_string=connection_string, @@ -478,14 +478,14 @@ def _source( postgres_datasource._data_context = data_context yield postgres_datasource finally: - PostgresDatasource.execution_engine_override = original_override # type: ignore[misc] + PostgresDatasource.execution_engine_override = original_override # type: ignore[misc] # FIXME CoP # We may be able to parameterize this fixture so we can instantiate _source in the fixture. # This would reduce the `with ...` boilerplate in the individual tests. 
@pytest.fixture def create_source() -> ContextManager: - return _source # type: ignore[return-value] + return _source # type: ignore[return-value] # FIXME CoP @pytest.fixture diff --git a/tests/datasource/fluent/data_asset/data_connector/test_azure_blob_storage_data_connector.py b/tests/datasource/fluent/data_asset/data_connector/test_azure_blob_storage_data_connector.py index 2defadb120bc..c449f05b5d64 100644 --- a/tests/datasource/fluent/data_asset/data_connector/test_azure_blob_storage_data_connector.py +++ b/tests/datasource/fluent/data_asset/data_connector/test_azure_blob_storage_data_connector.py @@ -308,7 +308,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): # TODO: ALEX-UNCOMMENT_WHEN_SORTERS_ARE_INCLUDED_AND_TEST_SORTED_BATCH_DEFINITION_LIST # @pytest.mark.big # @mock.patch( -# "great_expectations.datasource.fluent.data_asset.data_connector.azure_blob_storage_data_connector.list_azure_keys" # noqa: E501 +# "great_expectations.datasource.fluent.data_asset.data_connector.azure_blob_storage_data_connector.list_azure_keys" # noqa: E501 # FIXME CoP # ) # def test_return_all_batch_definitions_sorted( # mock_list_keys, @@ -359,7 +359,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): # data_connector_name="fluent", # data_asset_name="my_azure_blob_storage_data_asset", # batch_identifiers=IDDict( -# {"path": "alex_20200809_1000.csv", "name": "alex", "timestamp": "20200809", "price": "1000"} # noqa: E501 +# {"path": "alex_20200809_1000.csv", "name": "alex", "timestamp": "20200809", "price": "1000"} # noqa: E501 # FIXME CoP # ), # ), # BatchDefinition( @@ -367,7 +367,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): # data_connector_name="fluent", # data_asset_name="my_azure_blob_storage_data_asset", # batch_identifiers=IDDict( -# {"path": "eugene_20200809_1500.csv", "name": "eugene", "timestamp": "20200809", "price": "1500"} # noqa: E501 +# {"path": "eugene_20200809_1500.csv", "name": "eugene", "timestamp": 
"20200809", "price": "1500"} # noqa: E501 # FIXME CoP # ), # ), # BatchDefinition( @@ -375,7 +375,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): # data_connector_name="fluent", # data_asset_name="my_azure_blob_storage_data_asset", # batch_identifiers=IDDict( -# {"path": "james_20200811_1009.csv", "name": "james", "timestamp": "20200811", "price": "1009"} # noqa: E501 +# {"path": "james_20200811_1009.csv", "name": "james", "timestamp": "20200811", "price": "1009"} # noqa: E501 # FIXME CoP # ), # ), # BatchDefinition( @@ -383,7 +383,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): # data_connector_name="fluent", # data_asset_name="my_azure_blob_storage_data_asset", # batch_identifiers=IDDict( -# {"path": "abe_20200809_1040.csv", "name": "abe", "timestamp": "20200809", "price": "1040"} # noqa: E501 +# {"path": "abe_20200809_1040.csv", "name": "abe", "timestamp": "20200809", "price": "1040"} # noqa: E501 # FIXME CoP # ), # ), # BatchDefinition( @@ -391,7 +391,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): # data_connector_name="fluent", # data_asset_name="my_azure_blob_storage_data_asset", # batch_identifiers=IDDict( -# {"path": "will_20200809_1002.csv", "name": "will", "timestamp": "20200809", "price": "1002"} # noqa: E501 +# {"path": "will_20200809_1002.csv", "name": "will", "timestamp": "20200809", "price": "1002"} # noqa: E501 # FIXME CoP # ), # ), # BatchDefinition( @@ -399,7 +399,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): # data_connector_name="fluent", # data_asset_name="my_azure_blob_storage_data_asset", # batch_identifiers=IDDict( -# {"path": "james_20200713_1567.csv", "name": "james", "timestamp": "20200713", "price": "1567"} # noqa: E501 +# {"path": "james_20200713_1567.csv", "name": "james", "timestamp": "20200713", "price": "1567"} # noqa: E501 # FIXME CoP # ), # ), # BatchDefinition( @@ -407,7 +407,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): 
# data_connector_name="fluent", # data_asset_name="my_azure_blob_storage_data_asset", # batch_identifiers=IDDict( -# {"path": "eugene_20201129_1900.csv", "name": "eugene", "timestamp": "20201129", "price": "1900"} # noqa: E501 +# {"path": "eugene_20201129_1900.csv", "name": "eugene", "timestamp": "20201129", "price": "1900"} # noqa: E501 # FIXME CoP # ), # ), # BatchDefinition( @@ -415,7 +415,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): # data_connector_name="fluent", # data_asset_name="my_azure_blob_storage_data_asset", # batch_identifiers=IDDict( -# {"path": "will_20200810_1001.csv", "name": "will", "timestamp": "20200810", "price": "1001"} # noqa: E501 +# {"path": "will_20200810_1001.csv", "name": "will", "timestamp": "20200810", "price": "1001"} # noqa: E501 # FIXME CoP # ), # ), # BatchDefinition( @@ -423,7 +423,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): # data_connector_name="fluent", # data_asset_name="my_azure_blob_storage_data_asset", # batch_identifiers=IDDict( -# {"path": "james_20200810_1003.csv", "name": "james", "timestamp": "20200810", "price": "1003"} # noqa: E501 +# {"path": "james_20200810_1003.csv", "name": "james", "timestamp": "20200810", "price": "1003"} # noqa: E501 # FIXME CoP # ), # ), # BatchDefinition( @@ -431,7 +431,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): # data_connector_name="fluent", # data_asset_name="my_azure_blob_storage_data_asset", # batch_identifiers=IDDict( -# {"path": "alex_20200819_1300.csv", "name": "alex", "timestamp": "20200819", "price": "1300"} # noqa: E501 +# {"path": "alex_20200819_1300.csv", "name": "alex", "timestamp": "20200819", "price": "1300"} # noqa: E501 # FIXME CoP # ), # ), # ] diff --git a/tests/datasource/fluent/data_asset/data_connector/test_google_cloud_storage_data_connector.py b/tests/datasource/fluent/data_asset/data_connector/test_google_cloud_storage_data_connector.py index 11658db785ba..734b33ecd090 100644 --- 
a/tests/datasource/fluent/data_asset/data_connector/test_google_cloud_storage_data_connector.py +++ b/tests/datasource/fluent/data_asset/data_connector/test_google_cloud_storage_data_connector.py @@ -308,7 +308,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): # TODO: ALEX-UNCOMMENT_WHEN_SORTERS_ARE_INCLUDED_AND_TEST_SORTED_BATCH_DEFINITION_LIST # @pytest.mark.big # @mock.patch( -# "great_expectations.datasource.fluent.data_asset.data_connector.google_cloud_storage_data_connector.list_gcs_keys" # noqa: E501 +# "great_expectations.datasource.fluent.data_asset.data_connector.google_cloud_storage_data_connector.list_gcs_keys" # noqa: E501 # FIXME CoP # ) # def test_return_all_batch_definitions_sorted( # mock_list_keys, @@ -358,7 +358,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): # data_connector_name="fluent", # data_asset_name="my_google_cloud_storage_data_asset", # batch_identifiers=IDDict( -# {"path": "alex_20200809_1000.csv", "name": "alex", "timestamp": "20200809", "price": "1000"} # noqa: E501 +# {"path": "alex_20200809_1000.csv", "name": "alex", "timestamp": "20200809", "price": "1000"} # noqa: E501 # FIXME CoP # ), # ), # BatchDefinition( @@ -366,7 +366,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): # data_connector_name="fluent", # data_asset_name="my_google_cloud_storage_data_asset", # batch_identifiers=IDDict( -# {"path": "eugene_20200809_1500.csv", "name": "eugene", "timestamp": "20200809", "price": "1500"} # noqa: E501 +# {"path": "eugene_20200809_1500.csv", "name": "eugene", "timestamp": "20200809", "price": "1500"} # noqa: E501 # FIXME CoP # ), # ), # BatchDefinition( @@ -374,7 +374,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): # data_connector_name="fluent", # data_asset_name="my_google_cloud_storage_data_asset", # batch_identifiers=IDDict( -# {"path": "james_20200811_1009.csv", "name": "james", "timestamp": "20200811", "price": "1009"} # noqa: E501 +# {"path": 
"james_20200811_1009.csv", "name": "james", "timestamp": "20200811", "price": "1009"} # noqa: E501 # FIXME CoP # ), # ), # BatchDefinition( @@ -382,7 +382,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): # data_connector_name="fluent", # data_asset_name="my_google_cloud_storage_data_asset", # batch_identifiers=IDDict( -# {"path": "abe_20200809_1040.csv", "name": "abe", "timestamp": "20200809", "price": "1040"} # noqa: E501 +# {"path": "abe_20200809_1040.csv", "name": "abe", "timestamp": "20200809", "price": "1040"} # noqa: E501 # FIXME CoP # ), # ), # BatchDefinition( @@ -390,7 +390,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): # data_connector_name="fluent", # data_asset_name="my_google_cloud_storage_data_asset", # batch_identifiers=IDDict( -# {"path": "will_20200809_1002.csv", "name": "will", "timestamp": "20200809", "price": "1002"} # noqa: E501 +# {"path": "will_20200809_1002.csv", "name": "will", "timestamp": "20200809", "price": "1002"} # noqa: E501 # FIXME CoP # ), # ), # BatchDefinition( @@ -398,7 +398,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): # data_connector_name="fluent", # data_asset_name="my_google_cloud_storage_data_asset", # batch_identifiers=IDDict( -# {"path": "james_20200713_1567.csv", "name": "james", "timestamp": "20200713", "price": "1567"} # noqa: E501 +# {"path": "james_20200713_1567.csv", "name": "james", "timestamp": "20200713", "price": "1567"} # noqa: E501 # FIXME CoP # ), # ), # BatchDefinition( @@ -406,7 +406,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): # data_connector_name="fluent", # data_asset_name="my_google_cloud_storage_data_asset", # batch_identifiers=IDDict( -# {"path": "eugene_20201129_1900.csv", "name": "eugene", "timestamp": "20201129", "price": "1900"} # noqa: E501 +# {"path": "eugene_20201129_1900.csv", "name": "eugene", "timestamp": "20201129", "price": "1900"} # noqa: E501 # FIXME CoP # ), # ), # BatchDefinition( @@ -414,7 
+414,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): # data_connector_name="fluent", # data_asset_name="my_google_cloud_storage_data_asset", # batch_identifiers=IDDict( -# {"path": "will_20200810_1001.csv", "name": "will", "timestamp": "20200810", "price": "1001"} # noqa: E501 +# {"path": "will_20200810_1001.csv", "name": "will", "timestamp": "20200810", "price": "1001"} # noqa: E501 # FIXME CoP # ), # ), # BatchDefinition( @@ -422,7 +422,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): # data_connector_name="fluent", # data_asset_name="my_google_cloud_storage_data_asset", # batch_identifiers=IDDict( -# {"path": "james_20200810_1003.csv", "name": "james", "timestamp": "20200810", "price": "1003"} # noqa: E501 +# {"path": "james_20200810_1003.csv", "name": "james", "timestamp": "20200810", "price": "1003"} # noqa: E501 # FIXME CoP # ), # ), # BatchDefinition( @@ -430,7 +430,7 @@ def test_return_all_batch_definitions_unsorted(mock_list_keys): # data_connector_name="fluent", # data_asset_name="my_google_cloud_storage_data_asset", # batch_identifiers=IDDict( -# {"path": "alex_20200819_1300.csv", "name": "alex", "timestamp": "20200819", "price": "1300"} # noqa: E501 +# {"path": "alex_20200819_1300.csv", "name": "alex", "timestamp": "20200819", "price": "1300"} # noqa: E501 # FIXME CoP # ), # ), # ] diff --git a/tests/datasource/fluent/data_asset/test_path_asset.py b/tests/datasource/fluent/data_asset/test_path_asset.py index e04c13206dff..b9979e3a7758 100644 --- a/tests/datasource/fluent/data_asset/test_path_asset.py +++ b/tests/datasource/fluent/data_asset/test_path_asset.py @@ -295,7 +295,7 @@ def test_add_batch_definition_fluent_file_path__add_batch_definition_path_fails_ ], ) @pytest.mark.parametrize("asset", _path_asset_parameters(), indirect=["asset"]) -def test_add_batch_definition_fluent_file_path__add_batch_definition_path_fails_if_multiple_files_are_found( # noqa: E501 +def 
test_add_batch_definition_fluent_file_path__add_batch_definition_path_fails_if_multiple_files_are_found( # noqa: E501 # FIXME CoP datasource, asset, path: PathStr, file_path_data_connector ): """This edge case occurs if a user doesn't actually provide a path, but @@ -363,7 +363,7 @@ def test_add_batch_definition_fluent_file_path__add_batch_definition_yearly_succ pytest.param(re.compile(r"data_2024.csv"), id="re.Pattern"), ], ) -def test_add_batch_definition_fluent_file_path__add_batch_definition_yearly_fails_if_required_group_is_missing( # noqa: E501 +def test_add_batch_definition_fluent_file_path__add_batch_definition_yearly_fails_if_required_group_is_missing( # noqa: E501 # FIXME CoP datasource, asset, sort, batching_regex ): # arrange @@ -389,7 +389,7 @@ def test_add_batch_definition_fluent_file_path__add_batch_definition_yearly_fail pytest.param(re.compile(r"data_(?P\d{4})-(?P\d{4}).csv"), id="re.Pattern"), ], ) -def test_add_batch_definition_fluent_file_path__add_batch_definition_yearly_fails_if_unknown_groups_are_found( # noqa: E501 +def test_add_batch_definition_fluent_file_path__add_batch_definition_yearly_fails_if_unknown_groups_are_found( # noqa: E501 # FIXME CoP datasource, asset, sort, batching_regex ): # arrange @@ -448,7 +448,7 @@ def test_add_batch_definition_fluent_file_path__add_batch_definition_monthly_suc pytest.param(re.compile(r"data_(?P\d{4})-(?P\d{2}).csv"), id="re.Pattern"), ], ) -def test_add_batch_definition_fluent_file_path__add_batch_definition_monthly_fails_if_required_group_is_missing( # noqa: E501 +def test_add_batch_definition_fluent_file_path__add_batch_definition_monthly_fails_if_required_group_is_missing( # noqa: E501 # FIXME CoP datasource, asset, sort, batching_regex ): # arrange @@ -468,7 +468,7 @@ def test_add_batch_definition_fluent_file_path__add_batch_definition_monthly_fai @pytest.mark.unit @pytest.mark.parametrize("asset", _path_asset_parameters(), indirect=["asset"]) @pytest.mark.parametrize("sort", [True, False]) -def 
test_add_batch_definition_fluent_file_path__add_batch_definition_monthly_fails_if_unknown_groups_are_found( # noqa: E501 +def test_add_batch_definition_fluent_file_path__add_batch_definition_monthly_fails_if_unknown_groups_are_found( # noqa: E501 # FIXME CoP datasource, asset, sort ): # arrange @@ -522,7 +522,7 @@ def test_add_batch_definition_fluent_file_path__add_batch_definition_daily_succe pytest.param(re.compile(r"data_2024.csv"), id="re.Pattern"), ], ) -def test_add_batch_definition_fluent_file_path__add_batch_definition_daily_fails_if_required_group_is_missing( # noqa: E501 +def test_add_batch_definition_fluent_file_path__add_batch_definition_daily_fails_if_required_group_is_missing( # noqa: E501 # FIXME CoP datasource, asset, sort, batching_regex ): # arrange @@ -553,7 +553,7 @@ def test_add_batch_definition_fluent_file_path__add_batch_definition_daily_fails ), ], ) -def test_add_batch_definition_fluent_file_path__add_batch_definition_daily_fails_if_unknown_groups_are_found( # noqa: E501 +def test_add_batch_definition_fluent_file_path__add_batch_definition_daily_fails_if_unknown_groups_are_found( # noqa: E501 # FIXME CoP datasource, asset, sort, batching_regex ): # arrange diff --git a/tests/datasource/fluent/integration/integration_test_utils.py b/tests/datasource/fluent/integration/integration_test_utils.py index 00dc346a6488..f2c5825ce41c 100644 --- a/tests/datasource/fluent/integration/integration_test_utils.py +++ b/tests/datasource/fluent/integration/integration_test_utils.py @@ -81,15 +81,15 @@ def run_checkpoint_and_data_doc( expected_metric_values = { "expect_table_row_count_to_be_between": { "value": 10000, - "rendered_template": "Must have greater than or equal to $min_value and less than or equal to $max_value rows.", # noqa: E501 + "rendered_template": "Must have greater than or equal to $min_value and less than or equal to $max_value rows.", # noqa: E501 # FIXME CoP }, "expect_column_max_to_be_between": { "value": 6, - "rendered_template": 
"$column maximum value must be greater than or equal to $min_value and less than or equal to $max_value.", # noqa: E501 + "rendered_template": "$column maximum value must be greater than or equal to $min_value and less than or equal to $max_value.", # noqa: E501 # FIXME CoP }, "expect_column_median_to_be_between": { "value": 1, - "rendered_template": "$column median must be greater than or equal to $min_value and less than or equal to $max_value.", # noqa: E501 + "rendered_template": "$column median must be greater than or equal to $min_value and less than or equal to $max_value.", # noqa: E501 # FIXME CoP }, } assert len(validation_result.results) == 3 @@ -114,15 +114,15 @@ def run_checkpoint_and_data_doc( with open(path) as f: data_doc_index = f.read() - # Checking for ge-success-icon tests the result table was generated and it was populated with a successful run. # noqa: E501 + # Checking for ge-success-icon tests the result table was generated and it was populated with a successful run. 
# noqa: E501 # FIXME CoP assert "ge-success-icon" in data_doc_index assert "ge-failed-icon" not in data_doc_index -def run_batch_head( # noqa: C901 +def run_batch_head( # noqa: C901 # FIXME CoP datasource_test_data: tuple[AbstractDataContext, Datasource, DataAsset, BatchRequest], fetch_all: bool | str, - n_rows: int | float | str | None, # noqa: PYI041 + n_rows: int | float | str | None, # noqa: PYI041 # FIXME CoP success: bool, ) -> None: _, datasource, _, batch_request = datasource_test_data @@ -172,7 +172,7 @@ def run_batch_head( # noqa: C901 # if n_rows is greater than the total_row_count, we only expect total_row_count rows elif n_rows > total_row_count: assert head_data_row_count == total_row_count - # if n_rows is negative and abs(n_rows) is larger than total_row_count we expect zero rows # noqa: E501 + # if n_rows is negative and abs(n_rows) is larger than total_row_count we expect zero rows # noqa: E501 # FIXME CoP elif n_rows < 0 and abs(n_rows) > total_row_count: assert head_data_row_count == 0 # if n_rows is negative, we expect all but the final abs(n_rows) @@ -192,11 +192,11 @@ def run_batch_head( # noqa: C901 assert isinstance(head_data, HeadData) assert len(head_data.data.index) == 5 - assert set(metrics[table_columns_metric.id]) == expected_columns # type: ignore[arg-type] + assert set(metrics[table_columns_metric.id]) == expected_columns # type: ignore[arg-type] # FIXME CoP else: with pytest.raises(ValidationError) as e: - batch.head(n_rows=n_rows, fetch_all=fetch_all) # type: ignore[arg-type] + batch.head(n_rows=n_rows, fetch_all=fetch_all) # type: ignore[arg-type] # FIXME CoP n_rows_validation_error = ( "1 validation error for Head\n" "n_rows\n" diff --git a/tests/datasource/fluent/integration/test_connections.py b/tests/datasource/fluent/integration/test_connections.py index 7fbc48998cbe..94f163a3205e 100644 --- a/tests/datasource/fluent/integration/test_connections.py +++ b/tests/datasource/fluent/integration/test_connections.py @@ -21,6 +21,9 
@@ @pytest.mark.snowflake class TestSnowflake: + @pytest.mark.xfail( + raises=sa.exc.ProgrammingError + ) # inspector.get_table_names() fails with this role @pytest.mark.parametrize( "connection_string", [ @@ -43,7 +46,7 @@ def test_un_queryable_asset_should_raise_error( ) inspector: Inspector = sa.inspection.inspect(snowflake_ds.get_engine()) - inspector_tables: list[str] = inspector.get_table_names() + inspector_tables: list[str] = list(inspector.get_table_names(schema="public")) print(f"tables: {len(inspector_tables)}\n{inspector_tables}") random.shuffle(inspector_tables) @@ -86,7 +89,7 @@ def test_queryable_asset_should_pass_test_connection( ) inspector: Inspector = sa.inspection.inspect(snowflake_ds.get_engine()) - inspector_tables = inspector.get_table_names() + inspector_tables = list(inspector.get_table_names()) print(f"tables: {len(inspector_tables)}\n{inspector_tables}") table_name = random.choice(inspector_tables) diff --git a/tests/datasource/fluent/integration/test_integration_datasource.py b/tests/datasource/fluent/integration/test_integration_datasource.py index 7f739ad71ac5..749f7e1b2cf3 100644 --- a/tests/datasource/fluent/integration/test_integration_datasource.py +++ b/tests/datasource/fluent/integration/test_integration_datasource.py @@ -95,7 +95,7 @@ def test_run_checkpoint_and_data_doc( def test_batch_head( datasource_test_data: tuple[AbstractDataContext, Datasource, DataAsset, BatchRequest], fetch_all: bool | str, - n_rows: int | float | str | None, # noqa: PYI041 + n_rows: int | float | str | None, # noqa: PYI041 # FIXME CoP success: bool, ) -> None: run_batch_head( @@ -114,7 +114,7 @@ def test_success_with_partitioners(self, empty_data_context): passenger_count_value = 5 asset = datasource.add_query_asset( name="query_asset", - query=f" SELECT * from yellow_tripdata_sample_2019_02 WHERE passenger_count = {passenger_count_value}", # noqa: E501 + query=f" SELECT * from yellow_tripdata_sample_2019_02 WHERE passenger_count = 
{passenger_count_value}", # noqa: E501 # FIXME CoP ) validator = context.get_validator( batch_request=asset.build_batch_request( @@ -318,7 +318,7 @@ def test_partitioner_build_batch_request_allows_selecting_by_date_and_datetime_a asset = datasource.add_query_asset( "query_asset", - "SELECT date(pickup_datetime) as pickup_date, passenger_count FROM yellow_tripdata_sample_2019_02", # noqa: E501 + "SELECT date(pickup_datetime) as pickup_date, passenger_count FROM yellow_tripdata_sample_2019_02", # noqa: E501 # FIXME CoP ) partitioner = PartitionerColumnValue(column_name="pickup_date") # Test getting all batches @@ -345,8 +345,8 @@ def test_partitioner_build_batch_request_allows_selecting_by_date_and_datetime_a "great_expectations.datasource.fluent.sql_datasource._partitioner_and_sql_asset_to_batch_identifier_data" ) as mock_batch_identifiers: mock_batch_identifiers.return_value = [ - {"pickup_date": datetime.datetime(2019, 2, 1)}, # noqa: DTZ001 - {"pickup_date": datetime.datetime(2019, 2, 2)}, # noqa: DTZ001 + {"pickup_date": datetime.datetime(2019, 2, 1)}, # noqa: DTZ001 # FIXME CoP + {"pickup_date": datetime.datetime(2019, 2, 2)}, # noqa: DTZ001 # FIXME CoP ] specified_batches = asset.get_batch_identifiers_list( asset.build_batch_request( @@ -378,7 +378,7 @@ def test_success_with_partitioners_from_batch_definitions( passenger_count_value = 5 asset = datasource.add_query_asset( name="query_asset", - query=f"SELECT * from yellow_tripdata_sample_2020 WHERE passenger_count = {passenger_count_value}", # noqa: E501 + query=f"SELECT * from yellow_tripdata_sample_2020 WHERE passenger_count = {passenger_count_value}", # noqa: E501 # FIXME CoP ) batch_definition = asset.add_batch_definition( name="whatevs", @@ -442,13 +442,13 @@ def test_batch_request_error_messages( assert "new_option" in batch_request.options with pytest.raises(pydantic.ValidationError): - batch_request.options = {10: "value for non-string key"} # type: ignore[dict-item] + batch_request.options = {10: 
"value for non-string key"} # type: ignore[dict-item] # FIXME CoP with pytest.raises(pydantic.ValidationError): - batch_request.options = "not a dictionary" # type: ignore[assignment] + batch_request.options = "not a dictionary" # type: ignore[assignment] # FIXME CoP # batch_slice can be updated if it takes the correct form - batch_request.batch_slice = "[5:10]" # type: ignore[assignment] + batch_request.batch_slice = "[5:10]" # type: ignore[assignment] # FIXME CoP assert batch_request.batch_slice == slice(5, 10, None) # batch_slice can be updated via update method @@ -456,10 +456,10 @@ def test_batch_request_error_messages( assert batch_request.batch_slice == slice(2, 10, 2) with pytest.raises(ValueError): - batch_request.batch_slice = "nonsense slice" # type: ignore[assignment] + batch_request.batch_slice = "nonsense slice" # type: ignore[assignment] # FIXME CoP with pytest.raises(ValueError): - batch_request.batch_slice = True # type: ignore[assignment] + batch_request.batch_slice = True # type: ignore[assignment] # FIXME CoP @pytest.mark.cloud diff --git a/tests/datasource/fluent/integration/test_sql_datasources.py b/tests/datasource/fluent/integration/test_sql_datasources.py index 9cb4b1f9154a..add3a2add055 100644 --- a/tests/datasource/fluent/integration/test_sql_datasources.py +++ b/tests/datasource/fluent/integration/test_sql_datasources.py @@ -360,7 +360,7 @@ def __call__( @pytest.fixture( scope="class", ) -def table_factory() -> Generator[TableFactory, None, None]: # noqa: C901 +def table_factory() -> Generator[TableFactory, None, None]: # noqa: C901 # FIXME CoP """ Class scoped. 
Given a SQLALchemy engine, table_name and schema, @@ -380,7 +380,7 @@ def _table_factory( LOGGER.info(f"Skipping table creation for {table_names} for {sa_engine.dialect.name}") return LOGGER.info( - f"SQLA:{SQLA_VERSION} - Creating `{sa_engine.dialect.name}` table for {table_names} if it does not exist" # noqa: E501 + f"SQLA:{SQLA_VERSION} - Creating `{sa_engine.dialect.name}` table for {table_names} if it does not exist" # noqa: E501 # FIXME CoP ) dialect = GXSqlDialect(sa_engine.dialect.name) created_tables: list[dict[Literal["table_name", "schema"], str | None]] = [] @@ -501,7 +501,7 @@ def snowflake_ds( "snowflake", connection_string="snowflake://ci:${SNOWFLAKE_CI_USER_PASSWORD}@oca29081.us-east-1/ci" f"/{RAND_SCHEMA}?warehouse=ci&role=ci", - # NOTE: uncomment this and set SNOWFLAKE_USER to run tests against your own snowflake account # noqa: E501 + # NOTE: uncomment this and set SNOWFLAKE_USER to run tests against your own snowflake account # noqa: E501 # FIXME CoP # connection_string="snowflake://${SNOWFLAKE_USER}@oca29081.us-east-1/DEMO_DB/RESTAURANTS?warehouse=COMPUTE_WH&role=PUBLIC&authenticator=externalbrowser", ) return ds @@ -720,7 +720,7 @@ def _fails_expectation(param_id: str) -> bool: This does not mean that it SHOULD fail, but that it currently does. 
""" column_name: ColNameParamId - dialect, column_name, *_ = param_id.split("-") # type: ignore[assignment] + dialect, column_name, *_ = param_id.split("-") # type: ignore[assignment] # FIXME CoP dialects_need_fixes: list[DatabaseType] = FAILS_EXPECTATION.get(column_name, []) return dialect in dialects_need_fixes @@ -742,7 +742,7 @@ def _raw_query_check_column_exists( qualified_table_name: str, gx_execution_engine: SqlAlchemyExecutionEngine, ) -> bool: - """Use a simple 'SELECT {column_name_param} from {qualified_table_name};' query to check if the column exists.'""" # noqa: E501 + """Use a simple 'SELECT {column_name_param} from {qualified_table_name};' query to check if the column exists.'""" # noqa: E501 # FIXME CoP with gx_execution_engine.get_connection() as connection: query = f"""SELECT {column_name_param} FROM {qualified_table_name} LIMIT 1;""" print(f"query:\n {query}") @@ -834,7 +834,7 @@ def test_unquoted_params( pytest.skip(f"see _desired_state tests for {column_name!r}") elif _fails_expectation(param_id): # apply marker this way so that xpasses can be seen in the report - request.applymarker(pytest.mark.xfail) + request.applymarker(pytest.mark.xfail(run=False)) print(f"expectations_type:\n {expectation_type}") @@ -939,7 +939,7 @@ def test_quoted_params( pytest.skip(f"quote char dialect mismatch: {column_name[0]}") elif _fails_expectation(param_id): # apply marker this way so that xpasses can be seen in the report - request.applymarker(pytest.mark.xfail) + request.applymarker(pytest.mark.xfail(run=False)) print(f"expectations_type:\n {expectation_type}") diff --git a/tests/datasource/fluent/test_config.py b/tests/datasource/fluent/test_config.py index f06631e1e618..5f4f28de7684 100644 --- a/tests/datasource/fluent/test_config.py +++ b/tests/datasource/fluent/test_config.py @@ -254,7 +254,7 @@ def test_from_datasource(self, asset_dict: dict): def test_from_gx_config(self, asset_dict: dict): """ Ensure that unset fields are excluded even when being parsed 
by the top-level `GxConfig` class. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # fill in required args asset_dict.update( { @@ -866,7 +866,7 @@ def test_config_substitution_retains_original_value_on_save( print(context.fluent_config) - ds_w_subs: SqliteDatasource = context.fluent_config.get_datasource(name="my_sqlite_ds_w_subs") # type: ignore[assignment] + ds_w_subs: SqliteDatasource = context.fluent_config.get_datasource(name="my_sqlite_ds_w_subs") # type: ignore[assignment] # FIXME CoP assert str(ds_w_subs.connection_string) == r"${MY_CONN_STR}" assert ( @@ -909,7 +909,7 @@ def test_config_substitution_retains_original_value_on_save_w_run_time_mods( datasources = context.fluent_datasources assert ( - str(datasources["my_sqlite_ds_w_subs"].connection_string) # type: ignore[attr-defined] + str(datasources["my_sqlite_ds_w_subs"].connection_string) # type: ignore[attr-defined] # FIXME CoP == r"${MY_CONN_STR}" ) @@ -917,7 +917,7 @@ def test_config_substitution_retains_original_value_on_save_w_run_time_mods( context.data_sources.add_sqlite("my_new_one", connection_string="sqlite://") # add a new asset to an existing data - sqlite_ds_w_subs: SqliteDatasource = context.data_sources.get( # type: ignore[assignment] + sqlite_ds_w_subs: SqliteDatasource = context.data_sources.get( # type: ignore[assignment] # FIXME CoP "my_sqlite_ds_w_subs" ) sqlite_ds_w_subs.add_table_asset("new_asset", table_name="yellow_tripdata_sample_2019_01") diff --git a/tests/datasource/fluent/test_config_str.py b/tests/datasource/fluent/test_config_str.py index aa0e40339e47..d587e807e68c 100644 --- a/tests/datasource/fluent/test_config_str.py +++ b/tests/datasource/fluent/test_config_str.py @@ -25,7 +25,7 @@ class MyClass(FluentBaseModel): normal_field: str secret_field: SecretStr config_field: ConfigStr - config_field_w_default: ConfigStr = r"hey-${MY_SECRET}" # type: ignore[assignment] + config_field_w_default: ConfigStr = r"hey-${MY_SECRET}" # type: ignore[assignment] # FIXME CoP 
@pytest.fixture @@ -90,9 +90,9 @@ def test_config_substitution(monkeypatch: MonkeyPatch, env_config_provider: _Con m = MyClass( normal_field="normal", - secret_field="secret", # type: ignore[arg-type] - config_field=r"${MY_ENV_VAR}", # type: ignore[arg-type] - config_field_w_default=r"hello-${MY_ENV_VAR}", # type: ignore[arg-type] + secret_field="secret", # type: ignore[arg-type] # FIXME CoP + config_field=r"${MY_ENV_VAR}", # type: ignore[arg-type] # FIXME CoP + config_field_w_default=r"hello-${MY_ENV_VAR}", # type: ignore[arg-type] # FIXME CoP ) assert m.config_field.get_config_value(env_config_provider) == "success" assert m.config_field_w_default.get_config_value(env_config_provider) == "hello-success" @@ -105,8 +105,8 @@ def test_config_substitution_dict( m = MyClass( normal_field="normal", - secret_field="secret", # type: ignore[arg-type] - config_field=r"${MY_ENV_VAR}", # type: ignore[arg-type] + secret_field="secret", # type: ignore[arg-type] # FIXME CoP + config_field=r"${MY_ENV_VAR}", # type: ignore[arg-type] # FIXME CoP ) d = m.dict(config_provider=env_config_provider) @@ -127,8 +127,8 @@ class MyCollection(FluentBaseModel): my_classes=[ MyClass( normal_field="normal", - secret_field="secret", # type: ignore[arg-type] - config_field=r"${MY_ENV_VAR}", # type: ignore[arg-type] + secret_field="secret", # type: ignore[arg-type] # FIXME CoP + config_field=r"${MY_ENV_VAR}", # type: ignore[arg-type] # FIXME CoP ) ] ) @@ -151,8 +151,8 @@ class MyCollection(FluentBaseModel): my_classes=[ MyClass( normal_field="normal", - secret_field="secret", # type: ignore[arg-type] - config_field=r"${MY_ENV_VAR}", # type: ignore[arg-type] + secret_field="secret", # type: ignore[arg-type] # FIXME CoP + config_field=r"${MY_ENV_VAR}", # type: ignore[arg-type] # FIXME CoP ) ] ) @@ -167,8 +167,8 @@ def test_serialization_returns_original(monkeypatch: MonkeyPatch, method: str): m = MyClass( normal_field="normal", - secret_field="secret", # type: ignore[arg-type] - 
config_field=r"${MY_ENV_VAR}", # type: ignore[arg-type] + secret_field="secret", # type: ignore[arg-type] # FIXME CoP + config_field=r"${MY_ENV_VAR}", # type: ignore[arg-type] # FIXME CoP ) serialize_method: Callable = getattr(m, method) dumped = str(serialize_method()) @@ -190,8 +190,8 @@ class MyCollection(FluentBaseModel): my_classes=[ MyClass( normal_field="normal", - secret_field="secret", # type: ignore[arg-type] - config_field=r"${MY_ENV_VAR}", # type: ignore[arg-type] + secret_field="secret", # type: ignore[arg-type] # FIXME CoP + config_field=r"${MY_ENV_VAR}", # type: ignore[arg-type] # FIXME CoP ) ] ) @@ -239,8 +239,8 @@ def test_serialization( monkeypatch.setenv("MY_SECRET", "dont_serialize_me") m = MyClass( normal_field="normal", - secret_field="my_secret", # type: ignore[arg-type] - config_field=r"${MY_SECRET}", # type: ignore[arg-type] + secret_field="my_secret", # type: ignore[arg-type] # FIXME CoP + config_field=r"${MY_SECRET}", # type: ignore[arg-type] # FIXME CoP ) # but it should not actually be used diff --git a/tests/datasource/fluent/test_contexts.py b/tests/datasource/fluent/test_contexts.py index ba5010cdd285..a0250fda6ebf 100644 --- a/tests/datasource/fluent/test_contexts.py +++ b/tests/datasource/fluent/test_contexts.py @@ -461,7 +461,7 @@ def verify_asset_name_cb(request: PreparedRequest) -> CallbackResult: payload = CloudResponseSchema.from_datasource_json(request.body) LOGGER.info(f"PUT payload: ->\n{pf(payload.dict())}") - assets = payload.data["assets"] # type: ignore[index] + assets = payload.data["assets"] # type: ignore[index] # FIXME CoP assert assets, "No assets found" for asset in assets: if asset["name"] == DEFAULT_PANDAS_DATA_ASSET_NAME: diff --git a/tests/datasource/fluent/test_databricks_sql_datasource.py b/tests/datasource/fluent/test_databricks_sql_datasource.py index 24982ee24768..8fc2cb0aab14 100644 --- a/tests/datasource/fluent/test_databricks_sql_datasource.py +++ 
b/tests/datasource/fluent/test_databricks_sql_datasource.py @@ -17,7 +17,7 @@ [ { "loc": ("connection_string",), - "msg": "ConfigStr - contains no config template strings in the format '${MY_CONFIG_VAR}' or '$MY_CONFIG_VAR'", # noqa: E501 + "msg": "ConfigStr - contains no config template strings in the format '${MY_CONFIG_VAR}' or '$MY_CONFIG_VAR'", # noqa: E501 # FIXME CoP "type": "value_error", }, { @@ -33,7 +33,7 @@ [ { "loc": ("connection_string",), - "msg": "ConfigStr - contains no config template strings in the format '${MY_CONFIG_VAR}' or '$MY_CONFIG_VAR'", # noqa: E501 + "msg": "ConfigStr - contains no config template strings in the format '${MY_CONFIG_VAR}' or '$MY_CONFIG_VAR'", # noqa: E501 # FIXME CoP "type": "value_error", }, { @@ -49,7 +49,7 @@ [ { "loc": ("connection_string",), - "msg": "ConfigStr - contains no config template strings in the format '${MY_CONFIG_VAR}' or '$MY_CONFIG_VAR'", # noqa: E501 + "msg": "ConfigStr - contains no config template strings in the format '${MY_CONFIG_VAR}' or '$MY_CONFIG_VAR'", # noqa: E501 # FIXME CoP "type": "value_error", }, { @@ -65,7 +65,7 @@ [ { "loc": ("connection_string",), - "msg": "ConfigStr - contains no config template strings in the format '${MY_CONFIG_VAR}' or '$MY_CONFIG_VAR'", # noqa: E501 + "msg": "ConfigStr - contains no config template strings in the format '${MY_CONFIG_VAR}' or '$MY_CONFIG_VAR'", # noqa: E501 # FIXME CoP "type": "value_error", }, { @@ -81,7 +81,7 @@ [ { "loc": ("connection_string",), - "msg": "ConfigStr - contains no config template strings in the format '${MY_CONFIG_VAR}' or '$MY_CONFIG_VAR'", # noqa: E501 + "msg": "ConfigStr - contains no config template strings in the format '${MY_CONFIG_VAR}' or '$MY_CONFIG_VAR'", # noqa: E501 # FIXME CoP "type": "value_error", }, { diff --git a/tests/datasource/fluent/test_fabric.py b/tests/datasource/fluent/test_fabric.py index 57ea750fc15b..fa960d66d9be 100644 --- a/tests/datasource/fluent/test_fabric.py +++ 
b/tests/datasource/fluent/test_fabric.py @@ -38,7 +38,7 @@ def empty_data_context(empty_data_context: AbstractDataContext) -> AbstractDataC def patch_power_bi_datasource(monkeypatch: pytest.MonkeyPatch) -> None: """ Monkeypatch the PowerBI datasource to enable datasource creation even without `semantic-link` installed - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP monkeypatch.setattr(FabricPowerBIDatasource, "test_connection", lambda _: True) diff --git a/tests/datasource/fluent/test_invalid_datasource.py b/tests/datasource/fluent/test_invalid_datasource.py index 5804d099cfec..a90d722e3bb3 100644 --- a/tests/datasource/fluent/test_invalid_datasource.py +++ b/tests/datasource/fluent/test_invalid_datasource.py @@ -242,7 +242,7 @@ def test_extra_fields_are_ignored( Standard fields such as `type`, `name`, `id` etc. should be included in the InvalidDatasource instance and should never be sensitive. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP print(f"Datasource config:\n{pf(invalid_ds_cfg)}") invalid_ds = invalid_datasource_factory(invalid_ds_cfg) @@ -296,7 +296,7 @@ def test_connection_raises_informative_error( { "name": "my invalid ds", "type": random_ds_type, - "foo": "bar", # regardless of the type this extra field should make the datasource invalid # noqa: E501 + "foo": "bar", # regardless of the type this extra field should make the datasource invalid # noqa: E501 # FIXME CoP "assets": [ {"name": "definitely_invalid", "type": "NOT_A_VALID_TYPE"}, {"name": "maybe_valid", "type": "table", "table_name": "my_table"}, diff --git a/tests/datasource/fluent/test_metadatasource.py b/tests/datasource/fluent/test_metadatasource.py index 514288861975..9977f11a9d21 100644 --- a/tests/datasource/fluent/test_metadatasource.py +++ b/tests/datasource/fluent/test_metadatasource.py @@ -75,7 +75,7 @@ def get_context( @validate_arguments def __init__(self, context_root_dir: Optional[DirectoryPath] = None) -> None: self.root_directory = context_root_dir - self._data_sources: 
DataSourceManager = DataSourceManager(self) # type: ignore[arg-type] + self._data_sources: DataSourceManager = DataSourceManager(self) # type: ignore[arg-type] # FIXME CoP self._datasources: Dict[str, Datasource] = {} self.config_provider: _ConfigurationProvider | None = None logger.info(f"Available Factories - {self._data_sources.factories}") @@ -87,7 +87,7 @@ def data_sources(self) -> DataSourceManager: @property def datasources(self) -> DatasourceDict: - return self._datasources # type: ignore[return-value] + return self._datasources # type: ignore[return-value] # FIXME CoP def _add_fluent_datasource(self, datasource: Datasource) -> Datasource: self._datasources[datasource.name] = datasource @@ -133,7 +133,7 @@ def context_sources_cleanup() -> Generator[DataSourceManager, None, None]: """Return the sources object and reset types/factories on teardown""" try: # setup - sources_copy = copy.deepcopy(DataSourceManager._DataSourceManager__crud_registry) # type: ignore[attr-defined] + sources_copy = copy.deepcopy(DataSourceManager._DataSourceManager__crud_registry) # type: ignore[attr-defined] # FIXME CoP type_lookup_copy = copy.deepcopy(DataSourceManager.type_lookup) sources = get_context().data_sources @@ -143,13 +143,13 @@ def context_sources_cleanup() -> Generator[DataSourceManager, None, None]: yield sources finally: - DataSourceManager._DataSourceManager__crud_registry = sources_copy # type: ignore[attr-defined] + DataSourceManager._DataSourceManager__crud_registry = sources_copy # type: ignore[attr-defined] # FIXME CoP DataSourceManager.type_lookup = type_lookup_copy @pytest.fixture(scope="function") def empty_sources(context_sources_cleanup) -> Generator[DataSourceManager, None, None]: - DataSourceManager._DataSourceManager__crud_registry.clear() # type: ignore[attr-defined] + DataSourceManager._DataSourceManager__crud_registry.clear() # type: ignore[attr-defined] # FIXME CoP DataSourceManager.type_lookup.clear() assert not DataSourceManager.type_lookup yield 
context_sources_cleanup @@ -284,7 +284,7 @@ def execution_engine_type(self) -> Type[ExecutionEngine]: return DummyExecutionEngine @override - def test_connection(self) -> None: ... # type: ignore[override] + def test_connection(self) -> None: ... # type: ignore[override] # FIXME CoP # check that no types were registered assert len(empty_sources.type_lookup) < 1 @@ -294,7 +294,7 @@ class MissingExecEngineTypeDatasource(Datasource): type: str = "valid" @override - def test_connection(self) -> None: ... # type: ignore[override] + def test_connection(self) -> None: ... # type: ignore[override] # FIXME CoP with pytest.raises(NotImplementedError): MissingExecEngineTypeDatasource(name="name").get_execution_engine() @@ -302,7 +302,7 @@ def test_connection(self) -> None: ... # type: ignore[override] def test_ds_assets_type_field_not_set(self, empty_sources: DataSourceManager): with pytest.raises( TypeRegistrationError, - match="No `type` field found for `BadAssetDatasource.asset_types` -> `MissingTypeAsset` unable to register asset type", # noqa: E501 + match="No `type` field found for `BadAssetDatasource.asset_types` -> `MissingTypeAsset` unable to register asset type", # noqa: E501 # FIXME CoP ): class MissingTypeAsset(DataAsset): @@ -324,7 +324,7 @@ def execution_engine_type(self) -> Type[ExecutionEngine]: return DummyExecutionEngine @override - def test_connection(self) -> None: ... # type: ignore[override] + def test_connection(self) -> None: ... 
# type: ignore[override] # FIXME CoP # check that no types were registered assert len(empty_sources.type_lookup) < 1 diff --git a/tests/datasource/fluent/test_pandas_azure_blob_storage_datasource.py b/tests/datasource/fluent/test_pandas_azure_blob_storage_datasource.py index 7ef6e1020928..675ce6c2d1c2 100644 --- a/tests/datasource/fluent/test_pandas_azure_blob_storage_datasource.py +++ b/tests/datasource/fluent/test_pandas_azure_blob_storage_datasource.py @@ -139,7 +139,7 @@ def test_construct_pandas_abs_datasource_with_conn_str_and_credential(): pandas_abs_datasource = PandasAzureBlobStorageDatasource( name="pandas_abs_datasource", azure_options={ # Representative of format noted in official docs - "conn_str": "DefaultEndpointsProtocol=https;AccountName=storagesample;AccountKey=my_account_key", # noqa: E501 + "conn_str": "DefaultEndpointsProtocol=https;AccountName=storagesample;AccountKey=my_account_key", # noqa: E501 # FIXME CoP "credential": "my_credential", }, ) @@ -167,7 +167,7 @@ def test_construct_pandas_abs_datasource_with_valid_conn_str_assigns_account_nam pandas_abs_datasource = PandasAzureBlobStorageDatasource( name="pandas_abs_datasource", azure_options={ # Representative of format noted in official docs - "conn_str": "DefaultEndpointsProtocol=https;AccountName=storagesample;AccountKey=my_account_key", # noqa: E501 + "conn_str": "DefaultEndpointsProtocol=https;AccountName=storagesample;AccountKey=my_account_key", # noqa: E501 # FIXME CoP "credential": "my_credential", }, ) @@ -178,13 +178,13 @@ def test_construct_pandas_abs_datasource_with_valid_conn_str_assigns_account_nam @pytest.mark.big def test_construct_pandas_abs_datasource_with_multiple_auth_methods_raises_error(): - # Raises error in DataContext's schema validation due to having both `account_url` and `conn_str` # noqa: E501 + # Raises error in DataContext's schema validation due to having both `account_url` and `conn_str` # noqa: E501 # FIXME CoP with 
pytest.raises(PandasAzureBlobStorageDatasourceError): pandas_abs_datasource = PandasAzureBlobStorageDatasource( name="pandas_abs_datasource", azure_options={ "account_url": "account.blob.core.windows.net", - "conn_str": "DefaultEndpointsProtocol=https;AccountName=storagesample;AccountKey=my_account_key", # noqa: E501 + "conn_str": "DefaultEndpointsProtocol=https;AccountName=storagesample;AccountKey=my_account_key", # noqa: E501 # FIXME CoP "credential": "my_credential", }, ) @@ -217,7 +217,7 @@ def test_add_csv_asset_to_datasource( @mock.patch("azure.storage.blob.BlobServiceClient") def test_construct_csv_asset_directly(mock_azure_client, mock_list_keys, object_keys: List[str]): mock_list_keys.return_value = object_keys - asset = CSVAsset( # type: ignore[call-arg] + asset = CSVAsset( # type: ignore[call-arg] # FIXME CoP name="csv_asset", ) assert asset.name == "csv_asset" diff --git a/tests/datasource/fluent/test_pandas_datasource.py b/tests/datasource/fluent/test_pandas_datasource.py index acc60635a2aa..1b8fe1e59fc7 100644 --- a/tests/datasource/fluent/test_pandas_datasource.py +++ b/tests/datasource/fluent/test_pandas_datasource.py @@ -3,6 +3,7 @@ import copy import inspect import logging +import os import pathlib import uuid from pprint import pformat as pf @@ -207,7 +208,7 @@ def test_minimal_validation(self, asset_class: Type[_PandasDataAsset]): This is also a proxy for testing that the dynamic pydantic model creation was successful. """ with pytest.raises(pydantic.ValidationError) as exc_info: - asset_class( # type: ignore[call-arg] + asset_class( # type: ignore[call-arg] # FIXME CoP name="test", invalid_keyword_arg="bad", ) @@ -354,15 +355,15 @@ def test_positional_arguments( # This is not a an ideal mock. # In this test we are validating that the read_method for a particular pandas datasource # has the correct positional arguments. - # We don't care about the actual data being read in and the batch that will be produced from that data. 
# noqa: E501 - # In fact, we call all our read methods on a path which might not be readable by the reader (eg calling # noqa: E501 - # read_json on a csv file). We patch the internal call that actually tries to read and create the batch. # noqa: E501 + # We don't care about the actual data being read in and the batch that will be produced from that data. # noqa: E501 # FIXME CoP + # In fact, we call all our read methods on a path which might not be readable by the reader (eg calling # noqa: E501 # FIXME CoP + # read_json on a csv file). We patch the internal call that actually tries to read and create the batch. # noqa: E501 # FIXME CoP # Ideally, we would rewrite this test so we wouldn't need to mock like this. mocker.patch( "great_expectations.datasource.fluent.pandas_datasource._PandasDataAsset.get_batch" ) - # read_* normally returns batch but, since we've added a mock in the line above, we get a mock object returned. # noqa: E501 - # We are calling it here for it's side effect on the default asset so get and inspect that afterwards. # noqa: E501 + # read_* normally returns batch but, since we've added a mock in the line above, we get a mock object returned. # noqa: E501 # FIXME CoP + # We are calling it here for it's side effect on the default asset so get and inspect that afterwards. 
# noqa: E501 # FIXME CoP _ = read_method(*positional_args.values()) default_asset = empty_data_context.data_sources.pandas_default.get_asset( name=DEFAULT_PANDAS_DATA_ASSET_NAME @@ -438,7 +439,7 @@ def test_default_pandas_datasource_name_conflict( def test_read_dataframe(empty_data_context: AbstractDataContext, test_df_pandas: pd.DataFrame): # validates that a dataframe object is passed with pytest.raises(ValueError) as exc_info: - _ = empty_data_context.data_sources.pandas_default.read_dataframe(dataframe={}) # type: ignore[arg-type] + _ = empty_data_context.data_sources.pandas_default.read_dataframe(dataframe={}) # type: ignore[arg-type] # FIXME CoP assert ( 'Cannot execute "PandasDatasource.read_dataframe()" without a valid "dataframe" argument.' @@ -474,17 +475,26 @@ def test_read_dataframe(empty_data_context: AbstractDataContext, test_df_pandas: @pytest.mark.cloud def test_cloud_get_csv_asset_not_in_memory(valid_file_path: pathlib.Path): # this test runs end-to-end in a real Cloud Data Context - context = gx.get_context(mode="cloud") - csv_asset_name = f"DA_{uuid.uuid4().hex}" - datasource = context.data_sources.pandas_default - _ = datasource.add_csv_asset( - name=csv_asset_name, - filepath_or_buffer=valid_file_path, + context = gx.get_context( + mode="cloud", + cloud_base_url=os.environ.get("GX_CLOUD_BASE_URL"), + cloud_organization_id=os.environ.get("GX_CLOUD_ORGANIZATION_ID"), + cloud_access_token=os.environ.get("GX_CLOUD_ACCESS_TOKEN"), ) - csv_asset = datasource.get_asset(name=csv_asset_name) - csv_asset.build_batch_request() + datasource_name = f"DS_{uuid.uuid4().hex}" + csv_asset_name = f"DA_{uuid.uuid4().hex}" + datasource = context.data_sources.add_pandas(name=datasource_name) + try: + _ = datasource.add_csv_asset( + name=csv_asset_name, + filepath_or_buffer=valid_file_path, + ) + csv_asset = datasource.get_asset(name=csv_asset_name) + csv_asset.build_batch_request() - assert csv_asset_name not in context.data_sources.all()._in_memory_data_assets + 
assert csv_asset_name not in context.data_sources.all()._in_memory_data_assets + finally: + context.data_sources.delete(name=datasource_name) @pytest.mark.filesystem diff --git a/tests/datasource/fluent/test_pandas_filesystem_datasource.py b/tests/datasource/fluent/test_pandas_filesystem_datasource.py index f885e35b6a5e..e9f1ade4f631 100644 --- a/tests/datasource/fluent/test_pandas_filesystem_datasource.py +++ b/tests/datasource/fluent/test_pandas_filesystem_datasource.py @@ -297,7 +297,7 @@ def test_invalid_connect_options( pandas_filesystem_datasource: PandasFilesystemDatasource, ): with pytest.raises(pydantic.ValidationError) as exc_info: - pandas_filesystem_datasource.add_csv_asset( # type: ignore[call-arg] + pandas_filesystem_datasource.add_csv_asset( # type: ignore[call-arg] # FIXME CoP name="csv_asset", glob_foobar="invalid", ) @@ -548,7 +548,7 @@ def datasource_test_connection_error_messages( request, ) -> tuple[PandasFilesystemDatasource, TestConnectionError]: _, test_connection_error = request.param(csv_path=csv_path) - csv_asset = CSVAsset( # type: ignore[call-arg] + csv_asset = CSVAsset( # type: ignore[call-arg] # FIXME CoP name="csv_asset", ) csv_asset._datasource = pandas_filesystem_datasource diff --git a/tests/datasource/fluent/test_pandas_google_cloud_storage_datasource.py b/tests/datasource/fluent/test_pandas_google_cloud_storage_datasource.py index b783e4f4bc53..9e37af08f9cb 100644 --- a/tests/datasource/fluent/test_pandas_google_cloud_storage_datasource.py +++ b/tests/datasource/fluent/test_pandas_google_cloud_storage_datasource.py @@ -164,7 +164,7 @@ def test_add_csv_asset_to_datasource( @mock.patch("google.cloud.storage.Client") def test_construct_csv_asset_directly(mock_gcs_client, mock_list_keys, object_keys: List[str]): mock_list_keys.return_value = object_keys - asset = CSVAsset( # type: ignore[call-arg] + asset = CSVAsset( # type: ignore[call-arg] # FIXME CoP name="csv_asset", ) assert asset.name == "csv_asset" diff --git 
a/tests/datasource/fluent/test_pandas_s3_datasource.py b/tests/datasource/fluent/test_pandas_s3_datasource.py index 73ffd4b92f2e..fad8d78b7306 100644 --- a/tests/datasource/fluent/test_pandas_s3_datasource.py +++ b/tests/datasource/fluent/test_pandas_s3_datasource.py @@ -117,7 +117,7 @@ def test_construct_csv_asset_directly(): @pytest.mark.aws_deps def test_invalid_connect_options(pandas_s3_datasource: PandasS3Datasource, aws_credentials): with pytest.raises(pydantic.ValidationError) as exc_info: - pandas_s3_datasource.add_csv_asset( # type: ignore[call-arg] + pandas_s3_datasource.add_csv_asset( # type: ignore[call-arg] # FIXME CoP name="csv_asset", extra_field="invalid", ) @@ -185,7 +185,7 @@ def test_invalid_connect_options_value( param({}, id="default connect options"), param({"s3_prefix": ""}, id="prefix ''"), param({"s3_delimiter": "/"}, id="s3_delimiter '/'"), - # param({"s3_prefix": "non_default"}, id="s3_prefix 'non_default'"), # TODO: what prefix should I test? # noqa: E501 + # param({"s3_prefix": "non_default"}, id="s3_prefix 'non_default'"), # TODO: what prefix should I test? 
# noqa: E501 # FIXME CoP param( {"s3_prefix": "", "s3_delimiter": "/", "s3_max_keys": 20}, id="all options", diff --git a/tests/datasource/fluent/test_postgres_datasource.py b/tests/datasource/fluent/test_postgres_datasource.py index f5c5c449175f..d033314b3ce2 100644 --- a/tests/datasource/fluent/test_postgres_datasource.py +++ b/tests/datasource/fluent/test_postgres_datasource.py @@ -56,7 +56,7 @@ from tests.sqlalchemy_test_doubles import Dialect, MockSaEngine, MockSaInspector if TYPE_CHECKING: - from unittest.mock import Mock # noqa: TID251 + from unittest.mock import Mock # noqa: TID251 # FIXME CoP from pytest_mock import MockFixture @@ -211,7 +211,7 @@ def year_month_partitioner(column_name: str) -> SqlPartitionerYearAndMonth: def test_construct_table_asset_directly_with_partitioner(create_source): with create_source(validate_batch_spec=lambda _: None, dialect="postgresql") as source: ( - source, # noqa: PLW2901 + source, # noqa: PLW2901 # FIXME CoP asset, ) = create_and_add_table_asset_without_testing_connection( source=source, @@ -253,7 +253,7 @@ def validate_batch_spec(spec: SqlAlchemyDatasourceBatchSpec) -> None: data_context=empty_data_context, ) as source: ( - source, # noqa: PLW2901 + source, # noqa: PLW2901 # FIXME CoP asset, ) = create_and_add_table_asset_without_testing_connection( source=source, name="my_asset", table_name="my_table" @@ -305,7 +305,7 @@ def collect_batch_spec(spec: SqlAlchemyDatasourceBatchSpec) -> None: data_context=empty_data_context, ) as source: ( - source, # noqa: PLW2901 + source, # noqa: PLW2901 # FIXME CoP asset, ) = create_and_add_table_asset_without_testing_connection( source=source, name="my_asset", table_name="my_table" @@ -339,7 +339,7 @@ def collect_batch_spec(spec: SqlAlchemyDatasourceBatchSpec) -> None: data_context=empty_data_context, ) as source: ( - source, # noqa: PLW2901 + source, # noqa: PLW2901 # FIXME CoP asset, ) = create_and_add_table_asset_without_testing_connection( source=source, name="my_asset", 
table_name="my_table" @@ -383,7 +383,7 @@ def collect_batch_spec(spec: SqlAlchemyDatasourceBatchSpec) -> None: partitioner_query_response=[{"year": year, "month": month} for month in list(range(1, 13))], ) as source: ( - source, # noqa: PLW2901 + source, # noqa: PLW2901 # FIXME CoP asset, ) = create_and_add_table_asset_without_testing_connection( source=source, name="my_asset", table_name="my_table" @@ -436,7 +436,7 @@ def validate_batch_spec(spec: SqlAlchemyDatasourceBatchSpec) -> None: partitioner_query_response=[{"month": month, "year": year}], ) as source: ( - source, # noqa: PLW2901 + source, # noqa: PLW2901 # FIXME CoP asset, ) = create_and_add_table_asset_without_testing_connection( source=source, name="my_asset", table_name="my_table" @@ -547,7 +547,7 @@ def test_get_batch_with_good_batch_request( data_context=empty_data_context, ) as source: ( - source, # noqa: PLW2901 + source, # noqa: PLW2901 # FIXME CoP asset, ) = create_and_add_table_asset_without_testing_connection( source=source, name="my_asset", table_name="my_table" @@ -578,7 +578,7 @@ def test_get_batch_with_malformed_batch_request( ): with create_source(validate_batch_spec=lambda _: None, dialect="postgresql") as source: ( - source, # noqa: PLW2901 + source, # noqa: PLW2901 # FIXME CoP asset, ) = create_and_add_table_asset_without_testing_connection( source=source, name="my_asset", table_name="my_table" @@ -599,7 +599,7 @@ def test_get_batch_with_malformed_batch_request( def test_get_bad_batch_request(create_source: CreateSourceFixture): with create_source(validate_batch_spec=lambda _: None, dialect="postgresql") as source: ( - source, # noqa: PLW2901 + source, # noqa: PLW2901 # FIXME CoP asset, ) = create_and_add_table_asset_without_testing_connection( source=source, name="my_asset", table_name="my_table" @@ -613,20 +613,20 @@ def test_get_bad_batch_request(create_source: CreateSourceFixture): def test_data_source_json_has_properties(create_source: CreateSourceFixture): with 
create_source(validate_batch_spec=lambda _: None, dialect="postgresql") as source: ( - source, # noqa: PLW2901 + source, # noqa: PLW2901 # FIXME CoP asset, ) = create_and_add_table_asset_without_testing_connection( source=source, name="my_asset", table_name="my_table" ) # type should be in dumped json even if not explicitly set - assert f'"type": "{asset.type}"' # noqa: PLW0129 + assert f'"type": "{asset.type}"' # noqa: PLW0129 # FIXME CoP @pytest.mark.postgresql def test_data_source_yaml_has_properties(create_source: CreateSourceFixture): with create_source(validate_batch_spec=lambda _: None, dialect="postgresql") as source: ( - source, # noqa: PLW2901 + source, # noqa: PLW2901 # FIXME CoP asset, ) = create_and_add_table_asset_without_testing_connection( source=source, name="my_asset", table_name="my_table" @@ -640,7 +640,7 @@ def test_data_source_yaml_has_properties(create_source: CreateSourceFixture): def test_datasource_dict_has_properties(create_source): with create_source(validate_batch_spec=lambda _: None, dialect="postgresql") as source: ( - source, # noqa: PLW2901 + source, # noqa: PLW2901 # FIXME CoP _, ) = create_and_add_table_asset_without_testing_connection( source=source, name="my_asset", table_name="my_table" @@ -679,7 +679,7 @@ def test_validate_valid_postgres_connection_string( dialect="postgresql", connection_string=connection_string, ): - # As long as no exception is thrown we consider this a pass. Pydantic normalizes the underlying # noqa: E501 + # As long as no exception is thrown we consider this a pass. Pydantic normalizes the underlying # noqa: E501 # FIXME CoP # connection string so a direct str comparison isn't possible. 
pass @@ -802,9 +802,9 @@ def has_table(table_name: str, schema: str | None = None) -> bool: return table_name in mock_inspector_returns["table_names"] # directly patching the instance rather then using mocker.patch - mock_inspector.get_schema_names = get_schema_names # type: ignore[method-assign] - mock_inspector.get_table_names = get_table_names # type: ignore[method-assign] - mock_inspector.has_table = has_table # type: ignore[method-assign] + mock_inspector.get_schema_names = get_schema_names # type: ignore[method-assign] # FIXME CoP + mock_inspector.get_table_names = get_table_names # type: ignore[method-assign] # FIXME CoP + mock_inspector.has_table = has_table # type: ignore[method-assign] # FIXME CoP inspect = mocker.patch("sqlalchemy.inspect") inspect.return_value = mock_inspector @@ -890,7 +890,7 @@ def test_adding_partitioner_persists_results( validate_partitioner=False, ) - final_yaml: dict = YAMLHandler().load( # type: ignore[assignment] + final_yaml: dict = YAMLHandler().load( # type: ignore[assignment] # FIXME CoP gx_yaml.read_text(), )["fluent_datasources"] diff --git a/tests/datasource/fluent/test_schemas.py b/tests/datasource/fluent/test_schemas.py index 4470a4f4ec02..a5fe90463163 100644 --- a/tests/datasource/fluent/test_schemas.py +++ b/tests/datasource/fluent/test_schemas.py @@ -11,7 +11,7 @@ from packaging.version import Version from great_expectations.datasource.fluent import ( - _PANDAS_SCHEMA_VERSION, # this is the version we run in the standard test pipeline. Update as needed # noqa: E501 + _PANDAS_SCHEMA_VERSION, # this is the version we run in the standard test pipeline. 
Update as needed # noqa: E501 # FIXME CoP _SCHEMAS_DIR, DataAsset, Datasource, @@ -59,7 +59,7 @@ def _models_and_schema_dirs() -> ( ["fluent_ds_or_asset_model", "schema_dir"], [pytest.param(t[0], t[1], id=t[2]) for t in _models_and_schema_dirs()], ) -def test_vcs_schemas_match( # noqa: C901 +def test_vcs_schemas_match( # noqa: C901 # FIXME CoP fluent_ds_or_asset_model: Type[Datasource | DataAsset], schema_dir: pathlib.Path ): """ @@ -93,7 +93,7 @@ def _sort_lists(schema_as_dict: dict) -> None: Args: schema_as_dict: source dictionary (will be modified "in-situ") - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP key: str value: Any @@ -158,7 +158,7 @@ def test_no_orphaned_schemas(): if schema.stem not in all_schemas: orphans.append(schema) - assert not orphans, f"The following schemas appear to be orphaned and should be removed. Run `invoke schema --sync --clean`\n{pf(orphans)}" # noqa: E501 + assert not orphans, f"The following schemas appear to be orphaned and should be removed. Run `invoke schema --sync --clean`\n{pf(orphans)}" # noqa: E501 # FIXME CoP if __name__ == "__main__": diff --git a/tests/datasource/fluent/test_snowflake_datasource.py b/tests/datasource/fluent/test_snowflake_datasource.py index ce2c72df03d9..c1c5e0aab83f 100644 --- a/tests/datasource/fluent/test_snowflake_datasource.py +++ b/tests/datasource/fluent/test_snowflake_datasource.py @@ -28,7 +28,7 @@ from great_expectations.execution_engine import SqlAlchemyExecutionEngine if TYPE_CHECKING: - from pytest.mark.structures import ParameterSet # type: ignore[import-not-found] + from pytest.mark.structures import ParameterSet # type: ignore[import-not-found] # FIXME CoP from pytest_mock import MockerFixture TEST_LOGGER: Final = logging.getLogger(__name__) diff --git a/tests/datasource/fluent/test_spark_azure_blob_storage_datasource.py b/tests/datasource/fluent/test_spark_azure_blob_storage_datasource.py index b43f7471c997..d030b72de973 100644 --- 
a/tests/datasource/fluent/test_spark_azure_blob_storage_datasource.py +++ b/tests/datasource/fluent/test_spark_azure_blob_storage_datasource.py @@ -116,7 +116,7 @@ def test_construct_spark_abs_datasource_with_conn_str_and_credential(): spark_abs_datasource = SparkAzureBlobStorageDatasource( name="spark_abs_datasource", azure_options={ # Representative of format noted in official docs - "conn_str": "DefaultEndpointsProtocol=https;AccountName=storagesample;AccountKey=my_account_key", # noqa: E501 + "conn_str": "DefaultEndpointsProtocol=https;AccountName=storagesample;AccountKey=my_account_key", # noqa: E501 # FIXME CoP "credential": "my_credential", }, ) @@ -146,7 +146,7 @@ def test_construct_spark_abs_datasource_with_valid_conn_str_assigns_account_name spark_abs_datasource = SparkAzureBlobStorageDatasource( name="spark_abs_datasource", azure_options={ # Representative of format noted in official docs - "conn_str": "DefaultEndpointsProtocol=https;AccountName=storagesample;AccountKey=my_account_key", # noqa: E501 + "conn_str": "DefaultEndpointsProtocol=https;AccountName=storagesample;AccountKey=my_account_key", # noqa: E501 # FIXME CoP "credential": "my_credential", }, ) @@ -158,13 +158,13 @@ def test_construct_spark_abs_datasource_with_valid_conn_str_assigns_account_name @pytest.mark.unit def test_construct_spark_abs_datasource_with_multiple_auth_methods_raises_error(): - # Raises error in DataContext's schema validation due to having both `account_url` and `conn_str` # noqa: E501 + # Raises error in DataContext's schema validation due to having both `account_url` and `conn_str` # noqa: E501 # FIXME CoP with pytest.raises(SparkAzureBlobStorageDatasourceError): spark_abs_datasource = SparkAzureBlobStorageDatasource( name="spark_abs_datasource", azure_options={ "account_url": "account.blob.core.windows.net", - "conn_str": "DefaultEndpointsProtocol=https;AccountName=storagesample;AccountKey=my_account_key", # noqa: E501 + "conn_str": 
"DefaultEndpointsProtocol=https;AccountName=storagesample;AccountKey=my_account_key", # noqa: E501 # FIXME CoP "credential": "my_credential", }, ) diff --git a/tests/datasource/fluent/test_spark_datasource.py b/tests/datasource/fluent/test_spark_datasource.py index 44f09c3a205a..0f631e37d4b2 100644 --- a/tests/datasource/fluent/test_spark_datasource.py +++ b/tests/datasource/fluent/test_spark_datasource.py @@ -149,7 +149,7 @@ def test_unmodifiable_config_option_warning( with pytest.warns(RuntimeWarning): _ = empty_data_context.data_sources.add_spark( name="my_spark_datasource", - spark_config=spark_config, # type: ignore[arg-type] + spark_config=spark_config, # type: ignore[arg-type] # FIXME CoP ) diff --git a/tests/datasource/fluent/test_spark_dbfs_datasource.py b/tests/datasource/fluent/test_spark_dbfs_datasource.py index 1deadbaa0e88..5bd72205c5bf 100644 --- a/tests/datasource/fluent/test_spark_dbfs_datasource.py +++ b/tests/datasource/fluent/test_spark_dbfs_datasource.py @@ -82,7 +82,7 @@ def test_construct_csv_asset_directly(): @pytest.mark.spark @pytest.mark.xfail( - reason="Accessing objects on pyfakefs.fake_filesystem.FakeFilesystem using Spark is not working (this test is conducted using Jupyter notebook manually)." # noqa: E501 + reason="Accessing objects on pyfakefs.fake_filesystem.FakeFilesystem using Spark is not working (this test is conducted using Jupyter notebook manually)." 
# noqa: E501 # FIXME CoP ) def test_get_batch_list_from_fully_specified_batch_request( spark_dbfs_datasource: SparkDBFSDatasource, diff --git a/tests/datasource/fluent/test_spark_filesystem_datasource.py b/tests/datasource/fluent/test_spark_filesystem_datasource.py index 1f1ddb0c591b..477b68179b45 100644 --- a/tests/datasource/fluent/test_spark_filesystem_datasource.py +++ b/tests/datasource/fluent/test_spark_filesystem_datasource.py @@ -805,7 +805,7 @@ def test_get_batch_list_from_directory_merges_files( batch_data = batch.data # The directory contains 12 files with 10,000 records each so the batch data # (spark dataframe) should contain 120,000 records: - assert batch_data.dataframe.count() == 12 * 10000 # type: ignore[attr-defined] + assert batch_data.dataframe.count() == 12 * 10000 # type: ignore[attr-defined] # FIXME CoP @pytest.mark.spark @@ -952,7 +952,7 @@ def bad_batching_regex_config( ) -> tuple[re.Pattern, TestConnectionError]: batching_regex = re.compile(r"green_tripdata_sample_(?P\d{4})-(?P\d{2})\.csv") test_connection_error = TestConnectionError( - f"""No file at base_directory path "{csv_path.resolve()}" matched regular expressions pattern "{batching_regex.pattern}" and/or glob_directive "**/*" for DataAsset "csv_asset".""" # noqa: E501 + f"""No file at base_directory path "{csv_path.resolve()}" matched regular expressions pattern "{batching_regex.pattern}" and/or glob_directive "**/*" for DataAsset "csv_asset".""" # noqa: E501 # FIXME CoP ) return batching_regex, test_connection_error @@ -995,7 +995,7 @@ def test_get_batch_identifiers_list_does_not_modify_input_batch_request( request = batch_def.build_batch_request({"year": "2018"}) request_before_call = copy.deepcopy(request) batches = asset.get_batch_identifiers_list(request) - # We assert the request before the call to get_batch_identifiers_list is equal to the request after the # noqa: E501 + # We assert the request before the call to get_batch_identifiers_list is equal to the request after the 
# noqa: E501 # FIXME CoP # call. This test exists because this call was modifying the request. assert request == request_before_call # We get all 12 batches, one for each month of 2018. @@ -1016,7 +1016,7 @@ def test_get_batch_does_not_modify_input_batch_request( request = batch_def.build_batch_request({"year": "2018"}) request_before_call = copy.deepcopy(request) _ = asset.get_batch(request) - # We assert the request before the call to get_batch is equal to the request after the # noqa: E501 + # We assert the request before the call to get_batch is equal to the request after the # noqa: E501 # FIXME CoP # call. This test exists because this call was modifying the request. assert request == request_before_call @@ -1068,7 +1068,7 @@ def expected_num_records_directory_asset_no_partitioner_2020_passenger_count_2( directory_asset_with_no_partitioner.build_batch_request() ) pre_partitioner_batch_data = pre_partitioner_batch.data - expected_num_records = pre_partitioner_batch_data.dataframe.filter( # type: ignore[attr-defined] + expected_num_records = pre_partitioner_batch_data.dataframe.filter( # type: ignore[attr-defined] # FIXME CoP F.col("pickup_datetime").contains("2018-01-11") ).count() assert expected_num_records == 3, "Check that the referenced data hasn't changed" @@ -1233,7 +1233,7 @@ def expected_num_records_file_asset_no_partitioner_2020_10_passenger_count_2( ) batch = file_asset_with_no_partitioner.get_batch(single_batch_batch_request) pre_partitioner_batch_data = batch.data - expected_num_records = pre_partitioner_batch_data.dataframe.filter( # type: ignore[attr-defined] + expected_num_records = pre_partitioner_batch_data.dataframe.filter( # type: ignore[attr-defined] # FIXME CoP F.col("passenger_count") == 2 ).count() assert expected_num_records == 2, "Check that the referenced data hasn't changed" @@ -1257,7 +1257,7 @@ def expected_num_records_file_asset_no_partitioner_2020_10( pre_partitioner_batch_data = batch.data expected_num_records = ( - 
pre_partitioner_batch_data.dataframe.filter( # type: ignore[attr-defined] + pre_partitioner_batch_data.dataframe.filter( # type: ignore[attr-defined] # FIXME CoP F.year(F.col("pickup_datetime")) == 2020 ) .filter(F.month(F.col("pickup_datetime")) == 11) @@ -1371,6 +1371,6 @@ def test_add_file_csv_asset_with_partitioner_conflicting_identifier_gets_correct post_partitioner_batch_data = post_partitioner_batch.data assert ( - post_partitioner_batch_data.dataframe.count() # type: ignore[attr-defined] + post_partitioner_batch_data.dataframe.count() # type: ignore[attr-defined] # FIXME CoP == expected_num_records_file_asset_no_partitioner_2020_10 ) diff --git a/tests/datasource/fluent/test_sql_datasources.py b/tests/datasource/fluent/test_sql_datasources.py index 35cc2328fc4e..fdbafc7e6c56 100644 --- a/tests/datasource/fluent/test_sql_datasources.py +++ b/tests/datasource/fluent/test_sql_datasources.py @@ -31,7 +31,7 @@ @pytest.fixture -def create_engine_spy(mocker: MockerFixture) -> Generator[mock.MagicMock, None, None]: # noqa: TID251 +def create_engine_spy(mocker: MockerFixture) -> Generator[mock.MagicMock, None, None]: # noqa: TID251 # FIXME CoP spy = mocker.spy(sa, "create_engine") yield spy if not spy.call_count: @@ -41,13 +41,13 @@ def create_engine_spy(mocker: MockerFixture) -> Generator[mock.MagicMock, None, @pytest.fixture def gx_sqlalchemy_execution_engine_spy( mocker: MockerFixture, monkeypatch: pytest.MonkeyPatch -) -> Generator[mock.MagicMock, None, None]: # noqa: TID251 +) -> Generator[mock.MagicMock, None, None]: # noqa: TID251 # FIXME CoP """ Mock the SQLDatasource.execution_engine_type property to return a spy so that what would be passed to the GX SqlAlchemyExecutionEngine constructor can be inspected. NOTE: This is not exactly what gets passed to the sqlalchemy.engine.create_engine() function, but it is close. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP spy = mocker.Mock(spec=SqlAlchemyExecutionEngine) monkeypatch.setattr(SQLDatasource, "execution_engine_type", spy) yield spy @@ -110,7 +110,7 @@ def _fake_create_engine(*args, **kwargs) -> sa.engine.Engine: class TestConfigPasstrough: def test_kwargs_passed_to_create_engine( self, - create_engine_spy: mock.MagicMock, # noqa: TID251 + create_engine_spy: mock.MagicMock, # noqa: TID251 # FIXME CoP monkeypatch: pytest.MonkeyPatch, ephemeral_context_with_defaults: EphemeralDataContext, ds_kwargs: dict, @@ -133,7 +133,7 @@ def test_kwargs_passed_to_create_engine( def test_ds_config_passed_to_gx_sqlalchemy_execution_engine( self, - gx_sqlalchemy_execution_engine_spy: mock.MagicMock, # noqa: TID251 + gx_sqlalchemy_execution_engine_spy: mock.MagicMock, # noqa: TID251 # FIXME CoP monkeypatch: pytest.MonkeyPatch, ephemeral_context_with_defaults: EphemeralDataContext, ds_kwargs: dict, @@ -173,7 +173,7 @@ def test_table_quoted_name_type_does_not_exist( DBMS entity names (table, column, etc.) must adhere to correct case insensitivity standards. All upper case is standard for Oracle, DB2, and Snowflake, while all lowercase is standard for SQLAlchemy; hence, proper conversion to quoted names must occur. This test ensures that mechanism for detection of non-existent table_nam" works correctly. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP table_names_in_dbms_schema: list[str] = [ "table_name_0", "table_name_1", @@ -203,7 +203,7 @@ def test_table_quoted_name_type_all_upper_case_normalizion_is_noop(): DBMS entity names (table, column, etc.) must adhere to correct case insensitivity standards. All upper case is standard for Oracle, DB2, and Snowflake, while all lowercase is standard for SQLAlchemy; hence, proper conversion to quoted names must occur. This test ensures that all upper case entity usage does not undergo any conversion. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP table_names_in_dbms_schema: list[str] = [ "ACTORS", "ARTISTS", @@ -247,7 +247,7 @@ def test_table_quoted_name_type_all_lower_case_normalizion_full(): DBMS entity names (table, column, etc.) must adhere to correct case insensitivity standards. All upper case is standard for Oracle, DB2, and Snowflake, while all lowercase is standard for SQLAlchemy; hence, proper conversion to quoted names must occur. This test ensures that all lower case entity usage undergo conversion to quoted literals. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP table_names_in_dbms_schema: list[str] = [ "actors", "artists", @@ -331,7 +331,6 @@ def test_specific_datasource_warnings( context.data_sources.add_sql(name="my_datasource", connection_string=connection_string) else: with warnings.catch_warnings(): - warnings.simplefilter("error") # should already be the default context.data_sources.add_sql( name="my_datasource", connection_string=connection_string ).test_connection() diff --git a/tests/datasource/fluent/test_sqlite_datasource.py b/tests/datasource/fluent/test_sqlite_datasource.py index c06d717a9cfc..152c6091f169 100644 --- a/tests/datasource/fluent/test_sqlite_datasource.py +++ b/tests/datasource/fluent/test_sqlite_datasource.py @@ -98,9 +98,9 @@ def _create_sqlite_source( # These type ignores when dealing with the execution_engine_override are because # it is a generic. We don't care about the exact type since we swap it out with our # mock for the purpose of this test and then replace it with the original. 
- original_override = SqliteDatasource.execution_engine_override # type: ignore[misc] + original_override = SqliteDatasource.execution_engine_override # type: ignore[misc] # FIXME CoP try: - SqliteDatasource.execution_engine_override = execution_eng_cls # type: ignore[misc] + SqliteDatasource.execution_engine_override = execution_eng_cls # type: ignore[misc] # FIXME CoP sqlite_datasource = SqliteDatasource( name="sqlite_datasource", connection_string="sqlite://", @@ -110,7 +110,7 @@ def _create_sqlite_source( sqlite_datasource._data_context = data_context yield sqlite_datasource finally: - SqliteDatasource.execution_engine_override = original_override # type: ignore[misc] + SqliteDatasource.execution_engine_override = original_override # type: ignore[misc] # FIXME CoP @pytest.fixture diff --git a/tests/datasource/fluent/test_viral_snippets.py b/tests/datasource/fluent/test_viral_snippets.py index 9bbeed0e17a9..47520e922737 100644 --- a/tests/datasource/fluent/test_viral_snippets.py +++ b/tests/datasource/fluent/test_viral_snippets.py @@ -172,7 +172,7 @@ def test_context_add_or_update_datasource( assert datasource.connection_string == f"sqlite:///{sqlite_database_path}" # modify the datasource - datasource.connection_string = "sqlite:///" # type: ignore[assignment] + datasource.connection_string = "sqlite:///" # type: ignore[assignment] # FIXME CoP context.data_sources.add_or_update_sqlite(datasource) updated_datasource = context.data_sources.all()[datasource.name] @@ -235,7 +235,7 @@ def test_quickstart_workflow( In particular, this test covers the file-backend and cloud-backed usecases with this script. The ephemeral usecase is covered in: tests/integration/docusaurus/tutorials/quickstart/quickstart.py - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # Slight deviation from the Quickstart here: # 1. Using existing contexts instead of `get_context` # 2. 
Using `read_csv` on a local file instead of making a network request diff --git a/tests/datasource/test_sqlalchemy_datasource_workarounds.py b/tests/datasource/test_sqlalchemy_datasource_workarounds.py index 7dc3ca80c554..083fca15558c 100644 --- a/tests/datasource/test_sqlalchemy_datasource_workarounds.py +++ b/tests/datasource/test_sqlalchemy_datasource_workarounds.py @@ -1,2 +1,2 @@ -# This file is intended for tests whose functionality is a workaround for deficiencies in upstream libraries. # noqa: E501 +# This file is intended for tests whose functionality is a workaround for deficiencies in upstream libraries. # noqa: E501 # FIXME CoP # In an ideal world, it is empty. As fixes come to be, we can replace its items. diff --git a/tests/execution_engine/conftest.py b/tests/execution_engine/conftest.py index af48d47f45c2..dc2ebcae5b79 100644 --- a/tests/execution_engine/conftest.py +++ b/tests/execution_engine/conftest.py @@ -96,7 +96,7 @@ def azure_batch_spec() -> AzureBatchSpec: "alpha-2.csv", ] path = keys[0] - full_path = os.path.join( # noqa: PTH118 + full_path = os.path.join( # noqa: PTH118 # FIXME CoP "mock_account.blob.core.windows.net", container, path ) @@ -120,7 +120,7 @@ def gcs_batch_spec() -> GCSBatchSpec: "alpha-2.csv", ] path = keys[0] - full_path = os.path.join("gs://", bucket, path) # noqa: PTH118 + full_path = os.path.join("gs://", bucket, path) # noqa: PTH118 # FIXME CoP batch_spec = GCSBatchSpec( path=full_path, @@ -135,7 +135,7 @@ def test_sparkdf(spark_session) -> pyspark.DataFrame: def generate_ascending_list_of_datetimes( n, start_date=datetime.date(2020, 1, 1), end_date=datetime.date(2020, 12, 31) ) -> List[datetime.datetime]: - start_time = datetime.datetime(start_date.year, start_date.month, start_date.day) # noqa: DTZ001 + start_time = datetime.datetime(start_date.year, start_date.month, start_date.day) # noqa: DTZ001 # FIXME CoP seconds_between_dates = (end_date - start_date).total_seconds() # noinspection PyUnusedLocal datetime_list = 
[ @@ -194,7 +194,7 @@ def test_folder_connection_path_tsv(tmp_path_factory) -> str: df1 = pd.DataFrame({"col_1": [1, 2, 3, 4, 5], "col_2": ["a", "b", "c", "d", "e"]}) path = str(tmp_path_factory.mktemp("test_folder_connection_path_tsv")) df1.to_csv( - path_or_buf=os.path.join(path, "test.tsv"), # noqa: PTH118 + path_or_buf=os.path.join(path, "test.tsv"), # noqa: PTH118 # FIXME CoP sep="\t", index=False, ) @@ -205,5 +205,5 @@ def test_folder_connection_path_tsv(tmp_path_factory) -> str: def test_folder_connection_path_parquet(tmp_path_factory) -> str: df1 = pd.DataFrame({"col_1": [1, 2, 3, 4, 5], "col_2": ["a", "b", "c", "d", "e"]}) path = str(tmp_path_factory.mktemp("test_folder_connection_path_parquet")) - df1.to_parquet(path=os.path.join(path, "test.parquet")) # noqa: PTH118 + df1.to_parquet(path=os.path.join(path, "test.parquet")) # noqa: PTH118 # FIXME CoP return str(path) diff --git a/tests/execution_engine/partition_and_sample/partition_and_sample_test_cases.py b/tests/execution_engine/partition_and_sample/partition_and_sample_test_cases.py index eb0c48f4ce28..d030ce777001 100644 --- a/tests/execution_engine/partition_and_sample/partition_and_sample_test_cases.py +++ b/tests/execution_engine/partition_and_sample/partition_and_sample_test_cases.py @@ -12,7 +12,7 @@ pytest.param({"month": 10}, id="month_dict"), pytest.param("10-31-2018", id="dateutil parseable date string"), pytest.param( - datetime.datetime(2018, 10, 31, 0, 0, 0), # noqa: DTZ001 + datetime.datetime(2018, 10, 31, 0, 0, 0), # noqa: DTZ001 # FIXME CoP id="datetime", ), pytest.param( @@ -31,7 +31,7 @@ id="non dateutil parseable date string", ), pytest.param( - datetime.datetime(2018, 11, 30, 0, 0, 0), # noqa: DTZ001 + datetime.datetime(2018, 11, 30, 0, 0, 0), # noqa: DTZ001 # FIXME CoP marks=pytest.mark.xfail(strict=True), id="incorrect datetime should fail", ), @@ -75,7 +75,7 @@ pytest.param({"year": 2018, "month": 10}, id="year_and_month_dict"), pytest.param("10-31-2018", id="dateutil parseable 
date string"), pytest.param( - datetime.datetime(2018, 10, 30, 0, 0, 0), # noqa: DTZ001 + datetime.datetime(2018, 10, 30, 0, 0, 0), # noqa: DTZ001 # FIXME CoP id="datetime", ), pytest.param( @@ -99,7 +99,7 @@ id="non dateutil parseable date string", ), pytest.param( - datetime.datetime(2018, 11, 30, 0, 0, 0), # noqa: DTZ001 + datetime.datetime(2018, 11, 30, 0, 0, 0), # noqa: DTZ001 # FIXME CoP marks=pytest.mark.xfail(strict=True), id="incorrect datetime should fail", ), diff --git a/tests/execution_engine/partition_and_sample/test_pandas_execution_engine_partitioning.py b/tests/execution_engine/partition_and_sample/test_pandas_execution_engine_partitioning.py index 7e14a93851c1..f36105543a84 100644 --- a/tests/execution_engine/partition_and_sample/test_pandas_execution_engine_partitioning.py +++ b/tests/execution_engine/partition_and_sample/test_pandas_execution_engine_partitioning.py @@ -86,7 +86,7 @@ def test_s3_files(s3, s3_bucket, test_df_small_csv): def batch_with_partition_on_whole_table_s3(test_s3_files) -> S3BatchSpec: bucket, keys = test_s3_files path = keys[0] - full_path = f"s3a://{os.path.join(bucket, path)}" # noqa: PTH118 + full_path = f"s3a://{os.path.join(bucket, path)}" # noqa: PTH118 # FIXME CoP batch_spec = S3BatchSpec( path=full_path, @@ -179,7 +179,7 @@ def test_partition_on_date_parts_single_date_parts( date_parts that is a string, DatePart enum objects, mixed case string. To match our interface it should accept a dateutil parseable string as the batch identifier or a datetime and also fail when parameters are invalid. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP data_partitioner: PandasDataPartitioner = PandasDataPartitioner() column_name: str = "timestamp" result: pd.DataFrame = data_partitioner.partition_on_date_parts( @@ -209,7 +209,7 @@ def test_partition_on_date_parts_multiple_date_parts( date parts that are strings, DatePart enum objects, a mixture and mixed case. 
To match our interface it should accept a dateutil parseable string as the batch identifier or a datetime and also fail when parameters are invalid. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP data_partitioner: PandasDataPartitioner = PandasDataPartitioner() column_name: str = "timestamp" result: pd.DataFrame = data_partitioner.partition_on_date_parts( @@ -237,14 +237,14 @@ def test_partition_on_date_parts_multiple_date_parts( ], ) def test_named_date_part_methods( - mock_partition_on_date_parts: mock.MagicMock, # noqa: TID251 + mock_partition_on_date_parts: mock.MagicMock, # noqa: TID251 # FIXME CoP partitioner_method_name: str, called_with_date_parts: List[DatePart], simple_multi_year_pandas_df: pd.DataFrame, ): """Test that a partially pre-filled version of partition_on_date_parts() was called with the appropriate params. For example, partition_on_year. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP data_partitioner: PandasDataPartitioner = PandasDataPartitioner() column_name: str = "column_name" batch_identifiers: dict = {column_name: {"year": 2018, "month": 10, "day": 31}} @@ -314,7 +314,7 @@ def test_get_batch_with_partition_on_whole_table_filesystem( ): test_df = PandasExecutionEngine().get_batch_data( PathBatchSpec( - path=os.path.join( # noqa: PTH118 + path=os.path.join( # noqa: PTH118 # FIXME CoP test_folder_connection_path_csv, "test.csv" ), reader_method="read_csv", diff --git a/tests/execution_engine/partition_and_sample/test_sparkdf_execution_engine_partitioning.py b/tests/execution_engine/partition_and_sample/test_sparkdf_execution_engine_partitioning.py index 4227436f5c9f..668b4a5e964a 100644 --- a/tests/execution_engine/partition_and_sample/test_sparkdf_execution_engine_partitioning.py +++ b/tests/execution_engine/partition_and_sample/test_sparkdf_execution_engine_partitioning.py @@ -150,7 +150,7 @@ def test_partition_on_date_parts_single_date_parts( date_parts that is a string, DatePart enum objects, mixed case string. 
To match our interface it should accept a dateutil parseable string as the batch identifier or a datetime and also fail when parameters are invalid. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP data_partitioner: SparkDataPartitioner = SparkDataPartitioner() column_name: str = "timestamp" result: pyspark.DataFrame = data_partitioner.partition_on_date_parts( @@ -179,7 +179,7 @@ def test_partition_on_date_parts_multiple_date_parts( date parts that are strings, DatePart enum objects, a mixture and mixed case. To match our interface it should accept a dateutil parseable string as the batch identifier or a datetime and also fail when parameters are invalid. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP data_partitioner: SparkDataPartitioner = SparkDataPartitioner() column_name: str = "timestamp" result: pyspark.DataFrame = data_partitioner.partition_on_date_parts( @@ -206,14 +206,14 @@ def test_partition_on_date_parts_multiple_date_parts( ], ) def test_named_date_part_methods( - mock_partition_on_date_parts: mock.MagicMock, # noqa: TID251 + mock_partition_on_date_parts: mock.MagicMock, # noqa: TID251 # FIXME CoP partitioner_method_name: str, called_with_date_parts: List[DatePart], simple_multi_year_spark_df: pyspark.DataFrame, ): """Test that a partially pre-filled version of partition_on_date_parts() was called with the appropriate params. For example, partition_on_year. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP data_partitioner: SparkDataPartitioner = SparkDataPartitioner() column_name: str = "column_name" batch_identifiers: dict = {column_name: {"year": 2018, "month": 10, "day": 31}} @@ -275,7 +275,7 @@ def test_get_batch_empty_partitioner( # reader_options are needed to specify the fact that the first line of test file is the header test_sparkdf = basic_spark_df_execution_engine.get_batch_data( PathBatchSpec( - path=os.path.join( # noqa: PTH118 + path=os.path.join( # noqa: PTH118 # FIXME CoP test_folder_connection_path_csv, "test.csv" ), reader_options={"header": True}, @@ -291,10 +291,10 @@ def test_get_batch_empty_partitioner_tsv( ): # reader_method not configured because spark will configure own reader by default # reader_options are needed to specify the fact that the first line of test file is the header - # reader_options are also needed to specify the separator (otherwise, comma will be used as the default separator) # noqa: E501 + # reader_options are also needed to specify the separator (otherwise, comma will be used as the default separator) # noqa: E501 # FIXME CoP test_sparkdf = basic_spark_df_execution_engine.get_batch_data( PathBatchSpec( - path=os.path.join( # noqa: PTH118 + path=os.path.join( # noqa: PTH118 # FIXME CoP test_folder_connection_path_tsv, "test.tsv" ), reader_options={"header": True, "sep": "\t"}, @@ -313,10 +313,10 @@ def test_get_batch_empty_partitioner_parquet( test_folder_connection_path_parquet, basic_spark_df_execution_engine ): # Note: reader method and reader_options are not needed, because - # SparkDFExecutionEngine automatically determines the file type as well as the schema of the Parquet file. # noqa: E501 + # SparkDFExecutionEngine automatically determines the file type as well as the schema of the Parquet file. 
# noqa: E501 # FIXME CoP test_sparkdf = basic_spark_df_execution_engine.get_batch_data( PathBatchSpec( - path=os.path.join( # noqa: PTH118 + path=os.path.join( # noqa: PTH118 # FIXME CoP test_folder_connection_path_parquet, "test.parquet" ), partitioner_method=None, @@ -344,7 +344,7 @@ def test_get_batch_with_partition_on_whole_table_filesystem( # reader_method not configured because spark will configure own reader by default test_sparkdf = basic_spark_df_execution_engine.get_batch_data( PathBatchSpec( - path=os.path.join( # noqa: PTH118 + path=os.path.join( # noqa: PTH118 # FIXME CoP test_folder_connection_path_csv, "test.csv" ), partitioner_method="_partition_on_whole_table", diff --git a/tests/execution_engine/partition_and_sample/test_sparkdf_execution_engine_sampling.py b/tests/execution_engine/partition_and_sample/test_sparkdf_execution_engine_sampling.py index 9ce93cd26300..6aa6a9839cbd 100644 --- a/tests/execution_engine/partition_and_sample/test_sparkdf_execution_engine_sampling.py +++ b/tests/execution_engine/partition_and_sample/test_sparkdf_execution_engine_sampling.py @@ -69,8 +69,8 @@ def test_sample_using_random(test_sparkdf, basic_spark_df_execution_engine): # The test dataframe contains 10 columns and 120 rows. assert len(sampled_df.columns) == 10 assert 0 <= sampled_df.count() <= 120 - # The sampling probability "p" used in "SparkDFExecutionEngine._sample_using_random()" is 0.1 (the equivalent of an # noqa: E501 - # unfair coin with the 10% chance of coming up as "heads"). Hence, we should never get as much as 20% of the rows. # noqa: E501 + # The sampling probability "p" used in "SparkDFExecutionEngine._sample_using_random()" is 0.1 (the equivalent of an # noqa: E501 # FIXME CoP + # unfair coin with the 10% chance of coming up as "heads"). Hence, we should never get as much as 20% of the rows. 
# noqa: E501 # FIXME CoP assert sampled_df.count() < 25 diff --git a/tests/execution_engine/partition_and_sample/test_sqlalchemy_execution_engine_partitioning.py b/tests/execution_engine/partition_and_sample/test_sqlalchemy_execution_engine_partitioning.py index b8c076d2918a..09a1c980372e 100644 --- a/tests/execution_engine/partition_and_sample/test_sqlalchemy_execution_engine_partitioning.py +++ b/tests/execution_engine/partition_and_sample/test_sqlalchemy_execution_engine_partitioning.py @@ -28,7 +28,7 @@ ) # Here we add SqlAlchemyDataPartitioner specific test cases to the generic test cases: -from tests.integration.fixtures.partition_and_sample_data.partitioner_test_cases_and_fixtures import ( # noqa: E501 +from tests.integration.fixtures.partition_and_sample_data.partitioner_test_cases_and_fixtures import ( # noqa: E501 # FIXME CoP TaxiPartitioningTestCase, TaxiPartitioningTestCasesBase, TaxiPartitioningTestCasesDateTime, @@ -75,13 +75,13 @@ ) @pytest.mark.sqlite def test_named_date_part_methods( - mock_partition_on_date_parts: mock.MagicMock, # noqa: TID251 + mock_partition_on_date_parts: mock.MagicMock, # noqa: TID251 # FIXME CoP partitioner_method_name: str, called_with_date_parts: List[DatePart], ): """Test that a partially pre-filled version of partition_on_date_parts() was called with the appropriate params. For example, partition_on_year. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP data_partitioner: SqlAlchemyDataPartitioner = SqlAlchemyDataPartitioner(dialect="sqlite") column_name: str = "column_name" batch_identifiers: dict = {column_name: {"year": 2018, "month": 10, "day": 31}} @@ -183,10 +183,10 @@ def test_partition_on_date_parts_multiple_date_parts(batch_identifiers_for_colum @mock.patch("great_expectations.execution_engine.execution_engine.ExecutionEngine") @pytest.mark.sqlite def test_get_data_for_batch_identifiers_year( - mock_execution_engine: mock.MagicMock, # noqa: TID251 - mock_get_data_for_batch_identifiers_for_partition_on_date_parts: mock.MagicMock, # noqa: TID251 + mock_execution_engine: mock.MagicMock, # noqa: TID251 # FIXME CoP + mock_get_data_for_batch_identifiers_for_partition_on_date_parts: mock.MagicMock, # noqa: TID251 # FIXME CoP ): - """test that get_data_for_batch_identifiers_for_partition_on_date_parts() was called with the appropriate params.""" # noqa: E501 + """test that get_data_for_batch_identifiers_for_partition_on_date_parts() was called with the appropriate params.""" # noqa: E501 # FIXME CoP data_partitioner: SqlAlchemyDataPartitioner = SqlAlchemyDataPartitioner(dialect="sqlite") # selectable should be a sa.Selectable object but since we are mocking out # get_data_for_batch_identifiers_for_partition_on_date_parts @@ -214,10 +214,10 @@ def test_get_data_for_batch_identifiers_year( @mock.patch("great_expectations.execution_engine.execution_engine.ExecutionEngine") @pytest.mark.sqlite def test_get_data_for_batch_identifiers_year_and_month( - mock_execution_engine: mock.MagicMock, # noqa: TID251 - mock_get_data_for_batch_identifiers_for_partition_on_date_parts: mock.MagicMock, # noqa: TID251 + mock_execution_engine: mock.MagicMock, # noqa: TID251 # FIXME CoP + mock_get_data_for_batch_identifiers_for_partition_on_date_parts: mock.MagicMock, # noqa: TID251 # FIXME CoP ): - """test that get_data_for_batch_identifiers_for_partition_on_date_parts() was called with 
the appropriate params.""" # noqa: E501 + """test that get_data_for_batch_identifiers_for_partition_on_date_parts() was called with the appropriate params.""" # noqa: E501 # FIXME CoP data_partitioner: SqlAlchemyDataPartitioner = SqlAlchemyDataPartitioner(dialect="sqlite") selectable: str = "mock_selectable" column_name: str = "column_name" @@ -242,10 +242,10 @@ def test_get_data_for_batch_identifiers_year_and_month( @mock.patch("great_expectations.execution_engine.execution_engine.ExecutionEngine") @pytest.mark.sqlite def test_get_data_for_batch_identifiers_year_and_month_and_day( - mock_execution_engine: mock.MagicMock, # noqa: TID251 - mock_get_data_for_batch_identifiers_for_partition_on_date_parts: mock.MagicMock, # noqa: TID251 + mock_execution_engine: mock.MagicMock, # noqa: TID251 # FIXME CoP + mock_get_data_for_batch_identifiers_for_partition_on_date_parts: mock.MagicMock, # noqa: TID251 # FIXME CoP ): - """test that get_data_for_batch_identifiers_for_partition_on_date_parts() was called with the appropriate params.""" # noqa: E501 + """test that get_data_for_batch_identifiers_for_partition_on_date_parts() was called with the appropriate params.""" # noqa: E501 # FIXME CoP data_partitioner: SqlAlchemyDataPartitioner = SqlAlchemyDataPartitioner(dialect="sqlite") selectable: str = "mock_selectable" column_name: str = "column_name" @@ -269,20 +269,20 @@ def test_get_data_for_batch_identifiers_year_and_month_and_day( SINGLE_DATE_PART_DATE_PARTS, ) @pytest.mark.sqlite -def test_get_partition_query_for_data_for_batch_identifiers_for_partition_on_date_parts_single_date_parts( # noqa: E501 +def test_get_partition_query_for_data_for_batch_identifiers_for_partition_on_date_parts_single_date_parts( # noqa: E501 # FIXME CoP date_parts, sa ): """What does this test and why? 
get_partition_query_for_data_for_batch_identifiers_for_partition_on_date_parts should still build the correct query when passed a single element list of date_parts that is a string, DatePart enum object, or mixed case string. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP data_partitioner: SqlAlchemyDataPartitioner = SqlAlchemyDataPartitioner(dialect="sqlite") selectable: sa.sql.Selectable = sa.text("table_name") column_name: str = "column_name" - result: sa.sql.elements.BooleanClauseList = data_partitioner.get_partition_query_for_data_for_batch_identifiers_for_partition_on_date_parts( # noqa: E501 + result: sa.sql.elements.BooleanClauseList = data_partitioner.get_partition_query_for_data_for_batch_identifiers_for_partition_on_date_parts( # noqa: E501 # FIXME CoP selectable=selectable, column_name=column_name, date_parts=date_parts, @@ -317,19 +317,19 @@ def test_get_partition_query_for_data_for_batch_identifiers_for_partition_on_dat [ pytest.param( "sqlite", - "SELECT DISTINCT(CAST(EXTRACT(year FROM column_name) AS VARCHAR) || CAST (EXTRACT(month FROM column_name) AS VARCHAR)) AS concat_distinct_values, CAST(EXTRACT(year FROM column_name) AS INTEGER) AS year, CAST(EXTRACT(month FROM column_name) AS INTEGER) AS month FROM table_name", # noqa: E501 + "SELECT DISTINCT(CAST(EXTRACT(year FROM column_name) AS VARCHAR) || CAST (EXTRACT(month FROM column_name) AS VARCHAR)) AS concat_distinct_values, CAST(EXTRACT(year FROM column_name) AS INTEGER) AS year, CAST(EXTRACT(month FROM column_name) AS INTEGER) AS month FROM table_name", # noqa: E501 # FIXME CoP marks=pytest.mark.sqlite, id="sqlite", ), pytest.param( "postgres", - "SELECT DISTINCT(CONCAT(CONCAT('', CAST(EXTRACT(year FROM column_name) AS VARCHAR)), CAST(EXTRACT(month FROM column_name) AS VARCHAR))) AS concat_distinct_values, CAST(EXTRACT(year FROM column_name) AS INTEGER) AS year, CAST(EXTRACT(month FROM column_name) AS INTEGER) AS month FROM table_name", # noqa: E501 + "SELECT DISTINCT(CONCAT(CONCAT('', 
CAST(EXTRACT(year FROM column_name) AS VARCHAR)), CAST(EXTRACT(month FROM column_name) AS VARCHAR))) AS concat_distinct_values, CAST(EXTRACT(year FROM column_name) AS INTEGER) AS year, CAST(EXTRACT(month FROM column_name) AS INTEGER) AS month FROM table_name", # noqa: E501 # FIXME CoP marks=pytest.mark.postgresql, id="postgres", ), ], ) -def test_get_partition_query_for_data_for_batch_identifiers_for_partition_on_date_parts_multiple_date_parts( # noqa: E501 +def test_get_partition_query_for_data_for_batch_identifiers_for_partition_on_date_parts_multiple_date_parts( # noqa: E501 # FIXME CoP date_parts, dialect, expected_query_str, sa ): """What does this test and why? @@ -340,7 +340,7 @@ def test_get_partition_query_for_data_for_batch_identifiers_for_partition_on_dat selectable: sa.sql.Selectable = sa.text("table_name") column_name: str = "column_name" - result: sa.sql.elements.BooleanClauseList = data_partitioner.get_partition_query_for_data_for_batch_identifiers_for_partition_on_date_parts( # noqa: E501 + result: sa.sql.elements.BooleanClauseList = data_partitioner.get_partition_query_for_data_for_batch_identifiers_for_partition_on_date_parts( # noqa: E501 # FIXME CoP selectable=selectable, column_name=column_name, date_parts=date_parts, @@ -396,8 +396,8 @@ def test_get_partitioner_method(underscore_prefix: str, partitioner_method_name: def ten_trips_per_month_df() -> pd.DataFrame: csv_path: str = file_relative_path( - os.path.dirname(os.path.dirname(__file__)), # noqa: PTH120 - os.path.join( # noqa: PTH118 + os.path.dirname(os.path.dirname(__file__)), # noqa: PTH120 # FIXME CoP + os.path.join( # noqa: PTH118 # FIXME CoP "test_sets", "taxi_yellow_tripdata_samples", "ten_trips_from_each_month", @@ -462,7 +462,7 @@ def test_sqlite_partition( partitioner_kwargs=test_case.add_batch_definition_kwargs, batch_identifiers={}, ) - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if taxi_test_cases.test_column_name: assert test_case.expected_column_values is not 
None batch_spec = SqlAlchemyDatasourceBatchSpec( diff --git a/tests/execution_engine/partition_and_sample/test_sqlalchemy_execution_engine_sampling.py b/tests/execution_engine/partition_and_sample/test_sqlalchemy_execution_engine_sampling.py index 8867673715ba..dadd4bf2f4eb 100644 --- a/tests/execution_engine/partition_and_sample/test_sqlalchemy_execution_engine_sampling.py +++ b/tests/execution_engine/partition_and_sample/test_sqlalchemy_execution_engine_sampling.py @@ -90,17 +90,17 @@ def clean_query_for_comparison(query_string: str) -> str: def dialect_name_to_sql_statement(): def _dialect_name_to_sql_statement(dialect_name: GXSqlDialect) -> str: dialect_name_to_sql_statement: dict = { - GXSqlDialect.POSTGRESQL: "SELECT * FROM TEST_SCHEMA_NAME.TEST_TABLE WHERE TRUE LIMIT 10", # noqa: E501 + GXSqlDialect.POSTGRESQL: "SELECT * FROM TEST_SCHEMA_NAME.TEST_TABLE WHERE TRUE LIMIT 10", # noqa: E501 # FIXME CoP GXSqlDialect.MYSQL: "SELECT * FROM TEST_SCHEMA_NAME.TEST_TABLE WHERE TRUE = 1 LIMIT 10", - GXSqlDialect.ORACLE: "SELECT * FROM test_schema_name.test_table WHERE 1 = 1 AND ROWNUM <= 10", # noqa: E501 + GXSqlDialect.ORACLE: "SELECT * FROM test_schema_name.test_table WHERE 1 = 1 AND ROWNUM <= 10", # noqa: E501 # FIXME CoP GXSqlDialect.MSSQL: "SELECT TOP 10 * FROM TEST_SCHEMA_NAME.TEST_TABLE WHERE 1 = 1", - GXSqlDialect.SQLITE: "SELECT * FROM TEST_SCHEMA_NAME.TEST_TABLE WHERE 1 = 1 LIMIT 10 OFFSET 0", # noqa: E501 - GXSqlDialect.BIGQUERY: "SELECT * FROM `TEST_SCHEMA_NAME`.`TEST_TABLE` WHERE TRUE LIMIT 10", # noqa: E501 + GXSqlDialect.SQLITE: "SELECT * FROM TEST_SCHEMA_NAME.TEST_TABLE WHERE 1 = 1 LIMIT 10 OFFSET 0", # noqa: E501 # FIXME CoP + GXSqlDialect.BIGQUERY: "SELECT * FROM `TEST_SCHEMA_NAME`.`TEST_TABLE` WHERE TRUE LIMIT 10", # noqa: E501 # FIXME CoP GXSqlDialect.SNOWFLAKE: "SELECT * FROM TEST_SCHEMA_NAME.TEST_TABLE WHERE TRUE LIMIT 10", GXSqlDialect.REDSHIFT: "SELECT * FROM TEST_SCHEMA_NAME.TEST_TABLE WHERE TRUE LIMIT 10", - GXSqlDialect.AWSATHENA: 'SELECT * 
FROM "TEST_SCHEMA_NAME"."TEST_TABLE" WHERE TRUE LIMIT 10', # noqa: E501 - GXSqlDialect.DREMIO: 'SELECT * FROM "TEST_SCHEMA_NAME"."TEST_TABLE" WHERE 1 = 1 LIMIT 10', # noqa: E501 - GXSqlDialect.TERADATASQL: "SELECT TOP 10 * FROM TEST_SCHEMA_NAME.TEST_TABLE WHERE 1 = 1", # noqa: E501 + GXSqlDialect.AWSATHENA: 'SELECT * FROM "TEST_SCHEMA_NAME"."TEST_TABLE" WHERE TRUE LIMIT 10', # noqa: E501 # FIXME CoP + GXSqlDialect.DREMIO: 'SELECT * FROM "TEST_SCHEMA_NAME"."TEST_TABLE" WHERE 1 = 1 LIMIT 10', # noqa: E501 # FIXME CoP + GXSqlDialect.TERADATASQL: "SELECT TOP 10 * FROM TEST_SCHEMA_NAME.TEST_TABLE WHERE 1 = 1", # noqa: E501 # FIXME CoP GXSqlDialect.TRINO: "SELECT * FROM TEST_SCHEMA_NAME.TEST_TABLE WHERE TRUE LIMIT 10", GXSqlDialect.HIVE: "SELECT * FROM `TEST_SCHEMA_NAME`.`TEST_TABLE` WHERE TRUE LIMIT 10", GXSqlDialect.VERTICA: "SELECT * FROM TEST_SCHEMA_NAME.TEST_TABLE WHERE TRUE LIMIT 10", @@ -115,7 +115,7 @@ def pytest_parsed_arguments(request): return request.config.option -# Despite being parameterized over GXSqlDialect, this test skips if the flag corresponding to that dialect isn't # noqa: E501 +# Despite being parameterized over GXSqlDialect, this test skips if the flag corresponding to that dialect isn't # noqa: E501 # FIXME CoP # passed in. Most of these dialects are never run in CI. 
@pytest.mark.all_backends @pytest.mark.parametrize( @@ -125,7 +125,7 @@ def pytest_parsed_arguments(request): for dialect_name in GXSqlDialect.get_all_dialects() ], ) -def test_sample_using_limit_builds_correct_query_where_clause_none( # noqa: C901 +def test_sample_using_limit_builds_correct_query_where_clause_none( # noqa: C901 # FIXME CoP dialect_name: GXSqlDialect, dialect_name_to_sql_statement, sa, @@ -139,7 +139,7 @@ def test_sample_using_limit_builds_correct_query_where_clause_none( # noqa: C90 if hasattr(pytest_parsed_arguments, str(dialect_name.value)): if not getattr(pytest_parsed_arguments, str(dialect_name.value)): pytest.skip( - f"Skipping {dialect_name.value!s} since the --{dialect_name.value!s} pytest flag was not set" # noqa: E501 + f"Skipping {dialect_name.value!s} since the --{dialect_name.value!s} pytest flag was not set" # noqa: E501 # FIXME CoP ) else: pytest.skip( @@ -180,7 +180,7 @@ def dialect_name_to_connection_string(self, dialect_name: GXSqlDialect) -> str: @property def dialect(self) -> sa.engine.Dialect: - # TODO: AJB 20220512 move this dialect retrieval to a separate class from the SqlAlchemyExecutionEngine # noqa: E501 + # TODO: AJB 20220512 move this dialect retrieval to a separate class from the SqlAlchemyExecutionEngine # noqa: E501 # FIXME CoP # and then use it here. dialect_name: GXSqlDialect = self._dialect_name if dialect_name == GXSqlDialect.ORACLE: @@ -196,7 +196,7 @@ def dialect(self) -> sa.engine.Dialect: # noinspection PyUnresolvedReferences return import_library_module(module_name="sqlalchemy_dremio.pyodbc").dialect() # NOTE: AJB 20220512 Redshift dialect is not yet fully supported. 
- # The below throws an `AttributeError: type object 'RedshiftDialect_psycopg2' has no attribute 'positional'` # noqa: E501 + # The below throws an `AttributeError: type object 'RedshiftDialect_psycopg2' has no attribute 'positional'` # noqa: E501 # FIXME CoP # elif dialect_name == "redshift": # return import_library_module( # module_name="sqlalchemy_redshift.dialect" @@ -249,8 +249,8 @@ def dialect(self) -> sa.engine.Dialect: @pytest.mark.sqlite def test_sqlite_sample_using_limit(sa): csv_path: str = file_relative_path( - os.path.dirname(os.path.dirname(__file__)), # noqa: PTH120 - os.path.join( # noqa: PTH118 + os.path.dirname(os.path.dirname(__file__)), # noqa: PTH120 # FIXME CoP + os.path.join( # noqa: PTH118 # FIXME CoP "test_sets", "taxi_yellow_tripdata_samples", "ten_trips_from_each_month", diff --git a/tests/execution_engine/test_pandas_execution_engine.py b/tests/execution_engine/test_pandas_execution_engine.py index 6ed81ddc6234..f810141795d5 100644 --- a/tests/execution_engine/test_pandas_execution_engine.py +++ b/tests/execution_engine/test_pandas_execution_engine.py @@ -403,7 +403,7 @@ def test_resolve_metric_bundle(): ) -# Ensuring that we can properly inform user when metric doesn't exist - should get a metric provider error # noqa: E501 +# Ensuring that we can properly inform user when metric doesn't exist - should get a metric provider error # noqa: E501 # FIXME CoP @pytest.mark.unit def test_resolve_metric_bundle_with_nonexistent_metric(): df = pd.DataFrame({"a": [1, 2, 3, None]}) @@ -463,7 +463,7 @@ def test_get_batch_data(test_df): def test_get_batch_s3_compressed_files(test_s3_files_compressed, test_df_small): bucket, keys = test_s3_files_compressed path = keys[0] - full_path = f"s3a://{os.path.join(bucket, path)}" # noqa: PTH118 + full_path = f"s3a://{os.path.join(bucket, path)}" # noqa: PTH118 # FIXME CoP batch_spec = S3BatchSpec(path=full_path, reader_method="read_csv") df = PandasExecutionEngine().get_batch_data(batch_spec=batch_spec) @@ 
-482,7 +482,7 @@ def test_get_batch_s3_compressed_files(test_s3_files_compressed, test_df_small): def test_get_batch_s3_parquet(test_s3_files_parquet, test_df_small): bucket, keys = test_s3_files_parquet path = [key for key in keys if key.endswith(".parquet")][0] - full_path = f"s3a://{os.path.join(bucket, path)}" # noqa: PTH118 + full_path = f"s3a://{os.path.join(bucket, path)}" # noqa: PTH118 # FIXME CoP batch_spec = S3BatchSpec(path=full_path, reader_method="read_parquet") df = PandasExecutionEngine().get_batch_data(batch_spec=batch_spec) @@ -653,9 +653,9 @@ def test_get_batch_data_with_gcs_batch_spec( ) @pytest.mark.big def test_get_batch_data_with_gcs_batch_spec_no_credentials(gcs_batch_spec, monkeypatch): - # If PandasExecutionEngine contains no credentials for GCS, we will still instantiate _gcs engine, # noqa: E501 - # but will raise Exception when trying get_batch_data(). The only situation where it would work is if we are running in a Google Cloud container. # noqa: E501 - # TODO : Determine how we can test the scenario where we are running PandasExecutionEngine from within Google Cloud env. # noqa: E501 + # If PandasExecutionEngine contains no credentials for GCS, we will still instantiate _gcs engine, # noqa: E501 # FIXME CoP + # but will raise Exception when trying get_batch_data(). The only situation where it would work is if we are running in a Google Cloud container. # noqa: E501 # FIXME CoP + # TODO : Determine how we can test the scenario where we are running PandasExecutionEngine from within Google Cloud env. 
# noqa: E501 # FIXME CoP monkeypatch.delenv("GOOGLE_APPLICATION_CREDENTIALS", raising=False) with pytest.raises(gx_exceptions.ExecutionEngineError): diff --git a/tests/execution_engine/test_sparkdf_execution_engine.py b/tests/execution_engine/test_sparkdf_execution_engine.py index 9776b380acc4..eaac780d2412 100644 --- a/tests/execution_engine/test_sparkdf_execution_engine.py +++ b/tests/execution_engine/test_sparkdf_execution_engine.py @@ -525,12 +525,12 @@ def test_partition_on_multi_column_values_and_sample_using_random( assert len(returned_df.columns) == 10 # The number of returned rows corresponding to the value of "batch_identifiers" above is 4. assert 0 <= returned_df.count() <= 4 - # The sampling probability "p" used in "SparkDFExecutionEngine._sample_using_random()" is 0.5 (the equivalent of a # noqa: E501 - # fair coin with the 50% chance of coming up as "heads"). Hence, on average we should get 50% of the rows, which is # noqa: E501 - # 2; however, for such a small sample (of 4 rows), the number of rows returned by an individual run can deviate from # noqa: E501 - # this average. Still, in the majority of trials, the number of rows should not be fewer than 2 or greater than 3. # noqa: E501 - # The assertion in the next line, supporting this reasoning, is commented out to insure zero failures. Developers # noqa: E501 - # are encouraged to uncomment it, whenever the "_sample_using_random" feature is the main focus of a given effort. # noqa: E501 + # The sampling probability "p" used in "SparkDFExecutionEngine._sample_using_random()" is 0.5 (the equivalent of a # noqa: E501 # FIXME CoP + # fair coin with the 50% chance of coming up as "heads"). Hence, on average we should get 50% of the rows, which is # noqa: E501 # FIXME CoP + # 2; however, for such a small sample (of 4 rows), the number of rows returned by an individual run can deviate from # noqa: E501 # FIXME CoP + # this average. 
Still, in the majority of trials, the number of rows should not be fewer than 2 or greater than 3. # noqa: E501 # FIXME CoP + # The assertion in the next line, supporting this reasoning, is commented out to insure zero failures. Developers # noqa: E501 # FIXME CoP + # are encouraged to uncomment it, whenever the "_sample_using_random" feature is the main focus of a given effort. # noqa: E501 # FIXME CoP # assert 2 <= returned_df.count() <= 3 for val in returned_df.collect(): @@ -719,7 +719,7 @@ def test_sparkdf_batch_aggregate_metrics(caplog, spark_session): "metric_partial_fn": desired_aggregate_fn_metric_4, "table.columns": table_columns_metric, } - start = datetime.datetime.now() # noqa: DTZ005 + start = datetime.datetime.now() # noqa: DTZ005 # FIXME CoP caplog.clear() caplog.set_level(logging.DEBUG, logger="great_expectations") results = engine.resolve_metrics( @@ -732,7 +732,7 @@ def test_sparkdf_batch_aggregate_metrics(caplog, spark_session): metrics=metrics, ) metrics.update(results) - end = datetime.datetime.now() # noqa: DTZ005 + end = datetime.datetime.now() # noqa: DTZ005 # FIXME CoP print(end - start) assert metrics[desired_metric_1.id] == 3 assert metrics[desired_metric_2.id] == 1 @@ -905,7 +905,7 @@ def test_get_domain_records_with_unmeetable_row_condition_alt(spark_session): ) -# Testing to ensure that great expectation parser also works in terms of defining a compute domain # noqa: E501 +# Testing to ensure that great expectation parser also works in terms of defining a compute domain # noqa: E501 # FIXME CoP def test_get_compute_domain_with_gx_condition_parser(spark_session): engine: SparkDFExecutionEngine = build_spark_engine( spark=spark_session, @@ -976,7 +976,7 @@ def test_get_compute_domain_with_nonexistent_condition_parser(spark_session): ) -# Ensuring that we can properly inform user when metric doesn't exist - should get a metric provider error # noqa: E501 +# Ensuring that we can properly inform user when metric doesn't exist - should get 
a metric provider error # noqa: E501 # FIXME CoP def test_resolve_metric_bundle_with_nonexistent_metric(spark_session): engine: SparkDFExecutionEngine = build_spark_engine( spark=spark_session, @@ -1026,7 +1026,7 @@ def test_resolve_metric_bundle_with_compute_domain_kwargs_json_serialization( ): """ Insures that even when "compute_domain_kwargs" has multiple keys, it will be JSON-serialized for "IDDict.to_id()". - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP engine = build_spark_engine( spark=spark_session, df=pd.DataFrame( diff --git a/tests/execution_engine/test_sqlalchemy_batch_data.py b/tests/execution_engine/test_sqlalchemy_batch_data.py index b065923fddf9..873fd77263cb 100644 --- a/tests/execution_engine/test_sqlalchemy_batch_data.py +++ b/tests/execution_engine/test_sqlalchemy_batch_data.py @@ -1,4 +1,4 @@ -from unittest.mock import Mock # noqa: TID251 +from unittest.mock import Mock # noqa: TID251 # FIXME CoP import pytest @@ -87,7 +87,7 @@ def test_instantiation_with_and_without_temp_table(sqlite_view_engine, sa): execution_engine: SqlAlchemyExecutionEngine = SqlAlchemyExecutionEngine( engine=sqlite_view_engine ) - # When the SqlAlchemyBatchData object is based on a table, a new temp table is NOT created, even if create_temp_table=True # noqa: E501 + # When the SqlAlchemyBatchData object is based on a table, a new temp table is NOT created, even if create_temp_table=True # noqa: E501 # FIXME CoP SqlAlchemyBatchData( execution_engine=execution_engine, table_name="test_table", @@ -131,7 +131,7 @@ def test_instantiation_with_and_without_temp_table(sqlite_view_engine, sa): assert len(get_sqlite_temp_table_names_from_engine(sqlite_view_engine)) == 4 # test schema with execution engine - # TODO : Will20210222 Add tests for specifying schema with non-sqlite backend that actually supports new schema creation # noqa: E501 + # TODO : Will20210222 Add tests for specifying schema with non-sqlite backend that actually supports new schema creation # noqa: E501 # 
FIXME CoP my_batch_spec = SqlAlchemyDatasourceBatchSpec( **{ "table_name": "test_table", @@ -202,7 +202,7 @@ def test_instantiation_with_selectable_only_and_no_temp_table(sqlite_view_engine In cases where we create a validator but explicitly set `create_temp_table`=False, we directly use the selectable created by SqlAlchemyExecutionEngine's _build_selectable_from_batch_spec() method. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP selectable = sa.select("*").select_from(sa.text("main.test_table")) # only have the view that is created by the `sqlite_view_engine` fixture diff --git a/tests/execution_engine/test_sqlalchemy_execution_engine.py b/tests/execution_engine/test_sqlalchemy_execution_engine.py index 0d5869954be3..ca5102683566 100644 --- a/tests/execution_engine/test_sqlalchemy_execution_engine.py +++ b/tests/execution_engine/test_sqlalchemy_execution_engine.py @@ -42,6 +42,7 @@ from great_expectations.validator.validator import Validator from tests.expectations.test_util import get_table_columns_metric from tests.test_utils import ( + get_default_mssql_url, get_sqlite_table_names, get_sqlite_temp_table_names, get_sqlite_temp_table_names_from_engine, @@ -80,7 +81,7 @@ def test_instantiation_via_connection_string(sa, test_db_connection_string): def test_instantiation_via_url(sa): db_file = file_relative_path( __file__, - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP "..", "test_sets", "test_cases_for_sql_data_connector.db" ), ) @@ -102,7 +103,7 @@ def test_instantiation_via_url(sa): def test_instantiation_via_url_with_invalid_kwargs(sa): db_file = file_relative_path( __file__, - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP "..", "test_sets", "test_cases_for_sql_data_connector.db" ), ) @@ -117,7 +118,7 @@ def test_instantiation_via_url_with_invalid_kwargs(sa): def test_instantiation_via_url_with_kwargs(sa): db_file = file_relative_path( __file__, - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 
# FIXME CoP "..", "test_sets", "test_cases_for_sql_data_connector.db" ), ) @@ -145,7 +146,7 @@ def test_instantiation_via_fluent_data_sources_with_kwargs( ): db_file = file_relative_path( __file__, - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP "..", "test_sets", "test_cases_for_sql_data_connector.db" ), ) @@ -178,7 +179,7 @@ def test_instantiation_via_url_and_retrieve_data_with_other_dialect(sa): # 1. Create engine with sqlite db db_file = file_relative_path( __file__, - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP "..", "test_sets", "test_cases_for_sql_data_connector.db" ), ) @@ -234,7 +235,7 @@ def test_instantiation_via_credentials(sa, test_backends, test_df): } assert my_execution_engine.url is None - # Note Abe 20201116: Let's add an actual test of get_batch_data_and_markers, which will require setting up test # noqa: E501 + # Note Abe 20201116: Let's add an actual test of get_batch_data_and_markers, which will require setting up test # noqa: E501 # FIXME CoP # fixtures # my_execution_engine.get_batch_data_and_markers(batch_spec=BatchSpec( # table_name="main.table_1", @@ -349,7 +350,7 @@ def test_sa_batch_aggregate_metrics(caplog, sa): } caplog.clear() caplog.set_level(logging.DEBUG, logger="great_expectations") - start = datetime.datetime.now() # noqa: DTZ005 + start = datetime.datetime.now() # noqa: DTZ005 # FIXME CoP results = execution_engine.resolve_metrics( metrics_to_resolve=( desired_metric_1, @@ -360,7 +361,7 @@ def test_sa_batch_aggregate_metrics(caplog, sa): metrics=metrics, ) metrics.update(results) - end = datetime.datetime.now() # noqa: DTZ005 + end = datetime.datetime.now() # noqa: DTZ005 # FIXME CoP print("t1") print(end - start) assert results[desired_metric_1.id] == 3 @@ -467,7 +468,7 @@ def test_get_domain_records_with_different_column_domain_and_filter_conditions(s @pytest.mark.sqlite -def 
test_get_domain_records_with_column_domain_and_filter_conditions_raises_error_on_multiple_conditions( # noqa: E501 +def test_get_domain_records_with_column_domain_and_filter_conditions_raises_error_on_multiple_conditions( # noqa: E501 # FIXME CoP sa, ): df = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [2, 3, 4, 5, None], "c": [1, 2, 3, 4, None]}) @@ -705,7 +706,7 @@ def test_get_compute_domain_with_no_domain_kwargs(sa): domain_kwargs={}, domain_type="table" ) - # Seeing if raw data is the same as the data after condition has been applied - checking post computation data # noqa: E501 + # Seeing if raw data is the same as the data after condition has been applied - checking post computation data # noqa: E501 # FIXME CoP raw_data = execution_engine.execute_query( sa.select(sa.text("*")).select_from( cast(SqlAlchemyBatchData, execution_engine.batch_manager.active_batch_data).selectable @@ -733,7 +734,7 @@ def test_get_compute_domain_with_column_pair(sa): domain_kwargs={"column_A": "a", "column_B": "b"}, domain_type="column_pair" ) - # Seeing if raw data is the same as the data after condition has been applied - checking post computation data # noqa: E501 + # Seeing if raw data is the same as the data after condition has been applied - checking post computation data # noqa: E501 # FIXME CoP raw_data = execution_engine.execute_query( sa.select(sa.text("*")).select_from( cast(SqlAlchemyBatchData, execution_engine.batch_manager.active_batch_data).selectable @@ -767,7 +768,7 @@ def test_get_compute_domain_with_multicolumn(sa): domain_kwargs={"column_list": ["a", "b", "c"]}, domain_type="multicolumn" ) - # Seeing if raw data is the same as the data after condition has been applied - checking post computation data # noqa: E501 + # Seeing if raw data is the same as the data after condition has been applied - checking post computation data # noqa: E501 # FIXME CoP raw_data = execution_engine.execute_query( sa.select(sa.text("*")).select_from( cast(SqlAlchemyBatchData, 
execution_engine.batch_manager.active_batch_data).selectable @@ -795,7 +796,7 @@ def test_get_compute_domain_with_column_domain(sa): domain_kwargs={"column": "a"}, domain_type=MetricDomainTypes.COLUMN ) - # Seeing if raw data is the same as the data after condition has been applied - checking post computation data # noqa: E501 + # Seeing if raw data is the same as the data after condition has been applied - checking post computation data # noqa: E501 # FIXME CoP raw_data = execution_engine.execute_query( sa.select(sa.text("*")).select_from( cast(SqlAlchemyBatchData, execution_engine.batch_manager.active_batch_data).selectable @@ -827,7 +828,7 @@ def test_get_compute_domain_with_unmeetable_row_condition(sa): domain_type="column", ) - # Seeing if raw data is the same as the data after condition has been applied - checking post computation data # noqa: E501 + # Seeing if raw data is the same as the data after condition has been applied - checking post computation data # noqa: E501 # FIXME CoP raw_data = execution_engine.execute_query( sa.select(sa.text("*")) .select_from( @@ -846,7 +847,7 @@ def test_get_compute_domain_with_unmeetable_row_condition(sa): assert accessor_kwargs == {"column": "a"}, "Accessor kwargs have been modified" -# Testing to ensure that great expectation parser also works in terms of defining a compute domain # noqa: E501 +# Testing to ensure that great expectation parser also works in terms of defining a compute domain # noqa: E501 # FIXME CoP @pytest.mark.sqlite def test_get_compute_domain_with_gx_condition_parser(sa): execution_engine = build_sa_execution_engine( @@ -863,7 +864,7 @@ def test_get_compute_domain_with_gx_condition_parser(sa): domain_type="column", ) - # Seeing if raw data is the same as the data after condition has been applied - checking post computation data # noqa: E501 + # Seeing if raw data is the same as the data after condition has been applied - checking post computation data # noqa: E501 # FIXME CoP raw_data = 
execution_engine.execute_query( sa.select(sa.text("*")) .select_from( @@ -900,7 +901,7 @@ def test_get_compute_domain_with_nonexistent_condition_parser(sa): ) -# Ensuring that we can properly inform user when metric doesn't exist - should get a metric provider error # noqa: E501 +# Ensuring that we can properly inform user when metric doesn't exist - should get a metric provider error # noqa: E501 # FIXME CoP @pytest.mark.sqlite def test_resolve_metric_bundle_with_nonexistent_metric(sa): execution_engine = build_sa_execution_engine( @@ -946,7 +947,7 @@ def test_resolve_metric_bundle_with_nonexistent_metric(sa): def test_resolve_metric_bundle_with_compute_domain_kwargs_json_serialization(sa): """ Insures that even when "compute_domain_kwargs" has multiple keys, it will be JSON-serialized for "IDDict.to_id()". - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP execution_engine = build_sa_execution_engine( pd.DataFrame( { @@ -1129,7 +1130,7 @@ class TestConnectionPersistence: sqlite/mssql temp tables only persist within a connection, so we need to keep the connection alive. These tests ensure that we use the existing connection if one is available. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP def test_same_connection_used_from_static_pool_sqlite(self, sa, pd_dataframe: pd.DataFrame): """What does this test and why? @@ -1139,7 +1140,7 @@ def test_same_connection_used_from_static_pool_sqlite(self, sa, pd_dataframe: pd Here we test that by creating a temp table and then querying it multiple times (each time pulling a connection from the pool). The same connection should be pulled from the pool, if the connection wasn't the same the temporary table wouldn't be accessible. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP execution_engine = SqlAlchemyExecutionEngine(connection_string="sqlite://") with execution_engine.get_connection() as con: add_dataframe_to_db(df=pd_dataframe, name="test", con=con, index=False) @@ -1156,7 +1157,7 @@ def test_same_connection_used_from_static_pool_sqlite(self, sa, pd_dataframe: pd res = execution_engine.execute_query(sa.text(select_temp_table)).fetchall() res2 = execution_engine.execute_query(sa.text(select_temp_table)).fetchall() - # This assert is here just to make sure when we assert res == res2 we are not comparing None == None # noqa: E501 + # This assert is here just to make sure when we assert res == res2 we are not comparing None == None # noqa: E501 # FIXME CoP expected = [(1, 4), (2, 4)] assert [r for r in res] == expected @@ -1171,7 +1172,7 @@ def test_same_connection_accessible_from_execution_engine_sqlite( sqlite and that connection is accessible from the execution engine. Here we test that by creating a temp table and then querying it multiple times (each time pulling a connection from the pool). The same connection should be accessible from the execution engine after each query. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP execution_engine = SqlAlchemyExecutionEngine(connection_string="sqlite://") with execution_engine.get_connection() as con: add_dataframe_to_db(df=pd_dataframe, name="test", con=con, index=False) @@ -1214,7 +1215,7 @@ def test_get_connection(self, sa): @pytest.mark.unit class TestDialectRequiresPersistedConnection: def test__dialect_requires_persisted_connection_mssql(self): - connection_string = "mssql+pyodbc://sa:ReallyStrongPwd1234%^&*@db_hostname:1433/test_ci?driver=ODBC Driver 17 for SQL Server&charset=utf8&autocommit=true" # noqa: E501 + connection_string = get_default_mssql_url() assert _dialect_requires_persisted_connection(connection_string=connection_string) def test__dialect_requires_persisted_connection_sqlite(self): diff --git a/tests/expectations/core/conftest.py b/tests/expectations/core/conftest.py index 845b8cbbec67..1a725e3a9520 100644 --- a/tests/expectations/core/conftest.py +++ b/tests/expectations/core/conftest.py @@ -10,7 +10,7 @@ def titanic_df() -> pd.DataFrame: path = file_relative_path( __file__, - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP "..", "..", "test_sets", diff --git a/tests/expectations/core/test_expect_column_values_to_be_present_in_other_table.py b/tests/expectations/core/test_expect_column_values_to_be_present_in_other_table.py index 5f19bc7565c9..a845e1f11901 100644 --- a/tests/expectations/core/test_expect_column_values_to_be_present_in_other_table.py +++ b/tests/expectations/core/test_expect_column_values_to_be_present_in_other_table.py @@ -2,7 +2,7 @@ import pandas as pd import pytest -from contrib.experimental.great_expectations_experimental.expectations.expect_column_values_to_be_present_in_other_table import ( # noqa: E501 +from contrib.experimental.great_expectations_experimental.expectations.expect_column_values_to_be_present_in_other_table import ( # noqa: E501 # FIXME CoP ExpectColumnValuesToBePresentInOtherTable, # needed for expectation 
registration ) @@ -19,7 +19,7 @@ @pytest.fixture def referential_integrity_db(sa): - """Create a sqlite database with 3 tables: order_table_1, order_table_2, and customer_table. We only run this once to create the database.""" # noqa: E501 + """Create a sqlite database with 3 tables: order_table_1, order_table_2, and customer_table. We only run this once to create the database.""" # noqa: E501 # FIXME CoP sqlite_engine = sa.create_engine(f"sqlite:///{DB_PATH}") order_table_1 = pd.DataFrame( { @@ -115,7 +115,7 @@ def test_configuration_invalid_column_name(sqlite_datasource): This is testing default behavior of `batch.validate()` which catches Exception information and places it in `exception_info`. Here we check that the exception message contains the text we expect - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP datasource = sqlite_datasource asset_name = "order_table_2" asset = datasource.add_table_asset(name=asset_name, table_name="order_table_2") diff --git a/tests/expectations/core/test_expect_queried_column_value_frequency_to_meet_threshold.py b/tests/expectations/core/test_expect_queried_column_value_frequency_to_meet_threshold.py index c3972559d708..cb2724c7ca8c 100644 --- a/tests/expectations/core/test_expect_queried_column_value_frequency_to_meet_threshold.py +++ b/tests/expectations/core/test_expect_queried_column_value_frequency_to_meet_threshold.py @@ -1,7 +1,7 @@ from typing import TYPE_CHECKING import pytest -from contrib.experimental.great_expectations_experimental.expectations.expect_queried_column_value_frequency_to_meet_threshold import ( # noqa: E501 +from contrib.experimental.great_expectations_experimental.expectations.expect_queried_column_value_frequency_to_meet_threshold import ( # noqa: E501 # FIXME CoP ExpectQueriedColumnValueFrequencyToMeetThreshold, # noqa: F401 # needed for expectation registration ) @@ -57,7 +57,7 @@ def test_expect_queried_column_value_frequency_to_meet_threshold_sqlite( warns, 
titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled, ): - context = titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 + context = titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 # FIXME CoP validator: Validator = context.get_validator(batch_request=batch_request) @@ -90,7 +90,7 @@ def test_expect_queried_column_value_frequency_to_meet_threshold_sqlite( [ ( sqlite_batch_request, - "SELECT {col}, CAST(COUNT({col}) AS float) / (SELECT COUNT({col}) FROM titanic) FROM titanic GROUP BY {col}", # noqa: E501 + "SELECT {col}, CAST(COUNT({col}) AS float) / (SELECT COUNT({col}) FROM titanic) FROM titanic GROUP BY {col}", # noqa: E501 # FIXME CoP True, 0.6481340441736482, None, @@ -98,7 +98,7 @@ def test_expect_queried_column_value_frequency_to_meet_threshold_sqlite( ), ( sqlite_runtime_batch_request, - "SELECT {col}, CAST(COUNT({col}) AS float) / (SELECT COUNT({col}) FROM titanic) FROM {batch} GROUP BY {col}", # noqa: E501 + "SELECT {col}, CAST(COUNT({col}) AS float) / (SELECT COUNT({col}) FROM titanic) FROM {batch} GROUP BY {col}", # noqa: E501 # FIXME CoP False, 0.04112718964204113, None, @@ -106,7 +106,7 @@ def test_expect_queried_column_value_frequency_to_meet_threshold_sqlite( ), ( sqlite_batch_request, - "SELECT {col}, CAST(COUNT({col}) AS float) / (SELECT COUNT(y) FROM wrong) FROM {batch} GROUP BY {col}", # noqa: E501 + "SELECT {col}, CAST(COUNT({col}) AS float) / (SELECT COUNT(y) FROM wrong) FROM {batch} GROUP BY {col}", # noqa: E501 # FIXME CoP True, 7.091666666666667, None, @@ -114,7 +114,7 @@ def test_expect_queried_column_value_frequency_to_meet_threshold_sqlite( ), ( sqlite_batch_request, - "SELECT {col}, CAST(COUNT({col}) AS float) / (SELECT COUNT({col}) FROM titanic) FROM {batch} GROUP BY 
{col}", # noqa: E501 + "SELECT {col}, CAST(COUNT({col}) AS float) / (SELECT COUNT({col}) FROM titanic) FROM {batch} GROUP BY {col}", # noqa: E501 # FIXME CoP False, 0.2338156892612338, 'col("Age")<35', @@ -122,7 +122,7 @@ def test_expect_queried_column_value_frequency_to_meet_threshold_sqlite( ), ( sqlite_batch_request, - "SELECT {col}, CAST(COUNT({col}) AS float) / (SELECT COUNT({col}) FROM {batch}) / 2 FROM {batch} GROUP BY {col}", # noqa: E501 + "SELECT {col}, CAST(COUNT({col}) AS float) / (SELECT COUNT({col}) FROM {batch}) / 2 FROM {batch} GROUP BY {col}", # noqa: E501 # FIXME CoP False, 0.3240670220868241, None, @@ -141,7 +141,7 @@ def test_expect_queried_column_value_frequency_to_meet_threshold_override_query_ warns, titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled, ): - context = titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 + context = titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 # FIXME CoP validator: Validator = context.get_validator(batch_request=batch_request) @@ -229,14 +229,14 @@ def test_expect_queried_column_value_frequency_to_meet_threshold_spark( "query,success,observed,row_condition,warns", [ ( - "SELECT {col}, CAST(COUNT({col}) AS float) / (SELECT COUNT({col}) FROM {batch}) / 2 FROM {batch} GROUP BY {col}", # noqa: E501 + "SELECT {col}, CAST(COUNT({col}) AS float) / (SELECT COUNT({col}) FROM {batch}) / 2 FROM {batch} GROUP BY {col}", # noqa: E501 # FIXME CoP False, 0.3240670220868241, None, False, ), ( - "SELECT {col}, CAST(COUNT({col}) AS float) / (SELECT COUNT({col}) FROM {batch}) / 2 FROM {batch} GROUP BY {col}", # noqa: E501 + "SELECT {col}, CAST(COUNT({col}) AS float) / (SELECT COUNT({col}) FROM {batch}) / 2 FROM {batch} GROUP BY {col}", # noqa: E501 # FIXME 
CoP False, 0.3107287449392713, 'col("Age")<35', @@ -295,7 +295,7 @@ def test_expect_queried_column_value_frequency_to_meet_threshold_override_query_ def test_expect_queried_column_value_frequency_to_meet_threshold_sqlite_multi_value( titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled, ): - context = titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 + context = titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 # FIXME CoP validator: Validator = context.get_validator(batch_request=sqlite_batch_request) diff --git a/tests/expectations/core/test_expect_queried_table_row_count_to_be.py b/tests/expectations/core/test_expect_queried_table_row_count_to_be.py index dd29129ceab4..f32ec37af8ee 100644 --- a/tests/expectations/core/test_expect_queried_table_row_count_to_be.py +++ b/tests/expectations/core/test_expect_queried_table_row_count_to_be.py @@ -1,7 +1,7 @@ from typing import TYPE_CHECKING import pytest -from contrib.experimental.great_expectations_experimental.expectations.expect_queried_table_row_count_to_be import ( # noqa: E501 +from contrib.experimental.great_expectations_experimental.expectations.expect_queried_table_row_count_to_be import ( # noqa: E501 # FIXME CoP ExpectQueriedTableRowCountToBe, # noqa: F401 # needed for expectation registration ) @@ -55,7 +55,7 @@ def test_expect_queried_column_value_frequency_to_meet_threshold_sqlite( row_condition, titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled, ): - context = titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 + context = 
titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 # FIXME CoP validator: Validator = context.get_validator(batch_request=batch_request) @@ -100,7 +100,7 @@ def test_expect_queried_column_value_frequency_to_meet_threshold_override_query_ row_condition, titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled, ): - context = titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 + context = titanic_v013_multi_datasource_pandas_and_sqlalchemy_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 # FIXME CoP validator: Validator = context.get_validator(batch_request=batch_request) diff --git a/tests/expectations/core/test_expect_table_row_count_to_be_between.py b/tests/expectations/core/test_expect_table_row_count_to_be_between.py index 86ffb81b6196..33f6ecc169fa 100644 --- a/tests/expectations/core/test_expect_table_row_count_to_be_between.py +++ b/tests/expectations/core/test_expect_table_row_count_to_be_between.py @@ -11,7 +11,7 @@ def test_expect_table_row_count_to_be_between_runtime_custom_query_no_temp_table_sa( titanic_v013_multi_datasource_multi_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled, ): - context = titanic_v013_multi_datasource_multi_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 + context = titanic_v013_multi_datasource_multi_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 # FIXME CoP datasource = context.data_sources.all()["my_sqlite_db_datasource"] assert isinstance(datasource, SQLDatasource) @@ -50,7 +50,7 @@ def test_expect_table_row_count_to_be_between_runtime_custom_query_no_temp_table def 
test_expect_table_row_count_to_be_between_runtime_custom_query_with_where_no_temp_table_sa( titanic_v013_multi_datasource_multi_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled, ): - context = titanic_v013_multi_datasource_multi_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 + context = titanic_v013_multi_datasource_multi_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled # noqa: E501 # FIXME CoP datasource = context.data_sources.all()["my_sqlite_db_datasource"] assert isinstance(datasource, SQLDatasource) diff --git a/tests/expectations/core/test_unexpected_rows_expectation.py b/tests/expectations/core/test_unexpected_rows_expectation.py index cf7f790a2b12..557bcfda9248 100644 --- a/tests/expectations/core/test_unexpected_rows_expectation.py +++ b/tests/expectations/core/test_unexpected_rows_expectation.py @@ -8,6 +8,7 @@ from great_expectations.data_context.util import file_relative_path from great_expectations.expectations import UnexpectedRowsExpectation from great_expectations.expectations.metrics.util import MAX_RESULT_RECORDS +from great_expectations.render.renderer.content_block.content_block import ContentBlockRenderer if TYPE_CHECKING: from great_expectations.data_context import AbstractDataContext @@ -66,7 +67,7 @@ def test_unexpected_rows_expectation_invalid_query_info_message(query: str, capl pytest.param( "SELECT * FROM {batch} WHERE passenger_count > 7", True, - "0 unexpected rows", + 0, 0, id="success", ), @@ -74,14 +75,14 @@ def test_unexpected_rows_expectation_invalid_query_info_message(query: str, capl # There is a single instance where passenger_count == 7 "SELECT * FROM {batch} WHERE passenger_count > 6", False, - "1 unexpected row", + 1, 1, id="failure", ), pytest.param( "SELECT * FROM {batch} WHERE passenger_count > 0", False, - "97853 unexpected rows", + 97853, MAX_RESULT_RECORDS, id="greater than MAX_RESULT_RECORDS unexpected rows", 
), @@ -144,3 +145,17 @@ def test_unexpected_rows_expectation_render( == "$unexpected_rows_query" ) assert expectation.rendered_content[0].value.code_block.get("language") == "sql" + + +@pytest.mark.unit +def test_data_docs_rendering(): + query = "SELECT * FROM {batch} WHERE passenger_count > 7" + expectation = UnexpectedRowsExpectation(unexpected_rows_query=query) + results = ContentBlockRenderer.render(expectation.configuration) + assert isinstance(results, list) and len(results) == 1 + result = results[0] + assert result.string_template == { + "template": "Unexpected rows query: $unexpected_rows_query", + "params": {"unexpected_rows_query": query}, + "styling": {}, + } diff --git a/tests/expectations/fixtures/expect_column_values_to_equal_three.py b/tests/expectations/fixtures/expect_column_values_to_equal_three.py index b6c3726d17cc..6d29a1291a50 100644 --- a/tests/expectations/fixtures/expect_column_values_to_equal_three.py +++ b/tests/expectations/fixtures/expect_column_values_to_equal_three.py @@ -77,7 +77,7 @@ class ExpectColumnValuesToEqualThree__SecondIteration(ExpectColumnValuesToEqualT "title": "other_negative_test_with_mostly", "exact_match_out": False, "in": {"column": "mostly_threes", "mostly": 0.9}, - # "include_in_gallery": False, #This key is omitted, so the example shouldn't show up in the gallery # noqa: E501 + # "include_in_gallery": False, #This key is omitted, so the example shouldn't show up in the gallery # noqa: E501 # FIXME CoP "out": { "success": False, "unexpected_index_list": [6, 7], @@ -122,7 +122,7 @@ def _answer_renderer(cls, configuration=None, result=None, runtime_configuration return f'At least {mostly * 100}% of values in column "{column}" equal 3.' else: return f'Less than {mostly * 100}% of values in column "{column}" equal 3.' - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if result.success: return f'All of the values in column "{column}" equal 3.' 
else: @@ -216,7 +216,7 @@ class ExpectColumnValuesToEqualThree__BrokenIteration( "title": "other_negative_test_with_mostly", "exact_match_out": False, "in": {"column": "mostly_threes", "mostly": 0.9}, - # "include_in_gallery": False, #This key is omitted, so the example shouldn't show up in the gallery # noqa: E501 + # "include_in_gallery": False, #This key is omitted, so the example shouldn't show up in the gallery # noqa: E501 # FIXME CoP "out": { "success": False, "unexpected_index_list": [6, 7], diff --git a/tests/expectations/metrics/column_map_metrics/test_column_map_condition_auxillary_methods.py b/tests/expectations/metrics/column_map_metrics/test_column_map_condition_auxillary_methods.py index 4d897116b59c..30528a70d64c 100644 --- a/tests/expectations/metrics/column_map_metrics/test_column_map_condition_auxillary_methods.py +++ b/tests/expectations/metrics/column_map_metrics/test_column_map_condition_auxillary_methods.py @@ -17,7 +17,7 @@ from great_expectations.expectations.metrics import ( MapMetricProvider, ) -from great_expectations.expectations.metrics.map_metric_provider.column_map_condition_auxilliary_methods import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.column_map_condition_auxilliary_methods import ( # noqa: E501 # FIXME CoP _spark_column_map_condition_values, _sqlalchemy_column_map_condition_values, ) @@ -29,7 +29,7 @@ def mini_taxi_df() -> pd.DataFrame: """ Returns: pandas dataframe that contains a small selection of columns and rows from taxi_data, for unittesting. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP df = pd.DataFrame( { "pk_1": [0, 1, 2, 3, 4], @@ -129,8 +129,8 @@ def sql_execution_engine_with_mini_taxi_selectable(sa, sql_execution_engine_with # We calculate the column_values.between.condition with min value 0.0 and max value 10.0. 
-# when row_condition is col("pk_1")!=0 _sqlalchemy_column_map_condition_values() method will return [14.8] because it will run against all rows of mini_taxi_df and find the total_amount values that out of range (0 < x < 10.0). # noqa: E501 -# when row_condition is col("pk_1")==0 _sqlalchemy_column_map_condition_values() method will return [] because it will only run against a single row of mini_taxi_df (where pk_1==0), and that total_amount value is within our range (9.75). # noqa: E501 +# when row_condition is col("pk_1")!=0 _sqlalchemy_column_map_condition_values() method will return [14.8] because it will run against all rows of mini_taxi_df and find the total_amount values that out of range (0 < x < 10.0). # noqa: E501 # FIXME CoP +# when row_condition is col("pk_1")==0 _sqlalchemy_column_map_condition_values() method will return [] because it will only run against a single row of mini_taxi_df (where pk_1==0), and that total_amount value is within our range (9.75). # noqa: E501 # FIXME CoP @pytest.mark.sqlite @pytest.mark.parametrize( "execution_engine_fixture_name", @@ -184,7 +184,7 @@ def test_sqlalchemy_column_map_condition_values( metric_value_kwargs=metric_value_kwargs, ) - # table.columns metric has to be calculated and loaded first, because it is a dependency of the `column_values.between.condition` metric. # noqa: E501 + # table.columns metric has to be calculated and loaded first, because it is a dependency of the `column_values.between.condition` metric. # noqa: E501 # FIXME CoP table_columns_metric, table_column_metrics_results = get_table_columns_metric( execution_engine=execution_engine ) @@ -258,7 +258,7 @@ def test_spark_column_map_condition_values( metric_value_kwargs=metric_value_kwargs, ) - # table.columns metric has to be calculated and loaded first, because it is a dependency of the `column_values.between.condition` metric. 
# noqa: E501 + # table.columns metric has to be calculated and loaded first, because it is a dependency of the `column_values.between.condition` metric. # noqa: E501 # FIXME CoP table_columns_metric, table_column_metrics_results = get_table_columns_metric( execution_engine=execution_engine ) diff --git a/tests/expectations/metrics/column_map_metrics/test_unexpected_indices_and_query.py b/tests/expectations/metrics/column_map_metrics/test_unexpected_indices_and_query.py index 897d03d83c51..5800af653515 100644 --- a/tests/expectations/metrics/column_map_metrics/test_unexpected_indices_and_query.py +++ b/tests/expectations/metrics/column_map_metrics/test_unexpected_indices_and_query.py @@ -26,7 +26,7 @@ def animal_table_df() -> pd.DataFrame: """ Returns: pandas dataframe that contains example data for unexpected_index_column_names metric tests - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP df = pd.DataFrame( { "pk_1": [0, 1, 2, 3, 4, 5], @@ -84,7 +84,7 @@ def _build_table_columns_and_unexpected( Tuple with MetricConfigurations corresponding to unexpected_condition and table_columns metric, as well as metrics dict. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP metrics: Dict[Tuple[str, str, str], MetricValue] = {} # get table_columns_metric diff --git a/tests/expectations/metrics/conftest.py b/tests/expectations/metrics/conftest.py index 8435dd660097..59b14ee33e57 100644 --- a/tests/expectations/metrics/conftest.py +++ b/tests/expectations/metrics/conftest.py @@ -43,7 +43,7 @@ def batch_selectable() -> sa.Table: class MockSqlAlchemyExecutionEngine(SqlAlchemyExecutionEngine): def __init__(self, create_temp_table: bool = True, *args, **kwargs): - self.engine = MockSaEngine(dialect=Dialect("sqlite")) # type: ignore[assignment] + self.engine = MockSaEngine(dialect=Dialect("sqlite")) # type: ignore[assignment] # FIXME CoP self._create_temp_table = create_temp_table self._connection = MockConnection() diff --git a/tests/expectations/metrics/query_metrics/test_query_metrics.py b/tests/expectations/metrics/query_metrics/test_query_metrics.py index 78d02b2c6909..cf1981035394 100644 --- a/tests/expectations/metrics/query_metrics/test_query_metrics.py +++ b/tests/expectations/metrics/query_metrics/test_query_metrics.py @@ -23,7 +23,7 @@ @pytest.mark.unit def test_query_template_get_query_function_with_int(): - """Simple test to ensure that the `get_query()` method for QueryTemplateValue can handle integer value""" # noqa: E501 + """Simple test to ensure that the `get_query()` method for QueryTemplateValue can handle integer value""" # noqa: E501 # FIXME CoP query: str = """ SELECT {column_to_check} FROM {batch} @@ -47,7 +47,7 @@ def test_query_template_get_query_function_with_int(): @pytest.mark.unit def test_query_template_get_query_function_with_float(): - """Simple test to ensure that the `get_query()` method for QueryTemplateValue can handle float value""" # noqa: E501 + """Simple test to ensure that the `get_query()` method for QueryTemplateValue can handle float value""" # noqa: E501 # FIXME CoP query: str = """ SELECT {column_to_check} FROM {batch} diff --git 
a/tests/expectations/metrics/table_metrics/test_table_head.py b/tests/expectations/metrics/table_metrics/test_table_head.py index cee1a3060450..9a13261520ad 100644 --- a/tests/expectations/metrics/table_metrics/test_table_head.py +++ b/tests/expectations/metrics/table_metrics/test_table_head.py @@ -71,7 +71,7 @@ def sqlite_batch_with_selectable_without_temp_table( @pytest.mark.sqlite @pytest.mark.parametrize( - "execution_engine, n_rows, fetch_all, expected_shape, expected_columns, expected_values, expected_temp_tables", # noqa: E501 + "execution_engine, n_rows, fetch_all, expected_shape, expected_columns, expected_values, expected_temp_tables", # noqa: E501 # FIXME CoP [ ( "sqlite_batch_with_table_name", diff --git a/tests/expectations/metrics/test_core.py b/tests/expectations/metrics/test_core.py index 479fd038aa11..25ab85fdb860 100644 --- a/tests/expectations/metrics/test_core.py +++ b/tests/expectations/metrics/test_core.py @@ -410,7 +410,7 @@ def test_column_quoted_name_type_sa_handles_explicit_string_identifiers(sa): When explicit quoted identifiers are passed in, we should use them as-is. Explicit identifiers are used when the column contains a space or reserved word. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP engine = build_sa_execution_engine( pd.DataFrame( { @@ -993,7 +993,7 @@ def test_column_partition_metric_pd(): For "datetime.datetime" data, test set contains 12 dates, starting with January 1, 2021, separated by 7 days. Expected partition boundaries are pre-computed algorithmically and asserted to be "close" to actual metric values. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP week_idx: int engine = build_pandas_engine( pd.DataFrame( @@ -1013,7 +1013,7 @@ def test_column_partition_metric_pd(): 11, ], "b": [ - datetime.datetime(2021, 1, 1, 0, 0, 0) + datetime.timedelta(days=(week_idx * 7)) # noqa: DTZ001 + datetime.datetime(2021, 1, 1, 0, 0, 0) + datetime.timedelta(days=(week_idx * 7)) # noqa: DTZ001 # FIXME CoP for week_idx in range(12) ], }, @@ -1134,14 +1134,14 @@ def test_column_partition_metric_pd(): assert all( isclose( operand_a=element.to_pydatetime(), - operand_b=(datetime.datetime(2021, 1, 1, 0, 0, 0) + (increment * idx)), # noqa: DTZ001 + operand_b=(datetime.datetime(2021, 1, 1, 0, 0, 0) + (increment * idx)), # noqa: DTZ001 # FIXME CoP ) for idx, element in enumerate(results[desired_metric.id]) ) @pytest.mark.sqlite -def test_column_partition_metric_sa(sa): # noqa: PLR0915 +def test_column_partition_metric_sa(sa): # noqa: PLR0915 # FIXME CoP """ Test of "column.partition" metric for both, standard numeric column and "datetime.datetime" valued column. @@ -1154,7 +1154,7 @@ def test_column_partition_metric_sa(sa): # noqa: PLR0915 For "datetime.datetime" data, test set contains 12 dates, starting with January 1, 2021, separated by 7 days. Expected partition boundaries are pre-computed algorithmically and asserted to be "close" to actual metric values. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP week_idx: int engine = build_sa_execution_engine( pd.DataFrame( @@ -1174,7 +1174,7 @@ def test_column_partition_metric_sa(sa): # noqa: PLR0915 11, ], "b": [ - datetime.datetime(2021, 1, 1, 0, 0, 0) + datetime.timedelta(days=(week_idx * 7)) # noqa: DTZ001 + datetime.datetime(2021, 1, 1, 0, 0, 0) + datetime.timedelta(days=(week_idx * 7)) # noqa: DTZ001 # FIXME CoP for week_idx in range(12) ], }, @@ -1350,14 +1350,14 @@ def test_column_partition_metric_sa(sa): # noqa: PLR0915 assert all( isclose( operand_a=element, - operand_b=(datetime.datetime(2021, 1, 1, 0, 0, 0) + (increment * idx)), # noqa: DTZ001 + operand_b=(datetime.datetime(2021, 1, 1, 0, 0, 0) + (increment * idx)), # noqa: DTZ001 # FIXME CoP ) for idx, element in enumerate(results[desired_metric.id]) ) @pytest.mark.spark -def test_column_partition_metric_spark(spark_session): # noqa: PLR0915 +def test_column_partition_metric_spark(spark_session): # noqa: PLR0915 # FIXME CoP """ Test of "column.partition" metric for both, standard numeric column and "datetime.datetime" valued column. @@ -1370,7 +1370,7 @@ def test_column_partition_metric_spark(spark_session): # noqa: PLR0915 For "datetime.datetime" data, test set contains 12 dates, starting with January 1, 2021, separated by 7 days. Expected partition boundaries are pre-computed algorithmically and asserted to be "close" to actual metric values. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP week_idx: int engine: SparkDFExecutionEngine = build_spark_engine( spark=spark_session, @@ -1391,7 +1391,7 @@ def test_column_partition_metric_spark(spark_session): # noqa: PLR0915 11, ], "b": [ - datetime.datetime(2021, 1, 1, 0, 0, 0) + datetime.timedelta(days=(week_idx * 7)) # noqa: DTZ001 + datetime.datetime(2021, 1, 1, 0, 0, 0) + datetime.timedelta(days=(week_idx * 7)) # noqa: DTZ001 # FIXME CoP for week_idx in range(12) ], }, @@ -1573,7 +1573,7 @@ def test_column_partition_metric_spark(spark_session): # noqa: PLR0915 assert all( isclose( operand_a=element, - operand_b=(datetime.datetime(2021, 1, 1, 0, 0, 0) + (increment * idx)), # noqa: DTZ001 + operand_b=(datetime.datetime(2021, 1, 1, 0, 0, 0) + (increment * idx)), # noqa: DTZ001 # FIXME CoP ) for idx, element in enumerate(results[desired_metric.id]) ) @@ -2739,7 +2739,7 @@ def test_z_score_under_threshold_pd(): metric_value_kwargs={"double_sided": True, "threshold": 2}, ) column_values_z_score_under_threshold_condition_metric.metric_dependencies = { - f"column_values.z_score.{MetricPartialFunctionTypeSuffixes.MAP.value}": column_values_z_score_map_metric, # noqa: E501 + f"column_values.z_score.{MetricPartialFunctionTypeSuffixes.MAP.value}": column_values_z_score_map_metric, # noqa: E501 # FIXME CoP "table.columns": table_columns_metric, } results = engine.resolve_metrics( @@ -2846,7 +2846,7 @@ def test_z_score_under_threshold_spark(spark_session): metric_value_kwargs={"double_sided": True, "threshold": 2}, ) condition_metric.metric_dependencies = { - f"column_values.z_score.{MetricPartialFunctionTypeSuffixes.MAP.value}": column_values_z_score_map_metric, # noqa: E501 + f"column_values.z_score.{MetricPartialFunctionTypeSuffixes.MAP.value}": column_values_z_score_map_metric, # noqa: E501 # FIXME CoP "table.columns": table_columns_metric, } results = engine.resolve_metrics(metrics_to_resolve=(condition_metric,), metrics=metrics) @@ -2892,7 +2892,7 @@ def 
test_table_metric_pd(caplog): @pytest.mark.big -def test_map_column_pairs_equal_metric_pd(): # noqa: PLR0915 +def test_map_column_pairs_equal_metric_pd(): # noqa: PLR0915 # FIXME CoP engine = build_pandas_engine( pd.DataFrame( data={ @@ -3130,7 +3130,7 @@ def test_table_metric_sa(sa): @pytest.mark.sqlite -def test_map_column_pairs_equal_metric_sa(sa): # noqa: PLR0915 +def test_map_column_pairs_equal_metric_sa(sa): # noqa: PLR0915 # FIXME CoP engine = build_sa_execution_engine( pd.DataFrame( data={ @@ -3337,7 +3337,7 @@ def test_map_column_pairs_equal_metric_sa(sa): # noqa: PLR0915 @pytest.mark.spark -def test_map_column_pairs_equal_metric_spark(spark_session): # noqa: PLR0915 +def test_map_column_pairs_equal_metric_spark(spark_session): # noqa: PLR0915 # FIXME CoP engine: SparkDFExecutionEngine = build_spark_engine( spark=spark_session, df=pd.DataFrame( @@ -4638,7 +4638,7 @@ def test_batch_aggregate_metrics_pd(): "table.columns": table_columns_metric, } - start = datetime.datetime.now() # noqa: DTZ005 + start = datetime.datetime.now() # noqa: DTZ005 # FIXME CoP results = engine.resolve_metrics( metrics_to_resolve=( desired_metric_1, @@ -4649,7 +4649,7 @@ def test_batch_aggregate_metrics_pd(): metrics=metrics, ) metrics.update(results) - end = datetime.datetime.now() # noqa: DTZ005 + end = datetime.datetime.now() # noqa: DTZ005 # FIXME CoP print(end - start) assert results[desired_metric_1.id] == "2021-06-18" assert results[desired_metric_2.id] == "2021-01-01" @@ -4754,7 +4754,7 @@ def test_batch_aggregate_metrics_sa(caplog, sa): } caplog.clear() caplog.set_level(logging.DEBUG, logger="great_expectations") - start = datetime.datetime.now() # noqa: DTZ005 + start = datetime.datetime.now() # noqa: DTZ005 # FIXME CoP results = engine.resolve_metrics( metrics_to_resolve=( desired_metric_1, @@ -4765,7 +4765,7 @@ def test_batch_aggregate_metrics_sa(caplog, sa): metrics=metrics, ) metrics.update(results) - end = datetime.datetime.now() # noqa: DTZ005 + end = 
datetime.datetime.now() # noqa: DTZ005 # FIXME CoP print("t1") print(end - start) assert results[desired_metric_1.id] == 3 @@ -4876,7 +4876,7 @@ def test_batch_aggregate_metrics_spark(caplog, spark_session): desired_metric_4.metric_dependencies = { "metric_partial_fn": desired_aggregate_fn_metric_4, } - start = datetime.datetime.now() # noqa: DTZ005 + start = datetime.datetime.now() # noqa: DTZ005 # FIXME CoP caplog.clear() caplog.set_level(logging.DEBUG, logger="great_expectations") results = engine.resolve_metrics( @@ -4889,7 +4889,7 @@ def test_batch_aggregate_metrics_spark(caplog, spark_session): metrics=metrics, ) metrics.update(results) - end = datetime.datetime.now() # noqa: DTZ005 + end = datetime.datetime.now() # noqa: DTZ005 # FIXME CoP print(end - start) assert results[desired_metric_1.id] == 3 assert results[desired_metric_2.id] == 1 @@ -4905,7 +4905,7 @@ def test_batch_aggregate_metrics_spark(caplog, spark_session): @pytest.mark.big -def test_map_multicolumn_sum_equal_pd(): # noqa: PLR0915 +def test_map_multicolumn_sum_equal_pd(): # noqa: PLR0915 # FIXME CoP engine = build_pandas_engine( pd.DataFrame(data={"a": [0, 1, 2], "b": [5, 4, 3], "c": [0, 0, 1], "d": [7, 8, 9]}) ) @@ -5105,7 +5105,7 @@ def test_map_multicolumn_sum_equal_pd(): # noqa: PLR0915 @pytest.mark.sqlite -def test_map_multicolumn_sum_equal_sa(sa): # noqa: PLR0915 +def test_map_multicolumn_sum_equal_sa(sa): # noqa: PLR0915 # FIXME CoP engine = build_sa_execution_engine( pd.DataFrame(data={"a": [0, 1, 2], "b": [5, 4, 3], "c": [0, 0, 1], "d": [7, 8, 9]}), sa, @@ -5297,7 +5297,7 @@ def test_map_multicolumn_sum_equal_sa(sa): # noqa: PLR0915 @pytest.mark.spark -def test_map_multicolumn_sum_equal_spark(spark_session): # noqa: PLR0915 +def test_map_multicolumn_sum_equal_spark(spark_session): # noqa: PLR0915 # FIXME CoP engine: SparkDFExecutionEngine = build_spark_engine( spark=spark_session, df=pd.DataFrame(data={"a": [0, 1, 2], "b": [5, 4, 3], "c": [0, 0, 1], "d": [7, 8, 9]}), @@ -5492,7 
+5492,7 @@ def test_map_multicolumn_sum_equal_spark(spark_session): # noqa: PLR0915 @pytest.mark.big -def test_map_compound_columns_unique_pd(): # noqa: PLR0915 +def test_map_compound_columns_unique_pd(): # noqa: PLR0915 # FIXME CoP engine = build_pandas_engine( pd.DataFrame(data={"a": [0, 1, 1], "b": [1, 2, 3], "c": [0, 2, 2]}) ) @@ -5688,7 +5688,7 @@ def test_map_compound_columns_unique_pd(): # noqa: PLR0915 @pytest.mark.sqlite -def test_map_compound_columns_unique_sa(sa): # noqa: PLR0915 +def test_map_compound_columns_unique_sa(sa): # noqa: PLR0915 # FIXME CoP engine = build_sa_execution_engine( pd.DataFrame(data={"a": [0, 1, 1], "b": [1, 2, 3], "c": [0, 2, 2]}), sa, @@ -5755,7 +5755,7 @@ def test_map_compound_columns_unique_sa(sa): # noqa: PLR0915 metric_value_kwargs=None, ) condition_metric.metric_dependencies = { - f"compound_columns.count.{MetricPartialFunctionTypeSuffixes.MAP.value}": prerequisite_function_metric, # noqa: E501 + f"compound_columns.count.{MetricPartialFunctionTypeSuffixes.MAP.value}": prerequisite_function_metric, # noqa: E501 # FIXME CoP "table.columns": table_columns_metric, } results = engine.resolve_metrics( @@ -5848,7 +5848,7 @@ def test_map_compound_columns_unique_sa(sa): # noqa: PLR0915 metric_value_kwargs=None, ) condition_metric.metric_dependencies = { - f"compound_columns.count.{MetricPartialFunctionTypeSuffixes.MAP.value}": prerequisite_function_metric, # noqa: E501 + f"compound_columns.count.{MetricPartialFunctionTypeSuffixes.MAP.value}": prerequisite_function_metric, # noqa: E501 # FIXME CoP "table.columns": table_columns_metric, } results = engine.resolve_metrics( @@ -5915,7 +5915,7 @@ def test_map_compound_columns_unique_sa(sa): # noqa: PLR0915 @pytest.mark.spark -def test_map_compound_columns_unique_spark(spark_session): # noqa: PLR0915 +def test_map_compound_columns_unique_spark(spark_session): # noqa: PLR0915 # FIXME CoP engine: SparkDFExecutionEngine = build_spark_engine( spark=spark_session, df=pd.DataFrame(data={"a": [0, 
1, 1], "b": [1, 2, 3], "c": [0, 2, 2]}), @@ -6106,7 +6106,7 @@ def test_map_compound_columns_unique_spark(spark_session): # noqa: PLR0915 @pytest.mark.big -def test_map_select_column_values_unique_within_record_pd(): # noqa: PLR0915 +def test_map_select_column_values_unique_within_record_pd(): # noqa: PLR0915 # FIXME CoP engine = build_pandas_engine( pd.DataFrame( data={ @@ -6343,7 +6343,7 @@ def test_map_select_column_values_unique_within_record_pd(): # noqa: PLR0915 @pytest.mark.sqlite -def test_map_select_column_values_unique_within_record_sa(sa): # noqa: PLR0915 +def test_map_select_column_values_unique_within_record_sa(sa): # noqa: PLR0915 # FIXME CoP engine = build_sa_execution_engine( pd.DataFrame( data={ diff --git a/tests/expectations/metrics/test_map_metric.py b/tests/expectations/metrics/test_map_metric.py index 0053c676c7c5..ed4b63ccf0d4 100644 --- a/tests/expectations/metrics/test_map_metric.py +++ b/tests/expectations/metrics/test_map_metric.py @@ -137,7 +137,7 @@ def _expecation_configuration_to_validation_result_pandas( Args: expectation_configuration (ExpectationConfiguration): configuration that is being tested - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP expectation = gxe.ExpectColumnValuesToBeInSet(**expectation_configuration.kwargs) batch_definition = LegacyBatchDefinition( datasource_name="pandas_datasource", @@ -173,7 +173,7 @@ def _expecation_configuration_to_validation_result_sql( Args: expectation_configuration (ExpectationConfiguration): configuration that is being tested - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP expectation = gxe.ExpectColumnValuesToBeInSet(**expectation_configuration.kwargs) sqlite_path = file_relative_path(__file__, "../../test_sets/metrics_test.db") connection_string = f"sqlite:///{sqlite_path}" @@ -256,7 +256,7 @@ def test_get_aggregate_count_aware_metric_dependencies(basic_spark_df_execution_ ) assert ( dependencies["metric_partial_fn"].id[0] - == 
f"column_values.nonnull.{SummarizationMetricNameSuffixes.UNEXPECTED_COUNT.value}.{MetricPartialFunctionTypes.AGGREGATE_FN.metric_suffix}" # noqa: E501 + == f"column_values.nonnull.{SummarizationMetricNameSuffixes.UNEXPECTED_COUNT.value}.{MetricPartialFunctionTypes.AGGREGATE_FN.metric_suffix}" # noqa: E501 # FIXME CoP ) metric = MetricConfiguration( @@ -403,7 +403,7 @@ def test_pandas_unexpected_rows_summary_result_format_unexpected_rows_explicitly "value_set": ["cat", "fish", "dog"], "result_format": { "result_format": "SUMMARY", # SUMMARY will include partial_unexpected* values only - "include_unexpected_rows": False, # this is the default value, but making explicit for testing purposes # noqa: E501 + "include_unexpected_rows": False, # this is the default value, but making explicit for testing purposes # noqa: E501 # FIXME CoP }, }, ) @@ -538,7 +538,7 @@ def test_expectation_configuration_has_result_format( }, ) with pytest.warns(UserWarning) as config_warning: - result: ExpectationValidationResult = ( # noqa: F841 + result: ExpectationValidationResult = ( # noqa: F841 # FIXME CoP _expecation_configuration_to_validation_result_pandas( expectation_configuration=expectation_configuration, dataframe=pandas_animals_dataframe_for_unexpected_rows_and_index, diff --git a/tests/expectations/metrics/test_metric_providers.py b/tests/expectations/metrics/test_metric_providers.py index dffe34256216..b2ac82e642cf 100644 --- a/tests/expectations/metrics/test_metric_providers.py +++ b/tests/expectations/metrics/test_metric_providers.py @@ -23,10 +23,10 @@ from great_expectations.expectations.metrics.map_metric_provider.column_condition_partial import ( column_condition_partial, ) -from great_expectations.expectations.metrics.map_metric_provider.column_pair_condition_partial import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.column_pair_condition_partial import ( # noqa: E501 # FIXME CoP column_pair_condition_partial, ) -from 
great_expectations.expectations.metrics.map_metric_provider.multicolumn_condition_partial import ( # noqa: E501 +from great_expectations.expectations.metrics.map_metric_provider.multicolumn_condition_partial import ( # noqa: E501 # FIXME CoP multicolumn_condition_partial, ) from great_expectations.expectations.metrics.metric_provider import ( @@ -165,7 +165,7 @@ def test__column_map_metric__registration(mock_registry): The actual logic for this lives in the private method: `_register_metric_functions`, which is invoked from within `__new__` for the ancestor class `MetricProvider`. Since _register_metric_functions is private, we don't want to test it directly. Instead, we declare a custom ColumnMapMetricProvider, and test that the correct metrics are registered. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP registered_metric_keys = list(mock_registry._registered_metrics.keys()) for key in registered_metric_keys: assert "column_values.equal_seven" not in key diff --git a/tests/expectations/metrics/test_metrics_util.py b/tests/expectations/metrics/test_metrics_util.py index ba6dd53278d4..70a9b9909102 100644 --- a/tests/expectations/metrics/test_metrics_util.py +++ b/tests/expectations/metrics/test_metrics_util.py @@ -38,7 +38,7 @@ if TYPE_CHECKING: import pandas as pd -# The following class allows for declarative instantiation of base class for SqlAlchemy. Adopted from # noqa: E501 +# The following class allows for declarative instantiation of base class for SqlAlchemy. 
Adopted from # noqa: E501 # FIXME CoP # https://docs.sqlalchemy.org/en/14/faq/sqlexpressions.html#rendering-postcompile-parameters-as-bound-parameters Base = sqlalchemy.declarative_base() @@ -60,7 +60,7 @@ def _compare_select_statement_with_converted_string(engine) -> None: Helper method used to do the call to sql_statement_with_post_compile_to_string() and compare with expected val Args: engine (ExecutionEngine): SqlAlchemyExecutionEngine with connection to backend under test - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP select_statement: sqlalchemy.Select = select_with_post_compile_statements() returned_string = sql_statement_with_post_compile_to_string( engine=engine, select_statement=select_statement @@ -187,7 +187,7 @@ def test_get_unexpected_indices_for_single_pandas_named_index_named_unexpected_i @pytest.mark.unit -def test_get_unexpected_indices_for_single_pandas_named_index_named_unexpected_index_columns_without_column_values( # noqa: E501 +def test_get_unexpected_indices_for_single_pandas_named_index_named_unexpected_index_columns_without_column_values( # noqa: E501 # FIXME CoP pandas_animals_dataframe_for_unexpected_rows_and_index, unexpected_index_list_one_index_column_without_column_values, ): @@ -298,7 +298,7 @@ def test_get_unexpected_indices_for_multiple_pandas_named_indices_named_unexpect @pytest.mark.unit -def test_get_unexpected_indices_for_multiple_pandas_named_indices_named_unexpected_index_columns_without_column_values( # noqa: E501 +def test_get_unexpected_indices_for_multiple_pandas_named_indices_named_unexpected_index_columns_without_column_values( # noqa: E501 # FIXME CoP pandas_animals_dataframe_for_unexpected_rows_and_index, unexpected_index_list_two_index_columns_without_column_values, ): @@ -317,7 +317,7 @@ def test_get_unexpected_indices_for_multiple_pandas_named_indices_named_unexpect @pytest.mark.unit -def test_get_unexpected_indices_for_multiple_pandas_named_indices_named_unexpected_index_columns_one_column( # noqa: E501 +def 
test_get_unexpected_indices_for_multiple_pandas_named_indices_named_unexpected_index_columns_one_column( # noqa: E501 # FIXME CoP pandas_animals_dataframe_for_unexpected_rows_and_index, unexpected_index_list_one_index_column, ): @@ -335,7 +335,7 @@ def test_get_unexpected_indices_for_multiple_pandas_named_indices_named_unexpect @pytest.mark.unit -def test_get_unexpected_indices_for_multiple_pandas_named_indices_named_unexpected_index_columns_one_column_without_column_values( # noqa: E501 +def test_get_unexpected_indices_for_multiple_pandas_named_indices_named_unexpected_index_columns_one_column_without_column_values( # noqa: E501 # FIXME CoP pandas_animals_dataframe_for_unexpected_rows_and_index, unexpected_index_list_one_index_column_without_column_values, ): @@ -354,7 +354,7 @@ def test_get_unexpected_indices_for_multiple_pandas_named_indices_named_unexpect @pytest.mark.unit -def test_get_unexpected_indices_for_multiple_pandas_named_indices_named_unexpected_index_columns_wrong_column( # noqa: E501 +def test_get_unexpected_indices_for_multiple_pandas_named_indices_named_unexpected_index_columns_wrong_column( # noqa: E501 # FIXME CoP pandas_animals_dataframe_for_unexpected_rows_and_index, ): dataframe: pd.DataFrame = pandas_animals_dataframe_for_unexpected_rows_and_index @@ -374,7 +374,7 @@ def test_get_unexpected_indices_for_multiple_pandas_named_indices_named_unexpect @pytest.mark.unit -def test_get_unexpected_indices_for_multiple_pandas_named_indices_named_unexpected_index_wrong_domain( # noqa: E501 +def test_get_unexpected_indices_for_multiple_pandas_named_indices_named_unexpected_index_wrong_domain( # noqa: E501 # FIXME CoP pandas_animals_dataframe_for_unexpected_rows_and_index, ): dataframe: pd.DataFrame = pandas_animals_dataframe_for_unexpected_rows_and_index @@ -576,7 +576,7 @@ def test_get_dbms_compatible_metric_domain_column_list_kwargs( """ This shuffle intersperses input "column_list" so to ensure that there is no dependency on position of column names 
that must be quoted. Sorting in assertion below ensures that types are correct, regardless of column order. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP random.shuffle(test_column_names) metric_domain_kwargs: dict diff --git a/tests/expectations/test_dataclass_serializable_dot_dict_pattern.py b/tests/expectations/test_dataclass_serializable_dot_dict_pattern.py index abb3e13979b6..e55cbf688ff7 100644 --- a/tests/expectations/test_dataclass_serializable_dot_dict_pattern.py +++ b/tests/expectations/test_dataclass_serializable_dot_dict_pattern.py @@ -2,7 +2,7 @@ This file is intended to 1. test the basic behavior of SerializableDictDot, in combination with @dataclass, and 2. provides examples of best practice for working with typed objects within the Great Expectations codebase -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP from dataclasses import dataclass, field from enum import Enum @@ -332,7 +332,7 @@ def test_can_be_nested(): assert my_C["A_list"][1].bar == 102 assert my_C["B_list"][0]["quux"] == 43 - # Note: we don't currently support dot notation access within lists: `assert my_C["A_list"].1.bar == 102` # noqa: E501 + # Note: we don't currently support dot notation access within lists: `assert my_C["A_list"].1.bar == 102` # noqa: E501 # FIXME CoP # Demonstrate that we can access Enum sub-objects assert my_C["beta_var"] == MyEnum("x") @@ -391,14 +391,14 @@ def test_to_raw_dict_works_recursively(): C_dict = my_C.to_raw_dict() # Make sure it's a dictionary, not a DictDot - assert type(C_dict) == dict # noqa: E721 + assert type(C_dict) == dict # noqa: E721 # FIXME CoP assert isinstance(C_dict, DictDot) is False # Dictionaries don't support dot notation. 
with raises(AttributeError): - C_dict.A_list # noqa: B018 + C_dict.A_list # noqa: B018 # FIXME CoP - assert type(C_dict["A_list"][0]) == dict # noqa: E721 - assert type(C_dict["B_list"][0]) == dict # noqa: E721 + assert type(C_dict["A_list"][0]) == dict # noqa: E721 # FIXME CoP + assert type(C_dict["B_list"][0]) == dict # noqa: E721 # FIXME CoP assert C_dict == { "alpha_var": 20, @@ -451,7 +451,7 @@ def test_instantiation_with_a_from_legacy_dict_method(): One especially thorny example is when the dictionary contains keys that are reserved words in python. For example, test cases use the reserved word: "in" as one of their required fields. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP import inspect import logging @@ -464,13 +464,13 @@ class MyClassE(SerializableDictDot): @classmethod def from_legacy_dict(cls, dict): - """This method is an adapter to allow typing of legacy my_class_e dictionary objects, without needing to immediately clean up every object.""" # noqa: E501 + """This method is an adapter to allow typing of legacy my_class_e dictionary objects, without needing to immediately clean up every object.""" # noqa: E501 # FIXME CoP temp_dict = {} for k, v in dict.items(): # Ignore parameters that don't match the type definition if k in inspect.signature(cls).parameters: temp_dict[k] = v - else: # noqa: PLR5501 + else: # noqa: PLR5501 # FIXME CoP if k == "in": temp_dict["input"] = v else: @@ -497,7 +497,7 @@ def from_legacy_dict(cls, dict): # Note that after instantiation, the class does NOT have an "in" property with raises(AttributeError): - my_E["in"] == 10 # noqa: B015 + my_E["in"] == 10 # noqa: B015 # FIXME CoP # Because `in` is a reserved word, this will raise a SyntaxError: # my_F.in == 100 diff --git a/tests/expectations/test_expectation.py b/tests/expectations/test_expectation.py index 490449faa2aa..59269d4a97b6 100644 --- a/tests/expectations/test_expectation.py +++ b/tests/expectations/test_expectation.py @@ -91,7 +91,9 @@ def 
fake_expectation_config( [ ( FakeMulticolumnExpectation, - fake_expectation_config("fake_multicolumn_expectation", {"column_list": ["column_2"]}), + fake_expectation_config( + "fake_multicolumn_expectation", {"column_list": ["column_1", "column_2"]} + ), ), ( FakeColumnMapExpectation, @@ -125,7 +127,8 @@ def test_multicolumn_expectation_has_default_mostly(fake_expectation_cls, config ( FakeMulticolumnExpectation, fake_expectation_config( - "fake_multicolumn_expectation", {"column_list": ["column_2"], "mostly": x} + "fake_multicolumn_expectation", + {"column_list": ["column_1", "column_2"], "mostly": x}, ), ) for x in [0, 0.5, 1] @@ -166,7 +169,8 @@ def test_expectation_succeeds_with_valid_mostly(fake_expectation_cls, config): ( FakeMulticolumnExpectation, fake_expectation_config( - "fake_multicolumn_expectation", {"column_list": [], "mostly": -0.5} + "fake_multicolumn_expectation", + {"column_list": ["column_1", "column_2"], "mostly": -0.5}, ), ), ( @@ -320,11 +324,12 @@ class TestSuiteParameterOptions: """Tests around the suite_parameter_options property of Expectations. 
Note: evaluation_parameter_options is currently a sorted tuple, but doesn't necessarily have to be - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP SUITE_PARAMETER_MIN = "my_min" SUITE_PARAMETER_MAX = "my_max" SUITE_PARAMETER_VALUE = "my_value" + SUITE_PARAMETER_MOSTLY = "my_mostly" @pytest.mark.unit def test_expectation_without_evaluation_parameter(self): @@ -340,6 +345,13 @@ def test_expectation_with_evaluation_parameter(self): ) assert expectation.suite_parameter_options == (self.SUITE_PARAMETER_MAX,) + @pytest.mark.unit + def test_column_map_expectation_with_evaluation_parameter(self): + expectation = gxe.ExpectColumnValuesToBeNull( + column="foo", mostly={"$PARAMETER": self.SUITE_PARAMETER_MOSTLY} + ) + assert expectation.suite_parameter_options == (self.SUITE_PARAMETER_MOSTLY,) + @pytest.mark.unit def test_expectation_with_multiple_suite_parameters(self): expectation = gxe.ExpectColumnValuesToBeBetween( @@ -456,7 +468,7 @@ def test_expectation_equality_ignores_rendered_content(): gxe.ExpectColumnValuesToBeBetween(column="foo"), {}, False, id="different_objects" ), pytest.param( - gxe.ExpectColumnDistinctValuesToBeInSet(column="bar"), + gxe.ExpectColumnDistinctValuesToBeInSet(column="bar", value_set=[1, 2, 3]), gxe.ExpectColumnValuesToBeBetween(column="foo"), True, id="different_expectation_types", @@ -561,7 +573,6 @@ def test_invalid_mostly_values(self, mostly: Any): {1}, [1, 2, 3], ["a", "b", "c"], - None, {"$PARAMETER": "my_param"}, ], ) diff --git a/tests/expectations/test_expectation_atomic_renderers.py b/tests/expectations/test_expectation_atomic_renderers.py index eb92cdb59408..7a87ae27619a 100644 --- a/tests/expectations/test_expectation_atomic_renderers.py +++ b/tests/expectations/test_expectation_atomic_renderers.py @@ -266,7 +266,7 @@ def test_atomic_prescriptive_summary_expect_column_kl_divergence_to_be_less_than "column": {"schema": {"type": "string"}, "value": "min_event_time"}, "threshold": {"schema": {"type": "number"}, "value": 0.1}, }, - 
"template": "$column Kullback-Leibler (KL) divergence with respect to the following distribution must be lower than $threshold.", # noqa: E501 + "template": "$column Kullback-Leibler (KL) divergence with respect to the following distribution must be lower than $threshold.", # noqa: E501 # FIXME CoP }, }, "schema": {"type": "GraphType"}, @@ -279,8 +279,8 @@ def test_atomic_prescriptive_summary_expect_column_kl_divergence_to_be_less_than def test_atomic_diagnostic_observed_value_expect_column_kl_divergence_to_be_less_than( get_diagnostic_rendered_content, ): - # Please note that the vast majority of Expectations are calling `Expectation._atomic_diagnostic_observed_value()` # noqa: E501 - # As such, the specific expectation_type used here is irrelevant and is simply used to trigger the parent class. # noqa: E501 + # Please note that the vast majority of Expectations are calling `Expectation._atomic_diagnostic_observed_value()` # noqa: E501 # FIXME CoP + # As such, the specific expectation_type used here is irrelevant and is simply used to trigger the parent class. # noqa: E501 # FIXME CoP expectation_config = { "type": "expect_column_kl_divergence_to_be_less_than", "kwargs": { @@ -365,11 +365,11 @@ def test_atomic_diagnostic_observed_value_expect_column_kl_divergence_to_be_less @pytest.mark.unit -def test_atomic_diagnostic_observed_value_with_boolean_column_expect_column_kl_divergence_to_be_less_than( # noqa: E501 +def test_atomic_diagnostic_observed_value_with_boolean_column_expect_column_kl_divergence_to_be_less_than( # noqa: E501 # FIXME CoP get_diagnostic_rendered_content, ): - # Please note that the vast majority of Expectations are calling `Expectation._atomic_diagnostic_observed_value()` # noqa: E501 - # As such, the specific expectation_type used here is irrelevant and is simply used to trigger the parent class. 
# noqa: E501 + # Please note that the vast majority of Expectations are calling `Expectation._atomic_diagnostic_observed_value()` # noqa: E501 # FIXME CoP + # As such, the specific expectation_type used here is irrelevant and is simply used to trigger the parent class. # noqa: E501 # FIXME CoP expectation_config = { "type": "expect_column_kl_divergence_to_be_less_than", "kwargs": { @@ -478,7 +478,7 @@ def test_atomic_prescriptive_summary_expect_column_max_to_be_between( "min_value": {"schema": {"type": "number"}, "value": 1}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column maximum value must be greater than or equal to $min_value and less than or equal to $max_value.", # noqa: E501 + "template": "$column maximum value must be greater than or equal to $min_value and less than or equal to $max_value.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -510,7 +510,7 @@ def test_atomic_prescriptive_summary_expect_column_mean_to_be_between( "min_value": {"schema": {"type": "number"}, "value": 3}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column mean must be greater than or equal to $min_value and less than or equal to $max_value.", # noqa: E501 + "template": "$column mean must be greater than or equal to $min_value and less than or equal to $max_value.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -542,7 +542,7 @@ def test_atomic_prescriptive_summary_expect_column_median_to_be_between( "min_value": {"schema": {"type": "number"}, "value": 5}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column median must be greater than or equal to $min_value and less than or equal to $max_value.", # noqa: E501 + "template": "$column median must be greater than or equal to $min_value and less than or equal to $max_value.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -574,7 +574,7 @@ def 
test_atomic_prescriptive_summary_expect_column_min_to_be_between( "min_value": {"schema": {"type": "number"}, "value": 1}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column minimum value must be greater than or equal to $min_value and less than or equal to $max_value.", # noqa: E501 + "template": "$column minimum value must be greater than or equal to $min_value and less than or equal to $max_value.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -608,7 +608,7 @@ def test_atomic_prescriptive_summary_expect_column_most_common_value_to_be_in_se "value_set": {"schema": {"type": "array"}, "value": [1, 2, 3]}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column most common value must belong to this set: $v__0 $v__1 $v__2. Values outside this set that are as common (but not more common) are allowed.", # noqa: E501 + "template": "$column most common value must belong to this set: $v__0 $v__1 $v__2. Values outside this set that are as common (but not more common) are allowed.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -667,7 +667,7 @@ def test_atomic_prescriptive_summary_expect_column_pair_values_a_to_be_greater_t "or_equal": {"schema": {"type": "boolean"}, "value": True}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "Values in $column_A must be greater than or equal to those in $column_B, at least $mostly_pct % of the time.", # noqa: E501 + "template": "Values in $column_A must be greater than or equal to those in $column_B, at least $mostly_pct % of the time.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -701,7 +701,7 @@ def test_atomic_prescriptive_summary_expect_column_pair_values_to_be_equal( "mostly_pct": {"schema": {"type": "string"}, "value": "80"}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "Values in $column_A and $column_B must be equal, at least $mostly_pct % of the 
time.", # noqa: E501 + "template": "Values in $column_A and $column_B must be equal, at least $mostly_pct % of the time.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -716,7 +716,7 @@ def test_atomic_prescriptive_summary_expect_column_pair_values_to_be_in_set( @pytest.mark.unit -def test_atomic_prescriptive_summary_expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than( # noqa: E501 +def test_atomic_prescriptive_summary_expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than( # noqa: E501 # FIXME CoP get_prescriptive_rendered_content, ): # Expectation is a stub; open to implement test once renderer method is available @@ -748,7 +748,7 @@ def test_atomic_prescriptive_summary_expect_column_proportion_of_unique_values_t "min_value": {"schema": {"type": "number"}, "value": 10}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column fraction of unique values must be greater than or equal to $min_value and less than or equal to $max_value.", # noqa: E501 + "template": "$column fraction of unique values must be greater than or equal to $min_value and less than or equal to $max_value.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -833,8 +833,8 @@ def test_atomic_prescriptive_summary_expect_column_quantile_values_to_be_between def test_atomic_diagnostic_observed_value_expect_column_quantile_values_to_be_between( get_diagnostic_rendered_content, ): - # Please note that the vast majority of Expectations are calling `Expectation._atomic_diagnostic_observed_value()` # noqa: E501 - # As such, the specific expectation_type used here is irrelevant and is simply used to trigger the parent class. # noqa: E501 + # Please note that the vast majority of Expectations are calling `Expectation._atomic_diagnostic_observed_value()` # noqa: E501 # FIXME CoP + # As such, the specific expectation_type used here is irrelevant and is simply used to trigger the parent class. 
# noqa: E501 # FIXME CoP expectation_config = { "type": "expect_column_quantile_values_to_be_between", "kwargs": { @@ -931,7 +931,7 @@ def test_atomic_prescriptive_summary_expect_column_stdev_to_be_between( "min_value": {"schema": {"type": "number"}, "value": 10}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column standard deviation must be greater than or equal to $min_value and less than or equal to $max_value.", # noqa: E501 + "template": "$column standard deviation must be greater than or equal to $min_value and less than or equal to $max_value.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -962,7 +962,7 @@ def test_atomic_prescriptive_summary_expect_column_sum_to_be_between( "min_value": {"schema": {"type": "number"}, "value": 10}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column sum must be greater than or equal to $min_value and less than or equal to $max_value.", # noqa: E501 + "template": "$column sum must be greater than or equal to $min_value and less than or equal to $max_value.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1022,7 +1022,7 @@ def test_atomic_prescriptive_summary_expect_column_unique_value_count_to_be_betw "min_value": {"schema": {"type": "number"}, "value": 10}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column must have greater than or equal to $min_value and less than or equal to $max_value unique values.", # noqa: E501 + "template": "$column must have greater than or equal to $min_value and less than or equal to $max_value unique values.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1056,7 +1056,7 @@ def test_atomic_prescriptive_summary_expect_column_value_lengths_to_be_between( "mostly_pct": {"schema": {"type": "string"}, "value": "80"}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column values must be greater than or equal to 
$min_value and less than or equal to $max_value characters long, at least $mostly_pct % of the time.", # noqa: E501 + "template": "$column values must be greater than or equal to $min_value and less than or equal to $max_value characters long, at least $mostly_pct % of the time.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1088,7 +1088,7 @@ def test_atomic_prescriptive_summary_expect_column_value_lengths_to_equal( "value": {"schema": {"type": "number"}, "value": 100}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column values must be $value characters long, at least $mostly_pct % of the time.", # noqa: E501 + "template": "$column values must be $value characters long, at least $mostly_pct % of the time.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1130,7 +1130,7 @@ def test_atomic_prescriptive_summary_expect_column_values_to_be_between( "mostly_pct": {"schema": {"type": "string"}, "value": "80"}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column values must be greater than or equal to $min_value and less than or equal to $max_value, at least $mostly_pct % of the time.", # noqa: E501 + "template": "$column values must be greater than or equal to $min_value and less than or equal to $max_value, at least $mostly_pct % of the time.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1160,7 +1160,7 @@ def test_atomic_prescriptive_summary_expect_column_values_to_be_dateutil_parseab "mostly_pct": {"schema": {"type": "string"}, "value": "80"}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column values must be parseable by dateutil, at least $mostly_pct % of the time.", # noqa: E501 + "template": "$column values must be parseable by dateutil, at least $mostly_pct % of the time.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1193,7 +1193,7 @@ def 
test_atomic_prescriptive_summary_expect_column_values_to_be_decreasing( "strictly": {"schema": {"type": "boolean"}, "value": True}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column values must be strictly less than previous values, at least $mostly_pct % of the time.", # noqa: E501 + "template": "$column values must be strictly less than previous values, at least $mostly_pct % of the time.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1231,7 +1231,7 @@ def test_atomic_prescriptive_summary_expect_column_values_to_be_in_set( "value_set": {"schema": {"type": "array"}, "value": [1, 2, 3, 4]}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column values must belong to this set: $v__0 $v__1 $v__2 $v__3, at least $mostly_pct % of the time.", # noqa: E501 + "template": "$column values must belong to this set: $v__0 $v__1 $v__2 $v__3, at least $mostly_pct % of the time.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1270,7 +1270,7 @@ def test_atomic_prescriptive_summary_expect_column_values_to_be_in_type_list( "v__2": {"schema": {"type": "string"}, "value": "type_c"}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column value types must belong to this set: $v__0 $v__1 $v__2, at least $mostly_pct % of the time.", # noqa: E501 + "template": "$column value types must belong to this set: $v__0 $v__1 $v__2, at least $mostly_pct % of the time.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1303,7 +1303,7 @@ def test_atomic_prescriptive_summary_expect_column_values_to_be_increasing( "strictly": {"schema": {"type": "boolean"}, "value": True}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column values must be strictly greater than previous values, at least $mostly_pct % of the time.", # noqa: E501 + "template": "$column values must be strictly greater than previous values, at least 
$mostly_pct % of the time.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1333,7 +1333,7 @@ def test_atomic_prescriptive_summary_expect_column_values_to_be_json_parseable( "mostly_pct": {"schema": {"type": "string"}, "value": "80"}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column values must be parseable as JSON, at least $mostly_pct % of the time.", # noqa: E501 + "template": "$column values must be parseable as JSON, at least $mostly_pct % of the time.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1424,7 +1424,7 @@ def test_atomic_prescriptive_summary_expect_column_values_to_be_of_type( "type_": {"schema": {"type": "string"}, "value": "my_type"}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column values must be of type $type_, at least $mostly_pct % of the time.", # noqa: E501 + "template": "$column values must be of type $type_, at least $mostly_pct % of the time.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1490,7 +1490,7 @@ def test_atomic_prescriptive_summary_expect_column_values_to_match_json_schema( "mostly_pct": {"schema": {"type": "string"}, "value": "80"}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column values must match the following JSON Schema, at least $mostly_pct % of the time: $formatted_json", # noqa: E501 + "template": "$column values must match the following JSON Schema, at least $mostly_pct % of the time: $formatted_json", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1538,7 +1538,7 @@ def test_atomic_prescriptive_summary_expect_column_values_to_match_regex( "regex": {"schema": {"type": "string"}, "value": "^superconductive$"}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column values must match this regular expression: $regex, at least $mostly_pct % of the time.", # noqa: E501 + "template": "$column values must 
match this regular expression: $regex, at least $mostly_pct % of the time.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1577,7 +1577,7 @@ def test_atomic_prescriptive_summary_expect_column_values_to_match_regex_list( "v__1": {"schema": {"type": "string"}, "value": "ge|great_expectations"}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column values must match all of the following regular expressions: $v__0 $v__1, at least $mostly_pct % of the time.", # noqa: E501 + "template": "$column values must match all of the following regular expressions: $v__0 $v__1, at least $mostly_pct % of the time.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1609,7 +1609,7 @@ def test_atomic_prescriptive_summary_expect_column_values_to_match_strftime_form "strftime_format": {"schema": {"type": "string"}, "value": "%Y-%m"}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column values must match the following strftime format: $strftime_format, at least $mostly_pct % of the time.", # noqa: E501 + "template": "$column values must match the following strftime format: $strftime_format, at least $mostly_pct % of the time.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1645,7 +1645,7 @@ def test_atomic_prescriptive_summary_expect_column_values_to_not_be_in_set( "value_set": {"schema": {"type": "array"}, "value": [1, 2, 3]}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column values must not belong to this set: $v__0 $v__1 $v__2, at least $mostly_pct % of the time.", # noqa: E501 + "template": "$column values must not belong to this set: $v__0 $v__1 $v__2, at least $mostly_pct % of the time.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1723,7 +1723,7 @@ def test_atomic_prescriptive_summary_expect_column_values_to_not_match_regex( "regex": {"schema": {"type": "string"}, "value": "^superconductive$"}, }, 
"schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column values must not match this regular expression: $regex, at least $mostly_pct % of the time.", # noqa: E501 + "template": "$column values must not match this regular expression: $regex, at least $mostly_pct % of the time.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1758,7 +1758,7 @@ def test_atomic_prescriptive_summary_expect_column_values_to_not_match_regex_lis "v__2": {"schema": {"type": "string"}, "value": "^c"}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "$column values must not match any of the following regular expressions: $v__0 $v__1 $v__2, at least $mostly_pct % of the time.", # noqa: E501 + "template": "$column values must not match any of the following regular expressions: $v__0 $v__1 $v__2, at least $mostly_pct % of the time.", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1794,7 +1794,7 @@ def test_atomic_prescriptive_summary_expect_compound_columns_to_be_unique( "mostly_pct": {"schema": {"type": "string"}, "value": "80"}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "Values for given compound columns must be unique together, at least $mostly_pct % of the time: $column_list_0 $column_list_1 $column_list_2", # noqa: E501 + "template": "Values for given compound columns must be unique together, at least $mostly_pct % of the time: $column_list_0 $column_list_1 $column_list_2", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1837,7 +1837,7 @@ def test_atomic_prescriptive_summary_expect_multicolumn_values_to_be_unique( "mostly_pct": {"schema": {"type": "string"}, "value": "80"}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "Values must be unique across columns, at least $mostly_pct % of the time: $column_list_0 $column_list_1 $column_list_2", # noqa: E501 + "template": "Values must be unique across columns, at least 
$mostly_pct % of the time: $column_list_0 $column_list_1 $column_list_2", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1877,7 +1877,7 @@ def test_atomic_prescriptive_summary_expect_select_column_values_to_be_unique_wi "mostly_pct": {"schema": {"type": "string"}, "value": "80"}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "Values must be unique across columns, at least $mostly_pct % of the time: $column_list_0 $column_list_1", # noqa: E501 + "template": "Values must be unique across columns, at least $mostly_pct % of the time: $column_list_0 $column_list_1", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1956,7 +1956,7 @@ def test_atomic_prescriptive_summary_expect_table_columns_to_match_ordered_list( "column_list_2": {"schema": {"type": "string"}, "value": "c"}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "Must have these columns in this order: $column_list_0 $column_list_1 $column_list_2", # noqa: E501 + "template": "Must have these columns in this order: $column_list_0 $column_list_1 $column_list_2", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -1988,7 +1988,7 @@ def test_atomic_prescriptive_summary_expect_table_columns_to_match_set( "exact_match": {"schema": {"type": "boolean"}, "value": True}, }, "schema": {"type": "com.superconductive.rendered.string"}, - "template": "Must have exactly these columns (in any order): $column_set_0 $column_set_1 $column_set_2", # noqa: E501 + "template": "Must have exactly these columns (in any order): $column_set_0 $column_set_1 $column_set_2", # noqa: E501 # FIXME CoP }, "value_type": "StringValueType", } @@ -2078,8 +2078,8 @@ def test_atomic_prescriptive_summary_expect_table_row_count_to_equal_other_table @pytest.mark.unit def test_atomic_diagnostic_observed_value_without_result(get_diagnostic_rendered_content): - # Please note that the vast majority of Expectations are calling 
`Expectation._atomic_diagnostic_observed_value()` # noqa: E501 - # As such, the specific expectation_type used here is irrelevant and is simply used to trigger the parent class. # noqa: E501 + # Please note that the vast majority of Expectations are calling `Expectation._atomic_diagnostic_observed_value()` # noqa: E501 # FIXME CoP + # As such, the specific expectation_type used here is irrelevant and is simply used to trigger the parent class. # noqa: E501 # FIXME CoP expectation_config = { "type": "expect_table_row_count_to_equal", "kwargs": {}, @@ -2106,8 +2106,8 @@ def test_atomic_diagnostic_observed_value_without_result(get_diagnostic_rendered def test_atomic_diagnostic_observed_value_with_numeric_observed_value( get_diagnostic_rendered_content, ): - # Please note that the vast majority of Expectations are calling `Expectation._atomic_diagnostic_observed_value()` # noqa: E501 - # As such, the specific expectation_type used here is irrelevant and is simply used to trigger the parent class. # noqa: E501 + # Please note that the vast majority of Expectations are calling `Expectation._atomic_diagnostic_observed_value()` # noqa: E501 # FIXME CoP + # As such, the specific expectation_type used here is irrelevant and is simply used to trigger the parent class. # noqa: E501 # FIXME CoP expectation_config = { "type": "expect_table_row_count_to_equal", "kwargs": {}, @@ -2133,8 +2133,8 @@ def test_atomic_diagnostic_observed_value_with_numeric_observed_value( @pytest.mark.unit def test_atomic_diagnostic_observed_value_with_str_observed_value(get_diagnostic_rendered_content): - # Please note that the vast majority of Expectations are calling `Expectation._atomic_diagnostic_observed_value()` # noqa: E501 - # As such, the specific expectation_type used here is irrelevant and is simply used to trigger the parent class. 
# noqa: E501 + # Please note that the vast majority of Expectations are calling `Expectation._atomic_diagnostic_observed_value()` # noqa: E501 # FIXME CoP + # As such, the specific expectation_type used here is irrelevant and is simply used to trigger the parent class. # noqa: E501 # FIXME CoP expectation_config = { "type": "expect_table_row_count_to_equal", "kwargs": {}, @@ -2160,8 +2160,8 @@ def test_atomic_diagnostic_observed_value_with_str_observed_value(get_diagnostic @pytest.mark.unit def test_atomic_diagnostic_observed_value_with_unexpected_percent(get_diagnostic_rendered_content): - # Please note that the vast majority of Expectations are calling `Expectation._atomic_diagnostic_observed_value()` # noqa: E501 - # As such, the specific expectation_type used here is irrelevant and is simply used to trigger the parent class. # noqa: E501 + # Please note that the vast majority of Expectations are calling `Expectation._atomic_diagnostic_observed_value()` # noqa: E501 # FIXME CoP + # As such, the specific expectation_type used here is irrelevant and is simply used to trigger the parent class. # noqa: E501 # FIXME CoP expectation_config = { "type": "expect_table_row_count_to_equal", "kwargs": {}, @@ -2187,8 +2187,8 @@ def test_atomic_diagnostic_observed_value_with_unexpected_percent(get_diagnostic @pytest.mark.unit def test_atomic_diagnostic_observed_value_with_empty_result(get_diagnostic_rendered_content): - # Please note that the vast majority of Expectations are calling `Expectation._atomic_diagnostic_observed_value()` # noqa: E501 - # As such, the specific expectation_type used here is irrelevant and is simply used to trigger the parent class. # noqa: E501 + # Please note that the vast majority of Expectations are calling `Expectation._atomic_diagnostic_observed_value()` # noqa: E501 # FIXME CoP + # As such, the specific expectation_type used here is irrelevant and is simply used to trigger the parent class. 
# noqa: E501 # FIXME CoP expectation_config = { "type": "expect_table_row_count_to_equal", "kwargs": {}, @@ -2852,3 +2852,200 @@ def test_expect_column_distinct_values_to_equal_set_atomic_diagnostic_observed_v assert name in res["value"]["params"] assert res["value"]["params"][name]["value"] == val assert res["value"]["params"][name]["render_state"] == status + + +@pytest.mark.unit +@pytest.mark.parametrize( + "description, column_set, observed_value, expected_result", + [ + ( + "complete set", + ["a", "b", "c"], + ["a", "b", "c"], + [ + ("ov__0", "a", "expected"), + ("ov__1", "b", "expected"), + ("ov__2", "c", "expected"), + ], + ), + ( + "empty input", + [], + ["a", "b", "c"], + [ + ("ov__0", "a", "unexpected"), + ("ov__1", "b", "unexpected"), + ("ov__2", "c", "unexpected"), + ], + ), + ( + "empty observed", + ["a", "b", "c"], + [], + [ + ("exp__0", "a", "missing"), + ("exp__1", "b", "missing"), + ("exp__2", "c", "missing"), + ], + ), + ( + "empty input and observed", + [], + [], + [], + ), + ( + "subset observed", + ["a", "b", "c"], + ["a", "b"], + [ + ("ov__0", "a", "expected"), + ("ov__1", "b", "expected"), + ("exp__2", "c", "missing"), + ], + ), + ( + "superset observed", + ["a", "b", "c"], + ["a", "b", "c", "d"], + [ + ("ov__0", "a", "expected"), + ("ov__1", "b", "expected"), + ("ov__2", "c", "expected"), + ("ov__3", "d", "unexpected"), + ], + ), + ( + "superset observed 2", + ["a", "b", "c"], + ["a", "d", "b", "c"], + [ + ("ov__0", "a", "expected"), + ("ov__1", "d", "unexpected"), + ("ov__2", "b", "expected"), + ("ov__3", "c", "expected"), + ], + ), + ], +) +def test_expect_table_columns_to_match_set_atomic_diagnostic_observed_value( + description, + column_set, + observed_value, + expected_result, + get_diagnostic_rendered_content, +): + # arrange + x = { + "expectation_config": ExpectationConfiguration( + type="expect_table_columns_to_match_set", + kwargs={"column_set": column_set}, + ), + "result": {"observed_value": observed_value}, + } + + 
expected_template_string = " ".join([f"${name}" for name, _, _ in expected_result]) + + # act + res = get_diagnostic_rendered_content(x).to_json_dict() + + # assert + assert res["value"]["template"] == expected_template_string + + for name, val, status in expected_result: + assert name in res["value"]["params"] + assert res["value"]["params"][name]["value"] == val + assert res["value"]["params"][name]["render_state"] == status + + +@pytest.mark.unit +@pytest.mark.parametrize( + "observed_value, expected_template_string", + [ + ( + 0, + "$observed_value unexpected rows", + ), + ( + 1, + "$observed_value unexpected row", + ), + ( + 100000, + "$observed_value unexpected rows", + ), + ], +) +def test_unexpected_rows_expectation_atomic_diagnostic_observed_value( + observed_value, + expected_template_string, + get_diagnostic_rendered_content, +): + # arrange + x = { + "expectation_config": ExpectationConfiguration( + type="unexpected_rows_expectation", + kwargs={"description": "my description", "unexpected_rows_query": "valid query"}, + ), + "result": {"observed_value": observed_value}, + } + + # act + res = get_diagnostic_rendered_content(x).to_json_dict() + + # assert + assert res["value"]["template"] == expected_template_string + + +@pytest.mark.unit +def test_unexpected_rows_expectation_atomic_diagnostic_observed_value_when_description_present( + get_diagnostic_rendered_content, +): + """Fixes regression where description overwrote the template""" + + x = { + "expectation_config": ExpectationConfiguration( + type="unexpected_rows_expectation", + kwargs={"description": "my description", "unexpected_rows_query": "valid query"}, + description="plz ignore me", + ), + "result": {"observed_value": 123}, + } + + res = get_diagnostic_rendered_content(x).to_json_dict() + + assert res["value"]["template"] == "$observed_value unexpected rows" + + +@pytest.mark.unit +def test_atomic_prescriptive_summary_with_description( + get_prescriptive_rendered_content, +): + description = 
"I should overwite" + update_dict = { + "type": "expect_column_distinct_values_to_be_in_set", + "description": description, + "kwargs": { + "column": "my_column", + "value_set": [1, 2, 3], + }, + } + rendered_content = get_prescriptive_rendered_content(update_dict) + + res = rendered_content.to_json_dict() + pprint(res) + assert res == { + "name": "atomic.prescriptive.summary", + "value": { + "params": { + "column": {"schema": {"type": "string"}, "value": "my_column"}, + "v__0": {"schema": {"type": "number"}, "value": 1}, + "v__1": {"schema": {"type": "number"}, "value": 2}, + "v__2": {"schema": {"type": "number"}, "value": 3}, + "value_set": {"schema": {"type": "array"}, "value": [1, 2, 3]}, + }, + "schema": {"type": "com.superconductive.rendered.string"}, + "template": description, + }, + "value_type": "StringValueType", + } diff --git a/tests/expectations/test_expectation_diagnostics.py b/tests/expectations/test_expectation_diagnostics.py index 23dcbff97201..109c022c7aa9 100644 --- a/tests/expectations/test_expectation_diagnostics.py +++ b/tests/expectations/test_expectation_diagnostics.py @@ -65,8 +65,8 @@ **{ "camel_name": "ExpectColumnValuesToMatchRegex", "snake_name": "expect_column_values_to_match_regex", - "short_description": "Expect column entries to be strings that match a given regular expression.", # noqa: E501 - "docstring": 'Expect column entries to be strings that match a given regular expression.\n \n Valid matches can be found anywhere in the string, for example "[at]+" will identify the following strings as expected: "cat", "hat", "aa", "a", and "t", and the following strings as unexpected: "fish", "dog".\n\n expect_column_values_to_match_regex is a :func:`column_map_expectation `.\n\n Args:\n column (str): The column name.\n regex (str): The regular expression the column entries should match.\n\n Keyword Args:\n mostly (None or a float between 0 and 1): Return `"success": True` if at least mostly fraction of values match the expectation. 
For more detail, see :ref:`mostly`.\n\n Other Parameters:\n result_format (str or None): Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`.\n For more detail, see :ref:`result_format `.\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see :ref:`catch_exceptions`.\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see :ref:`meta`.\n\n Returns:\n An ExpectationSuiteValidationResult\n\n Exact fields vary depending on the values passed to :ref:`result_format ` and\n :ref:`catch_exceptions`, and :ref:`meta`.\n\n See Also:\n :func:`expect_column_values_to_not_match_regex `\n\n :func:`expect_column_values_to_match_regex_list `\n\n ', # noqa: E501 + "short_description": "Expect column entries to be strings that match a given regular expression.", # noqa: E501 # FIXME CoP + "docstring": 'Expect column entries to be strings that match a given regular expression.\n \n Valid matches can be found anywhere in the string, for example "[at]+" will identify the following strings as expected: "cat", "hat", "aa", "a", and "t", and the following strings as unexpected: "fish", "dog".\n\n expect_column_values_to_match_regex is a :func:`column_map_expectation `.\n\n Args:\n column (str): The column name.\n regex (str): The regular expression the column entries should match.\n\n Keyword Args:\n mostly (None or a float between 0 and 1): Return `"success": True` if at least mostly fraction of values match the expectation. For more detail, see :ref:`mostly`.\n\n Other Parameters:\n result_format (str or None): Which output mode to use: `BOOLEAN_ONLY`, `BASIC`, `COMPLETE`, or `SUMMARY`.\n For more detail, see :ref:`result_format `.\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. 
For more detail, see :ref:`catch_exceptions`.\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. For more detail, see :ref:`meta`.\n\n Returns:\n An ExpectationSuiteValidationResult\n\n Exact fields vary depending on the values passed to :ref:`result_format ` and\n :ref:`catch_exceptions`, and :ref:`meta`.\n\n See Also:\n :func:`expect_column_values_to_not_match_regex `\n\n :func:`expect_column_values_to_match_regex_list `\n\n ', # noqa: E501 # FIXME CoP } ), library_metadata=AugmentedLibraryMetadata( @@ -94,7 +94,7 @@ RendererTestDiagnostics( test_title="basic_positive_test", rendered_successfully=True, - renderered_str="a values must match this regular expression: ^a, at least 90 % of the time.", # noqa: E501 + renderered_str="a values must match this regular expression: ^a, at least 90 % of the time.", # noqa: E501 # FIXME CoP ) ], ) @@ -294,7 +294,7 @@ def test__check_example_cases__with_enough_test_cases_but_all_failing(): ], ) == ExpectationDiagnosticCheckMessage( **{ - "message": "Has at least one positive and negative example case, and all test cases pass", # noqa: E501 + "message": "Has at least one positive and negative example case, and all test cases pass", # noqa: E501 # FIXME CoP "passed": False, } ) @@ -323,7 +323,7 @@ def test__check_example_cases__with_enough_test_cases_but_some_failing(): ], ) == ExpectationDiagnosticCheckMessage( **{ - "message": "Has at least one positive and negative example case, and all test cases pass", # noqa: E501 + "message": "Has at least one positive and negative example case, and all test cases pass", # noqa: E501 # FIXME CoP "passed": False, } ) @@ -352,7 +352,7 @@ def test__check_example_cases__with_enough_test_cases_and_no_failing(): ], ) == ExpectationDiagnosticCheckMessage( **{ - "message": "Has at least one positive and negative example case, and all test cases pass", # noqa: E501 + "message": "Has at least one positive and negative 
example case, and all test cases pass", # noqa: E501 # FIXME CoP "passed": True, } ) @@ -399,7 +399,7 @@ def test__check_example_cases__with_enough_not_enough_test_cases_but_no_failing( ], ) == ExpectationDiagnosticCheckMessage( **{ - "message": "Has at least one positive and negative example case, and all test cases pass", # noqa: E501 + "message": "Has at least one positive and negative example case, and all test cases pass", # noqa: E501 # FIXME CoP "passed": False, } ) diff --git a/tests/expectations/test_expectation_test_data_cases.py b/tests/expectations/test_expectation_test_data_cases.py index d46ab99dbb7b..02bed6671a41 100644 --- a/tests/expectations/test_expectation_test_data_cases.py +++ b/tests/expectations/test_expectation_test_data_cases.py @@ -47,4 +47,4 @@ def test_basic_instantiation_of_ExpectationTestCase(): assert my_test_case.input == {"column": "a", "regex": "^a", "mostly": 0.9} assert my_test_case.input["column"] == "a" - # assert my_test_case.input.column == "a" #Not supported yet. We'll need to add types to input and output to get there. # noqa: E501 + # assert my_test_case.input.column == "a" #Not supported yet. We'll need to add types to input and output to get there. 
# noqa: E501 # FIXME CoP diff --git a/tests/expectations/test_generate_diagnostic_checklist.py b/tests/expectations/test_generate_diagnostic_checklist.py index 53a15ee05cbe..7994e7224e1a 100644 --- a/tests/expectations/test_generate_diagnostic_checklist.py +++ b/tests/expectations/test_generate_diagnostic_checklist.py @@ -23,7 +23,7 @@ def test_print_diagnostic_checklist__first_iteration(): Has a docstring, including a one-line short description that begins with "Expect" and ends with a period Has at least one positive and negative example case, and all test cases pass Has core logic and passes tests on at least one Execution Engine -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) @@ -50,7 +50,7 @@ def test_print_diagnostic_checklist__second_iteration(): ✔ All 3 tests for pandas are passing Has a full suite of tests, as determined by a code owner Has passed a manual review by a code owner for code standards and style guides -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) @@ -76,5 +76,5 @@ def test_print_diagnostic_checklist__third_iteration(): ✔ All 3 tests for pandas are passing Has a full suite of tests, as determined by a code owner Has passed a manual review by a code owner for code standards and style guides -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP ) diff --git a/tests/expectations/test_run_diagnostics.py b/tests/expectations/test_run_diagnostics.py index 0b489a53e384..bb6d3a832ee6 100644 --- a/tests/expectations/test_run_diagnostics.py +++ b/tests/expectations/test_run_diagnostics.py @@ -134,7 +134,7 @@ def test_expectation_self_check(): "passed": False, "sub_messages": [ { - "message": "No example found to get kwargs for ExpectationConfiguration", # noqa: E501 + "message": "No example found to get kwargs for ExpectationConfiguration", # noqa: E501 # FIXME CoP "passed": False, }, ], @@ -147,7 +147,7 @@ def test_expectation_self_check(): }, { "doc_url": None, - "message": "Has core logic that passes tests for all applicable Execution Engines and SQL 
dialects", # noqa: E501 + "message": "Has core logic that passes tests for all applicable Execution Engines and SQL dialects", # noqa: E501 # FIXME CoP "passed": False, "sub_messages": [ { @@ -171,13 +171,13 @@ def test_expectation_self_check(): }, { "doc_url": None, - "message": 'Has a docstring, including a one-line short description that begins with "Expect" and ends with a period', # noqa: E501 + "message": 'Has a docstring, including a one-line short description that begins with "Expect" and ends with a period', # noqa: E501 # FIXME CoP "passed": False, "sub_messages": [], }, { "doc_url": None, - "message": "Has at least one positive and negative example case, and all test cases pass", # noqa: E501 + "message": "Has at least one positive and negative example case, and all test cases pass", # noqa: E501 # FIXME CoP "passed": False, "sub_messages": [], }, @@ -202,7 +202,7 @@ def test_expectation_self_check(): }, { "doc_url": None, - "message": "Has passed a manual review by a code owner for code standards and style guides", # noqa: E501 + "message": "Has passed a manual review by a code owner for code standards and style guides", # noqa: E501 # FIXME CoP "passed": False, "sub_messages": [], }, @@ -254,7 +254,7 @@ def test_self_check_on_an_existing_expectation(): "description": { "camel_name": "ExpectColumnValuesToMatchRegex", "snake_name": "expect_column_values_to_match_regex", - "short_description": "Expect column entries to be strings that match a given regular expression.", # noqa: E501 + "short_description": "Expect column entries to be strings that match a given regular expression.", # noqa: E501 # FIXME CoP }, "execution_engines": { "PandasExecutionEngine": True, @@ -263,13 +263,13 @@ def test_self_check_on_an_existing_expectation(): }, "renderers": { "standard": { - "renderer.answer": 'Less than 90.0% of values in column "a" match the regular expression ^a.', # noqa: E501 - "renderer.diagnostic.unexpected_statement": "\n\n1 unexpected values found. 
20% of 5 total rows.", # noqa: E501 + "renderer.answer": 'Less than 90.0% of values in column "a" match the regular expression ^a.', # noqa: E501 # FIXME CoP + "renderer.diagnostic.unexpected_statement": "\n\n1 unexpected values found. 20% of 5 total rows.", # noqa: E501 # FIXME CoP "renderer.diagnostic.observed_value": "20% unexpected", "renderer.diagnostic.status_icon": "", "renderer.diagnostic.unexpected_table": None, - "renderer.prescriptive": "a values must match this regular expression: ^a, at least 90 % of the time.", # noqa: E501 - "renderer.question": 'Do at least 90.0% of values in column "a" match the regular expression ^a?', # noqa: E501 + "renderer.prescriptive": "a values must match this regular expression: ^a, at least 90 % of the time.", # noqa: E501 # FIXME CoP + "renderer.question": 'Do at least 90.0% of values in column "a" match the regular expression ^a?', # noqa: E501 # FIXME CoP }, "custom": [], }, @@ -504,7 +504,7 @@ def test_expectation__get_renderers(): @pytest.mark.unit def test_expectation_is_abstract(): - # is_abstract determines whether the expectation should be added to the registry (i.e. is fully implemented) # noqa: E501 + # is_abstract determines whether the expectation should be added to the registry (i.e. is fully implemented) # noqa: E501 # FIXME CoP assert ColumnMapExpectation.is_abstract() assert not ExpectColumnValuesToEqualThree.is_abstract() diff --git a/tests/expectations/test_run_diagnostics_supporting_methods.py b/tests/expectations/test_run_diagnostics_supporting_methods.py index 97a3150bdef8..7afffedd0c5c 100644 --- a/tests/expectations/test_run_diagnostics_supporting_methods.py +++ b/tests/expectations/test_run_diagnostics_supporting_methods.py @@ -227,7 +227,7 @@ def test__get_test_results(): ), ) for result in test_results: - # Abe: 1/1/2022: I'm not sure this is the behavior we want long term. How does backend relate to ExecutionEngine? 
# noqa: E501 + # Abe: 1/1/2022: I'm not sure this is the behavior we want long term. How does backend relate to ExecutionEngine? # noqa: E501 # FIXME CoP if result.backend == "pandas": assert result.test_passed is True elif result.backend == "sqlite": diff --git a/tests/expectations/test_util.py b/tests/expectations/test_util.py index c6a3155549a7..5d6e8cfe293a 100644 --- a/tests/expectations/test_util.py +++ b/tests/expectations/test_util.py @@ -72,7 +72,7 @@ def get_table_columns_metric( metric_domain_kwargs={}, metric_value_kwargs=None, ) - table_columns_metric.metric_dependencies = { # type: ignore[assignment] + table_columns_metric.metric_dependencies = { # type: ignore[assignment] # FIXME CoP "table.column_types": table_column_types_metric, } results = execution_engine.resolve_metrics( @@ -124,7 +124,7 @@ def bare_bones_prescriptive_renderer( runtime_configuration = runtime_configuration or {} styling = runtime_configuration.get("styling") params = configuration.kwargs - template_str = "$column minimum value must be greater than or equal to $min_value and less than or equal to $max_value" # noqa: E501 + template_str = "$column minimum value must be greater than or equal to $min_value and less than or equal to $max_value" # noqa: E501 # FIXME CoP return [ RenderedStringTemplateContent( **{ @@ -146,7 +146,7 @@ def bare_bones_prescriptive_renderer( # string template should remain constant assert ( res[0].string_template["template"] - == "$column minimum value must be greater than or equal to $min_value and less than or equal to $max_value" # noqa: E501 + == "$column minimum value must be greater than or equal to $min_value and less than or equal to $max_value" # noqa: E501 # FIXME CoP ) # params should contain our suite parameters @@ -157,7 +157,7 @@ def bare_bones_prescriptive_renderer( assert res[0].to_json_dict() == { "content_block_type": "string_template", "string_template": { - "template": "$column minimum value must be greater than or equal to $min_value 
and less than or equal to $max_value", # noqa: E501 + "template": "$column minimum value must be greater than or equal to $min_value and less than or equal to $max_value", # noqa: E501 # FIXME CoP "params": { "column": "live", "min_value": {"$PARAMETER": "MIN_VAL_PARAM"}, @@ -190,7 +190,7 @@ def bare_bones_prescriptive_renderer( runtime_configuration = runtime_configuration or {} styling = runtime_configuration.get("styling") params = configuration.kwargs - template_str = "$column minimum value must be greater than or equal to $min_value and less than or equal to $max_value" # noqa: E501 + template_str = "$column minimum value must be greater than or equal to $min_value and less than or equal to $max_value" # noqa: E501 # FIXME CoP return [ RenderedStringTemplateContent( **{ @@ -213,7 +213,7 @@ def bare_bones_prescriptive_renderer( # string template should remain constant assert ( res[0].string_template["template"] - == "$column minimum value must be greater than or equal to $min_value and less than or equal to $max_value" # noqa: E501 + == "$column minimum value must be greater than or equal to $min_value and less than or equal to $max_value" # noqa: E501 # FIXME CoP ) # params should contain our suite parameters @@ -222,7 +222,7 @@ def bare_bones_prescriptive_renderer( assert res[0].to_json_dict() == { "content_block_type": "string_template", "string_template": { - "template": "$column minimum value must be greater than or equal to $min_value and less than or equal to $max_value", # noqa: E501 + "template": "$column minimum value must be greater than or equal to $min_value and less than or equal to $max_value", # noqa: E501 # FIXME CoP "params": { "column": "live", "min_value": {"$PARAMETER": "MIN_VAL_PARAM"}, @@ -527,17 +527,17 @@ def test__TestBackend__bad_dialects(): @pytest.mark.unit def test__TestBackend__good_backends_and_dialects(): - tb1 = TestBackend( # noqa: F841 + tb1 = TestBackend( # noqa: F841 # FIXME CoP backend="pandas", dialects=None, ) - tb2 = 
TestBackend( # noqa: F841 + tb2 = TestBackend( # noqa: F841 # FIXME CoP backend="spark", dialects=None, ) - tb3 = TestBackend( # noqa: F841 + tb3 = TestBackend( # noqa: F841 # FIXME CoP backend="sqlalchemy", dialects=["sqlite", "postgresql", "mysql"], ) diff --git a/tests/experimental/metric_repository/test_batch_inspector.py b/tests/experimental/metric_repository/test_batch_inspector.py index 848979cef250..658ea6968a08 100644 --- a/tests/experimental/metric_repository/test_batch_inspector.py +++ b/tests/experimental/metric_repository/test_batch_inspector.py @@ -1,5 +1,5 @@ import uuid -from unittest.mock import MagicMock, Mock # noqa: TID251 +from unittest.mock import MagicMock, Mock # noqa: TID251 # FIXME CoP import pytest diff --git a/tests/experimental/metric_repository/test_cloud_data_store.py b/tests/experimental/metric_repository/test_cloud_data_store.py index 8e7351ec44cc..c5264fd59814 100644 --- a/tests/experimental/metric_repository/test_cloud_data_store.py +++ b/tests/experimental/metric_repository/test_cloud_data_store.py @@ -1,6 +1,6 @@ import uuid from unittest import mock -from unittest.mock import Mock # noqa: TID251 +from unittest.mock import Mock # noqa: TID251 # FIXME CoP from uuid import UUID import numpy @@ -68,7 +68,7 @@ def test_add_metric_run_non_generic_metric_type( uuid_from_add = cloud_data_store.add(metric_run) - expected_data = '{"data":{"data_asset_id":"4469ed3b-61d4-421f-9635-8339d2558b0f","metrics":[{"batch_id":"batch_id","metric_name":"metric_name","value":[0.25,0.5,0.75],"exception":null,"column":"column","quantiles":[0.25,0.5,0.75],"allow_relative_error":0.001,"value_type":"list[float]","metric_type":"ColumnQuantileValuesMetric"}]}}' # noqa: E501 + expected_data = 
'{"data":{"data_asset_id":"4469ed3b-61d4-421f-9635-8339d2558b0f","metrics":[{"batch_id":"batch_id","metric_name":"metric_name","value":[0.25,0.5,0.75],"exception":null,"column":"column","quantiles":[0.25,0.5,0.75],"allow_relative_error":0.001,"value_type":"list[float]","metric_type":"ColumnQuantileValuesMetric"}]}}' # noqa: E501 # FIXME CoP cloud_data_store._session.post.assert_called_once_with( url="https://app.greatexpectations.fake.io/api/v1/organizations/12345678-1234-5678-1234-567812345678/metric-runs", @@ -120,7 +120,7 @@ def test_add_metric_run_generic_metric_type( uuid_from_add = cloud_data_store.add(metric_run) - expected_data = '{"data":{"data_asset_id":"4469ed3b-61d4-421f-9635-8339d2558b0f","metrics":[{"batch_id":"batch_id","metric_name":"metric_name","value":1,"exception":null,"column":"column","value_type":"int","metric_type":"ColumnMetric"}]}}' # noqa: E501 + expected_data = '{"data":{"data_asset_id":"4469ed3b-61d4-421f-9635-8339d2558b0f","metrics":[{"batch_id":"batch_id","metric_name":"metric_name","value":1,"exception":null,"column":"column","value_type":"int","metric_type":"ColumnMetric"}]}}' # noqa: E501 # FIXME CoP cloud_data_store._session.post.assert_called_once_with( url="https://app.greatexpectations.fake.io/api/v1/organizations/12345678-1234-5678-1234-567812345678/metric-runs", @@ -175,7 +175,7 @@ def test_add_metric_run_generic_metric_type_with_exception( uuid_from_add = cloud_data_store.add(metric_run) - expected_data = '{"data":{"data_asset_id":"4469ed3b-61d4-421f-9635-8339d2558b0f","metrics":[{"batch_id":"batch_id","metric_name":"metric_name","value":1,"exception":{"type":"exception type","message":"exception message"},"column":"column","value_type":"int","metric_type":"ColumnMetric"}]}}' # noqa: E501 + expected_data = '{"data":{"data_asset_id":"4469ed3b-61d4-421f-9635-8339d2558b0f","metrics":[{"batch_id":"batch_id","metric_name":"metric_name","value":1,"exception":{"type":"exception type","message":"exception 
message"},"column":"column","value_type":"int","metric_type":"ColumnMetric"}]}}' # noqa: E501 # FIXME CoP cloud_data_store._session.post.assert_called_once_with( url="https://app.greatexpectations.fake.io/api/v1/organizations/12345678-1234-5678-1234-567812345678/metric-runs", @@ -227,7 +227,7 @@ def test_add_metric_run_generic_metric_type_numpy( uuid_from_add = cloud_data_store.add(metric_run) - expected_data = '{"data":{"data_asset_id":"4469ed3b-61d4-421f-9635-8339d2558b0f","metrics":[{"batch_id":"batch_id","metric_name":"metric_name","value":2.5,"exception":null,"column":"column","value_type":"float64","metric_type":"ColumnMetric"}]}}' # noqa: E501 + expected_data = '{"data":{"data_asset_id":"4469ed3b-61d4-421f-9635-8339d2558b0f","metrics":[{"batch_id":"batch_id","metric_name":"metric_name","value":2.5,"exception":null,"column":"column","value_type":"float64","metric_type":"ColumnMetric"}]}}' # noqa: E501 # FIXME CoP cloud_data_store._session.post.assert_called_once_with( url="https://app.greatexpectations.fake.io/api/v1/organizations/12345678-1234-5678-1234-567812345678/metric-runs", diff --git a/tests/experimental/metric_repository/test_metric_list_metric_retriever.py b/tests/experimental/metric_repository/test_metric_list_metric_retriever.py index 0c9c2c0d4394..f79207371b78 100644 --- a/tests/experimental/metric_repository/test_metric_list_metric_retriever.py +++ b/tests/experimental/metric_repository/test_metric_list_metric_retriever.py @@ -4,7 +4,7 @@ from great_expectations.data_context import CloudDataContext from great_expectations.datasource.fluent import BatchRequest -from great_expectations.datasource.fluent.interfaces import Batch +from great_expectations.datasource.fluent.interfaces import Batch, DataAsset from great_expectations.experimental.metric_repository.metric_list_metric_retriever import ( MetricListMetricRetriever, ) @@ -27,6 +27,13 @@ LOGGER = logging.getLogger(__name__) +@pytest.fixture(scope="function") +def mock_data_asset(mocker): + 
data_asset = mocker.Mock(spec=DataAsset) + data_asset.name = "some_data_asset_name" + return data_asset + + @pytest.fixture(scope="function") def mock_validator(mocker, mock_batch): validator = mocker.Mock(spec=Validator) @@ -42,9 +49,10 @@ def mock_context(mocker, mock_validator): @pytest.fixture(scope="function") -def mock_batch(mocker): +def mock_batch(mocker, mock_data_asset): batch = mocker.Mock(spec=Batch) batch.id = "batch_id" + batch.data_asset = mock_data_asset return batch @@ -55,7 +63,16 @@ def metric_retriever(mock_context): @pytest.fixture(scope="function") def mock_batch_request(mocker): - return mocker.Mock(spec=BatchRequest) + batch_request = mocker.Mock(spec=BatchRequest) + batch_request.data_asset_name = "some_data_asset_name" + return batch_request + + +@pytest.fixture(scope="function") +def mock_batch_request_variant(mocker): + batch_request = mocker.Mock(spec=BatchRequest) + batch_request.data_asset_name = "other_data_asset_name" + return batch_request def test_get_metrics_table_metrics_only( @@ -654,6 +671,42 @@ def test_get_metrics_only_gets_a_validator_once( mock_context.get_validator.assert_called_once_with(batch_request=mock_batch_request) +def test_get_metrics_only_gets_new_validator_on_asset_change( + mocker: MockerFixture, + mock_context, + mock_validator, + mock_batch_request_variant, + metric_retriever, +): + aborted_metrics = {} + + computed_metrics = { + ("table.row_count", (), ()): 2, + ("table.columns", (), ()): ["col1", "col2"], + ("table.column_types", (), "include_nested=True"): [ + {"name": "col1", "type": "float"}, + {"name": "col2", "type": "float"}, + ], + } + metrics_list: List[MetricTypes] = [ + MetricTypes.TABLE_ROW_COUNT, + MetricTypes.TABLE_COLUMNS, + MetricTypes.TABLE_COLUMN_TYPES, + ] + mock_validator.compute_metrics.return_value = ( + computed_metrics, + aborted_metrics, + ) + mocker.patch( + f"{ColumnDomainBuilder.__module__}.{ColumnDomainBuilder.__name__}.get_effective_column_names", + return_value=["col1", 
"col2"], + ) + metric_retriever.get_metrics(batch_request=mock_batch_request_variant, metric_list=metrics_list) + + assert mock_context.get_validator.call_count == 4 + mock_context.get_validator.assert_called_with(batch_request=mock_batch_request_variant) + + def test_get_metrics_with_no_metrics( mock_context, mock_validator, mock_batch_request, metric_retriever ): diff --git a/tests/experimental/metric_repository/test_metric_list_metric_retriever_integration.py b/tests/experimental/metric_repository/test_metric_list_metric_retriever_integration.py index 2309f3e78362..5385d44514ce 100644 --- a/tests/experimental/metric_repository/test_metric_list_metric_retriever_integration.py +++ b/tests/experimental/metric_repository/test_metric_list_metric_retriever_integration.py @@ -99,7 +99,7 @@ def test_get_metrics_table_metrics_only( ), ] - # Assert each metric so it is easier to see which one fails (instead of assert metrics == expected_metrics): # noqa: E501 + # Assert each metric so it is easier to see which one fails (instead of assert metrics == expected_metrics): # noqa: E501 # FIXME CoP assert len(metrics) == len(expected_metrics) for metric in metrics: assert metric.dict() in [expected_metric.dict() for expected_metric in expected_metrics] diff --git a/tests/experimental/metric_repository/test_metric_repository.py b/tests/experimental/metric_repository/test_metric_repository.py index 9007afe318a2..dcc974595f69 100644 --- a/tests/experimental/metric_repository/test_metric_repository.py +++ b/tests/experimental/metric_repository/test_metric_repository.py @@ -1,4 +1,4 @@ -from unittest.mock import Mock # noqa: TID251 +from unittest.mock import Mock # noqa: TID251 # FIXME CoP import pytest @@ -25,4 +25,4 @@ def test_add_metric_run(mock_data_store: DataStore, mock_metric_run: MetricRun): metric_repository.add_metric_run(metric_run=mock_metric_run) - mock_data_store.add.assert_called_once_with(value=mock_metric_run) # type: ignore[attr-defined] + 
mock_data_store.add.assert_called_once_with(value=mock_metric_run) # type: ignore[attr-defined] # FIXME CoP diff --git a/tests/integration/cloud/end_to_end/conftest.py b/tests/integration/cloud/end_to_end/conftest.py index 7441642fbd09..4102d173120c 100644 --- a/tests/integration/cloud/end_to_end/conftest.py +++ b/tests/integration/cloud/end_to_end/conftest.py @@ -60,7 +60,7 @@ def datasource_name( # in that case, we create one simply to test get and delete try: _ = context.data_sources.get(name=datasource_name) - except ValueError: + except KeyError: _ = context.data_sources.add_pandas(name=datasource_name) context.data_sources.get(name=datasource_name) context.delete_datasource(name=datasource_name) @@ -193,7 +193,7 @@ def construct_spark_df_from_pandas( def spark_session() -> pyspark.SparkSession: from great_expectations.compatibility import pyspark - if pyspark.SparkSession: # type: ignore[truthy-function] + if pyspark.SparkSession: # type: ignore[truthy-function] # FIXME CoP return SparkDFExecutionEngine.get_or_create_spark_session() raise ValueError("spark tests are requested, but pyspark is not installed") diff --git a/tests/integration/cloud/end_to_end/test_pandas_datasource.py b/tests/integration/cloud/end_to_end/test_pandas_datasource.py index 0ffe063ca045..4ac69ec65a44 100644 --- a/tests/integration/cloud/end_to_end/test_pandas_datasource.py +++ b/tests/integration/cloud/end_to_end/test_pandas_datasource.py @@ -39,7 +39,7 @@ def datasource( datasource.name == new_datasource_name ), "The datasource was not updated in the previous method call." 
datasource.name = datasource_name - datasource = context.add_or_update_datasource( # type: ignore[assignment] + datasource = context.data_sources.add_or_update_pandas( datasource=datasource, ) assert ( diff --git a/tests/integration/cloud/end_to_end/test_pandas_filesystem_datasource.py b/tests/integration/cloud/end_to_end/test_pandas_filesystem_datasource.py index c36da1011353..9b6f5a8c7323 100644 --- a/tests/integration/cloud/end_to_end/test_pandas_filesystem_datasource.py +++ b/tests/integration/cloud/end_to_end/test_pandas_filesystem_datasource.py @@ -75,7 +75,7 @@ def datasource( ), "The datasource was not updated in the previous method call." datasource.base_directory = original_base_dir - datasource = context.add_or_update_datasource(datasource=datasource) # type: ignore[assignment] + datasource = context.data_sources.add_or_update_pandas_filesystem(datasource=datasource) assert ( datasource.base_directory == original_base_dir ), "The datasource was not updated in the previous method call." diff --git a/tests/integration/cloud/end_to_end/test_spark_datasource.py b/tests/integration/cloud/end_to_end/test_spark_datasource.py index 6f21d58f9f00..48e8bbd6b1eb 100644 --- a/tests/integration/cloud/end_to_end/test_spark_datasource.py +++ b/tests/integration/cloud/end_to_end/test_spark_datasource.py @@ -36,14 +36,6 @@ def datasource( persist=True, ) datasource.persist = False - datasource = context.data_sources.add_or_update_spark(datasource=datasource) # type: ignore[call-arg] - assert ( - datasource.persist is False - ), "The datasource was not updated in the previous method call." - datasource.persist = True - datasource = context.add_or_update_datasource(datasource=datasource) # type: ignore[assignment] - assert datasource.persist is True, "The datasource was not updated in the previous method call." 
- datasource.persist = False datasource_dict = datasource.dict() datasource = context.data_sources.add_or_update_spark(**datasource_dict) assert ( @@ -51,7 +43,7 @@ def datasource( ), "The datasource was not updated in the previous method call." datasource.persist = True datasource_dict = datasource.dict() - datasource = context.add_or_update_datasource(**datasource_dict) # type: ignore[assignment] + datasource = context.data_sources.add_or_update_spark(**datasource_dict) assert datasource.persist is True, "The datasource was not updated in the previous method call." return datasource diff --git a/tests/integration/cloud/end_to_end/test_spark_filesystem_datasource.py b/tests/integration/cloud/end_to_end/test_spark_filesystem_datasource.py index b30c95764ded..8107f71b300c 100644 --- a/tests/integration/cloud/end_to_end/test_spark_filesystem_datasource.py +++ b/tests/integration/cloud/end_to_end/test_spark_filesystem_datasource.py @@ -68,7 +68,7 @@ def datasource( ), "The datasource was not updated in the previous method call." datasource.base_directory = normalize_directory_path(original_base_dir, context.root_directory) - datasource = context.add_or_update_datasource(datasource=datasource) # type: ignore[assignment] + datasource = context.data_sources.add_or_update_spark_filesystem(datasource=datasource) assert ( datasource.base_directory == original_base_dir ), "The datasource was not updated in the previous method call." 
diff --git a/tests/integration/cloud/rest_contracts/conftest.py b/tests/integration/cloud/rest_contracts/conftest.py index 08cc172ef8ea..4b14fa6a7915 100644 --- a/tests/integration/cloud/rest_contracts/conftest.py +++ b/tests/integration/cloud/rest_contracts/conftest.py @@ -78,7 +78,7 @@ def cloud_data_context( cloud_base_url: str, cloud_access_token: str, ) -> CloudDataContext: - """This is a real Cloud Data Context that points to the pact mock service instead of the Mercury API.""" # noqa: E501 + """This is a real Cloud Data Context that points to the pact mock service instead of the Mercury API.""" # noqa: E501 # FIXME CoP cloud_data_context = CloudDataContext( cloud_base_url=cloud_base_url, cloud_organization_id=EXISTING_ORGANIZATION_ID, @@ -121,7 +121,7 @@ def pact_test(request) -> pact.Pact: publish_to_broker = False else: pytest.skip( - "no pact credentials: set PACT_BROKER_READ_ONLY_TOKEN from greatexpectations.pactflow.io" # noqa: E501 + "no pact credentials: set PACT_BROKER_READ_ONLY_TOKEN from greatexpectations.pactflow.io" # noqa: E501 # FIXME CoP ) # Adding random id to the commit hash allows us to run the build @@ -175,7 +175,7 @@ class ContractInteraction(pydantic.BaseModel): Returns: ContractInteraction - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP class Config: arbitrary_types_allowed = True @@ -208,7 +208,7 @@ def _run_pact_test( Returns: None - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP request: dict[str, str | PactBody] = { "method": contract_interaction.method, @@ -221,7 +221,7 @@ def _run_pact_test( request["headers"] = dict(gx_cloud_session.headers) if contract_interaction.request_headers is not None: - request["headers"].update(contract_interaction.request_headers) # type: ignore[union-attr] + request["headers"].update(contract_interaction.request_headers) # type: ignore[union-attr] # FIXME CoP gx_cloud_session.headers.update(contract_interaction.request_headers) response: dict[str, int | PactBody] = { diff --git 
a/tests/integration/cloud/rest_contracts/unit_test_mocks/conftest.py b/tests/integration/cloud/rest_contracts/unit_test_mocks/conftest.py index 425e836e0b7a..1301ae6ed46d 100644 --- a/tests/integration/cloud/rest_contracts/unit_test_mocks/conftest.py +++ b/tests/integration/cloud/rest_contracts/unit_test_mocks/conftest.py @@ -23,7 +23,7 @@ def _convert_matcher_to_value(matcher: pact.matchers.Matcher) -> JsonData: return matcher.generate()["contents"] -def _reify_pact_body( # noqa: C901 +def _reify_pact_body( # noqa: C901 # FIXME CoP body: PactBody, ) -> JsonData: if isinstance(body, list): diff --git a/tests/integration/common_workflows/test_filesystem_asset_workflows.py b/tests/integration/common_workflows/test_filesystem_asset_workflows.py index 533ced04961b..fd66e4547eab 100644 --- a/tests/integration/common_workflows/test_filesystem_asset_workflows.py +++ b/tests/integration/common_workflows/test_filesystem_asset_workflows.py @@ -70,7 +70,7 @@ def pandas_filesystem_whole_table_batch_definition( def pandas_filesystem_monthly_batch_definition( pandas_file_system_asset: PandasCSVAsset, ) -> BatchDefinition: - return pandas_file_system_asset.add_batch_definition_monthly( # type: ignore[attr-defined] + return pandas_file_system_asset.add_batch_definition_monthly( # type: ignore[attr-defined] # FIXME CoP "monthly", re.compile(BATCHING_REGEX), ) @@ -80,7 +80,7 @@ def pandas_filesystem_monthly_batch_definition( def pandas_filesystem_monthly_batch_definition_descending( pandas_file_system_asset: PandasCSVAsset, ) -> BatchDefinition: - return pandas_file_system_asset.add_batch_definition_monthly( # type: ignore[attr-defined] + return pandas_file_system_asset.add_batch_definition_monthly( # type: ignore[attr-defined] # FIXME CoP "monthly", re.compile(BATCHING_REGEX), sort_ascending=False, diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 971c8d0f6b2e..40ae4df235a2 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py 
@@ -174,7 +174,7 @@ def asset_for_datasource( This must be used in conjunction with `indirect=True` to defer execution """ set_context(_batch_setup_for_datasource.context) - yield _batch_setup_for_datasource.asset + yield _batch_setup_for_datasource.make_asset() @pytest.fixture diff --git a/tests/integration/data_sources_and_expectations/README.md b/tests/integration/data_sources_and_expectations/README.md new file mode 100644 index 000000000000..c42364515775 --- /dev/null +++ b/tests/integration/data_sources_and_expectations/README.md @@ -0,0 +1,91 @@ +# DataSource and Expectation Integration Tests +Most of the tests in this directory make use of a few utilities that help load data into various data sources. +The following sections provide an overview of how they work. + +## Overview of the primary classes + +The following is a rough class diagram of the main classes involved in the testing utilities. +* DataSourceTestConfig is the public interface; instances are passed to `parameterize_batch_for_data_sources` + * Holds optional schema information + * Knows about pymarks +* BatchTestSetup instances are created behind the scenes + * Holds data + * Knows about the actual data source + * Sets up / tears down data + +```mermaid +classDiagram + class DataSourceTestConfig + class BatchTestSetup + + DataSourceTestConfig <|-- PostgreSQLDatasourceTestConfig + DataSourceTestConfig <|-- SnowflakeDatasourceTestConfig + BatchTestSetup <|-- PostgresBatchTestSetup + BatchTestSetup <|-- SnowflakeBatchTestSetup + + <> DataSourceTestConfig + <> BatchTestSetup + + DataSourceTestConfig : +str label + DataSourceTestConfig : +str pytest_marks + DataSourceTestConfig : +dict column_types + DataSourceTestConfig : +create_batch_setup(data) BatchTestSetup + + BatchTestSetup : +DataSourceTestConfig config + BatchTestSetup : +dict data + BatchTestSetup : +setup() + BatchTestSetup : +teardown() + BatchTestSetup : +make_batch() Batch + + DataSourceTestConfig --> BatchTestSetup: creates +``` +
+## Overview of the main flow +The following shows the rough flow when running tests with `parameterize_batch_for_data_sources` and the `batch_for_datasource` fixture. + +Some names have been truncated in the diagram. + +An overview of the main pieces: + +* test: this is the test you are writing +* parameterize_batch: `parameterize_batch_for_data_sources` +* `batch_for_datasource`: fixture that pulls in the batch for you +* _batch_setup: `_batch_setup_for_datasource`, the fixture that handles caching test configs and calling setup +* cached_setups: ensures that identical TestSetups are only set up / torn down once to improve performance + +```mermaid +sequenceDiagram + participant test + participant parameterize_batch + participant batch_for_datasource + participant _batch_setup + participant cached_setups + + test->>parameterize_batch: [TestConfig], data + note right of parameterize_batch: pytest.parametrize(label) + note right of parameterize_batch: makes TestSetups available to _batch_setup + loop For each TestConfig + parameterize_batch-->>_batch_setup: pytest.parametrize(TestConfig) + end + + loop For each TestConfig + test-->>batch_for_datasource: requests batch + batch_for_datasource-->>_batch_setup: requests TestSetup + opt If new TestConfig + _batch_setup->>_batch_setup: TestConfig.create_batch_setup + _batch_setup->>cached_setups: cache PostgresBatchTestSetup + end + _batch_setup->>cached_setups: get TestSetup + cached_setups->>_batch_setup: TestSetup + _batch_setup-->>batch_for_datasource: TestSetup + batch_for_datasource-->>batch_for_datasource: TestSetup.make_batch() + batch_for_datasource-->>test: batch + test->>test: Do test + end + + test-->>cached_setups: teardown + loop For each TestSetup + cached_setups->>cached_setups: TestSetup.teardown() + end + +``` diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_distinct_values_to_be_in_set.py
b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_distinct_values_to_be_in_set.py index 5b2b9a3a42c5..8bbb7fb48ef1 100644 --- a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_distinct_values_to_be_in_set.py +++ b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_distinct_values_to_be_in_set.py @@ -1,9 +1,12 @@ +from __future__ import annotations + from datetime import datetime import pandas as pd import pytest import great_expectations.expectations as gxe +from great_expectations.compatibility import pydantic from great_expectations.core.result_format import ResultFormat from great_expectations.datasource.fluent.interfaces import Batch from tests.integration.conftest import parameterize_batch_for_data_sources @@ -48,12 +51,12 @@ def test_strings(batch_for_datasource: Batch) -> None: @parameterize_batch_for_data_sources( data_source_configs=DATA_SOURCES_THAT_SUPPORT_DATE_COMPARISONS, - data=pd.DataFrame({COL_NAME: [datetime(2024, 11, 19).date(), datetime(2024, 11, 20).date()]}), # noqa: DTZ001 + data=pd.DataFrame({COL_NAME: [datetime(2024, 11, 19).date(), datetime(2024, 11, 20).date()]}), # noqa: DTZ001 # FIXME CoP ) def test_dates(batch_for_datasource: Batch) -> None: expectation = gxe.ExpectColumnDistinctValuesToBeInSet( column=COL_NAME, - value_set=[datetime(2024, 11, 19).date(), datetime(2024, 11, 20).date()], # noqa: DTZ001 + value_set=[datetime(2024, 11, 19).date(), datetime(2024, 11, 20).date()], # noqa: DTZ001 # FIXME CoP ) result = batch_for_datasource.validate(expectation) assert result.success @@ -77,25 +80,10 @@ def test_data_is_subset(batch_for_datasource: Batch) -> None: assert result.success -@pytest.mark.xfail(strict=True) -@parameterize_batch_for_data_sources( - data_source_configs=JUST_PANDAS_DATA_SOURCES, data=ONES_AND_TWOS -) -def test_empty_value_set(batch_for_datasource: Batch) -> None: - """Failing test that seems like a (pretty minor) bug""" - 
expectation = gxe.ExpectColumnDistinctValuesToBeInSet(column=COL_NAME, value_set=[]) - result = batch_for_datasource.validate(expectation) - assert not result.success - - -@parameterize_batch_for_data_sources( - data_source_configs=JUST_PANDAS_DATA_SOURCES, data=ONES_AND_TWOS -) -def test_value_set_is_none(batch_for_datasource: Batch) -> None: - # why do we even allow this?!? - expectation = gxe.ExpectColumnDistinctValuesToBeInSet(column=COL_NAME, value_set=None) - result = batch_for_datasource.validate(expectation) - assert result.success +@pytest.mark.unit +def test_empty_value_set() -> None: + with pytest.raises(pydantic.ValidationError): + gxe.ExpectColumnDistinctValuesToBeInSet(column=COL_NAME, value_set=[]) @parameterize_batch_for_data_sources( diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_distinct_values_to_contain_set.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_distinct_values_to_contain_set.py index 384f5f97adcc..8ae10fe99c28 100644 --- a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_distinct_values_to_contain_set.py +++ b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_distinct_values_to_contain_set.py @@ -45,12 +45,12 @@ def test_strings(batch_for_datasource: Batch) -> None: @parameterize_batch_for_data_sources( data_source_configs=DATA_SOURCES_THAT_SUPPORT_DATE_COMPARISONS, - data=pd.DataFrame({COL_NAME: [datetime(2024, 11, 19).date(), datetime(2024, 11, 20).date()]}), # noqa: DTZ001 + data=pd.DataFrame({COL_NAME: [datetime(2024, 11, 19).date(), datetime(2024, 11, 20).date()]}), # noqa: DTZ001 # FIXME CoP ) def test_dates(batch_for_datasource: Batch) -> None: expectation = gxe.ExpectColumnDistinctValuesToContainSet( column=COL_NAME, - value_set=[datetime(2024, 11, 19).date()], # noqa: DTZ001 + value_set=[datetime(2024, 11, 19).date()], # noqa: DTZ001 # FIXME CoP ) result = 
batch_for_datasource.validate(expectation) assert result.success diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_distinct_values_to_equal_set.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_distinct_values_to_equal_set.py index e7a9a8cece10..0d7e0ec09c1f 100644 --- a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_distinct_values_to_equal_set.py +++ b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_distinct_values_to_equal_set.py @@ -1,5 +1,4 @@ from datetime import datetime -from typing import Optional import pandas as pd import pytest @@ -49,12 +48,12 @@ def test_strings(batch_for_datasource: Batch) -> None: @parameterize_batch_for_data_sources( data_source_configs=DATA_SOURCES_THAT_SUPPORT_DATE_COMPARISONS, - data=pd.DataFrame({COL_NAME: [datetime(2024, 11, 19).date(), datetime(2024, 11, 20).date()]}), # noqa: DTZ001 + data=pd.DataFrame({COL_NAME: [datetime(2024, 11, 19).date(), datetime(2024, 11, 20).date()]}), # noqa: DTZ001 # FIXME CoP ) def test_dates(batch_for_datasource: Batch) -> None: expectation = gxe.ExpectColumnDistinctValuesToEqualSet( column=COL_NAME, - value_set=[datetime(2024, 11, 19).date(), datetime(2024, 11, 20).date()], # noqa: DTZ001 + value_set=[datetime(2024, 11, 19).date(), datetime(2024, 11, 20).date()], # noqa: DTZ001 # FIXME CoP ) result = batch_for_datasource.validate(expectation) assert result.success @@ -69,13 +68,11 @@ def test_ignores_nulls(batch_for_datasource: Batch) -> None: assert result.success -@pytest.mark.parametrize("value_set", [None, [], [1], [1, 4], [1, 2, 3]]) +@pytest.mark.parametrize("value_set", [[1], [1, 4], [1, 2, 3]]) @parameterize_batch_for_data_sources( data_source_configs=JUST_PANDAS_DATA_SOURCES, data=ONES_AND_TWOS ) -def test_fails_if_data_is_not_equal( - batch_for_datasource: Batch, value_set: Optional[list[int]] -) -> None: +def 
test_fails_if_data_is_not_equal(batch_for_datasource: Batch, value_set: list[int]) -> None: expectation = gxe.ExpectColumnDistinctValuesToEqualSet(column=COL_NAME, value_set=value_set) result = batch_for_datasource.validate(expectation) assert not result.success diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_max_to_be_between.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_max_to_be_between.py index 4149706c1037..ed4a211d72e8 100644 --- a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_max_to_be_between.py +++ b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_max_to_be_between.py @@ -63,13 +63,13 @@ def test_success(batch_for_datasource: Batch, expectation: gxe.ExpectColumnMaxTo @parameterize_batch_for_data_sources( data_source_configs=DATA_SOURCES_THAT_SUPPORT_DATE_COMPARISONS, - data=pd.DataFrame({COL_NAME: [datetime(2024, 11, 19).date(), datetime(2024, 11, 22).date()]}), # noqa: DTZ001 + data=pd.DataFrame({COL_NAME: [datetime(2024, 11, 19).date(), datetime(2024, 11, 22).date()]}), # noqa: DTZ001 # FIXME CoP ) def test_dates(batch_for_datasource: Batch) -> None: expectation = gxe.ExpectColumnMaxToBeBetween( column=COL_NAME, - min_value=datetime(2024, 11, 20).date(), # noqa: DTZ001 - max_value=datetime(2024, 11, 22).date(), # noqa: DTZ001 + min_value=datetime(2024, 11, 20).date(), # noqa: DTZ001 # FIXME CoP + max_value=datetime(2024, 11, 22).date(), # noqa: DTZ001 # FIXME CoP ) result = batch_for_datasource.validate(expectation) assert result.success diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_min_to_be_between.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_min_to_be_between.py index efeff02217eb..136a78bab314 100644 --- a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_min_to_be_between.py +++ 
b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_min_to_be_between.py @@ -63,13 +63,13 @@ def test_success(batch_for_datasource: Batch, expectation: gxe.ExpectColumnMinTo @parameterize_batch_for_data_sources( data_source_configs=DATA_SOURCES_THAT_SUPPORT_DATE_COMPARISONS, - data=pd.DataFrame({COL_NAME: [datetime(2024, 11, 22).date(), datetime(2024, 11, 26).date()]}), # noqa: DTZ001 + data=pd.DataFrame({COL_NAME: [datetime(2024, 11, 22).date(), datetime(2024, 11, 26).date()]}), # noqa: DTZ001 # FIXME CoP ) def test_dates(batch_for_datasource: Batch) -> None: expectation = gxe.ExpectColumnMinToBeBetween( column=COL_NAME, - min_value=datetime(2024, 11, 20).date(), # noqa: DTZ001 - max_value=datetime(2024, 11, 22).date(), # noqa: DTZ001 + min_value=datetime(2024, 11, 20).date(), # noqa: DTZ001 # FIXME CoP + max_value=datetime(2024, 11, 22).date(), # noqa: DTZ001 # FIXME CoP ) result = batch_for_datasource.validate(expectation) assert result.success diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_pair_values_a_to_be_greater_than_b.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_pair_values_a_to_be_greater_than_b.py index 08f515fef4a5..bf2ad7237a9b 100644 --- a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_pair_values_a_to_be_greater_than_b.py +++ b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_pair_values_a_to_be_greater_than_b.py @@ -31,18 +31,18 @@ NUMBERS_A_EQUAL: [5, 8, 11, 13, 14], NUMBERS_B: [1, 5, 9, 12, 12], DATES_A: [ - datetime(2024, 1, 1).date(), # noqa: DTZ001 - datetime(2024, 2, 1).date(), # noqa: DTZ001 - datetime(2024, 3, 1).date(), # noqa: DTZ001 - datetime(2024, 4, 1).date(), # noqa: DTZ001 - datetime(2024, 5, 1).date(), # noqa: DTZ001 + datetime(2024, 1, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 2, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 3, 
1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 4, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 5, 1).date(), # noqa: DTZ001 # FIXME CoP ], DATES_B: [ - datetime(2023, 12, 1).date(), # noqa: DTZ001 - datetime(2024, 1, 1).date(), # noqa: DTZ001 - datetime(2024, 2, 1).date(), # noqa: DTZ001 - datetime(2024, 3, 1).date(), # noqa: DTZ001 - datetime(2024, 4, 1).date(), # noqa: DTZ001 + datetime(2023, 12, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 1, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 2, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 3, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 4, 1).date(), # noqa: DTZ001 # FIXME CoP ], STRINGS_A: ["b", "m", "y", "z", "zz"], STRINGS_B: ["a", "k", "x", "y", "za"], diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_pair_values_to_be_equal.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_pair_values_to_be_equal.py index 2dbc7f654740..57254a3ed035 100644 --- a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_pair_values_to_be_equal.py +++ b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_pair_values_to_be_equal.py @@ -33,19 +33,19 @@ EQUAL_STRINGS_B: ["foo", "bar", "baz"], UNEQUAL_STRINGS: ["foo", "bar", "wat"], EQUAL_DATES_A: [ - datetime(2024, 1, 1).date(), # noqa: DTZ001 - datetime(2024, 2, 1).date(), # noqa: DTZ001 - datetime(2024, 3, 1).date(), # noqa: DTZ001 + datetime(2024, 1, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 2, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 3, 1).date(), # noqa: DTZ001 # FIXME CoP ], EQUAL_DATES_B: [ - datetime(2024, 1, 1).date(), # noqa: DTZ001 - datetime(2024, 2, 1).date(), # noqa: DTZ001 - datetime(2024, 3, 1).date(), # noqa: DTZ001 + datetime(2024, 1, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 2, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 3, 1).date(), 
# noqa: DTZ001 # FIXME CoP ], UNEQUAL_DATES: [ - datetime(2024, 1, 1).date(), # noqa: DTZ001 - datetime(2024, 2, 1).date(), # noqa: DTZ001 - datetime(2024, 4, 1).date(), # noqa: DTZ001 + datetime(2024, 1, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 2, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 4, 1).date(), # noqa: DTZ001 # FIXME CoP ], ALL_EQUAL_NUMS_A: [1, 2, 3], ALL_EQUAL_NUMS_B: [1, 2, 3], diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_unique_value_count_to_be_between.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_unique_value_count_to_be_between.py index b0a634888082..07db7e33c502 100644 --- a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_unique_value_count_to_be_between.py +++ b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_unique_value_count_to_be_between.py @@ -72,14 +72,12 @@ def test_success( column=COL_NAME, min_value=5, strict_min=True ), id="strict_min", - marks=pytest.mark.xfail(strict=True), ), pytest.param( gxe.ExpectColumnUniqueValueCountToBeBetween( column=COL_NAME, max_value=5, strict_max=True ), id="strict_max", - marks=pytest.mark.xfail(strict=True), ), ], ) diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_value_lengths_to_be_between.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_value_lengths_to_be_between.py index 86015c57163f..c34fb0fa7641 100644 --- a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_value_lengths_to_be_between.py +++ b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_value_lengths_to_be_between.py @@ -37,7 +37,6 @@ def test_success_complete(batch_for_datasource: Batch) -> None: gxe.ExpectColumnValueLengthsToBeBetween( column=COL_NAME, min_value=1, max_value=4, strict_min=True, strict_max=True ), - 
marks=pytest.mark.xfail(strict=False, reason="fails for python > 3.9"), id="strict_bounds", ), ], diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_be_between.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_be_between.py index 2042d14d647e..c3b306d12b74 100644 --- a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_be_between.py +++ b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_be_between.py @@ -21,11 +21,11 @@ { NUMERIC_COLUMN: [1, 2, 3, 4, 5, None], DATE_COLUMN: [ - datetime(2023, 1, 1).date(), # noqa: DTZ001 - datetime(2023, 6, 15).date(), # noqa: DTZ001 - datetime(2023, 12, 31).date(), # noqa: DTZ001 - datetime(2024, 1, 1).date(), # noqa: DTZ001 - datetime(2024, 6, 15).date(), # noqa: DTZ001 + datetime(2023, 1, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2023, 6, 15).date(), # noqa: DTZ001 # FIXME CoP + datetime(2023, 12, 31).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 1, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 6, 15).date(), # noqa: DTZ001 # FIXME CoP None, ], }, @@ -70,8 +70,8 @@ def test_success_complete_non_sql(batch_for_datasource: Batch) -> None: pytest.param( gxe.ExpectColumnValuesToBeBetween( column=DATE_COLUMN, - min_value=datetime(2023, 1, 1).date(), # noqa: DTZ001 - max_value=datetime(2024, 12, 31).date(), # noqa: DTZ001 + min_value=datetime(2023, 1, 1).date(), # noqa: DTZ001 # FIXME CoP + max_value=datetime(2024, 12, 31).date(), # noqa: DTZ001 # FIXME CoP ), id="dates", ), @@ -116,8 +116,8 @@ def test_success( pytest.param( gxe.ExpectColumnValuesToBeBetween( column=DATE_COLUMN, - min_value=datetime(2024, 1, 1).date(), # noqa: DTZ001 - max_value=datetime(2024, 12, 31).date(), # noqa: DTZ001 + min_value=datetime(2024, 1, 1).date(), # noqa: DTZ001 # FIXME CoP + max_value=datetime(2024, 12, 31).date(), # noqa: DTZ001 # FIXME CoP ), 
id="dates_outside_range", ), diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_be_in_set.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_be_in_set.py index 4ac423a41cd4..7598542b2c8e 100644 --- a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_be_in_set.py +++ b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_be_in_set.py @@ -24,9 +24,9 @@ NUMBERS_COLUMN: [1, 2, 3], STRINGS_COLUMN: ["a", "b", "c"], DATES_COLUMN: [ - datetime(2024, 1, 1).date(), # noqa: DTZ001 - datetime(2024, 2, 1).date(), # noqa: DTZ001 - datetime(2024, 3, 1).date(), # noqa: DTZ001 + datetime(2024, 1, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 2, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 3, 1).date(), # noqa: DTZ001 # FIXME CoP ], NULLS_COLUMN: [1, None, 3], }, @@ -76,9 +76,9 @@ def test_success_complete_sql(batch_for_datasource: Batch) -> None: gxe.ExpectColumnValuesToBeInSet( column=DATES_COLUMN, value_set=[ - datetime(2024, 1, 1).date(), # noqa: DTZ001 - datetime(2024, 2, 1).date(), # noqa: DTZ001 - datetime(2024, 3, 1).date(), # noqa: DTZ001 + datetime(2024, 1, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 2, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 3, 1).date(), # noqa: DTZ001 # FIXME CoP ], ), id="date_set", diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_be_in_type_list.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_be_in_type_list.py index 02d0277f1f9e..9df08d817fe9 100644 --- a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_be_in_type_list.py +++ b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_be_in_type_list.py @@ -1,7 +1,15 @@ import pandas as pd import pytest +import 
sqlalchemy.types as sqltypes +from packaging import version import great_expectations.expectations as gxe +from great_expectations.compatibility.databricks import DATABRICKS_TYPES +from great_expectations.compatibility.postgresql import POSTGRESQL_TYPES +from great_expectations.compatibility.snowflake import SNOWFLAKE_TYPES +from great_expectations.compatibility.sqlalchemy import ( + sqlalchemy as sa, +) from great_expectations.core.result_format import ResultFormat from great_expectations.datasource.fluent.interfaces import Batch from tests.integration.conftest import parameterize_batch_for_data_sources @@ -12,6 +20,7 @@ from tests.integration.test_utils.data_source_config import ( DatabricksDatasourceTestConfig, PandasDataFrameDatasourceTestConfig, + PostgreSQLDatasourceTestConfig, SnowflakeDatasourceTestConfig, ) @@ -49,24 +58,15 @@ data_source_configs=PASSING_DATA_SOURCES_EXCEPT_DATA_FRAMES, data=DATA ) def test_success_complete(batch_for_datasource: Batch) -> None: - type_list = ["INTEGER", "Integer", "int", "int64", "int32", "IntegerType", "_CUSTOM_DECIMAL"] - expectation = gxe.ExpectColumnValuesToBeInTypeList(column=INTEGER_COLUMN, type_list=type_list) - result = batch_for_datasource.validate(expectation, result_format=ResultFormat.COMPLETE) - result_dict = result.to_json_dict()["result"] - - assert result.success - assert isinstance(result_dict, dict) - assert result_dict["observed_value"] in type_list - - -@pytest.mark.xfail -@parameterize_batch_for_data_sources( - data_source_configs=[SnowflakeDatasourceTestConfig(), DatabricksDatasourceTestConfig()], - data=DATA, -) -def test_success_complete_errors(batch_for_datasource: Batch) -> None: - # TODO: get this fixed - type_list = ["INTEGER", "Integer", "int", "int64", "int32", "IntegerType", "_CUSTOM_DECIMAL"] + type_list = [ + "INTEGER", + "Integer", + "int", + "int64", + "int32", + "IntegerType", + "_CUSTOM_DECIMAL", + ] expectation = gxe.ExpectColumnValuesToBeInTypeList(column=INTEGER_COLUMN, 
type_list=type_list) result = batch_for_datasource.validate(expectation, result_format=ResultFormat.COMPLETE) result_dict = result.to_json_dict()["result"] @@ -135,3 +135,540 @@ def test_failure( ) -> None: result = batch_for_datasource.validate(expectation) assert not result.success + + +@pytest.mark.parametrize( + "expectation", + [ + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList( + column="CHARACTER", type_list=["CHARACTER", "VARCHAR(1)"] + ), + id="CHARACTER", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList( + column="DEC", type_list=["DEC", "DECIMAL", "DECIMAL(38, 0)"] + ), + id="DEC", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList( + column="FIXED", type_list=["FIXED", "DECIMAL", "DECIMAL(38, 0)"] + ), + id="FIXED", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="GEOGRAPHY", type_list=["GEOGRAPHY"]), + id="GEOGRAPHY", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="GEOMETRY", type_list=["GEOMETRY"]), + id="GEOMETRY", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList( + column="NUMBER", + type_list=[ + "NUMBER", + "DECIMAL", + "NUMERIC", + "DECIMAL(38, 0)", # 38, 0 is the default precision and scale + ], + ), + id="NUMBER", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList( + column="STRING", type_list=["STRING", "VARCHAR", "VARCHAR(16777216)"] + ), + id="STRING", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList( + column="TEXT", type_list=["TEXT", "VARCHAR", "VARCHAR(16777216)"] + ), + id="TEXT", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList( + column="TIMESTAMP_LTZ", type_list=["TIMESTAMP_LTZ"] + ), + id="TIMESTAMP_LTZ", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList( + column="TIMESTAMP_NTZ", type_list=["TIMESTAMP_NTZ"] + ), + id="TIMESTAMP_NTZ", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="TIMESTAMP_TZ", type_list=["TIMESTAMP_TZ"]), + id="TIMESTAMP_TZ", + ), + pytest.param( + 
gxe.ExpectColumnValuesToBeInTypeList( + column="VARBINARY", type_list=["VARBINARY", "BINARY"] + ), + id="VARBINARY", + ), + # INT , INTEGER , BIGINT , SMALLINT , TINYINT , BYTEINT are Synonymous with NUMBER, + # except that precision and scale cannot be specified (i.e. always defaults to NUMBER(38, 0) + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="BYTEINT", type_list=["DECIMAL(38, 0)"]), + id="BYTEINT", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="TINYINT", type_list=["DECIMAL(38, 0)"]), + id="TINYINT", + ), + # Complex data types which are not hashable by testing framework currently + # pytest.param( + # gxe.ExpectColumnValuesToBeInTypeList( + # column="VARIANT", type_list=["VARIANT"] + # ), + # id="VARIANT", + # ), + # pytest.param( + # gxe.ExpectColumnValuesToBeInTypeList(column="OBJECT", type_list=["OBJECT"]), + # id="OBJECT", + # ), + # pytest.param( + # gxe.ExpectColumnValuesToBeInTypeList(column="ARRAY", type_list=["ARRAY"]), + # id="ARRAY", + # ), + # These sqlachemy types map to _CUSTOM_* types in snowflake-sqlalchemy + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="_CUSTOM_Date", type_list=["DATE"]), + id="_CUSTOM_Date", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList( + column="_CUSTOM_DateTime", type_list=["TIMESTAMP_NTZ"] + ), + id="_CUSTOM_DateTime", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="_CUSTOM_Time", type_list=["TIME"]), + id="_CUSTOM_Time", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="_CUSTOM_Float", type_list=["FLOAT"]), + id="_CUSTOM_Float", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList( + column="_CUSTOM_DECIMAL", type_list=["INTEGER", "DECIMAL(38, 0)"] + ), + id="_CUSTOM_DECIMAL", + ), + ], +) +@parameterize_batch_for_data_sources( + data_source_configs=[ + SnowflakeDatasourceTestConfig( + column_types={ + "ARRAY": SNOWFLAKE_TYPES.ARRAY, + "BYTEINT": SNOWFLAKE_TYPES.BYTEINT, + "CHARACTER": 
SNOWFLAKE_TYPES.CHARACTER, + "DEC": SNOWFLAKE_TYPES.DEC, + "FIXED": SNOWFLAKE_TYPES.FIXED, + "GEOGRAPHY": SNOWFLAKE_TYPES.GEOGRAPHY, + "GEOMETRY": SNOWFLAKE_TYPES.GEOMETRY, + "NUMBER": SNOWFLAKE_TYPES.NUMBER, + "OBJECT": SNOWFLAKE_TYPES.OBJECT, + "STRING": SNOWFLAKE_TYPES.STRING, + "TEXT": SNOWFLAKE_TYPES.TEXT, + "TIMESTAMP_LTZ": SNOWFLAKE_TYPES.TIMESTAMP_LTZ, + "TIMESTAMP_NTZ": SNOWFLAKE_TYPES.TIMESTAMP_NTZ, + "TIMESTAMP_TZ": SNOWFLAKE_TYPES.TIMESTAMP_TZ, + "TINYINT": SNOWFLAKE_TYPES.TINYINT, + "VARBINARY": SNOWFLAKE_TYPES.VARBINARY, + "VARIANT": SNOWFLAKE_TYPES.VARIANT, + # These sqlachemy types map to _CUSTOM_* types in snowflake-sqlalchemy + "_CUSTOM_Date": sqltypes.Date, + "_CUSTOM_DateTime": sqltypes.DateTime, + "_CUSTOM_Time": sqltypes.Time, + "_CUSTOM_Float": sqltypes.Float, + "_CUSTOM_DECIMAL": sqltypes.INTEGER, + } + ) + ], + data=pd.DataFrame( + { + "BYTEINT": [1, 2, 3], + "CHARACTER": ["a", "b", "c"], + "DEC": [1.0, 2.0, 3.0], + "FIXED": [1.0, 2.0, 3.0], + "GEOGRAPHY": ["POINT(1 1)", "POINT(2 2)", "POINT(3 3)"], + "GEOMETRY": ["POINT(1 1)", "POINT(2 2)", "POINT(3 3)"], + "NUMBER": [1, 2, 3], + "STRING": ["a", "b", "c"], + "TEXT": ["a", "b", "c"], + "TIMESTAMP_LTZ": [ + "2021-01-01 00:00:00", + "2021-01-02 00:00:00", + "2021-01-03 00:00:00", + ], + "TIMESTAMP_NTZ": [ + "2021-01-01 00:00:00", + "2021-01-02 00:00:00", + "2021-01-03 00:00:00", + ], + "TIMESTAMP_TZ": [ + "2021-01-01 00:00:00", + "2021-01-02 00:00:00", + "2021-01-03 00:00:00", + ], + "TINYINT": [1, 2, 3], + "VARBINARY": [b"1", b"2", b"3"], + # Complex data types which are not hashable by testing framework currently + # "ARRAY": pd.Series([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype="object"), + # "OBJECT": [{"a": 1}, {"b": 2}, {"c": 3}], + # "VARIANT": [{"a": 1}, {"b": 2}, {"c": 3}], + # These sqlachemy types map to _CUSTOM_* types in snowflake-sqlalchemy + "_CUSTOM_Date": [ + # Date in isoformat + "2021-01-01", + "2021-01-02", + "2021-01-03", + ], + "_CUSTOM_DateTime": [ + # isoformat with 
microseconds + "2021-01-01 00:00:00.000000", + "2021-01-02 00:00:00.000000", + "2021-01-03 00:00:00.000000", + ], + "_CUSTOM_Time": [ + "00:00:00.878281", + "01:00:00.000000", + "00:10:43.000000", + ], + "_CUSTOM_Float": [1.0, 2.0, 3.0], + "_CUSTOM_DECIMAL": [1, 2, 3], + }, + dtype="object", + ), +) +def test_success_complete_snowflake( + batch_for_datasource: Batch, expectation: gxe.ExpectColumnValuesToBeInTypeList +) -> None: + result = batch_for_datasource.validate(expectation, result_format=ResultFormat.COMPLETE) + result_dict = result.to_json_dict()["result"] + + assert result.success + assert isinstance(result_dict, dict) + assert isinstance(result_dict["observed_value"], str) + assert isinstance(expectation.type_list, list) + assert result_dict["observed_value"] in expectation.type_list + + +@pytest.mark.parametrize( + "expectation", + [ + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="CHAR", type_list=["CHAR", "CHAR(1)"]), + id="CHAR", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="TEXT", type_list=["TEXT"]), + id="TEXT", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="INTEGER", type_list=["INTEGER"]), + id="INTEGER", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="SMALLINT", type_list=["SMALLINT"]), + id="SMALLINT", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="BIGINT", type_list=["BIGINT"]), + id="BIGINT", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList( + column="TIMESTAMP", type_list=["TIMESTAMP", "TIMESTAMP WITHOUT TIME ZONE"] + ), + id="TIMESTAMP", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="DATE", type_list=["DATE"]), + id="DATE", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList( + column="DOUBLE_PRECISION", type_list=["DOUBLE PRECISION"] + ), + id="DOUBLE_PRECISION", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="BOOLEAN", type_list=["BOOLEAN"]), + id="BOOLEAN", + ), + pytest.param( + 
gxe.ExpectColumnValuesToBeInTypeList(column="NUMERIC", type_list=["NUMERIC"]), + id="NUMERIC", + ), + ], +) +@parameterize_batch_for_data_sources( + data_source_configs=[ + PostgreSQLDatasourceTestConfig( + column_types={ + "CHAR": POSTGRESQL_TYPES.CHAR, + "TEXT": POSTGRESQL_TYPES.TEXT, + "INTEGER": POSTGRESQL_TYPES.INTEGER, + "SMALLINT": POSTGRESQL_TYPES.SMALLINT, + "BIGINT": POSTGRESQL_TYPES.BIGINT, + "TIMESTAMP": POSTGRESQL_TYPES.TIMESTAMP, + "DATE": POSTGRESQL_TYPES.DATE, + "DOUBLE_PRECISION": POSTGRESQL_TYPES.DOUBLE_PRECISION, + "BOOLEAN": POSTGRESQL_TYPES.BOOLEAN, + "NUMERIC": POSTGRESQL_TYPES.NUMERIC, + } + ), + ], + data=pd.DataFrame( + { + "CHAR": ["a", "b", "c"], + "TEXT": ["a", "b", "c"], + "INTEGER": [1, 2, 3], + "SMALLINT": [1, 2, 3], + "BIGINT": [1, 2, 3], + "TIMESTAMP": [ + "2021-01-01 00:00:00", + "2021-01-02 00:00:00", + "2021-01-03 00:00:00", + ], + "DATE": [ + # Date in isoformat + "2021-01-01", + "2021-01-02", + "2021-01-03", + ], + "DOUBLE_PRECISION": [1.0, 2.0, 3.0], + "BOOLEAN": [False, False, True], + "NUMERIC": [1, 2, 3], + }, + dtype="object", + ), +) +def test_success_complete_postgres( + batch_for_datasource: Batch, expectation: gxe.ExpectColumnValuesToBeInTypeList +) -> None: + result = batch_for_datasource.validate(expectation, result_format=ResultFormat.COMPLETE) + result_dict = result.to_json_dict()["result"] + + assert result.success + assert isinstance(result_dict, dict) + assert isinstance(result_dict["observed_value"], str) + assert isinstance(expectation.type_list, list) + assert result_dict["observed_value"] in expectation.type_list + + +@pytest.mark.parametrize( + "expectation", + [ + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="STRING", type_list=["STRING"]), + id="STRING", + ), + # SqlA Text gets converted to Databricks STRING + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="TEXT", type_list=["STRING"]), + id="TEXT", + ), + # SqlA UNICODE gets converted to Databricks STRING + pytest.param( + 
gxe.ExpectColumnValuesToBeInTypeList(column="UNICODE", type_list=["STRING"]), + id="UNICODE", + ), + # SqlA UNICODE_TEXT gets converted to Databricks STRING + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="UNICODE_TEXT", type_list=["STRING"]), + id="UNICODE_TEXT", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="BOOLEAN", type_list=["BOOLEAN"]), + id="BOOLEAN", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList( + column="DECIMAL", type_list=["DECIMAL", "DECIMAL(10, 0)"] + ), + id="DECIMAL", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="DATE", type_list=["DATE"]), + id="DATE", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="TIMESTAMP", type_list=["TIMESTAMP"]), + id="TIMESTAMP", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList( + column="TIMESTAMP_NTZ", type_list=["TIMESTAMP_NTZ"] + ), + id="TIMESTAMP_NTZ", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="FLOAT", type_list=["FLOAT"]), + id="FLOAT", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="INT", type_list=["INT"]), + id="INT", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList(column="TINYINT", type_list=["TINYINT"]), + id="TINYINT", + ), + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList( + column="DECIMAL", type_list=["DECIMAL", "DECIMAL(10, 0)"] + ), + id="DECIMAL", + ), + # SqlA Time gets converted to Databricks STRING, + # but is not supported by our testing framework + # pytest.param( + # gxe.ExpectColumnValuesToBeInTypeList(column="TIME", type_list=["STRING"]), + # id="TIME", + # ), + # SqlA UUID gets converted to Databricks STRING, + # but is not supported by our testing framework. 
+ # pytest.param( + # gxe.ExpectColumnValuesToBeInTypeList(column="UUID", type_list=["STRING"]), + # id="UUID", + # ) + ], +) +@parameterize_batch_for_data_sources( + data_source_configs=[ + DatabricksDatasourceTestConfig( + column_types={ + "STRING": DATABRICKS_TYPES.STRING, + "TEXT": sqltypes.Text, + "UNICODE": sqltypes.Unicode, + "UNICODE_TEXT": sqltypes.UnicodeText, + "BIGINT": sqltypes.BigInteger, + "BOOLEAN": sqltypes.BOOLEAN, + "DATE": sqltypes.DATE, + "TIMESTAMP_NTZ": DATABRICKS_TYPES.TIMESTAMP_NTZ, + "TIMESTAMP": DATABRICKS_TYPES.TIMESTAMP, + "FLOAT": sqltypes.Float, + "INT": sqltypes.Integer, + "DECIMAL": sqltypes.Numeric, + "SMALLINT": sqltypes.SmallInteger, + "TINYINT": DATABRICKS_TYPES.TINYINT, + # "TIME": sqltypes.Time, + # "UUID": sqltypes.UUID, + } + ) + ], + data=pd.DataFrame( + { + "STRING": ["a", "b", "c"], + "TEXT": ["a", "b", "c"], + "UNICODE": ["\u00e9", "\u00e9", "\u00e9"], + "UNICODE_TEXT": ["a", "b", "c"], + "BIGINT": [1111, 2222, 3333], + "BOOLEAN": [True, True, False], + "DATE": [ + "2021-01-01", + "2021-01-02", + "2021-01-03", + ], + "TIMESTAMP_NTZ": [ + "2021-01-01 00:00:00", + "2021-01-02 00:00:00", + "2021-01-03 00:00:00", + ], + "TIMESTAMP": [ + "2021-01-01 00:00:00", + "2021-01-02 00:00:00", + "2021-01-03 00:00:00", + ], + "DOUBLE": [1.0, 2.0, 3.0], + "FLOAT": [1.0, 2.0, 3.0], + "INT": [1, 2, 3], + "DECIMAL": [1.1, 2.2, 3.3], + "SMALLINT": [1, 2, 3], + # "TIME": [ + # sa.Time("22:17:33.123456"), + # sa.Time("22:17:33.123456"), + # sa.Time("22:17:33.123456"), + # ], + # "UUID": [ + # uuid.UUID("905993ea-f50e-4284-bea0-5be3f0ed7031"), + # uuid.UUID("9406b631-fa2f-41cf-b666-f9a2ac3118c1"), + # uuid.UUID("47538f05-32e3-4594-80e2-0b3b33257ae7") + # ], + }, + dtype="object", + ), +) +def test_success_complete_databricks( + batch_for_datasource: Batch, expectation: gxe.ExpectColumnValuesToBeInTypeList +) -> None: + result = batch_for_datasource.validate(expectation, result_format=ResultFormat.COMPLETE) + result_dict = 
result.to_json_dict()["result"] + + assert result.success + assert isinstance(result_dict, dict) + assert isinstance(result_dict["observed_value"], str) + assert isinstance(expectation.type_list, list) + assert result_dict["observed_value"] in expectation.type_list + + +if version.parse(sa.__version__) >= version.parse("2.0.0"): + # Note: why not use pytest.skip? + # the import of `sqltypes.Double` is only possible in sqlalchemy >= 2.0.0 + # the import is done as part of the instantiation of the test, which includes + # processing the pytest.skip() statement. This way, we skip the instantiation + # of the test entirely. + @pytest.mark.parametrize( + "expectation", + [ + pytest.param( + gxe.ExpectColumnValuesToBeInTypeList( + column="DOUBLE", type_list=["DOUBLE", "FLOAT"] + ), + id="DOUBLE", + ) + ], + ) + @parameterize_batch_for_data_sources( + data_source_configs=[ + DatabricksDatasourceTestConfig( + column_types={ + "DOUBLE": sqltypes.Double, + } + ) + ], + data=pd.DataFrame( + { + "DOUBLE": [1.0, 2.0, 3.0], + }, + dtype="object", + ), + ) + def test_success_complete_databricks_double_type_only( + batch_for_datasource: Batch, expectation: gxe.ExpectColumnValuesToBeInTypeList + ) -> None: + """What does this test and why? + + Databricks mostly uses SqlA types directly, but the double type is + only available after sqlalchemy 2.0. We therefore split up the test + into 2 parts, with this test being skipped if the SA version is too low. 
+ """ + result = batch_for_datasource.validate(expectation, result_format=ResultFormat.COMPLETE) + result_dict = result.to_json_dict()["result"] + + assert result.success + assert isinstance(result_dict, dict) + assert isinstance(result_dict["observed_value"], str) + assert isinstance(expectation.type_list, list) + assert result_dict["observed_value"] in expectation.type_list diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_be_of_type.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_be_of_type.py index 32598547011e..ffcf64dab0b7 100644 --- a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_be_of_type.py +++ b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_be_of_type.py @@ -66,13 +66,12 @@ def test_success_for_type__INTEGER(batch_for_datasource: Batch) -> None: assert result.success -@pytest.mark.xfail @parameterize_batch_for_data_sources( data_source_configs=[DatabricksDatasourceTestConfig()], data=DATA, ) def test_success_for_type__Integer(batch_for_datasource: Batch) -> None: - expectation = gxe.ExpectColumnValuesToBeOfType(column=INTEGER_COLUMN, type_="Integer") + expectation = gxe.ExpectColumnValuesToBeOfType(column=INTEGER_COLUMN, type_="INT") result = batch_for_datasource.validate(expectation) assert result.success @@ -87,13 +86,12 @@ def test_success_for_type__IntegerType(batch_for_datasource: Batch) -> None: assert result.success -@pytest.mark.xfail() @parameterize_batch_for_data_sources( data_source_configs=[SnowflakeDatasourceTestConfig()], data=DATA, ) def test_success_for_type__Number(batch_for_datasource: Batch) -> None: - expectation = gxe.ExpectColumnValuesToBeOfType(column=INTEGER_COLUMN, type_="Number") + expectation = gxe.ExpectColumnValuesToBeOfType(column=INTEGER_COLUMN, type_="DECIMAL(38, 0)") result = batch_for_datasource.validate(expectation) assert 
result.success diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_not_be_in_set.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_not_be_in_set.py index 47368a281007..fb121183b453 100644 --- a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_not_be_in_set.py +++ b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_not_be_in_set.py @@ -23,9 +23,9 @@ NUMBERS_COLUMN: [1, 2, 3, None], STRINGS_COLUMN: ["a", "b", "c", None], DATES_COLUMN: [ - datetime(2024, 1, 1).date(), # noqa: DTZ001 - datetime(2024, 2, 1).date(), # noqa: DTZ001 - datetime(2024, 3, 1).date(), # noqa: DTZ001 + datetime(2024, 1, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 2, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 3, 1).date(), # noqa: DTZ001 # FIXME CoP None, ], }, @@ -75,8 +75,8 @@ def test_success_complete_sql(batch_for_datasource: Batch) -> None: gxe.ExpectColumnValuesToNotBeInSet( column=DATES_COLUMN, value_set=[ - datetime(2024, 4, 1).date(), # noqa: DTZ001 - datetime(2024, 5, 1).date(), # noqa: DTZ001 + datetime(2024, 4, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 5, 1).date(), # noqa: DTZ001 # FIXME CoP ], ), id="dates", @@ -122,8 +122,8 @@ def test_empty_data(batch_for_datasource: Batch) -> None: gxe.ExpectColumnValuesToNotBeInSet( column=DATES_COLUMN, value_set=[ - datetime(2024, 1, 1).date(), # noqa: DTZ001 - datetime(2024, 2, 1).date(), # noqa: DTZ001 + datetime(2024, 1, 1).date(), # noqa: DTZ001 # FIXME CoP + datetime(2024, 2, 1).date(), # noqa: DTZ001 # FIXME CoP ], ), id="dates", diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_not_match_regex_list.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_not_match_regex_list.py index dcab590f58e5..7c78eff3b4f0 100644 --- 
a/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_not_match_regex_list.py +++ b/tests/integration/data_sources_and_expectations/expectations/test_expect_column_values_to_not_match_regex_list.py @@ -4,6 +4,7 @@ import pytest import great_expectations.expectations as gxe +from great_expectations.compatibility import pydantic from great_expectations.datasource.fluent.interfaces import Batch from tests.integration.conftest import parameterize_batch_for_data_sources from tests.integration.data_sources_and_expectations.test_canonical_expectations import ( @@ -126,12 +127,7 @@ def test_failure( assert not result.success -@pytest.mark.xfail( - strict=True, reason="This should either pass or fail instantiating the expectation" -) -@parameterize_batch_for_data_sources(data_source_configs=JUST_PANDAS_DATA_SOURCES, data=DATA) -def invalid_config(batch_for_datasource: Batch) -> None: - expectation = gxe.ExpectColumnValuesToNotMatchRegexList(column=COL_A, regex_list=[]) - result = batch_for_datasource.validate(expectation) - assert not result.success - assert not result.exception_info["raised_exception"] +@pytest.mark.unit +def test_invalid_config() -> None: + with pytest.raises(pydantic.ValidationError): + gxe.ExpectColumnValuesToNotMatchRegexList(column=COL_A, regex_list=[]) diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_compound_column_values_to_be_unique.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_compound_column_values_to_be_unique.py index 988c11fa2b3a..5b95d08d5b5b 100644 --- a/tests/integration/data_sources_and_expectations/expectations/test_expect_compound_column_values_to_be_unique.py +++ b/tests/integration/data_sources_and_expectations/expectations/test_expect_compound_column_values_to_be_unique.py @@ -2,6 +2,7 @@ import pytest import great_expectations.expectations as gxe +from great_expectations.compatibility import pydantic from 
great_expectations.datasource.fluent.interfaces import Batch from tests.integration.conftest import parameterize_batch_for_data_sources from tests.integration.data_sources_and_expectations.test_canonical_expectations import ( @@ -77,7 +78,7 @@ def test_failure( assert not result.success -@pytest.mark.xfail(strict=True, reason="We should fail at intantiation; not when validating") +@pytest.mark.unit @pytest.mark.parametrize( "column_list", [ @@ -85,7 +86,6 @@ def test_failure( pytest.param([INT_COL_2], id="one_col"), ], ) -@parameterize_batch_for_data_sources(data_source_configs=JUST_PANDAS_DATA_SOURCES, data=DATA) -def test_invalid_config(column_list: list[str], batch_for_datasource: Batch) -> None: - with pytest.raises(ValueError): +def test_invalid_config(column_list: list[str]) -> None: + with pytest.raises(pydantic.ValidationError): gxe.ExpectCompoundColumnsToBeUnique(column_list=column_list) diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_multicolumn_sum_to_equal.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_multicolumn_sum_to_equal.py index b17d23e8607e..a84e1d0a6727 100644 --- a/tests/integration/data_sources_and_expectations/expectations/test_expect_multicolumn_sum_to_equal.py +++ b/tests/integration/data_sources_and_expectations/expectations/test_expect_multicolumn_sum_to_equal.py @@ -54,10 +54,6 @@ def test_success(batch_for_datasource: Batch, expectation: gxe.ExpectMulticolumn @pytest.mark.parametrize( "expectation", [ - pytest.param( - gxe.ExpectMulticolumnSumToEqual(column_list=[ONES_COL], sum_total=1), - id="one_col", - ), pytest.param( gxe.ExpectMulticolumnSumToEqual( column_list=[COL_A_BAD, COL_B, COL_C], sum_total=7, mostly=0.7 diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_table_columns_to_match_set.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_table_columns_to_match_set.py index a656cd8564b6..6f98808e479a 
100644 --- a/tests/integration/data_sources_and_expectations/expectations/test_expect_table_columns_to_match_set.py +++ b/tests/integration/data_sources_and_expectations/expectations/test_expect_table_columns_to_match_set.py @@ -64,7 +64,6 @@ def test_success( ), pytest.param( gxe.ExpectTableColumnsToMatchSet(column_set=[COL_A]), - marks=pytest.mark.xfail, id="defaults_to_exact_match", ), pytest.param( diff --git a/tests/integration/data_sources_and_expectations/expectations/test_expect_table_row_count_to_be_between.py b/tests/integration/data_sources_and_expectations/expectations/test_expect_table_row_count_to_be_between.py index e533a5007f37..2d80d06b4a40 100644 --- a/tests/integration/data_sources_and_expectations/expectations/test_expect_table_row_count_to_be_between.py +++ b/tests/integration/data_sources_and_expectations/expectations/test_expect_table_row_count_to_be_between.py @@ -2,6 +2,7 @@ import pytest import great_expectations.expectations as gxe +from great_expectations.compatibility import pydantic from great_expectations.datasource.fluent.interfaces import Batch from tests.integration.conftest import parameterize_batch_for_data_sources from tests.integration.data_sources_and_expectations.test_canonical_expectations import ( @@ -43,6 +44,30 @@ def test_golden_path(batch_for_datasource: Batch) -> None: gxe.ExpectTableRowCountToBeBetween(min_value=3, max_value=5), id="inclusivity", ), + pytest.param( + gxe.ExpectTableRowCountToBeBetween( + min_value=None, max_value=None, strict_min=True, strict_max=True + ), + id="strict_min_max_vacuously_true", + ), + pytest.param( + gxe.ExpectTableRowCountToBeBetween( + min_value=2, max_value=None, strict_min=True, strict_max=True + ), + id="strict_min_max_just_min", + ), + pytest.param( + gxe.ExpectTableRowCountToBeBetween( + min_value=None, max_value=4, strict_min=True, strict_max=True + ), + id="strict_min_max_just_max", + ), + pytest.param( + gxe.ExpectTableRowCountToBeBetween( + min_value=2, max_value=4, 
strict_min=True, strict_max=True + ), + id="strict_min_max_inclusive", + ), ], ) @parameterize_batch_for_data_sources(data_source_configs=JUST_PANDAS_DATA_SOURCES, data=DATA) @@ -75,6 +100,20 @@ def test_empty_data(batch_for_datasource: Batch) -> None: gxe.ExpectTableRowCountToBeBetween(min_value=4, max_value=4), id="bad_range", ), + pytest.param( + gxe.ExpectTableRowCountToBeBetween(min_value=3, max_value=4, strict_min=True), + id="strict_min_max_observed_same_as_min", + ), + pytest.param( + gxe.ExpectTableRowCountToBeBetween(min_value=2, max_value=3, strict_max=True), + id="strict_min_max_observed_same_as_max", + ), + pytest.param( + gxe.ExpectTableRowCountToBeBetween( + min_value=3, max_value=3, strict_min=True, strict_max=True + ), + id="strict_min_max_observed_same_as_min_and_max", + ), ], ) @parameterize_batch_for_data_sources(data_source_configs=JUST_PANDAS_DATA_SOURCES, data=DATA) @@ -85,8 +124,7 @@ def test_failure( assert not result.success -@pytest.mark.xfail(reason="Fails at validation, but should fail when instantiating") @pytest.mark.unit def test_valid_range() -> None: - with pytest.raises(ValueError): + with pytest.raises(pydantic.ValidationError): gxe.ExpectTableRowCountToBeBetween(min_value=5, max_value=4) diff --git a/tests/integration/data_sources_and_expectations/expectations/test_unexpected_rows_expectation.py b/tests/integration/data_sources_and_expectations/expectations/test_unexpected_rows_expectation.py index f00909b19750..00e37ba24f86 100644 --- a/tests/integration/data_sources_and_expectations/expectations/test_unexpected_rows_expectation.py +++ b/tests/integration/data_sources_and_expectations/expectations/test_unexpected_rows_expectation.py @@ -6,6 +6,7 @@ import pytest import great_expectations.expectations as gxe +from great_expectations.datasource.fluent.interfaces import Batch from tests.integration.conftest import parameterize_batch_for_data_sources from tests.integration.test_utils.data_source_config import ( 
BigQueryDatasourceTestConfig, @@ -234,7 +235,7 @@ def test_unexpected_rows_expectation_batch_keyword_partitioner_success( unexpected_rows_query, ) -> None: batch = asset_for_datasource.add_batch_definition_monthly( - name=str(uuid4()), column=DATE_COLUMN + name="my-batch-def", column=DATE_COLUMN ).get_batch() expectation = gxe.UnexpectedRowsExpectation( description="Expect query with {batch} keyword and paritioner defined to succeed", @@ -255,7 +256,7 @@ def test_unexpected_rows_expectation_join_keyword_partitioner_success( extra_table_names_for_datasource, ) -> None: batch = asset_for_datasource.add_batch_definition_monthly( - name=str(uuid4()), column=DATE_COLUMN + name="my-batch-def", column=DATE_COLUMN ).get_batch() for join_success_query in JOIN_SUCCESS_QUERIES: unexpected_rows_query = join_success_query.replace( @@ -314,3 +315,51 @@ def test_unexpected_rows_expectation_join_keyword_partitioner_failure( result = batch.validate(expectation) assert result.success is False assert result.exception_info.get("raised_exception") is False + + +@parameterize_batch_for_data_sources( + data_source_configs=[PostgreSQLDatasourceTestConfig()], + data=TABLE_1, +) +def test_success_result_format(batch_for_datasource: Batch) -> None: + result = batch_for_datasource.validate( + gxe.UnexpectedRowsExpectation( + unexpected_rows_query="SELECT * FROM {batch} WHERE entity_id = 123" + ) + ) + + assert result.success + assert result.result == { + "observed_value": 0, + "details": { + "unexpected_rows": [], + }, + } + + +@parameterize_batch_for_data_sources( + data_source_configs=[PostgreSQLDatasourceTestConfig()], + data=TABLE_1, +) +def test_fail_result_format(batch_for_datasource: Batch) -> None: + result = batch_for_datasource.validate( + gxe.UnexpectedRowsExpectation( + unexpected_rows_query="SELECT * FROM {batch} WHERE entity_id = 2" + ) + ) + + assert not result.success + assert result.result == { + "observed_value": 1, + "details": { + "unexpected_rows": [ + { + "entity_id": 2, 
+ "created_at": datetime(year=2024, month=11, day=30, tzinfo=timezone.utc).date(), + "quantity": 2, + "temperature": 92, + "color": "red", + } + ], + }, + } diff --git a/tests/integration/data_sources_and_expectations/test_misconfigured_expectations.py b/tests/integration/data_sources_and_expectations/test_misconfigured_expectations.py index 4195c0b581b6..6194199ee1cb 100644 --- a/tests/integration/data_sources_and_expectations/test_misconfigured_expectations.py +++ b/tests/integration/data_sources_and_expectations/test_misconfigured_expectations.py @@ -67,7 +67,7 @@ def test_pandas(self, batch_for_datasource) -> None: def test_bigquery(self, batch_for_datasource) -> None: self._assert_misconfiguration( batch_for_datasource=batch_for_datasource, - exception_message="No matching signature for operator * for argument types: FLOAT64, STRING", # noqa: E501 + exception_message="No matching signature for operator *", ) @parameterize_batch_for_data_sources( diff --git a/tests/integration/data_sources_and_expectations/test_test_performance.py b/tests/integration/data_sources_and_expectations/test_test_performance.py index 77bd3a975b46..d0dadbbc31af 100644 --- a/tests/integration/data_sources_and_expectations/test_test_performance.py +++ b/tests/integration/data_sources_and_expectations/test_test_performance.py @@ -8,7 +8,7 @@ """ from dataclasses import dataclass -from functools import cache, cached_property +from functools import cache from typing import Mapping import pandas as pd @@ -53,18 +53,19 @@ def create_batch_setup( class DummyBatchTestSetup(BatchTestSetup[DummyTestConfig, DataFrameAsset]): - @cached_property @override - def asset(self) -> DataFrameAsset: + def make_asset(self) -> DataFrameAsset: return self.context.data_sources.add_pandas( self._random_resource_name() ).add_dataframe_asset(self._random_resource_name()) @override def make_batch(self) -> Batch: - return self.asset.add_batch_definition_whole_dataframe( - self._random_resource_name() - 
).get_batch(batch_parameters={"dataframe": self.data}) + return ( + self.make_asset() + .add_batch_definition_whole_dataframe(self._random_resource_name()) + .get_batch(batch_parameters={"dataframe": self.data}) + ) @override def setup(self) -> None: diff --git a/tests/integration/db/taxi_data_utils.py b/tests/integration/db/taxi_data_utils.py index b93a833d3c7c..21cfb48fc8c3 100644 --- a/tests/integration/db/taxi_data_utils.py +++ b/tests/integration/db/taxi_data_utils.py @@ -12,7 +12,7 @@ from great_expectations.execution_engine.sqlalchemy_batch_data import ( SqlAlchemyBatchData, ) -from tests.integration.fixtures.partition_and_sample_data.partitioner_test_cases_and_fixtures import ( # noqa: E501 +from tests.integration.fixtures.partition_and_sample_data.partitioner_test_cases_and_fixtures import ( # noqa: E501 # FIXME CoP TaxiPartitioningTestCase, TaxiPartitioningTestCasesBase, ) diff --git a/tests/integration/db/test_sql_data_partitioned_on_datetime_and_day_part.py b/tests/integration/db/test_sql_data_partitioned_on_datetime_and_day_part.py index 9bb8c5ef1151..b6711a627b30 100644 --- a/tests/integration/db/test_sql_data_partitioned_on_datetime_and_day_part.py +++ b/tests/integration/db/test_sql_data_partitioned_on_datetime_and_day_part.py @@ -4,7 +4,7 @@ _execute_taxi_partitioning_test_cases, loaded_table, ) -from tests.integration.fixtures.partition_and_sample_data.partitioner_test_cases_and_fixtures import ( # noqa: E501 +from tests.integration.fixtures.partition_and_sample_data.partitioner_test_cases_and_fixtures import ( # noqa: E501 # FIXME CoP TaxiPartitioningTestCasesBase, TaxiPartitioningTestCasesDateTime, TaxiTestData, diff --git a/tests/integration/db/test_sql_data_partitioned_on_whole_table.py b/tests/integration/db/test_sql_data_partitioned_on_whole_table.py index 30b5f3d39940..0b66937591f0 100644 --- a/tests/integration/db/test_sql_data_partitioned_on_whole_table.py +++ b/tests/integration/db/test_sql_data_partitioned_on_whole_table.py @@ -4,7 +4,7 
@@ _execute_taxi_partitioning_test_cases, loaded_table, ) -from tests.integration.fixtures.partition_and_sample_data.partitioner_test_cases_and_fixtures import ( # noqa: E501 +from tests.integration.fixtures.partition_and_sample_data.partitioner_test_cases_and_fixtures import ( # noqa: E501 # FIXME CoP TaxiPartitioningTestCasesBase, TaxiPartitioningTestCasesWholeTable, TaxiTestData, diff --git a/tests/integration/fixtures/partition_and_sample_data/mssql_connection_string.yml b/tests/integration/fixtures/partition_and_sample_data/mssql_connection_string.yml index 0d523189a120..f160222ae218 100644 --- a/tests/integration/fixtures/partition_and_sample_data/mssql_connection_string.yml +++ b/tests/integration/fixtures/partition_and_sample_data/mssql_connection_string.yml @@ -1,2 +1,2 @@ -connection_string: mssql+pyodbc://sa:ReallyStrongPwd1234%^&*@localhost:1433/test_ci?driver=ODBC Driver 17 for SQL Server&charset=utf8&autocommit=true +connection_string: mssql+pyodbc://sa:ReallyStrongPwd1234%^&*@127.0.0.1:1433/test_ci?driver=ODBC Driver 18 for SQL Server&charset=utf8&autocommit=true&TrustServerCertificate=yes dialect: mssql diff --git a/tests/integration/fixtures/partition_and_sample_data/partitioner_test_cases_and_fixtures.py b/tests/integration/fixtures/partition_and_sample_data/partitioner_test_cases_and_fixtures.py index 0103040c4c56..0ac8033cfb38 100644 --- a/tests/integration/fixtures/partition_and_sample_data/partitioner_test_cases_and_fixtures.py +++ b/tests/integration/fixtures/partition_and_sample_data/partitioner_test_cases_and_fixtures.py @@ -38,7 +38,7 @@ def __init__( self._test_column_name = test_column_name self._test_column_names = test_column_names - # Convert specified columns (e.g., "pickup_datetime" and "dropoff_datetime") to datetime column type. # noqa: E501 + # Convert specified columns (e.g., "pickup_datetime" and "dropoff_datetime") to datetime column type. 
# noqa: E501 # FIXME CoP convert_string_columns_to_datetime( df=test_df, column_names_to_convert=column_names_to_convert ) diff --git a/tests/integration/integration_test_fixture.py b/tests/integration/integration_test_fixture.py index 5fd262387adc..ea727ae78825 100644 --- a/tests/integration/integration_test_fixture.py +++ b/tests/integration/integration_test_fixture.py @@ -21,7 +21,7 @@ class IntegrationTestFixture: data_dir: Folder that contains data used in the test. other_files: other files (like credential information) to copy into the test environment. These are presented as Tuple(path_to_source_file, path_to_target_file), where path_to_target_file is relative to the test_script.py file in our test environment util_script: Path of optional util script that is used in test script (for loading test_specific methods like load_data_into_test_database()) - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP name: str user_flow_script: str diff --git a/tests/integration/spark/test_spark_config.py b/tests/integration/spark/test_spark_config.py index b6d7d6053dfa..d95dd4bfe721 100644 --- a/tests/integration/spark/test_spark_config.py +++ b/tests/integration/spark/test_spark_config.py @@ -16,7 +16,7 @@ SparkDFExecutionEngine = None # TODO: review logging more detail here logger.debug( - "Unable to load pyspark; install optional spark dependency if you will be working with Spark dataframes." # noqa: E501 + "Unable to load pyspark; install optional spark dependency if you will be working with Spark dataframes." 
# noqa: E501 # FIXME CoP ) # module level markers @@ -40,7 +40,7 @@ def test_spark_config_datasource(spark_session_v012): name="my spark datasource", spark_config=spark_config, ) - # a warning is raised because passing unmodifiable config options results in restarting spark context # noqa: E501 + # a warning is raised because passing unmodifiable config options results in restarting spark context # noqa: E501 # FIXME CoP with pytest.warns(RuntimeWarning): execution_engine: SparkDFExecutionEngine = spark_datasource.get_execution_engine() spark_session: pyspark.SparkSession = execution_engine.spark diff --git a/tests/integration/test_definitions/gcs/integration_tests.py b/tests/integration/test_definitions/gcs/integration_tests.py index acd4001996fc..75053a57215f 100644 --- a/tests/integration/test_definitions/gcs/integration_tests.py +++ b/tests/integration/test_definitions/gcs/integration_tests.py @@ -8,24 +8,24 @@ connecting_to_your_data: List[IntegrationTestFixture] = [] how_to_configure_metadata_store = [ - # Chetan - 20231117 - These have been commented out due to their reliance on the CLI (which has been deleted). # noqa: E501 + # Chetan - 20231117 - These have been commented out due to their reliance on the CLI (which has been deleted). # noqa: E501 # FIXME CoP # They should be re-enabled once they have been updated. 
# IntegrationTestFixture( # name="how_to_configure_an_expectation_store_in_gcs", - # user_flow_script="docs/docusaurus/docs/oss/guides/setup/configuring_metadata_stores/how_to_configure_an_expectation_store_in_gcs.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/setup/configuring_metadata_stores/how_to_configure_an_expectation_store_in_gcs.py", # noqa: E501 # FIXME CoP # data_context_dir="tests/integration/fixtures/no_datasources/great_expectations", # backend_dependencies=[BackendDependencies.GCS], # ), # IntegrationTestFixture( # name="how_to_host_and_share_data_docs_on_gcs", - # user_flow_script="docs/docusaurus/docs/oss/guides/setup/configuring_data_docs/how_to_host_and_share_data_docs_on_gcs.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/setup/configuring_data_docs/how_to_host_and_share_data_docs_on_gcs.py", # noqa: E501 # FIXME CoP # data_context_dir="tests/integration/fixtures/no_datasources/great_expectations", # data_dir="tests/test_sets/taxi_yellow_tripdata_samples/first_3_files", # backend_dependencies=[BackendDependencies.GCS], # ), # IntegrationTestFixture( # name="how_to_configure_a_validation_result_store_in_gcs", - # user_flow_script="docs/docusaurus/docs/oss/guides/setup/configuring_metadata_stores/how_to_configure_a_validation_result_store_in_gcs.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/setup/configuring_metadata_stores/how_to_configure_a_validation_result_store_in_gcs.py", # noqa: E501 # FIXME CoP # data_context_dir="tests/integration/fixtures/no_datasources/great_expectations", # data_dir="tests/test_sets/taxi_yellow_tripdata_samples/first_3_files", # backend_dependencies=[BackendDependencies.GCS], diff --git a/tests/integration/test_definitions/redshift/integration_tests.py b/tests/integration/test_definitions/redshift/integration_tests.py index 52f3e8f09ebd..ccb93d108512 100644 --- a/tests/integration/test_definitions/redshift/integration_tests.py +++ 
b/tests/integration/test_definitions/redshift/integration_tests.py @@ -4,7 +4,7 @@ redshift_integration_tests = [] connecting_to_your_data = [ - # TODO: ALEX: Rename test modules to include "configured" and "inferred_and_runtime" suffixes in names. # noqa: E501 + # TODO: ALEX: Rename test modules to include "configured" and "inferred_and_runtime" suffixes in names. # noqa: E501 # FIXME CoP # IntegrationTestFixture( # name = "redshift_python_example", # user_flow_script= "docs/docusaurus/docs/snippets/redshift_python_example.py", diff --git a/tests/integration/test_definitions/spark/integration_tests.py b/tests/integration/test_definitions/spark/integration_tests.py index 2586da6a41a9..6c4077385949 100644 --- a/tests/integration/test_definitions/spark/integration_tests.py +++ b/tests/integration/test_definitions/spark/integration_tests.py @@ -11,7 +11,7 @@ # unable to mock dbfs in CI # IntegrationTestFixture( # name="databricks_deployment_patterns_file_python_configs", - # user_flow_script="docs/docusaurus/docs/snippets/databricks_deployment_patterns_file_python_configs.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/snippets/databricks_deployment_patterns_file_python_configs.py", # noqa: E501 # FIXME CoP # data_dir="tests/test_sets/taxi_yellow_tripdata_samples/first_3_files", # backend_dependencies=[BackendDependencies.SPARK], # ), diff --git a/tests/integration/test_script_runner.py b/tests/integration/test_script_runner.py index 65d2489dd2c7..0e9c43085b8a 100644 --- a/tests/integration/test_script_runner.py +++ b/tests/integration/test_script_runner.py @@ -2,7 +2,7 @@ Individual tests can be run by setting the '-k' flag and referencing the name of test, like the following example: pytest -v --docs-tests -k "test_docs[quickstart]" tests/integration/test_script_runner.py -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP import importlib.machinery import importlib.util @@ -95,13 +95,13 @@ def delay_rerun(*args): local_tests = [ # IntegrationTestFixture( # 
name="how_to_add_validations_data_or_suites_to_a_checkpoint.py", - # user_flow_script="docs/docusaurus/docs/oss/guides/validation/checkpoints/how_to_add_validations_data_or_suites_to_a_checkpoint.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/validation/checkpoints/how_to_add_validations_data_or_suites_to_a_checkpoint.py", # noqa: E501 # FIXME CoP # data_dir="tests/test_sets/taxi_yellow_tripdata_samples/first_3_files", # backend_dependencies=[], # ), # IntegrationTestFixture( # name="how_to_validate_multiple_batches_within_single_checkpoint", - # user_flow_script="docs/docusaurus/docs/oss/guides/validation/checkpoints/how_to_validate_multiple_batches_within_single_checkpoint.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/validation/checkpoints/how_to_validate_multiple_batches_within_single_checkpoint.py", # noqa: E501 # FIXME CoP # data_dir="tests/test_sets/taxi_yellow_tripdata_samples/first_3_files", # backend_dependencies=[BackendDependencies.PANDAS], # ), @@ -112,12 +112,12 @@ def delay_rerun(*args): ), # IntegrationTestFixture( # name="expect_column_values_to_be_in_solfege_scale_set", - # user_flow_script="docs/docusaurus/docs/oss/guides/expectations/creating_custom_expectations/expect_column_values_to_be_in_solfege_scale_set.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/expectations/creating_custom_expectations/expect_column_values_to_be_in_solfege_scale_set.py", # noqa: E501 # FIXME CoP # backend_dependencies=[], # ), # IntegrationTestFixture( # name="expect_column_values_to_only_contain_vowels", - # user_flow_script="docs/docusaurus/docs/oss/guides/expectations/creating_custom_expectations/expect_column_values_to_only_contain_vowels.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/expectations/creating_custom_expectations/expect_column_values_to_only_contain_vowels.py", # noqa: E501 # FIXME CoP # backend_dependencies=[], # ), IntegrationTestFixture( @@ -128,20 
+128,20 @@ def delay_rerun(*args): # Fluent Datasources # IntegrationTestFixture( # name="how_to_create_and_edit_expectations_with_instant_feedback_fluent", - # user_flow_script="docs/docusaurus/docs/oss/guides/expectations/how_to_create_and_edit_expectations_with_instant_feedback_fluent.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/expectations/how_to_create_and_edit_expectations_with_instant_feedback_fluent.py", # noqa: E501 # FIXME CoP # data_dir="tests/test_sets/taxi_yellow_tripdata_samples/first_3_files", # backend_dependencies=[], # ), # IntegrationTestFixture( # name="data_docs", - # user_flow_script="docs/docusaurus/docs/oss/guides/setup/configuring_data_docs/data_docs.py", # noqa: E501 - # data_context_dir="tests/integration/fixtures/yellow_trip_data_fluent_pandas/great_expectations", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/setup/configuring_data_docs/data_docs.py", # noqa: E501 # FIXME CoP + # data_context_dir="tests/integration/fixtures/yellow_trip_data_fluent_pandas/great_expectations", # noqa: E501 # FIXME CoP # data_dir="tests/test_sets/taxi_yellow_tripdata_samples", # backend_dependencies=[], # ), # IntegrationTestFixture( # name="how_to_edit_an_existing_expectation_suite", - # user_flow_script="docs/docusaurus/docs/oss/guides/expectations/how_to_edit_an_expectation_suite.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/expectations/how_to_edit_an_expectation_suite.py", # noqa: E501 # FIXME CoP # backend_dependencies=[], # ), IntegrationTestFixture( @@ -186,7 +186,7 @@ def delay_rerun(*args): ), # IntegrationTestFixture( # name="how_to_pass_an_in_memory_dataframe_to_a_checkpoint", - # user_flow_script="docs/docusaurus/docs/oss/guides/validation/checkpoints/how_to_pass_an_in_memory_dataframe_to_a_checkpoint.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/validation/checkpoints/how_to_pass_an_in_memory_dataframe_to_a_checkpoint.py", # noqa: E501 # FIXME 
CoP # data_context_dir="tests/integration/fixtures/no_datasources/great_expectations", # data_dir="tests/test_sets/taxi_yellow_tripdata_samples/first_3_files", # backend_dependencies=[], @@ -200,7 +200,7 @@ def delay_rerun(*args): ), # IntegrationTestFixture( # name="how_to_create_and_edit_an_expectation_with_domain_knowledge", - # user_flow_script="docs/docusaurus/docs/oss/guides/expectations/how_to_create_and_edit_an_expectationsuite_domain_knowledge.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/expectations/how_to_create_and_edit_an_expectationsuite_domain_knowledge.py", # noqa: E501 # FIXME CoP # data_context_dir=None, # backend_dependencies=[], # ), @@ -213,7 +213,7 @@ def delay_rerun(*args): ), # IntegrationTestFixture( # name="how_to_organize_batches_in_a_file_based_data_asset", - # user_flow_script="docs/docusaurus/docs/oss/guides/connecting_to_your_data/fluent/data_assets/organize_batches_in_pandas_filesystem_datasource.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/connecting_to_your_data/fluent/data_assets/organize_batches_in_pandas_filesystem_datasource.py", # noqa: E501 # FIXME CoP # data_context_dir="tests/integration/fixtures/no_datasources/great_expectations", # data_dir="tests/test_sets/taxi_yellow_tripdata_samples/first_3_files", # backend_dependencies=[BackendDependencies.PANDAS], @@ -226,26 +226,26 @@ def delay_rerun(*args): ), # IntegrationTestFixture( # name="how_to_connect_to_one_or_more_files_using_pandas", - # user_flow_script="docs/docusaurus/docs/oss/guides/connecting_to_your_data/fluent/filesystem/how_to_connect_to_one_or_more_files_using_pandas.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/connecting_to_your_data/fluent/filesystem/how_to_connect_to_one_or_more_files_using_pandas.py", # noqa: E501 # FIXME CoP # data_context_dir="tests/integration/fixtures/no_datasources/great_expectations", # data_dir="tests/test_sets/taxi_yellow_tripdata_samples/first_3_files", 
# backend_dependencies=[BackendDependencies.PANDAS], # ), # IntegrationTestFixture( # name="how_to_connect_to_sql_data_using_a_query", - # user_flow_script="docs/docusaurus/docs/oss/guides/connecting_to_your_data/fluent/database/how_to_connect_to_sql_data_using_a_query.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/connecting_to_your_data/fluent/database/how_to_connect_to_sql_data_using_a_query.py", # noqa: E501 # FIXME CoP # data_context_dir="tests/integration/fixtures/no_datasources/great_expectations", # backend_dependencies=[], # ), # IntegrationTestFixture( # name="how_to_quickly_connect_to_a_single_file_with_pandas", - # user_flow_script="docs/docusaurus/docs/oss/guides/connecting_to_your_data/fluent/filesystem/how_to_quickly_connect_to_a_single_file_with_pandas.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/connecting_to_your_data/fluent/filesystem/how_to_quickly_connect_to_a_single_file_with_pandas.py", # noqa: E501 # FIXME CoP # data_context_dir="tests/integration/fixtures/no_datasources/great_expectations", # backend_dependencies=[BackendDependencies.PANDAS], # ), # IntegrationTestFixture( # name="how_to_connect_to_sqlite_data", - # user_flow_script="docs/docusaurus/docs/oss/guides/connecting_to_your_data/fluent/database/how_to_connect_to_sqlite_data.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/connecting_to_your_data/fluent/database/how_to_connect_to_sqlite_data.py", # noqa: E501 # FIXME CoP # data_context_dir="tests/integration/fixtures/no_datasources/great_expectations", # backend_dependencies=[BackendDependencies.SQLALCHEMY], # ), @@ -257,19 +257,19 @@ def delay_rerun(*args): ), # IntegrationTestFixture( # name="how_to_connect_to_sql_data", - # user_flow_script="docs/docusaurus/docs/oss/guides/connecting_to_your_data/fluent/database/how_to_connect_to_sqlite_data.py", # noqa: E501 + # 
user_flow_script="docs/docusaurus/docs/oss/guides/connecting_to_your_data/fluent/database/how_to_connect_to_sqlite_data.py", # noqa: E501 # FIXME CoP # data_context_dir="tests/integration/fixtures/no_datasources/great_expectations", # backend_dependencies=[], # ), # IntegrationTestFixture( # name="how_to_instantiate_a_specific_filesystem_data_context", - # user_flow_script="docs/docusaurus/docs/oss/guides/setup/configuring_data_contexts/instantiating_data_contexts/how_to_instantiate_a_specific_filesystem_data_context.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/setup/configuring_data_contexts/instantiating_data_contexts/how_to_instantiate_a_specific_filesystem_data_context.py", # noqa: E501 # FIXME CoP # data_context_dir="tests/integration/fixtures/no_datasources/great_expectations", # backend_dependencies=[], # ), # IntegrationTestFixture( # name="how_to_initialize_a_filesystem_data_context_in_python", - # user_flow_script="docs/docusaurus/docs/oss/guides/setup/configuring_data_contexts/instantiating_data_contexts/how_to_initialize_a_filesystem_data_context_in_python.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/setup/configuring_data_contexts/instantiating_data_contexts/how_to_initialize_a_filesystem_data_context_in_python.py", # noqa: E501 # FIXME CoP # data_context_dir="tests/integration/fixtures/no_datasources/great_expectations", # backend_dependencies=[], # ), @@ -281,13 +281,13 @@ def delay_rerun(*args): ), # IntegrationTestFixture( # name="how_to_connect_to_in_memory_data_using_pandas", - # user_flow_script="docs/docusaurus/docs/oss/guides/connecting_to_your_data/fluent/in_memory/how_to_connect_to_in_memory_data_using_pandas.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/connecting_to_your_data/fluent/in_memory/how_to_connect_to_in_memory_data_using_pandas.py", # noqa: E501 # FIXME CoP # data_context_dir="tests/integration/fixtures/no_datasources/great_expectations", # 
backend_dependencies=[BackendDependencies.PANDAS], # ), # IntegrationTestFixture( # name="how_to_connect_to_in_memory_data_using_spark", - # user_flow_script="docs/docusaurus/docs/oss/guides/connecting_to_your_data/fluent/in_memory/how_to_connect_to_in_memory_data_using_spark.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/connecting_to_your_data/fluent/in_memory/how_to_connect_to_in_memory_data_using_spark.py", # noqa: E501 # FIXME CoP # data_context_dir="tests/integration/fixtures/no_datasources/great_expectations", # backend_dependencies=[BackendDependencies.SPARK], # ), @@ -298,21 +298,21 @@ def delay_rerun(*args): # name="failed_rows_pandas", # data_context_dir="tests/integration/fixtures/failed_rows/great_expectations", # data_dir="tests/test_sets/visits", - # user_flow_script="docs/docusaurus/docs/oss/guides/expectations/advanced/failed_rows_pandas.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/expectations/advanced/failed_rows_pandas.py", # noqa: E501 # FIXME CoP # backend_dependencies=[], # ), # IntegrationTestFixture( # name="failed_rows_sqlalchemy", # data_context_dir="tests/integration/fixtures/failed_rows/great_expectations", # data_dir="tests/test_sets/visits", - # user_flow_script="docs/docusaurus/docs/oss/guides/expectations/advanced/failed_rows_sql.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/expectations/advanced/failed_rows_sql.py", # noqa: E501 # FIXME CoP # backend_dependencies=[BackendDependencies.SQLALCHEMY], # ), # IntegrationTestFixture( # name="failed_rows_spark", # data_context_dir="tests/integration/fixtures/failed_rows/great_expectations", # data_dir="tests/test_sets/visits", - # user_flow_script="docs/docusaurus/docs/oss/guides/expectations/advanced/failed_rows_spark.py", # noqa: E501 + # user_flow_script="docs/docusaurus/docs/oss/guides/expectations/advanced/failed_rows_spark.py", # noqa: E501 # FIXME CoP # backend_dependencies=[BackendDependencies.SPARK], # 
), ] @@ -370,7 +370,7 @@ def test_integration_tests(test_configuration, tmp_path, pytest_parsed_arguments _execute_integration_test(test_configuration, tmp_path) -def _execute_integration_test( # noqa: C901, PLR0915 +def _execute_integration_test( # noqa: C901, PLR0915 # FIXME CoP integration_test_fixture: IntegrationTestFixture, tmp_path: pathlib.Path ): """ @@ -432,7 +432,7 @@ def _execute_integration_test( # noqa: C901, PLR0915 script_path = tmp_path / "test_script.py" shutil.copyfile(script_source, script_path) logger.debug( - f"(_execute_integration_test) script_source -> {script_source} :: copied to {script_path}" # noqa: E501 + f"(_execute_integration_test) script_source -> {script_source} :: copied to {script_path}" # noqa: E501 # FIXME CoP ) if script_source.suffix != ".py": logger.error(f"{script_source} is not a python script!") @@ -453,7 +453,7 @@ def _execute_integration_test( # noqa: C901, PLR0915 test_script_module = importlib.util.module_from_spec(spec) loader.exec_module(test_script_module) except Exception as e: - logger.error(str(e)) # noqa: TRY400 + logger.error(str(e)) # noqa: TRY400 # FIXME CoP if "JavaPackage" in str(e) and "aws_glue" in user_flow_script: logger.debug("This is something aws_glue related, so just going to return") # Should try to copy aws-glue-libs jar files to Spark jar during pipeline setup @@ -465,7 +465,7 @@ def _execute_integration_test( # noqa: C901, PLR0915 os.chdir(workdir) -def _check_for_skipped_tests( # noqa: C901, PLR0912 +def _check_for_skipped_tests( # noqa: C901, PLR0912 # FIXME CoP pytest_args, integration_test_fixture, ) -> None: @@ -488,10 +488,10 @@ def _check_for_skipped_tests( # noqa: C901, PLR0912 elif BackendDependencies.BIGQUERY in dependencies and ( pytest_args.no_sqlalchemy or not pytest_args.bigquery ): - # TODO : Investigate whether this test should be handled by azure-pipelines-cloud-db-integration.yml # noqa: E501 + # TODO : Investigate whether this test should be handled by 
azure-pipelines-cloud-db-integration.yml # noqa: E501 # FIXME CoP pytest.skip("Skipping bigquery tests") elif BackendDependencies.GCS in dependencies and not pytest_args.bigquery: - # TODO : Investigate whether this test should be handled by azure-pipelines-cloud-db-integration.yml # noqa: E501 + # TODO : Investigate whether this test should be handled by azure-pipelines-cloud-db-integration.yml # noqa: E501 # FIXME CoP pytest.skip("Skipping GCS tests") elif BackendDependencies.AWS in dependencies and not pytest_args.aws: pytest.skip("Skipping AWS tests") diff --git a/tests/integration/test_utils/data_source_config/base.py b/tests/integration/test_utils/data_source_config/base.py index 7ee3bbb5cda3..f06b49631722 100644 --- a/tests/integration/test_utils/data_source_config/base.py +++ b/tests/integration/test_utils/data_source_config/base.py @@ -97,9 +97,8 @@ def __init__(self, config: _ConfigT, data: pd.DataFrame) -> None: self.config = config self.data = data - @property @abstractmethod - def asset(self) -> _AssetT: ... + def make_asset(self) -> _AssetT: ... @abstractmethod def make_batch(self) -> Batch: ... 
@@ -124,7 +123,7 @@ def asset_test_context(self) -> Generator[_AssetT, None, None]: """Receive an Asset and ensure proper setup and teardown regardless of errors.""" try: self.setup() - yield self.asset + yield self.make_asset() finally: self.teardown() diff --git a/tests/integration/test_utils/data_source_config/big_query.py b/tests/integration/test_utils/data_source_config/big_query.py index a520ce5bfc58..45262a17418d 100644 --- a/tests/integration/test_utils/data_source_config/big_query.py +++ b/tests/integration/test_utils/data_source_config/big_query.py @@ -57,9 +57,8 @@ def use_schema(self) -> bool: # dataset is to create a schema: https://cloud.google.com/bigquery/docs/datasets#sql return True - @cached_property @override - def asset(self) -> TableAsset: + def make_asset(self) -> TableAsset: return self.context.data_sources.add_sql( name=self._random_resource_name(), connection_string=self.connection_string ).add_table_asset( diff --git a/tests/integration/test_utils/data_source_config/databricks.py b/tests/integration/test_utils/data_source_config/databricks.py index 3fb02c42e795..f2cc7436e76c 100644 --- a/tests/integration/test_utils/data_source_config/databricks.py +++ b/tests/integration/test_utils/data_source_config/databricks.py @@ -69,9 +69,8 @@ def inferrable_types_lookup(self) -> dict[type, TypeEngine]: def _databrics_connection_config(self) -> DatabricksConnectionConfig: return DatabricksConnectionConfig() # type: ignore[call-arg] # retrieves env vars - @cached_property @override - def asset(self) -> TableAsset: + def make_asset(self) -> TableAsset: return self.context.data_sources.add_databricks_sql( name=self._random_resource_name(), connection_string=self.connection_string, diff --git a/tests/integration/test_utils/data_source_config/mssql.py b/tests/integration/test_utils/data_source_config/mssql.py index a7771e4a0e5e..f178c1d8514e 100644 --- a/tests/integration/test_utils/data_source_config/mssql.py +++ 
b/tests/integration/test_utils/data_source_config/mssql.py @@ -1,4 +1,3 @@ -from functools import cached_property from typing import Mapping import pandas as pd @@ -11,6 +10,7 @@ DataSourceTestConfig, ) from tests.integration.test_utils.data_source_config.sql import SQLBatchTestSetup +from tests.test_utils import get_default_mssql_url class MSSQLDatasourceTestConfig(DataSourceTestConfig): @@ -43,16 +43,15 @@ class MSSQLBatchTestSetup(SQLBatchTestSetup[MSSQLDatasourceTestConfig]): @property @override def connection_string(self) -> str: - return "mssql+pyodbc://sa:ReallyStrongPwd1234%^&*@localhost:1433/test_ci?driver=ODBC Driver 17 for SQL Server&charset=utf8&autocommit=true" # noqa: E501 # it's okay + return get_default_mssql_url() @property @override def use_schema(self) -> bool: return False - @cached_property @override - def asset(self) -> TableAsset: + def make_asset(self) -> TableAsset: return self.context.data_sources.add_sql( name=self._random_resource_name(), connection_string=self.connection_string ).add_table_asset( diff --git a/tests/integration/test_utils/data_source_config/mysql.py b/tests/integration/test_utils/data_source_config/mysql.py index 45ffa26ecf0a..363858bc3fd2 100644 --- a/tests/integration/test_utils/data_source_config/mysql.py +++ b/tests/integration/test_utils/data_source_config/mysql.py @@ -10,7 +10,6 @@ BatchTestSetup, DataSourceTestConfig, ) -from tests.integration.test_utils.data_source_config.databricks import cached_property from tests.integration.test_utils.data_source_config.sql import SQLBatchTestSetup @@ -59,9 +58,8 @@ def inferrable_types_lookup(self) -> Dict[Type, TypeEngine]: } return super().inferrable_types_lookup | overrides - @cached_property @override - def asset(self) -> TableAsset: + def make_asset(self) -> TableAsset: return self.context.data_sources.add_sql( name=self._random_resource_name(), connection_string=self.connection_string ).add_table_asset( diff --git 
a/tests/integration/test_utils/data_source_config/pandas_data_frame.py b/tests/integration/test_utils/data_source_config/pandas_data_frame.py index 9d6dfe7e214e..524d31ed585b 100644 --- a/tests/integration/test_utils/data_source_config/pandas_data_frame.py +++ b/tests/integration/test_utils/data_source_config/pandas_data_frame.py @@ -1,4 +1,3 @@ -from functools import cached_property from typing import Mapping import pandas as pd @@ -38,18 +37,19 @@ def create_batch_setup( class PandasDataFrameBatchTestSetup( BatchTestSetup[PandasDataFrameDatasourceTestConfig, DataFrameAsset] ): - @cached_property @override - def asset(self) -> DataFrameAsset: + def make_asset(self) -> DataFrameAsset: return self.context.data_sources.add_pandas( self._random_resource_name() ).add_dataframe_asset(self._random_resource_name()) @override def make_batch(self) -> Batch: - return self.asset.add_batch_definition_whole_dataframe( - self._random_resource_name() - ).get_batch(batch_parameters={"dataframe": self.data}) + return ( + self.make_asset() + .add_batch_definition_whole_dataframe(self._random_resource_name()) + .get_batch(batch_parameters={"dataframe": self.data}) + ) @override def setup(self) -> None: ... 
diff --git a/tests/integration/test_utils/data_source_config/pandas_filesystem_csv.py b/tests/integration/test_utils/data_source_config/pandas_filesystem_csv.py index b2f66190a44e..dee709e09ccf 100644 --- a/tests/integration/test_utils/data_source_config/pandas_filesystem_csv.py +++ b/tests/integration/test_utils/data_source_config/pandas_filesystem_csv.py @@ -1,5 +1,4 @@ import pathlib -from functools import cached_property from typing import Mapping import pandas as pd @@ -56,18 +55,19 @@ def __init__( super().__init__(config=config, data=data) self._base_dir = base_dir - @cached_property @override - def asset(self) -> CSVAsset: + def make_asset(self) -> CSVAsset: return self.context.data_sources.add_pandas_filesystem( name=self._random_resource_name(), base_directory=self._base_dir ).add_csv_asset(name=self._random_resource_name()) @override def make_batch(self) -> Batch: - return self.asset.add_batch_definition_path( - name=self._random_resource_name(), path=self.csv_path - ).get_batch() + return ( + self.make_asset() + .add_batch_definition_path(name=self._random_resource_name(), path=self.csv_path) + .get_batch() + ) @override def setup(self) -> None: diff --git a/tests/integration/test_utils/data_source_config/postgres.py b/tests/integration/test_utils/data_source_config/postgres.py index 7dd9961b2c08..16fd7e0cab37 100644 --- a/tests/integration/test_utils/data_source_config/postgres.py +++ b/tests/integration/test_utils/data_source_config/postgres.py @@ -9,7 +9,6 @@ BatchTestSetup, DataSourceTestConfig, ) -from tests.integration.test_utils.data_source_config.databricks import cached_property from tests.integration.test_utils.data_source_config.sql import SQLBatchTestSetup @@ -50,9 +49,8 @@ def connection_string(self) -> str: def use_schema(self) -> bool: return False - @cached_property @override - def asset(self) -> TableAsset: + def make_asset(self) -> TableAsset: return self.context.data_sources.add_postgres( name=self._random_resource_name(), 
connection_string=self.connection_string ).add_table_asset( diff --git a/tests/integration/test_utils/data_source_config/snowflake.py b/tests/integration/test_utils/data_source_config/snowflake.py index afb0971cb185..83f16c32edd1 100644 --- a/tests/integration/test_utils/data_source_config/snowflake.py +++ b/tests/integration/test_utils/data_source_config/snowflake.py @@ -1,4 +1,3 @@ -from functools import cached_property from typing import Mapping, Optional import pandas as pd @@ -85,9 +84,8 @@ def __init__( self.snowflake_connection_config = SnowflakeConnectionConfig() # type: ignore[call-arg] # retrieves env vars super().__init__(config=config, data=data, extra_data=extra_data, table_name=table_name) - @cached_property @override - def asset(self) -> TableAsset: + def make_asset(self) -> TableAsset: schema = self.schema assert schema return self.context.data_sources.add_snowflake( diff --git a/tests/integration/test_utils/data_source_config/spark_filesystem_csv.py b/tests/integration/test_utils/data_source_config/spark_filesystem_csv.py index 9c0cd3c4308b..f0428007247d 100644 --- a/tests/integration/test_utils/data_source_config/spark_filesystem_csv.py +++ b/tests/integration/test_utils/data_source_config/spark_filesystem_csv.py @@ -1,5 +1,4 @@ import pathlib -from functools import cached_property from typing import Mapping import pandas as pd @@ -56,18 +55,19 @@ def __init__( super().__init__(config=config, data=data) self._base_dir = base_dir - @cached_property @override - def asset(self) -> CSVAsset: + def make_asset(self) -> CSVAsset: return self.context.data_sources.add_spark_filesystem( name=self._random_resource_name(), base_directory=self._base_dir ).add_csv_asset(name=self._random_resource_name(), header=True, infer_schema=True) @override def make_batch(self) -> Batch: - return self.asset.add_batch_definition_path( - name=self._random_resource_name(), path=self.csv_path - ).get_batch() + return ( + self.make_asset() + 
.add_batch_definition_path(name=self._random_resource_name(), path=self.csv_path) + .get_batch() + ) @override def setup(self) -> None: diff --git a/tests/integration/test_utils/data_source_config/sql.py b/tests/integration/test_utils/data_source_config/sql.py index 66883b93466b..87356b3d86b6 100644 --- a/tests/integration/test_utils/data_source_config/sql.py +++ b/tests/integration/test_utils/data_source_config/sql.py @@ -56,12 +56,12 @@ def inferrable_types_lookup(self) -> Dict[Type, TypeEngine]: """Dict of Python type keys mapped to SQL dialect-specific SqlAlchemy types.""" # implementations of the class can override this if more specific types are required return { - str: sqltypes.VARCHAR, # type: ignore[dict-item] - int: sqltypes.INTEGER, # type: ignore[dict-item] - float: sqltypes.DECIMAL, # type: ignore[dict-item] - bool: sqltypes.BOOLEAN, # type: ignore[dict-item] - date: sqltypes.DATE, # type: ignore[dict-item] - datetime: sqltypes.DATETIME, # type: ignore[dict-item] + str: sqltypes.VARCHAR, # type: ignore[dict-item] # FIXME CoP + int: sqltypes.INTEGER, # type: ignore[dict-item] # FIXME CoP + float: sqltypes.DECIMAL, # type: ignore[dict-item] # FIXME CoP + bool: sqltypes.BOOLEAN, # type: ignore[dict-item] # FIXME CoP + date: sqltypes.DATE, # type: ignore[dict-item] # FIXME CoP + datetime: sqltypes.DATETIME, # type: ignore[dict-item] # FIXME CoP } def __init__( @@ -79,9 +79,11 @@ def __init__( @override def make_batch(self) -> Batch: - return self.asset.add_batch_definition_whole_table( - name=self._random_resource_name() - ).get_batch() + return ( + self.make_asset() + .add_batch_definition_whole_table(name=self._random_resource_name()) + .get_batch() + ) @cached_property def table_name(self) -> str: @@ -202,7 +204,7 @@ def _infer_column_types(self, data: pd.DataFrame) -> Dict[str, TypeEngine]: non_null_value_list = [val for val in value_list if val is not None] if not non_null_value_list: # if we have an all null column, just arbitrarily use INTEGER - 
inferred_column_types[str(column)] = sqltypes.INTEGER # type: ignore[assignment] + inferred_column_types[str(column)] = sqltypes.INTEGER # type: ignore[assignment] # FIXME CoP else: python_type = type(non_null_value_list[0]) if not all(isinstance(val, python_type) for val in non_null_value_list): diff --git a/tests/integration/test_utils/data_source_config/sqlite.py b/tests/integration/test_utils/data_source_config/sqlite.py index a077eb82569b..558b57d045d4 100644 --- a/tests/integration/test_utils/data_source_config/sqlite.py +++ b/tests/integration/test_utils/data_source_config/sqlite.py @@ -1,5 +1,4 @@ import pathlib -from functools import cached_property from typing import Mapping, Optional import pandas as pd @@ -70,9 +69,8 @@ def use_schema(self) -> bool: def db_file_path(self) -> pathlib.Path: return self._base_dir / "database.db" - @cached_property @override - def asset(self) -> TableAsset: + def make_asset(self) -> TableAsset: return self.context.data_sources.add_sqlite( name=self._random_resource_name(), connection_string=self.connection_string, diff --git a/tests/profile/conftest.py b/tests/profile/conftest.py index eb8e7ca7eccd..c9f3d773eaae 100644 --- a/tests/profile/conftest.py +++ b/tests/profile/conftest.py @@ -15,28 +15,28 @@ @pytest.fixture def titanic_data_context_modular_api(tmp_path_factory, monkeypatch): project_path = str(tmp_path_factory.mktemp("titanic_data_context")) - context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "expectations"), # noqa: PTH118 + context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "expectations"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "checkpoints"), # noqa: PTH118 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "checkpoints"), # 
noqa: PTH118 # FIXME CoP exist_ok=True, ) - data_path = os.path.join(context_path, "../data") # noqa: PTH118 - os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 + data_path = os.path.join(context_path, "../data") # noqa: PTH118 # FIXME CoP + os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 # FIXME CoP titanic_yml_path = file_relative_path( __file__, "./fixtures/great_expectations_titanic_0.13.yml" ) shutil.copy( titanic_yml_path, - str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 + str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 # FIXME CoP ) titanic_csv_path = file_relative_path(__file__, "../test_sets/Titanic.csv") shutil.copy( titanic_csv_path, - str(os.path.join(context_path, "../data/Titanic.csv")), # noqa: PTH118 + str(os.path.join(context_path, "../data/Titanic.csv")), # noqa: PTH118 # FIXME CoP ) return gx.get_context(context_root_dir=context_path) @@ -73,7 +73,7 @@ def get_set_of_columns_and_expectations_from_suite( A tuple containing a set of columns and a set of expectations found in a suite """ columns: Set[str] = { - i.kwargs.get("column") # type: ignore[misc] + i.kwargs.get("column") # type: ignore[misc] # FIXME CoP for i in suite.expectation_configurations if i.kwargs.get("column") } diff --git a/tests/render/conftest.py b/tests/render/conftest.py index 3fca9a77c7ad..5cd9dac9011a 100644 --- a/tests/render/conftest.py +++ b/tests/render/conftest.py @@ -30,16 +30,16 @@ def empty_data_context_module_scoped(tmp_path_factory) -> FileDataContext: # Re-enable GE_USAGE_STATS project_path = str(tmp_path_factory.mktemp("empty_data_context")) context = gx.data_context.FileDataContext.create(project_path) - context_path = os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 - asset_config_path = os.path.join(context_path, "expectations") # noqa: PTH118 - os.makedirs(asset_config_path, exist_ok=True) # noqa: PTH103 + context_path = 
os.path.join(project_path, FileDataContext.GX_DIR) # noqa: PTH118 # FIXME CoP + asset_config_path = os.path.join(context_path, "expectations") # noqa: PTH118 # FIXME CoP + os.makedirs(asset_config_path, exist_ok=True) # noqa: PTH103 # FIXME CoP return context @pytest.fixture def titanic_profiled_name_column_evrs() -> ExpectationSuiteValidationResult: # This is a janky way to fetch expectations matching a specific name from an EVR suite. - # TODO: It will no longer be necessary once we implement ValidationResultSuite._group_evrs_by_column # noqa: E501 + # TODO: It will no longer be necessary once we implement ValidationResultSuite._group_evrs_by_column # noqa: E501 # FIXME CoP from great_expectations.render.renderer.renderer import Renderer with open( diff --git a/tests/render/fixtures/BasicDatasetProfiler_evrs.json b/tests/render/fixtures/BasicDatasetProfiler_evrs.json index e5c43253f8bc..cad25cbb40aa 100644 --- a/tests/render/fixtures/BasicDatasetProfiler_evrs.json +++ b/tests/render/fixtures/BasicDatasetProfiler_evrs.json @@ -308,7 +308,7 @@ "type": "expect_column_values_to_be_in_set", "kwargs": { "column": "Unnamed: 0", - "value_set": [], + "value_set": ["a", "b", "c"], "result_format": "SUMMARY" } }, @@ -599,7 +599,7 @@ "type": "expect_column_values_to_be_in_set", "kwargs": { "column": "Name", - "value_set": [], + "value_set": ["a", "b", "c"], "result_format": "SUMMARY" } }, @@ -848,7 +848,7 @@ "type": "expect_column_values_to_be_in_set", "kwargs": { "column": "PClass", - "value_set": [], + "value_set": ["a", "b", "c"], "result_format": "SUMMARY" } }, @@ -926,7 +926,7 @@ "type": "expect_column_distinct_values_to_be_in_set", "kwargs": { "column": "PClass", - "value_set": null, + "value_set": ["a", "b", "c"], "result_format": "SUMMARY" } }, @@ -1182,7 +1182,7 @@ "type": "expect_column_values_to_be_in_set", "kwargs": { "column": "Age", - "value_set": [], + "value_set": ["a", "b", "c"], "result_format": "SUMMARY" } }, @@ -1672,7 +1672,7 @@ "type": 
"expect_column_values_to_be_in_set", "kwargs": { "column": "Sex", - "value_set": [], + "value_set": ["a", "b", "c"], "result_format": "SUMMARY" } }, @@ -1740,7 +1740,7 @@ "type": "expect_column_distinct_values_to_be_in_set", "kwargs": { "column": "Sex", - "value_set": null, + "value_set": ["a", "b", "c"], "result_format": "SUMMARY" } }, @@ -1925,7 +1925,7 @@ "type": "expect_column_values_to_be_in_set", "kwargs": { "column": "Survived", - "value_set": [], + "value_set": ["a", "b", "c"], "result_format": "SUMMARY" } }, @@ -1964,7 +1964,7 @@ "type": "expect_column_distinct_values_to_be_in_set", "kwargs": { "column": "Survived", - "value_set": null, + "value_set": ["a", "b", "c"], "result_format": "SUMMARY" } }, @@ -2149,7 +2149,7 @@ "type": "expect_column_values_to_be_in_set", "kwargs": { "column": "SexCode", - "value_set": [], + "value_set": ["a", "b", "c"], "result_format": "SUMMARY" } }, @@ -2188,7 +2188,7 @@ "type": "expect_column_distinct_values_to_be_in_set", "kwargs": { "column": "SexCode", - "value_set": null, + "value_set": ["a", "b", "c"], "result_format": "SUMMARY" } }, diff --git a/tests/render/fixtures/BasicDatasetProfiler_expectations.json b/tests/render/fixtures/BasicDatasetProfiler_expectations.json index 91cc7ea0e822..d4c159f631aa 100644 --- a/tests/render/fixtures/BasicDatasetProfiler_expectations.json +++ b/tests/render/fixtures/BasicDatasetProfiler_expectations.json @@ -177,7 +177,7 @@ "type": "expect_column_distinct_values_to_be_in_set", "kwargs": { "column": "PClass", - "value_set": null + "value_set": ["a", "b", "c"] } }, { @@ -342,7 +342,7 @@ "type": "expect_column_distinct_values_to_be_in_set", "kwargs": { "column": "Sex", - "value_set": null + "value_set": ["a", "b", "c"] } }, { @@ -386,7 +386,7 @@ "type": "expect_column_distinct_values_to_be_in_set", "kwargs": { "column": "Survived", - "value_set": null + "value_set": ["a", "b", "c"] } }, { @@ -430,7 +430,7 @@ "type": "expect_column_distinct_values_to_be_in_set", "kwargs": { "column": 
"SexCode", - "value_set": null + "value_set": ["a", "b", "c"] } } ] diff --git a/tests/render/fixtures/BasicDatasetProfiler_expectations_with_distribution.json b/tests/render/fixtures/BasicDatasetProfiler_expectations_with_distribution.json index b325372e3b87..87c956b284ff 100644 --- a/tests/render/fixtures/BasicDatasetProfiler_expectations_with_distribution.json +++ b/tests/render/fixtures/BasicDatasetProfiler_expectations_with_distribution.json @@ -177,7 +177,7 @@ "type": "expect_column_distinct_values_to_be_in_set", "kwargs": { "column": "PClass", - "value_set": null + "value_set": ["a", "b", "c"] } }, { @@ -345,7 +345,7 @@ "type": "expect_column_distinct_values_to_be_in_set", "kwargs": { "column": "Sex", - "value_set": null + "value_set": ["a", "b", "c"] } }, { @@ -389,7 +389,7 @@ "type": "expect_column_distinct_values_to_be_in_set", "kwargs": { "column": "Survived", - "value_set": null + "value_set": ["a", "b", "c"] } }, { @@ -433,7 +433,7 @@ "type": "expect_column_distinct_values_to_be_in_set", "kwargs": { "column": "SexCode", - "value_set": null + "value_set": ["a", "b", "c"] } } ] diff --git a/tests/render/fixtures/ValidationResultsPageRenderer_render_with_run_info_at_end.json b/tests/render/fixtures/ValidationResultsPageRenderer_render_with_run_info_at_end.json index 7a6bdcb400d5..d05ecde38a9b 100644 --- a/tests/render/fixtures/ValidationResultsPageRenderer_render_with_run_info_at_end.json +++ b/tests/render/fixtures/ValidationResultsPageRenderer_render_with_run_info_at_end.json @@ -651,14 +651,17 @@ { "content_block_type": "string_template", "string_template": { - "template": "values must belong to this set: [ ].", + "template": "values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "Unnamed: 0", - "value_set": [], + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "mostly": null, "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { 
"default": { @@ -1155,14 +1158,17 @@ { "content_block_type": "string_template", "string_template": { - "template": "values must belong to this set: [ ].", + "template": "values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "Name", - "value_set": [], + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "mostly": null, "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { "default": { @@ -1742,14 +1748,17 @@ { "content_block_type": "string_template", "string_template": { - "template": "values must belong to this set: [ ].", + "template": "values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "PClass", - "value_set": [], + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "mostly": null, "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { "default": { @@ -1918,13 +1927,16 @@ { "content_block_type": "string_template", "string_template": { - "template": "distinct values must belong to a set, but that set is not specified.", + "template": "distinct values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "PClass", - "value_set": null, + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { "default": { @@ -2307,14 +2319,17 @@ { "content_block_type": "string_template", "string_template": { - "template": "values must belong to this set: [ ].", + "template": "values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "Age", - "value_set": [], + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "mostly": null, "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { "default": { @@ -3389,14 
+3404,17 @@ { "content_block_type": "string_template", "string_template": { - "template": "values must belong to this set: [ ].", + "template": "values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "Sex", - "value_set": [], + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "mostly": null, "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { "default": { @@ -3557,13 +3575,16 @@ { "content_block_type": "string_template", "string_template": { - "template": "distinct values must belong to a set, but that set is not specified.", + "template": "distinct values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "Sex", - "value_set": null, + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { "default": { @@ -3948,14 +3969,17 @@ { "content_block_type": "string_template", "string_template": { - "template": "values must belong to this set: [ ].", + "template": "values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "Survived", - "value_set": [], + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "mostly": null, "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { "default": { @@ -4054,13 +4078,16 @@ { "content_block_type": "string_template", "string_template": { - "template": "distinct values must belong to a set, but that set is not specified.", + "template": "distinct values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "Survived", - "value_set": null, + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { "default": { @@ 
-4445,14 +4472,17 @@ { "content_block_type": "string_template", "string_template": { - "template": "values must belong to this set: [ ].", + "template": "values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "SexCode", - "value_set": [], + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "mostly": null, "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { "default": { @@ -4551,13 +4581,16 @@ { "content_block_type": "string_template", "string_template": { - "template": "distinct values must belong to a set, but that set is not specified.", + "template": "distinct values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "SexCode", - "value_set": null, + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { "default": { diff --git a/tests/render/fixtures/ValidationResultsPageRenderer_render_with_run_info_at_start.json b/tests/render/fixtures/ValidationResultsPageRenderer_render_with_run_info_at_start.json index eec5c46bc725..c63436c83b42 100644 --- a/tests/render/fixtures/ValidationResultsPageRenderer_render_with_run_info_at_start.json +++ b/tests/render/fixtures/ValidationResultsPageRenderer_render_with_run_info_at_start.json @@ -791,14 +791,17 @@ { "content_block_type": "string_template", "string_template": { - "template": "values must belong to this set: [ ].", + "template": "values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "Unnamed: 0", - "value_set": [], + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "mostly": null, "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { "default": { @@ -1295,14 +1298,17 @@ { "content_block_type": "string_template", "string_template": { - 
"template": "values must belong to this set: [ ].", + "template": "values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "Name", - "value_set": [], + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "mostly": null, "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { "default": { @@ -1882,14 +1888,17 @@ { "content_block_type": "string_template", "string_template": { - "template": "values must belong to this set: [ ].", + "template": "values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "PClass", - "value_set": [], + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "mostly": null, "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { "default": { @@ -2058,13 +2067,16 @@ { "content_block_type": "string_template", "string_template": { - "template": "distinct values must belong to a set, but that set is not specified.", + "template": "distinct values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "PClass", - "value_set": null, + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { "default": { @@ -2447,14 +2459,17 @@ { "content_block_type": "string_template", "string_template": { - "template": "values must belong to this set: [ ].", + "template": "values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "Age", - "value_set": [], + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "mostly": null, "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { "default": { @@ -3529,14 +3544,17 @@ { "content_block_type": "string_template", "string_template": { - "template": "values must 
belong to this set: [ ].", + "template": "values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "Sex", - "value_set": [], + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "mostly": null, "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { "default": { @@ -3697,13 +3715,16 @@ { "content_block_type": "string_template", "string_template": { - "template": "distinct values must belong to a set, but that set is not specified.", + "template": "distinct values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "Sex", - "value_set": null, + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { "default": { @@ -4088,14 +4109,17 @@ { "content_block_type": "string_template", "string_template": { - "template": "values must belong to this set: [ ].", + "template": "values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "Survived", - "value_set": [], + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "mostly": null, "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { "default": { @@ -4194,13 +4218,16 @@ { "content_block_type": "string_template", "string_template": { - "template": "distinct values must belong to a set, but that set is not specified.", + "template": "distinct values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "Survived", - "value_set": null, + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { "default": { @@ -4585,14 +4612,17 @@ { "content_block_type": "string_template", "string_template": { - "template": "values 
must belong to this set: [ ].", + "template": "values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "SexCode", - "value_set": [], + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "mostly": null, "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { "default": { @@ -4691,13 +4721,16 @@ { "content_block_type": "string_template", "string_template": { - "template": "distinct values must belong to a set, but that set is not specified.", + "template": "distinct values must belong to this set: $v__0 $v__1 $v__2.", "params": { "column": "SexCode", - "value_set": null, + "value_set": ["a", "b", "c"], "result_format": "SUMMARY", "row_condition": null, - "condition_parser": null + "condition_parser": null, + "v__0": "a", + "v__1": "b", + "v__2": "c" }, "styling": { "default": { diff --git a/tests/render/test_EmailRenderer.py b/tests/render/test_EmailRenderer.py index db15301a726f..e83f399fac85 100644 --- a/tests/render/test_EmailRenderer.py +++ b/tests/render/test_EmailRenderer.py @@ -29,11 +29,11 @@ def test_EmailRenderer_render(v1_checkpoint_result): "

Data Asset Name: my_first_asset

", "

Run ID: __no_run_id__

", "

Batch ID: my_batch

", - "

Summary: 3 of 5 expectations were met


my_good_suite

", # noqa: E501 + "

Summary: 3 of 5 expectations were met


my_good_suite

", # noqa: E501 # FIXME CoP "

Batch Validation Status: Success 🎉

", "

Expectation Suite Name: my_good_suite

", "

Data Asset Name: __no_asset_name__

", "

Run ID: my_run_id

", "

Batch ID: my_other_batch

", - "

Summary: 1 of 1 expectations were met

", # noqa: E501 + "

Summary: 1 of 1 expectations were met

", # noqa: E501 # FIXME CoP ] diff --git a/tests/render/test_column_section_renderer.py b/tests/render/test_column_section_renderer.py index f99f703d3a64..c96b7d99d4e4 100644 --- a/tests/render/test_column_section_renderer.py +++ b/tests/render/test_column_section_renderer.py @@ -173,7 +173,7 @@ def test_ProfilingResultsColumnSectionRenderer_render_header( "string_template": { "template": "Type: None", "tooltip": { - "content": "expect_column_values_to_be_of_type
expect_column_values_to_be_in_type_list" # noqa: E501 + "content": "expect_column_values_to_be_of_type
expect_column_values_to_be_in_type_list" # noqa: E501 # FIXME CoP }, "tag": "h6", "styling": {"classes": ["mt-1", "mb-0"]}, @@ -238,7 +238,7 @@ def test_ProfilingResultsColumnSectionRenderer_render_header_with_unescaped_doll "string_template": { "template": "Type: []", "tooltip": { - "content": "expect_column_values_to_be_of_type
expect_column_values_to_be_in_type_list" # noqa: E501 + "content": "expect_column_values_to_be_of_type
expect_column_values_to_be_in_type_list" # noqa: E501 # FIXME CoP }, "tag": "h6", "styling": {"classes": ["mt-1", "mb-0"]}, @@ -1130,9 +1130,9 @@ def test_ExpectationSuiteColumnSectionRenderer_render_expectation_with_descripti content_block = result.content_blocks[1] content = content_block.bullet_list[0] - markdown = content.markdown - - assert markdown == expectation.description + assert content.string_template == { + "template": "column values must be a legal adult age (**18** or older)" + } @pytest.mark.unit @@ -1246,14 +1246,14 @@ def test_ValidationResultsColumnSectionRenderer_render_table( assert ( "values must not be null, at least $mostly_pct % of the time." in content_block_stringified ) - assert "values must belong to this set: [ ]." in content_block_stringified + assert "values must belong to this set: $v__0 $v__1 $v__2." in content_block_stringified assert ( - "\\n\\n$unexpected_count unexpected values found. $unexpected_percent of $element_count total rows." # noqa: E501 + "\\n\\n$unexpected_count unexpected values found. $unexpected_percent of $element_count total rows." # noqa: E501 # FIXME CoP in content_block_stringified ) assert "values must not match this regular expression: $regex." in content_block_stringified assert ( - "\\n\\n$unexpected_count unexpected values found. $unexpected_percent of $element_count total rows." # noqa: E501 + "\\n\\n$unexpected_count unexpected values found. $unexpected_percent of $element_count total rows." 
# noqa: E501 # FIXME CoP in content_block_stringified ) @@ -1354,7 +1354,7 @@ def test_ValidationResultsTableContentBlockRenderer_generate_expectation_row_hap # noinspection PyPep8Naming @pytest.mark.unit -def test_ValidationResultsTableContentBlockRenderer_generate_expectation_row_happy_path_with_eval_parameter(): # noqa: E501 +def test_ValidationResultsTableContentBlockRenderer_generate_expectation_row_happy_path_with_eval_parameter(): # noqa: E501 # FIXME CoP evr = ExpectationValidationResult( success=True, result={ @@ -1426,7 +1426,7 @@ def test_ValidationResultsTableContentBlockRenderer_generate_expectation_row_hap { "content_block_type": "string_template", "string_template": { - "template": "$column minimum value must be greater than or equal to $min_value and less than or equal to $max_value.", # noqa: E501 + "template": "$column minimum value must be greater than or equal to $min_value and less than or equal to $max_value.", # noqa: E501 # FIXME CoP "params": { "column": "live", "min_value": {"$PARAMETER": "MIN_VAL_PARAM * 2"}, @@ -1446,7 +1446,7 @@ def test_ValidationResultsTableContentBlockRenderer_generate_expectation_row_hap { "content_block_type": "string_template", "string_template": { - "template": "\n - $eval_param = $eval_param_value (at time of validation).", # noqa: E501 + "template": "\n - $eval_param = $eval_param_value (at time of validation).", # noqa: E501 # FIXME CoP "params": { "eval_param": "MIN_VAL_PARAM", "eval_param_value": 10, @@ -1460,7 +1460,7 @@ def test_ValidationResultsTableContentBlockRenderer_generate_expectation_row_hap { "content_block_type": "string_template", "string_template": { - "template": "\n - $eval_param = $eval_param_value (at time of validation).", # noqa: E501 + "template": "\n - $eval_param = $eval_param_value (at time of validation).", # noqa: E501 # FIXME CoP "params": { "eval_param": "MAX_VAL_PARAM", "eval_param_value": 40, @@ -1588,15 +1588,15 @@ def 
test_ValidationResultsTableContentBlockRenderer_render_evr_with_description( result = ValidationResultsColumnSectionRenderer().render([evr]) content_block = result.content_blocks[1] - content = content_block.table[0] - markdown = content.markdown - - assert markdown == expectation.description + _, description_cell, _ = content_block.table[0] + assert description_cell.string_template == { + "template": "column values must be a legal adult age (**18** or older)" + } # noinspection PyPep8Naming @pytest.mark.filterwarnings( - "ignore:Cannot get %*::great_expectations.render.renderer.profiling_results_overview_section_renderer" # noqa: E501 + "ignore:Cannot get %*::great_expectations.render.renderer.profiling_results_overview_section_renderer" # noqa: E501 # FIXME CoP ) @pytest.mark.unit def test_ProfilingResultsOverviewSectionRenderer_empty_type_list(): @@ -1608,7 +1608,7 @@ def test_ProfilingResultsOverviewSectionRenderer_empty_type_list(): ExpectationValidationResult( success=True, result={ - "observed_value": "VARIANT", # Note this is NOT a recognized type by many backends # noqa: E501 + "observed_value": "VARIANT", # Note this is NOT a recognized type by many backends # noqa: E501 # FIXME CoP }, exception_info={ "raised_exception": False, diff --git a/tests/render/test_data_documentation_site_builder.py b/tests/render/test_data_documentation_site_builder.py index b68e319c3f1a..5133879874e4 100644 --- a/tests/render/test_data_documentation_site_builder.py +++ b/tests/render/test_data_documentation_site_builder.py @@ -26,7 +26,7 @@ def assert_how_to_buttons( ): """Helper function to assert presence or non-presence of how-to buttons and related content in various Data Docs pages. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP # these are simple checks for presence of certain page elements show_walkthrough_button = "Show Walkthrough" @@ -53,7 +53,7 @@ def assert_how_to_buttons( "profiling_results": [action_card, show_walkthrough_button, walkthrough_modal], } - data_docs_site_dir = os.path.join( # noqa: PTH118 + data_docs_site_dir = os.path.join( # noqa: PTH118 # FIXME CoP context._context_root_directory, context._project_config.data_docs_sites["local_site"]["store_backend"]["base_directory"], ) @@ -61,15 +61,15 @@ def assert_how_to_buttons( page_paths_dict = { "index_pages": [index_page_locator_info[7:]], "expectation_suites": [ - os.path.join(data_docs_site_dir, link_dict["filepath"]) # noqa: PTH118 + os.path.join(data_docs_site_dir, link_dict["filepath"]) # noqa: PTH118 # FIXME CoP for link_dict in index_links_dict.get("expectations_links", []) ], "validation_results": [ - os.path.join(data_docs_site_dir, link_dict["filepath"]) # noqa: PTH118 + os.path.join(data_docs_site_dir, link_dict["filepath"]) # noqa: PTH118 # FIXME CoP for link_dict in index_links_dict.get("validations_links", []) ], "profiling_results": [ - os.path.join(data_docs_site_dir, link_dict["filepath"]) # noqa: PTH118 + os.path.join(data_docs_site_dir, link_dict["filepath"]) # noqa: PTH118 # FIXME CoP for link_dict in index_links_dict.get("profiling_links", []) ], } @@ -88,15 +88,15 @@ def assert_how_to_buttons( def test_site_builder_with_custom_site_section_builders_config(tmp_path_factory): """Test that site builder can handle partially specified custom site_section_builders config""" base_dir = str(tmp_path_factory.mktemp("project_dir")) - project_dir = os.path.join(base_dir, "project_path") # noqa: PTH118 - os.mkdir(project_dir) # noqa: PTH102 + project_dir = os.path.join(base_dir, "project_path") # noqa: PTH118 # FIXME CoP + os.mkdir(project_dir) # noqa: PTH102 # FIXME CoP # fixture config swaps site section builder source stores and specifies custom 
run_name_filters shutil.copy( file_relative_path( __file__, "../test_fixtures/great_expectations_custom_local_site_config.yml" ), - str(os.path.join(project_dir, FileDataContext.GX_YML)), # noqa: PTH118 + str(os.path.join(project_dir, FileDataContext.GX_YML)), # noqa: PTH118 # FIXME CoP ) context = get_context(context_root_dir=project_dir) local_site_config = context._project_config.data_docs_sites.get("local_site") diff --git a/tests/render/test_default_jinja_view.py b/tests/render/test_default_jinja_view.py index 2863b5ee2970..16c1128eefcd 100644 --- a/tests/render/test_default_jinja_view.py +++ b/tests/render/test_default_jinja_view.py @@ -23,7 +23,7 @@ # noinspection PyPep8Naming @pytest.mark.filterwarnings( - "ignore:Cannot get %*::great_expectations.render.renderer.profiling_results_overview_section_renderer" # noqa: E501 + "ignore:Cannot get %*::great_expectations.render.renderer.profiling_results_overview_section_renderer" # noqa: E501 # FIXME CoP ) def test_render_DefaultJinjaPageView_meta_info(): validation_results = ExpectationSuiteValidationResult( @@ -42,7 +42,7 @@ def test_render_DefaultJinjaPageView_meta_info(): "expectation_suite_name": "my_suite", "run_id": "2019-06-25T14:58:09.960521", "batch_kwargs": { - "path": "/Users/user/project_data/public_healthcare_datasets/tetanusvaricella/tetvardata.csv", # noqa: E501 + "path": "/Users/user/project_data/public_healthcare_datasets/tetanusvaricella/tetvardata.csv", # noqa: E501 # FIXME CoP "timestamp": 1561474688.693565, }, }, @@ -140,7 +140,7 @@ def test_render_section_page(): -""".replace(" ", "") # noqa: E501 +""".replace(" ", "") # noqa: E501 # FIXME CoP .replace("\t", "") .replace("\n", "") ) @@ -331,7 +331,7 @@ def test_rendering_components_with_styling():
Mean
446
Minimum
1
-""".replace(" ", "") # noqa: E501 +""".replace(" ", "") # noqa: E501 # FIXME CoP .replace("\t", "") .replace("\n", "") ) @@ -420,7 +420,7 @@ def test_render_table_component():
Minimum
1
-""".replace(" ", "") # noqa: E501 +""".replace(" ", "") # noqa: E501 # FIXME CoP .replace("\t", "") .replace("\n", "") ) @@ -490,7 +490,7 @@ def test_render_graph(): **{ "content_block_type": "graph", "header": "Histogram", - "graph": '{"$schema": "https://vega.github.io/schema/vega-lite/v2.6.0.json", "autosize": "fit", "config": {"view": {"height": 300, "width": 400}}, "data": {"name": "data-a681d02fb484e64eadd9721b37015d5b"}, "datasets": {"data-a681d02fb484e64eadd9721b37015d5b": [{"bins": 3.7, "weights": 5.555555555555555}, {"bins": 10.8, "weights": 3.439153439153439}, {"bins": 17.9, "weights": 17.857142857142858}, {"bins": 25.0, "weights": 24.206349206349206}, {"bins": 32.0, "weights": 16.137566137566136}, {"bins": 39.1, "weights": 12.3015873015873}, {"bins": 46.2, "weights": 9.788359788359788}, {"bins": 53.3, "weights": 5.423280423280423}, {"bins": 60.4, "weights": 3.439153439153439}, {"bins": 67.5, "weights": 1.8518518518518516}]}, "encoding": {"x": {"field": "bins", "type": "ordinal"}, "y": {"field": "weights", "type": "quantitative"}}, "height": 200, "mark": "bar", "width": 200}', # noqa: E501 + "graph": '{"$schema": "https://vega.github.io/schema/vega-lite/v2.6.0.json", "autosize": "fit", "config": {"view": {"height": 300, "width": 400}}, "data": {"name": "data-a681d02fb484e64eadd9721b37015d5b"}, "datasets": {"data-a681d02fb484e64eadd9721b37015d5b": [{"bins": 3.7, "weights": 5.555555555555555}, {"bins": 10.8, "weights": 3.439153439153439}, {"bins": 17.9, "weights": 17.857142857142858}, {"bins": 25.0, "weights": 24.206349206349206}, {"bins": 32.0, "weights": 16.137566137566136}, {"bins": 39.1, "weights": 12.3015873015873}, {"bins": 46.2, "weights": 9.788359788359788}, {"bins": 53.3, "weights": 5.423280423280423}, {"bins": 60.4, "weights": 3.439153439153439}, {"bins": 67.5, "weights": 1.8518518518518516}]}, "encoding": {"x": {"field": "bins", "type": "ordinal"}, "y": {"field": "weights", "type": "quantitative"}}, "height": 200, "mark": "bar", "width": 200}', 
# noqa: E501 # FIXME CoP "styling": {"classes": ["col-4"]}, } ).to_json_dict() @@ -525,7 +525,7 @@ def test_render_graph(): }).then(result=>console.log(result)).catch(console.warn); -""".replace(" ", "") # noqa: E501 +""".replace(" ", "") # noqa: E501 # FIXME CoP .replace("\t", "") .replace("\n", "") ) diff --git a/tests/render/test_inline_renderer.py b/tests/render/test_inline_renderer.py index b2f0660c66ba..f78db5cbbc1e 100644 --- a/tests/render/test_inline_renderer.py +++ b/tests/render/test_inline_renderer.py @@ -39,10 +39,10 @@ def test_inline_renderer_instantiation_error_message( ): expectation_suite: ExpectationSuite = basic_expectation_suite with pytest.raises(InlineRendererError) as e: - InlineRenderer(render_object=expectation_suite) # type: ignore + InlineRenderer(render_object=expectation_suite) # type: ignore # FIXME CoP assert ( str(e.value) - == "InlineRenderer can only be used with an ExpectationConfiguration or ExpectationValidationResult, but was used." # noqa: E501 + == "InlineRenderer can only be used with an ExpectationConfiguration or ExpectationValidationResult, but was used." 
# noqa: E501 # FIXME CoP ) diff --git a/tests/render/test_page_renderer.py b/tests/render/test_page_renderer.py index 46663eb8115e..b37057d7c4d8 100644 --- a/tests/render/test_page_renderer.py +++ b/tests/render/test_page_renderer.py @@ -224,7 +224,7 @@ def test_ValidationResultsPageRenderer_render_validation_header( "subheader": { "content_block_type": "string_template", "string_template": { - "template": "${suite_title} ${expectation_suite_name}\n ${data_asset} ${data_asset_name}\n ${status_title} ${html_success_icon} ${success}", # noqa: E501 + "template": "${suite_title} ${expectation_suite_name}\n ${data_asset} ${data_asset_name}\n ${status_title} ${html_success_icon} ${success}", # noqa: E501 # FIXME CoP "params": { "suite_title": "Expectation Suite:", "data_asset": "Data asset:", @@ -232,7 +232,7 @@ def test_ValidationResultsPageRenderer_render_validation_header( "status_title": "Status:", "expectation_suite_name": "default", "success": "Failed", - "html_success_icon": '', # noqa: E501 + "html_success_icon": '', # noqa: E501 # FIXME CoP }, "styling": { "params": { diff --git a/tests/render/test_render.py b/tests/render/test_render.py index e80b831f5415..9b212b76f755 100644 --- a/tests/render/test_render.py +++ b/tests/render/test_render.py @@ -321,7 +321,7 @@ def test_render_validation_results(titanic_profiled_evrs_1): 'Must have greater than or equal to 0 rows.' in rendered_page ) - # assert 'This table should have a list of columns in a specific order, but that order is not specified.' \ # noqa: E501 + # assert 'This table should have a list of columns in a specific order, but that order is not specified.' 
\ # noqa: E501 # FIXME CoP # in rendered_page @@ -348,7 +348,7 @@ def test_smoke_render_profiling_results_page_renderer_with_exception( def test_render_string_template(): template = { - "template": "$column Kullback-Leibler (KL) divergence with respect to the following distribution must be lower than $threshold: $sparklines_histogram", # noqa: E501 + "template": "$column Kullback-Leibler (KL) divergence with respect to the following distribution must be lower than $threshold: $sparklines_histogram", # noqa: E501 # FIXME CoP "params": { "column": "categorical_fixed", "partition_object": { @@ -374,14 +374,14 @@ def test_render_string_template(): expected = ( """ categorical_fixed Kullback-Leibler (KL) divergence with respect to the following distribution must be lower than 0.1: █▄▁ - """.replace(" ", "") # noqa: E501 + """.replace(" ", "") # noqa: E501 # FIXME CoP .replace("\t", "") .replace("\n", "") ) assert res == expected template = { - "template": "$column Kullback-Leibler (KL) divergence with respect to the following distribution must be lower than $threshold: $sparklines_histogram", # noqa: E501 + "template": "$column Kullback-Leibler (KL) divergence with respect to the following distribution must be lower than $threshold: $sparklines_histogram", # noqa: E501 # FIXME CoP "params": { "column": "categorical_fixed", "partition_object": { @@ -407,7 +407,7 @@ def test_render_string_template(): expected = ( """ categorical_fixed Kullback-Leibler (KL) divergence with respect to the following distribution must be lower than 0.1: ▃▆▁█ - """.replace(" ", "") # noqa: E501 + """.replace(" ", "") # noqa: E501 # FIXME CoP .replace("\t", "") .replace("\n", "") ) diff --git a/tests/render/test_render_BulletListContentBlock.py b/tests/render/test_render_BulletListContentBlock.py index b5f7de617e74..c3c8d62365bc 100644 --- a/tests/render/test_render_BulletListContentBlock.py +++ b/tests/render/test_render_BulletListContentBlock.py @@ -58,7 +58,7 @@ def 
test_parse_row_condition_string_pandas_engine(): "Survived == 1 and (SexCode not in (0, 7, x) | ~(Age > 50)) & not (PClass != '1st')" ) assert parse_row_condition_string_pandas_engine(test_condition_string) == ( - "if $row_condition__0 and ($row_condition__1 or not ($row_condition__2)) and not ($row_condition__3)", # noqa: E501 + "if $row_condition__0 and ($row_condition__1 or not ($row_condition__2)) and not ($row_condition__3)", # noqa: E501 # FIXME CoP { "row_condition__0": "Survived == 1", "row_condition__1": "SexCode not in [0, 7, x]", @@ -70,13 +70,13 @@ def test_parse_row_condition_string_pandas_engine(): @pytest.mark.filesystem def test_all_expectations_using_test_definitions(): - dir_path = os.path.dirname(os.path.abspath(__file__)) # noqa: PTH120, PTH100 - pattern = os.path.join( # noqa: PTH118 + dir_path = os.path.dirname(os.path.abspath(__file__)) # noqa: PTH120, PTH100 # FIXME CoP + pattern = os.path.join( # noqa: PTH118 # FIXME CoP dir_path, "..", "..", "tests", "test_definitions", "*", "expect*.json" ) - test_files = glob.glob(pattern) # noqa: PTH207 + test_files = glob.glob(pattern) # noqa: PTH207 # FIXME CoP - # Historically, collecting all the JSON tests was an issue - this step ensures we actually have test data. # noqa: E501 + # Historically, collecting all the JSON tests was an issue - this step ensures we actually have test data. # noqa: E501 # FIXME CoP assert ( len(test_files) == 61 ), "Something went wrong when collecting JSON Expectation test fixtures" @@ -97,7 +97,7 @@ def test_all_expectations_using_test_definitions(): for dataset in test_definitions["datasets"]: for test in dataset["tests"]: # Construct an expectation from the test. 
- if type(test["in"]) == dict: # noqa: E721 + if type(test["in"]) == dict: # noqa: E721 # FIXME CoP # Skip tests with invalid configurations if test["in"].get("catch_exceptions"): continue diff --git a/tests/render/test_render_ExceptionListContentBlockRenderer.py b/tests/render/test_render_ExceptionListContentBlockRenderer.py index e01d6772e82c..5cb926b1f15b 100644 --- a/tests/render/test_render_ExceptionListContentBlockRenderer.py +++ b/tests/render/test_render_ExceptionListContentBlockRenderer.py @@ -21,7 +21,7 @@ def test_exception_list_content_block_renderer(): exception_info={ "raised_exception": True, "exception_message": "Invalid partition object.", - "exception_traceback": 'Traceback (most recent call last):\n File "/home/user/great_expectations/great_expectations/data_asset/data_asset.py", line 186, in wrapper\n return_obj = func(self, **evaluation_args)\n File " /home/user/great_expectations/great_expectations/dataset/dataset.py", line 106, in inner_wrapper\n evaluation_result = func(self, column, *args, **kwargs)\n File "/home/user/great_expectations/great_expectations/dataset/dataset.py", line 3388, in expect_column_kl_divergence_to_be_less_than\n raise ValueError("Invalid partition object.")\nValueError: Invalid partition object.\n', # noqa: E501 + "exception_traceback": 'Traceback (most recent call last):\n File "/home/user/great_expectations/great_expectations/data_asset/data_asset.py", line 186, in wrapper\n return_obj = func(self, **evaluation_args)\n File " /home/user/great_expectations/great_expectations/dataset/dataset.py", line 106, in inner_wrapper\n evaluation_result = func(self, column, *args, **kwargs)\n File "/home/user/great_expectations/great_expectations/dataset/dataset.py", line 3388, in expect_column_kl_divergence_to_be_less_than\n raise ValueError("Invalid partition object.")\nValueError: Invalid partition object.\n', # noqa: E501 # FIXME CoP }, expectation_config=ExpectationConfiguration( 
type="expect_column_kl_divergence_to_be_less_than", @@ -43,7 +43,7 @@ def test_exception_list_content_block_renderer(): { "content_block_type": "string_template", "string_template": { - "template": "$column: $expectation_type raised an exception: $exception_message", # noqa: E501 + "template": "$column: $expectation_type raised an exception: $exception_message", # noqa: E501 # FIXME CoP "params": { "column": "answer", "expectation_type": "expect_column_kl_divergence_to_be_less_than", diff --git a/tests/render/test_render_ValidationResultsTableContentBlockRenderer.py b/tests/render/test_render_ValidationResultsTableContentBlockRenderer.py index 722952f38e08..ee1607152e5d 100644 --- a/tests/render/test_render_ValidationResultsTableContentBlockRenderer.py +++ b/tests/render/test_render_ValidationResultsTableContentBlockRenderer.py @@ -29,7 +29,7 @@ def evr_failed_with_exception(): exception_info={ "raised_exception": True, "exception_message": "Invalid partition object.", - "exception_traceback": 'Traceback (most recent call last):\n File "/great_expectations/great_expectations/data_asset/data_asset.py", line 216, in wrapper\n return_obj = func(self, **evaluation_args)\n File "/great_expectations/great_expectations/dataset/dataset.py", line 106, in inner_wrapper\n evaluation_result = func(self, column, *args, **kwargs)\n File "/great_expectations/great_expectations/dataset/dataset.py", line 3381, in expect_column_kl_divergence_to_be_less_than\n raise ValueError("Invalid partition object.")\nValueError: Invalid partition object.\n', # noqa: E501 + "exception_traceback": 'Traceback (most recent call last):\n File "/great_expectations/great_expectations/data_asset/data_asset.py", line 216, in wrapper\n return_obj = func(self, **evaluation_args)\n File "/great_expectations/great_expectations/dataset/dataset.py", line 106, in inner_wrapper\n evaluation_result = func(self, column, *args, **kwargs)\n File "/great_expectations/great_expectations/dataset/dataset.py", line 
3381, in expect_column_kl_divergence_to_be_less_than\n raise ValueError("Invalid partition object.")\nValueError: Invalid partition object.\n', # noqa: E501 # FIXME CoP }, expectation_config=ExpectationConfiguration( type="expect_column_kl_divergence_to_be_less_than", @@ -84,7 +84,7 @@ def evr_id_pk_basic_sql() -> ExpectationValidationResult: {"animals": "lion", "pk_1": 4, "pk_2": "four"}, {"animals": "zebra", "pk_1": 5, "pk_2": "five"}, ], - "unexpected_index_query": "SELECT animals, pk_1, pk_2 \nFROM animal_names \nWHERE animals IS NOT NULL AND (animals NOT IN ('cat', 'fish', 'dog'))", # noqa: E501 + "unexpected_index_query": "SELECT animals, pk_1, pk_2 \nFROM animal_names \nWHERE animals IS NOT NULL AND (animals NOT IN ('cat', 'fish', 'dog'))", # noqa: E501 # FIXME CoP "unexpected_list": ["giraffe", "lion", "zebra"], "unexpected_percent": 50.0, "unexpected_percent_nonmissing": 50.0, @@ -133,7 +133,7 @@ def evr_id_pk_basic_pandas() -> ExpectationValidationResult: ) -def test_ValidationResultsTableContentBlockRenderer_generate_expectation_row_with_errored_expectation( # noqa: E501 +def test_ValidationResultsTableContentBlockRenderer_generate_expectation_row_with_errored_expectation( # noqa: E501 # FIXME CoP evr_failed_with_exception, ): result = ValidationResultsTableContentBlockRenderer.render( @@ -184,7 +184,7 @@ def test_ValidationResultsTableContentBlockRenderer_generate_expectation_row_wit { "content_block_type": "string_template", "string_template": { - "template": "\n\n$expectation_type raised an exception:\n$exception_message", # noqa: E501 + "template": "\n\n$expectation_type raised an exception:\n$exception_message", # noqa: E501 # FIXME CoP "params": { "expectation_type": "expect_column_kl_divergence_to_be_less_than", "exception_message": "Invalid partition object.", @@ -208,7 +208,7 @@ def test_ValidationResultsTableContentBlockRenderer_generate_expectation_row_wit { "content_block_type": "string_template", "string_template": { - "template": 'Traceback 
(most recent call last):\n File "/great_expectations/great_expectations/data_asset/data_asset.py", line 216, in wrapper\n return_obj = func(self, **evaluation_args)\n File "/great_expectations/great_expectations/dataset/dataset.py", line 106, in inner_wrapper\n evaluation_result = func(self, column, *args, **kwargs)\n File "/great_expectations/great_expectations/dataset/dataset.py", line 3381, in expect_column_kl_divergence_to_be_less_than\n raise ValueError("Invalid partition object.")\nValueError: Invalid partition object.\n', # noqa: E501 + "template": 'Traceback (most recent call last):\n File "/great_expectations/great_expectations/data_asset/data_asset.py", line 216, in wrapper\n return_obj = func(self, **evaluation_args)\n File "/great_expectations/great_expectations/dataset/dataset.py", line 106, in inner_wrapper\n evaluation_result = func(self, column, *args, **kwargs)\n File "/great_expectations/great_expectations/dataset/dataset.py", line 3381, in expect_column_kl_divergence_to_be_less_than\n raise ValueError("Invalid partition object.")\nValueError: Invalid partition object.\n', # noqa: E501 # FIXME CoP "tag": "code", }, } @@ -486,7 +486,7 @@ def test_ValidationResultsTableContentBlockRenderer_get_content_block_fn(evr_suc @pytest.mark.filterwarnings("ignore:V2 API style custom rendering*:DeprecationWarning") -def test_ValidationResultsTableContentBlockRenderer_get_content_block_fn_with_v2_api_style_custom_rendering(): # noqa: E501 +def test_ValidationResultsTableContentBlockRenderer_get_content_block_fn_with_v2_api_style_custom_rendering(): # noqa: E501 # FIXME CoP """Test backwards support for custom expectation rendering with the V2 API as described at https://docs.greatexpectations.io/en/latest/reference/spare_parts/data_docs_reference.html#customizing-data-docs. 
""" @@ -528,7 +528,7 @@ def expect_custom_expectation_written_in_v2_api_style( ), ) - content_block_fn = ValidationResultsTableContentBlockRendererWithV2ApiStyleCustomExpectations._get_content_block_fn( # noqa: E501 + content_block_fn = ValidationResultsTableContentBlockRendererWithV2ApiStyleCustomExpectations._get_content_block_fn( # noqa: E501 # FIXME CoP "expect_custom_expectation_written_in_v2_api_style" ) content_block_fn_output = content_block_fn(result=evr) @@ -741,7 +741,7 @@ def test_ValidationResultsTableContentBlockRenderer_get_unexpected_statement( **{ "content_block_type": "string_template", "string_template": { - "template": "\n\n$unexpected_count unexpected values found. $unexpected_percent of $element_count total rows.", # noqa: E501 + "template": "\n\n$unexpected_count unexpected values found. $unexpected_percent of $element_count total rows.", # noqa: E501 # FIXME CoP "params": { "unexpected_count": "3", "unexpected_percent": "≈0.2285%", @@ -1210,7 +1210,7 @@ def test_ValidationResultsTableContentBlockRenderer_get_unexpected_table_no_id_p } -def test_ValidationResultsTableContentBlockRenderer_get_unexpected_table_with_id_pk_pandas_and_query( # noqa: E501 +def test_ValidationResultsTableContentBlockRenderer_get_unexpected_table_with_id_pk_pandas_and_query( # noqa: E501 # FIXME CoP evr_id_pk_basic_pandas, ): rendered_value = get_renderer_impl( @@ -1236,7 +1236,7 @@ def test_ValidationResultsTableContentBlockRenderer_get_unexpected_table_with_id } -def test_ValidationResultsTableContentBlockRenderer_get_unexpected_table_with_id_pk_pandas_with_sampled_table( # noqa: E501 +def test_ValidationResultsTableContentBlockRenderer_get_unexpected_table_with_id_pk_pandas_with_sampled_table( # noqa: E501 # FIXME CoP evr_id_pk_basic_pandas, ): evr_id_pk_pandas = evr_id_pk_basic_pandas @@ -1315,7 +1315,7 @@ def test_ValidationResultsTableContentBlockRenderer_get_unexpected_table_with_id } -def 
test_ValidationResultsTableContentBlockRenderer_get_unexpected_table_with_id_pk_sql_with_query_with_sampled_table( # noqa: E501 +def test_ValidationResultsTableContentBlockRenderer_get_unexpected_table_with_id_pk_sql_with_query_with_sampled_table( # noqa: E501 # FIXME CoP evr_id_pk_basic_sql, ): new_index = [ diff --git a/tests/render/test_renderer.py b/tests/render/test_renderer.py index 132a63398361..02e1b5fd8e1d 100644 --- a/tests/render/test_renderer.py +++ b/tests/render/test_renderer.py @@ -11,14 +11,14 @@ @pytest.mark.unit def test__find_evr_by_type(titanic_profiled_evrs_1): - # TODO: _find_all_evrs_by_type should accept an ValidationResultSuite, not ValidationResultSuite.results # noqa: E501 + # TODO: _find_all_evrs_by_type should accept an ValidationResultSuite, not ValidationResultSuite.results # noqa: E501 # FIXME CoP found_evr = Renderer()._find_evr_by_type( titanic_profiled_evrs_1.results, "expect_column_to_exist" ) print(found_evr) assert found_evr is None - # TODO: _find_all_evrs_by_type should accept an ValidationResultSuite, not ValidationResultSuite.results # noqa: E501 + # TODO: _find_all_evrs_by_type should accept an ValidationResultSuite, not ValidationResultSuite.results # noqa: E501 # FIXME CoP found_evr = Renderer()._find_evr_by_type( titanic_profiled_evrs_1.results, "expect_column_distinct_values_to_be_in_set" ) @@ -46,28 +46,28 @@ def test__find_evr_by_type(titanic_profiled_evrs_1): }, expectation_config=ExpectationConfiguration( type="expect_column_distinct_values_to_be_in_set", - kwargs={"column": "PClass", "value_set": None, "result_format": "SUMMARY"}, + kwargs={"column": "PClass", "value_set": ["a", "b", "c"], "result_format": "SUMMARY"}, ), ) @pytest.mark.unit def test__find_all_evrs_by_type(titanic_profiled_evrs_1): - # TODO: _find_all_evrs_by_type should accept an ValidationResultSuite, not ValidationResultSuite.results # noqa: E501 + # TODO: _find_all_evrs_by_type should accept an ValidationResultSuite, not 
ValidationResultSuite.results # noqa: E501 # FIXME CoP found_evrs = Renderer()._find_all_evrs_by_type( titanic_profiled_evrs_1.results, "expect_column_to_exist", column_=None ) print(found_evrs) assert found_evrs == [] - # TODO: _find_all_evrs_by_type should accept an ValidationResultSuite, not ValidationResultSuite.results # noqa: E501 + # TODO: _find_all_evrs_by_type should accept an ValidationResultSuite, not ValidationResultSuite.results # noqa: E501 # FIXME CoP found_evrs = Renderer()._find_all_evrs_by_type( titanic_profiled_evrs_1.results, "expect_column_to_exist", column_="SexCode" ) print(found_evrs) assert found_evrs == [] - # TODO: _find_all_evrs_by_type should accept an ValidationResultSuite, not ValidationResultSuite.results # noqa: E501 + # TODO: _find_all_evrs_by_type should accept an ValidationResultSuite, not ValidationResultSuite.results # noqa: E501 # FIXME CoP found_evrs = Renderer()._find_all_evrs_by_type( titanic_profiled_evrs_1.results, "expect_column_distinct_values_to_be_in_set", @@ -76,7 +76,7 @@ def test__find_all_evrs_by_type(titanic_profiled_evrs_1): print(found_evrs) assert len(found_evrs) == 4 - # TODO: _find_all_evrs_by_type should accept an ValidationResultSuite, not ValidationResultSuite.results # noqa: E501 + # TODO: _find_all_evrs_by_type should accept an ValidationResultSuite, not ValidationResultSuite.results # noqa: E501 # FIXME CoP found_evrs = Renderer()._find_all_evrs_by_type( titanic_profiled_evrs_1.results, "expect_column_distinct_values_to_be_in_set", diff --git a/tests/render/test_renderer_configuration.py b/tests/render/test_renderer_configuration.py index 1b723a0d0077..91e8d5122464 100644 --- a/tests/render/test_renderer_configuration.py +++ b/tests/render/test_renderer_configuration.py @@ -174,7 +174,7 @@ def test_successful_renderer_row_condition_params( @pytest.mark.unit @pytest.mark.xfail( - reason="As of v0.15.46 test will fail until RendererConfiguration._validate_configuration_or_result is re-enabled.", # noqa: 
E501 + reason="As of v0.15.46 test will fail until RendererConfiguration._validate_configuration_or_result is re-enabled.", # noqa: E501 # FIXME CoP strict=True, ) def test_failed_renderer_configuration_instantiation(): diff --git a/tests/render/test_slack_renderer.py b/tests/render/test_slack_renderer.py index d77a8116372b..256075ebb3ed 100644 --- a/tests/render/test_slack_renderer.py +++ b/tests/render/test_slack_renderer.py @@ -61,7 +61,7 @@ def test_SlackRenderer_render(mocker): }, { "text": { - "text": "*DataDocs* can be found here: `file:///localsite/index.html` \n (Please copy and paste link into a browser to view)\n", # noqa: E501 + "text": "*DataDocs* can be found here: `file:///localsite/index.html` \n (Please copy and paste link into a browser to view)\n", # noqa: E501 # FIXME CoP "type": "mrkdwn", }, "type": "section", diff --git a/tests/render/test_styled_string_template.py b/tests/render/test_styled_string_template.py index 766463cb251a..1bb90e3e6676 100644 --- a/tests/render/test_styled_string_template.py +++ b/tests/render/test_styled_string_template.py @@ -40,7 +40,7 @@ def test_render_template(): .replace(" ", "") .replace("\t", "") .replace("\n", "") - == 'It was the best of times; it was the worst of times.'.replace( # noqa: E501 + == 'It was the best of times; it was the worst of times.'.replace( # noqa: E501 # FIXME CoP " ", "" ) .replace("\t", "") @@ -71,7 +71,7 @@ def test_render_template(): .replace(" ", "") .replace("\t", "") .replace("\n", "") - == 'It was the best of times; it was the worst of times.'.replace( # noqa: E501 + == 'It was the best of times; it was the worst of times.'.replace( # noqa: E501 # FIXME CoP " ", "" ) .replace("\t", "") @@ -99,7 +99,7 @@ def test_render_template(): .replace(" ", "") .replace("\t", "") .replace("\n", "") - == 'It was the best of times; it was the worst of times.'.replace( # noqa: E501 + == 'It was the best of times; it was the worst of times.'.replace( # noqa: E501 # FIXME CoP " ", "" ) 
.replace("\t", "") @@ -129,7 +129,7 @@ def test_render_template(): .replace(" ", "") .replace("\t", "") .replace("\n", "") - == 'It was the best of times; it was the worst of times.'.replace( # noqa: E501 + == 'It was the best of times; it was the worst of times.'.replace( # noqa: E501 # FIXME CoP " ", "" ) .replace("\t", "") @@ -140,7 +140,7 @@ def test_render_template(): def test_render_template_with_extra_dollar_signs_in_template(): result = DefaultJinjaPageView().render_string_template( { - "template": "It was the $first_adj of times; it was the $second_adj of times. Blahhh$hhhh. $Bloooop. " # noqa: E501 + "template": "It was the $first_adj of times; it was the $second_adj of times. Blahhh$hhhh. $Bloooop. " # noqa: E501 # FIXME CoP "Bleep$.", "params": { "first_adj": "best", @@ -150,7 +150,7 @@ def test_render_template_with_extra_dollar_signs_in_template(): ) assert ( result.replace(" ", "").replace("\t", "").replace("\n", "") - == "It was the best of times; it was the worst of times. Blahhh$hhhh. $Bloooop. Bleep$.".replace( # noqa: E501 + == "It was the best of times; it was the worst of times. Blahhh$hhhh. $Bloooop. Bleep$.".replace( # noqa: E501 # FIXME CoP " ", "" ) .replace("\t", "") @@ -159,7 +159,7 @@ def test_render_template_with_extra_dollar_signs_in_template(): result = DefaultJinjaPageView().render_string_template( { - "template": "It was the $first_adj of times; it was the $second_adj of times. Blahhh$$$hhhh. $$Bloooop. Bleep$$$$$.", # noqa: E501 + "template": "It was the $first_adj of times; it was the $second_adj of times. Blahhh$$$hhhh. $$Bloooop. Bleep$$$$$.", # noqa: E501 # FIXME CoP "params": { "first_adj": "best", "second_adj": "worst", @@ -168,7 +168,7 @@ def test_render_template_with_extra_dollar_signs_in_template(): ) assert ( result.replace(" ", "").replace("\t", "").replace("\n", "") - == "It was the best of times; it was the worst of times. Blahhh$$$hhhh. $$Bloooop. 
Bleep$$$$$.".replace( # noqa: E501 + == "It was the best of times; it was the worst of times. Blahhh$$$hhhh. $$Bloooop. Bleep$$$$$.".replace( # noqa: E501 # FIXME CoP " ", "" ) .replace("\t", "") @@ -179,7 +179,7 @@ def test_render_template_with_extra_dollar_signs_in_template(): def test_render_template_with_extra_dollar_signs_in_param_values(): result = DefaultJinjaPageView().render_string_template( { - "template": "It was the $first_adj of times; it was the $second_adj of times. Blahhh$hhhh. $Bloooop. " # noqa: E501 + "template": "It was the $first_adj of times; it was the $second_adj of times. Blahhh$hhhh. $Bloooop. " # noqa: E501 # FIXME CoP "Bleep$.", "params": { "first_adj": "$best$", @@ -189,7 +189,7 @@ def test_render_template_with_extra_dollar_signs_in_param_values(): ) assert ( result.replace(" ", "").replace("\t", "").replace("\n", "") - == "It was the $best$ of times; it was the $$worst$ of times. Blahhh$hhhh. $Bloooop. Bleep$.".replace( # noqa: E501 + == "It was the $best$ of times; it was the $$worst$ of times. Blahhh$hhhh. $Bloooop. 
Bleep$.".replace( # noqa: E501 # FIXME CoP " ", "" ) .replace("\t", "") diff --git a/tests/render/test_util.py b/tests/render/test_util.py index e17c82c693c0..d69e8e6d6cf4 100644 --- a/tests/render/test_util.py +++ b/tests/render/test_util.py @@ -58,7 +58,7 @@ def test_num_to_str(): assert num_to_str(f, precision=10) == "100" assert num_to_str(f, precision=10, use_locale=True) == "100" - f = 1000 # If we have a number longer than our precision, we should still be able to correctly format # noqa: E501 + f = 1000 # If we have a number longer than our precision, we should still be able to correctly format # noqa: E501 # FIXME CoP assert num_to_str(f, precision=4) == "1000" assert num_to_str(f) == "1000" @@ -498,7 +498,7 @@ def test_build_count_and_index_table_with_column_pair(): unexpected_index_column_names=unexpected_index_column_names, ) assert header_row == ["Unexpected Value", "Count", "pk_2"] - assert table_rows == [ # noqa: PLR1714 + assert table_rows == [ # noqa: PLR1714 # FIXME CoP ["('desk', 'eraser')", 3, "three, four, five"] ] or table_rows == [["('eraser', 'desk')", 3, "three, four, five"]] diff --git a/tests/scripts/test_public_api_report.py b/tests/scripts/test_public_api_report.py index b254e40d6da9..a3e05b2aa030 100644 --- a/tests/scripts/test_public_api_report.py +++ b/tests/scripts/test_public_api_report.py @@ -62,7 +62,7 @@ def sample_docs_example_python_file_string() -> str: class_name: Something \"\"\" -""" # noqa: E501 +""" # noqa: E501 # FIXME CoP @pytest.fixture @@ -150,27 +150,31 @@ def sample_markdown_doc_with_yaml() -> str: @pytest.fixture -def repo_root() -> pathlib.Path: - return pathlib.Path("/some/absolute/path/repo_root/") +def repo_root(tmp_path) -> pathlib.Path: + return tmp_path @pytest.fixture def sample_docs_example_python_file_string_filepath( repo_root: pathlib.Path, ) -> pathlib.Path: - return ( + path = ( repo_root / pathlib.Path("tests/integration/docusaurus/sample_docs_example_python_file_string.py") ).relative_to(repo_root) 
+ path.touch() + return path @pytest.fixture def sample_with_definitions_python_file_string_filepath( repo_root: pathlib.Path, ) -> pathlib.Path: - return ( - repo_root / pathlib.Path("great_expectations/sample_with_definitions_python_file_string.py") - ).relative_to(repo_root) + path = (repo_root / pathlib.Path("sample_with_definitions_python_file_string.py")).relative_to( + repo_root + ) + path.touch() + return path @pytest.fixture @@ -222,9 +226,6 @@ def empty_docs_example_parser( class TestDocExampleParser: - def test_instantiate(self, docs_example_parser: DocsExampleParser): - assert isinstance(docs_example_parser, DocsExampleParser) - def test_retrieve_all_usages_in_files(self, docs_example_parser: DocsExampleParser): usages = docs_example_parser.get_names_from_usage_in_docs_examples() assert usages == { @@ -251,9 +252,6 @@ def code_parser(sample_with_definitions_file_contents: FileContents) -> CodePars class TestCodeParser: - def test_instantiate(self, code_parser: CodeParser): - assert isinstance(code_parser, CodeParser) - def test_get_all_class_method_and_function_names(self, code_parser: CodeParser): names = code_parser.get_all_class_method_and_function_names() assert names == { @@ -298,7 +296,7 @@ def test_get_all_class_method_and_function_definitions(self, code_parser: CodePa "example_staticmethod", } assert {d.filepath for d in definitions} == { - pathlib.Path("great_expectations/sample_with_definitions_python_file_string.py") + pathlib.Path("sample_with_definitions_python_file_string.py") } @@ -357,7 +355,7 @@ def test__get_import_names(various_imports: str): for node in ast.walk(tree): if isinstance(node, (ast.Import, ast.ImportFrom)): - import_names.extend(_get_import_names(node)) # type: ignore[arg-type] + import_names.extend(_get_import_names(node)) # type: ignore[arg-type] # FIXME CoP assert import_names == [ "some_module", @@ -372,16 +370,11 @@ def test__get_import_names(various_imports: str): @pytest.fixture -def public_api_checker( - 
docs_example_parser: DocsExampleParser, code_parser: CodeParser -) -> PublicAPIChecker: +def public_api_checker(code_parser: CodeParser) -> PublicAPIChecker: return PublicAPIChecker(code_parser=code_parser) class TestPublicAPIChecker: - def test_instantiate(self, public_api_checker: PublicAPIChecker): - assert isinstance(public_api_checker, PublicAPIChecker) - def test_get_all_public_api_definitions(self, public_api_checker: PublicAPIChecker): observed = public_api_checker.get_all_public_api_definitions() assert len(observed) == 6 @@ -394,7 +387,7 @@ def test_get_all_public_api_definitions(self, public_api_checker: PublicAPICheck "example_public_staticmethod", } assert {d.filepath for d in observed} == { - pathlib.Path("great_expectations/sample_with_definitions_python_file_string.py") + pathlib.Path("sample_with_definitions_python_file_string.py") } def _class_and_function_definitions( @@ -408,7 +401,9 @@ def _class_and_function_definitions( return definitions - def test_is_definition_marked_public_api_yes(self, public_api_checker: PublicAPIChecker): + def test_is_definition_marked_public_api_yes( + self, public_api_checker: PublicAPIChecker, tmp_path: pathlib.Path + ): file_string = """ @public_api def example_public_api_module_level_function(): @@ -437,11 +432,12 @@ def example_multiple_decorator_public_api_method(self): pass """ + test_path = tmp_path / "test_path.py" ast_definitions = self._class_and_function_definitions(tree=ast.parse(file_string)) definitions = [ Definition( name="test_name", - filepath=pathlib.Path("test_path"), + filepath=pathlib.Path(test_path), ast_definition=ast_definition, ) for ast_definition in ast_definitions @@ -451,7 +447,9 @@ def example_multiple_decorator_public_api_method(self): for definition in definitions ) - def test_is_definition_marked_public_api_no(self, public_api_checker: PublicAPIChecker): + def test_is_definition_marked_public_api_no( + self, public_api_checker: PublicAPIChecker, tmp_path: pathlib.Path + ): 
file_string = """ def example_module_level_function(): pass @@ -474,11 +472,12 @@ def example_multiple_decorator_public_method(self): pass """ + test_path = tmp_path / "test_path.py" ast_definitions = self._class_and_function_definitions(tree=ast.parse(file_string)) definitions = [ Definition( name="test_name", - filepath=pathlib.Path("test_path"), + filepath=pathlib.Path(test_path), ast_definition=ast_definition, ) for ast_definition in ast_definitions @@ -510,6 +509,7 @@ def code_reference_filter_with_non_default_include_exclude( docs_example_parser: DocsExampleParser, code_parser: CodeParser, public_api_checker: PublicAPIChecker, + sample_docs_example_python_file_string_filepath: pathlib.Path, ) -> CodeReferenceFilter: return CodeReferenceFilter( repo_root=repo_root, @@ -518,12 +518,16 @@ def code_reference_filter_with_non_default_include_exclude( public_api_checker=public_api_checker, includes=[ IncludeExcludeDefinition( - reason="test", name="test_name", filepath=pathlib.Path("test_path") + reason="test", + name="test_name", + filepath=sample_docs_example_python_file_string_filepath, ) ], excludes=[ IncludeExcludeDefinition( - reason="test", name="test_name", filepath=pathlib.Path("test_path") + reason="test", + name="test_name", + filepath=sample_docs_example_python_file_string_filepath, ) ], ) @@ -552,6 +556,7 @@ def code_reference_filter_with_exclude_by_file( docs_example_parser: DocsExampleParser, code_parser: CodeParser, public_api_checker: PublicAPIChecker, + sample_with_definitions_python_file_string_filepath: pathlib.Path, ) -> CodeReferenceFilter: return CodeReferenceFilter( repo_root=repo_root, @@ -562,9 +567,7 @@ def code_reference_filter_with_exclude_by_file( excludes=[ IncludeExcludeDefinition( reason="test", - filepath=pathlib.Path( - "great_expectations/sample_with_definitions_python_file_string.py" - ), + filepath=sample_with_definitions_python_file_string_filepath, ) ], ) @@ -592,6 +595,7 @@ def 
code_reference_filter_with_exclude_by_file_and_name( docs_example_parser: DocsExampleParser, code_parser: CodeParser, public_api_checker: PublicAPIChecker, + sample_with_definitions_python_file_string_filepath: pathlib.Path, ) -> CodeReferenceFilter: return CodeReferenceFilter( repo_root=repo_root, @@ -603,16 +607,12 @@ def code_reference_filter_with_exclude_by_file_and_name( IncludeExcludeDefinition( reason="test", name="example_method", - filepath=pathlib.Path( - "great_expectations/sample_with_definitions_python_file_string.py" - ), + filepath=sample_with_definitions_python_file_string_filepath, ), IncludeExcludeDefinition( reason="test", name="example_module_level_function", - filepath=pathlib.Path( - "great_expectations/sample_with_definitions_python_file_string.py" - ), + filepath=sample_with_definitions_python_file_string_filepath, ), ], ) @@ -624,6 +624,7 @@ def code_reference_filter_with_include_by_file_and_name_already_included( docs_example_parser: DocsExampleParser, code_parser: CodeParser, public_api_checker: PublicAPIChecker, + sample_with_definitions_python_file_string_filepath: pathlib.Path, ) -> CodeReferenceFilter: return CodeReferenceFilter( repo_root=repo_root, @@ -634,16 +635,12 @@ def code_reference_filter_with_include_by_file_and_name_already_included( IncludeExcludeDefinition( reason="test", name="example_method", - filepath=pathlib.Path( - "great_expectations/sample_with_definitions_python_file_string.py" - ), + filepath=sample_with_definitions_python_file_string_filepath, ), IncludeExcludeDefinition( reason="test", name="example_module_level_function", - filepath=pathlib.Path( - "great_expectations/sample_with_definitions_python_file_string.py" - ), + filepath=sample_with_definitions_python_file_string_filepath, ), ], excludes=[], @@ -656,6 +653,7 @@ def code_reference_filter_with_include_by_file_and_name_already_excluded( docs_example_parser: DocsExampleParser, code_parser: CodeParser, public_api_checker: PublicAPIChecker, + 
sample_with_definitions_python_file_string_filepath: pathlib.Path, ) -> CodeReferenceFilter: return CodeReferenceFilter( repo_root=repo_root, @@ -666,24 +664,18 @@ def code_reference_filter_with_include_by_file_and_name_already_excluded( IncludeExcludeDefinition( reason="test", name="example_method", - filepath=pathlib.Path( - "great_expectations/sample_with_definitions_python_file_string.py" - ), + filepath=sample_with_definitions_python_file_string_filepath, ), IncludeExcludeDefinition( reason="test", name="example_module_level_function", - filepath=pathlib.Path( - "great_expectations/sample_with_definitions_python_file_string.py" - ), + filepath=sample_with_definitions_python_file_string_filepath, ), ], excludes=[ IncludeExcludeDefinition( reason="test", - filepath=pathlib.Path( - "great_expectations/sample_with_definitions_python_file_string.py" - ), + filepath=sample_with_definitions_python_file_string_filepath, ) ], ) @@ -695,6 +687,7 @@ def code_reference_filter_with_include_by_file_and_name_not_used_in_docs_example docs_example_parser: DocsExampleParser, code_parser: CodeParser, public_api_checker: PublicAPIChecker, + sample_with_definitions_python_file_string_filepath: pathlib.Path, ) -> CodeReferenceFilter: return CodeReferenceFilter( repo_root=repo_root, @@ -705,17 +698,13 @@ def code_reference_filter_with_include_by_file_and_name_not_used_in_docs_example IncludeExcludeDefinition( reason="test", name="example_no_usages_in_sample_docs_example_python_file_string", - filepath=pathlib.Path( - "great_expectations/sample_with_definitions_python_file_string.py" - ), + filepath=sample_with_definitions_python_file_string_filepath, ), ], excludes=[ IncludeExcludeDefinition( reason="test", - filepath=pathlib.Path( - "great_expectations/sample_with_definitions_python_file_string.py" - ), + filepath=sample_with_definitions_python_file_string_filepath, ) ], ) @@ -723,7 +712,6 @@ def code_reference_filter_with_include_by_file_and_name_not_used_in_docs_example class 
TestCodeReferenceFilter: def test_instantiate(self, code_reference_filter: CodeReferenceFilter): - assert isinstance(code_reference_filter, CodeReferenceFilter) assert code_reference_filter.excludes assert code_reference_filter.includes @@ -732,7 +720,6 @@ def test_instantiate_with_non_default_include_exclude( code_reference_filter_with_non_default_include_exclude: CodeReferenceFilter, ): code_reference_filter = code_reference_filter_with_non_default_include_exclude - assert isinstance(code_reference_filter, CodeReferenceFilter) assert code_reference_filter.excludes assert code_reference_filter.includes assert len(code_reference_filter.excludes) == 1 @@ -755,7 +742,7 @@ def test_filter_definitions_no_include_exclude( "example_staticmethod", } assert {d.filepath for d in observed} == { - pathlib.Path("great_expectations/sample_with_definitions_python_file_string.py") + pathlib.Path("sample_with_definitions_python_file_string.py") } def test_filter_definitions_with_references_from_docs_content( @@ -766,7 +753,7 @@ def test_filter_definitions_with_references_from_docs_content( assert len(observed) == 1 assert {d.name for d in observed} == {"ExampleClass"} assert {d.filepath for d in observed} == { - pathlib.Path("great_expectations/sample_with_definitions_python_file_string.py") + pathlib.Path("sample_with_definitions_python_file_string.py") } def test_filter_definitions_exclude_by_file( @@ -789,7 +776,7 @@ def test_filter_definitions_exclude_by_file_and_name( "example_staticmethod", } assert {d.filepath for d in observed} == { - pathlib.Path("great_expectations/sample_with_definitions_python_file_string.py") + pathlib.Path("sample_with_definitions_python_file_string.py") } def test_filter_definitions_include_by_file_and_name_already_included( @@ -802,7 +789,7 @@ def test_filter_definitions_include_by_file_and_name_already_included( will not include multiple copies of the same definitions (when not accounting for different but equivalent ast definition object 
instances). """ - observed = code_reference_filter_with_include_by_file_and_name_already_included.filter_definitions() # noqa: E501 + observed = code_reference_filter_with_include_by_file_and_name_already_included.filter_definitions() # noqa: E501 # FIXME CoP # There are two extra (8 vs 6) here due to the ast_definition classes # pointing to different but equivalent objects. assert len(observed) == 8 @@ -815,7 +802,7 @@ def test_filter_definitions_include_by_file_and_name_already_included( "example_staticmethod", } assert {d.filepath for d in observed} == { - pathlib.Path("great_expectations/sample_with_definitions_python_file_string.py") + pathlib.Path("sample_with_definitions_python_file_string.py") } def test_filter_definitions_include_by_file_and_name_already_excluded( @@ -826,7 +813,7 @@ def test_filter_definitions_include_by_file_and_name_already_excluded( Include overrides exclude. """ - observed = code_reference_filter_with_include_by_file_and_name_already_excluded.filter_definitions() # noqa: E501 + observed = code_reference_filter_with_include_by_file_and_name_already_excluded.filter_definitions() # noqa: E501 # FIXME CoP # There are two extra (4 vs 2) here due to the ast_definition classes # pointing to different but equivalent objects. 
assert len(observed) == 4 @@ -835,25 +822,25 @@ def test_filter_definitions_include_by_file_and_name_already_excluded( "example_module_level_function", } assert {d.filepath for d in observed} == { - pathlib.Path("great_expectations/sample_with_definitions_python_file_string.py") + pathlib.Path("sample_with_definitions_python_file_string.py") } def test_filter_definitions_include_by_file_and_name_already_excluded_not_used_in_docs_example( self, - code_reference_filter_with_include_by_file_and_name_not_used_in_docs_example_exclude_file: CodeReferenceFilter, # noqa: E501 + code_reference_filter_with_include_by_file_and_name_not_used_in_docs_example_exclude_file: CodeReferenceFilter, # noqa: E501 # FIXME CoP ): """What does this test and why? Include overrides exclude. Method that was not included in docs examples is still included if manually added. """ - observed = code_reference_filter_with_include_by_file_and_name_not_used_in_docs_example_exclude_file.filter_definitions() # noqa: E501 + observed = code_reference_filter_with_include_by_file_and_name_not_used_in_docs_example_exclude_file.filter_definitions() # noqa: E501 # FIXME CoP assert len(observed) == 1 assert {d.name for d in observed} == { "example_no_usages_in_sample_docs_example_python_file_string", } assert {d.filepath for d in observed} == { - pathlib.Path("great_expectations/sample_with_definitions_python_file_string.py") + pathlib.Path("sample_with_definitions_python_file_string.py") } @@ -880,25 +867,17 @@ def public_api_report_filter_out_file( class TestPublicAPIReport: - def test_instantiate(self, public_api_report: PublicAPIReport): - assert isinstance(public_api_report, PublicAPIReport) - def test_generate_printable_definitions(self, public_api_report: PublicAPIReport): expected: List[str] = [ - "File: great_expectations/sample_with_definitions_python_file_string.py Name: " - "ExampleClass", - "File: great_expectations/sample_with_definitions_python_file_string.py Name: " - "example_classmethod", - 
"File: great_expectations/sample_with_definitions_python_file_string.py Name: " - "example_method", - "File: great_expectations/sample_with_definitions_python_file_string.py Name: " - "example_method_with_args", - "File: great_expectations/sample_with_definitions_python_file_string.py Name: " + "File: sample_with_definitions_python_file_string.py Name: " "ExampleClass", + "File: sample_with_definitions_python_file_string.py Name: " "example_classmethod", + "File: sample_with_definitions_python_file_string.py Name: " "example_method", + "File: sample_with_definitions_python_file_string.py Name: " "example_method_with_args", + "File: sample_with_definitions_python_file_string.py Name: " "example_module_level_function", - "File: great_expectations/sample_with_definitions_python_file_string.py Name: " - "example_staticmethod", + "File: sample_with_definitions_python_file_string.py Name: " "example_staticmethod", ] - observed = public_api_report.generate_printable_definitions() + observed = [str(p) for p in public_api_report.generate_printable_definitions()] assert observed == expected def test_generate_printable_definitions_exclude_by_file( @@ -910,19 +889,19 @@ def test_generate_printable_definitions_exclude_by_file( class TestIncludeExcludeDefinition: - def test_instantiate_name_and_filepath(self): - definition = IncludeExcludeDefinition( - reason="reason", name="name", filepath=pathlib.Path("filepath") + def test_instantiate_name_and_filepath(self, tmp_path: pathlib.Path): + path = tmp_path / "test_path.py" + path.touch() + IncludeExcludeDefinition( + reason="reason", + name="name", + filepath=path, ) - assert isinstance(definition, IncludeExcludeDefinition) - - def test_instantiate_filepath_only(self): - definition = IncludeExcludeDefinition(reason="reason", filepath=pathlib.Path("filepath")) - assert isinstance(definition, IncludeExcludeDefinition) - def test_instantiate_name_and_filepath_no_reason(self): - with pytest.raises(TypeError): - 
IncludeExcludeDefinition(name="name", filepath=pathlib.Path("filepath")) + def test_instantiate_filepath_only(self, tmp_path: pathlib.Path): + path = tmp_path / "test_path.py" + path.touch() + IncludeExcludeDefinition(reason="reason", filepath=path) def test_instantiate_name_only(self): with pytest.raises(ValueError) as exc: diff --git a/tests/sqlalchemy_test_doubles.py b/tests/sqlalchemy_test_doubles.py index c4c9d1f48f03..98d8b4b5f80e 100644 --- a/tests/sqlalchemy_test_doubles.py +++ b/tests/sqlalchemy_test_doubles.py @@ -5,15 +5,15 @@ class MockSaInspector: - def get_columns(self) -> List[Dict[str, Any]]: # type: ignore[empty-body] + def get_columns(self) -> List[Dict[str, Any]]: # type: ignore[empty-body] # FIXME CoP ... - def get_schema_names(self) -> List[str]: # type: ignore[empty-body] + def get_schema_names(self) -> List[str]: # type: ignore[empty-body] # FIXME CoP ... def get_table_names(self, schema: str | None): ... - def has_table(self, table_name: str, schema: str | None) -> bool: # type: ignore[empty-body] + def has_table(self, table_name: str, schema: str | None) -> bool: # type: ignore[empty-body] # FIXME CoP ... 
diff --git a/tests/test_convert_to_json_serializable.py b/tests/test_convert_to_json_serializable.py index 897baba8a5b0..8c2c1a2e8519 100644 --- a/tests/test_convert_to_json_serializable.py +++ b/tests/test_convert_to_json_serializable.py @@ -1,9 +1,10 @@ +import datetime import re import numpy as np import pytest -from great_expectations.util import convert_to_json_serializable +from great_expectations.util import convert_to_json_serializable, ensure_json_serializable try: from shapely.geometry import LineString, MultiPolygon, Point, Polygon @@ -63,3 +64,21 @@ def test_serialization_of_pattern(): pattern_to_test = r"data_(?P\d{4})-(?P\d{2}).csv" data = re.compile(pattern_to_test) assert convert_to_json_serializable(data) == pattern_to_test + + +@pytest.mark.unit +@pytest.mark.parametrize( + "data", [pytest.param({"t": datetime.time(hour=1, minute=30, second=45)}, id="datetime.time")] +) +def test_convert_to_json_serializable_converts_correctly(data: dict): + ret = convert_to_json_serializable(data) + assert ret == {"t": "01:30:45"} + + +@pytest.mark.unit +@pytest.mark.parametrize( + "data", [pytest.param({"t": datetime.time(hour=1, minute=30, second=45)}, id="datetime.time")] +) +def test_ensure_json_serializable(data: dict): + ensure_json_serializable(data) + # Passes if no exception raised diff --git a/tests/test_definitions/query_expectations/unexpected_rows_expectation.json b/tests/test_definitions/query_expectations/unexpected_rows_expectation.json index 3ca3d76148a1..e8af592f8ec8 100644 --- a/tests/test_definitions/query_expectations/unexpected_rows_expectation.json +++ b/tests/test_definitions/query_expectations/unexpected_rows_expectation.json @@ -1,45 +1,37 @@ { - "expectation_type" : "unexpected_rows_expectation", - "datasets" : [{ - "only_for": ["postgresql", "snowflake", "sqlite"], - "dataset_name": "unexpected_rows_expectation_1", - "data" : { - "c1" : [4,5,6,7], - "c2" : ["a","b","c","d"], - "c3" : [null,null,null,null], - "c4" : [4.0, 3.0, 3.5, 1.2] 
- }, - "schemas": { - "spark": { - "c1": "IntegerType", - "c2": "StringType", - "c3": "StringType", - "c4": "FloatType" - } - }, - "tests": [{ - "title": "basic_positive_test", - "include_in_gallery": false, - "exact_match_out" : false, - "in":{ - "unexpected_rows_query": "SELECT * FROM {batch} WHERE c1 > 7" - }, - "out":{ - "success":true, - "observed_value": "0 unexpected rows" - } - }, + "expectation_type": "unexpected_rows_expectation", + "datasets": [ { - "title": "basic_negative_test", - "include_in_gallery": false, - "exact_match_out" : false, - "in":{ - "unexpected_rows_query": "SELECT * FROM {batch} WHERE c1 > 6" + "only_for": ["postgresql", "snowflake", "sqlite"], + "dataset_name": "unexpected_rows_expectation_1", + "data": { + "c1": [4, 5, 6, 7], + "c2": ["a", "b", "c", "d"], + "c3": [null, null, null, null], + "c4": [4.0, 3.0, 3.5, 1.2] + }, + "schemas": { + "spark": { + "c1": "IntegerType", + "c2": "StringType", + "c3": "StringType", + "c4": "FloatType" + } }, - "out":{ - "success":false, - "observed_value": "1 unexpected row" - } - }] - }] + "tests": [ + { + "title": "basic_positive_test", + "include_in_gallery": false, + "exact_match_out": false, + "in": { + "unexpected_rows_query": "SELECT * FROM {batch} WHERE c1 > 7" + }, + "out": { + "success": true, + "observed_value": 0 + } + } + ] + } + ] } diff --git a/tests/test_definitions/test_expectations_v3_api.py b/tests/test_definitions/test_expectations_v3_api.py index 10fdac7662ea..f6772ee065ae 100644 --- a/tests/test_definitions/test_expectations_v3_api.py +++ b/tests/test_definitions/test_expectations_v3_api.py @@ -33,16 +33,16 @@ try: sqliteDialect = sqlalchemy.sqlite.dialect except (ImportError, AttributeError): - sqliteDialect = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] + sqliteDialect = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] # FIXME CoP def pytest_generate_tests(metafunc): # noqa C901 - 35 # Load all the JSON files in the directory - dir_path = 
os.path.dirname(os.path.realpath(__file__)) # noqa: PTH120 + dir_path = os.path.dirname(os.path.realpath(__file__)) # noqa: PTH120 # FIXME CoP expectation_dirs = [ dir_ for dir_ in os.listdir(dir_path) - if os.path.isdir(os.path.join(dir_path, dir_)) # noqa: PTH118, PTH112 + if os.path.isdir(os.path.join(dir_path, dir_)) # noqa: PTH118, PTH112 # FIXME CoP ] parametrized_tests = [] ids = [] @@ -50,7 +50,7 @@ def pytest_generate_tests(metafunc): # noqa C901 - 35 validator_with_data = None for expectation_category in expectation_dirs: - test_configuration_files = glob.glob( # noqa: PTH207 + test_configuration_files = glob.glob( # noqa: PTH207 # FIXME CoP dir_path + "/" + expectation_category + "/*.json" ) for backend in backends: @@ -61,8 +61,8 @@ def pytest_generate_tests(metafunc): # noqa C901 - 35 expectation_type = filename.split(".json")[0].split("/")[-1] for index, test_config in enumerate(test_configuration["datasets"], 1): datasets = [] - # optional only_for and suppress_test flag at the datasets-level that can prevent data being # noqa: E501 - # added to incompatible backends. Currently only used by expect_column_values_to_be_unique # noqa: E501 + # optional only_for and suppress_test flag at the datasets-level that can prevent data being # noqa: E501 # FIXME CoP + # added to incompatible backends. 
Currently only used by expect_column_values_to_be_unique # noqa: E501 # FIXME CoP only_for = test_config.get("only_for") if only_for and not isinstance(only_for, list): # coerce into list if passed in as string @@ -137,7 +137,7 @@ def pytest_generate_tests(metafunc): # noqa C901 - 35 skip_test = False only_for = test.get("only_for") if only_for: - # if we're not on the "only_for" list, then never even generate the test # noqa: E501 + # if we're not on the "only_for" list, then never even generate the test # noqa: E501 # FIXME CoP generate_test = False if not isinstance(only_for, list): # coerce into list if passed in as string @@ -199,7 +199,7 @@ def pytest_generate_tests(metafunc): # noqa C901 - 35 validator_with_data.active_batch_data.sql_engine_dialect, "name", ) - and validator_with_data.active_batch_data.sql_engine_dialect.name # noqa: E501 + and validator_with_data.active_batch_data.sql_engine_dialect.name # noqa: E501 # FIXME CoP == "bigquery" ) ) @@ -212,12 +212,12 @@ def pytest_generate_tests(metafunc): # noqa C901 - 35 validator_with_data.active_batch_data.sql_engine_dialect, "name", ) - and validator_with_data.active_batch_data.sql_engine_dialect.name # noqa: E501 + and validator_with_data.active_batch_data.sql_engine_dialect.name # noqa: E501 # FIXME CoP == "bigquery" ): # : Marker to get the test to only run for CFE - # expect_column_values_to_be_unique:negative_case_all_null_values_bigquery_nones # noqa: E501 - # works in different ways between CFE (V3) and V2 Expectations. This flag allows for # noqa: E501 + # expect_column_values_to_be_unique:negative_case_all_null_values_bigquery_nones # noqa: E501 # FIXME CoP + # works in different ways between CFE (V3) and V2 Expectations. 
This flag allows for # noqa: E501 # FIXME CoP # the test to only be run in the CFE case generate_test = True elif ( @@ -228,7 +228,7 @@ def pytest_generate_tests(metafunc): # noqa C901 - 35 validator_with_data.active_batch_data.sql_engine_dialect, "name", ) - and validator_with_data.active_batch_data.sql_engine_dialect.name # noqa: E501 + and validator_with_data.active_batch_data.sql_engine_dialect.name # noqa: E501 # FIXME CoP == "trino" ): generate_test = True @@ -354,7 +354,7 @@ def pytest_generate_tests(metafunc): # noqa C901 - 35 validator_with_data.active_batch_data.sql_engine_dialect, "name", ) - and validator_with_data.active_batch_data.sql_engine_dialect.name # noqa: E501 + and validator_with_data.active_batch_data.sql_engine_dialect.name # noqa: E501 # FIXME CoP == "bigquery" ) or ( @@ -369,7 +369,7 @@ def pytest_generate_tests(metafunc): # noqa C901 - 35 validator_with_data.active_batch_data.sql_engine_dialect, "name", ) - and validator_with_data.active_batch_data.sql_engine_dialect.name # noqa: E501 + and validator_with_data.active_batch_data.sql_engine_dialect.name # noqa: E501 # FIXME CoP == "bigquery" ) or ( @@ -385,7 +385,7 @@ def pytest_generate_tests(metafunc): # noqa C901 - 35 validator_with_data.active_batch_data.sql_engine_dialect, "name", ) - and validator_with_data.active_batch_data.sql_engine_dialect.name # noqa: E501 + and validator_with_data.active_batch_data.sql_engine_dialect.name # noqa: E501 # FIXME CoP == "trino" ) or ( diff --git a/tests/test_deprecation.py b/tests/test_deprecation.py index 4f874981f3e2..683d62fcc057 100644 --- a/tests/test_deprecation.py +++ b/tests/test_deprecation.py @@ -21,7 +21,7 @@ def regex_for_deprecation_comments() -> Pattern: @pytest.fixture def files_with_deprecation_warnings() -> List[str]: - files: List[str] = glob.glob( # noqa: PTH207 + files: List[str] = glob.glob( # noqa: PTH207 # FIXME CoP "great_expectations/**/*.py", recursive=True ) files_to_exclude = [ @@ -30,7 +30,6 @@ def 
files_with_deprecation_warnings() -> List[str]: "great_expectations/compatibility/pyspark.py", "great_expectations/compatibility/sqlalchemy_and_pandas.py", "great_expectations/compatibility/sqlalchemy_compatibility_wrappers.py", - "great_expectations/rule_based_profiler/altair/encodings.py", # ignoring because of imprecise matching logic # noqa: E501 ] for file_to_exclude in files_to_exclude: if file_to_exclude in files: @@ -108,5 +107,5 @@ def test_deprecation_warnings_have_been_removed_after_two_minor_versions( # Chetan - 20220316 - Once v0.16.0 lands, this should be cleaned up and made 0. if len(unneeded_deprecation_warnings) > UNNEEDED_DEPRECATION_WARNINGS_THRESHOLD: raise ValueError( - f"Found {len(unneeded_deprecation_warnings)} warnings but threshold is {UNNEEDED_DEPRECATION_WARNINGS_THRESHOLD}; please adjust accordingly" # noqa: E501 + f"Found {len(unneeded_deprecation_warnings)} warnings but threshold is {UNNEEDED_DEPRECATION_WARNINGS_THRESHOLD}; please adjust accordingly" # noqa: E501 # FIXME CoP ) diff --git a/tests/test_ge_utils.py b/tests/test_ge_utils.py index 8ff32a37494d..f65d3b219a1e 100644 --- a/tests/test_ge_utils.py +++ b/tests/test_ge_utils.py @@ -25,7 +25,7 @@ def datetime_array(): week_idx: int return [ - datetime.datetime(2021, 1, 1, 0, 0, 0) + datetime.timedelta(days=(week_idx * 7)) # noqa: DTZ001 + datetime.datetime(2021, 1, 1, 0, 0, 0) + datetime.timedelta(days=(week_idx * 7)) # noqa: DTZ001 # FIXME CoP for week_idx in range(4) ] @@ -35,7 +35,7 @@ def datetime_string_array(): week_idx: int return [ ( - datetime.datetime(2021, 1, 1, 0, 0, 0) + datetime.timedelta(days=(week_idx * 7)) # noqa: DTZ001 + datetime.datetime(2021, 1, 1, 0, 0, 0) + datetime.timedelta(days=(week_idx * 7)) # noqa: DTZ001 # FIXME CoP ).isoformat() for week_idx in range(4) ] @@ -50,18 +50,18 @@ def numeric_array(): @pytest.mark.unit def test_gen_directory_tree_str(tmpdir): project_dir = str(tmpdir.mkdir("project_dir")) - os.mkdir(os.path.join(project_dir, "BBB")) # 
noqa: PTH102, PTH118 - with open(os.path.join(project_dir, "BBB", "bbb.txt"), "w") as f: # noqa: PTH118 + os.mkdir(os.path.join(project_dir, "BBB")) # noqa: PTH102, PTH118 # FIXME CoP + with open(os.path.join(project_dir, "BBB", "bbb.txt"), "w") as f: # noqa: PTH118 # FIXME CoP f.write("hello") - with open(os.path.join(project_dir, "BBB", "aaa.txt"), "w") as f: # noqa: PTH118 + with open(os.path.join(project_dir, "BBB", "aaa.txt"), "w") as f: # noqa: PTH118 # FIXME CoP f.write("hello") - os.mkdir(os.path.join(project_dir, "AAA")) # noqa: PTH102, PTH118 + os.mkdir(os.path.join(project_dir, "AAA")) # noqa: PTH102, PTH118 # FIXME CoP res = gx.util.gen_directory_tree_str(project_dir) print(res) - # Note: files and directories are sorteds alphabetically, so that this method can be used for testing. # noqa: E501 + # Note: files and directories are sorteds alphabetically, so that this method can be used for testing. # noqa: E501 # FIXME CoP assert ( res == """\ @@ -76,7 +76,7 @@ def test_gen_directory_tree_str(tmpdir): @pytest.mark.unit def test_nested_update(): - # nested_update is useful for update nested dictionaries (such as batch_kwargs with reader_options as a dictionary) # noqa: E501 + # nested_update is useful for update nested dictionaries (such as batch_kwargs with reader_options as a dictionary) # noqa: E501 # FIXME CoP batch_kwargs = { "path": "/a/path", "reader_method": "read_csv", @@ -94,7 +94,7 @@ def test_nested_update(): @pytest.mark.unit def test_nested_update_lists(): - # nested_update is useful for update nested dictionaries (such as batch_kwargs with reader_options as a dictionary) # noqa: E501 + # nested_update is useful for update nested dictionaries (such as batch_kwargs with reader_options as a dictionary) # noqa: E501 # FIXME CoP dependencies = { "suite.warning": {"metric.name": ["column=foo"]}, "suite.failure": {"metric.blarg": [""]}, @@ -549,7 +549,7 @@ def test_convert_ndarray_float_to_datetime_tuple( with pytest.raises(TypeError) as e: _ = 
convert_ndarray_float_to_datetime_tuple(data=datetime_array) - # Error message varies based on version but mainly looking to validate type error by not using integer # noqa: E501 + # Error message varies based on version but mainly looking to validate type error by not using integer # noqa: E501 # FIXME CoP assert all(string in str(e.value) for string in ("datetime.datetime", "integer")) diff --git a/tests/test_packaging.py b/tests/test_packaging.py index 7e5671c3e0e5..7237a686565d 100644 --- a/tests/test_packaging.py +++ b/tests/test_packaging.py @@ -63,7 +63,7 @@ def parse_requirements_files_to_specs( abs_path = req_file.absolute().as_posix() key = abs_path.rsplit(os.path.sep, 1)[-1] with open(req_file) as f: - req_set_dict[key] = {line.name: line.specs for line in rp.parse(f) if line.specs} # type: ignore[misc] + req_set_dict[key] = {line.name: line.specs for line in rp.parse(f) if line.specs} # type: ignore[misc] # FIXME CoP return req_set_dict @@ -193,7 +193,7 @@ def test_polish_and_ratchet_pins_and_upper_bounds(): ) # Polish and ratchet this number down as low as possible - assert len(sorted_packages_with_pins_or_upper_bounds) == 40 + assert len(sorted_packages_with_pins_or_upper_bounds) == 36 assert set(sorted_packages_with_pins_or_upper_bounds) == { ( "requirements-dev-api-docs-test.txt", @@ -207,11 +207,6 @@ def test_polish_and_ratchet_pins_and_upper_bounds(): ("requirements-dev-lite.txt", "moto", (("<", "5.0"), (">=", "4.2.13"))), ("requirements-dev-pagerduty.txt", "pypd", (("==", "1.1.0"),)), ("requirements-dev-snowflake.txt", "pandas", (("<", "2.2.0"),)), - ( - "requirements-dev-snowflake.txt", - "snowflake-sqlalchemy", - (("<", "1.7.0"), (">=", "1.2.3")), - ), ("requirements-dev-sqlalchemy.txt", "moto", (("<", "5.0"), (">=", "4.2.13"))), ("requirements-dev-sqlalchemy.txt", "pandas", (("<", "2.2.0"),)), ( @@ -219,11 +214,6 @@ def test_polish_and_ratchet_pins_and_upper_bounds(): "pyathena", (("<", "3"), (">=", "2.0.0")), ), - ( - 
"requirements-dev-sqlalchemy.txt", - "snowflake-sqlalchemy", - (("<", "1.7.0"), (">=", "1.2.3")), - ), ("requirements-dev-sqlalchemy.txt", "sqlalchemy", (("<", "2.0.0"),)), ( "requirements-dev-sqlalchemy.txt", @@ -236,11 +226,6 @@ def test_polish_and_ratchet_pins_and_upper_bounds(): (("==", "17.0.0.5"),), ), ("requirements-dev-sqlalchemy1.txt", "sqlalchemy", (("<", "2.0.0"),)), - ( - "requirements-dev-sqlalchemy2.txt", - "snowflake-sqlalchemy", - (("<", "1.7.0"), (">=", "1.6")), - ), ( "requirements-dev-teradata.txt", "teradatasqlalchemy", @@ -255,10 +240,9 @@ def test_polish_and_ratchet_pins_and_upper_bounds(): ("requirements-dev.txt", "marshmallow", (("<", "4.0.0"), (">=", "3.7.1"))), ("requirements-dev.txt", "moto", (("<", "5.0"), (">=", "4.2.13"))), ("requirements-dev.txt", "pandas", (("<", "2.2.0"),)), - ("requirements-dev.txt", "posthog", (("<", "3"), (">=", "2.1.0"))), + ("requirements-dev.txt", "posthog", (("<", "4"), (">", "3"))), ("requirements-dev.txt", "pyathena", (("<", "3"), (">=", "2.0.0"))), ("requirements-dev.txt", "pypd", (("==", "1.1.0"),)), - ("requirements-dev.txt", "snowflake-sqlalchemy", (("<", "1.7.0"), (">=", "1.2.3"))), ("requirements-dev.txt", "sqlalchemy", (("<", "2.0.0"),)), ("requirements-dev.txt", "sqlalchemy-dremio", (("==", "1.2.1"),)), ("requirements-dev.txt", "teradatasqlalchemy", (("==", "17.0.0.5"),)), @@ -266,5 +250,5 @@ def test_polish_and_ratchet_pins_and_upper_bounds(): ("requirements.txt", "altair", (("<", "5.0.0"), (">=", "4.2.1"))), ("requirements.txt", "marshmallow", (("<", "4.0.0"), (">=", "3.7.1"))), ("requirements.txt", "pandas", (("<", "2.2"),)), - ("requirements.txt", "posthog", (("<", "3"), (">=", "2.1.0"))), + ("requirements.txt", "posthog", (("<", "4"), (">", "3"))), } diff --git a/tests/test_utils.py b/tests/test_utils.py index 88d012aa4038..7ccdb3948adb 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -7,6 +7,7 @@ from typing import Dict, List, Optional, Tuple, Union, cast import pandas as pd +from 
sqlalchemy.exc import ProgrammingError import great_expectations.exceptions as gx_exceptions from great_expectations.alias_types import PathStr @@ -43,7 +44,7 @@ def safe_remove(path): if path is not None: try: - os.remove(path) # noqa: PTH107 + os.remove(path) # noqa: PTH107 # FIXME CoP except OSError as e: print(e) @@ -55,17 +56,17 @@ def create_files_in_directory( for file_name in file_name_list: splits = file_name.split("/") for i in range(1, len(splits)): - subdirectories.append(os.path.join(*splits[:i])) # noqa: PTH118 + subdirectories.append(os.path.join(*splits[:i])) # noqa: PTH118 # FIXME CoP subdirectories = set(subdirectories) for subdirectory in subdirectories: - os.makedirs( # noqa: PTH103 - os.path.join(directory, subdirectory), # noqa: PTH118 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(directory, subdirectory), # noqa: PTH118 # FIXME CoP exist_ok=True, ) for file_name in file_name_list: - file_path = os.path.join(directory, file_name) # noqa: PTH118 + file_path = os.path.join(directory, file_name) # noqa: PTH118 # FIXME CoP with open(file_path, "w") as f_: f_.write(file_content_fn()) @@ -132,7 +133,7 @@ def build_tuple_filesystem_store_backend( ) -> StoreBackend: logger.debug( f"""Starting data_context/store/util.py#build_tuple_filesystem_store_backend using base_directory: -"{base_directory}""" # noqa: E501 +"{base_directory}""" # noqa: E501 # FIXME CoP ) store_backend_config: dict = { "module_name": module_name, @@ -202,7 +203,7 @@ def build_configuration_store( store_backend = store_backend.config elif not isinstance(store_backend, dict): raise gx_exceptions.DataContextError( - "Invalid configuration: A store_backend needs to be a dictionary or inherit from the StoreBackend class." # noqa: E501 + "Invalid configuration: A store_backend needs to be a dictionary or inherit from the StoreBackend class." 
# noqa: E501 # FIXME CoP ) store_backend.update(**kwargs) @@ -214,7 +215,7 @@ def build_configuration_store( "overwrite_existing": overwrite_existing, "store_backend": store_backend, } - configuration_store: ConfigurationStore = Store.build_store_from_config( # type: ignore[assignment] + configuration_store: ConfigurationStore = Store.build_store_from_config( # type: ignore[assignment] # FIXME CoP config=store_config, module_name=module_name, runtime_environment=None, @@ -312,7 +313,7 @@ def load_config_from_store_backend( key = ConfigurationIdentifier( configuration_key=configuration_key, ) - return config_store.get(key=key) # type: ignore[return-value] + return config_store.get(key=key) # type: ignore[return-value] # FIXME CoP def delete_config_from_filesystem( @@ -375,7 +376,7 @@ def get_bigquery_table_prefix() -> str: Returns: String of table prefix, which is the gcp_project and dataset concatenated by a "." - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP gcp_project = os.environ.get("GE_TEST_GCP_PROJECT") if not gcp_project: raise ValueError( @@ -427,7 +428,7 @@ def load_and_concatenate_csvs( Returns: A pandas dataframe concatenating data loaded from all csvs. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if convert_column_names_to_datetime is None: convert_column_names_to_datetime = [] @@ -441,7 +442,7 @@ def load_and_concatenate_csvs( df=df, column_names_to_convert=convert_column_names_to_datetime ) if not load_full_dataset: - # Improving test performance by only loading the first 10 rows of our test data into the db # noqa: E501 + # Improving test performance by only loading the first 10 rows of our test data into the db # noqa: E501 # FIXME CoP df = df.head(10) dfs.append(df) @@ -457,7 +458,7 @@ def convert_string_columns_to_datetime( """ Converts specified columns (e.g., "pickup_datetime" and "dropoff_datetime") to datetime column type. Side-effect: Passed DataFrame is modified (in-place). 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if column_names_to_convert is None: column_names_to_convert = [] @@ -466,7 +467,7 @@ def convert_string_columns_to_datetime( df[column_name_to_convert] = pd.to_datetime(df[column_name_to_convert]) -def load_data_into_test_database( # noqa: C901, PLR0912, PLR0915 +def load_data_into_test_database( # noqa: C901, PLR0912, PLR0915 # FIXME CoP table_name: str, connection_string: str, schema_name: Optional[str] = None, @@ -496,7 +497,7 @@ def load_data_into_test_database( # noqa: C901, PLR0912, PLR0915 drop_existing_table: boolean value. If set to false, will append to existing table Returns: LoadedTable which for convenience, contains the pandas dataframe that was used to load the data. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if csv_path and csv_paths: csv_paths.append(csv_path) elif csv_path and not csv_paths: @@ -517,13 +518,13 @@ def load_data_into_test_database( # noqa: C901, PLR0912, PLR0915 engine = sa.create_engine(connection_string) else: logger.debug( - "Attempting to load data in to tests SqlAlchemy database, but unable to load SqlAlchemy context; " # noqa: E501 + "Attempting to load data in to tests SqlAlchemy database, but unable to load SqlAlchemy context; " # noqa: E501 # FIXME CoP "install optional sqlalchemy dependency for support." 
) return return_value if engine.dialect.name.lower().startswith("mysql"): - # Don't attempt to DROP TABLE IF EXISTS on a table that doesn't exist in mysql because it will error # noqa: E501 + # Don't attempt to DROP TABLE IF EXISTS on a table that doesn't exist in mysql because it will error # noqa: E501 # FIXME CoP inspector = inspect(engine) db_name = connection_string.split("/")[-1] table_names = [name for name in inspector.get_table_names(schema=db_name)] @@ -545,10 +546,10 @@ def load_data_into_test_database( # noqa: C901, PLR0912, PLR0915 ) return return_value except SQLAlchemyError: - error_message: str = """Docs integration tests encountered an error while loading test-data into test-database.""" # noqa: E501 - logger.error(error_message) # noqa: TRY400 + error_message: str = """Docs integration tests encountered an error while loading test-data into test-database.""" # noqa: E501 # FIXME CoP + logger.error(error_message) # noqa: TRY400 # FIXME CoP raise gx_exceptions.DatabaseConnectionError(error_message) - # Normally we would call `raise` to re-raise the SqlAlchemyError but we don't to make sure that # noqa: E501 + # Normally we would call `raise` to re-raise the SqlAlchemyError but we don't to make sure that # noqa: E501 # FIXME CoP # sensitive information does not make it into our CI logs. 
finally: connection.close() @@ -575,10 +576,10 @@ def load_data_into_test_database( # noqa: C901, PLR0912, PLR0915 ) return return_value except SQLAlchemyError: - error_message: str = """Docs integration tests encountered an error while loading test-data into test-database.""" # noqa: E501 - logger.error(error_message) # noqa: TRY400 + error_message: str = """Docs integration tests encountered an error while loading test-data into test-database.""" # noqa: E501 # FIXME CoP + logger.error(error_message) # noqa: TRY400 # FIXME CoP raise gx_exceptions.DatabaseConnectionError(error_message) - # Normally we would call `raise` to re-raise the SqlAlchemyError but we don't to make sure that # noqa: E501 + # Normally we would call `raise` to re-raise the SqlAlchemyError but we don't to make sure that # noqa: E501 # FIXME CoP # sensitive information does not make it into our CI logs. finally: if connection: @@ -597,7 +598,7 @@ def load_data_into_test_bigquery_database_with_bigquery_client( dataframe (pd.DataFrame): DataFrame to load table_name (str): table to load DataFrame to. Prefix containing project and dataset are loaded by helper function. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP prefix: str = get_bigquery_table_prefix() table_id: str = f"""{prefix}.{table_name}""" from google.cloud import bigquery @@ -695,7 +696,7 @@ def clean_up_tables_with_prefix(connection_string: str, table_prefix: str) -> Li return tables_dropped -def introspect_db( # noqa: C901, PLR0912 +def introspect_db( # noqa: C901, PLR0912 # FIXME CoP execution_engine: SqlAlchemyExecutionEngine, schema_name: Union[str, None] = None, ignore_information_schemas_and_system_tables: bool = True, @@ -730,7 +731,12 @@ def introspect_db( # noqa: C901, PLR0912 if selected_schema_name is not None and schema_name != selected_schema_name: continue - table_names: List[str] = inspector.get_table_names(schema=schema) + try: + table_names: List[str] = inspector.get_table_names(schema=schema) + except ProgrammingError: + # Likely another test already cleaned up this schema. + # TODO: Make tests only clean up after themselves + continue for table_name in table_names: if ignore_information_schemas_and_system_tables and (table_name in system_tables): continue @@ -766,7 +772,7 @@ def introspect_db( # noqa: C901, PLR0912 } ) - # SQLAlchemy's introspection does not list "external tables" in Redshift Spectrum (tables whose data is stored on S3). # noqa: E501 + # SQLAlchemy's introspection does not list "external tables" in Redshift Spectrum (tables whose data is stored on S3). # noqa: E501 # FIXME CoP # The following code fetches the names of external schemas and tables from a special table # 'svv_external_tables'. try: @@ -784,10 +790,10 @@ def introspect_db( # noqa: C901, PLR0912 } ) except Exception as e: - # Our testing shows that 'svv_external_tables' table is present in all Redshift clusters. This means that this # noqa: E501 + # Our testing shows that 'svv_external_tables' table is present in all Redshift clusters. This means that this # noqa: E501 # FIXME CoP # exception is highly unlikely to fire. 
if "UndefinedTable" not in str(e): - raise e # noqa: TRY201 + raise e # noqa: TRY201 # FIXME CoP return tables @@ -797,12 +803,12 @@ def check_athena_table_count( ) -> bool: """ Helper function used by awsathena integration test. Checks whether expected number of tables exist in database - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP if sa: engine = sa.create_engine(connection_string) else: logger.debug( - "Attempting to perform test on AWSAthena database, but unable to load SqlAlchemy context; " # noqa: E501 + "Attempting to perform test on AWSAthena database, but unable to load SqlAlchemy context; " # noqa: E501 # FIXME CoP "install optional sqlalchemy dependency for support." ) return False @@ -813,10 +819,10 @@ def check_athena_table_count( result = connection.execute(sa.text(f"SHOW TABLES in {db_name}")).fetchall() return len(result) == expected_table_count except SQLAlchemyError: - error_message: str = """Docs integration tests encountered an error while loading test-data into test-database.""" # noqa: E501 - logger.error(error_message) # noqa: TRY400 + error_message: str = """Docs integration tests encountered an error while loading test-data into test-database.""" # noqa: E501 # FIXME CoP + logger.error(error_message) # noqa: TRY400 # FIXME CoP raise gx_exceptions.DatabaseConnectionError(error_message) - # Normally we would call `raise` to re-raise the SqlAlchemyError but we don't to make sure that # noqa: E501 + # Normally we would call `raise` to re-raise the SqlAlchemyError but we don't to make sure that # noqa: E501 # FIXME CoP # sensitive information does not make it into our CI logs. 
finally: connection.close() @@ -831,7 +837,7 @@ def clean_athena_db(connection_string: str, db_name: str, table_to_keep: str) -> engine = sa.create_engine(connection_string) else: logger.debug( - "Attempting to perform test on AWSAthena database, but unable to load SqlAlchemy context; " # noqa: E501 + "Attempting to perform test on AWSAthena database, but unable to load SqlAlchemy context; " # noqa: E501 # FIXME CoP "install optional sqlalchemy dependency for support." ) return @@ -878,9 +884,11 @@ def get_default_mssql_url() -> str: Returns: String of default connection to Docker container """ - db_hostname = os.getenv("GE_TEST_LOCAL_DB_HOSTNAME", "localhost") - connection_string = f"mssql+pyodbc://sa:ReallyStrongPwd1234%^&*@{db_hostname}:1433/test_ci?driver=ODBC Driver 17 for SQL Server&charset=utf8&autocommit=true" # noqa: E501 - return connection_string + return ( + "mssql+pyodbc://sa:ReallyStrongPwd1234%^&*@127.0.0.1:1433/test_ci" + "?driver=ODBC Driver 18 for SQL Server&charset=utf8" + "&autocommit=true&TrustServerCertificate=yes" + ) def get_awsathena_db_name(db_name_env_var: str = "ATHENA_DB_NAME") -> str: @@ -892,7 +900,7 @@ def get_awsathena_db_name(db_name_env_var: str = "ATHENA_DB_NAME") -> str: athena_db_name: str = os.getenv(db_name_env_var) if not athena_db_name: raise ValueError( - f"Environment Variable {db_name_env_var} is required to run integration tests against AWS Athena" # noqa: E501 + f"Environment Variable {db_name_env_var} is required to run integration tests against AWS Athena" # noqa: E501 # FIXME CoP ) return athena_db_name @@ -907,7 +915,7 @@ def get_awsathena_connection_url(db_name_env_var: str = "ATHENA_DB_NAME") -> str ATHENA_STAGING_S3: Optional[str] = os.getenv("ATHENA_STAGING_S3") if not ATHENA_STAGING_S3: raise ValueError( - "Environment Variable ATHENA_STAGING_S3 is required to run integration tests against AWS Athena" # noqa: E501 + "Environment Variable ATHENA_STAGING_S3 is required to run integration tests against AWS 
Athena" # noqa: E501 # FIXME CoP ) return f"awsathena+rest://@athena.us-east-1.amazonaws.com/{ATHENA_DB_NAME}?s3_staging_dir={ATHENA_STAGING_S3}" @@ -961,7 +969,7 @@ def working_directory(directory: PathStr): Reference: https://stackoverflow.com/questions/431684/equivalent-of-shell-cd-command-to-change-the-working-directory/431747#431747 """ - owd = os.getcwd() # noqa: PTH109 + owd = os.getcwd() # noqa: PTH109 # FIXME CoP try: os.chdir(directory) yield directory diff --git a/tests/validator/test_exception_info.py b/tests/validator/test_exception_info.py index 5f7dad398ca2..9dc0df033780 100644 --- a/tests/validator/test_exception_info.py +++ b/tests/validator/test_exception_info.py @@ -24,7 +24,7 @@ def test_exception_info__eq__and__ne__(exception_info: ExceptionInfo) -> None: def test_exception_info__repr__(exception_info: ExceptionInfo) -> None: assert ( exception_info.__repr__() - == "{'exception_traceback': 'my exception traceback', 'exception_message': 'my exception message', 'raised_exception': True}" # noqa: E501 + == "{'exception_traceback': 'my exception traceback', 'exception_message': 'my exception message', 'raised_exception': True}" # noqa: E501 # FIXME CoP ) @@ -32,5 +32,5 @@ def test_exception_info__repr__(exception_info: ExceptionInfo) -> None: def test_exception_info__str__(exception_info: ExceptionInfo) -> None: assert ( exception_info.__str__() - == '{\n "exception_traceback": "my exception traceback",\n "exception_message": "my exception message",\n "raised_exception": true\n}' # noqa: E501 + == '{\n "exception_traceback": "my exception traceback",\n "exception_message": "my exception message",\n "raised_exception": true\n}' # noqa: E501 # FIXME CoP ) diff --git a/tests/validator/test_metric_configuration.py b/tests/validator/test_metric_configuration.py index b7448b2ba297..a4379143e3c1 100644 --- a/tests/validator/test_metric_configuration.py +++ b/tests/validator/test_metric_configuration.py @@ -9,7 +9,7 @@ def 
test_metric_configuration__repr__and__str__( table_head_metric_config: MetricConfiguration, ) -> None: - expected = '{\n "metric_name": "table.head",\n "metric_domain_kwargs": {\n "batch_id": "abc123"\n },\n "metric_domain_kwargs_id": "batch_id=abc123",\n "metric_value_kwargs": {\n "n_rows": 5\n },\n "metric_value_kwargs_id": "n_rows=5",\n "id": [\n "table.head",\n "batch_id=abc123",\n "n_rows=5"\n ]\n}' # noqa: E501 + expected = '{\n "metric_name": "table.head",\n "metric_domain_kwargs": {\n "batch_id": "abc123"\n },\n "metric_domain_kwargs_id": "batch_id=abc123",\n "metric_value_kwargs": {\n "n_rows": 5\n },\n "metric_value_kwargs_id": "n_rows=5",\n "id": [\n "table.head",\n "batch_id=abc123",\n "n_rows=5"\n ]\n}' # noqa: E501 # FIXME CoP assert table_head_metric_config.__repr__() == expected assert table_head_metric_config.__str__() == expected diff --git a/tests/validator/test_metrics_calculator.py b/tests/validator/test_metrics_calculator.py index cede1a8471ed..5d39230eaae6 100644 --- a/tests/validator/test_metrics_calculator.py +++ b/tests/validator/test_metrics_calculator.py @@ -34,7 +34,7 @@ def integer_and_datetime_sample_dataset() -> dict: 11, ], "b": [ - datetime.datetime(2021, 1, 1, 0, 0, 0) + datetime.timedelta(days=(week_idx * 7)) # noqa: DTZ001 + datetime.datetime(2021, 1, 1, 0, 0, 0) + datetime.timedelta(days=(week_idx * 7)) # noqa: DTZ001 # FIXME CoP for week_idx in range(12) ], } @@ -86,7 +86,7 @@ def _test_column_partition_metric( For "datetime.datetime" data, test set contains 12 dates, starting with January 1, 2021, separated by 7 days. Expected partition boundaries are pre-computed algorithmically and asserted to be "close" to actual metric values. 
- """ # noqa: E501 + """ # noqa: E501 # FIXME CoP validator_with_data: Validator = get_test_validator_with_data( execution_engine=backend, table_name="column_partition_metric_test", @@ -140,7 +140,7 @@ def _test_column_partition_metric( operand_a=element.to_pydatetime() if isinstance(validator_with_data.execution_engine, PandasExecutionEngine) else element, - operand_b=(datetime.datetime(2021, 1, 1, 0, 0, 0) + (increment * idx)), # noqa: DTZ001 + operand_b=(datetime.datetime(2021, 1, 1, 0, 0, 0) + (increment * idx)), # noqa: DTZ001 # FIXME CoP ) for idx, element in enumerate(results[desired_metric.id]) ) @@ -161,7 +161,7 @@ def test_get_metric_calls_get_metrics_and_returns_correct_result(): The "with mock.patch" is used judiciously, trading off the focus on the functionality under test (i.e., avoiding "test leakage") against going as far as mocking all non-essential methods and properties, favoring code readability. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP class DummyExecutionEngine: pass diff --git a/tests/validator/test_util.py b/tests/validator/test_util.py index 34487a4c3b29..e9b09abd2ae0 100644 --- a/tests/validator/test_util.py +++ b/tests/validator/test_util.py @@ -19,7 +19,7 @@ def test_recursively_convert_to_json_serializable(tmp_path): "z": {1, 2, 3, 4, 5}, "zz": (1, 2, 3), "zzz": [ - datetime.datetime(2017, 1, 1), # noqa: DTZ001 + datetime.datetime(2017, 1, 1), # noqa: DTZ001 # FIXME CoP datetime.date(2017, 5, 1), ], "np.bool": np.bool_([True, False, True]), diff --git a/tests/validator/test_validation_graph.py b/tests/validator/test_validation_graph.py index 1aca3cb4610e..62b32a4e0872 100644 --- a/tests/validator/test_validation_graph.py +++ b/tests/validator/test_validation_graph.py @@ -40,13 +40,13 @@ def resolve_metrics( appropriate exception to be raised, while its dependencies resolve to actual values ("my_value" is used here as placeholder). 
This makes "ValidationGraph.resolve()" -- method under test -- evaluate every "MetricConfiguration" of parsed "ValidationGraph" successfully, except "failed" "MetricConfiguration". - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP metric_configuration: MetricConfiguration if failed_metric_config.id in [ metric_configuration.id for metric_configuration in metrics_to_resolve ]: raise gx_exceptions.MetricResolutionError( - message=f'Error: The column "not_in_table" in BatchData does not exist.{uuid.uuid4()}', # Randomizing the message to assert that only one exception is kept # noqa: E501 + message=f'Error: The column "not_in_table" in BatchData does not exist.{uuid.uuid4()}', # Randomizing the message to assert that only one exception is kept # noqa: E501 # FIXME CoP failed_metrics=[failed_metric_config], ) @@ -213,7 +213,7 @@ def test_ExpectationValidationGraph_constructor( ) assert ve.value.args == ( - 'Instantiation of "ExpectationValidationGraph" requires valid "ExpectationConfiguration" object.', # noqa: E501 + 'Instantiation of "ExpectationValidationGraph" requires valid "ExpectationConfiguration" object.', # noqa: E501 # FIXME CoP ) with pytest.raises(ValueError) as ve: @@ -296,7 +296,7 @@ def test_parse_validation_graph( ): available_metrics: Dict[Tuple[str, str, str], MetricValue] - # Parse input "ValidationGraph" object and confirm the numbers of ready and still needed metrics. # noqa: E501 + # Parse input "ValidationGraph" object and confirm the numbers of ready and still needed metrics. # noqa: E501 # FIXME CoP available_metrics = {} ( ready_metrics, @@ -306,7 +306,7 @@ def test_parse_validation_graph( ) assert len(ready_metrics) == 2 and len(needed_metrics) == 9 - # Show that including "nonexistent" metric in dictionary of resolved metrics does not increase ready_metrics count. # noqa: E501 + # Show that including "nonexistent" metric in dictionary of resolved metrics does not increase ready_metrics count. 
# noqa: E501 # FIXME CoP available_metrics = {("nonexistent", "nonexistent", "nonexistent"): "NONE"} ( ready_metrics, @@ -427,7 +427,7 @@ def test_progress_bar_config( """ This test creates mocked environment for progress bar tests; it then executes the method under test that utilizes the progress bar, "ValidationGraph.resolve()", with composed arguments, and verifies result. - """ # noqa: E501 + """ # noqa: E501 # FIXME CoP class DummyMetricConfiguration: pass diff --git a/tests/validator/test_validator.py b/tests/validator/test_validator.py index 7cd24df1851d..4a5cabdee8d9 100644 --- a/tests/validator/test_validator.py +++ b/tests/validator/test_validator.py @@ -58,19 +58,19 @@ def yellow_trip_pandas_data_context( to individual months if the "month" in batch_filter_parameters is set to "01", "02", or "03" """ project_path: str = str(tmp_path_factory.mktemp("taxi_data_context")) - context_path: str = os.path.join( # noqa: PTH118 + context_path: str = os.path.join( # noqa: PTH118 # FIXME CoP project_path, FileDataContext.GX_DIR ) - os.makedirs( # noqa: PTH103 - os.path.join(context_path, "expectations"), # noqa: PTH118 + os.makedirs( # noqa: PTH103 # FIXME CoP + os.path.join(context_path, "expectations"), # noqa: PTH118 # FIXME CoP exist_ok=True, ) - data_path: str = os.path.join(context_path, "..", "data") # noqa: PTH118 - os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 + data_path: str = os.path.join(context_path, "..", "data") # noqa: PTH118 # FIXME CoP + os.makedirs(os.path.join(data_path), exist_ok=True) # noqa: PTH118, PTH103 # FIXME CoP shutil.copy( file_relative_path( __file__, - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP "..", "integration", "fixtures", @@ -79,12 +79,12 @@ def yellow_trip_pandas_data_context( FileDataContext.GX_YML, ), ), - str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 + str(os.path.join(context_path, FileDataContext.GX_YML)), # noqa: PTH118 # FIXME CoP ) 
shutil.copy( file_relative_path( __file__, - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP "..", "test_sets", "taxi_yellow_tripdata_samples", @@ -92,7 +92,7 @@ def yellow_trip_pandas_data_context( ), ), str( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP context_path, "..", "data", "yellow_tripdata_sample_2019-01.csv" ) ), @@ -100,7 +100,7 @@ def yellow_trip_pandas_data_context( shutil.copy( file_relative_path( __file__, - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP "..", "test_sets", "taxi_yellow_tripdata_samples", @@ -108,7 +108,7 @@ def yellow_trip_pandas_data_context( ), ), str( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP context_path, "..", "data", "yellow_tripdata_sample_2019-02.csv" ) ), @@ -116,7 +116,7 @@ def yellow_trip_pandas_data_context( shutil.copy( file_relative_path( __file__, - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP "..", "test_sets", "taxi_yellow_tripdata_samples", @@ -124,7 +124,7 @@ def yellow_trip_pandas_data_context( ), ), str( - os.path.join( # noqa: PTH118 + os.path.join( # noqa: PTH118 # FIXME CoP context_path, "..", "data", "yellow_tripdata_sample_2019-03.csv" ) ), @@ -303,7 +303,7 @@ def test_graph_validate_with_runtime_config( def test_graph_validate_with_exception(basic_datasource: PandasDatasource, mocker: MockerFixture): # noinspection PyUnusedLocal def mock_error(*args, **kwargs): - raise Exception("Mock Error") # noqa: TRY002 + raise Exception("Mock Error") # noqa: TRY002 # FIXME CoP df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10], "b": [1, 2, 3, 4, 5, None]}) asset = basic_datasource.add_dataframe_asset("my_asset") @@ -325,7 +325,7 @@ def mock_error(*args, **kwargs): # TODO: Convert this to actually mock an exception being thrown # graph = ValidationGraph(execution_engine=execution_engine) - # graph.build_metric_dependency_graph = mock_error # type: ignore[method-assign] + # 
graph.build_metric_dependency_graph = mock_error # type: ignore[method-assign] # FIXME CoP result = validator.graph_validate(configurations=[expectation_configuration]) @@ -499,7 +499,7 @@ def test__get_attr___raises_attribute_error_with_invalid_attr( @pytest.mark.big -def test_graph_validate_with_two_expectations_and_first_expectation_without_additional_configuration( # noqa: E501 +def test_graph_validate_with_two_expectations_and_first_expectation_without_additional_configuration( # noqa: E501 # FIXME CoP in_memory_runtime_context, basic_datasource: PandasDatasource ): in_memory_runtime_context.data_sources.all()["my_datasource"] = basic_datasource @@ -808,7 +808,7 @@ def test_validator_with_exception_info_in_result(): "include_nested=True", ) exception_message = "Danger Will Robinson! Danger!" - exception_traceback = 'Traceback (most recent call last):\n File "lostinspace.py", line 42, in \n raise Exception("Danger Will Robinson! Danger!")\nException: Danger Will Robinson! Danger!' # noqa: E501 + exception_traceback = 'Traceback (most recent call last):\n File "lostinspace.py", line 42, in \n raise Exception("Danger Will Robinson! Danger!")\nException: Danger Will Robinson! Danger!' # noqa: E501 # FIXME CoP mock_aborted_metrics_info = { metric_id: {