Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEATURE] Enable strict_min/max for ExpectTableRowCountToBeBetween #10845

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,13 @@
EXPECTATION_SHORT_DESCRIPTION = "Expect the number of rows to be between two values."
MIN_VALUE_DESCRIPTION = "The minimum number of rows, inclusive."
MAX_VALUE_DESCRIPTION = "The maximum number of rows, inclusive."

STRICT_MIN_DESCRIPTION = (
"If True, the row count must be strictly larger than min_value, default=False"
)
STRICT_MAX_DESCRIPTION = (
"If True, the row count must be strictly smaller than max_value, default=False"
)
SUPPORTED_DATA_SOURCES = [
"Pandas",
"Spark",
Expand Down Expand Up @@ -68,6 +75,10 @@ class ExpectTableRowCountToBeBetween(BatchExpectation):
{MIN_VALUE_DESCRIPTION}
max_value (int or None): \
{MAX_VALUE_DESCRIPTION}
strict_min (boolean): \
{STRICT_MIN_DESCRIPTION}
strict_max (boolean): \
{STRICT_MAX_DESCRIPTION}

Other Parameters:
result_format (str or None): \
Expand All @@ -86,7 +97,7 @@ class ExpectTableRowCountToBeBetween(BatchExpectation):
Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.

Notes:
* min_value and max_value are both inclusive.
* min_value and max_value are both inclusive unless strict_min or strict_max are set to True.
* If min_value is None, then max_value is treated as an upper bound, and the number of acceptable rows has \
no minimum.
* If max_value is None, then min_value is treated as a lower bound, and the number of acceptable rows has \
Expand Down Expand Up @@ -164,6 +175,8 @@ class ExpectTableRowCountToBeBetween(BatchExpectation):
max_value: Union[int, SuiteParameterDict, datetime, None] = pydantic.Field(
default=None, description=MAX_VALUE_DESCRIPTION
)
# strict_min tightens the min_value bound and strict_max tightens the max_value bound;
# each field must carry its own description (they were previously swapped).
strict_min: bool = pydantic.Field(default=False, description=STRICT_MIN_DESCRIPTION)
strict_max: bool = pydantic.Field(default=False, description=STRICT_MAX_DESCRIPTION)
row_condition: Union[str, None] = None
condition_parser: Union[ConditionParser, None] = None

Expand All @@ -182,10 +195,14 @@ class ExpectTableRowCountToBeBetween(BatchExpectation):
success_keys = (
"min_value",
"max_value",
"strict_min",
"strict_max",
)
args_keys = (
"min_value",
"max_value",
"strict_min",
"strict_max",
)

class Config:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"title": "Expect table row count to be between",
"description": "Expect the number of rows to be between two values.\n\nExpectTableRowCountToBeBetween is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n min_value (int or None): The minimum number of rows, inclusive.\n max_value (int or None): The maximum number of rows, inclusive.\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive.\n * If min_value is None, then max_value is treated as an upper bound, and the number of acceptable rows has no minimum.\n * If max_value is None, then min_value is treated as a lower bound, and the number of acceptable rows has no maximum.\n\nSee Also:\n [ExpectTableRowCountToEqual](https://greatexpectations.io/expectations/expect_table_row_count_to_equal)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Volume\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableRowCountToBeBetween(\n min_value=1,\n max_value=4\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": true\n }\n\n Failing Case:\n 
Input:\n ExpectTableRowCountToBeBetween(\n max_value=2\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": false\n }",
"description": "Expect the number of rows to be between two values.\n\nExpectTableRowCountToBeBetween is a Batch Expectation.\n\nBatchExpectations are one of the most common types of Expectation.\nThey are evaluated for an entire Batch, and answer a semantic question about the Batch itself.\n\nArgs:\n min_value (int or None): The minimum number of rows, inclusive.\n max_value (int or None): The maximum number of rows, inclusive.\n strict_min (boolean): If True, the row count must be strictly larger than min_value, default=False\n strict_max (boolean): If True, the row count must be strictly smaller than max_value, default=False\n\nOther Parameters:\n result_format (str or None): Which output mode to use: BOOLEAN_ONLY, BASIC, COMPLETE, or SUMMARY. For more detail, see [result_format](https://docs.greatexpectations.io/docs/reference/expectations/result_format).\n catch_exceptions (boolean or None): If True, then catch exceptions and include them as part of the result object. For more detail, see [catch_exceptions](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#catch_exceptions).\n meta (dict or None): A JSON-serializable dictionary (nesting allowed) that will be included in the output without modification. 
For more detail, see [meta](https://docs.greatexpectations.io/docs/reference/expectations/standard_arguments/#meta).\n\nReturns:\n An [ExpectationSuiteValidationResult](https://docs.greatexpectations.io/docs/terms/validation_result)\n\n Exact fields vary depending on the values passed to result_format, catch_exceptions, and meta.\n\nNotes:\n * min_value and max_value are both inclusive unless strict_min or strict_max are set to True.\n * If min_value is None, then max_value is treated as an upper bound, and the number of acceptable rows has no minimum.\n * If max_value is None, then min_value is treated as a lower bound, and the number of acceptable rows has no maximum.\n\nSee Also:\n [ExpectTableRowCountToEqual](https://greatexpectations.io/expectations/expect_table_row_count_to_equal)\n\nSupported Data Sources:\n [Pandas](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Spark](https://docs.greatexpectations.io/docs/application_integration_support/)\n [SQLite](https://docs.greatexpectations.io/docs/application_integration_support/)\n [PostgreSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MySQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [MSSQL](https://docs.greatexpectations.io/docs/application_integration_support/)\n [BigQuery](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Snowflake](https://docs.greatexpectations.io/docs/application_integration_support/)\n [Databricks (SQL)](https://docs.greatexpectations.io/docs/application_integration_support/)\n\nData Quality Issues:\n Volume\n\nExample Data:\n test test2\n 0 1.00 2\n 1 2.30 5\n 2 4.33 0\n\nCode Examples:\n Passing Case:\n Input:\n ExpectTableRowCountToBeBetween(\n min_value=1,\n max_value=4\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": 
{},\n \"success\": true\n }\n\n Failing Case:\n Input:\n ExpectTableRowCountToBeBetween(\n max_value=2\n )\n\n Output:\n {\n \"exception_info\": {\n \"raised_exception\": false,\n \"exception_traceback\": null,\n \"exception_message\": null\n },\n \"result\": {\n \"observed_value\": 3\n },\n \"meta\": {},\n \"success\": false\n }",
"type": "object",
"properties": {
"id": {
Expand Down Expand Up @@ -98,6 +98,18 @@
}
]
},
"strict_min": {
"title": "Strict Min",
"description": "If True, the row count must be strictly larger than min_value, default=False",
"default": false,
"type": "boolean"
},
"strict_max": {
"title": "Strict Max",
"description": "If True, the row count must be strictly smaller than max_value, default=False",
"default": false,
"type": "boolean"
},
"row_condition": {
"title": "Row Condition",
"type": "string"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,30 @@ def test_golden_path(batch_for_datasource: Batch) -> None:
gxe.ExpectTableRowCountToBeBetween(min_value=3, max_value=5),
id="inclusivity",
),
pytest.param(
gxe.ExpectTableRowCountToBeBetween(
min_value=None, max_value=None, strict_min=True, strict_max=True
),
id="strict_min_max_vacuously_true",
),
pytest.param(
gxe.ExpectTableRowCountToBeBetween(
min_value=2, max_value=None, strict_min=True, strict_max=True
),
id="strict_min_max_just_min",
),
pytest.param(
gxe.ExpectTableRowCountToBeBetween(
min_value=None, max_value=4, strict_min=True, strict_max=True
),
id="strict_min_max_just_max",
),
pytest.param(
gxe.ExpectTableRowCountToBeBetween(
min_value=2, max_value=4, strict_min=True, strict_max=True
),
id="strict_min_max_inclusive",
),
],
)
@parameterize_batch_for_data_sources(data_source_configs=JUST_PANDAS_DATA_SOURCES, data=DATA)
Expand Down Expand Up @@ -75,6 +99,20 @@ def test_empty_data(batch_for_datasource: Batch) -> None:
gxe.ExpectTableRowCountToBeBetween(min_value=4, max_value=4),
id="bad_range",
),
pytest.param(
gxe.ExpectTableRowCountToBeBetween(min_value=3, max_value=4, strict_min=True),
id="strict_min_max_observed_same_as_min",
),
pytest.param(
gxe.ExpectTableRowCountToBeBetween(min_value=2, max_value=3, strict_max=True),
id="strict_min_max_observed_same_as_max",
),
pytest.param(
gxe.ExpectTableRowCountToBeBetween(
min_value=3, max_value=3, strict_min=True, strict_max=True
),
id="strict_min_max_observed_same_as_min_and_max",
),
],
)
@parameterize_batch_for_data_sources(data_source_configs=JUST_PANDAS_DATA_SOURCES, data=DATA)
Expand Down
Loading