From 8cb8ff205d9fa116a872db2df02d0d8292b9fa19 Mon Sep 17 00:00:00 2001 From: kumar saurav Date: Wed, 4 Sep 2024 00:15:44 +0530 Subject: [PATCH] add: timestamp validation add: timestamp validation add: timestamp validation fix: module error --- dcs_core/core/datasource/sql_datasource.py | 280 ++++++++++++++++++ dcs_core/core/validation/manager.py | 12 + .../core/validation/validity_validation.py | 126 ++++++++ docs/validations/validity.md | 74 ++++- .../configuration/test_configuration_v1.py | 90 ++++++ .../datasource/test_sql_datasource.py | 68 ++++- 6 files changed, 642 insertions(+), 8 deletions(-) diff --git a/dcs_core/core/datasource/sql_datasource.py b/dcs_core/core/datasource/sql_datasource.py index 60e674d1..9f66729a 100644 --- a/dcs_core/core/datasource/sql_datasource.py +++ b/dcs_core/core/datasource/sql_datasource.py @@ -724,3 +724,283 @@ def query_get_null_keyword_count( return round((result[0] / result[1]) * 100, 2) if result[1] > 0 else 0 return result[0] if result else 0 + + def query_timestamp_metric( + self, + table: str, + field: str, + operation: str, + predefined_regex: str, + filters: str = None, + ) -> Union[float, int]: + """ + :param table: Table name + :param field: Column name + :param operation: Metric operation ("count" or "percent") + :param predefined_regex: regex pattern + :param filters: filter condition + :return: Tuple containing valid count and total count (or percentage) + """ + + qualified_table_name = self.qualified_table_name(table) + + timestamp_iso_regex = r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])T([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](?:\.\d{1,3})?(Z|[+-](0[0-9]|1[0-4]):[0-5][0-9])?$" + + if predefined_regex == "timestamp_iso": + regex_condition = f"{field} ~ '{timestamp_iso_regex}'" + else: + raise ValueError(f"Unknown predefined regex pattern: {predefined_regex}") + + filters_clause = f"WHERE {filters}" if filters else "" + + query = f""" + WITH extracted_timestamps AS ( + SELECT + {field}, + SUBSTRING({field} FROM '^(\d{{4}})') AS year, -- Extract year + SUBSTRING({field} FROM '^\d{{4}}-(\d{{2}})') AS month, -- Extract month + SUBSTRING({field} FROM '^\d{{4}}-\d{{2}}-(\d{{2}})') AS day, -- Extract day + SUBSTRING({field} FROM 'T(\d{{2}})') AS hour, -- Extract hour + SUBSTRING({field} FROM 'T\d{{2}}:(\d{{2}})') AS minute, -- Extract minute + SUBSTRING({field} FROM 'T\d{{2}}:\d{{2}}:(\d{{2}})') AS second, -- Extract second + SUBSTRING({field} FROM '([+-]\d{{2}}:\d{{2}}|Z)$') AS timezone -- Extract timezone + FROM {qualified_table_name} + {filters_clause} + ), + validated_timestamps AS ( + SELECT + {field}, + CASE + WHEN + -- Validate each component with its specific rules + year ~ '^\d{{4}}$' AND + month ~ '^(0[1-9]|1[0-2])$' AND + day ~ '^((0[1-9]|[12][0-9])|(30|31))$' AND + hour ~ '^([01][0-9]|2[0-3])$' AND + minute ~ '^[0-5][0-9]$' AND + second ~ '^[0-5][0-9]$' AND + (timezone IS NULL OR timezone ~ '^(Z|[+-](0[0-9]|1[0-4]):[0-5][0-9])$') AND + -- Additional check for days in months (e.g., February) + ( + (month IN ('01', '03', '05', '07', '08', '10', '12') AND day BETWEEN '01' AND '31') OR + (month IN ('04', '06', '09', '11') AND day BETWEEN '01' AND '30') OR + (month = '02' AND day BETWEEN '01' AND + CASE + -- Handle leap years + WHEN (year::int % 400 = 0 OR (year::int % 100 != 0 AND year::int % 4 = 0)) THEN '29' + ELSE '28' + END + ) + ) + THEN 1 + ELSE 0 + END AS is_valid + FROM extracted_timestamps + ) + SELECT COUNT(*) AS valid_count, COUNT(*) AS total_count + FROM validated_timestamps + WHERE is_valid = 1; + """ + + try: + valid_count = self.fetchone(query)[0] + total_count_query = ( + f"SELECT COUNT(*) FROM {qualified_table_name} {filters_clause}" + ) + total_count = self.fetchone(total_count_query)[0] + + if operation == "count": + return valid_count, total_count + elif operation == "percent": + return valid_count, total_count + else: + raise ValueError(f"Unknown operation: {operation}") + + except Exception as e: + print(f"Error occurred: {e}") + return 0, 0 + + def query_timestamp_not_in_future_metric( + self, + table: str, + field: str, + operation: str, + predefined_regex: str, + filters: str = None, + ) -> Union[float, int]: + """ + :param table: Table name + :param field: Column name + :param operation: Metric operation ("count" or "percent") + :param predefined_regex: regex pattern + :param filters: filter condition + :return: Tuple containing count of valid timestamps not in the future and total count + """ + + qualified_table_name = self.qualified_table_name(table) + + timestamp_iso_regex = r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])T([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](?:\.\d{1,3})?(Z|[+-](0[0-9]|1[0-4]):[0-5][0-9])?$" + + if predefined_regex == "timestamp_iso": + regex_condition = f"{field} ~ '{timestamp_iso_regex}'" + else: + raise ValueError(f"Unknown predefined regex pattern: {predefined_regex}") + + filters_clause = f"WHERE {filters}" if filters else "" + + query = f""" + WITH extracted_timestamps AS ( + SELECT + {field}, + SUBSTRING({field} FROM '^(\d{{4}})') AS year, -- Extract year + SUBSTRING({field} FROM '^\d{{4}}-(\d{{2}})') AS month, -- Extract month + SUBSTRING({field} FROM '^\d{{4}}-\d{{2}}-(\d{{2}})') AS day, -- Extract day + SUBSTRING({field} FROM 'T(\d{{2}})') AS hour, -- Extract hour + SUBSTRING({field} FROM 'T\d{{2}}:(\d{{2}})') AS minute, -- Extract minute + SUBSTRING({field} FROM 'T\d{{2}}:\d{{2}}:(\d{{2}})') AS second, -- Extract second + SUBSTRING({field} FROM '([+-]\d{{2}}:\d{{2}}|Z)$') AS timezone -- Extract timezone + FROM {qualified_table_name} + {filters_clause} + ), + validated_timestamps AS ( + SELECT + {field}, + CASE + WHEN + year ~ '^\d{{4}}$' AND + month ~ '^(0[1-9]|1[0-2])$' AND + day ~ '^((0[1-9]|[12][0-9])|(30|31))$' AND + hour ~ '^([01][0-9]|2[0-3])$' AND + minute ~ '^[0-5][0-9]$' AND + second ~ '^[0-5][0-9]$' AND + (timezone IS NULL OR timezone ~ '^(Z|[+-](0[0-9]|1[0-4]):[0-5][0-9])$') AND + ( + (month IN ('01', '03', '05', '07', '08', '10', '12') AND day BETWEEN '01' AND '31') OR + (month IN ('04', '06', '09', '11') AND day BETWEEN '01' AND '30') OR + (month = '02' AND day BETWEEN '01' AND + CASE + WHEN (year::int % 400 = 0 OR (year::int % 100 != 0 AND year::int % 4 = 0)) THEN '29' + ELSE '28' + END + ) + ) + THEN 1 + ELSE 0 + END AS is_valid + FROM extracted_timestamps + ), + timestamps_not_in_future AS ( + SELECT * + FROM validated_timestamps + WHERE is_valid = 1 AND ({field} ~ '{timestamp_iso_regex}') AND {field}::timestamp <= CURRENT_TIMESTAMP + ) + SELECT COUNT(*) AS valid_count, (SELECT COUNT(*) FROM {qualified_table_name} {filters_clause}) AS total_count + FROM timestamps_not_in_future; + """ + try: + valid_count = self.fetchone(query)[0] + total_count_query = ( + f"SELECT COUNT(*) FROM {qualified_table_name} {filters_clause}" + ) + total_count = self.fetchone(total_count_query)[0] + + if operation == "count": + return valid_count, total_count + elif operation == "percent": + return valid_count, total_count + else: + raise ValueError(f"Unknown operation: {operation}") + + except Exception as e: + print(f"Error occurred: {e}") + return 0, 0 + + def query_timestamp_date_not_in_future_metric( + self, + table: str, + field: str, + operation: str, + predefined_regex: str, + filters: str = None, + ) -> Union[float, int]: + """ + :param table: Table name + :param field: Column name + :param operation: Metric operation ("count" or "percent") + :param predefined_regex: The regex pattern to use (e.g., "timestamp_iso") + :param filters: Optional filter condition + :return: Tuple containing count of valid dates not in the future and total count + """ + + qualified_table_name = self.qualified_table_name(table) + + timestamp_iso_regex = r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])T([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](?:\.\d{1,3})?(Z|[+-](0[0-9]|1[0-4]):[0-5][0-9])?$" + + if predefined_regex == "timestamp_iso": + regex_condition = f"{field} ~ '{timestamp_iso_regex}'" + else: + raise ValueError(f"Unknown predefined regex pattern: {predefined_regex}") + + filters_clause = f"WHERE {filters}" if filters else "" + + query = f""" + WITH extracted_timestamps AS ( + SELECT + {field}, + SUBSTRING({field} FROM '^(\d{{4}})') AS year, -- Extract year + SUBSTRING({field} FROM '^\d{{4}}-(\d{{2}})') AS month, -- Extract month + SUBSTRING({field} FROM '^\d{{4}}-\d{{2}}-(\d{{2}})') AS day -- Extract day + FROM {qualified_table_name} + {filters_clause} + ), + validated_dates AS ( + SELECT + {field}, + CASE + WHEN + year ~ '^\d{{4}}$' AND + month ~ '^(0[1-9]|1[0-2])$' AND + day ~ '^((0[1-9]|[12][0-9])|(30|31))$' AND + ( + (month IN ('01', '03', '05', '07', '08', '10', '12') AND day BETWEEN '01' AND '31') OR + (month IN ('04', '06', '09', '11') AND day BETWEEN '01' AND '30') OR + (month = '02' AND day BETWEEN '01' AND + CASE + WHEN (year::int % 400 = 0 OR (year::int % 100 != 0 AND year::int % 4 = 0)) THEN '29' + ELSE '28' + END + ) + ) + THEN 1 + ELSE 0 + END AS is_valid + FROM extracted_timestamps + ), + dates_not_in_future AS ( + SELECT * + FROM validated_dates + WHERE is_valid = 1 + AND ({field} ~ '{timestamp_iso_regex}') + AND ({field})::date <= CURRENT_DATE -- Compare only the date part against the current date + ) + SELECT COUNT(*) AS valid_count, (SELECT COUNT(*) FROM {qualified_table_name} {filters_clause}) AS total_count + FROM dates_not_in_future; + """ + + try: + valid_count = self.fetchone(query)[0] + total_count_query = ( + f"SELECT COUNT(*) FROM {qualified_table_name} {filters_clause}" + ) + total_count = self.fetchone(total_count_query)[0] + + if operation == "count": + return valid_count, total_count + elif operation == "percent": + return valid_count, total_count + else: + raise ValueError(f"Unknown operation: {operation}") + + except Exception as e: + print(f"Error occurred: {e}") + return 0, 0 diff --git a/dcs_core/core/validation/manager.py b/dcs_core/core/validation/manager.py index 5293ec03..d4d85e63 100644 --- a/dcs_core/core/validation/manager.py +++ b/dcs_core/core/validation/manager.py @@ -62,6 +62,7 @@ ) from dcs_core.core.validation.validity_validation import ( # noqa F401 this is used in globals CountCUSIPValidation, + CountDateNotInFutureValidation, CountEmailValidation, CountFIGIValidation, CountInvalidRegex, @@ -70,9 +71,11 @@ CountLatitudeValidation, CountLEIValidation, CountLongitudeValidation, + CountNotInFutureValidation, CountPermIDValidation, CountSEDOLValidation, CountSSNValidation, + CountTimeStampValidation, CountUSAPhoneValidation, CountUSAStateCodeValidation, CountUSAZipCodeValidation, @@ -80,6 +83,7 @@ CountValidRegex, CountValidValues, PercentCUSIPValidation, + PercentDateNotInFutureValidation, PercentEmailValidation, PercentFIGIValidation, PercentInvalidRegex, @@ -88,9 +92,11 @@ PercentLatitudeValidation, PercentLEIValidation, PercentLongitudeValidation, + PercentNotInFutureValidation, PercentPermIDValidation, PercentSEDOLValidation, PercentSSNValidation, + PercentTimeStampValidation, PercentUSAPhoneValidation, PercentUSAStateCodeValidation, PercentUSAZipCodeValidation, @@ -173,6 +179,12 @@ class ValidationManager: ValidationFunction.PERCENT_ALL_SPACE.value: "PercentageAllSpaceValidation", ValidationFunction.COUNT_NULL_KEYWORD.value: "CountNullKeywordValidation", ValidationFunction.PERCENT_NULL_KEYWORD.value: "PercentageNullKeywordValidation", + ValidationFunction.COUNT_TIMESTAMP_STRING.value: "CountTimeStampValidation", + ValidationFunction.PERCENT_TIMESTAMP_STRING.value: "PercentTimeStampValidation", + ValidationFunction.COUNT_NOT_IN_FUTURE.value: "CountNotInFutureValidation", + ValidationFunction.PERCENT_NOT_IN_FUTURE.value: "PercentNotInFutureValidation", + ValidationFunction.COUNT_DATE_NOT_IN_FUTURE.value: "CountDateNotInFutureValidation", + ValidationFunction.PERCENT_DATE_NOT_IN_FUTURE.value: "PercentDateNotInFutureValidation", } def __init__( diff --git a/dcs_core/core/validation/validity_validation.py b/dcs_core/core/validation/validity_validation.py index da472a73..527160d5 100644 --- a/dcs_core/core/validation/validity_validation.py +++ b/dcs_core/core/validation/validity_validation.py @@ -698,3 +698,129 @@ def _generate_metric_value(self, **kwargs) -> Union[float, int]: raise NotImplementedError( "Perm ID validation is only supported for SQL data sources" ) + + +class CountTimeStampValidation(Validation): + def _generate_metric_value(self, **kwargs) -> Union[float, int]: + if isinstance(self.data_source, SQLDataSource): + valid_count, total_row_count = self.data_source.query_timestamp_metric( + table=self.dataset_name, + field=self.field_name, + operation="count", + predefined_regex="timestamp_iso", + filters=self.where_filter if self.where_filter is not None else None, + ) + return valid_count + else: + raise ValueError( + "Unsupported data source type for CountTimeStampValidation" + ) + + +class PercentTimeStampValidation(Validation): + def _generate_metric_value(self, **kwargs) -> Union[float, int]: + if isinstance(self.data_source, SQLDataSource): + valid_count, total_row_count = self.data_source.query_timestamp_metric( + table=self.dataset_name, + field=self.field_name, + operation="percent", + predefined_regex="timestamp_iso", + filters=self.where_filter if self.where_filter is not None else None, + ) + return ( + round((valid_count / total_row_count) * 100, 2) + if total_row_count > 0 + else 0.0 + ) + else: + raise ValueError( + "Unsupported data source type for PercentTimeStampValidation" + ) + + +class CountNotInFutureValidation(Validation): + def _generate_metric_value(self, **kwargs) -> Union[float, int]: + if isinstance(self.data_source, SQLDataSource): + ( + valid_count, + total_row_count, + ) = self.data_source.query_timestamp_not_in_future_metric( + table=self.dataset_name, + field=self.field_name, + operation="count", + predefined_regex="timestamp_iso", + filters=self.where_filter if self.where_filter is not None else None, + ) + return valid_count + else: + raise ValueError( + "Unsupported data source type for CountNotInFutureValidation" + ) + + +class PercentNotInFutureValidation(Validation): + def _generate_metric_value(self, **kwargs) -> Union[float, int]: + if isinstance(self.data_source, SQLDataSource): + ( + valid_count, + total_row_count, + ) = self.data_source.query_timestamp_not_in_future_metric( + table=self.dataset_name, + field=self.field_name, + operation="percent", + predefined_regex="timestamp_iso", + filters=self.where_filter if self.where_filter is not None else None, + ) + return ( + round((valid_count / total_row_count) * 100, 2) + if total_row_count > 0 + else 0.0 + ) + else: + raise ValueError( + "Unsupported data source type for PercentNotInFutureValidation" + ) + + +class CountDateNotInFutureValidation(Validation): + def _generate_metric_value(self, **kwargs) -> Union[float, int]: + if isinstance(self.data_source, SQLDataSource): + ( + valid_count, + total_row_count, + ) = self.data_source.query_timestamp_date_not_in_future_metric( + table=self.dataset_name, + field=self.field_name, + operation="count", + predefined_regex="timestamp_iso", + filters=self.where_filter if self.where_filter is not None else None, + ) + return valid_count + else: + raise ValueError( + "Unsupported data source type for CountDateNotInFutureValidation" + ) + + +class PercentDateNotInFutureValidation(Validation): + def _generate_metric_value(self, **kwargs) -> Union[float, int]: + if isinstance(self.data_source, SQLDataSource): + ( + valid_count, + total_row_count, + ) = self.data_source.query_timestamp_date_not_in_future_metric( + table=self.dataset_name, + field=self.field_name, + operation="percent", + predefined_regex="timestamp_iso", + filters=self.where_filter if self.where_filter is not None else None, + ) + return ( + round((valid_count / total_row_count) * 100, 2) + if total_row_count > 0 + else 0.0 + ) + else: + raise ValueError( + "Unsupported data source type for PercentDateNotInFutureValidation" + ) diff --git a/docs/validations/validity.md b/docs/validations/validity.md index d10976a7..ac906a9d 100644 --- a/docs/validations/validity.md +++ b/docs/validations/validity.md @@ -277,7 +277,7 @@ validations for location_db.geolocation: on: percent_longitude(longitude_column_name) threshold: "> 80" ``` -``` + ## Count SSN @@ -553,4 +553,76 @@ The percent null keyword validation checks the percentage of null like keyword i validations for product_db.products: - percent_null_keyword: on: percent_null_keyboard(keyword) +``` + +## Count Timestamp String + +The count timestamp string validation checks the number of valid timestamp string in ISO format in a dataset. + +**Example** + +```yaml title="dcs_config.yaml" +validations for product_db.products: + - count_valid_timestamp: + on: count_timestamp_string(timestamp) +``` + +## Percent Timestamp String + +The percent timestamp string validation checks the percentage of valid timestamp string in ISO format in a dataset. + +**Example** + +```yaml title="dcs_config.yaml" +validations for product_db.products: + - percent_valid_timestamp: + on: percent_timestamp_string(timestamp) +``` + +## Count Not In Future + +The count not in future validation checks the number of valid timestamp string that are not in future in a dataset. + +**Example** + +```yaml title="dcs_config.yaml" +validations for product_db.products: + - count_timestamp_not_in_future: + on: count_not_in_future(future_timestamp) +``` + +## Percent Not In Future + +The percent date not in future validation checks the percentage of valid timestamp string that are not in future in a dataset. + +**Example** + +```yaml title="dcs_config.yaml" +validations for product_db.products: + - percent_timestamp_not_in_future: + on: percent_not_in_future(future_timestamp) +``` + +## Count Date Not In Future + +The count date not in future validation checks the number of valid timestamp string with date that are not in future in a dataset. + +**Example** + +```yaml title="dcs_config.yaml" +validations for product_db.products: + - count_date_not_in_future: + on: count_date_not_in_future(future_timestamp) +``` + +## Percent Date Not In Future + +The percent date not in future validation checks the percentage of valid timestamp string with date that are not in future in a dataset. + +**Example** + +```yaml title="dcs_config.yaml" +validations for product_db.products: + - percent_date_not_in_future: + on: percent_date_not_in_future(future_timestamp) ``` \ No newline at end of file diff --git a/tests/core/configuration/test_configuration_v1.py b/tests/core/configuration/test_configuration_v1.py index 64e0d9e4..4cd96e00 100644 --- a/tests/core/configuration/test_configuration_v1.py +++ b/tests/core/configuration/test_configuration_v1.py @@ -1087,3 +1087,93 @@ def test_should_parse_percent_null_keyword(): .get_validation_function == ValidationFunction.PERCENT_NULL_KEYWORD ) + + +def test_should_parse_count_timestamp_string(): + yaml_string = """ + validations for source.table: + - test: + on: count_timestamp_string(timestamp) + """ + configuration = load_configuration_from_yaml_str(yaml_string) + assert ( + configuration.validations["source.table"] + .validations["test"] + .get_validation_function + == ValidationFunction.COUNT_TIMESTAMP_STRING + ) + + +def test_should_parse_percent_timestamp_string(): + yaml_string = """ + validations for source.table: + - test: + on: percent_timestamp_string(timestamp) + """ + configuration = load_configuration_from_yaml_str(yaml_string) + assert ( + configuration.validations["source.table"] + .validations["test"] + .get_validation_function + == ValidationFunction.PERCENT_TIMESTAMP_STRING + ) + + +def test_should_parse_count_not_in_future(): + yaml_string = """ + validations for source.table: + - test: + on: count_not_in_future(future_timestamp) + """ + configuration = load_configuration_from_yaml_str(yaml_string) + assert ( + configuration.validations["source.table"] + .validations["test"] + .get_validation_function + == ValidationFunction.COUNT_NOT_IN_FUTURE + ) + + +def test_should_parse_percent_not_in_future(): + yaml_string = """ + validations for source.table: + - test: + on: percent_not_in_future(future_timestamp) + """ + configuration = load_configuration_from_yaml_str(yaml_string) + assert ( + configuration.validations["source.table"] + .validations["test"] + .get_validation_function + == ValidationFunction.PERCENT_NOT_IN_FUTURE + ) + + +def test_should_parse_count_date_not_in_future(): + yaml_string = """ + validations for source.table: + - test: + on: count_date_not_in_future(future_timestamp) + """ + configuration = load_configuration_from_yaml_str(yaml_string) + assert ( + configuration.validations["source.table"] + .validations["test"] + .get_validation_function + == ValidationFunction.COUNT_DATE_NOT_IN_FUTURE + ) + + +def test_should_parse_percent_date_not_in_future(): + yaml_string = """ + validations for source.table: + - test: + on: percent_date_not_in_future(future_timestamp) + """ + configuration = load_configuration_from_yaml_str(yaml_string) + assert ( + configuration.validations["source.table"] + .validations["test"] + .get_validation_function + == ValidationFunction.PERCENT_DATE_NOT_IN_FUTURE + ) diff --git a/tests/integration/datasource/test_sql_datasource.py b/tests/integration/datasource/test_sql_datasource.py index acb57cdd..34ae68a4 100644 --- a/tests/integration/datasource/test_sql_datasource.py +++ b/tests/integration/datasource/test_sql_datasource.py @@ -134,7 +134,10 @@ def setup_tables( salary INTEGER, price FLOAT, all_space VARCHAR(50), - null_keyword VARCHAR(50) + null_keyword VARCHAR(50), + timestamp VARCHAR(50), + not_in_future VARCHAR(50), + date_not_in_future VARCHAR(50) ) """ ) @@ -146,27 +149,33 @@ def setup_tables( ('thor', '{(utc_now - datetime.timedelta(days=10)).strftime("%Y-%m-%d")}', 1500, NULL, 'thor hammer', 'e7194aaa-5516-4362-a5ff-6ff971976bec', '123-456-7890', 'jane.doe@domain', 'C2', 'ABCDE', 40.0678, -7555555554.0060,'856-45-6789','0067340', - 'JRIK0092LOAUCXTR6042','03783310','BBG000B9XRY4','US0378331005', '1234--5678-9012--3456-789', 0, 100.0,'Allen','null'), -- invalid email -- invalid usa_state_code -- invalid usa_zip_code -- invalid cusip -- invalid perm_id + 'JRIK0092LOAUCXTR6042','03783310','BBG000B9XRY4','US0378331005', '1234--5678-9012--3456-789', 0, 100.0,'Allen','null', + '2024-01-15T12:30:45Z','2024-09-06T01:15:00Z','2023-12-31T23:59:59+01:00'), -- invalid email -- invalid usa_state_code -- invalid usa_zip_code -- invalid cusip -- invalid perm_id ('captain america', '{(utc_now - datetime.timedelta(days=3)).strftime("%Y-%m-%d")}', 90, 80, 'shield', 'e7194aaa-5516-4362-a5ff-6ff971976b', '(123) 456-7890', 'john.doe@.com ', 'NY', '12-345', 34.0522, -118.2437,'000-12-3456', 'B01HL06', - 'CDR300OS7OJENVEDDW89','037833100','BBG000BL2H25','US5949181045', '1234567890123456789', 1000, -50.0,' ','Alvin'), -- invalid weapon_id --invalid email -- invalid usa_zip_code -- invalid ssn + 'CDR300OS7OJENVEDDW89','037833100','BBG000BL2H25','US5949181045', '1234567890123456789', 1000, -50.0,' ','Alvin', + '2021-06-15T08:22:33.123Z','2024-08-25T09:15:00Z','2024-08-25T09:15:00Z'), -- invalid weapon_id --invalid email -- invalid usa_zip_code -- invalid ssn ('iron man', '{(utc_now - datetime.timedelta(days=4)).strftime("%Y-%m-%d")}', 50, 70, 'suit', '1739c676-6108-4dd2-8984-2459df744936', '123 456 7890', 'contact@company..org', 'XY', '85001', 37.7749, -122.4194,'859-99-9999','4155586', - 'VXQ400F1OBWAVPBJP86','594918104','BBG000B3YB97','US38259P5088', '123456789012345678', 0, -150.0,'Ram','nil'), -- invalid email -- invalid usa_state_code -- invalid lei -- invalid perm_id + 'VXQ400F1OBWAVPBJP86','594918104','BBG000B3YB97','US38259P5088', '123456789012345678', 0, -150.0,'Ram','nil', + '2024-04-31T12:30:45Z','2024-10-07T12:00:00Z','2024-10-10T12:00:00Z'), -- invalid email -- invalid usa_state_code -- invalid lei -- invalid perm_id ('hawk eye', '{(utc_now - datetime.timedelta(days=5)).strftime("%Y-%m-%d")}', 40, 60, 'bow', '1739c676-6108-4dd2-8984-2459df746', '+1 123-456-7890', 'user@@example.com', 'TX', '30301', 51.1657, 10.4515,'123-45-67890','12345', - 'FKRD00GCEYWDCNYLNF60','38259P508','BBG000B57Y12','US83165F1026', '5647382910564738291', 90, 50.0,' ','Simon'), -- invalid weapon_id --invalid email -- invalid ssn -- invalid sedol + 'FKRD00GCEYWDCNYLNF60','38259P508','BBG000B57Y12','US83165F1026', '5647382910564738291', 90, 50.0,' ','Simon', + '2023-13-01T00:00:00Z','2025-01-01T00:00:00+01:00','2024-09-06T01:15:00Z'), -- invalid weapon_id --invalid email -- invalid ssn -- invalid sedol ('clark kent', '{(utc_now - datetime.timedelta(days=6)).strftime("%Y-%m-%d")}', 35, 50, '', '7be61b2c-45dc-4889-97e3-9202e8', '09123.456.7890', 'contact@company.org', 'ZZ', '123456', 51.5074, -0.1278,'666-45-6789','34A56B7', - '6R5J00FMIANQQH6JMN56','83165F102','BBG000B9XRY','US0231351067', '1234-5678-9012-3456-78X', 0, -25.0,'Simon','None'), -- invalid weapon_id -- invalid phone -- invalid usa_state_code -- invalid usa_zip_code -- invalid ssn -- invalid sedol -- invalid figi -- invalid perm_id + '6R5J00FMIANQQH6JMN56','83165F102','BBG000B9XRY','US0231351067', '1234-5678-9012-3456-78X', 0, -25.0,'Simon','None', + '2023-03-08T16:45:00+02:00','2024-12-31T23:59:59Z','2024-09-06T01:15:00Z'), -- invalid weapon_id -- invalid phone -- invalid usa_state_code -- invalid usa_zip_code -- invalid ssn -- invalid sedol -- invalid figi -- invalid perm_id ('black widow', '{(utc_now - datetime.timedelta(days=6)).strftime("%Y-%m-%d")}', 35, 50, '', '7be61b2c-45dc-4889-97e3-9202e8032c73', '+1 (123) 456-7890', 'jane_smith123@domain.co.uk', 'FL', '90210', 483.8566, 2.3522,'001-01-0001','456VGHY', - '0FPB00BBRHUYOE7DSK19','023135106','BBG000B6R530','US037833100', '2345-6789-0123-4567-890', 70, 30.0,'Sam','Ram') -- invalid isin -- invalid sedol + '0FPB00BBRHUYOE7DSK19','023135106','BBG000B6R530','US037833100', '2345-6789-0123-4567-890', 70, 30.0,'Sam','Ram', + '2021-06-15T08:22:33.123Z','2024-10-10T12:00:00Z','2024-09-05T23:59:59Z') -- invalid isin -- invalid sedol """ postgresql_connection.execute(text(insert_query)) @@ -659,3 +668,48 @@ def test_should_return_row_count_for_null_keyword( table=self.TABLE_NAME, field="null_keyword", operation="count" ) assert valid_count == 3 + + def test_should_return_row_count_for_valid_timestamp_string( + self, postgres_datasource: PostgresDataSource + ): + ( + valid_count, + total_row_count, + ) = postgres_datasource.query_timestamp_metric( + table=self.TABLE_NAME, + field="timestamp", + operation="count", + predefined_regex="timestamp_iso", + ) + assert valid_count == 4 + assert total_row_count == 6 + + def test_should_return_row_count_for_valid_timestamp_not_in_future( + self, postgres_datasource: PostgresDataSource + ): + ( + valid_count, + total_row_count, + ) = postgres_datasource.query_timestamp_not_in_future_metric( + table=self.TABLE_NAME, + field="not_in_future", + operation="count", + predefined_regex="timestamp_iso", + ) + assert valid_count == 2 + assert total_row_count == 6 + + def test_should_return_row_count_for_valid_timestamp_date_not_in_future( + self, postgres_datasource: PostgresDataSource + ): + ( + valid_count, + total_row_count, + ) = postgres_datasource.query_timestamp_date_not_in_future_metric( + table=self.TABLE_NAME, + field="date_not_in_future", + operation="count", + predefined_regex="timestamp_iso", + ) + assert valid_count == 5 + assert total_row_count == 6