From 90faae59ec22588eb41b5a4d54c7d65f6493c4c8 Mon Sep 17 00:00:00 2001
From: JulianForeman <71847719+JulianForeman@users.noreply.github.com>
Date: Thu, 28 Nov 2024 10:26:14 -0800
Subject: [PATCH] Adding prep function that formats postal codes and validates them if asked to do so (#418)

---
Review note (this section is not part of the commit message): the
spreadsheet_uploader_prep.py hunk below was revised during review, so the
post-image blob id on its index line is stale. Leading whitespace of the
context lines was reconstructed and should be checked against the tree.

 django/api/constants/constants.py             |  4 +-
 .../api/services/spreadsheet_uploader_prep.py | 61 ++++++++++++++++++-
 2 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/django/api/constants/constants.py b/django/api/constants/constants.py
index 227dab2c..ffe20391 100644
--- a/django/api/constants/constants.py
+++ b/django/api/constants/constants.py
@@ -26,7 +26,8 @@
     location_checker,
     email_validator,
     validate_field_values,
-    region_checker
+    region_checker,
+    format_postal_codes
 )
 from api.services.resolvers import get_google_resolver
 from api.constants.misc import GER_VALID_FIELD_VALUES, ARC_VALID_FIELD_VALUES, LOCALITY_FEATURES_MAP, CVP_DATA_VALID_FIELD_VALUES
@@ -829,6 +830,7 @@ class CVPDataColumnMapping(Enum):
             {"function": location_checker, "columns": ["City"], "kwargs": {"columns_to_features_map": {"City": LOCALITY_FEATURES_MAP}, "indices_offset":2}},
             {"function": email_validator, "columns": ["Email"], "kwargs": {"indices_offset":2, "get_resolver": get_google_resolver}},
             {"function": validate_field_values, "columns": [], "kwargs": {"indices_offset":2, "fields_and_values": GER_VALID_FIELD_VALUES}},
+            {"function": format_postal_codes, "columns": ["Postal code"], "kwargs": {"indices_offset":2, "validate": True}}
         ]
     },
     "CVP Data": {
diff --git a/django/api/services/spreadsheet_uploader_prep.py b/django/api/services/spreadsheet_uploader_prep.py
index 8d8317bf..7e4db9a6 100644
--- a/django/api/services/spreadsheet_uploader_prep.py
+++ b/django/api/services/spreadsheet_uploader_prep.py
@@ -410,4 +410,63 @@ def region_checker(df, *columns, **kwargs):
                 }
             }
 
-    return result
\ No newline at end of file
+    return result
+
+def format_postal_codes(df, *columns, **kwargs):
+    """
+    Normalize postal codes in the given columns to the "XXX XXX" layout.
+
+    A string value that is exactly six characters long once all whitespace
+    is removed is rewritten in place in df as two groups of three
+    characters separated by one space. Only the length is checked; the
+    characters themselves are not inspected.
+
+    kwargs:
+        validate: when truthy, values that could not be formatted are
+            collected and returned; defaults to False.
+        indices_offset: passed to get_map_of_values_to_indices and
+            subtracted from each index it returns to locate the row in
+            df; defaults to 0.
+
+    Returns a dict keyed by column name that lists the invalid values and
+    their rows when validate is truthy, otherwise None.
+    """
+    validate = kwargs.get('validate', False)
+    indices_offset = kwargs.get("indices_offset", 0)
+
+    result = {}
+
+    for column in columns:
+        series = df[column]
+        map_of_values_to_indices = get_map_of_values_to_indices(series, indices_offset)
+        invalid_groups = []
+
+        for value, indices in map_of_values_to_indices.items():
+            # split() with no argument drops every whitespace character
+            # (tabs, non-breaking spaces, ...), not only the ASCII space.
+            clean_value = "".join(value.split()) if isinstance(value, str) else ""
+
+            if len(clean_value) == 6:
+                formatted_value = clean_value[:3] + " " + clean_value[3:]
+                for index in indices:
+                    df.at[index - indices_offset, column] = formatted_value
+            elif validate:
+                # Missing and whitespace-only cells share one readable label.
+                is_blank = isinstance(value, str) and not clean_value
+                if pd.isna(value) or is_blank:
+                    value = "Empty"
+                invalid_groups.append({
+                    "Invalid Postal Code": value,
+                    "Rows": indices
+                })
+
+        if validate and invalid_groups:
+            result[column] = {
+                "Invalid Postal Codes": {
+                    "Expected Type": "Postal Code should be formatted as (XXX XXX)",
+                    "Groups": invalid_groups,
+                    "Severity": "Warning"
+                }
+            }
+
+    return result if validate else None