Skip to content

Commit

Permalink
Adding prep function that formats postal codes and validates them if …
Browse files Browse the repository at this point in the history
…asked to do so (#418)
  • Loading branch information
JulianForeman authored Nov 28, 2024
1 parent 90aa0ad commit 90faae5
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 2 deletions.
4 changes: 3 additions & 1 deletion django/api/constants/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
location_checker,
email_validator,
validate_field_values,
region_checker
region_checker,
format_postal_codes
)
from api.services.resolvers import get_google_resolver
from api.constants.misc import GER_VALID_FIELD_VALUES, ARC_VALID_FIELD_VALUES, LOCALITY_FEATURES_MAP, CVP_DATA_VALID_FIELD_VALUES
Expand Down Expand Up @@ -829,6 +830,7 @@ class CVPDataColumnMapping(Enum):
{"function": location_checker, "columns": ["City"], "kwargs": {"columns_to_features_map": {"City": LOCALITY_FEATURES_MAP}, "indices_offset":2}},
{"function": email_validator, "columns": ["Email"], "kwargs": {"indices_offset":2, "get_resolver": get_google_resolver}},
{"function": validate_field_values, "columns": [], "kwargs": {"indices_offset":2, "fields_and_values": GER_VALID_FIELD_VALUES}},
{"function": format_postal_codes, "columns": ["Postal code"], "kwargs": {"indices_offset":2, "validate": True}}
]
},
"CVP Data": {
Expand Down
39 changes: 38 additions & 1 deletion django/api/services/spreadsheet_uploader_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,4 +410,41 @@ def region_checker(df, *columns, **kwargs):
}
}

return result
return result

def format_postal_codes(df, *columns, **kwargs):
    """Rewrite six-character postal codes in ``df`` as ``XXX XXX``.

    Every string cell in the given columns has its whitespace removed; when
    exactly six characters remain, the cell is overwritten in place with the
    first three characters, a single space, and the last three characters.
    All other cells are left untouched.

    Args:
        df: DataFrame to update in place.
        *columns: Names of the columns holding postal codes.
        **kwargs:
            validate (bool): When true, collect the values that could not be
                formatted and return them as warnings. Defaults to False.
            indices_offset (int): Offset between the row numbers produced by
                ``get_map_of_values_to_indices`` and the labels of ``df``;
                it is subtracted before writing back. Defaults to 0.

    Returns:
        When ``validate`` is true, a dict keyed by column name describing the
        invalid values and the rows they appear in (empty if none were
        found). When ``validate`` is false, ``None``.
    """
    validate = kwargs.get("validate", False)
    indices_offset = kwargs.get("indices_offset", 0)

    result = {}

    for column in columns:
        series = df[column]
        # Presumably maps each distinct cell value to the (offset) row numbers
        # where it occurs -- confirm against the helper's definition.
        map_of_values_to_indices = get_map_of_values_to_indices(series, indices_offset)
        invalid_groups = []

        for value, indices in map_of_values_to_indices.items():
            # Strip *all* whitespace, not only ASCII spaces: spreadsheet cells
            # frequently carry tabs, newlines or non-breaking spaces.
            # Non-string values (NaN, numbers, ...) can never be formatted.
            clean_value = "".join(value.split()) if isinstance(value, str) else ""

            # NOTE(review): only the length is checked here; any six
            # characters are accepted. Tighten if a specific letter/digit
            # pattern is required.
            if len(clean_value) == 6:
                formatted_value = f"{clean_value[:3]} {clean_value[3:]}"
                for index in indices:
                    # Undo the offset to get back to the DataFrame's own label.
                    df.at[index - indices_offset, column] = formatted_value
            elif validate:
                # Missing, empty and whitespace-only cells are all reported
                # under the same readable placeholder.
                is_blank = pd.isna(value) or (
                    isinstance(value, str) and not clean_value
                )
                invalid_groups.append({
                    "Invalid Postal Code": "Empty" if is_blank else value,
                    "Rows": indices,
                })

        if validate and invalid_groups:
            result[column] = {
                "Invalid Postal Codes": {
                    "Expected Type": "Postal Code should be formatted as (XXX XXX)",
                    "Groups": invalid_groups,
                    "Severity": "Warning",
                }
            }

    return result if validate else None

0 comments on commit 90faae5

Please sign in to comment.