diff --git a/django/api/constants/constants.py b/django/api/constants/constants.py index f5213aae..74da2492 100644 --- a/django/api/constants/constants.py +++ b/django/api/constants/constants.py @@ -27,7 +27,7 @@ region_checker ) from api.services.resolvers import get_google_resolver -from api.constants.misc import GER_VALID_FIELD_VALUES, ARC_VALID_FIELD_VALUES +from api.constants.misc import GER_VALID_FIELD_VALUES, ARC_VALID_FIELD_VALUES, LOCALITY_FEATURES_MAP from enum import Enum @@ -680,7 +680,7 @@ class GoElectricRebatesColumnMapping(Enum): "validation_functions": [ {"function": validate_phone_numbers, "columns": ["Phone Number"], "kwargs": {"indices_offset": 2}}, {"function": typo_checker, "columns": ["Applicant Name"], "kwargs": {"cutoff": 0.8, "indices_offset": 2}}, - # {"function": location_checker, "columns": ["City"], "kwargs": {"indices_offset":2}}, + {"function": location_checker, "columns": ["City"], "kwargs": {"columns_to_features_map": {"City": LOCALITY_FEATURES_MAP}, "indices_offset":2}}, {"function": email_validator, "columns": ["Email"], "kwargs": {"indices_offset":2, "get_resolver": get_google_resolver}}, {"function": validate_field_values, "columns": [], "kwargs": {"indices_offset":2, "fields_and_values": GER_VALID_FIELD_VALUES}}, ] diff --git a/django/api/constants/misc.py b/django/api/constants/misc.py index 95de56bb..18efbfc8 100644 --- a/django/api/constants/misc.py +++ b/django/api/constants/misc.py @@ -55,27 +55,15 @@ 867, ] -RELEVANT_FEATURES = [ - "Canadian Forces Base", - "Canadian Forces Station", - "City", - "Community", - "District Municipality (1)", - "First Nation Village", - "Former Locality", - "Indian Government District", - "Indian Government District : Land Unit", - "Indian Reserve-Réserve indienne", - "Locality", - "Recreation Facility", - "Recreational Community", - "Region", - "Regional District", - "Resort Municipality", - "Urban Community", - "Village (1)", - "Town", -] +# map of feature category codes to feature types 
for locality features: +LOCALITY_FEATURES_MAP = { + 1: ["City", "District Municipality (1)", "Resort Municipality", "Village (1)", "Town"], + 2: ["Community", "First Nation Village", "Former Locality", "Locality", "Recreational Community"], + 3: ["Urban Community"], + 5: ["Indian Government District", "Indian Government District : Land Unit"], + 6: ["Indian Reserve-Réserve indienne", "Region", "Regional District"], + 28: ["Canadian Forces Base", "Canadian Forces Station", "Recreation Facility"], +} GER_VALID_FIELD_VALUES = { 'Approvals': ['Approved', 'Approved Fraudulent'], diff --git a/django/api/services/bcngws.py b/django/api/services/bcngws.py index c42b7278..55ce2aec 100644 --- a/django/api/services/bcngws.py +++ b/django/api/services/bcngws.py @@ -1,17 +1,21 @@ import requests from django.conf import settings -from api.constants.misc import RELEVANT_FEATURES -# names should be a list of location names, page_size should be an integer >=1, <=200 + +# names should be a list of location names, feature_category should be an integer or *, +# feature_types should be a list or *, page_size should be an integer >=1, <=200, # start_index should be an integer, result should be a set -def get_placename_matches(names, page_size, start_index, result): - names_string = ", ".join(names) +def get_placename_matches( + names, feature_category, feature_types, page_size, start_index, result +): + names_string = " ".join(names) query = { "outputFormat": "json", "name": names_string, "itemsPerPage": page_size, "startIndex": start_index, + "featureCategory": feature_category, } try: @@ -22,11 +26,18 @@ def get_placename_matches(names, page_size, start_index, result): for feature in response["features"]: name = feature["properties"]["name"] type = feature["properties"]["featureType"] - if type in RELEVANT_FEATURES: + if feature_types == "*" or type in feature_types: result.add(name) if response["properties"]["totalResults"] >= start_index + page_size: - get_placename_matches(names, 
page_size, start_index + page_size, result) + get_placename_matches( + names, + feature_category, + feature_types, + page_size, + start_index + page_size, + result, + ) except requests.RequestException as e: print(f"Error fetching data: {e}") diff --git a/django/api/services/spreadsheet_uploader_prep.py b/django/api/services/spreadsheet_uploader_prep.py index 0b93eabb..9bd685f4 100644 --- a/django/api/services/spreadsheet_uploader_prep.py +++ b/django/api/services/spreadsheet_uploader_prep.py @@ -252,21 +252,20 @@ def validate_phone_numbers(df, *columns, **kwargs): return result -def location_checker(df, *columns, batch_size=50, **kwargs): +def location_checker(df, *columns, columns_to_features_map=None, **kwargs): result = {} for column in columns: indices = [] series = df[column] map_of_values_to_indices = get_map_of_values_to_indices(series, kwargs.get("indices_offset", 0)) - values = series.to_list() unique_values = set(series) - unique_values_list = list(values) + unique_values_list = list(unique_values) communities = set() - for i in range(0, len(unique_values_list), batch_size): - batch_values = unique_values_list[i:i + batch_size] - # Send request to API with list of names, returns all the communities that somewhat matched - get_placename_matches(batch_values, 200, 1, communities) + # a column with no entry in columns_to_features_map is searched with the "*" wildcards that get_placename_matches accepts + features_map = (columns_to_features_map or {}).get(column, {"*": "*"}) + for category_code, feature_types in features_map.items(): + get_placename_matches(unique_values_list, category_code, feature_types, 200, 1, communities) # Find names that don't have a match in the locations_set names_without_match = unique_values.difference(communities)