Skip to content

Commit

Permalink
fix: update location checker (#383)
Browse files Browse the repository at this point in the history
  • Loading branch information
tim738745 authored Aug 21, 2024
1 parent d607b7d commit c0b18a7
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 36 deletions.
4 changes: 2 additions & 2 deletions django/api/constants/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
region_checker
)
from api.services.resolvers import get_google_resolver
from api.constants.misc import GER_VALID_FIELD_VALUES, ARC_VALID_FIELD_VALUES
from api.constants.misc import GER_VALID_FIELD_VALUES, ARC_VALID_FIELD_VALUES, LOCALITY_FEATURES_MAP


from enum import Enum
Expand Down Expand Up @@ -680,7 +680,7 @@ class GoElectricRebatesColumnMapping(Enum):
"validation_functions": [
{"function": validate_phone_numbers, "columns": ["Phone Number"], "kwargs": {"indices_offset": 2}},
{"function": typo_checker, "columns": ["Applicant Name"], "kwargs": {"cutoff": 0.8, "indices_offset": 2}},
# {"function": location_checker, "columns": ["City"], "kwargs": {"indices_offset":2}},
{"function": location_checker, "columns": ["City"], "kwargs": {"columns_to_features_map": {"City": LOCALITY_FEATURES_MAP}, "indices_offset":2}},
{"function": email_validator, "columns": ["Email"], "kwargs": {"indices_offset":2, "get_resolver": get_google_resolver}},
{"function": validate_field_values, "columns": [], "kwargs": {"indices_offset":2, "fields_and_values": GER_VALID_FIELD_VALUES}},
]
Expand Down
30 changes: 9 additions & 21 deletions django/api/constants/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,27 +55,15 @@
867,
]

RELEVANT_FEATURES = [
"Canadian Forces Base",
"Canadian Forces Station",
"City",
"Community",
"District Municipality (1)",
"First Nation Village",
"Former Locality",
"Indian Government District",
"Indian Government District : Land Unit",
"Indian Reserve-Réserve indienne",
"Locality",
"Recreation Facility",
"Recreational Community",
"Region",
"Regional District",
"Resort Municipality",
"Urban Community",
"Village (1)",
"Town",
]
# Map of feature category codes (integer keys) to the feature types (lists of
# feature type names) within each category that are treated as localities.
# location_checker iterates over the items of this map and calls
# get_placename_matches once per category code, passing the code as the
# "featureCategory" query parameter; only returned features whose
# "featureType" appears in that category's list are kept as matches.
LOCALITY_FEATURES_MAP = {
1: ["City", "District Municipality (1)", "Resort Municipality", "Village (1)", "Town"],
2: ["Community", "First Nation Village", "Former Locality", "Locality", "Recreational Community"],
3: ["Urban Community"],
5: ["Indian Government District", "Indian Government District : Land Unit"],
6: ["Indian Reserve-Réserve indienne", "Region", "Regional District"],
28: ["Canadian Forces Base", "Canadian Forces Station", "Recreation Facility"],
}

GER_VALID_FIELD_VALUES = {
'Approvals': ['Approved', 'Approved Fraudulent'],
Expand Down
23 changes: 17 additions & 6 deletions django/api/services/bcngws.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
import requests
from django.conf import settings
from api.constants.misc import RELEVANT_FEATURES

# names should be a list of location names, page_size should be an integer >=1, <=200

# names: a list of location names; feature_category: an integer code or "*";
# feature_types: a list of feature type names, or "*" to accept every type;
# page_size: an integer from 1 to 200; start_index: an integer;
# result: a set, to which the names of matching features are added in place
def get_placename_matches(names, page_size, start_index, result):
names_string = ", ".join(names)
def get_placename_matches(
names, feature_category, feature_types, page_size, start_index, result
):
names_string = " ".join(names)

query = {
"outputFormat": "json",
"name": names_string,
"itemsPerPage": page_size,
"startIndex": start_index,
"featureCategory": feature_category,
}

try:
Expand All @@ -22,11 +26,18 @@ def get_placename_matches(names, page_size, start_index, result):
for feature in response["features"]:
name = feature["properties"]["name"]
type = feature["properties"]["featureType"]
if type in RELEVANT_FEATURES:
if feature_types == "*" or type in feature_types:
result.add(name)

if response["properties"]["totalResults"] >= start_index + page_size:
get_placename_matches(names, page_size, start_index + page_size, result)
get_placename_matches(
names,
feature_category,
feature_types,
page_size,
start_index + page_size,
result,
)

except requests.RequestException as e:
print(f"Error fetching data: {e}")
12 changes: 5 additions & 7 deletions django/api/services/spreadsheet_uploader_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,21 +252,19 @@ def validate_phone_numbers(df, *columns, **kwargs):
return result


def location_checker(df, *columns, batch_size=50, **kwargs):
def location_checker(df, *columns, columns_to_features_map={}, **kwargs):
result = {}
for column in columns:
indices = []
series = df[column]
map_of_values_to_indices = get_map_of_values_to_indices(series, kwargs.get("indices_offset", 0))
values = series.to_list()
unique_values = set(series)
unique_values_list = list(values)
unique_values_list = list(unique_values)

communities = set()
for i in range(0, len(unique_values_list), batch_size):
batch_values = unique_values_list[i:i + batch_size]
# Send request to API with list of names, returns all the communities that somewhat matched
get_placename_matches(batch_values, 200, 1, communities)
features_map = columns_to_features_map[column]
for category_code, feature_types in features_map.items():
get_placename_matches(unique_values_list, category_code, feature_types, 200, 1, communities)

# Find names that don't have a match in the locations_set
names_without_match = unique_values.difference(communities)
Expand Down

0 comments on commit c0b18a7

Please sign in to comment.