From dd93fb97b0384c9076a6c5ede48bcf3c068f9c26 Mon Sep 17 00:00:00 2001 From: JulianForeman <71847719+JulianForeman@users.noreply.github.com> Date: Wed, 7 Aug 2024 14:06:33 -0700 Subject: [PATCH] Fix: Geographic Name Maximum Query Length #361 (#372) * Adding batch system to API call to avoid max query length error * Accidentally removed a couple relevant comments and code * Adding unique_values_list to loop in case of duplicates --- django/api/services/bcngws.py | 5 ++--- django/api/services/spreadsheet_uploader_prep.py | 12 +++++++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/django/api/services/bcngws.py b/django/api/services/bcngws.py index 66bf4924..c42b7278 100644 --- a/django/api/services/bcngws.py +++ b/django/api/services/bcngws.py @@ -2,7 +2,6 @@ from django.conf import settings from api.constants.misc import RELEVANT_FEATURES - # names should be a list of location names, page_size should be an integer >=1, <=200 # start_index should be an integer, result should be a set def get_placename_matches(names, page_size, start_index, result): @@ -11,13 +10,13 @@ def get_placename_matches(names, page_size, start_index, result): query = { "outputFormat": "json", "name": names_string, - "itemsPerPage": 200, + "itemsPerPage": page_size, "startIndex": start_index, } try: response = requests.get(settings.PLACENAMES_ENDPOINT, params=query) - response.raise_for_status() # Raise an exception for HTTP errors (4xx or 5xx) + response.raise_for_status() response = response.json() for feature in response["features"]: diff --git a/django/api/services/spreadsheet_uploader_prep.py b/django/api/services/spreadsheet_uploader_prep.py index 8b082c66..91c1c03c 100644 --- a/django/api/services/spreadsheet_uploader_prep.py +++ b/django/api/services/spreadsheet_uploader_prep.py @@ -250,7 +250,7 @@ def validate_phone_numbers(df, *columns, **kwargs): return result -def location_checker(df, *columns, **kwargs): +def location_checker(df, *columns, batch_size=50, **kwargs): result = {} for column in columns: indices = [] @@ -258,15 +258,21 @@ def location_checker(df, *columns, **kwargs): map_of_values_to_indices = get_map_of_values_to_indices(series, kwargs.get("indices_offset", 0)) values = series.to_list() unique_values = set(series) + unique_values_list = list(values) communities = set() - # populate communities by calling the bcngws API with the values: - get_placename_matches(values, 200, 1, communities) + for i in range(0, len(unique_values_list), batch_size): + batch_values = unique_values_list[i:i + batch_size] + # Send request to API with list of names, returns all the communities that somewhat matched + get_placename_matches(batch_values, 200, 1, communities) + + # Find names that don't have a match in the locations_set names_without_match = unique_values.difference(communities) for name in names_without_match: indices_to_add = map_of_values_to_indices[name] indices.extend(indices_to_add) if indices: + indices.sort() result[column] = { "Unrecognized City Names": { "Expected Type": "The following city names are not in the list of geographic names. Please double check that these places exist or have correct spelling and adjust your dataset accordingly.",