Skip to content

Commit

Permalink
Switch to supporting new input attribute 'geostore_ids'
Browse files Browse the repository at this point in the history
  • Loading branch information
danscales committed Jan 7, 2025
1 parent 724a3fb commit 209f60f
Showing 1 changed file with 32 additions and 27 deletions.
59 changes: 32 additions & 27 deletions lambdas/preprocessing/src/lambda_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,42 +32,47 @@ def handler(event: Dict[str, Any], context: Any) -> Any:
LOGGER.info(f"Running preprocessing with parameters: {event}")
fc: Optional[Dict] = event.get("feature_collection")
uri: Optional[str] = event.get("uri")
id_field = event.get("id_field", "fid")
geostore_ids: Optional[List[str]] = event.get("geostore_ids")
id_field: Optional[str] = event.get("id_field", "fid")

if fc is not None and uri is not None:
raise Exception("Please specify GeoJSON via (only) one parameter!")
if (fc and uri) or (fc and geostore_ids) or (uri and geostore_ids):
raise Exception("Please specify exactly one of 'feature_collection', 'uri', or 'geostore_ids'.")
elif fc is not None:
gpdf = gpd.GeoDataFrame.from_features(fc)
gpdf = gpd.GeoDataFrame.from_features(fc, columns=[id_field, "geometry"])
elif uri is not None:
gpdf = gpd.read_file(uri)
gpdf = gpd.read_file(uri, columns=[id_field, "geometry"])
elif geostore_ids is not None:
geostore_info = get_geostore_geoms(geostore_ids)
else:
raise Exception("Please specify GeoJSON via (only) one parameter!")
raise Exception("Please specify exactly one of 'feature_collection', 'uri', or 'geostore_ids'.")

if id_field not in gpdf.columns.tolist():
if gpdf is not None and id_field not in gpdf.columns.tolist():
raise Exception(f"Input feature collection is missing ID field '{id_field}'")

columns = gpdf.columns.tolist()
hasGeo = "geometry" in columns
hasGeoId = "geostore_id" in columns
if (not hasGeo and not hasGeoId) or (hasGeo and hasGeoId):
raise Exception("Input feature collection must have exactly one of 'geometry' or 'geostore_id'")

geostore_geoms: List[Dict[str, Any]]
if hasGeoId:
geostore_ids: List[str] = []
rows: List[List[str]] = []
if geostore_info is not None:
for info in geostore_info:
# Use the geostoreId itself as the id field for the output.
id = info["geostoreId"]
# The RW find-by-ids call returns the geometry as a feature collection.
# It is assumed (not verified) to always contain exactly one feature;
# only the first feature's geometry is used below.
fc = info["geostore"]["data"]["attributes"]["geojson"]["features"]
if fc is None:
raise Exception(f"Missing features attribute for geostore '{id}'")
# GeoDataFrame.from_features() expects each feature to have a
# 'properties' field.
for f in fc:
if f.get("properties") is None:
f["properties"] = {}
minidf = gpd.GeoDataFrame.from_features(fc)
geom = shape(getattr(minidf.iloc[0], "geometry"))
encoded_geom = encode_geometry(geom)
rows.append([id, encoded_geom])
else:
for record in gpdf.itertuples():
geostore_ids.append(getattr(record, "geostore_id"))
geostore_geoms = get_geostore_geoms(geostore_ids)

rows = []
for (i, record) in enumerate(gpdf.itertuples()):
if hasGeoId:
minidf = gpd.GeoDataFrame.from_features(geostore_geoms[i]["geostore"]["data"]["attributes"]["geojson"]["features"])
geom = shape(getattr(minidf.head(), "geometry"))
else:
geom = shape(getattr(record, "geometry"))
encoded_geom = encode_geometry(geom)
rows.append([getattr(record, id_field), encoded_geom])
encoded_geom = encode_geometry(geom)
rows.append([getattr(record, id_field), encoded_geom])

# Consider replacing UUID with hash of args for cacheability
request_hash: UUID = uuid4()
Expand Down

0 comments on commit 209f60f

Please sign in to comment.