diff --git a/lambdas/preprocessing/src/lambda_function.py b/lambdas/preprocessing/src/lambda_function.py index b5e1a33..4c0b556 100644 --- a/lambdas/preprocessing/src/lambda_function.py +++ b/lambdas/preprocessing/src/lambda_function.py @@ -32,42 +32,47 @@ def handler(event: Dict[str, Any], context: Any) -> Any: LOGGER.info(f"Running preprocessing with parameters: {event}") fc: Optional[Dict] = event.get("feature_collection") uri: Optional[str] = event.get("uri") - id_field = event.get("id_field", "fid") + geostore_ids: Optional[List[str]] = event.get("geostore_ids") + id_field: Optional[str] = event.get("id_field", "fid") - if fc is not None and uri is not None: - raise Exception("Please specify GeoJSON via (only) one parameter!") + if (fc and uri) or (fc and geostore_ids) or (uri and geostore_ids): + raise Exception("Please specify exactly one of 'feature_collection', 'uri', or 'geostore_ids'.") elif fc is not None: - gpdf = gpd.GeoDataFrame.from_features(fc) + gpdf = gpd.GeoDataFrame.from_features(fc, columns=[id_field, "geometry"]) elif uri is not None: - gpdf = gpd.read_file(uri) + gpdf = gpd.read_file(uri, columns=[id_field, "geometry"]) + elif geostore_ids is not None: + geostore_info = get_geostore_geoms(geostore_ids) else: - raise Exception("Please specify GeoJSON via (only) one parameter!") + raise Exception("Please specify exactly one of 'feature_collection', 'uri', or 'geostore_ids'.") - if id_field not in gpdf.columns.tolist(): + if gpdf is not None and id_field not in gpdf.columns.tolist(): raise Exception(f"Input feature collection is missing ID field '{id_field}'") - columns = gpdf.columns.tolist() - hasGeo = "geometry" in columns - hasGeoId = "geostore_id" in columns - if (not hasGeo and not hasGeoId) or (hasGeo and hasGeoId): - raise Exception("Input feature collection must have exactly one of 'geometry' or 'geostore_id'") - - geostore_geoms: List[Dict[str, Any]] - if hasGeoId: - geostore_ids: List[str] = [] + rows: List[List[str]] = [] + if geostore_info is not None: + for info in geostore_info: + # Use the geostoreId itself as the id field for the output. + id = info["geostoreId"] + # The RW find-by-ids call returns the geometry as a feature collection, + # which I think should always have one feature (?) + fc = info["geostore"]["data"]["attributes"]["geojson"]["features"] + if fc is None: + raise Exception(f"Missing features attribute for geostore '{id}'") + # GeoDataFrame.from_features() expects each feature to have a + # 'properties' field. + for f in fc: + if f.get("properties") is None: + f["properties"] = {} + minidf = gpd.GeoDataFrame.from_features(fc) + geom = shape(getattr(minidf.iloc[0], "geometry")) + encoded_geom = encode_geometry(geom) + rows.append([id, encoded_geom]) + else: for record in gpdf.itertuples(): - geostore_ids.append(getattr(record, "geostore_id")) - geostore_geoms = get_geostore_geoms(geostore_ids) - - rows = [] - for (i, record) in enumerate(gpdf.itertuples()): - if hasGeoId: - minidf = gpd.GeoDataFrame.from_features(geostore_geoms[i]["geostore"]["data"]["attributes"]["geojson"]["features"]) - geom = shape(getattr(minidf.head(), "geometry")) - else: geom = shape(getattr(record, "geometry")) - encoded_geom = encode_geometry(geom) - rows.append([getattr(record, id_field), encoded_geom]) + encoded_geom = encode_geometry(geom) + rows.append([getattr(record, id_field), encoded_geom]) # Consider replacing UUID with hash of args for cacheability request_hash: UUID = uuid4()