diff --git a/cerulean_cloud/cloud_run_orchestrator/handler.py b/cerulean_cloud/cloud_run_orchestrator/handler.py
index e7075ab8..65de98ec 100644
--- a/cerulean_cloud/cloud_run_orchestrator/handler.py
+++ b/cerulean_cloud/cloud_run_orchestrator/handler.py
@@ -321,19 +321,35 @@ async def _orchestrate(
     base_group_bounds = group_bounds_from_list_of_bounds(base_tiles_bounds)
     print(f"base_group_bounds: {base_group_bounds}")

-    offset_tiles_bounds = offset_bounds_from_base_tiles(base_tiles)
+    # tiling.py was updated so that offset_amount can be passed to offset_bounds_from_base_tiles(); see tiling.py line 61.
+    offset_tiles_bounds = offset_bounds_from_base_tiles(base_tiles, offset_amount=0.33)
     offset_group_shape = offset_group_shape_from_base_tiles(base_tiles, scale=scale)
     offset_group_bounds = group_bounds_from_list_of_bounds(offset_tiles_bounds)

-    print(f"Offset image shape is {offset_group_shape}")
-    print(f"offset_group_bounds: {offset_group_bounds}")
+    print(f"Offset 1 offset_tiles_bounds {offset_tiles_bounds}")
+    print(f"Offset 1 image shape is {offset_group_shape}")
+    print(f"Offset 1 offset_group_bounds: {offset_group_bounds}")

-    print(f"Original tiles are {len(base_tiles)}, {len(offset_tiles_bounds)}")
+    print("START OF OFFSET #2")
+    offset_2_tiles_bounds = offset_bounds_from_base_tiles(
+        base_tiles, offset_amount=0.66
+    )
+    offset_2_group_shape = offset_group_shape_from_base_tiles(base_tiles, scale=scale)
+    offset_2_group_bounds = group_bounds_from_list_of_bounds(offset_2_tiles_bounds)
+    print(f"Offset 2 offset_tiles_bounds {offset_2_tiles_bounds}")
+    print(f"Offset 2 image shape is {offset_2_group_shape}")
+    print(f"Offset 2 offset_group_bounds: {offset_2_group_bounds}")
+
+    print(
+        f"Original tiles are {len(base_tiles)}, {len(offset_tiles_bounds)}, {len(offset_2_tiles_bounds)}"
+    )

     # Filter out land tiles
     # XXXBUG is_tile_over_water throws ValueError if the scene crosses or is close to the antimeridian. Example: S1A_IW_GRDH_1SDV_20230726T183302_20230726T183327_049598_05F6CA_31E7
     # XXXBUG is_tile_over_water throws IndexError if the scene touches the Caspian sea (globe says it is NOT ocean, whereas our cloud_function_scene_relevancy says it is). Example: S1A_IW_GRDH_1SDV_20230727T025332_20230727T025357_049603_05F6F2_AF3E
     base_tiles = [t for t in base_tiles if is_tile_over_water(tiler.bounds(t))]
+    offset_tiles_bounds = [b for b in offset_tiles_bounds if is_tile_over_water(b)]
+    offset_2_tiles_bounds = [b for b in offset_2_tiles_bounds if is_tile_over_water(b)]

     ntiles = len(base_tiles)
     noffsettiles = len(offset_tiles_bounds)
@@ -407,13 +423,25 @@ async def _orchestrate(
         "offset tiles",
     )

+    offset_2_tiles_inference = await perform_inference(
+        offset_2_tiles_bounds,
+        cloud_run_inference.get_offset_tile_inference,
+        20,
+        "offset2 tiles",
+    )
+
     if model.type == "MASKRCNN":
         out_fc = geojson.FeatureCollection(
             features=flatten_feature_list(base_tiles_inference)
         )
+
         out_fc_offset = geojson.FeatureCollection(
             features=flatten_feature_list(offset_tiles_inference)
         )
+
+        out_fc_offset_2 = geojson.FeatureCollection(
+            features=flatten_feature_list(offset_2_tiles_inference)
+        )
     elif model.type == "UNET":
         # print("Loading all tiles into memory for merge!")
         # ds_base_tiles = []
@@ -475,10 +503,10 @@ async def _orchestrate(
     # Example: S1A_IW_GRDH_1SDV_20230727T185101_20230727T185126_049613_05F744_1E56
     print("XXXDEBUG out_fc", out_fc)
     print("XXXDEBUG out_fc_offset", out_fc_offset)
+    print("XXXDEBUG out_fc_offset_2", out_fc_offset_2)
+
     merged_inferences = merge_inferences(
-        out_fc,
-        out_fc_offset,
-        isolated_conf_multiplier=0.5,
+        feature_collections=[out_fc, out_fc_offset, out_fc_offset_2],
         proximity_meters=500,
         closing_meters=0,
         opening_meters=0,
@@ -540,5 +568,4 @@ async def _orchestrate(
         ntiles=ntiles,
         noffsettiles=noffsettiles,
     )
-
     return orchestrator_result
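Note (illustration, not part of the patch): with this change the orchestrator runs inference over three tilings of the same scene (the base grid plus grids shifted by 0.33 and 0.66 of a tile) and passes all three resulting FeatureCollections to merge_inferences(). A minimal sketch of that flow, using a hypothetical run_inference() helper in place of the actual perform_inference()/Cloud Run plumbing:

    base_bounds = [tiler.bounds(t) for t in base_tiles]
    offset_1_bounds = offset_bounds_from_base_tiles(base_tiles, offset_amount=0.33)
    offset_2_bounds = offset_bounds_from_base_tiles(base_tiles, offset_amount=0.66)

    # One FeatureCollection per tiling; all three are merged downstream.
    fcs = [run_inference(b) for b in (base_bounds, offset_1_bounds, offset_2_bounds)]
    merged = merge_inferences(
        feature_collections=fcs,
        proximity_meters=500,
        closing_meters=0,
        opening_meters=0,
    )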
diff --git a/cerulean_cloud/cloud_run_orchestrator/merging.py b/cerulean_cloud/cloud_run_orchestrator/merging.py
index 8b36db52..a722176e 100644
--- a/cerulean_cloud/cloud_run_orchestrator/merging.py
+++ b/cerulean_cloud/cloud_run_orchestrator/merging.py
@@ -1,4 +1,6 @@
 """merging inference from base and offset tiles"""
+from typing import List
+
 import geojson
 import geopandas as gpd
 import networkx as nx
@@ -12,24 +14,20 @@ def reproject_to_utm(gdf_wgs84):


 def merge_inferences(
-    base_tile_fc: geojson.FeatureCollection,
-    offset_tile_fc: geojson.FeatureCollection,
-    isolated_conf_multiplier: float = 1,
+    feature_collections: List[geojson.FeatureCollection],
     proximity_meters: int = 500,
     closing_meters: int = 0,
     opening_meters: int = 0,
 ) -> geojson.FeatureCollection:
     """
-    Merge base and offset tile inference.
+    Merge base and all offset tile inferences.

-    This function takes in two geojson FeatureCollections and merges them together.
-    During the merge, the geometries can be adjusted to incorporate neighboring features
-    based on the proximity setting. The confidence of isolated features can also be adjusted.
+    This function takes a list of geojson FeatureCollections and merges them together. During the merge, the
+    geometries can be adjusted to incorporate neighboring features based on the proximity setting, and each
+    feature's confidence is scaled by the fraction of input collections represented in its merged group.

     Parameters:
-    - base_tile_fc: The primary FeatureCollection to be merged.
-    - offset_tile_fc: The secondary FeatureCollection to be merged with the primary.
-    - isolated_conf_multiplier: A multiplier for the confidence of isolated features (default is 1).
+    - feature_collections: A list of FeatureCollections to be merged: a primary collection and any number of secondary collections.
     - proximity_meters: The distance to check for neighboring features and expand the geometries (default is 500m).
     - closing_meters: The distance to apply the morphological 'closing' operation (default is 0m).
     - opening_meters: The distance to apply the morphological 'opening' operation (default is 0m).
@@ -37,75 +35,76 @@ def merge_inferences(

     Returns:
     A merged geojson FeatureCollection.
     """
-
-    # Check if both FeatureCollections have features
-    if base_tile_fc["features"] and offset_tile_fc["features"]:
-        # Convert the FeatureCollections to GeoDataFrames
-        base_gdf = gpd.GeoDataFrame.from_features(base_tile_fc["features"], crs=4326)
-        offset_gdf = gpd.GeoDataFrame.from_features(
-            offset_tile_fc["features"], crs=4326
+    # We reproject to UTM for processing. This assumes that all offset images will either be in the same UTM zone as
+    # the input image chip, or that the differences that arise from an offset crossing into a second UTM zone will
+    # have little or no impact on comparison with the original image.
+    gdfs_for_processing = [
+        reproject_to_utm(
+            gpd.GeoDataFrame.from_features(fc["features"], crs=4326).assign(fc_index=i)
         )
+        for i, fc in enumerate(feature_collections)
+        if fc["features"]
+    ]

-        # Reproject both GeoDataFrames to a UTM CRS (for accurate distance calculations)
-        base_gdf = reproject_to_utm(base_gdf)
-        offset_gdf = offset_gdf.to_crs(base_gdf.crs)
-
-        # Combine both GeoDataFrames
-        concat_gdf = pd.concat([base_gdf, offset_gdf], ignore_index=True)
-        final_gdf = concat_gdf.copy()
-
-        # If proximity is set, expand the geometry of each feature by the defined distance
-        if proximity_meters is not None:
-            concat_gdf["geometry"] = concat_gdf.buffer(proximity_meters)
-
-        # Join the features that intersect with each other
-        joined = gpd.sjoin(concat_gdf, concat_gdf, predicate="intersects").reset_index()
-
-        # Create a graph where each node represents a feature and edges represent overlaps/intersections
-        G = nx.from_pandas_edgelist(joined, "index", "index_right")
-
-        # For each connected component in the graph, assign a group index and count its features
-        group_mapping = {
-            feature: group
-            for group, component in enumerate(nx.connected_components(G))
-            for feature in component
-        }
-        group_counts = {
-            feature: len(component)
-            for component in nx.connected_components(G)
-            for feature in component
-        }
-
-        # Map the group indices and counts back to the GeoDataFrame
-        final_gdf["group_index"] = final_gdf.index.map(group_mapping)
-        final_gdf["group_count"] = final_gdf.index.map(group_counts)
-
-        # Adjust the confidence value for features that are isolated (not part of a larger group)
-        final_gdf.loc[
-            final_gdf["group_count"] == 1, "machine_confidence"
-        ] *= isolated_conf_multiplier
-
-        # Dissolve overlapping features into one based on their group index and calculate the median confidence and maximum inference index
-        dissolved_gdf = final_gdf.dissolve(
-            by="group_index", aggfunc={"machine_confidence": "median", "inf_idx": "max"}
-        )
+    if len(gdfs_for_processing) == 0:
+        # No inferences found in any tiling
+        return geojson.FeatureCollection(features=[])

-        # If set, apply a morphological 'closing' operation to the geometries
-        if closing_meters is not None:
-            dissolved_gdf["geometry"] = dissolved_gdf.buffer(closing_meters).buffer(
-                -closing_meters
-            )
+    # Concat the GeoDataFrames
+    concat_gdf = pd.concat(gdfs_for_processing, ignore_index=True)
+    final_gdf = concat_gdf.copy()
+
+    # If proximity is set, expand the geometry of each feature by the defined distance
+    if proximity_meters is not None:
+        concat_gdf["geometry"] = concat_gdf.buffer(proximity_meters)
+
+    # Join the features that intersect with each other
+    joined = gpd.sjoin(concat_gdf, concat_gdf, predicate="intersects").reset_index()
+
+    # Create a graph where each node represents a feature and edges represent overlaps/intersections
+    G = nx.from_pandas_edgelist(joined, "index", "index_right")
+
+    # For each connected component in the graph, assign a group index and count its features
+    group_mapping = {
+        feature: group
+        for group, component in enumerate(nx.connected_components(G))
+        for feature in component
+    }
+    group_counts = {
+        feature: len(component)
+        for component in nx.connected_components(G)
+        for feature in component
+    }
+
+    # Map the group indices and counts back to the GeoDataFrame
+    final_gdf["group_index"] = final_gdf.index.map(group_mapping)
+    final_gdf["group_count"] = final_gdf.index.map(group_counts)
+
+    # Scale each feature's confidence by the fraction of input collections represented in its group
+    final_gdf["overlap_factor"] = final_gdf.groupby("group_index")[
+        "fc_index"
+    ].transform(lambda x: len(x.unique()) / len(feature_collections))
+
+    final_gdf["machine_confidence"] *= final_gdf["overlap_factor"]
+
+    # Dissolve overlapping features into one based on their group index and calculate the median confidence and maximum inference index
+    dissolved_gdf = final_gdf.dissolve(
+        by="group_index", aggfunc={"machine_confidence": "median", "inf_idx": "max"}
+    )
+
+    # If set, apply a morphological 'closing' operation to the geometries
+    if closing_meters is not None:
+        dissolved_gdf["geometry"] = dissolved_gdf.buffer(closing_meters).buffer(
+            -closing_meters
+        )

-        # If set, apply a morphological 'opening' operation to the geometries
-        if opening_meters is not None:
-            dissolved_gdf["geometry"] = dissolved_gdf.buffer(-opening_meters).buffer(
-                opening_meters
-            )
+    # If set, apply a morphological 'opening' operation to the geometries
+    if opening_meters is not None:
+        dissolved_gdf["geometry"] = dissolved_gdf.buffer(-opening_meters).buffer(
+            opening_meters
+        )

-        # Reproject the GeoDataFrame back to WGS 84 CRS
-        result = dissolved_gdf.to_crs(crs=4326)
+    # Reproject the GeoDataFrame back to WGS 84 CRS
+    result = dissolved_gdf.to_crs(crs=4326)

-        return result.__geo_interface__
-    else:
-        # If one of the FeatureCollections is empty, return an empty FeatureCollection
-        return geojson.FeatureCollection(features=[])
+    return result.__geo_interface__
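Note (illustration, not part of the patch): the isolated_conf_multiplier knob is replaced by an overlap factor. Each connected group of detections has its confidence scaled by the fraction of input FeatureCollections that contributed a feature to the group, so agreement between tilings is preserved and one-off detections are down-weighted. A toy example of the scaling rule with three input collections (values assumed for illustration):

    n_collections = 3

    # Detection found by all three tilings: confidence is kept as-is.
    fc_indices_in_group = [0, 1, 2]
    overlap_factor = len(set(fc_indices_in_group)) / n_collections  # 1.0

    # Detection found by only one tiling: confidence is scaled down to one third.
    fc_indices_in_group = [1]
    overlap_factor = len(set(fc_indices_in_group)) / n_collections  # ~0.33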
diff --git a/cerulean_cloud/tiling.py b/cerulean_cloud/tiling.py
index d11f1584..0c2b2816 100644
--- a/cerulean_cloud/tiling.py
+++ b/cerulean_cloud/tiling.py
@@ -58,6 +58,7 @@ def adjacent_tile(tile: morecantile.Tile, dx: int, dy: int) -> morecantile.Tile:

 def offset_bounds_from_base_tiles(
     tiles: List[morecantile.Tile],
+    offset_amount: float = 0.5,
 ) -> List[Tuple[float, float, float, float]]:
     """from a set of base tiles, generate offset tiles"""
     out_offset_tile_bounds = []
@@ -74,8 +75,8 @@ def offset_bounds_from_base_tiles(
             # +2 because tileymax needs to be included (+1) and the new grid has one extra row/column (+1)
             tile = morecantile.Tile(new_x, new_y, zoom)
             adj_tile = adjacent_tile(tile, -1, -1)  # Negative dY is upwards!!!
-            minx, maxy = pixel_to_location(adj_tile, 0.5, 0.5)
-            maxx, miny = pixel_to_location(tile, 0.5, 0.5)
+            minx, maxy = pixel_to_location(adj_tile, offset_amount, offset_amount)
+            maxx, miny = pixel_to_location(tile, offset_amount, offset_amount)
             out_offset_tile_bounds += [(minx, miny, maxx, maxy)]

     return out_offset_tile_bounds
diff --git a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
index d70440e9..4de55dd0 100644
--- a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
+++ b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
@@ -433,9 +433,7 @@ def test_func_merge_inferences():
         offset_tile_fc = dict(geojson.load(src))

     merged = merge_inferences(
-        base_tile_fc=base_tile_fc,
-        offset_tile_fc=offset_tile_fc,
-        isolated_conf_multiplier=0.1,
+        [base_tile_fc, offset_tile_fc],
         proximity_meters=500,
         closing_meters=100,
         opening_meters=100,
@@ -458,23 +456,19 @@ def test_func_merge_inferences_empty():
     with open("test/test_cerulean_cloud/fixtures/offset.geojson") as src:
         offset_tile_fc = dict(geojson.load(src))

-    merged = merge_inferences(
-        base_tile_fc=geojson.FeatureCollection(features=[]),
-        offset_tile_fc=offset_tile_fc,
-    )
+    merged = merge_inferences([geojson.FeatureCollection(features=[]), offset_tile_fc])
     assert merged["type"] == "FeatureCollection"
-    assert len(merged["features"]) == 0
+    assert len(merged["features"]) == 5

-    merged = merge_inferences(
-        base_tile_fc=offset_tile_fc,
-        offset_tile_fc=geojson.FeatureCollection(features=[]),
-    )
+    merged = merge_inferences([offset_tile_fc, geojson.FeatureCollection(features=[])])
     assert merged["type"] == "FeatureCollection"
-    assert len(merged["features"]) == 0
+    assert len(merged["features"]) == 5

     merged = merge_inferences(
-        base_tile_fc=geojson.FeatureCollection(features=[]),
-        offset_tile_fc=geojson.FeatureCollection(features=[]),
+        [
+            geojson.FeatureCollection(features=[]),
+            geojson.FeatureCollection(features=[]),
+        ],
     )
     assert merged["type"] == "FeatureCollection"
     assert len(merged["features"]) == 0
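Note (illustration, not part of the patch): offset_amount is the fractional position inside a tile used to anchor the shifted grid, so the default of 0.5 reproduces the previous half-tile offset while 0.33 and 0.66 produce the two new grids used by the orchestrator. A minimal sketch of how a single offset bound is derived, assuming adjacent_tile and pixel_to_location are importable from cerulean_cloud.tiling and using an example tile:

    import morecantile

    from cerulean_cloud.tiling import adjacent_tile, pixel_to_location

    tile = morecantile.Tile(x=553, y=346, z=10)  # example tile, values assumed
    adj_tile = adjacent_tile(tile, -1, -1)  # one tile up and to the left

    offset_amount = 0.33
    minx, maxy = pixel_to_location(adj_tile, offset_amount, offset_amount)
    maxx, miny = pixel_to_location(tile, offset_amount, offset_amount)

    offset_bounds = (minx, miny, maxx, maxy)  # a grid cell shifted by a third of a tile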