From 6ef510c5359ff543776f4149f9980678c984bb02 Mon Sep 17 00:00:00 2001 From: christian Date: Thu, 2 Nov 2023 16:53:02 -0400 Subject: [PATCH 01/18] first pass of double offset --- .../cloud_run_orchestrator/handler.py | 66 +++++-- .../cloud_run_orchestrator/merging.py | 172 +++++++++--------- cerulean_cloud/tiling.py | 6 +- 3 files changed, 148 insertions(+), 96 deletions(-) diff --git a/cerulean_cloud/cloud_run_orchestrator/handler.py b/cerulean_cloud/cloud_run_orchestrator/handler.py index e7075ab8..af83d810 100644 --- a/cerulean_cloud/cloud_run_orchestrator/handler.py +++ b/cerulean_cloud/cloud_run_orchestrator/handler.py @@ -321,19 +321,36 @@ async def _orchestrate( base_group_bounds = group_bounds_from_list_of_bounds(base_tiles_bounds) print(f"base_group_bounds: {base_group_bounds}") - offset_tiles_bounds = offset_bounds_from_base_tiles(base_tiles) + # XXXC - THIS IS THE START OF THE OFFSETTING. tiling.py was updated to allow for offset_amount to be declared by offset_bounds_from_base_tiles(), + # see tiling.py line 61. + offset_tiles_bounds = offset_bounds_from_base_tiles(base_tiles, offset_amount=0.33) offset_group_shape = offset_group_shape_from_base_tiles(base_tiles, scale=scale) offset_group_bounds = group_bounds_from_list_of_bounds(offset_tiles_bounds) - print(f"Offset image shape is {offset_group_shape}") - print(f"offset_group_bounds: {offset_group_bounds}") + print(f"Offset 1 offset_tiles_bounds {offset_tiles_bounds}") + print(f"Offset 1 image shape is {offset_group_shape}") + print(f"Offset 1 offset_group_bounds: {offset_group_bounds}") - print(f"Original tiles are {len(base_tiles)}, {len(offset_tiles_bounds)}") + print("START OF OFFSET #2") + offset_2_tiles_bounds = offset_bounds_from_base_tiles(base_tiles, offset_amount=0.66) + offset_2_group_shape = offset_group_shape_from_base_tiles(base_tiles, scale=scale) + offset_2_group_bounds = group_bounds_from_list_of_bounds(offset_2_tiles_bounds) + print(f"Offset 2 offset_tiles_bounds {offset_2_tiles_bounds}") + print(f"Offset 2 image shape is {offset_2_group_shape}") + print(f"Offset 2 offset_group_bounds: {offset_2_group_bounds}") + + print(f"Original tiles are {len(base_tiles)}, {len(offset_group_bounds)}, {len(offset_2_group_bounds)}") + # XXXC - THIS IS THE END OF THE OFFSETTING. # Filter out land tiles # XXXBUG is_tile_over_water throws ValueError if the scene crosses or is close to the antimeridian. Example: S1A_IW_GRDH_1SDV_20230726T183302_20230726T183327_049598_05F6CA_31E7 # XXXBUG is_tile_over_water throws IndexError if the scene touches the Caspian sea (globe says it is NOT ocean, whereas our cloud_function_scene_relevancy says it is). Example: S1A_IW_GRDH_1SDV_20230727T025332_20230727T025357_049603_05F6F2_AF3E base_tiles = [t for t in base_tiles if is_tile_over_water(tiler.bounds(t))] + + # XXXC - OFFSET TIlE BOUNDS OVER WATER offset_tiles_bounds = [b for b in offset_tiles_bounds if is_tile_over_water(b)] + print(f"offset_tiles_bounds: {offset_tiles_bounds}") + offset_2_tiles_bounds = [b for b in offset_2_tiles_bounds if is_tile_over_water(b)] + print(f"offset_2_tiles_bounds: {offset_2_tiles_bounds}") ntiles = len(base_tiles) noffsettiles = len(offset_tiles_bounds) @@ -399,21 +416,37 @@ async def _orchestrate( 20, "base tiles", ) - + + # XXXC - START OFFSET 2 offset_tiles_inference = await perform_inference( offset_tiles_bounds, cloud_run_inference.get_offset_tile_inference, 20, "offset tiles", ) + # XXXC - END OFFSET 2 + + # XXXC - START OFFSET 3 + offset_2_tiles_inference = await perform_inference( + offset_2_tiles_bounds, + cloud_run_inference.get_offset_tile_inference, # THIS FUNCTION NEEDS TO BE EDITTED + 20, + "offset2 tiles", + ) + # XXXC - END OFFSET 3 if model.type == "MASKRCNN": out_fc = geojson.FeatureCollection( features=flatten_feature_list(base_tiles_inference) ) + # XXXC - OFFSET 3 >> THIS NEEDS TO BE EDITTED out_fc_offset = geojson.FeatureCollection( features=flatten_feature_list(offset_tiles_inference) ) + + out_fc_offset_2 = geojson.FeatureCollection( + features=flatten_feature_list(offset_2_tiles_inference) + ) elif model.type == "UNET": # print("Loading all tiles into memory for merge!") # ds_base_tiles = [] @@ -475,14 +508,23 @@ async def _orchestrate( # Example: S1A_IW_GRDH_1SDV_20230727T185101_20230727T185126_049613_05F744_1E56 print("XXXDEBUG out_fc", out_fc) print("XXXDEBUG out_fc_offset", out_fc_offset) - merged_inferences = merge_inferences( - out_fc, - out_fc_offset, - isolated_conf_multiplier=0.5, + print("XXXCDEBUG out_fc_offset2", out_fc_offset_2) + + # XXXC - OFFSET 3 >> change code to allow number of offsets, pass in list of featureCollections (2 or 3) + print("XXXC >> TRYING TO MERGE INFERENCES") + merged_inferences = merge_inferences( # changes to 15-16, give list of FCs 2-3 items + feature_collections = [out_fc, out_fc_offset, out_fc_offset_2], + # out_fc, + # out_fc_offset, + # out_fc_offset_2, # Added this as a test before giving merge a list + + # Not declaring isolated_conf_multiplier because the default value is updated in merging.py + # isolated_conf_multiplier=0.3, # Changed to ~.33 (1/len(# of FCs)), could be calculated in merging.py and document this. Originally 0.5 proximity_meters=500, closing_meters=0, opening_meters=0, ) + print("XXXC >> IF WE GET HERE THE MERGE INFERENCES WORKED") if merged_inferences.get("features"): async with db_client.session.begin(): @@ -519,11 +561,13 @@ async def _orchestrate( end_time = datetime.now() print(f"End time: {end_time}") print("Returning results!") + print("XXXC >> IF WE GET HERE THE MERGE INFERENCES WORKED EVEN MORE, GOT TO LINE 565") + async with db_client.session.begin(): orchestrator_run.success = True orchestrator_run.inference_end_time = end_time - + # XXXC >> reduce num variables to a list of featureCollection, ala the merging approach orchestrator_result = OrchestratorResult( classification_base=out_fc, classification_offset=out_fc_offset, @@ -540,5 +584,5 @@ async def _orchestrate( ntiles=ntiles, noffsettiles=noffsettiles, ) - + print("XXXC >> IF WE GET HERE EVERYTHING WORKED") return orchestrator_result diff --git a/cerulean_cloud/cloud_run_orchestrator/merging.py b/cerulean_cloud/cloud_run_orchestrator/merging.py index 8b36db52..21ce9667 100644 --- a/cerulean_cloud/cloud_run_orchestrator/merging.py +++ b/cerulean_cloud/cloud_run_orchestrator/merging.py @@ -10,26 +10,23 @@ def reproject_to_utm(gdf_wgs84): utm_crs = gdf_wgs84.estimate_utm_crs(datum_name="WGS 84") return gdf_wgs84.to_crs(utm_crs) - def merge_inferences( - base_tile_fc: geojson.FeatureCollection, - offset_tile_fc: geojson.FeatureCollection, - isolated_conf_multiplier: float = 1, + feature_collections: list[geojson.FeatureCollection], # XXXC >> USING THIS AS A TEST replacement for base_tile_fc and offset_tile_fc + isolated_conf_multiplier: float = None, # THIS IS FURTHER DEFINED ON LINES 39-40 as 1 / len(feature_collections) proximity_meters: int = 500, closing_meters: int = 0, opening_meters: int = 0, ) -> geojson.FeatureCollection: """ - Merge base and offset tile inference. + Merge base and all offset tile inference. - This function takes in two geojson FeatureCollections and merges them together. - During the merge, the geometries can be adjusted to incorporate neighboring features - based on the proximity setting. The confidence of isolated features can also be adjusted. + This function takes a list of geojson FeatureCollections and merges them together. During the merge, the + geometries can be adjusted to incorporate neighboring features based on the proximity setting. The + confidence of isolated features can also be adjusted. Parameters: - - base_tile_fc: The primary FeatureCollection to be merged. - - offset_tile_fc: The secondary FeatureCollection to be merged with the primary. - - isolated_conf_multiplier: A multiplier for the confidence of isolated features (default is 1). + - feature_collections: A list of FeatureCollecitons to be merged, a primary and any secondary FeatureCollections + - isolated_conf_multiplier: A multiplier for the confidence of isolated features (default is 1 / len(feature_collections)). - proximity_meters: The distance to check for neighboring features and expand the geometries (default is 500m). - closing_meters: The distance to apply the morphological 'closing' operation (default is 0m). - opening_meters: The distance to apply the morphological 'opening' operation (default is 0m). @@ -38,74 +35,85 @@ def merge_inferences( A merged geojson FeatureCollection. """ - # Check if both FeatureCollections have features - if base_tile_fc["features"] and offset_tile_fc["features"]: - # Convert the FeatureCollections to GeoDataFrames - base_gdf = gpd.GeoDataFrame.from_features(base_tile_fc["features"], crs=4326) - offset_gdf = gpd.GeoDataFrame.from_features( - offset_tile_fc["features"], crs=4326 - ) - - # Reproject both GeoDataFrames to a UTM CRS (for accurate distance calculations) - base_gdf = reproject_to_utm(base_gdf) - offset_gdf = offset_gdf.to_crs(base_gdf.crs) - - # Combine both GeoDataFrames - concat_gdf = pd.concat([base_gdf, offset_gdf], ignore_index=True) - final_gdf = concat_gdf.copy() - - # If proximity is set, expand the geometry of each feature by the defined distance - if proximity_meters is not None: - concat_gdf["geometry"] = concat_gdf.buffer(proximity_meters) - - # Join the features that intersect with each other - joined = gpd.sjoin(concat_gdf, concat_gdf, predicate="intersects").reset_index() - - # Create a graph where each node represents a feature and edges represent overlaps/intersections - G = nx.from_pandas_edgelist(joined, "index", "index_right") - - # For each connected component in the graph, assign a group index and count its features - group_mapping = { - feature: group - for group, component in enumerate(nx.connected_components(G)) - for feature in component - } - group_counts = { - feature: len(component) - for component in nx.connected_components(G) - for feature in component - } - - # Map the group indices and counts back to the GeoDataFrame - final_gdf["group_index"] = final_gdf.index.map(group_mapping) - final_gdf["group_count"] = final_gdf.index.map(group_counts) - - # Adjust the confidence value for features that are isolated (not part of a larger group) - final_gdf.loc[ - final_gdf["group_count"] == 1, "machine_confidence" - ] *= isolated_conf_multiplier - - # Dissolve overlapping features into one based on their group index and calculate the median confidence and maximum inference index - dissolved_gdf = final_gdf.dissolve( - by="group_index", aggfunc={"machine_confidence": "median", "inf_idx": "max"} - ) - - # If set, apply a morphological 'closing' operation to the geometries - if closing_meters is not None: - dissolved_gdf["geometry"] = dissolved_gdf.buffer(closing_meters).buffer( - -closing_meters - ) - - # If set, apply a morphological 'opening' operation to the geometries - if opening_meters is not None: - dissolved_gdf["geometry"] = dissolved_gdf.buffer(-opening_meters).buffer( - opening_meters - ) - - # Reproject the GeoDataFrame back to WGS 84 CRS - result = dissolved_gdf.to_crs(crs=4326) - - return result.__geo_interface__ - else: - # If one of the FeatureCollections is empty, return an empty FeatureCollection - return geojson.FeatureCollection(features=[]) + # Define the isolated_conf_multiplier + if isolated_conf_multiplier is None: + isolated_conf_multiplier = 1 / len(feature_collections) + + # Combined GeoDataFrames. Only appended inf all FeatureCollections have at least 1 feature. + # gdfs_for_processing = [] + + # Check that all FeatureCollections have features. This throws out any detection if it is not present in + # all tiles of the feature_collections. + # if any(len(fc["features"]) == 0 for fc in feature_collections): + # shared_crs = feature_collections[0].crs + # for fc in feature_collections: + # # Convert the fc to a GeoDataFrame + # gdf = gpd.GeoDataFrame.from_features(fc["features"], crs=4326) + + # # Reproject both GeoDataFrames to a UTM CRS (for accurate distance calculations) + # gdfs_for_processing.append(gdf) + + # gdf_r = reproject_to_utm(gdf) + + # else: + # # If one of the FeatureCollections is empty, return an empty FeatureCollection + # return geojson.FeatureCollection(features=[]) + + gdfs_for_processing = [gpd.GeoDataFrame.from_features(fc["features"], crs=4326) for fc in feature_collections if fc["features"] else gpd.GeoDataFrame([],crs=4326)] + gdfs_for_processing = [gdf.to_crs(gdfs_for_processing[0].reproject_to_utm(gdfs_for_processing[0])) for gdf in gdfs_for_processing] + + # Concat the GeoDataFrames + concat_gdf = pd.concat(gdfs_for_processing, ignore_index=True) + final_gdf = concat_gdf.copy() + + # If proximity is set, expand the geometry of each feature by the defined distance + if proximity_meters is not None: + concat_gdf["geometry"] = concat_gdf.buffer(proximity_meters) + + # Join the features that intersect with each other >> XXXC THIS MAY NEED REWORK, keep an eye on it :) + joined = gpd.sjoin(concat_gdf, concat_gdf, predicate="intersects").reset_index() + + # Create a graph where each node represents a feature and edges represent overlaps/intersections + G = nx.from_pandas_edgelist(joined, "index", "index_right") + + # For each connected component in the graph, assign a group index and count its features + group_mapping = { + feature: group + for group, component in enumerate(nx.connected_components(G)) + for feature in component + } + group_counts = { + feature: len(component) + for component in nx.connected_components(G) + for feature in component + } + + # Map the group indices and counts back to the GeoDataFrame + final_gdf["group_index"] = final_gdf.index.map(group_mapping) + final_gdf["group_count"] = final_gdf.index.map(group_counts) + + + # Adjust the confidence value for features that are isolated (not part of a larger group) + # XXXC >> how do we make this a little more dynamic, check against number of tile detections to + # impact the conf_multiplier value + final_gdf.loc[ + final_gdf["group_count"] == 1, "machine_confidence" + ] *= isolated_conf_multiplier + + # Dissolve overlapping features into one based on their group index and calculate the median confidence and maximum inference index + dissolved_gdf = final_gdf.dissolve( + by="group_index", aggfunc={"machine_confidence": "median", "inf_idx": "max"} + ) + + # If set, apply a morphological 'closing' operation to the geometries + if closing_meters is not None: + dissolved_gdf["geometry"] = dissolved_gdf.buffer(closing_meters).buffer(-closing_meters) + + # If set, apply a morphological 'opening' operation to the geometries + if opening_meters is not None: + dissolved_gdf["geometry"] = dissolved_gdf.buffer(-opening_meters).buffer(opening_meters) + + # Reproject the GeoDataFrame back to WGS 84 CRS + result = dissolved_gdf.to_crs(crs=4326) + + return result.__geo_interface__ diff --git a/cerulean_cloud/tiling.py b/cerulean_cloud/tiling.py index d11f1584..9619a4f9 100644 --- a/cerulean_cloud/tiling.py +++ b/cerulean_cloud/tiling.py @@ -55,9 +55,9 @@ def adjacent_tile(tile: morecantile.Tile, dx: int, dy: int) -> morecantile.Tile: other = morecantile.Tile(x=x + dx, y=y + dy, z=z) return other - def offset_bounds_from_base_tiles( tiles: List[morecantile.Tile], + offset_amount: float = 0.5, ) -> List[Tuple[float, float, float, float]]: """from a set of base tiles, generate offset tiles""" out_offset_tile_bounds = [] @@ -74,8 +74,8 @@ def offset_bounds_from_base_tiles( # +2 because tileymax needs to be included (+1) and the new grid has one extra row/column (+1) tile = morecantile.Tile(new_x, new_y, zoom) adj_tile = adjacent_tile(tile, -1, -1) # Negative dY is upwards!!! - minx, maxy = pixel_to_location(adj_tile, 0.5, 0.5) - maxx, miny = pixel_to_location(tile, 0.5, 0.5) + minx, maxy = pixel_to_location(adj_tile, offset_amount, offset_amount) + maxx, miny = pixel_to_location(tile, offset_amount, offset_amount) out_offset_tile_bounds += [(minx, miny, maxx, maxy)] return out_offset_tile_bounds From 78d58a72882e6b36dba9fe32fde243f026f3b897 Mon Sep 17 00:00:00 2001 From: christian Date: Fri, 3 Nov 2023 11:05:56 -0400 Subject: [PATCH 02/18] Testing an update to the merge_inferences function --- cerulean_cloud/cloud_run_orchestrator/handler.py | 3 +++ cerulean_cloud/cloud_run_orchestrator/merging.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/cerulean_cloud/cloud_run_orchestrator/handler.py b/cerulean_cloud/cloud_run_orchestrator/handler.py index af83d810..9a854c42 100644 --- a/cerulean_cloud/cloud_run_orchestrator/handler.py +++ b/cerulean_cloud/cloud_run_orchestrator/handler.py @@ -524,6 +524,9 @@ async def _orchestrate( closing_meters=0, opening_meters=0, ) + fc_count = len(merged_inferences.feature_collections) + print(f"XXXC >> Sanity Check, there area {fc_count} Feature Collections being fed to merge_inferences.") + print(f"XXXC >> Checking the isolated_conf_multiplier: {merged_inferences.isolated_conf_multiplier}") print("XXXC >> IF WE GET HERE THE MERGE INFERENCES WORKED") if merged_inferences.get("features"): diff --git a/cerulean_cloud/cloud_run_orchestrator/merging.py b/cerulean_cloud/cloud_run_orchestrator/merging.py index 21ce9667..31c01337 100644 --- a/cerulean_cloud/cloud_run_orchestrator/merging.py +++ b/cerulean_cloud/cloud_run_orchestrator/merging.py @@ -59,7 +59,7 @@ def merge_inferences( # # If one of the FeatureCollections is empty, return an empty FeatureCollection # return geojson.FeatureCollection(features=[]) - gdfs_for_processing = [gpd.GeoDataFrame.from_features(fc["features"], crs=4326) for fc in feature_collections if fc["features"] else gpd.GeoDataFrame([],crs=4326)] + gdfs_for_processing = [gpd.GeoDataFrame.from_features(fc["features"], crs=4326) if fc["features"] else gpd.GeoDataFrame([], crs=4326) for fc in feature_collections] gdfs_for_processing = [gdf.to_crs(gdfs_for_processing[0].reproject_to_utm(gdfs_for_processing[0])) for gdf in gdfs_for_processing] # Concat the GeoDataFrames From f0e23a5829c12da3ddbc7f1a188286e501389350 Mon Sep 17 00:00:00 2001 From: christian Date: Fri, 3 Nov 2023 11:27:13 -0400 Subject: [PATCH 03/18] Fixed a typing issue was using list not List for FeatureCollection specifications, fixed now --- cerulean_cloud/cloud_run_orchestrator/merging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cerulean_cloud/cloud_run_orchestrator/merging.py b/cerulean_cloud/cloud_run_orchestrator/merging.py index 31c01337..f3ac0c45 100644 --- a/cerulean_cloud/cloud_run_orchestrator/merging.py +++ b/cerulean_cloud/cloud_run_orchestrator/merging.py @@ -11,7 +11,7 @@ def reproject_to_utm(gdf_wgs84): return gdf_wgs84.to_crs(utm_crs) def merge_inferences( - feature_collections: list[geojson.FeatureCollection], # XXXC >> USING THIS AS A TEST replacement for base_tile_fc and offset_tile_fc + feature_collections: List[geojson.FeatureCollection], # XXXC >> USING THIS AS A TEST replacement for base_tile_fc and offset_tile_fc isolated_conf_multiplier: float = None, # THIS IS FURTHER DEFINED ON LINES 39-40 as 1 / len(feature_collections) proximity_meters: int = 500, closing_meters: int = 0, From 351e5e24ced36ead74d29eb51dab8ee4558f3ace Mon Sep 17 00:00:00 2001 From: christian Date: Fri, 3 Nov 2023 11:35:05 -0400 Subject: [PATCH 04/18] Importing List from typing, needed for the merging --- cerulean_cloud/cloud_run_orchestrator/merging.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cerulean_cloud/cloud_run_orchestrator/merging.py b/cerulean_cloud/cloud_run_orchestrator/merging.py index f3ac0c45..d079d1c6 100644 --- a/cerulean_cloud/cloud_run_orchestrator/merging.py +++ b/cerulean_cloud/cloud_run_orchestrator/merging.py @@ -3,6 +3,7 @@ import geopandas as gpd import networkx as nx import pandas as pd +from typing import List def reproject_to_utm(gdf_wgs84): From 6dd6f38ced8c16e92ba04508f95d60c7e71ee7c2 Mon Sep 17 00:00:00 2001 From: christian Date: Fri, 3 Nov 2023 14:11:08 -0400 Subject: [PATCH 05/18] Trying to fix the test_merging functions --- .../test_cerulean_cloud/test_cloud_run_orchestrator.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py index d70440e9..dc8fdd60 100644 --- a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py +++ b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py @@ -427,15 +427,15 @@ def test_flatten_result(): def test_func_merge_inferences(): with open("test/test_cerulean_cloud/fixtures/base.geojson") as src: - base_tile_fc = dict(geojson.load(src)) + out_fc = dict(geojson.load(src)) with open("test/test_cerulean_cloud/fixtures/offset.geojson") as src: - offset_tile_fc = dict(geojson.load(src)) + out_fc_offset = dict(geojson.load(src)) merged = merge_inferences( - base_tile_fc=base_tile_fc, - offset_tile_fc=offset_tile_fc, - isolated_conf_multiplier=0.1, + # base_tile_fc=base_tile_fc, + # offset_tile_fc=offset_tile_fc, + feature_collections = [out_fc, out_fc_offset], proximity_meters=500, closing_meters=100, opening_meters=100, From 9e4897ddf7dd5812ebbb8f3a63de958027c124fd Mon Sep 17 00:00:00 2001 From: christian Date: Fri, 3 Nov 2023 14:16:30 -0400 Subject: [PATCH 06/18] More test fixing to get merges to run --- test/test_cerulean_cloud/test_cloud_run_orchestrator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py index dc8fdd60..94848ca7 100644 --- a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py +++ b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py @@ -459,22 +459,22 @@ def test_func_merge_inferences_empty(): offset_tile_fc = dict(geojson.load(src)) merged = merge_inferences( - base_tile_fc=geojson.FeatureCollection(features=[]), + feature_collections=geojson.FeatureCollection(features=[]), offset_tile_fc=offset_tile_fc, ) assert merged["type"] == "FeatureCollection" assert len(merged["features"]) == 0 merged = merge_inferences( - base_tile_fc=offset_tile_fc, + feature_collections=offset_tile_fc, offset_tile_fc=geojson.FeatureCollection(features=[]), ) assert merged["type"] == "FeatureCollection" assert len(merged["features"]) == 0 merged = merge_inferences( - base_tile_fc=geojson.FeatureCollection(features=[]), - offset_tile_fc=geojson.FeatureCollection(features=[]), + feature_collections=geojson.FeatureCollection(features=[]), + feature_collections=geojson.FeatureCollection(features=[]), ) assert merged["type"] == "FeatureCollection" assert len(merged["features"]) == 0 From 794e45a625e9be2b99e27398f25785aab4f6e79c Mon Sep 17 00:00:00 2001 From: christian Date: Fri, 3 Nov 2023 14:43:07 -0400 Subject: [PATCH 07/18] another fix to testing merge --- test/test_cerulean_cloud/test_cloud_run_orchestrator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py index 94848ca7..aa5d09ac 100644 --- a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py +++ b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py @@ -474,7 +474,7 @@ def test_func_merge_inferences_empty(): merged = merge_inferences( feature_collections=geojson.FeatureCollection(features=[]), - feature_collections=geojson.FeatureCollection(features=[]), + # feature_collections=geojson.FeatureCollection(features=[]), ) assert merged["type"] == "FeatureCollection" assert len(merged["features"]) == 0 From d8a2a6142e692ef109ee21600d567e83a91cc1de Mon Sep 17 00:00:00 2001 From: christian Date: Fri, 3 Nov 2023 15:19:59 -0400 Subject: [PATCH 08/18] Seem to have been mis-using reproject_to_utm(), trying to rectify --- cerulean_cloud/cloud_run_orchestrator/merging.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cerulean_cloud/cloud_run_orchestrator/merging.py b/cerulean_cloud/cloud_run_orchestrator/merging.py index d079d1c6..aa7a02b2 100644 --- a/cerulean_cloud/cloud_run_orchestrator/merging.py +++ b/cerulean_cloud/cloud_run_orchestrator/merging.py @@ -60,8 +60,9 @@ def merge_inferences( # # If one of the FeatureCollections is empty, return an empty FeatureCollection # return geojson.FeatureCollection(features=[]) + # gdfs_for_processing = [gpd.GeoDataFrame.from_features(fc["features"], crs=4326) for fc in feature_collections if fc["features"] else gpd.GeoDataFrame([],crs=4326)] gdfs_for_processing = [gpd.GeoDataFrame.from_features(fc["features"], crs=4326) if fc["features"] else gpd.GeoDataFrame([], crs=4326) for fc in feature_collections] - gdfs_for_processing = [gdf.to_crs(gdfs_for_processing[0].reproject_to_utm(gdfs_for_processing[0])) for gdf in gdfs_for_processing] + gdfs_for_processing = [gdf.to_crs(reproject_to_utm(gdfs_for_processing[0])) for gdf in gdfs_for_processing] # Concat the GeoDataFrames concat_gdf = pd.concat(gdfs_for_processing, ignore_index=True) From 742bcf19cd5d09c1af6358a67727961920a6459d Mon Sep 17 00:00:00 2001 From: christian Date: Fri, 3 Nov 2023 15:40:41 -0400 Subject: [PATCH 09/18] I hate this test --- test/test_cerulean_cloud/test_cloud_run_orchestrator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py index aa5d09ac..5c04c899 100644 --- a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py +++ b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py @@ -460,14 +460,14 @@ def test_func_merge_inferences_empty(): merged = merge_inferences( feature_collections=geojson.FeatureCollection(features=[]), - offset_tile_fc=offset_tile_fc, + feature_collections=[offset_tile_fc], ) assert merged["type"] == "FeatureCollection" assert len(merged["features"]) == 0 merged = merge_inferences( - feature_collections=offset_tile_fc, - offset_tile_fc=geojson.FeatureCollection(features=[]), + feature_collections=[offset_tile_fc], + feature_collections=geojson.FeatureCollection(features=[]), ) assert merged["type"] == "FeatureCollection" assert len(merged["features"]) == 0 From bf7423a8c2146d4afd8b38c564bd766f49e7d818 Mon Sep 17 00:00:00 2001 From: christian Date: Fri, 3 Nov 2023 15:47:28 -0400 Subject: [PATCH 10/18] Once again, with hatred. Trying to get the dumb test working --- test/test_cerulean_cloud/test_cloud_run_orchestrator.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py index 5c04c899..ceba7739 100644 --- a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py +++ b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py @@ -460,13 +460,11 @@ def test_func_merge_inferences_empty(): merged = merge_inferences( feature_collections=geojson.FeatureCollection(features=[]), - feature_collections=[offset_tile_fc], ) assert merged["type"] == "FeatureCollection" assert len(merged["features"]) == 0 merged = merge_inferences( - feature_collections=[offset_tile_fc], feature_collections=geojson.FeatureCollection(features=[]), ) assert merged["type"] == "FeatureCollection" From 3faae6b8a2ca77c6b9ec697e828a4e88d4440e70 Mon Sep 17 00:00:00 2001 From: christian Date: Fri, 3 Nov 2023 16:07:29 -0400 Subject: [PATCH 11/18] Truly, is there a greater force than my rage at testing? Trying again, this time all test merge should be in the right format --- test/test_cerulean_cloud/test_cloud_run_orchestrator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py index ceba7739..6102bd0d 100644 --- a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py +++ b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py @@ -458,21 +458,21 @@ def test_func_merge_inferences_empty(): with open("test/test_cerulean_cloud/fixtures/offset.geojson") as src: offset_tile_fc = dict(geojson.load(src)) + merged = merge_inferences( - feature_collections=geojson.FeatureCollection(features=[]), + feature_collections = [geojson.FeatureCollection(features=[]),offset_tile_fc] ) assert merged["type"] == "FeatureCollection" assert len(merged["features"]) == 0 merged = merge_inferences( - feature_collections=geojson.FeatureCollection(features=[]), + feature_collections=[offset_tile_fc,geojson.FeatureCollection(features=[])] ) assert merged["type"] == "FeatureCollection" assert len(merged["features"]) == 0 merged = merge_inferences( - feature_collections=geojson.FeatureCollection(features=[]), - # feature_collections=geojson.FeatureCollection(features=[]), + feature_collections=[geojson.FeatureCollection(features=[]),geojson.FeatureCollection(features=[])] ) assert merged["type"] == "FeatureCollection" assert len(merged["features"]) == 0 From 35ba939d1f3eb45da75a70628b3770c5a195de1e Mon Sep 17 00:00:00 2001 From: Jona Date: Sat, 4 Nov 2023 17:14:32 -0400 Subject: [PATCH 12/18] Proposed a fix to the tests. Also added first pass version of dividing by N for that number of feature_collections --- .../cloud_run_orchestrator/merging.py | 71 ++++++++----------- .../test_cloud_run_orchestrator.py | 26 +++---- 2 files changed, 41 insertions(+), 56 deletions(-) diff --git a/cerulean_cloud/cloud_run_orchestrator/merging.py b/cerulean_cloud/cloud_run_orchestrator/merging.py index aa7a02b2..515159a1 100644 --- a/cerulean_cloud/cloud_run_orchestrator/merging.py +++ b/cerulean_cloud/cloud_run_orchestrator/merging.py @@ -1,9 +1,10 @@ """merging inference from base and offset tiles""" +from typing import List + import geojson import geopandas as gpd import networkx as nx import pandas as pd -from typing import List def reproject_to_utm(gdf_wgs84): @@ -11,9 +12,11 @@ def reproject_to_utm(gdf_wgs84): utm_crs = gdf_wgs84.estimate_utm_crs(datum_name="WGS 84") return gdf_wgs84.to_crs(utm_crs) + def merge_inferences( - feature_collections: List[geojson.FeatureCollection], # XXXC >> USING THIS AS A TEST replacement for base_tile_fc and offset_tile_fc - isolated_conf_multiplier: float = None, # THIS IS FURTHER DEFINED ON LINES 39-40 as 1 / len(feature_collections) + feature_collections: List[ + geojson.FeatureCollection + ], # XXXC >> USING THIS AS A TEST replacement for base_tile_fc and offset_tile_fc proximity_meters: int = 500, closing_meters: int = 0, opening_meters: int = 0, @@ -21,8 +24,8 @@ def merge_inferences( """ Merge base and all offset tile inference. - This function takes a list of geojson FeatureCollections and merges them together. During the merge, the - geometries can be adjusted to incorporate neighboring features based on the proximity setting. The + This function takes a list of geojson FeatureCollections and merges them together. During the merge, the + geometries can be adjusted to incorporate neighboring features based on the proximity setting. The confidence of isolated features can also be adjusted. Parameters: @@ -35,34 +38,17 @@ def merge_inferences( Returns: A merged geojson FeatureCollection. """ - - # Define the isolated_conf_multiplier - if isolated_conf_multiplier is None: - isolated_conf_multiplier = 1 / len(feature_collections) - - # Combined GeoDataFrames. Only appended inf all FeatureCollections have at least 1 feature. - # gdfs_for_processing = [] - - # Check that all FeatureCollections have features. This throws out any detection if it is not present in - # all tiles of the feature_collections. - # if any(len(fc["features"]) == 0 for fc in feature_collections): - # shared_crs = feature_collections[0].crs - # for fc in feature_collections: - # # Convert the fc to a GeoDataFrame - # gdf = gpd.GeoDataFrame.from_features(fc["features"], crs=4326) - - # # Reproject both GeoDataFrames to a UTM CRS (for accurate distance calculations) - # gdfs_for_processing.append(gdf) - - # gdf_r = reproject_to_utm(gdf) - - # else: - # # If one of the FeatureCollections is empty, return an empty FeatureCollection - # return geojson.FeatureCollection(features=[]) - - # gdfs_for_processing = [gpd.GeoDataFrame.from_features(fc["features"], crs=4326) for fc in feature_collections if fc["features"] else gpd.GeoDataFrame([],crs=4326)] - gdfs_for_processing = [gpd.GeoDataFrame.from_features(fc["features"], crs=4326) if fc["features"] else gpd.GeoDataFrame([], crs=4326) for fc in feature_collections] - gdfs_for_processing = [gdf.to_crs(reproject_to_utm(gdfs_for_processing[0])) for gdf in gdfs_for_processing] + gdfs_for_processing = [ + reproject_to_utm( + gpd.GeoDataFrame.from_features(fc["features"], crs=4326).assign(fc_index=i) + ) + for i, fc in enumerate(feature_collections) + if fc["features"] + ] + + if len(gdfs_for_processing) == 0: + # No inferences found in any tiling + return geojson.FeatureCollection(features=[]) # Concat the GeoDataFrames concat_gdf = pd.concat(gdfs_for_processing, ignore_index=True) @@ -94,13 +80,12 @@ def merge_inferences( final_gdf["group_index"] = final_gdf.index.map(group_mapping) final_gdf["group_count"] = final_gdf.index.map(group_counts) - # Adjust the confidence value for features that are isolated (not part of a larger group) - # XXXC >> how do we make this a little more dynamic, check against number of tile detections to - # impact the conf_multiplier value - final_gdf.loc[ - final_gdf["group_count"] == 1, "machine_confidence" - ] *= isolated_conf_multiplier + final_gdf["overlap_factor"] = final_gdf.groupby("group_index")[ + "fc_index" + ].transform(lambda x: len(x.unique()) / len(feature_collections)) + + final_gdf["machine_confidence"] *= final_gdf["overlap_factor"] # Dissolve overlapping features into one based on their group index and calculate the median confidence and maximum inference index dissolved_gdf = final_gdf.dissolve( @@ -109,11 +94,15 @@ def merge_inferences( # If set, apply a morphological 'closing' operation to the geometries if closing_meters is not None: - dissolved_gdf["geometry"] = dissolved_gdf.buffer(closing_meters).buffer(-closing_meters) + dissolved_gdf["geometry"] = dissolved_gdf.buffer(closing_meters).buffer( + -closing_meters + ) # If set, apply a morphological 'opening' operation to the geometries if opening_meters is not None: - dissolved_gdf["geometry"] = dissolved_gdf.buffer(-opening_meters).buffer(opening_meters) + dissolved_gdf["geometry"] = dissolved_gdf.buffer(-opening_meters).buffer( + opening_meters + ) # Reproject the GeoDataFrame back to WGS 84 CRS result = dissolved_gdf.to_crs(crs=4326) diff --git a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py index 6102bd0d..4de55dd0 100644 --- a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py +++ b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py @@ -427,15 +427,13 @@ def test_flatten_result(): def test_func_merge_inferences(): with open("test/test_cerulean_cloud/fixtures/base.geojson") as src: - out_fc = dict(geojson.load(src)) + base_tile_fc = dict(geojson.load(src)) with open("test/test_cerulean_cloud/fixtures/offset.geojson") as src: - out_fc_offset = dict(geojson.load(src)) + offset_tile_fc = dict(geojson.load(src)) merged = merge_inferences( - # base_tile_fc=base_tile_fc, - # offset_tile_fc=offset_tile_fc, - feature_collections = [out_fc, out_fc_offset], + [base_tile_fc, offset_tile_fc], proximity_meters=500, closing_meters=100, opening_meters=100, @@ -458,21 +456,19 @@ def test_func_merge_inferences_empty(): with open("test/test_cerulean_cloud/fixtures/offset.geojson") as src: offset_tile_fc = dict(geojson.load(src)) - - merged = merge_inferences( - feature_collections = [geojson.FeatureCollection(features=[]),offset_tile_fc] - ) + merged = merge_inferences([geojson.FeatureCollection(features=[]), offset_tile_fc]) assert merged["type"] == "FeatureCollection" - assert len(merged["features"]) == 0 + assert len(merged["features"]) == 5 - merged = merge_inferences( - feature_collections=[offset_tile_fc,geojson.FeatureCollection(features=[])] - ) + merged = merge_inferences([offset_tile_fc, geojson.FeatureCollection(features=[])]) assert merged["type"] == "FeatureCollection" - assert len(merged["features"]) == 0 + assert len(merged["features"]) == 5 merged = merge_inferences( - feature_collections=[geojson.FeatureCollection(features=[]),geojson.FeatureCollection(features=[])] + [ + geojson.FeatureCollection(features=[]), + geojson.FeatureCollection(features=[]), + ], ) assert merged["type"] == "FeatureCollection" assert len(merged["features"]) == 0 From e55178de11632d179f344d020496ba775b3f6473 Mon Sep 17 00:00:00 2001 From: Jona Date: Sat, 4 Nov 2023 19:04:33 -0400 Subject: [PATCH 13/18] Black reformatting --- .../cloud_run_orchestrator/handler.py | 36 +++++++++++-------- cerulean_cloud/tiling.py | 1 + 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/cerulean_cloud/cloud_run_orchestrator/handler.py b/cerulean_cloud/cloud_run_orchestrator/handler.py index 9a854c42..326059b9 100644 --- a/cerulean_cloud/cloud_run_orchestrator/handler.py +++ b/cerulean_cloud/cloud_run_orchestrator/handler.py @@ -321,7 +321,7 @@ async def _orchestrate( base_group_bounds = group_bounds_from_list_of_bounds(base_tiles_bounds) print(f"base_group_bounds: {base_group_bounds}") - # XXXC - THIS IS THE START OF THE OFFSETTING. tiling.py was updated to allow for offset_amount to be declared by offset_bounds_from_base_tiles(), + # XXXC - THIS IS THE START OF THE OFFSETTING. tiling.py was updated to allow for offset_amount to be declared by offset_bounds_from_base_tiles(), # see tiling.py line 61. offset_tiles_bounds = offset_bounds_from_base_tiles(base_tiles, offset_amount=0.33) offset_group_shape = offset_group_shape_from_base_tiles(base_tiles, scale=scale) @@ -331,21 +331,25 @@ async def _orchestrate( print(f"Offset 1 offset_group_bounds: {offset_group_bounds}") print("START OF OFFSET #2") - offset_2_tiles_bounds = offset_bounds_from_base_tiles(base_tiles, offset_amount=0.66) + offset_2_tiles_bounds = offset_bounds_from_base_tiles( + base_tiles, offset_amount=0.66 + ) offset_2_group_shape = offset_group_shape_from_base_tiles(base_tiles, scale=scale) offset_2_group_bounds = group_bounds_from_list_of_bounds(offset_2_tiles_bounds) print(f"Offset 2 offset_tiles_bounds {offset_2_tiles_bounds}") print(f"Offset 2 image shape is {offset_2_group_shape}") print(f"Offset 2 offset_group_bounds: {offset_2_group_bounds}") - print(f"Original tiles are {len(base_tiles)}, {len(offset_group_bounds)}, {len(offset_2_group_bounds)}") + print( + f"Original tiles are {len(base_tiles)}, {len(offset_group_bounds)}, {len(offset_2_group_bounds)}" + ) # XXXC - THIS IS THE END OF THE OFFSETTING. # Filter out land tiles # XXXBUG is_tile_over_water throws ValueError if the scene crosses or is close to the antimeridian. Example: S1A_IW_GRDH_1SDV_20230726T183302_20230726T183327_049598_05F6CA_31E7 # XXXBUG is_tile_over_water throws IndexError if the scene touches the Caspian sea (globe says it is NOT ocean, whereas our cloud_function_scene_relevancy says it is). Example: S1A_IW_GRDH_1SDV_20230727T025332_20230727T025357_049603_05F6F2_AF3E base_tiles = [t for t in base_tiles if is_tile_over_water(tiler.bounds(t))] - + # XXXC - OFFSET TIlE BOUNDS OVER WATER offset_tiles_bounds = [b for b in offset_tiles_bounds if is_tile_over_water(b)] print(f"offset_tiles_bounds: {offset_tiles_bounds}") @@ -416,8 +420,8 @@ async def _orchestrate( 20, "base tiles", ) - - # XXXC - START OFFSET 2 + + # XXXC - START OFFSET 2 offset_tiles_inference = await perform_inference( offset_tiles_bounds, cloud_run_inference.get_offset_tile_inference, @@ -426,7 +430,7 @@ async def _orchestrate( ) # XXXC - END OFFSET 2 - # XXXC - START OFFSET 3 + # XXXC - START OFFSET 3 offset_2_tiles_inference = await perform_inference( offset_2_tiles_bounds, cloud_run_inference.get_offset_tile_inference, # THIS FUNCTION NEEDS TO BE EDITTED @@ -439,7 +443,7 @@ async def _orchestrate( out_fc = geojson.FeatureCollection( features=flatten_feature_list(base_tiles_inference) ) - # XXXC - OFFSET 3 >> THIS NEEDS TO BE EDITTED + # XXXC - OFFSET 3 >> THIS NEEDS TO BE EDITTED out_fc_offset = geojson.FeatureCollection( features=flatten_feature_list(offset_tiles_inference) ) @@ -513,11 +517,10 @@ async def _orchestrate( # XXXC - OFFSET 3 >> change code to allow number of offsets, pass in list of featureCollections (2 or 3) print("XXXC >> TRYING TO MERGE INFERENCES") merged_inferences = merge_inferences( # changes to 15-16, give list of FCs 2-3 items - feature_collections = [out_fc, out_fc_offset, out_fc_offset_2], + feature_collections=[out_fc, out_fc_offset, out_fc_offset_2], # out_fc, # out_fc_offset, # out_fc_offset_2, # Added this as a test before giving merge a list - # Not declaring isolated_conf_multiplier because the default value is updated in merging.py # isolated_conf_multiplier=0.3, # Changed to ~.33 (1/len(# of FCs)), could be calculated in merging.py and document this. Originally 0.5 proximity_meters=500, @@ -525,8 +528,12 @@ async def _orchestrate( opening_meters=0, ) fc_count = len(merged_inferences.feature_collections) - print(f"XXXC >> Sanity Check, there area {fc_count} Feature Collections being fed to merge_inferences.") - print(f"XXXC >> Checking the isolated_conf_multiplier: {merged_inferences.isolated_conf_multiplier}") + print( + f"XXXC >> Sanity Check, there area {fc_count} Feature Collections being fed to merge_inferences." + ) + print( + f"XXXC >> Checking the isolated_conf_multiplier: {merged_inferences.isolated_conf_multiplier}" + ) print("XXXC >> IF WE GET HERE THE MERGE INFERENCES WORKED") if merged_inferences.get("features"): @@ -564,8 +571,9 @@ async def _orchestrate( end_time = datetime.now() print(f"End time: {end_time}") print("Returning results!") - print("XXXC >> IF WE GET HERE THE MERGE INFERENCES WORKED EVEN MORE, GOT TO LINE 565") - + print( + "XXXC >> IF WE GET HERE THE MERGE INFERENCES WORKED EVEN MORE, GOT TO LINE 565" + ) async with db_client.session.begin(): orchestrator_run.success = True diff --git a/cerulean_cloud/tiling.py b/cerulean_cloud/tiling.py index 9619a4f9..0c2b2816 100644 --- a/cerulean_cloud/tiling.py +++ b/cerulean_cloud/tiling.py @@ -55,6 +55,7 @@ def adjacent_tile(tile: morecantile.Tile, dx: int, dy: int) -> morecantile.Tile: other = morecantile.Tile(x=x + dx, y=y + dy, z=z) return other + def offset_bounds_from_base_tiles( tiles: List[morecantile.Tile], offset_amount: float = 0.5, From 2ca0ad4fd7329a24c8383be5ef272ac6174681c0 Mon Sep 17 00:00:00 2001 From: Jona Date: Sat, 4 Nov 2023 21:56:18 -0400 Subject: [PATCH 14/18] Speed up DB by removing AOIs Fix bug where merge_inferences() was treated like a class rather than a function --- alembic/versions/5e03ce584f3c_add_eez.py | 1 + alembic/versions/c0bd1215a3ca_add_iho.py | 1 + alembic/versions/f9b7166c86b7_add_mpa.py | 1 + cerulean_cloud/cloud_run_orchestrator/handler.py | 8 -------- 4 files changed, 3 insertions(+), 8 deletions(-) diff --git a/alembic/versions/5e03ce584f3c_add_eez.py b/alembic/versions/5e03ce584f3c_add_eez.py index e3d728df..f951dd2c 100644 --- a/alembic/versions/5e03ce584f3c_add_eez.py +++ b/alembic/versions/5e03ce584f3c_add_eez.py @@ -44,6 +44,7 @@ def upgrade() -> None: session = orm.Session(bind=bind) eez = get_eez_from_url() # geojson.load(open("EEZ_and_HighSeas_20230410.json")) + eez = {"features": []} # noqa for feat in eez.get("features"): sovereign_keys = [ k for k in list(feat["properties"].keys()) if k.startswith("SOVEREIGN") diff --git a/alembic/versions/c0bd1215a3ca_add_iho.py b/alembic/versions/c0bd1215a3ca_add_iho.py index df574d85..841805cb 100644 --- a/alembic/versions/c0bd1215a3ca_add_iho.py +++ b/alembic/versions/c0bd1215a3ca_add_iho.py @@ -36,6 +36,7 @@ def upgrade() -> None: session = orm.Session(bind=bind) iho = get_iho_from_url() + iho = {"features": []} # noqa for feat in iho.get("features"): with session.begin(): aoi_iho = database_schema.AoiIho( diff --git a/alembic/versions/f9b7166c86b7_add_mpa.py b/alembic/versions/f9b7166c86b7_add_mpa.py index a44e10d8..e2d7ac19 100644 --- a/alembic/versions/f9b7166c86b7_add_mpa.py +++ b/alembic/versions/f9b7166c86b7_add_mpa.py @@ -36,6 +36,7 @@ def upgrade() -> None: session = orm.Session(bind=bind) mpa = get_mpa_from_url() + mpa = {"features": []} # noqa for feat in mpa.get("features"): with session.begin(): aoi_mpa = database_schema.AoiMpa( diff --git a/cerulean_cloud/cloud_run_orchestrator/handler.py b/cerulean_cloud/cloud_run_orchestrator/handler.py index 326059b9..e36b42df 100644 --- a/cerulean_cloud/cloud_run_orchestrator/handler.py +++ b/cerulean_cloud/cloud_run_orchestrator/handler.py @@ -527,14 +527,6 @@ async def _orchestrate( closing_meters=0, opening_meters=0, ) - fc_count = len(merged_inferences.feature_collections) - print( - f"XXXC >> Sanity Check, there area {fc_count} Feature Collections being fed to merge_inferences." - ) - print( - f"XXXC >> Checking the isolated_conf_multiplier: {merged_inferences.isolated_conf_multiplier}" - ) - print("XXXC >> IF WE GET HERE THE MERGE INFERENCES WORKED") if merged_inferences.get("features"): async with db_client.session.begin(): From 56c561e1b87fe239ecff207d64836c9c92742b65 Mon Sep 17 00:00:00 2001 From: christian Date: Mon, 6 Nov 2023 14:12:40 -0500 Subject: [PATCH 15/18] Final edits and clean-up on the cloud_run_orchestrator for feature offsetting --- .../cloud_run_orchestrator/handler.py | 26 +++---------------- .../cloud_run_orchestrator/merging.py | 6 ++--- 2 files changed, 5 insertions(+), 27 deletions(-) diff --git a/cerulean_cloud/cloud_run_orchestrator/handler.py b/cerulean_cloud/cloud_run_orchestrator/handler.py index e36b42df..c5032614 100644 --- a/cerulean_cloud/cloud_run_orchestrator/handler.py +++ b/cerulean_cloud/cloud_run_orchestrator/handler.py @@ -321,8 +321,7 @@ async def _orchestrate( base_group_bounds = group_bounds_from_list_of_bounds(base_tiles_bounds) print(f"base_group_bounds: {base_group_bounds}") - # XXXC - THIS IS THE START OF THE OFFSETTING. tiling.py was updated to allow for offset_amount to be declared by offset_bounds_from_base_tiles(), - # see tiling.py line 61. + # tiling.py was updated to allow for offset_amount to be declared by offset_bounds_from_base_tiles(), see tiling.py line 61. offset_tiles_bounds = offset_bounds_from_base_tiles(base_tiles, offset_amount=0.33) offset_group_shape = offset_group_shape_from_base_tiles(base_tiles, scale=scale) offset_group_bounds = group_bounds_from_list_of_bounds(offset_tiles_bounds) @@ -343,18 +342,14 @@ async def _orchestrate( print( f"Original tiles are {len(base_tiles)}, {len(offset_group_bounds)}, {len(offset_2_group_bounds)}" ) - # XXXC - THIS IS THE END OF THE OFFSETTING. # Filter out land tiles # XXXBUG is_tile_over_water throws ValueError if the scene crosses or is close to the antimeridian. Example: S1A_IW_GRDH_1SDV_20230726T183302_20230726T183327_049598_05F6CA_31E7 # XXXBUG is_tile_over_water throws IndexError if the scene touches the Caspian sea (globe says it is NOT ocean, whereas our cloud_function_scene_relevancy says it is). Example: S1A_IW_GRDH_1SDV_20230727T025332_20230727T025357_049603_05F6F2_AF3E base_tiles = [t for t in base_tiles if is_tile_over_water(tiler.bounds(t))] - # XXXC - OFFSET TIlE BOUNDS OVER WATER offset_tiles_bounds = [b for b in offset_tiles_bounds if is_tile_over_water(b)] - print(f"offset_tiles_bounds: {offset_tiles_bounds}") offset_2_tiles_bounds = [b for b in offset_2_tiles_bounds if is_tile_over_water(b)] - print(f"offset_2_tiles_bounds: {offset_2_tiles_bounds}") ntiles = len(base_tiles) noffsettiles = len(offset_tiles_bounds) @@ -421,29 +416,25 @@ async def _orchestrate( "base tiles", ) - # XXXC - START OFFSET 2 offset_tiles_inference = await perform_inference( offset_tiles_bounds, cloud_run_inference.get_offset_tile_inference, 20, "offset tiles", ) - # XXXC - END OFFSET 2 - # XXXC - START OFFSET 3 offset_2_tiles_inference = await perform_inference( offset_2_tiles_bounds, cloud_run_inference.get_offset_tile_inference, # THIS FUNCTION NEEDS TO BE EDITTED 20, "offset2 tiles", ) - # XXXC - END OFFSET 3 if model.type == "MASKRCNN": out_fc = geojson.FeatureCollection( features=flatten_feature_list(base_tiles_inference) ) - # XXXC - OFFSET 3 >> THIS NEEDS TO BE EDITTED + out_fc_offset = geojson.FeatureCollection( features=flatten_feature_list(offset_tiles_inference) ) @@ -514,15 +505,8 @@ async def _orchestrate( print("XXXDEBUG out_fc_offset", out_fc_offset) print("XXXCDEBUG out_fc_offset2", out_fc_offset_2) - # XXXC - OFFSET 3 >> change code to allow number of offsets, pass in list of featureCollections (2 or 3) - print("XXXC >> TRYING TO MERGE INFERENCES") - merged_inferences = merge_inferences( # changes to 15-16, give list of FCs 2-3 items + merged_inferences = merge_inferences( feature_collections=[out_fc, out_fc_offset, out_fc_offset_2], - # out_fc, - # out_fc_offset, - # out_fc_offset_2, # Added this as a test before giving merge a list - # Not declaring isolated_conf_multiplier because the default value is updated in merging.py - # isolated_conf_multiplier=0.3, # Changed to ~.33 (1/len(# of FCs)), could be calculated in merging.py and document this. Originally 0.5 proximity_meters=500, closing_meters=0, opening_meters=0, @@ -563,9 +547,6 @@ async def _orchestrate( end_time = datetime.now() print(f"End time: {end_time}") print("Returning results!") - print( - "XXXC >> IF WE GET HERE THE MERGE INFERENCES WORKED EVEN MORE, GOT TO LINE 565" - ) async with db_client.session.begin(): orchestrator_run.success = True @@ -587,5 +568,4 @@ async def _orchestrate( ntiles=ntiles, noffsettiles=noffsettiles, ) - print("XXXC >> IF WE GET HERE EVERYTHING WORKED") return orchestrator_result diff --git a/cerulean_cloud/cloud_run_orchestrator/merging.py b/cerulean_cloud/cloud_run_orchestrator/merging.py index 515159a1..14231e42 100644 --- a/cerulean_cloud/cloud_run_orchestrator/merging.py +++ b/cerulean_cloud/cloud_run_orchestrator/merging.py @@ -14,9 +14,7 @@ def reproject_to_utm(gdf_wgs84): def merge_inferences( - feature_collections: List[ - geojson.FeatureCollection - ], # XXXC >> USING THIS AS A TEST replacement for base_tile_fc and offset_tile_fc + feature_collections: List[geojson.FeatureCollection], proximity_meters: int = 500, closing_meters: int = 0, opening_meters: int = 0, @@ -58,7 +56,7 @@ def merge_inferences( if proximity_meters is not None: concat_gdf["geometry"] = concat_gdf.buffer(proximity_meters) - # Join the features that intersect with each other >> XXXC THIS MAY NEED REWORK, keep an eye on it :) + # Join the features that intersect with each other joined = gpd.sjoin(concat_gdf, concat_gdf, predicate="intersects").reset_index() # Create a graph where each node represents a feature and edges represent overlaps/intersections From bb33c6dff29872c585d3ae0a5ae7f031c95e339a Mon Sep 17 00:00:00 2001 From: Jona Date: Mon, 6 Nov 2023 14:46:19 -0500 Subject: [PATCH 16/18] Remove AOI hack --- alembic/versions/5e03ce584f3c_add_eez.py | 1 - alembic/versions/c0bd1215a3ca_add_iho.py | 1 - alembic/versions/f9b7166c86b7_add_mpa.py | 1 - 3 files changed, 3 deletions(-) diff --git a/alembic/versions/5e03ce584f3c_add_eez.py b/alembic/versions/5e03ce584f3c_add_eez.py index f951dd2c..e3d728df 100644 --- a/alembic/versions/5e03ce584f3c_add_eez.py +++ b/alembic/versions/5e03ce584f3c_add_eez.py @@ -44,7 +44,6 @@ def upgrade() -> None: session = orm.Session(bind=bind) eez = get_eez_from_url() # geojson.load(open("EEZ_and_HighSeas_20230410.json")) - eez = {"features": []} # noqa for feat in eez.get("features"): sovereign_keys = [ k for k in list(feat["properties"].keys()) if k.startswith("SOVEREIGN") diff --git a/alembic/versions/c0bd1215a3ca_add_iho.py b/alembic/versions/c0bd1215a3ca_add_iho.py index 841805cb..df574d85 100644 --- a/alembic/versions/c0bd1215a3ca_add_iho.py +++ b/alembic/versions/c0bd1215a3ca_add_iho.py @@ -36,7 +36,6 @@ def upgrade() -> None: session = orm.Session(bind=bind) iho = get_iho_from_url() - iho = {"features": []} # noqa for feat in iho.get("features"): with session.begin(): aoi_iho = database_schema.AoiIho( diff --git a/alembic/versions/f9b7166c86b7_add_mpa.py b/alembic/versions/f9b7166c86b7_add_mpa.py index e2d7ac19..a44e10d8 100644 --- a/alembic/versions/f9b7166c86b7_add_mpa.py +++ b/alembic/versions/f9b7166c86b7_add_mpa.py @@ -36,7 +36,6 @@ def upgrade() -> None: session = orm.Session(bind=bind) mpa = get_mpa_from_url() - mpa = {"features": []} # noqa for feat in mpa.get("features"): with session.begin(): aoi_mpa = database_schema.AoiMpa( From 7209751a105dee3381ab45e2019ea4acb3713b16 Mon Sep 17 00:00:00 2001 From: christian Date: Mon, 6 Nov 2023 15:55:30 -0500 Subject: [PATCH 17/18] Committing code changes to the correct branch this time. Minor edits, added description to the reproject_to_utm section of merging. --- alembic/versions/5e03ce584f3c_add_eez.py | 1 - alembic/versions/c0bd1215a3ca_add_iho.py | 1 - alembic/versions/f9b7166c86b7_add_mpa.py | 1 - cerulean_cloud/cloud_run_orchestrator/handler.py | 6 +++--- cerulean_cloud/cloud_run_orchestrator/merging.py | 5 ++++- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/alembic/versions/5e03ce584f3c_add_eez.py b/alembic/versions/5e03ce584f3c_add_eez.py index f951dd2c..e3d728df 100644 --- a/alembic/versions/5e03ce584f3c_add_eez.py +++ b/alembic/versions/5e03ce584f3c_add_eez.py @@ -44,7 +44,6 @@ def upgrade() -> None: session = orm.Session(bind=bind) eez = get_eez_from_url() # geojson.load(open("EEZ_and_HighSeas_20230410.json")) - eez = {"features": []} # noqa for feat in eez.get("features"): sovereign_keys = [ k for k in list(feat["properties"].keys()) if k.startswith("SOVEREIGN") diff --git a/alembic/versions/c0bd1215a3ca_add_iho.py b/alembic/versions/c0bd1215a3ca_add_iho.py index 841805cb..df574d85 100644 --- a/alembic/versions/c0bd1215a3ca_add_iho.py +++ b/alembic/versions/c0bd1215a3ca_add_iho.py @@ -36,7 +36,6 @@ def upgrade() -> None: session = orm.Session(bind=bind) iho = get_iho_from_url() - iho = {"features": []} # noqa for feat in iho.get("features"): with session.begin(): aoi_iho = database_schema.AoiIho( diff --git a/alembic/versions/f9b7166c86b7_add_mpa.py b/alembic/versions/f9b7166c86b7_add_mpa.py index e2d7ac19..a44e10d8 100644 --- a/alembic/versions/f9b7166c86b7_add_mpa.py +++ b/alembic/versions/f9b7166c86b7_add_mpa.py @@ -36,7 +36,6 @@ def upgrade() -> None: session = orm.Session(bind=bind) mpa = get_mpa_from_url() - mpa = {"features": []} # noqa for feat in mpa.get("features"): with session.begin(): aoi_mpa = database_schema.AoiMpa( diff --git a/cerulean_cloud/cloud_run_orchestrator/handler.py b/cerulean_cloud/cloud_run_orchestrator/handler.py index c5032614..65de98ec 100644 --- a/cerulean_cloud/cloud_run_orchestrator/handler.py +++ b/cerulean_cloud/cloud_run_orchestrator/handler.py @@ -340,7 +340,7 @@ async def _orchestrate( print(f"Offset 2 offset_group_bounds: {offset_2_group_bounds}") print( - f"Original tiles are {len(base_tiles)}, {len(offset_group_bounds)}, {len(offset_2_group_bounds)}" + f"Original tiles are {len(base_tiles)}, {len(offset_tiles_bounds)}, {len(offset_2_tiles_bounds)}" ) # Filter out land tiles @@ -425,7 +425,7 @@ async def _orchestrate( offset_2_tiles_inference = await perform_inference( offset_2_tiles_bounds, - cloud_run_inference.get_offset_tile_inference, # THIS FUNCTION NEEDS TO BE EDITTED + cloud_run_inference.get_offset_tile_inference, 20, "offset2 tiles", ) @@ -551,7 +551,7 @@ async def _orchestrate( async with db_client.session.begin(): orchestrator_run.success = True orchestrator_run.inference_end_time = end_time - # XXXC >> reduce num variables to a list of featureCollection, ala the merging approach + orchestrator_result = OrchestratorResult( classification_base=out_fc, classification_offset=out_fc_offset, diff --git a/cerulean_cloud/cloud_run_orchestrator/merging.py b/cerulean_cloud/cloud_run_orchestrator/merging.py index 14231e42..15c629a2 100644 --- a/cerulean_cloud/cloud_run_orchestrator/merging.py +++ b/cerulean_cloud/cloud_run_orchestrator/merging.py @@ -28,7 +28,6 @@ def merge_inferences( Parameters: - feature_collections: A list of FeatureCollecitons to be merged, a primary and any secondary FeatureCollections - - isolated_conf_multiplier: A multiplier for the confidence of isolated features (default is 1 / len(feature_collections)). - proximity_meters: The distance to check for neighboring features and expand the geometries (default is 500m). - closing_meters: The distance to apply the morphological 'closing' operation (default is 0m). - opening_meters: The distance to apply the morphological 'opening' operation (default is 0m). @@ -36,6 +35,10 @@ def merge_inferences( Returns: A merged geojson FeatureCollection. """ + # We reproject to UTM for processing. This assumes that all offset images will either be in the same UTM zone as + # the input image chip, or that the difference that arise from an offset crossing into a second UTM zone will + # have little or no impact on comparison to the origina image. + gdfs_for_processing = [ reproject_to_utm( gpd.GeoDataFrame.from_features(fc["features"], crs=4326).assign(fc_index=i) From 14c82009291daa157b53e29c38e0cdbe28c506c3 Mon Sep 17 00:00:00 2001 From: christian Date: Mon, 6 Nov 2023 16:28:03 -0500 Subject: [PATCH 18/18] fixed typo --- cerulean_cloud/cloud_run_orchestrator/merging.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cerulean_cloud/cloud_run_orchestrator/merging.py b/cerulean_cloud/cloud_run_orchestrator/merging.py index 15c629a2..a722176e 100644 --- a/cerulean_cloud/cloud_run_orchestrator/merging.py +++ b/cerulean_cloud/cloud_run_orchestrator/merging.py @@ -37,8 +37,7 @@ def merge_inferences( """ # We reproject to UTM for processing. This assumes that all offset images will either be in the same UTM zone as # the input image chip, or that the difference that arise from an offset crossing into a second UTM zone will - # have little or no impact on comparison to the origina image. - + # have little or no impact on comparison to the original image. gdfs_for_processing = [ reproject_to_utm( gpd.GeoDataFrame.from_features(fc["features"], crs=4326).assign(fc_index=i)