Feature offset3 #102

Merged: 19 commits, Nov 6, 2023
43 changes: 35 additions & 8 deletions cerulean_cloud/cloud_run_orchestrator/handler.py
@@ -321,19 +321,35 @@ async def _orchestrate(
base_group_bounds = group_bounds_from_list_of_bounds(base_tiles_bounds)
print(f"base_group_bounds: {base_group_bounds}")

offset_tiles_bounds = offset_bounds_from_base_tiles(base_tiles)
# tiling.py was updated so that offset_amount can be passed to offset_bounds_from_base_tiles(); see tiling.py line 61.
offset_tiles_bounds = offset_bounds_from_base_tiles(base_tiles, offset_amount=0.33)
offset_group_shape = offset_group_shape_from_base_tiles(base_tiles, scale=scale)
offset_group_bounds = group_bounds_from_list_of_bounds(offset_tiles_bounds)
print(f"Offset image shape is {offset_group_shape}")
print(f"offset_group_bounds: {offset_group_bounds}")
print(f"Offset 1 offset_tiles_bounds {offset_tiles_bounds}")
print(f"Offset 1 image shape is {offset_group_shape}")
print(f"Offset 1 offset_group_bounds: {offset_group_bounds}")

print(f"Original tiles are {len(base_tiles)}, {len(offset_tiles_bounds)}")
print("START OF OFFSET #2")
offset_2_tiles_bounds = offset_bounds_from_base_tiles(
base_tiles, offset_amount=0.66
)
offset_2_group_shape = offset_group_shape_from_base_tiles(base_tiles, scale=scale)
offset_2_group_bounds = group_bounds_from_list_of_bounds(offset_2_tiles_bounds)
print(f"Offset 2 offset_tiles_bounds {offset_2_tiles_bounds}")
print(f"Offset 2 image shape is {offset_2_group_shape}")
print(f"Offset 2 offset_group_bounds: {offset_2_group_bounds}")

print(
f"Original tiles are {len(base_tiles)}, {len(offset_tiles_bounds)}, {len(offset_2_tiles_bounds)}"
)

# Filter out land tiles
# XXXBUG is_tile_over_water throws ValueError if the scene crosses or is close to the antimeridian. Example: S1A_IW_GRDH_1SDV_20230726T183302_20230726T183327_049598_05F6CA_31E7
# XXXBUG is_tile_over_water throws IndexError if the scene touches the Caspian sea (globe says it is NOT ocean, whereas our cloud_function_scene_relevancy says it is). Example: S1A_IW_GRDH_1SDV_20230727T025332_20230727T025357_049603_05F6F2_AF3E
base_tiles = [t for t in base_tiles if is_tile_over_water(tiler.bounds(t))]

offset_tiles_bounds = [b for b in offset_tiles_bounds if is_tile_over_water(b)]
offset_2_tiles_bounds = [b for b in offset_2_tiles_bounds if is_tile_over_water(b)]

ntiles = len(base_tiles)
noffsettiles = len(offset_tiles_bounds)
@@ -407,13 +423,25 @@ async def _orchestrate(
"offset tiles",
)

offset_2_tiles_inference = await perform_inference(
offset_2_tiles_bounds,
cloud_run_inference.get_offset_tile_inference,
20,
"offset2 tiles",
)

if model.type == "MASKRCNN":
out_fc = geojson.FeatureCollection(
features=flatten_feature_list(base_tiles_inference)
)

out_fc_offset = geojson.FeatureCollection(
features=flatten_feature_list(offset_tiles_inference)
)

out_fc_offset_2 = geojson.FeatureCollection(
features=flatten_feature_list(offset_2_tiles_inference)
)
elif model.type == "UNET":
# print("Loading all tiles into memory for merge!")
# ds_base_tiles = []
@@ -475,10 +503,10 @@ async def _orchestrate(
# Example: S1A_IW_GRDH_1SDV_20230727T185101_20230727T185126_049613_05F744_1E56
print("XXXDEBUG out_fc", out_fc)
print("XXXDEBUG out_fc_offset", out_fc_offset)
print("XXXCDEBUG out_fc_offset2", out_fc_offset_2)

merged_inferences = merge_inferences(
out_fc,
out_fc_offset,
isolated_conf_multiplier=0.5,
feature_collections=[out_fc, out_fc_offset, out_fc_offset_2],
proximity_meters=500,
closing_meters=0,
opening_meters=0,
@@ -540,5 +568,4 @@ async def _orchestrate(
ntiles=ntiles,
noffsettiles=noffsettiles,
)

return orchestrator_result
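
The handler now builds two offset grids (shifted by 0.33 and 0.66 of a tile) on top of the base grid and merges all three inference outputs. Below is a minimal sketch of that pattern, assuming the package import path matches the file paths in this diff; the helper name offset_bounds_sets is hypothetical and only wraps the call shown above, whereas the actual handler spells out the two offset passes explicitly.

from typing import List, Tuple

import morecantile

from cerulean_cloud.tiling import offset_bounds_from_base_tiles


def offset_bounds_sets(
    base_tiles: List[morecantile.Tile],
    offset_amounts: Tuple[float, ...] = (0.33, 0.66),
) -> List[List[Tuple[float, float, float, float]]]:
    """Return one list of offset tile bounds per requested offset amount."""
    # Each offset_amount shifts the base grid by that fraction of a tile,
    # mirroring the two explicit calls in _orchestrate above.
    return [
        offset_bounds_from_base_tiles(base_tiles, offset_amount=amount)
        for amount in offset_amounts
    ]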
151 changes: 75 additions & 76 deletions cerulean_cloud/cloud_run_orchestrator/merging.py
@@ -1,4 +1,6 @@
"""merging inference from base and offset tiles"""
from typing import List

import geojson
import geopandas as gpd
import networkx as nx
@@ -12,100 +14,97 @@ def reproject_to_utm(gdf_wgs84):


def merge_inferences(
base_tile_fc: geojson.FeatureCollection,
offset_tile_fc: geojson.FeatureCollection,
isolated_conf_multiplier: float = 1,
feature_collections: List[geojson.FeatureCollection],
proximity_meters: int = 500,
closing_meters: int = 0,
opening_meters: int = 0,
) -> geojson.FeatureCollection:
"""
Merge base and offset tile inference.
Merge base and all offset tile inferences.
This function takes in two geojson FeatureCollections and merges them together.
During the merge, the geometries can be adjusted to incorporate neighboring features
based on the proximity setting. The confidence of isolated features can also be adjusted.
This function takes a list of geojson FeatureCollections and merges them together. During the merge, the
geometries can be adjusted to incorporate neighboring features based on the proximity setting. Feature
confidence is scaled by the fraction of input tilings in which each merged group was detected.
Parameters:
- base_tile_fc: The primary FeatureCollection to be merged.
- offset_tile_fc: The secondary FeatureCollection to be merged with the primary.
- isolated_conf_multiplier: A multiplier for the confidence of isolated features (default is 1).
- feature_collections: A list of FeatureCollections to be merged, a primary and any number of secondary FeatureCollections.
- proximity_meters: The distance to check for neighboring features and expand the geometries (default is 500m).
- closing_meters: The distance to apply the morphological 'closing' operation (default is 0m).
- opening_meters: The distance to apply the morphological 'opening' operation (default is 0m).
Returns:
A merged geojson FeatureCollection.
"""

# Check if both FeatureCollections have features
if base_tile_fc["features"] and offset_tile_fc["features"]:
# Convert the FeatureCollections to GeoDataFrames
base_gdf = gpd.GeoDataFrame.from_features(base_tile_fc["features"], crs=4326)
offset_gdf = gpd.GeoDataFrame.from_features(
offset_tile_fc["features"], crs=4326
# We reproject to UTM for processing. This assumes that all offset images will either be in the same UTM zone as
# the input image chip, or that the differences that arise from an offset crossing into a second UTM zone will
# have little or no impact on comparison to the original image.
gdfs_for_processing = [
reproject_to_utm(
gpd.GeoDataFrame.from_features(fc["features"], crs=4326).assign(fc_index=i)
)
for i, fc in enumerate(feature_collections)
if fc["features"]
]

# Reproject both GeoDataFrames to a UTM CRS (for accurate distance calculations)
base_gdf = reproject_to_utm(base_gdf)
offset_gdf = offset_gdf.to_crs(base_gdf.crs)

# Combine both GeoDataFrames
concat_gdf = pd.concat([base_gdf, offset_gdf], ignore_index=True)
final_gdf = concat_gdf.copy()

# If proximity is set, expand the geometry of each feature by the defined distance
if proximity_meters is not None:
concat_gdf["geometry"] = concat_gdf.buffer(proximity_meters)

# Join the features that intersect with each other
joined = gpd.sjoin(concat_gdf, concat_gdf, predicate="intersects").reset_index()

# Create a graph where each node represents a feature and edges represent overlaps/intersections
G = nx.from_pandas_edgelist(joined, "index", "index_right")

# For each connected component in the graph, assign a group index and count its features
group_mapping = {
feature: group
for group, component in enumerate(nx.connected_components(G))
for feature in component
}
group_counts = {
feature: len(component)
for component in nx.connected_components(G)
for feature in component
}

# Map the group indices and counts back to the GeoDataFrame
final_gdf["group_index"] = final_gdf.index.map(group_mapping)
final_gdf["group_count"] = final_gdf.index.map(group_counts)

# Adjust the confidence value for features that are isolated (not part of a larger group)
final_gdf.loc[
final_gdf["group_count"] == 1, "machine_confidence"
] *= isolated_conf_multiplier

# Dissolve overlapping features into one based on their group index and calculate the median confidence and maximum inference index
dissolved_gdf = final_gdf.dissolve(
by="group_index", aggfunc={"machine_confidence": "median", "inf_idx": "max"}
)
if len(gdfs_for_processing) == 0:
# No inferences found in any tiling
return geojson.FeatureCollection(features=[])

# If set, apply a morphological 'closing' operation to the geometries
if closing_meters is not None:
dissolved_gdf["geometry"] = dissolved_gdf.buffer(closing_meters).buffer(
-closing_meters
)
# Concat the GeoDataFrames
concat_gdf = pd.concat(gdfs_for_processing, ignore_index=True)
final_gdf = concat_gdf.copy()

# If proximity is set, expand the geometry of each feature by the defined distance
if proximity_meters is not None:
concat_gdf["geometry"] = concat_gdf.buffer(proximity_meters)

# Join the features that intersect with each other
joined = gpd.sjoin(concat_gdf, concat_gdf, predicate="intersects").reset_index()

# Create a graph where each node represents a feature and edges represent overlaps/intersections
G = nx.from_pandas_edgelist(joined, "index", "index_right")

# For each connected component in the graph, assign a group index and count its features
group_mapping = {
feature: group
for group, component in enumerate(nx.connected_components(G))
for feature in component
}
group_counts = {
feature: len(component)
for component in nx.connected_components(G)
for feature in component
}

# Map the group indices and counts back to the GeoDataFrame
final_gdf["group_index"] = final_gdf.index.map(group_mapping)
final_gdf["group_count"] = final_gdf.index.map(group_counts)

# Scale each feature's confidence by the fraction of input tilings that contributed features to its group
final_gdf["overlap_factor"] = final_gdf.groupby("group_index")[
"fc_index"
].transform(lambda x: len(x.unique()) / len(feature_collections))

final_gdf["machine_confidence"] *= final_gdf["overlap_factor"]

# Dissolve overlapping features into one based on their group index and calculate the median confidence and maximum inference index
dissolved_gdf = final_gdf.dissolve(
by="group_index", aggfunc={"machine_confidence": "median", "inf_idx": "max"}
)

# If set, apply a morphological 'closing' operation to the geometries
if closing_meters is not None:
dissolved_gdf["geometry"] = dissolved_gdf.buffer(closing_meters).buffer(
-closing_meters
)

# If set, apply a morphological 'opening' operation to the geometries
if opening_meters is not None:
dissolved_gdf["geometry"] = dissolved_gdf.buffer(-opening_meters).buffer(
opening_meters
)
# If set, apply a morphological 'opening' operation to the geometries
if opening_meters is not None:
dissolved_gdf["geometry"] = dissolved_gdf.buffer(-opening_meters).buffer(
opening_meters
)

# Reproject the GeoDataFrame back to WGS 84 CRS
result = dissolved_gdf.to_crs(crs=4326)
# Reproject the GeoDataFrame back to WGS 84 CRS
result = dissolved_gdf.to_crs(crs=4326)

return result.__geo_interface__
else:
# If one of the FeatureCollections is empty, return an empty FeatureCollection
return geojson.FeatureCollection(features=[])
return result.__geo_interface__
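
The rewritten merge drops the old isolated_conf_multiplier and instead scales each group's confidence by how many of the input tilings detected it: a group seen by all three tilings keeps its confidence, while a group seen by only one tiling is cut to a third. The following is a self-contained pandas sketch of just that step, with toy values standing in for final_gdf.

import pandas as pd

# Toy stand-in for final_gdf: three features in two groups, contributed by
# the base tiling (fc_index 0) and the first offset tiling (fc_index 1).
final = pd.DataFrame(
    {
        "group_index": [0, 0, 1],
        "fc_index": [0, 1, 0],
        "machine_confidence": [0.9, 0.8, 0.9],
    }
)

n_tilings = 3  # base + two offsets, as in the handler

# Same transform as merge_inferences: fraction of tilings represented in each group.
final["overlap_factor"] = final.groupby("group_index")["fc_index"].transform(
    lambda x: len(x.unique()) / n_tilings
)
final["machine_confidence"] *= final["overlap_factor"]

# Group 0 was detected by 2 of 3 tilings -> factor 2/3 (0.9 -> 0.6, 0.8 -> ~0.53).
# Group 1 was detected by 1 of 3 tilings -> factor 1/3 (0.9 -> 0.3).
print(final)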
5 changes: 3 additions & 2 deletions cerulean_cloud/tiling.py
@@ -58,6 +58,7 @@ def adjacent_tile(tile: morecantile.Tile, dx: int, dy: int) -> morecantile.Tile:

def offset_bounds_from_base_tiles(
tiles: List[morecantile.Tile],
offset_amount: float = 0.5,
) -> List[Tuple[float, float, float, float]]:
"""from a set of base tiles, generate offset tiles"""
out_offset_tile_bounds = []
@@ -74,8 +75,8 @@ def offset_bounds_from_base_tiles(
# +2 because tileymax needs to be included (+1) and the new grid has one extra row/column (+1)
tile = morecantile.Tile(new_x, new_y, zoom)
adj_tile = adjacent_tile(tile, -1, -1) # Negative dY is upwards!!!
minx, maxy = pixel_to_location(adj_tile, 0.5, 0.5)
maxx, miny = pixel_to_location(tile, 0.5, 0.5)
minx, maxy = pixel_to_location(adj_tile, offset_amount, offset_amount)
maxx, miny = pixel_to_location(tile, offset_amount, offset_amount)
out_offset_tile_bounds += [(minx, miny, maxx, maxy)]

return out_offset_tile_bounds
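
Exposing offset_amount lets the orchestrator request several staggered grids from the same base tiles, while the default of 0.5 preserves the previous behavior. A brief usage sketch, assuming the import path matches this file and using arbitrary placeholder tile coordinates:

import morecantile

from cerulean_cloud.tiling import offset_bounds_from_base_tiles

# Arbitrary 2x2 block of zoom-10 tiles, purely for illustration.
base_tiles = [
    morecantile.Tile(545 + dx, 361 + dy, 10) for dx in range(2) for dy in range(2)
]

offset_a = offset_bounds_from_base_tiles(base_tiles, offset_amount=0.33)
offset_b = offset_bounds_from_base_tiles(base_tiles, offset_amount=0.66)
offset_default = offset_bounds_from_base_tiles(base_tiles)  # 0.5, the old behavior

print(len(offset_a), offset_a[0])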
24 changes: 9 additions & 15 deletions test/test_cerulean_cloud/test_cloud_run_orchestrator.py
@@ -433,9 +433,7 @@ def test_func_merge_inferences():
offset_tile_fc = dict(geojson.load(src))

merged = merge_inferences(
base_tile_fc=base_tile_fc,
offset_tile_fc=offset_tile_fc,
isolated_conf_multiplier=0.1,
[base_tile_fc, offset_tile_fc],
proximity_meters=500,
closing_meters=100,
opening_meters=100,
@@ -458,23 +456,19 @@ def test_func_merge_inferences_empty():
with open("test/test_cerulean_cloud/fixtures/offset.geojson") as src:
offset_tile_fc = dict(geojson.load(src))

merged = merge_inferences(
base_tile_fc=geojson.FeatureCollection(features=[]),
offset_tile_fc=offset_tile_fc,
)
merged = merge_inferences([geojson.FeatureCollection(features=[]), offset_tile_fc])
assert merged["type"] == "FeatureCollection"
assert len(merged["features"]) == 0
assert len(merged["features"]) == 5

merged = merge_inferences(
base_tile_fc=offset_tile_fc,
offset_tile_fc=geojson.FeatureCollection(features=[]),
)
merged = merge_inferences([offset_tile_fc, geojson.FeatureCollection(features=[])])
assert merged["type"] == "FeatureCollection"
assert len(merged["features"]) == 0
assert len(merged["features"]) == 5

merged = merge_inferences(
base_tile_fc=geojson.FeatureCollection(features=[]),
offset_tile_fc=geojson.FeatureCollection(features=[]),
[
geojson.FeatureCollection(features=[]),
geojson.FeatureCollection(features=[]),
],
)
assert merged["type"] == "FeatureCollection"
assert len(merged["features"]) == 0