From 6ef510c5359ff543776f4149f9980678c984bb02 Mon Sep 17 00:00:00 2001
From: christian <cjthomas730@gmail.com>
Date: Thu, 2 Nov 2023 16:53:02 -0400
Subject: [PATCH 01/18] first pass of double offset

---
 .../cloud_run_orchestrator/handler.py         |  66 +++++--
 .../cloud_run_orchestrator/merging.py         | 172 +++++++++---------
 cerulean_cloud/tiling.py                      |   6 +-
 3 files changed, 148 insertions(+), 96 deletions(-)

diff --git a/cerulean_cloud/cloud_run_orchestrator/handler.py b/cerulean_cloud/cloud_run_orchestrator/handler.py
index e7075ab8..af83d810 100644
--- a/cerulean_cloud/cloud_run_orchestrator/handler.py
+++ b/cerulean_cloud/cloud_run_orchestrator/handler.py
@@ -321,19 +321,36 @@ async def _orchestrate(
     base_group_bounds = group_bounds_from_list_of_bounds(base_tiles_bounds)
     print(f"base_group_bounds: {base_group_bounds}")
 
-    offset_tiles_bounds = offset_bounds_from_base_tiles(base_tiles)
+    # XXXC - THIS IS THE START OF THE OFFSETTING. tiling.py was updated to allow for offset_amount to be declared by offset_bounds_from_base_tiles(), 
+    # see tiling.py line 61.
+    offset_tiles_bounds = offset_bounds_from_base_tiles(base_tiles, offset_amount=0.33)
     offset_group_shape = offset_group_shape_from_base_tiles(base_tiles, scale=scale)
     offset_group_bounds = group_bounds_from_list_of_bounds(offset_tiles_bounds)
-    print(f"Offset image shape is {offset_group_shape}")
-    print(f"offset_group_bounds: {offset_group_bounds}")
+    print(f"Offset 1 offset_tiles_bounds {offset_tiles_bounds}")
+    print(f"Offset 1 image shape is {offset_group_shape}")
+    print(f"Offset 1 offset_group_bounds: {offset_group_bounds}")
 
-    print(f"Original tiles are {len(base_tiles)}, {len(offset_tiles_bounds)}")
+    print("START OF OFFSET #2")
+    offset_2_tiles_bounds = offset_bounds_from_base_tiles(base_tiles, offset_amount=0.66)
+    offset_2_group_shape = offset_group_shape_from_base_tiles(base_tiles, scale=scale)
+    offset_2_group_bounds = group_bounds_from_list_of_bounds(offset_2_tiles_bounds)
+    print(f"Offset 2 offset_tiles_bounds {offset_2_tiles_bounds}")
+    print(f"Offset 2 image shape is {offset_2_group_shape}")
+    print(f"Offset 2 offset_group_bounds: {offset_2_group_bounds}")
+
+    print(f"Original tiles are {len(base_tiles)}, {len(offset_group_bounds)}, {len(offset_2_group_bounds)}")
+    # XXXC - THIS IS THE END OF THE OFFSETTING.
 
     # Filter out land tiles
     # XXXBUG is_tile_over_water throws ValueError if the scene crosses or is close to the antimeridian. Example: S1A_IW_GRDH_1SDV_20230726T183302_20230726T183327_049598_05F6CA_31E7
     # XXXBUG is_tile_over_water throws IndexError if the scene touches the Caspian sea (globe says it is NOT ocean, whereas our cloud_function_scene_relevancy says it is). Example: S1A_IW_GRDH_1SDV_20230727T025332_20230727T025357_049603_05F6F2_AF3E
     base_tiles = [t for t in base_tiles if is_tile_over_water(tiler.bounds(t))]
+    
+    # XXXC - OFFSET TIlE BOUNDS OVER WATER
     offset_tiles_bounds = [b for b in offset_tiles_bounds if is_tile_over_water(b)]
+    print(f"offset_tiles_bounds: {offset_tiles_bounds}")
+    offset_2_tiles_bounds = [b for b in offset_2_tiles_bounds if is_tile_over_water(b)]
+    print(f"offset_2_tiles_bounds: {offset_2_tiles_bounds}")
 
     ntiles = len(base_tiles)
     noffsettiles = len(offset_tiles_bounds)
@@ -399,21 +416,37 @@ async def _orchestrate(
                 20,
                 "base tiles",
             )
-
+            
+            # XXXC - START OFFSET 2  
             offset_tiles_inference = await perform_inference(
                 offset_tiles_bounds,
                 cloud_run_inference.get_offset_tile_inference,
                 20,
                 "offset tiles",
             )
+            # XXXC - END OFFSET 2
+
+            # XXXC - START OFFSET 3 
+            offset_2_tiles_inference = await perform_inference(
+                offset_2_tiles_bounds,
+                cloud_run_inference.get_offset_tile_inference,  # THIS FUNCTION NEEDS TO BE EDITTED
+                20,
+                "offset2 tiles",
+            )
+            # XXXC - END OFFSET 3
 
             if model.type == "MASKRCNN":
                 out_fc = geojson.FeatureCollection(
                     features=flatten_feature_list(base_tiles_inference)
                 )
+                # XXXC - OFFSET 3  >> THIS NEEDS TO BE EDITTED 
                 out_fc_offset = geojson.FeatureCollection(
                     features=flatten_feature_list(offset_tiles_inference)
                 )
+
+                out_fc_offset_2 = geojson.FeatureCollection(
+                    features=flatten_feature_list(offset_2_tiles_inference)
+                )
             elif model.type == "UNET":
                 # print("Loading all tiles into memory for merge!")
                 # ds_base_tiles = []
@@ -475,14 +508,23 @@ async def _orchestrate(
             # Example: S1A_IW_GRDH_1SDV_20230727T185101_20230727T185126_049613_05F744_1E56
             print("XXXDEBUG out_fc", out_fc)
             print("XXXDEBUG out_fc_offset", out_fc_offset)
-            merged_inferences = merge_inferences(
-                out_fc,
-                out_fc_offset,
-                isolated_conf_multiplier=0.5,
+            print("XXXCDEBUG out_fc_offset2", out_fc_offset_2)
+
+            # XXXC - OFFSET 3 >> change code to allow number of offsets, pass in list of featureCollections (2 or 3)
+            print("XXXC >> TRYING TO MERGE INFERENCES")
+            merged_inferences = merge_inferences(  # changes to 15-16, give list of FCs 2-3 items
+                feature_collections = [out_fc, out_fc_offset, out_fc_offset_2],
+                # out_fc,
+                # out_fc_offset,
+                # out_fc_offset_2,  # Added this as a test before giving merge a list
+                
+                # Not declaring isolated_conf_multiplier because the default value is updated in merging.py
+                # isolated_conf_multiplier=0.3,  # Changed to ~.33 (1/len(# of FCs)), could be calculated in merging.py and document this. Originally 0.5
                 proximity_meters=500,
                 closing_meters=0,
                 opening_meters=0,
             )
+            print("XXXC >> IF WE GET HERE THE MERGE INFERENCES WORKED")
 
             if merged_inferences.get("features"):
                 async with db_client.session.begin():
@@ -519,11 +561,13 @@ async def _orchestrate(
             end_time = datetime.now()
             print(f"End time: {end_time}")
             print("Returning results!")
+            print("XXXC >> IF WE GET HERE THE MERGE INFERENCES WORKED EVEN MORE, GOT TO LINE 565")
+            
 
             async with db_client.session.begin():
                 orchestrator_run.success = True
                 orchestrator_run.inference_end_time = end_time
-
+            # XXXC >> reduce num variables to a list of featureCollection, ala the merging approach
             orchestrator_result = OrchestratorResult(
                 classification_base=out_fc,
                 classification_offset=out_fc_offset,
@@ -540,5 +584,5 @@ async def _orchestrate(
                 ntiles=ntiles,
                 noffsettiles=noffsettiles,
             )
-
+    print("XXXC >> IF WE GET HERE EVERYTHING WORKED")
     return orchestrator_result
diff --git a/cerulean_cloud/cloud_run_orchestrator/merging.py b/cerulean_cloud/cloud_run_orchestrator/merging.py
index 8b36db52..21ce9667 100644
--- a/cerulean_cloud/cloud_run_orchestrator/merging.py
+++ b/cerulean_cloud/cloud_run_orchestrator/merging.py
@@ -10,26 +10,23 @@ def reproject_to_utm(gdf_wgs84):
     utm_crs = gdf_wgs84.estimate_utm_crs(datum_name="WGS 84")
     return gdf_wgs84.to_crs(utm_crs)
 
-
 def merge_inferences(
-    base_tile_fc: geojson.FeatureCollection,
-    offset_tile_fc: geojson.FeatureCollection,
-    isolated_conf_multiplier: float = 1,
+    feature_collections: list[geojson.FeatureCollection],  # XXXC >> USING THIS AS A TEST replacement for base_tile_fc and offset_tile_fc
+    isolated_conf_multiplier: float = None,  # THIS IS FURTHER DEFINED ON LINES 39-40 as 1 / len(feature_collections)
     proximity_meters: int = 500,
     closing_meters: int = 0,
     opening_meters: int = 0,
 ) -> geojson.FeatureCollection:
     """
-    Merge base and offset tile inference.
+    Merge base and all offset tile inference.
 
-    This function takes in two geojson FeatureCollections and merges them together.
-    During the merge, the geometries can be adjusted to incorporate neighboring features
-    based on the proximity setting. The confidence of isolated features can also be adjusted.
+    This function takes a list of geojson FeatureCollections and merges them together. During the merge, the 
+    geometries can be adjusted to incorporate neighboring features based on the proximity setting. The 
+    confidence of isolated features can also be adjusted.
 
     Parameters:
-    - base_tile_fc: The primary FeatureCollection to be merged.
-    - offset_tile_fc: The secondary FeatureCollection to be merged with the primary.
-    - isolated_conf_multiplier: A multiplier for the confidence of isolated features (default is 1).
+    - feature_collections: A list of FeatureCollecitons to be merged, a primary and any secondary FeatureCollections
+    - isolated_conf_multiplier: A multiplier for the confidence of isolated features (default is 1 / len(feature_collections)).
     - proximity_meters: The distance to check for neighboring features and expand the geometries (default is 500m).
     - closing_meters: The distance to apply the morphological 'closing' operation (default is 0m).
     - opening_meters: The distance to apply the morphological 'opening' operation (default is 0m).
@@ -38,74 +35,85 @@ def merge_inferences(
     A merged geojson FeatureCollection.
     """
 
-    # Check if both FeatureCollections have features
-    if base_tile_fc["features"] and offset_tile_fc["features"]:
-        # Convert the FeatureCollections to GeoDataFrames
-        base_gdf = gpd.GeoDataFrame.from_features(base_tile_fc["features"], crs=4326)
-        offset_gdf = gpd.GeoDataFrame.from_features(
-            offset_tile_fc["features"], crs=4326
-        )
-
-        # Reproject both GeoDataFrames to a UTM CRS (for accurate distance calculations)
-        base_gdf = reproject_to_utm(base_gdf)
-        offset_gdf = offset_gdf.to_crs(base_gdf.crs)
-
-        # Combine both GeoDataFrames
-        concat_gdf = pd.concat([base_gdf, offset_gdf], ignore_index=True)
-        final_gdf = concat_gdf.copy()
-
-        # If proximity is set, expand the geometry of each feature by the defined distance
-        if proximity_meters is not None:
-            concat_gdf["geometry"] = concat_gdf.buffer(proximity_meters)
-
-        # Join the features that intersect with each other
-        joined = gpd.sjoin(concat_gdf, concat_gdf, predicate="intersects").reset_index()
-
-        # Create a graph where each node represents a feature and edges represent overlaps/intersections
-        G = nx.from_pandas_edgelist(joined, "index", "index_right")
-
-        # For each connected component in the graph, assign a group index and count its features
-        group_mapping = {
-            feature: group
-            for group, component in enumerate(nx.connected_components(G))
-            for feature in component
-        }
-        group_counts = {
-            feature: len(component)
-            for component in nx.connected_components(G)
-            for feature in component
-        }
-
-        # Map the group indices and counts back to the GeoDataFrame
-        final_gdf["group_index"] = final_gdf.index.map(group_mapping)
-        final_gdf["group_count"] = final_gdf.index.map(group_counts)
-
-        # Adjust the confidence value for features that are isolated (not part of a larger group)
-        final_gdf.loc[
-            final_gdf["group_count"] == 1, "machine_confidence"
-        ] *= isolated_conf_multiplier
-
-        # Dissolve overlapping features into one based on their group index and calculate the median confidence and maximum inference index
-        dissolved_gdf = final_gdf.dissolve(
-            by="group_index", aggfunc={"machine_confidence": "median", "inf_idx": "max"}
-        )
-
-        # If set, apply a morphological 'closing' operation to the geometries
-        if closing_meters is not None:
-            dissolved_gdf["geometry"] = dissolved_gdf.buffer(closing_meters).buffer(
-                -closing_meters
-            )
-
-        # If set, apply a morphological 'opening' operation to the geometries
-        if opening_meters is not None:
-            dissolved_gdf["geometry"] = dissolved_gdf.buffer(-opening_meters).buffer(
-                opening_meters
-            )
-
-        # Reproject the GeoDataFrame back to WGS 84 CRS
-        result = dissolved_gdf.to_crs(crs=4326)
-
-        return result.__geo_interface__
-    else:
-        # If one of the FeatureCollections is empty, return an empty FeatureCollection
-        return geojson.FeatureCollection(features=[])
+    # Define the isolated_conf_multiplier
+    if isolated_conf_multiplier is None:
+        isolated_conf_multiplier = 1 / len(feature_collections)
+
+    # Combined GeoDataFrames. Only appended inf all FeatureCollections have at least 1 feature.
+    # gdfs_for_processing = []
+
+    # Check that all FeatureCollections have features. This throws out any detection if it is not present in 
+    # all tiles of the feature_collections.
+    # if any(len(fc["features"]) == 0 for fc in feature_collections):
+    #     shared_crs = feature_collections[0].crs
+    #     for fc in feature_collections:
+    #         # Convert the fc to a GeoDataFrame
+    #         gdf = gpd.GeoDataFrame.from_features(fc["features"], crs=4326)
+        
+    #         # Reproject both GeoDataFrames to a UTM CRS (for accurate distance calculations)
+    #         gdfs_for_processing.append(gdf)
+
+    #     gdf_r = reproject_to_utm(gdf)
+    
+    # else:
+    #     # If one of the FeatureCollections is empty, return an empty FeatureCollection
+    #     return geojson.FeatureCollection(features=[])
+
+    gdfs_for_processing = [gpd.GeoDataFrame.from_features(fc["features"], crs=4326) for fc in feature_collections if fc["features"] else gpd.GeoDataFrame([],crs=4326)]
+    gdfs_for_processing = [gdf.to_crs(gdfs_for_processing[0].reproject_to_utm(gdfs_for_processing[0])) for gdf in gdfs_for_processing]
+
+    # Concat the GeoDataFrames
+    concat_gdf = pd.concat(gdfs_for_processing, ignore_index=True)
+    final_gdf = concat_gdf.copy()
+
+    # If proximity is set, expand the geometry of each feature by the defined distance
+    if proximity_meters is not None:
+        concat_gdf["geometry"] = concat_gdf.buffer(proximity_meters)
+
+    # Join the features that intersect with each other >> XXXC THIS MAY NEED REWORK, keep an eye on it :)
+    joined = gpd.sjoin(concat_gdf, concat_gdf, predicate="intersects").reset_index()
+
+    # Create a graph where each node represents a feature and edges represent overlaps/intersections
+    G = nx.from_pandas_edgelist(joined, "index", "index_right")
+
+    # For each connected component in the graph, assign a group index and count its features
+    group_mapping = {
+        feature: group
+        for group, component in enumerate(nx.connected_components(G))
+        for feature in component
+    }
+    group_counts = {
+        feature: len(component)
+        for component in nx.connected_components(G)
+        for feature in component
+    }
+
+    # Map the group indices and counts back to the GeoDataFrame
+    final_gdf["group_index"] = final_gdf.index.map(group_mapping)
+    final_gdf["group_count"] = final_gdf.index.map(group_counts)
+
+
+    # Adjust the confidence value for features that are isolated (not part of a larger group)
+    # XXXC >> how do we make this a little more dynamic, check against number of tile detections to
+    # impact the conf_multiplier value
+    final_gdf.loc[
+        final_gdf["group_count"] == 1, "machine_confidence"
+    ] *= isolated_conf_multiplier
+
+    # Dissolve overlapping features into one based on their group index and calculate the median confidence and maximum inference index
+    dissolved_gdf = final_gdf.dissolve(
+        by="group_index", aggfunc={"machine_confidence": "median", "inf_idx": "max"}
+    )
+
+    # If set, apply a morphological 'closing' operation to the geometries
+    if closing_meters is not None:
+        dissolved_gdf["geometry"] = dissolved_gdf.buffer(closing_meters).buffer(-closing_meters)
+
+    # If set, apply a morphological 'opening' operation to the geometries
+    if opening_meters is not None:
+        dissolved_gdf["geometry"] = dissolved_gdf.buffer(-opening_meters).buffer(opening_meters)
+
+    # Reproject the GeoDataFrame back to WGS 84 CRS
+    result = dissolved_gdf.to_crs(crs=4326)
+
+    return result.__geo_interface__
diff --git a/cerulean_cloud/tiling.py b/cerulean_cloud/tiling.py
index d11f1584..9619a4f9 100644
--- a/cerulean_cloud/tiling.py
+++ b/cerulean_cloud/tiling.py
@@ -55,9 +55,9 @@ def adjacent_tile(tile: morecantile.Tile, dx: int, dy: int) -> morecantile.Tile:
     other = morecantile.Tile(x=x + dx, y=y + dy, z=z)
     return other
 
-
 def offset_bounds_from_base_tiles(
     tiles: List[morecantile.Tile],
+    offset_amount: float = 0.5,
 ) -> List[Tuple[float, float, float, float]]:
     """from a set of base tiles, generate offset tiles"""
     out_offset_tile_bounds = []
@@ -74,8 +74,8 @@ def offset_bounds_from_base_tiles(
             # +2 because tileymax needs to be included (+1) and the new grid has one extra row/column (+1)
             tile = morecantile.Tile(new_x, new_y, zoom)
             adj_tile = adjacent_tile(tile, -1, -1)  # Negative dY is upwards!!!
-            minx, maxy = pixel_to_location(adj_tile, 0.5, 0.5)
-            maxx, miny = pixel_to_location(tile, 0.5, 0.5)
+            minx, maxy = pixel_to_location(adj_tile, offset_amount, offset_amount)
+            maxx, miny = pixel_to_location(tile, offset_amount, offset_amount)
             out_offset_tile_bounds += [(minx, miny, maxx, maxy)]
 
     return out_offset_tile_bounds

From 78d58a72882e6b36dba9fe32fde243f026f3b897 Mon Sep 17 00:00:00 2001
From: christian <cjthomas730@gmail.com>
Date: Fri, 3 Nov 2023 11:05:56 -0400
Subject: [PATCH 02/18] Testing an update to the merge_inferences function

---
 cerulean_cloud/cloud_run_orchestrator/handler.py | 3 +++
 cerulean_cloud/cloud_run_orchestrator/merging.py | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/cerulean_cloud/cloud_run_orchestrator/handler.py b/cerulean_cloud/cloud_run_orchestrator/handler.py
index af83d810..9a854c42 100644
--- a/cerulean_cloud/cloud_run_orchestrator/handler.py
+++ b/cerulean_cloud/cloud_run_orchestrator/handler.py
@@ -524,6 +524,9 @@ async def _orchestrate(
                 closing_meters=0,
                 opening_meters=0,
             )
+            fc_count = len(merged_inferences.feature_collections)
+            print(f"XXXC >> Sanity Check, there area {fc_count} Feature Collections being fed to merge_inferences.")
+            print(f"XXXC >> Checking the isolated_conf_multiplier: {merged_inferences.isolated_conf_multiplier}")
             print("XXXC >> IF WE GET HERE THE MERGE INFERENCES WORKED")
 
             if merged_inferences.get("features"):
diff --git a/cerulean_cloud/cloud_run_orchestrator/merging.py b/cerulean_cloud/cloud_run_orchestrator/merging.py
index 21ce9667..31c01337 100644
--- a/cerulean_cloud/cloud_run_orchestrator/merging.py
+++ b/cerulean_cloud/cloud_run_orchestrator/merging.py
@@ -59,7 +59,7 @@ def merge_inferences(
     #     # If one of the FeatureCollections is empty, return an empty FeatureCollection
     #     return geojson.FeatureCollection(features=[])
 
-    gdfs_for_processing = [gpd.GeoDataFrame.from_features(fc["features"], crs=4326) for fc in feature_collections if fc["features"] else gpd.GeoDataFrame([],crs=4326)]
+    gdfs_for_processing = [gpd.GeoDataFrame.from_features(fc["features"], crs=4326) if fc["features"] else gpd.GeoDataFrame([], crs=4326) for fc in feature_collections]
     gdfs_for_processing = [gdf.to_crs(gdfs_for_processing[0].reproject_to_utm(gdfs_for_processing[0])) for gdf in gdfs_for_processing]
 
     # Concat the GeoDataFrames

From f0e23a5829c12da3ddbc7f1a188286e501389350 Mon Sep 17 00:00:00 2001
From: christian <cjthomas730@gmail.com>
Date: Fri, 3 Nov 2023 11:27:13 -0400
Subject: [PATCH 03/18] Fixed a typing issue was using list not List for
 FeatureCollection specifications, fixed now

---
 cerulean_cloud/cloud_run_orchestrator/merging.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cerulean_cloud/cloud_run_orchestrator/merging.py b/cerulean_cloud/cloud_run_orchestrator/merging.py
index 31c01337..f3ac0c45 100644
--- a/cerulean_cloud/cloud_run_orchestrator/merging.py
+++ b/cerulean_cloud/cloud_run_orchestrator/merging.py
@@ -11,7 +11,7 @@ def reproject_to_utm(gdf_wgs84):
     return gdf_wgs84.to_crs(utm_crs)
 
 def merge_inferences(
-    feature_collections: list[geojson.FeatureCollection],  # XXXC >> USING THIS AS A TEST replacement for base_tile_fc and offset_tile_fc
+    feature_collections: List[geojson.FeatureCollection],  # XXXC >> USING THIS AS A TEST replacement for base_tile_fc and offset_tile_fc
     isolated_conf_multiplier: float = None,  # THIS IS FURTHER DEFINED ON LINES 39-40 as 1 / len(feature_collections)
     proximity_meters: int = 500,
     closing_meters: int = 0,

From 351e5e24ced36ead74d29eb51dab8ee4558f3ace Mon Sep 17 00:00:00 2001
From: christian <cjthomas730@gmail.com>
Date: Fri, 3 Nov 2023 11:35:05 -0400
Subject: [PATCH 04/18] Importing List from typing, needed for the merging

---
 cerulean_cloud/cloud_run_orchestrator/merging.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cerulean_cloud/cloud_run_orchestrator/merging.py b/cerulean_cloud/cloud_run_orchestrator/merging.py
index f3ac0c45..d079d1c6 100644
--- a/cerulean_cloud/cloud_run_orchestrator/merging.py
+++ b/cerulean_cloud/cloud_run_orchestrator/merging.py
@@ -3,6 +3,7 @@
 import geopandas as gpd
 import networkx as nx
 import pandas as pd
+from typing import List
 
 
 def reproject_to_utm(gdf_wgs84):

From 6dd6f38ced8c16e92ba04508f95d60c7e71ee7c2 Mon Sep 17 00:00:00 2001
From: christian <cjthomas730@gmail.com>
Date: Fri, 3 Nov 2023 14:11:08 -0400
Subject: [PATCH 05/18] Trying to fix the test_merging functions

---
 .../test_cerulean_cloud/test_cloud_run_orchestrator.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
index d70440e9..dc8fdd60 100644
--- a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
+++ b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
@@ -427,15 +427,15 @@ def test_flatten_result():
 
 def test_func_merge_inferences():
     with open("test/test_cerulean_cloud/fixtures/base.geojson") as src:
-        base_tile_fc = dict(geojson.load(src))
+        out_fc = dict(geojson.load(src))
 
     with open("test/test_cerulean_cloud/fixtures/offset.geojson") as src:
-        offset_tile_fc = dict(geojson.load(src))
+        out_fc_offset = dict(geojson.load(src))
 
     merged = merge_inferences(
-        base_tile_fc=base_tile_fc,
-        offset_tile_fc=offset_tile_fc,
-        isolated_conf_multiplier=0.1,
+        # base_tile_fc=base_tile_fc,
+        # offset_tile_fc=offset_tile_fc,
+        feature_collections = [out_fc, out_fc_offset],
         proximity_meters=500,
         closing_meters=100,
         opening_meters=100,

From 9e4897ddf7dd5812ebbb8f3a63de958027c124fd Mon Sep 17 00:00:00 2001
From: christian <cjthomas730@gmail.com>
Date: Fri, 3 Nov 2023 14:16:30 -0400
Subject: [PATCH 06/18] More test fixing to get merges to run

---
 test/test_cerulean_cloud/test_cloud_run_orchestrator.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
index dc8fdd60..94848ca7 100644
--- a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
+++ b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
@@ -459,22 +459,22 @@ def test_func_merge_inferences_empty():
         offset_tile_fc = dict(geojson.load(src))
 
     merged = merge_inferences(
-        base_tile_fc=geojson.FeatureCollection(features=[]),
+        feature_collections=geojson.FeatureCollection(features=[]),
         offset_tile_fc=offset_tile_fc,
     )
     assert merged["type"] == "FeatureCollection"
     assert len(merged["features"]) == 0
 
     merged = merge_inferences(
-        base_tile_fc=offset_tile_fc,
+        feature_collections=offset_tile_fc,
         offset_tile_fc=geojson.FeatureCollection(features=[]),
     )
     assert merged["type"] == "FeatureCollection"
     assert len(merged["features"]) == 0
 
     merged = merge_inferences(
-        base_tile_fc=geojson.FeatureCollection(features=[]),
-        offset_tile_fc=geojson.FeatureCollection(features=[]),
+        feature_collections=geojson.FeatureCollection(features=[]),
+        feature_collections=geojson.FeatureCollection(features=[]),
     )
     assert merged["type"] == "FeatureCollection"
     assert len(merged["features"]) == 0

From 794e45a625e9be2b99e27398f25785aab4f6e79c Mon Sep 17 00:00:00 2001
From: christian <cjthomas730@gmail.com>
Date: Fri, 3 Nov 2023 14:43:07 -0400
Subject: [PATCH 07/18] another fix to testing merge

---
 test/test_cerulean_cloud/test_cloud_run_orchestrator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
index 94848ca7..aa5d09ac 100644
--- a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
+++ b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
@@ -474,7 +474,7 @@ def test_func_merge_inferences_empty():
 
     merged = merge_inferences(
         feature_collections=geojson.FeatureCollection(features=[]),
-        feature_collections=geojson.FeatureCollection(features=[]),
+        # feature_collections=geojson.FeatureCollection(features=[]),
     )
     assert merged["type"] == "FeatureCollection"
     assert len(merged["features"]) == 0

From d8a2a6142e692ef109ee21600d567e83a91cc1de Mon Sep 17 00:00:00 2001
From: christian <cjthomas730@gmail.com>
Date: Fri, 3 Nov 2023 15:19:59 -0400
Subject: [PATCH 08/18] Seem to have been mis-using     reproject_to_utm(),
 trying to rectify

---
 cerulean_cloud/cloud_run_orchestrator/merging.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cerulean_cloud/cloud_run_orchestrator/merging.py b/cerulean_cloud/cloud_run_orchestrator/merging.py
index d079d1c6..aa7a02b2 100644
--- a/cerulean_cloud/cloud_run_orchestrator/merging.py
+++ b/cerulean_cloud/cloud_run_orchestrator/merging.py
@@ -60,8 +60,9 @@ def merge_inferences(
     #     # If one of the FeatureCollections is empty, return an empty FeatureCollection
     #     return geojson.FeatureCollection(features=[])
 
+    # gdfs_for_processing = [gpd.GeoDataFrame.from_features(fc["features"], crs=4326) for fc in feature_collections if fc["features"] else gpd.GeoDataFrame([],crs=4326)]
     gdfs_for_processing = [gpd.GeoDataFrame.from_features(fc["features"], crs=4326) if fc["features"] else gpd.GeoDataFrame([], crs=4326) for fc in feature_collections]
-    gdfs_for_processing = [gdf.to_crs(gdfs_for_processing[0].reproject_to_utm(gdfs_for_processing[0])) for gdf in gdfs_for_processing]
+    gdfs_for_processing = [gdf.to_crs(reproject_to_utm(gdfs_for_processing[0])) for gdf in gdfs_for_processing]
 
     # Concat the GeoDataFrames
     concat_gdf = pd.concat(gdfs_for_processing, ignore_index=True)

From 742bcf19cd5d09c1af6358a67727961920a6459d Mon Sep 17 00:00:00 2001
From: christian <cjthomas730@gmail.com>
Date: Fri, 3 Nov 2023 15:40:41 -0400
Subject: [PATCH 09/18] I hate this test

---
 test/test_cerulean_cloud/test_cloud_run_orchestrator.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
index aa5d09ac..5c04c899 100644
--- a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
+++ b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
@@ -460,14 +460,14 @@ def test_func_merge_inferences_empty():
 
     merged = merge_inferences(
         feature_collections=geojson.FeatureCollection(features=[]),
-        offset_tile_fc=offset_tile_fc,
+        feature_collections=[offset_tile_fc],
     )
     assert merged["type"] == "FeatureCollection"
     assert len(merged["features"]) == 0
 
     merged = merge_inferences(
-        feature_collections=offset_tile_fc,
-        offset_tile_fc=geojson.FeatureCollection(features=[]),
+        feature_collections=[offset_tile_fc],
+        feature_collections=geojson.FeatureCollection(features=[]),
     )
     assert merged["type"] == "FeatureCollection"
     assert len(merged["features"]) == 0

From bf7423a8c2146d4afd8b38c564bd766f49e7d818 Mon Sep 17 00:00:00 2001
From: christian <cjthomas730@gmail.com>
Date: Fri, 3 Nov 2023 15:47:28 -0400
Subject: [PATCH 10/18] Once again, with hatred. Trying to get the dumb test
 working

---
 test/test_cerulean_cloud/test_cloud_run_orchestrator.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
index 5c04c899..ceba7739 100644
--- a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
+++ b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
@@ -460,13 +460,11 @@ def test_func_merge_inferences_empty():
 
     merged = merge_inferences(
         feature_collections=geojson.FeatureCollection(features=[]),
-        feature_collections=[offset_tile_fc],
     )
     assert merged["type"] == "FeatureCollection"
     assert len(merged["features"]) == 0
 
     merged = merge_inferences(
-        feature_collections=[offset_tile_fc],
         feature_collections=geojson.FeatureCollection(features=[]),
     )
     assert merged["type"] == "FeatureCollection"

From 3faae6b8a2ca77c6b9ec697e828a4e88d4440e70 Mon Sep 17 00:00:00 2001
From: christian <cjthomas730@gmail.com>
Date: Fri, 3 Nov 2023 16:07:29 -0400
Subject: [PATCH 11/18] Truly, is there a greater force than my rage at
 testing? Trying again, this time all test merge should be in the right format

---
 test/test_cerulean_cloud/test_cloud_run_orchestrator.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
index ceba7739..6102bd0d 100644
--- a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
+++ b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
@@ -458,21 +458,21 @@ def test_func_merge_inferences_empty():
     with open("test/test_cerulean_cloud/fixtures/offset.geojson") as src:
         offset_tile_fc = dict(geojson.load(src))
 
+
     merged = merge_inferences(
-        feature_collections=geojson.FeatureCollection(features=[]),
+        feature_collections = [geojson.FeatureCollection(features=[]),offset_tile_fc]
     )
     assert merged["type"] == "FeatureCollection"
     assert len(merged["features"]) == 0
 
     merged = merge_inferences(
-        feature_collections=geojson.FeatureCollection(features=[]),
+        feature_collections=[offset_tile_fc,geojson.FeatureCollection(features=[])]
     )
     assert merged["type"] == "FeatureCollection"
     assert len(merged["features"]) == 0
 
     merged = merge_inferences(
-        feature_collections=geojson.FeatureCollection(features=[]),
-        # feature_collections=geojson.FeatureCollection(features=[]),
+        feature_collections=[geojson.FeatureCollection(features=[]),geojson.FeatureCollection(features=[])]
     )
     assert merged["type"] == "FeatureCollection"
     assert len(merged["features"]) == 0

From 35ba939d1f3eb45da75a70628b3770c5a195de1e Mon Sep 17 00:00:00 2001
From: Jona <jona.raphael@gmail.com>
Date: Sat, 4 Nov 2023 17:14:32 -0400
Subject: [PATCH 12/18] Proposed a fix to the tests. Also added first pass
 version of dividing by N for that number of feature_collections

---
 .../cloud_run_orchestrator/merging.py         | 71 ++++++++-----------
 .../test_cloud_run_orchestrator.py            | 26 +++----
 2 files changed, 41 insertions(+), 56 deletions(-)

diff --git a/cerulean_cloud/cloud_run_orchestrator/merging.py b/cerulean_cloud/cloud_run_orchestrator/merging.py
index aa7a02b2..515159a1 100644
--- a/cerulean_cloud/cloud_run_orchestrator/merging.py
+++ b/cerulean_cloud/cloud_run_orchestrator/merging.py
@@ -1,9 +1,10 @@
 """merging inference from base and offset tiles"""
+from typing import List
+
 import geojson
 import geopandas as gpd
 import networkx as nx
 import pandas as pd
-from typing import List
 
 
 def reproject_to_utm(gdf_wgs84):
@@ -11,9 +12,11 @@ def reproject_to_utm(gdf_wgs84):
     utm_crs = gdf_wgs84.estimate_utm_crs(datum_name="WGS 84")
     return gdf_wgs84.to_crs(utm_crs)
 
+
 def merge_inferences(
-    feature_collections: List[geojson.FeatureCollection],  # XXXC >> USING THIS AS A TEST replacement for base_tile_fc and offset_tile_fc
-    isolated_conf_multiplier: float = None,  # THIS IS FURTHER DEFINED ON LINES 39-40 as 1 / len(feature_collections)
+    feature_collections: List[
+        geojson.FeatureCollection
+    ],  # XXXC >> USING THIS AS A TEST replacement for base_tile_fc and offset_tile_fc
     proximity_meters: int = 500,
     closing_meters: int = 0,
     opening_meters: int = 0,
@@ -21,8 +24,8 @@ def merge_inferences(
     """
     Merge base and all offset tile inference.
 
-    This function takes a list of geojson FeatureCollections and merges them together. During the merge, the 
-    geometries can be adjusted to incorporate neighboring features based on the proximity setting. The 
+    This function takes a list of geojson FeatureCollections and merges them together. During the merge, the
+    geometries can be adjusted to incorporate neighboring features based on the proximity setting. The
     confidence of isolated features can also be adjusted.
 
     Parameters:
@@ -35,34 +38,17 @@ def merge_inferences(
     Returns:
     A merged geojson FeatureCollection.
     """
-
-    # Define the isolated_conf_multiplier
-    if isolated_conf_multiplier is None:
-        isolated_conf_multiplier = 1 / len(feature_collections)
-
-    # Combined GeoDataFrames. Only appended inf all FeatureCollections have at least 1 feature.
-    # gdfs_for_processing = []
-
-    # Check that all FeatureCollections have features. This throws out any detection if it is not present in 
-    # all tiles of the feature_collections.
-    # if any(len(fc["features"]) == 0 for fc in feature_collections):
-    #     shared_crs = feature_collections[0].crs
-    #     for fc in feature_collections:
-    #         # Convert the fc to a GeoDataFrame
-    #         gdf = gpd.GeoDataFrame.from_features(fc["features"], crs=4326)
-        
-    #         # Reproject both GeoDataFrames to a UTM CRS (for accurate distance calculations)
-    #         gdfs_for_processing.append(gdf)
-
-    #     gdf_r = reproject_to_utm(gdf)
-    
-    # else:
-    #     # If one of the FeatureCollections is empty, return an empty FeatureCollection
-    #     return geojson.FeatureCollection(features=[])
-
-    # gdfs_for_processing = [gpd.GeoDataFrame.from_features(fc["features"], crs=4326) for fc in feature_collections if fc["features"] else gpd.GeoDataFrame([],crs=4326)]
-    gdfs_for_processing = [gpd.GeoDataFrame.from_features(fc["features"], crs=4326) if fc["features"] else gpd.GeoDataFrame([], crs=4326) for fc in feature_collections]
-    gdfs_for_processing = [gdf.to_crs(reproject_to_utm(gdfs_for_processing[0])) for gdf in gdfs_for_processing]
+    gdfs_for_processing = [
+        reproject_to_utm(
+            gpd.GeoDataFrame.from_features(fc["features"], crs=4326).assign(fc_index=i)
+        )
+        for i, fc in enumerate(feature_collections)
+        if fc["features"]
+    ]
+
+    if len(gdfs_for_processing) == 0:
+        # No inferences found in any tiling
+        return geojson.FeatureCollection(features=[])
 
     # Concat the GeoDataFrames
     concat_gdf = pd.concat(gdfs_for_processing, ignore_index=True)
@@ -94,13 +80,12 @@ def merge_inferences(
     final_gdf["group_index"] = final_gdf.index.map(group_mapping)
     final_gdf["group_count"] = final_gdf.index.map(group_counts)
 
-
     # Adjust the confidence value for features that are isolated (not part of a larger group)
-    # XXXC >> how do we make this a little more dynamic, check against number of tile detections to
-    # impact the conf_multiplier value
-    final_gdf.loc[
-        final_gdf["group_count"] == 1, "machine_confidence"
-    ] *= isolated_conf_multiplier
+    final_gdf["overlap_factor"] = final_gdf.groupby("group_index")[
+        "fc_index"
+    ].transform(lambda x: len(x.unique()) / len(feature_collections))
+
+    final_gdf["machine_confidence"] *= final_gdf["overlap_factor"]
 
     # Dissolve overlapping features into one based on their group index and calculate the median confidence and maximum inference index
     dissolved_gdf = final_gdf.dissolve(
@@ -109,11 +94,15 @@ def merge_inferences(
 
     # If set, apply a morphological 'closing' operation to the geometries
     if closing_meters is not None:
-        dissolved_gdf["geometry"] = dissolved_gdf.buffer(closing_meters).buffer(-closing_meters)
+        dissolved_gdf["geometry"] = dissolved_gdf.buffer(closing_meters).buffer(
+            -closing_meters
+        )
 
     # If set, apply a morphological 'opening' operation to the geometries
     if opening_meters is not None:
-        dissolved_gdf["geometry"] = dissolved_gdf.buffer(-opening_meters).buffer(opening_meters)
+        dissolved_gdf["geometry"] = dissolved_gdf.buffer(-opening_meters).buffer(
+            opening_meters
+        )
 
     # Reproject the GeoDataFrame back to WGS 84 CRS
     result = dissolved_gdf.to_crs(crs=4326)
diff --git a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
index 6102bd0d..4de55dd0 100644
--- a/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
+++ b/test/test_cerulean_cloud/test_cloud_run_orchestrator.py
@@ -427,15 +427,13 @@ def test_flatten_result():
 
 def test_func_merge_inferences():
     with open("test/test_cerulean_cloud/fixtures/base.geojson") as src:
-        out_fc = dict(geojson.load(src))
+        base_tile_fc = dict(geojson.load(src))
 
     with open("test/test_cerulean_cloud/fixtures/offset.geojson") as src:
-        out_fc_offset = dict(geojson.load(src))
+        offset_tile_fc = dict(geojson.load(src))
 
     merged = merge_inferences(
-        # base_tile_fc=base_tile_fc,
-        # offset_tile_fc=offset_tile_fc,
-        feature_collections = [out_fc, out_fc_offset],
+        [base_tile_fc, offset_tile_fc],
         proximity_meters=500,
         closing_meters=100,
         opening_meters=100,
@@ -458,21 +456,19 @@ def test_func_merge_inferences_empty():
     with open("test/test_cerulean_cloud/fixtures/offset.geojson") as src:
         offset_tile_fc = dict(geojson.load(src))
 
-
-    merged = merge_inferences(
-        feature_collections = [geojson.FeatureCollection(features=[]),offset_tile_fc]
-    )
+    merged = merge_inferences([geojson.FeatureCollection(features=[]), offset_tile_fc])
     assert merged["type"] == "FeatureCollection"
-    assert len(merged["features"]) == 0
+    assert len(merged["features"]) == 5
 
-    merged = merge_inferences(
-        feature_collections=[offset_tile_fc,geojson.FeatureCollection(features=[])]
-    )
+    merged = merge_inferences([offset_tile_fc, geojson.FeatureCollection(features=[])])
     assert merged["type"] == "FeatureCollection"
-    assert len(merged["features"]) == 0
+    assert len(merged["features"]) == 5
 
     merged = merge_inferences(
-        feature_collections=[geojson.FeatureCollection(features=[]),geojson.FeatureCollection(features=[])]
+        [
+            geojson.FeatureCollection(features=[]),
+            geojson.FeatureCollection(features=[]),
+        ],
     )
     assert merged["type"] == "FeatureCollection"
     assert len(merged["features"]) == 0

From e55178de11632d179f344d020496ba775b3f6473 Mon Sep 17 00:00:00 2001
From: Jona <jona.raphael@gmail.com>
Date: Sat, 4 Nov 2023 19:04:33 -0400
Subject: [PATCH 13/18] Black reformatting

---
 .../cloud_run_orchestrator/handler.py         | 36 +++++++++++--------
 cerulean_cloud/tiling.py                      |  1 +
 2 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/cerulean_cloud/cloud_run_orchestrator/handler.py b/cerulean_cloud/cloud_run_orchestrator/handler.py
index 9a854c42..326059b9 100644
--- a/cerulean_cloud/cloud_run_orchestrator/handler.py
+++ b/cerulean_cloud/cloud_run_orchestrator/handler.py
@@ -321,7 +321,7 @@ async def _orchestrate(
     base_group_bounds = group_bounds_from_list_of_bounds(base_tiles_bounds)
     print(f"base_group_bounds: {base_group_bounds}")
 
-    # XXXC - THIS IS THE START OF THE OFFSETTING. tiling.py was updated to allow for offset_amount to be declared by offset_bounds_from_base_tiles(), 
+    # XXXC - THIS IS THE START OF THE OFFSETTING. tiling.py was updated to allow for offset_amount to be declared by offset_bounds_from_base_tiles(),
     # see tiling.py line 61.
     offset_tiles_bounds = offset_bounds_from_base_tiles(base_tiles, offset_amount=0.33)
     offset_group_shape = offset_group_shape_from_base_tiles(base_tiles, scale=scale)
@@ -331,21 +331,25 @@ async def _orchestrate(
     print(f"Offset 1 offset_group_bounds: {offset_group_bounds}")
 
     print("START OF OFFSET #2")
-    offset_2_tiles_bounds = offset_bounds_from_base_tiles(base_tiles, offset_amount=0.66)
+    offset_2_tiles_bounds = offset_bounds_from_base_tiles(
+        base_tiles, offset_amount=0.66
+    )
     offset_2_group_shape = offset_group_shape_from_base_tiles(base_tiles, scale=scale)
     offset_2_group_bounds = group_bounds_from_list_of_bounds(offset_2_tiles_bounds)
     print(f"Offset 2 offset_tiles_bounds {offset_2_tiles_bounds}")
     print(f"Offset 2 image shape is {offset_2_group_shape}")
     print(f"Offset 2 offset_group_bounds: {offset_2_group_bounds}")
 
-    print(f"Original tiles are {len(base_tiles)}, {len(offset_group_bounds)}, {len(offset_2_group_bounds)}")
+    print(
+        f"Original tiles are {len(base_tiles)}, {len(offset_group_bounds)}, {len(offset_2_group_bounds)}"
+    )
     # XXXC - THIS IS THE END OF THE OFFSETTING.
 
     # Filter out land tiles
     # XXXBUG is_tile_over_water throws ValueError if the scene crosses or is close to the antimeridian. Example: S1A_IW_GRDH_1SDV_20230726T183302_20230726T183327_049598_05F6CA_31E7
     # XXXBUG is_tile_over_water throws IndexError if the scene touches the Caspian sea (globe says it is NOT ocean, whereas our cloud_function_scene_relevancy says it is). Example: S1A_IW_GRDH_1SDV_20230727T025332_20230727T025357_049603_05F6F2_AF3E
     base_tiles = [t for t in base_tiles if is_tile_over_water(tiler.bounds(t))]
-    
+
     # XXXC - OFFSET TIlE BOUNDS OVER WATER
     offset_tiles_bounds = [b for b in offset_tiles_bounds if is_tile_over_water(b)]
     print(f"offset_tiles_bounds: {offset_tiles_bounds}")
@@ -416,8 +420,8 @@ async def _orchestrate(
                 20,
                 "base tiles",
             )
-            
-            # XXXC - START OFFSET 2  
+
+            # XXXC - START OFFSET 2
             offset_tiles_inference = await perform_inference(
                 offset_tiles_bounds,
                 cloud_run_inference.get_offset_tile_inference,
@@ -426,7 +430,7 @@ async def _orchestrate(
             )
             # XXXC - END OFFSET 2
 
-            # XXXC - START OFFSET 3 
+            # XXXC - START OFFSET 3
             offset_2_tiles_inference = await perform_inference(
                 offset_2_tiles_bounds,
                 cloud_run_inference.get_offset_tile_inference,  # THIS FUNCTION NEEDS TO BE EDITTED
@@ -439,7 +443,7 @@ async def _orchestrate(
                 out_fc = geojson.FeatureCollection(
                     features=flatten_feature_list(base_tiles_inference)
                 )
-                # XXXC - OFFSET 3  >> THIS NEEDS TO BE EDITTED 
+                # XXXC - OFFSET 3  >> THIS NEEDS TO BE EDITTED
                 out_fc_offset = geojson.FeatureCollection(
                     features=flatten_feature_list(offset_tiles_inference)
                 )
@@ -513,11 +517,10 @@ async def _orchestrate(
             # XXXC - OFFSET 3 >> change code to allow number of offsets, pass in list of featureCollections (2 or 3)
             print("XXXC >> TRYING TO MERGE INFERENCES")
             merged_inferences = merge_inferences(  # changes to 15-16, give list of FCs 2-3 items
-                feature_collections = [out_fc, out_fc_offset, out_fc_offset_2],
+                feature_collections=[out_fc, out_fc_offset, out_fc_offset_2],
                 # out_fc,
                 # out_fc_offset,
                 # out_fc_offset_2,  # Added this as a test before giving merge a list
-                
                 # Not declaring isolated_conf_multiplier because the default value is updated in merging.py
                 # isolated_conf_multiplier=0.3,  # Changed to ~.33 (1/len(# of FCs)), could be calculated in merging.py and document this. Originally 0.5
                 proximity_meters=500,
@@ -525,8 +528,12 @@ async def _orchestrate(
                 opening_meters=0,
             )
             fc_count = len(merged_inferences.feature_collections)
-            print(f"XXXC >> Sanity Check, there area {fc_count} Feature Collections being fed to merge_inferences.")
-            print(f"XXXC >> Checking the isolated_conf_multiplier: {merged_inferences.isolated_conf_multiplier}")
+            print(
+                f"XXXC >> Sanity Check, there area {fc_count} Feature Collections being fed to merge_inferences."
+            )
+            print(
+                f"XXXC >> Checking the isolated_conf_multiplier: {merged_inferences.isolated_conf_multiplier}"
+            )
             print("XXXC >> IF WE GET HERE THE MERGE INFERENCES WORKED")
 
             if merged_inferences.get("features"):
@@ -564,8 +571,9 @@ async def _orchestrate(
             end_time = datetime.now()
             print(f"End time: {end_time}")
             print("Returning results!")
-            print("XXXC >> IF WE GET HERE THE MERGE INFERENCES WORKED EVEN MORE, GOT TO LINE 565")
-            
+            print(
+                "XXXC >> IF WE GET HERE THE MERGE INFERENCES WORKED EVEN MORE, GOT TO LINE 565"
+            )
 
             async with db_client.session.begin():
                 orchestrator_run.success = True
diff --git a/cerulean_cloud/tiling.py b/cerulean_cloud/tiling.py
index 9619a4f9..0c2b2816 100644
--- a/cerulean_cloud/tiling.py
+++ b/cerulean_cloud/tiling.py
@@ -55,6 +55,7 @@ def adjacent_tile(tile: morecantile.Tile, dx: int, dy: int) -> morecantile.Tile:
     other = morecantile.Tile(x=x + dx, y=y + dy, z=z)
     return other
 
+
 def offset_bounds_from_base_tiles(
     tiles: List[morecantile.Tile],
     offset_amount: float = 0.5,

From 2ca0ad4fd7329a24c8383be5ef272ac6174681c0 Mon Sep 17 00:00:00 2001
From: Jona <jona.raphael@gmail.com>
Date: Sat, 4 Nov 2023 21:56:18 -0400
Subject: [PATCH 14/18] Speed up DB by removing AOIs Fix bug where
 merge_inferences() was treated like a class rather than a function

---
 alembic/versions/5e03ce584f3c_add_eez.py         | 1 +
 alembic/versions/c0bd1215a3ca_add_iho.py         | 1 +
 alembic/versions/f9b7166c86b7_add_mpa.py         | 1 +
 cerulean_cloud/cloud_run_orchestrator/handler.py | 8 --------
 4 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/alembic/versions/5e03ce584f3c_add_eez.py b/alembic/versions/5e03ce584f3c_add_eez.py
index e3d728df..f951dd2c 100644
--- a/alembic/versions/5e03ce584f3c_add_eez.py
+++ b/alembic/versions/5e03ce584f3c_add_eez.py
@@ -44,6 +44,7 @@ def upgrade() -> None:
     session = orm.Session(bind=bind)
 
     eez = get_eez_from_url()  # geojson.load(open("EEZ_and_HighSeas_20230410.json"))
+    eez = {"features": []}  # noqa
     for feat in eez.get("features"):
         sovereign_keys = [
             k for k in list(feat["properties"].keys()) if k.startswith("SOVEREIGN")
diff --git a/alembic/versions/c0bd1215a3ca_add_iho.py b/alembic/versions/c0bd1215a3ca_add_iho.py
index df574d85..841805cb 100644
--- a/alembic/versions/c0bd1215a3ca_add_iho.py
+++ b/alembic/versions/c0bd1215a3ca_add_iho.py
@@ -36,6 +36,7 @@ def upgrade() -> None:
     session = orm.Session(bind=bind)
 
     iho = get_iho_from_url()
+    iho = {"features": []}  # noqa
     for feat in iho.get("features"):
         with session.begin():
             aoi_iho = database_schema.AoiIho(
diff --git a/alembic/versions/f9b7166c86b7_add_mpa.py b/alembic/versions/f9b7166c86b7_add_mpa.py
index a44e10d8..e2d7ac19 100644
--- a/alembic/versions/f9b7166c86b7_add_mpa.py
+++ b/alembic/versions/f9b7166c86b7_add_mpa.py
@@ -36,6 +36,7 @@ def upgrade() -> None:
     session = orm.Session(bind=bind)
 
     mpa = get_mpa_from_url()
+    mpa = {"features": []}  # noqa
     for feat in mpa.get("features"):
         with session.begin():
             aoi_mpa = database_schema.AoiMpa(
diff --git a/cerulean_cloud/cloud_run_orchestrator/handler.py b/cerulean_cloud/cloud_run_orchestrator/handler.py
index 326059b9..e36b42df 100644
--- a/cerulean_cloud/cloud_run_orchestrator/handler.py
+++ b/cerulean_cloud/cloud_run_orchestrator/handler.py
@@ -527,14 +527,6 @@ async def _orchestrate(
                 closing_meters=0,
                 opening_meters=0,
             )
-            fc_count = len(merged_inferences.feature_collections)
-            print(
-                f"XXXC >> Sanity Check, there area {fc_count} Feature Collections being fed to merge_inferences."
-            )
-            print(
-                f"XXXC >> Checking the isolated_conf_multiplier: {merged_inferences.isolated_conf_multiplier}"
-            )
-            print("XXXC >> IF WE GET HERE THE MERGE INFERENCES WORKED")
 
             if merged_inferences.get("features"):
                 async with db_client.session.begin():

From 56c561e1b87fe239ecff207d64836c9c92742b65 Mon Sep 17 00:00:00 2001
From: christian <cjthomas730@gmail.com>
Date: Mon, 6 Nov 2023 14:12:40 -0500
Subject: [PATCH 15/18] Final edits and clean-up on the cloud_run_orchestrator
 for feature offsetting

---
 .../cloud_run_orchestrator/handler.py         | 26 +++----------------
 .../cloud_run_orchestrator/merging.py         |  6 ++---
 2 files changed, 5 insertions(+), 27 deletions(-)

diff --git a/cerulean_cloud/cloud_run_orchestrator/handler.py b/cerulean_cloud/cloud_run_orchestrator/handler.py
index e36b42df..c5032614 100644
--- a/cerulean_cloud/cloud_run_orchestrator/handler.py
+++ b/cerulean_cloud/cloud_run_orchestrator/handler.py
@@ -321,8 +321,7 @@ async def _orchestrate(
     base_group_bounds = group_bounds_from_list_of_bounds(base_tiles_bounds)
     print(f"base_group_bounds: {base_group_bounds}")
 
-    # XXXC - THIS IS THE START OF THE OFFSETTING. tiling.py was updated to allow for offset_amount to be declared by offset_bounds_from_base_tiles(),
-    # see tiling.py line 61.
+    # tiling.py was updated to allow for offset_amount to be declared by offset_bounds_from_base_tiles(), see tiling.py line 61.
     offset_tiles_bounds = offset_bounds_from_base_tiles(base_tiles, offset_amount=0.33)
     offset_group_shape = offset_group_shape_from_base_tiles(base_tiles, scale=scale)
     offset_group_bounds = group_bounds_from_list_of_bounds(offset_tiles_bounds)
@@ -343,18 +342,14 @@ async def _orchestrate(
     print(
         f"Original tiles are {len(base_tiles)}, {len(offset_group_bounds)}, {len(offset_2_group_bounds)}"
     )
-    # XXXC - THIS IS THE END OF THE OFFSETTING.
 
     # Filter out land tiles
     # XXXBUG is_tile_over_water throws ValueError if the scene crosses or is close to the antimeridian. Example: S1A_IW_GRDH_1SDV_20230726T183302_20230726T183327_049598_05F6CA_31E7
     # XXXBUG is_tile_over_water throws IndexError if the scene touches the Caspian sea (globe says it is NOT ocean, whereas our cloud_function_scene_relevancy says it is). Example: S1A_IW_GRDH_1SDV_20230727T025332_20230727T025357_049603_05F6F2_AF3E
     base_tiles = [t for t in base_tiles if is_tile_over_water(tiler.bounds(t))]
 
-    # XXXC - OFFSET TIlE BOUNDS OVER WATER
     offset_tiles_bounds = [b for b in offset_tiles_bounds if is_tile_over_water(b)]
-    print(f"offset_tiles_bounds: {offset_tiles_bounds}")
     offset_2_tiles_bounds = [b for b in offset_2_tiles_bounds if is_tile_over_water(b)]
-    print(f"offset_2_tiles_bounds: {offset_2_tiles_bounds}")
 
     ntiles = len(base_tiles)
     noffsettiles = len(offset_tiles_bounds)
@@ -421,29 +416,25 @@ async def _orchestrate(
                 "base tiles",
             )
 
-            # XXXC - START OFFSET 2
             offset_tiles_inference = await perform_inference(
                 offset_tiles_bounds,
                 cloud_run_inference.get_offset_tile_inference,
                 20,
                 "offset tiles",
             )
-            # XXXC - END OFFSET 2
 
-            # XXXC - START OFFSET 3
             offset_2_tiles_inference = await perform_inference(
                 offset_2_tiles_bounds,
                 cloud_run_inference.get_offset_tile_inference,  # THIS FUNCTION NEEDS TO BE EDITTED
                 20,
                 "offset2 tiles",
             )
-            # XXXC - END OFFSET 3
 
             if model.type == "MASKRCNN":
                 out_fc = geojson.FeatureCollection(
                     features=flatten_feature_list(base_tiles_inference)
                 )
-                # XXXC - OFFSET 3  >> THIS NEEDS TO BE EDITTED
+
                 out_fc_offset = geojson.FeatureCollection(
                     features=flatten_feature_list(offset_tiles_inference)
                 )
@@ -514,15 +505,8 @@ async def _orchestrate(
             print("XXXDEBUG out_fc_offset", out_fc_offset)
             print("XXXCDEBUG out_fc_offset2", out_fc_offset_2)
 
-            # XXXC - OFFSET 3 >> change code to allow number of offsets, pass in list of featureCollections (2 or 3)
-            print("XXXC >> TRYING TO MERGE INFERENCES")
-            merged_inferences = merge_inferences(  # changes to 15-16, give list of FCs 2-3 items
+            merged_inferences = merge_inferences(
                 feature_collections=[out_fc, out_fc_offset, out_fc_offset_2],
-                # out_fc,
-                # out_fc_offset,
-                # out_fc_offset_2,  # Added this as a test before giving merge a list
-                # Not declaring isolated_conf_multiplier because the default value is updated in merging.py
-                # isolated_conf_multiplier=0.3,  # Changed to ~.33 (1/len(# of FCs)), could be calculated in merging.py and document this. Originally 0.5
                 proximity_meters=500,
                 closing_meters=0,
                 opening_meters=0,
@@ -563,9 +547,6 @@ async def _orchestrate(
             end_time = datetime.now()
             print(f"End time: {end_time}")
             print("Returning results!")
-            print(
-                "XXXC >> IF WE GET HERE THE MERGE INFERENCES WORKED EVEN MORE, GOT TO LINE 565"
-            )
 
             async with db_client.session.begin():
                 orchestrator_run.success = True
@@ -587,5 +568,4 @@ async def _orchestrate(
                 ntiles=ntiles,
                 noffsettiles=noffsettiles,
             )
-    print("XXXC >> IF WE GET HERE EVERYTHING WORKED")
     return orchestrator_result
diff --git a/cerulean_cloud/cloud_run_orchestrator/merging.py b/cerulean_cloud/cloud_run_orchestrator/merging.py
index 515159a1..14231e42 100644
--- a/cerulean_cloud/cloud_run_orchestrator/merging.py
+++ b/cerulean_cloud/cloud_run_orchestrator/merging.py
@@ -14,9 +14,7 @@ def reproject_to_utm(gdf_wgs84):
 
 
 def merge_inferences(
-    feature_collections: List[
-        geojson.FeatureCollection
-    ],  # XXXC >> USING THIS AS A TEST replacement for base_tile_fc and offset_tile_fc
+    feature_collections: List[geojson.FeatureCollection],
     proximity_meters: int = 500,
     closing_meters: int = 0,
     opening_meters: int = 0,
@@ -58,7 +56,7 @@ def merge_inferences(
     if proximity_meters is not None:
         concat_gdf["geometry"] = concat_gdf.buffer(proximity_meters)
 
-    # Join the features that intersect with each other >> XXXC THIS MAY NEED REWORK, keep an eye on it :)
+    # Join the features that intersect with each other
     joined = gpd.sjoin(concat_gdf, concat_gdf, predicate="intersects").reset_index()
 
     # Create a graph where each node represents a feature and edges represent overlaps/intersections

From bb33c6dff29872c585d3ae0a5ae7f031c95e339a Mon Sep 17 00:00:00 2001
From: Jona <jona.raphael@gmail.com>
Date: Mon, 6 Nov 2023 14:46:19 -0500
Subject: [PATCH 16/18] Remove AOI hack

---
 alembic/versions/5e03ce584f3c_add_eez.py | 1 -
 alembic/versions/c0bd1215a3ca_add_iho.py | 1 -
 alembic/versions/f9b7166c86b7_add_mpa.py | 1 -
 3 files changed, 3 deletions(-)

diff --git a/alembic/versions/5e03ce584f3c_add_eez.py b/alembic/versions/5e03ce584f3c_add_eez.py
index f951dd2c..e3d728df 100644
--- a/alembic/versions/5e03ce584f3c_add_eez.py
+++ b/alembic/versions/5e03ce584f3c_add_eez.py
@@ -44,7 +44,6 @@ def upgrade() -> None:
     session = orm.Session(bind=bind)
 
     eez = get_eez_from_url()  # geojson.load(open("EEZ_and_HighSeas_20230410.json"))
-    eez = {"features": []}  # noqa
     for feat in eez.get("features"):
         sovereign_keys = [
             k for k in list(feat["properties"].keys()) if k.startswith("SOVEREIGN")
diff --git a/alembic/versions/c0bd1215a3ca_add_iho.py b/alembic/versions/c0bd1215a3ca_add_iho.py
index 841805cb..df574d85 100644
--- a/alembic/versions/c0bd1215a3ca_add_iho.py
+++ b/alembic/versions/c0bd1215a3ca_add_iho.py
@@ -36,7 +36,6 @@ def upgrade() -> None:
     session = orm.Session(bind=bind)
 
     iho = get_iho_from_url()
-    iho = {"features": []}  # noqa
     for feat in iho.get("features"):
         with session.begin():
             aoi_iho = database_schema.AoiIho(
diff --git a/alembic/versions/f9b7166c86b7_add_mpa.py b/alembic/versions/f9b7166c86b7_add_mpa.py
index e2d7ac19..a44e10d8 100644
--- a/alembic/versions/f9b7166c86b7_add_mpa.py
+++ b/alembic/versions/f9b7166c86b7_add_mpa.py
@@ -36,7 +36,6 @@ def upgrade() -> None:
     session = orm.Session(bind=bind)
 
     mpa = get_mpa_from_url()
-    mpa = {"features": []}  # noqa
     for feat in mpa.get("features"):
         with session.begin():
             aoi_mpa = database_schema.AoiMpa(

From 7209751a105dee3381ab45e2019ea4acb3713b16 Mon Sep 17 00:00:00 2001
From: christian <cjthomas730@gmail.com>
Date: Mon, 6 Nov 2023 15:55:30 -0500
Subject: [PATCH 17/18] Committing code changes to the correct branch this
 time. Minor edits, added description to the reproject_to_utm section of
 merging.

---
 alembic/versions/5e03ce584f3c_add_eez.py         | 1 -
 alembic/versions/c0bd1215a3ca_add_iho.py         | 1 -
 alembic/versions/f9b7166c86b7_add_mpa.py         | 1 -
 cerulean_cloud/cloud_run_orchestrator/handler.py | 6 +++---
 cerulean_cloud/cloud_run_orchestrator/merging.py | 5 ++++-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/alembic/versions/5e03ce584f3c_add_eez.py b/alembic/versions/5e03ce584f3c_add_eez.py
index f951dd2c..e3d728df 100644
--- a/alembic/versions/5e03ce584f3c_add_eez.py
+++ b/alembic/versions/5e03ce584f3c_add_eez.py
@@ -44,7 +44,6 @@ def upgrade() -> None:
     session = orm.Session(bind=bind)
 
     eez = get_eez_from_url()  # geojson.load(open("EEZ_and_HighSeas_20230410.json"))
-    eez = {"features": []}  # noqa
     for feat in eez.get("features"):
         sovereign_keys = [
             k for k in list(feat["properties"].keys()) if k.startswith("SOVEREIGN")
diff --git a/alembic/versions/c0bd1215a3ca_add_iho.py b/alembic/versions/c0bd1215a3ca_add_iho.py
index 841805cb..df574d85 100644
--- a/alembic/versions/c0bd1215a3ca_add_iho.py
+++ b/alembic/versions/c0bd1215a3ca_add_iho.py
@@ -36,7 +36,6 @@ def upgrade() -> None:
     session = orm.Session(bind=bind)
 
     iho = get_iho_from_url()
-    iho = {"features": []}  # noqa
     for feat in iho.get("features"):
         with session.begin():
             aoi_iho = database_schema.AoiIho(
diff --git a/alembic/versions/f9b7166c86b7_add_mpa.py b/alembic/versions/f9b7166c86b7_add_mpa.py
index e2d7ac19..a44e10d8 100644
--- a/alembic/versions/f9b7166c86b7_add_mpa.py
+++ b/alembic/versions/f9b7166c86b7_add_mpa.py
@@ -36,7 +36,6 @@ def upgrade() -> None:
     session = orm.Session(bind=bind)
 
     mpa = get_mpa_from_url()
-    mpa = {"features": []}  # noqa
     for feat in mpa.get("features"):
         with session.begin():
             aoi_mpa = database_schema.AoiMpa(
diff --git a/cerulean_cloud/cloud_run_orchestrator/handler.py b/cerulean_cloud/cloud_run_orchestrator/handler.py
index c5032614..65de98ec 100644
--- a/cerulean_cloud/cloud_run_orchestrator/handler.py
+++ b/cerulean_cloud/cloud_run_orchestrator/handler.py
@@ -340,7 +340,7 @@ async def _orchestrate(
     print(f"Offset 2 offset_group_bounds: {offset_2_group_bounds}")
 
     print(
-        f"Original tiles are {len(base_tiles)}, {len(offset_group_bounds)}, {len(offset_2_group_bounds)}"
+        f"Original tiles are {len(base_tiles)}, {len(offset_tiles_bounds)}, {len(offset_2_tiles_bounds)}"
     )
 
     # Filter out land tiles
@@ -425,7 +425,7 @@ async def _orchestrate(
 
             offset_2_tiles_inference = await perform_inference(
                 offset_2_tiles_bounds,
-                cloud_run_inference.get_offset_tile_inference,  # THIS FUNCTION NEEDS TO BE EDITTED
+                cloud_run_inference.get_offset_tile_inference,
                 20,
                 "offset2 tiles",
             )
@@ -551,7 +551,7 @@ async def _orchestrate(
             async with db_client.session.begin():
                 orchestrator_run.success = True
                 orchestrator_run.inference_end_time = end_time
-            # XXXC >> reduce num variables to a list of featureCollection, ala the merging approach
+
             orchestrator_result = OrchestratorResult(
                 classification_base=out_fc,
                 classification_offset=out_fc_offset,
diff --git a/cerulean_cloud/cloud_run_orchestrator/merging.py b/cerulean_cloud/cloud_run_orchestrator/merging.py
index 14231e42..15c629a2 100644
--- a/cerulean_cloud/cloud_run_orchestrator/merging.py
+++ b/cerulean_cloud/cloud_run_orchestrator/merging.py
@@ -28,7 +28,6 @@ def merge_inferences(
 
     Parameters:
     - feature_collections: A list of FeatureCollecitons to be merged, a primary and any secondary FeatureCollections
-    - isolated_conf_multiplier: A multiplier for the confidence of isolated features (default is 1 / len(feature_collections)).
     - proximity_meters: The distance to check for neighboring features and expand the geometries (default is 500m).
     - closing_meters: The distance to apply the morphological 'closing' operation (default is 0m).
     - opening_meters: The distance to apply the morphological 'opening' operation (default is 0m).
@@ -36,6 +35,10 @@ def merge_inferences(
     Returns:
     A merged geojson FeatureCollection.
     """
+    # We reproject to UTM for processing. This assumes that all offset images will either be in the same UTM zone as
+    # the input image chip, or that the difference that arise from an offset crossing into a second UTM zone will
+    # have little or no impact on comparison to the origina image.
+
     gdfs_for_processing = [
         reproject_to_utm(
             gpd.GeoDataFrame.from_features(fc["features"], crs=4326).assign(fc_index=i)

From 14c82009291daa157b53e29c38e0cdbe28c506c3 Mon Sep 17 00:00:00 2001
From: christian <cjthomas730@gmail.com>
Date: Mon, 6 Nov 2023 16:28:03 -0500
Subject: [PATCH 18/18] fixed typo

---
 cerulean_cloud/cloud_run_orchestrator/merging.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/cerulean_cloud/cloud_run_orchestrator/merging.py b/cerulean_cloud/cloud_run_orchestrator/merging.py
index 15c629a2..a722176e 100644
--- a/cerulean_cloud/cloud_run_orchestrator/merging.py
+++ b/cerulean_cloud/cloud_run_orchestrator/merging.py
@@ -37,8 +37,7 @@ def merge_inferences(
     """
     # We reproject to UTM for processing. This assumes that all offset images will either be in the same UTM zone as
     # the input image chip, or that the difference that arise from an offset crossing into a second UTM zone will
-    # have little or no impact on comparison to the origina image.
-
+    # have little or no impact on comparison to the original image.
     gdfs_for_processing = [
         reproject_to_utm(
             gpd.GeoDataFrame.from_features(fc["features"], crs=4326).assign(fc_index=i)