Bug/memory loss #126

Closed

46 commits
0eff4aa
singleton engine, and garbage collection
jonaraphael Sep 18, 2024
b2924db
Clean up some engine use in orchestrator
jonaraphael Sep 18, 2024
b112974
oops
jonaraphael Sep 19, 2024
8004447
Clean up singleton use
jonaraphael Sep 19, 2024
dff7c4c
Was closing the problem?
jonaraphael Sep 20, 2024
e5c1522
orchestrator object needed to persist across multiple db connections
jonaraphael Sep 20, 2024
7bcf88e
oops--forgot to flush
jonaraphael Sep 20, 2024
e888ced
Swap engine maintenance to fastapi's app lifetime.
jonaraphael Sep 24, 2024
88672a5
Garbage collection
jonaraphael Sep 24, 2024
fc2bd8e
update AIS as well
jonaraphael Sep 24, 2024
554a94c
Merge branch 'bug/engine-maintenance' into bug/memory-loss
jonaraphael Sep 24, 2024
cf254f1
Oops! Deleted before assignment...
jonaraphael Sep 24, 2024
8bf5803
more debug statement
jonaraphael Sep 25, 2024
7f24c5f
Merge branch 'main' into bug/memory-loss
jonaraphael Nov 26, 2024
06d8ea1
remove duplicate stitching/ensembling
sstill88 Dec 11, 2024
673d687
updated logging, added retries to get_bounds
sstill88 Dec 13, 2024
885734e
more logs
sstill88 Dec 16, 2024
712325f
added retries and cleared some MemoryFiles
sstill88 Dec 16, 2024
2d3035c
reformat
sstill88 Dec 16, 2024
b03b482
moved more print --> logger
sstill88 Dec 16, 2024
426df03
oops, revert one change to MemoryFile back
sstill88 Dec 17, 2024
af03c6c
clear memory when run_parallel_inference fails
sstill88 Dec 17, 2024
17f6d94
return None if fail inf fetch_and_process_image
sstill88 Dec 17, 2024
0860f3d
error logs
sstill88 Dec 17, 2024
29c228f
undo memfile clear; returns the memfile
sstill88 Dec 17, 2024
0118820
converted log text payloads to json payloads
sstill88 Dec 18, 2024
5e12ef7
make logs json serializable
sstill88 Dec 18, 2024
2c553e9
removed log formatting (return jsonPayload)
sstill88 Dec 18, 2024
0b2b72c
update logs
sstill88 Dec 18, 2024
5a50854
clearer logs for if image is empty vs there is no imagery
sstill88 Dec 18, 2024
377bd34
exception --> error
sstill88 Dec 18, 2024
070b736
moved structured_log to utils and added severity
sstill88 Dec 18, 2024
c4dd899
add more retries
sstill88 Dec 18, 2024
1b54756
raise error instead of return None
sstill88 Dec 18, 2024
6c4dd26
import logging correctly...
sstill88 Dec 18, 2024
766a4eb
add method=get to test
sstill88 Dec 19, 2024
9b444b1
add cloud_run_orchestrator/utils.py to ais cloud build
sstill88 Dec 19, 2024
c3d5293
typo...
sstill88 Dec 19, 2024
9287563
make cloud_run_orchestrator directory in action yaml
sstill88 Dec 19, 2024
32628c9
bugfix and add separate try/except for roda (after titiler metadata)
sstill88 Dec 20, 2024
ab93827
added tracebacks to errors
sstill88 Dec 20, 2024
30e874c
oops. removed some nested try/except/if weirdness
sstill88 Dec 20, 2024
d21a9ad
create separate error for if asyncio.gather fails
sstill88 Dec 20, 2024
a34d604
typo - make sure aux_datasets are cleared
sstill88 Dec 20, 2024
be9cfe9
reduced log size when possible
sstill88 Dec 20, 2024
56ccaca
return RuntimeErrors instead of None
sstill88 Dec 20, 2024
9 changes: 8 additions & 1 deletion cerulean_cloud/cloud_function_ais_analysis/main.py
@@ -13,6 +13,10 @@

from cerulean_cloud.database_client import DatabaseClient, get_engine

# Initialize the database engine globally to reuse across requests
# This improves performance by avoiding the overhead of creating a new engine for each request
DB_ENGINE = get_engine()


def verify_api_key(request):
"""Function to verify API key"""
@@ -57,7 +61,10 @@ def main(request):
verify_api_key(request)
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
res = loop.run_until_complete(handle_asa_request(request))
try:
res = loop.run_until_complete(handle_asa_request(request))
finally:
loop.close() # Ensure the event loop is properly closed
return res


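For context, a minimal sketch of the pattern the main.py changes above apply: a module-level engine created once at import time and reused across invocations, plus a per-request event loop that is always closed in a finally block. get_engine and handle_asa_request come from the diff; the handler body below is a placeholder, not the real implementation.

import asyncio

from cerulean_cloud.database_client import get_engine

# Created once at import time so every invocation reuses the same engine
# (and its connection pool) instead of building a new one per request.
DB_ENGINE = get_engine()


async def handle_asa_request(request):
    """Placeholder for the real async handler shown in the diff; it can reuse DB_ENGINE."""
    ...


def main(request):
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        res = loop.run_until_complete(handle_asa_request(request))
    finally:
        loop.close()  # Release the loop even if the handler raises
    return res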
54 changes: 40 additions & 14 deletions cerulean_cloud/cloud_run_orchestrator/clients.py
@@ -1,6 +1,7 @@
"""Clients for other cloud run functions"""

import asyncio
import gc # Import garbage collection module
import json
import os
import zipfile
@@ -26,7 +27,7 @@


def img_array_to_b64_image(img_array: np.ndarray, to_uint8=False) -> str:
"""convert input b64image to torch tensor"""
"""Convert input image array to base64-encoded image."""
if to_uint8 and not img_array.dtype == np.uint8:
print(
f"WARNING: changing from dtype {img_array.dtype} to uint8 without scaling!"
@@ -47,7 +48,7 @@ def img_array_to_b64_image(img_array: np.ndarray, to_uint8=False) -> str:


class CloudRunInferenceClient:
"""Client for inference cloud run"""
"""Client for inference cloud run."""

def __init__(
self,
@@ -60,15 +61,17 @@ def __init__(
scale: int,
model_dict,
):
"""init"""
"""Initialize the inference client."""
self.url = url
self.titiler_client = titiler_client
self.sceneid = sceneid
self.scale = scale # 1=256, 2=512, 3=...
self.model_dict = model_dict

# Handle auxiliary datasets and ensure they are properly managed
self.aux_datasets = handle_aux_datasets(
layers, self.sceneid, tileset_envelope_bounds, image_hw_pixels
)
self.scale = scale # 1=256, 2=512, 3=...
self.model_dict = model_dict

async def fetch_and_process_image(
self, tile_bounds, rescale=(0, 255), num_channels=1
@@ -117,6 +120,9 @@ async def process_auxiliary_datasets(self, img_array, tile_bounds):
window = rasterio.windows.from_bounds(*tile_bounds, transform=src.transform)
height, width = img_array.shape[1:]
aux_ds = src.read(window=window, out_shape=(src.count, height, width))

del src
gc.collect()
return np.concatenate([img_array, aux_ds], axis=0)

async def send_inference_request_and_handle_response(self, http_client, img_array):
@@ -134,9 +140,6 @@

Raises:
- Exception: If the request fails or the service returns an unexpected status code, with details provided in the exception message.

Note:
- This function constructs the inference payload by encoding the image and specifying the geographic bounds and any additional inference parameters through `self.model_dict`.
"""

encoded = img_array_to_b64_image(img_array, to_uint8=True)
@@ -190,9 +193,12 @@ async def get_tile_inference(self, http_client, tile_bounds, rescale=(0, 255)):
return InferenceResultStack(stack=[])
if self.aux_datasets:
img_array = await self.process_auxiliary_datasets(img_array, tile_bounds)
return await self.send_inference_request_and_handle_response(
res = await self.send_inference_request_and_handle_response(
http_client, img_array
)
del img_array
gc.collect()
return res

async def run_parallel_inference(self, tileset):
"""
@@ -216,12 +222,17 @@
inferences = await asyncio.gather(*tasks, return_exceptions=False)
# False means this process will error out if any subtask errors out
# True means this process will return a list including errors if any subtask errors out

# After processing, close and delete aux_datasets
if self.aux_datasets:
del self.aux_datasets
gc.collect()

return inferences


def get_scene_date_month(scene_id: str) -> str:
"""From a scene id, fetch the month of the scene"""
# i.e. S1A_IW_GRDH_1SDV_20200802T141646_20200802T141711_033729_03E8C7_E4F5
"""From a scene id, fetch the month of the scene."""
date_time_str = scene_id[17:32]
date_time_obj = datetime.strptime(date_time_str, "%Y%m%dT%H%M%S")
date_time_obj = date_time_obj.replace(day=1, hour=0, minute=0, second=0)
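As an aside on the run_parallel_inference comments above: a small, self-contained illustration of how asyncio.gather behaves with return_exceptions=False (the first failing subtask propagates) versus True (exceptions come back as list entries). The coroutine names here are made up for the example and are not part of this PR.

import asyncio


async def ok(i):
    return i


async def boom():
    raise RuntimeError("subtask failed")


async def demo():
    # return_exceptions=True: failures are returned as values in the result list
    results = await asyncio.gather(ok(1), boom(), ok(2), return_exceptions=True)
    print(results)  # [1, RuntimeError('subtask failed'), 2]

    # return_exceptions=False (the default): the first failure is raised here
    try:
        await asyncio.gather(ok(1), boom(), ok(2))
    except RuntimeError as err:
        print(f"gather raised: {err}")


asyncio.run(demo())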
@@ -235,7 +246,7 @@ def get_ship_density(
max_dens=100,
url="http://gmtds.maplarge.com/Api/ProcessDirect?",
) -> np.ndarray:
"""fetch ship density from gmtds service"""
"""Fetch ship density from gmtds service."""
h, w = img_shape
bbox_wms = bounds[0], bounds[2], bounds[1], bounds[-1]

@@ -322,6 +333,10 @@ def get_query(bbox_wms, scene_date_month):

dens_array = ar / (max_dens / 255)
dens_array[dens_array >= 255] = 255

del tempbuf, zipfile_ob, cont, r, ar
gc.collect()

return np.squeeze(dens_array.astype("uint8"))


@@ -331,7 +346,7 @@ def get_dist_array(
raster_ds: str,
max_distance: int = 60000,
):
"""fetch distance array from pre computed distance raster dataset"""
"""Fetch distance array from pre-computed distance raster dataset."""
with COGReader(raster_ds) as image:
height, width = img_shape[0:2]
img = image.part(
@@ -340,6 +355,7 @@
width=width,
)
data = img.data_as_image()

if (data == 0).all():
data = np.ones(img_shape) * 255
else:
@@ -350,13 +366,17 @@
data, (*img_shape[0:2], 1), order=1, preserve_range=True
) # resampling interpolation must match training data prep
upsampled = np.squeeze(upsampled)

del data, img
gc.collect()

return upsampled.astype(np.uint8)


def handle_aux_datasets(
layers, scene_id, tileset_envelope_bounds, image_hw_pixels, **kwargs
):
"""handle aux datasets"""
"""Handle auxiliary datasets."""
if layers[0].short_name != "VV":
raise NotImplementedError(
f"VV Layer must come first. Instead found: {layers[0].short_name}"
@@ -389,6 +409,9 @@
[aux_dataset_channels, ar], axis=2
)

del ar
gc.collect()

aux_memfile = MemoryFile()
if aux_dataset_channels is not None:
height, width = aux_dataset_channels.shape[0:2]
@@ -408,6 +431,9 @@
) as dataset:
dataset.write(reshape_as_raster(aux_dataset_channels))

del aux_dataset_channels
gc.collect()

return aux_memfile
else:
return None
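Finally, a condensed sketch of the memory-management pattern these clients.py changes apply around large intermediate arrays and rasterio MemoryFile objects: write the stacked channels into an in-memory GeoTIFF, drop the local reference, call gc.collect(), and return only the MemoryFile. build_aux_memfile is a hypothetical helper name and the profile is simplified (no CRS or transform), so treat it as an illustration rather than the code in this PR.

import gc

import numpy as np
from rasterio.io import MemoryFile
from rasterio.plot import reshape_as_raster


def build_aux_memfile(aux_dataset_channels: np.ndarray) -> MemoryFile:
    """Write stacked auxiliary channels (height, width, channels) to an in-memory GeoTIFF."""
    height, width, count = aux_dataset_channels.shape
    aux_memfile = MemoryFile()
    with aux_memfile.open(
        driver="GTiff",
        count=count,
        height=height,
        width=width,
        dtype=aux_dataset_channels.dtype.name,
    ) as dataset:
        dataset.write(reshape_as_raster(aux_dataset_channels))

    # Drop this function's reference and nudge the collector, mirroring the diff;
    # the array is only freed once no other references remain.
    del aux_dataset_channels
    gc.collect()
    return aux_memfile


# Example: three 512x512 uint8 channels
channels = np.zeros((512, 512, 3), dtype="uint8")
memfile = build_aux_memfile(channels)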