modify zoo model to allow the model to compute segmentations without …

…a coastseg session, move filter_segmentations to classifier, and add a script that will run on planet imagery
SatelliteShorelines · Dec 19, 2024 · 8b2c126 · 8b2c126
1 parent 81c607c
commit 8b2c126
Show file tree

Hide file tree

Showing 6 changed files with 366 additions and 54 deletions.
diff --git a/8_extract_shorelines_planet.py b/8_extract_shorelines_planet.py
@@ -0,0 +1,160 @@
+from coastseg import zoo_model
+from coastseg.tide_correction import compute_tidal_corrections
+from coastseg import file_utilities
+from coastseg import classifier
+import os
+from coastseg import raster_utils
+import numpy as np
+# The Zoo Model is a machine learning model that can be used to extract shorelines from satellite imagery.
+# This script will only run a single ROI at a time. If you want to run multiple ROIs, you will need to run this script multiple times.
+
+# Extract Shoreline Settings
+settings ={
+    'min_length_sl': 100,       # minimum length (m) of shoreline perimeter to be valid
+    'max_dist_ref':500,         # maximum distance (m) from reference shoreline to search for valid shorelines. This detrmines the width of the buffer around the reference shoreline  
+    'cloud_thresh': 0.5,        # threshold on maximum cloud cover (0-1). If the cloud cover is above this threshold, no shorelines will be extracted from that image
+    'dist_clouds': 100,         # distance(m) around clouds where shoreline will not be mapped
+    'min_beach_area': 50,      # minimum area (m^2) for an object to be labelled as a beach
+    'sand_color': 'default',    # 'default', 'latest', 'dark' (for grey/black sand beaches) or 'bright' (for white sand beaches)
+    "apply_cloud_mask": True,   # apply cloud mask to the imagery. If False, the cloud mask will not be applied.
+}
+
+
+# The model can be run using the following settings:
+model_setting = {
+            "sample_direc": None, # directory of jpgs  ex. C:/Users/username/CoastSeg/data/ID_lla12_datetime11-07-23__08_14_11/jpg_files/preprocessed/RGB/",
+            "use_GPU": "0",  # 0 or 1 0 means no GPU
+            "implementation": "BEST",  # BEST or ENSEMBLE 
+            "model_type": "global_segformer_RGB_4class_14036903", # model name from the zoo
+            "otsu": False, # Otsu Thresholding
+            "tta": False,  # Test Time Augmentation
+        }
+# Available models can run input "RGB" # or "MNDWI" or "NDWI"
+img_type = "RGB"  # make sure the model name is compatible with the image type
+# percentage of no data allowed in the image eg. 0.75 means 75% of the image can be no data
+percent_no_data = 0.75
+
+# Assume the planet data is in this format
+# sitename
+# ├── jpg_files
+# │   ├── preprocessed
+# │   │   ├── RGB
+# │   │   │   ├── 2020-06-01-21-16-34_RGB_PS.jpg
+# │   │   │__NIR
+# │   │   │   ├── 2020-06-01-21-16-34_NIR_PS.jpg
+# │  
+# │ _PS
+# │   ├──meta
+# │   │   ├── 20200601_211634_44_2277_3B_AnalyticMS_toar_clip.txt
+# │   ├──ms
+# │   │   ├── 20200601_211634_44_2277_3B_AnalyticMS_toar_clip.tif
+# │   ├──cloud_mask
+# │   │   ├── 20200601_211634_44_2277_3B_udm2_clip.tif
+
+# This script assumes the site directory is in the format of the above structure and is provided
+session_dir = r'C:\development\coastseg-planet\downloads\UNALAKLEET_pier_cloud_0.7_TOAR_enabled_2020-06-01_to_2023-08-01\e2821741-0677-435a-a93a-d4f060f3adf4\coregistered'
+
+# Make a simpler version of run model that doesn't need config
+
+# Make a simpler version of extract shorelines that doesn't need config
+
+zoo_model_instance = zoo_model.Zoo_Model()
+
+# save settings to the zoo model instance
+settings.update(model_setting)
+# save the settings to the model instance
+zoo_model_instance.set_settings(**settings)
+
+model_implementation = settings.get('implementation', "BEST")
+model_name = settings.get('model_type', None)
+RGB_directory = os.path.join(session_dir, 'jpg_files', 'preprocessed', 'RGB')
+
+output_directory = os.path.join(session_dir, 'segmentations')
+os.makedirs(output_directory, exist_ok=True)
+
+
+zoo_model_instance.run_model_on_directory(output_directory, RGB_directory,model_name )
+
+# Now run the segmentation classifier on the output directory
+classifier.filter_segmentations(output_directory,threshold=0.50)
+
+# Extract shorelines from directory
+satellite = 'PS' # planet
+meta_dir = os.path.join(session_dir, satellite, 'meta')
+ms_dir = os.path.join(session_dir, satellite, 'ms')
+udm2_dir = os.path.join(session_dir, satellite, 'udm2')
+filepath =  [ms_dir,udm2_dir] # get the directories of the ms and udm2 files
+
+pixel_size = 3.0 # meters For PlanetScope
+
+# get the minimum beach area in number of pixels depending on the satellite
+settings["min_length_sl"] 
+
+# whatever file is currently being processed
+filename =''
+
+# get date
+date = filename[:19]
+
+# get filepath of ms and udm2 of this particular filenmae
+fn = [os.path.join(ms_dir, filename), os.path.join(udm2_dir, filename)]
+
+
+# basically this function should do what preprocess_single does
+
+# filepaths to .tif files
+fn_ms = fn[0]
+fn_mask = fn[1]
+
+im_ms, georef = raster_utils.read_ms_planet(fn_ms)
+cloud_mask = raster_utils.read_cloud_mask_planet(fn_mask)
+im_nodata = raster_utils.read_nodata_mask_planet(fn_mask)
+combined_mask = np.logical_or(cloud_mask, im_nodata)
+
+# Note may need to add code to add 0 pixels to the no data mask later
+
+# Only do this is apply_cloud_mask is True
+cloud_mask = np.logical_or(cloud_mask, im_nodata)
+
+# Here is where we would get rid of the image if cloud cover or no data was above a certain threshold
+
+# this is the next step in the process find the shorelines
+    # ref_shoreline_buffer = SDS_shoreline.create_shoreline_buffer(
+    #     cloud_mask.shape, georef, image_epsg, pixel_size, settings
+    # )
+    # # read the model outputs from the npz file for this image
+    # npz_file = find_matching_npz(filename, os.path.join(session_path, "good"))
+    # if npz_file is None:
+    #     npz_file = find_matching_npz(filename, session_path)
+    # if npz_file is None:
+    #     logger.warning(f"npz file not found for {filename}")
+    #     return None
+
+    # # get the labels for water and land
+    # land_mask = load_merged_image_labels(npz_file, class_indices=class_indices)
+    # all_labels = load_image_labels(npz_file)
+
+    # min_beach_area = settings["min_beach_area"]
+    # land_mask = remove_small_objects_and_binarize(land_mask, min_beach_area)
+
+    # # get the shoreline from the image
+    # shoreline = find_shoreline(
+    #     fn,
+    #     image_epsg,
+    #     settings,
+    #     np.logical_xor(cloud_mask, im_nodata),
+    #     cloud_mask,
+    #     im_nodata,
+    #     georef,
+    #     land_mask,
+    #     ref_shoreline_buffer,
+    # )
+
+# zoo_model_instance.run_model_and_extract_shorelines(
+#             model_setting["sample_direc"],
+#             session_name=model_session_name,
+#             shoreline_path=shoreline_path,
+#             transects_path=transects_path,
+#             shoreline_extraction_area_path = shoreline_extraction_area_path,
+#             coregistered=True,
+#         )
diff --git a/src/coastseg/classifier.py b/src/coastseg/classifier.py
@@ -11,6 +11,31 @@
 
 # Some of these functions were originally written by Mark Lundine and have been modified for this project.
 
+def filter_segmentations(
+    session_path: str,
+    threshold: float = 0.40,
+) -> str:
+    """
+    Sort model output files into "good" and "bad" folders based on the satellite name in the filename.
+    Applies the land mask to the model output files in the "good" folder.
+
+    Args:
+        session_path (str): The path to the session directory containing the model output files.
+
+    Returns:
+        str: The path to the "good" folder containing the sorted model output files.
+    """
+    segmentation_classifier = get_segmentation_classifier()
+    good_path = os.path.join(session_path, "good")
+    csv_path,good_path,bad_path = run_inference_segmentation_classifier(segmentation_classifier,
+                    session_path,
+                    session_path,
+                    good_path=good_path,
+                    threshold=threshold)
+    # if the good folder does not exist then this means the classifier could not find any png files at the session path and something went wrong
+    if not os.path.exists(good_path):
+        raise FileNotFoundError(f"No model output files found at {session_path}. Shoreline Filtering failed.")
+    return good_path
 
 def move_matching_files(input_image_path, search_string, file_exts, target_dir):
     """

diff --git a/src/coastseg/extracted_shoreline.py b/src/coastseg/extracted_shoreline.py
@@ -1486,30 +1486,6 @@ def extract_shorelines_with_dask(
     return combine_satellite_data(shoreline_dict)
 
 
-def filter_segmentations(
-    session_path: str,
-) -> str:
-    """
-    Sort model output files into "good" and "bad" folders based on the satellite name in the filename.
-    Applies the land mask to the model output files in the "good" folder.
-
-    Args:
-        session_path (str): The path to the session directory containing the model output files.
-
-    Returns:
-        str: The path to the "good" folder containing the sorted model output files.
-    """
-    segmentation_classifier = classifier.get_segmentation_classifier()
-    good_path = os.path.join(session_path, "good")
-    csv_path,good_path,bad_path = classifier.run_inference_segmentation_classifier(segmentation_classifier,
-                    session_path,
-                    session_path,
-                    good_path=good_path,
-                    threshold=0.40)
-    # if the good folder does not exist then this means the classifier could not find any png files at the session path and something went wrong
-    if not os.path.exists(good_path):
-        raise FileNotFoundError(f"No model output files found at {session_path}. Shoreline Filtering failed.")
-    return good_path
 
 
 def get_min_shoreline_length(satname: str, default_min_length_sl: float) -> int:
@@ -1972,7 +1948,7 @@ def create_extracted_shorelines_from_session(
             return self 
 
         # Filter the segmentations to only include the good segmentations, then update the metadata to only include the files with the good segmentations
-        good_directory = filter_segmentations(session_path)
+        good_directory = classifier.filter_segmentations(session_path)
         metadata= common.filter_metadata_with_dates(metadata,good_directory,file_type="npz") 
 
         extracted_shorelines_dict = extract_shorelines_with_dask(

diff --git a/src/coastseg/file_utilities.py b/src/coastseg/file_utilities.py
@@ -325,8 +325,8 @@ def find_parent_directory(
                               continue until the top-level directory is reached.
 
     Returns:
-        str or None: The path to the parent directory containing the directory with
-                     the specified name, or None if the directory is not found.
+        str : The path to the parent directory containing the directory with
+                     the specified name, or "" if the directory is not found.
     """
     while True:
         # check if the current directory name contains the target directory name
@@ -341,7 +341,7 @@ def find_parent_directory(
             print(
                 f"Reached top-level directory without finding '{directory_name}':", path
             )
-            return None
+            return ""
 
         # update the path to the parent directory and continue the loop
         path = parent_dir

diff --git a/src/coastseg/raster_utils.py b/src/coastseg/raster_utils.py
@@ -0,0 +1,73 @@
+from osgeo import gdal
+import numpy as np
+# import rasterio
+
+def read_bands(filename: str, satname: str = "") -> list:
+    """
+    Read all the raster bands of a geospatial image file using GDAL.
+
+    This function opens the provided geospatial file using GDAL in read-only mode
+    and extracts each of the raster bands as a separate array. Each array represents
+    the values of the raster band across the entire spatial extent of the image.
+
+    Parameters:
+    -----------
+    filename : str
+        The path to the geospatial image file (e.g., a GeoTIFF) to be read.
+
+    Returns:
+    --------
+    list of np.ndarray
+        A list containing 2D numpy arrays. Each array in the list represents
+        one band from the geospatial image. The order of the arrays in the list
+        corresponds to the order of the bands in the original image.
+
+    Notes:
+    ------
+    This function relies on GDAL for reading geospatial data. Ensure that GDAL
+    is properly installed and available in the Python environment.
+
+    Example:
+    --------
+    bands = read_bands('path/to/image.tif')
+    """
+    data = gdal.Open(filename, gdal.GA_ReadOnly)
+    # save the contents of each raster band as an array and save each array to the bands list
+    # save separate NumPy array for each raster band in the dataset, with each array representing the pixel values of the corresponding band
+    if satname == "S2" and data.RasterCount == 5:
+        bands = [
+            data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount - 1)
+        ]
+    else:
+        bands = [
+            data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)
+        ]
+    return bands
+
+def read_ms_planet(fn_ms):
+    # read ms bands
+    data = gdal.Open(fn_ms, gdal.GA_ReadOnly)
+    georef = np.array(data.GetGeoTransform())
+    bands = read_bands(fn_ms)
+    im_ms = np.stack(bands, 2)
+    return im_ms, georef
+
+def read_cloud_mask_planet(filepath: str,cloud_mask_band = 6) -> np.ndarray:
+    """The UDM mask in planet has a lot more data so the cloud mask is different"""
+    data = gdal.Open(filepath, gdal.GA_ReadOnly)
+    cloud_mask = data.GetRasterBand(cloud_mask_band).ReadAsArray()
+    return cloud_mask
+
+def read_nodata_mask_planet(filepath: str,nodata_band=8) -> np.ndarray:
+    """The UDM mask in planet has a lot more data so the cloud mask is different"""
+    data = gdal.Open(filepath, gdal.GA_ReadOnly)
+    im_nodata = data.GetRasterBand(nodata_band).ReadAsArray()
+    return im_nodata
+
+# Example usage:
+# udm_path = r"C:\development\coastseg-planet\downloads\UNALAKLEET_pier_cloud_0.7_TOAR_enabled_2020-06-01_to_2023-08-01\e2821741-0677-435a-a93a-d4f060f3adf4\coregistered\PS\udm2\2020-06-11-21-52-01_3B_AnalyticMS_toar_clip.tif"
+# ms_path = r"C:\development\coastseg-planet\downloads\UNALAKLEET_pier_cloud_0.7_TOAR_enabled_2020-06-01_to_2023-08-01\e2821741-0677-435a-a93a-d4f060f3adf4\coregistered\PS\ms\2020-06-11-21-52-01_3B_AnalyticMS_toar_clip.tif"
+
+# print(np.all(read_cloud_mask_planet(udm_path))) # it works!
+# print(read_ms_planet(ms_path)) # it works!
+