Skip to content

Commit

Permalink
modify zoo model to allow the model to compute segmentations without …
Browse files Browse the repository at this point in the history
…a coastseg session, move filter_segmentations to classifier, and add a script that will run on planet imagery
  • Loading branch information
2320sharon committed Dec 19, 2024
1 parent 81c607c commit 8b2c126
Show file tree
Hide file tree
Showing 6 changed files with 366 additions and 54 deletions.
160 changes: 160 additions & 0 deletions 8_extract_shorelines_planet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
from coastseg import zoo_model
from coastseg.tide_correction import compute_tidal_corrections
from coastseg import file_utilities
from coastseg import classifier
import os
from coastseg import raster_utils
import numpy as np
# The Zoo Model is a machine learning model that can be used to extract shorelines from satellite imagery.
# This script will only run a single ROI at a time. If you want to run multiple ROIs, you will need to run this script multiple times.

# Extract Shoreline Settings
settings ={
'min_length_sl': 100, # minimum length (m) of shoreline perimeter to be valid
'max_dist_ref':500, # maximum distance (m) from reference shoreline to search for valid shorelines. This detrmines the width of the buffer around the reference shoreline
'cloud_thresh': 0.5, # threshold on maximum cloud cover (0-1). If the cloud cover is above this threshold, no shorelines will be extracted from that image
'dist_clouds': 100, # distance(m) around clouds where shoreline will not be mapped
'min_beach_area': 50, # minimum area (m^2) for an object to be labelled as a beach
'sand_color': 'default', # 'default', 'latest', 'dark' (for grey/black sand beaches) or 'bright' (for white sand beaches)
"apply_cloud_mask": True, # apply cloud mask to the imagery. If False, the cloud mask will not be applied.
}


# The model can be run using the following settings:
model_setting = {
"sample_direc": None, # directory of jpgs ex. C:/Users/username/CoastSeg/data/ID_lla12_datetime11-07-23__08_14_11/jpg_files/preprocessed/RGB/",
"use_GPU": "0", # 0 or 1 0 means no GPU
"implementation": "BEST", # BEST or ENSEMBLE
"model_type": "global_segformer_RGB_4class_14036903", # model name from the zoo
"otsu": False, # Otsu Thresholding
"tta": False, # Test Time Augmentation
}
# Available models can run input "RGB" # or "MNDWI" or "NDWI"
img_type = "RGB" # make sure the model name is compatible with the image type
# percentage of no data allowed in the image eg. 0.75 means 75% of the image can be no data
percent_no_data = 0.75

# Assume the planet data is in this format
# sitename
# ├── jpg_files
# │ ├── preprocessed
# │ │ ├── RGB
# │ │ │ ├── 2020-06-01-21-16-34_RGB_PS.jpg
# │ │ │__NIR
# │ │ │ ├── 2020-06-01-21-16-34_NIR_PS.jpg
# │
# │ _PS
# │ ├──meta
# │ │ ├── 20200601_211634_44_2277_3B_AnalyticMS_toar_clip.txt
# │ ├──ms
# │ │ ├── 20200601_211634_44_2277_3B_AnalyticMS_toar_clip.tif
# │ ├──cloud_mask
# │ │ ├── 20200601_211634_44_2277_3B_udm2_clip.tif

# This script assumes the site directory is in the format of the above structure and is provided
session_dir = r'C:\development\coastseg-planet\downloads\UNALAKLEET_pier_cloud_0.7_TOAR_enabled_2020-06-01_to_2023-08-01\e2821741-0677-435a-a93a-d4f060f3adf4\coregistered'

# Make a simpler version of run model that doesn't need config

# Make a simpler version of extract shorelines that doesn't need config

zoo_model_instance = zoo_model.Zoo_Model()

# save settings to the zoo model instance
settings.update(model_setting)
# save the settings to the model instance
zoo_model_instance.set_settings(**settings)

model_implementation = settings.get('implementation', "BEST")
model_name = settings.get('model_type', None)
RGB_directory = os.path.join(session_dir, 'jpg_files', 'preprocessed', 'RGB')

output_directory = os.path.join(session_dir, 'segmentations')
os.makedirs(output_directory, exist_ok=True)


zoo_model_instance.run_model_on_directory(output_directory, RGB_directory,model_name )

# Now run the segmentation classifier on the output directory
classifier.filter_segmentations(output_directory,threshold=0.50)

# Extract shorelines from directory
satellite = 'PS' # planet
meta_dir = os.path.join(session_dir, satellite, 'meta')
ms_dir = os.path.join(session_dir, satellite, 'ms')
udm2_dir = os.path.join(session_dir, satellite, 'udm2')
filepath = [ms_dir,udm2_dir] # get the directories of the ms and udm2 files

pixel_size = 3.0 # meters For PlanetScope

# get the minimum beach area in number of pixels depending on the satellite
settings["min_length_sl"]

# whatever file is currently being processed
filename =''

# get date
date = filename[:19]

# get filepath of ms and udm2 of this particular filenmae
fn = [os.path.join(ms_dir, filename), os.path.join(udm2_dir, filename)]


# basically this function should do what preprocess_single does

# filepaths to .tif files
fn_ms = fn[0]
fn_mask = fn[1]

im_ms, georef = raster_utils.read_ms_planet(fn_ms)
cloud_mask = raster_utils.read_cloud_mask_planet(fn_mask)
im_nodata = raster_utils.read_nodata_mask_planet(fn_mask)
combined_mask = np.logical_or(cloud_mask, im_nodata)

# Note may need to add code to add 0 pixels to the no data mask later

# Only do this is apply_cloud_mask is True
cloud_mask = np.logical_or(cloud_mask, im_nodata)

# Here is where we would get rid of the image if cloud cover or no data was above a certain threshold

# this is the next step in the process find the shorelines
# ref_shoreline_buffer = SDS_shoreline.create_shoreline_buffer(
# cloud_mask.shape, georef, image_epsg, pixel_size, settings
# )
# # read the model outputs from the npz file for this image
# npz_file = find_matching_npz(filename, os.path.join(session_path, "good"))
# if npz_file is None:
# npz_file = find_matching_npz(filename, session_path)
# if npz_file is None:
# logger.warning(f"npz file not found for {filename}")
# return None

# # get the labels for water and land
# land_mask = load_merged_image_labels(npz_file, class_indices=class_indices)
# all_labels = load_image_labels(npz_file)

# min_beach_area = settings["min_beach_area"]
# land_mask = remove_small_objects_and_binarize(land_mask, min_beach_area)

# # get the shoreline from the image
# shoreline = find_shoreline(
# fn,
# image_epsg,
# settings,
# np.logical_xor(cloud_mask, im_nodata),
# cloud_mask,
# im_nodata,
# georef,
# land_mask,
# ref_shoreline_buffer,
# )

# zoo_model_instance.run_model_and_extract_shorelines(
# model_setting["sample_direc"],
# session_name=model_session_name,
# shoreline_path=shoreline_path,
# transects_path=transects_path,
# shoreline_extraction_area_path = shoreline_extraction_area_path,
# coregistered=True,
# )
25 changes: 25 additions & 0 deletions src/coastseg/classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,31 @@

# Some of these functions were originally written by Mark Lundine and have been modified for this project.

def filter_segmentations(
session_path: str,
threshold: float = 0.40,
) -> str:
"""
Sort model output files into "good" and "bad" folders based on the satellite name in the filename.
Applies the land mask to the model output files in the "good" folder.
Args:
session_path (str): The path to the session directory containing the model output files.
Returns:
str: The path to the "good" folder containing the sorted model output files.
"""
segmentation_classifier = get_segmentation_classifier()
good_path = os.path.join(session_path, "good")
csv_path,good_path,bad_path = run_inference_segmentation_classifier(segmentation_classifier,
session_path,
session_path,
good_path=good_path,
threshold=threshold)
# if the good folder does not exist then this means the classifier could not find any png files at the session path and something went wrong
if not os.path.exists(good_path):
raise FileNotFoundError(f"No model output files found at {session_path}. Shoreline Filtering failed.")
return good_path

def move_matching_files(input_image_path, search_string, file_exts, target_dir):
"""
Expand Down
26 changes: 1 addition & 25 deletions src/coastseg/extracted_shoreline.py
Original file line number Diff line number Diff line change
Expand Up @@ -1486,30 +1486,6 @@ def extract_shorelines_with_dask(
return combine_satellite_data(shoreline_dict)


def filter_segmentations(
session_path: str,
) -> str:
"""
Sort model output files into "good" and "bad" folders based on the satellite name in the filename.
Applies the land mask to the model output files in the "good" folder.
Args:
session_path (str): The path to the session directory containing the model output files.
Returns:
str: The path to the "good" folder containing the sorted model output files.
"""
segmentation_classifier = classifier.get_segmentation_classifier()
good_path = os.path.join(session_path, "good")
csv_path,good_path,bad_path = classifier.run_inference_segmentation_classifier(segmentation_classifier,
session_path,
session_path,
good_path=good_path,
threshold=0.40)
# if the good folder does not exist then this means the classifier could not find any png files at the session path and something went wrong
if not os.path.exists(good_path):
raise FileNotFoundError(f"No model output files found at {session_path}. Shoreline Filtering failed.")
return good_path


def get_min_shoreline_length(satname: str, default_min_length_sl: float) -> int:
Expand Down Expand Up @@ -1972,7 +1948,7 @@ def create_extracted_shorelines_from_session(
return self

# Filter the segmentations to only include the good segmentations, then update the metadata to only include the files with the good segmentations
good_directory = filter_segmentations(session_path)
good_directory = classifier.filter_segmentations(session_path)
metadata= common.filter_metadata_with_dates(metadata,good_directory,file_type="npz")

extracted_shorelines_dict = extract_shorelines_with_dask(
Expand Down
6 changes: 3 additions & 3 deletions src/coastseg/file_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,8 +325,8 @@ def find_parent_directory(
continue until the top-level directory is reached.
Returns:
str or None: The path to the parent directory containing the directory with
the specified name, or None if the directory is not found.
str : The path to the parent directory containing the directory with
the specified name, or "" if the directory is not found.
"""
while True:
# check if the current directory name contains the target directory name
Expand All @@ -341,7 +341,7 @@ def find_parent_directory(
print(
f"Reached top-level directory without finding '{directory_name}':", path
)
return None
return ""

# update the path to the parent directory and continue the loop
path = parent_dir
Expand Down
73 changes: 73 additions & 0 deletions src/coastseg/raster_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from osgeo import gdal
import numpy as np
# import rasterio

def read_bands(filename: str, satname: str = "") -> list:
"""
Read all the raster bands of a geospatial image file using GDAL.
This function opens the provided geospatial file using GDAL in read-only mode
and extracts each of the raster bands as a separate array. Each array represents
the values of the raster band across the entire spatial extent of the image.
Parameters:
-----------
filename : str
The path to the geospatial image file (e.g., a GeoTIFF) to be read.
Returns:
--------
list of np.ndarray
A list containing 2D numpy arrays. Each array in the list represents
one band from the geospatial image. The order of the arrays in the list
corresponds to the order of the bands in the original image.
Notes:
------
This function relies on GDAL for reading geospatial data. Ensure that GDAL
is properly installed and available in the Python environment.
Example:
--------
bands = read_bands('path/to/image.tif')
"""
data = gdal.Open(filename, gdal.GA_ReadOnly)
# save the contents of each raster band as an array and save each array to the bands list
# save separate NumPy array for each raster band in the dataset, with each array representing the pixel values of the corresponding band
if satname == "S2" and data.RasterCount == 5:
bands = [
data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount - 1)
]
else:
bands = [
data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)
]
return bands

def read_ms_planet(fn_ms):
# read ms bands
data = gdal.Open(fn_ms, gdal.GA_ReadOnly)
georef = np.array(data.GetGeoTransform())
bands = read_bands(fn_ms)
im_ms = np.stack(bands, 2)
return im_ms, georef

def read_cloud_mask_planet(filepath: str,cloud_mask_band = 6) -> np.ndarray:
"""The UDM mask in planet has a lot more data so the cloud mask is different"""
data = gdal.Open(filepath, gdal.GA_ReadOnly)
cloud_mask = data.GetRasterBand(cloud_mask_band).ReadAsArray()
return cloud_mask

def read_nodata_mask_planet(filepath: str,nodata_band=8) -> np.ndarray:
"""The UDM mask in planet has a lot more data so the cloud mask is different"""
data = gdal.Open(filepath, gdal.GA_ReadOnly)
im_nodata = data.GetRasterBand(nodata_band).ReadAsArray()
return im_nodata

# Example usage:
# udm_path = r"C:\development\coastseg-planet\downloads\UNALAKLEET_pier_cloud_0.7_TOAR_enabled_2020-06-01_to_2023-08-01\e2821741-0677-435a-a93a-d4f060f3adf4\coregistered\PS\udm2\2020-06-11-21-52-01_3B_AnalyticMS_toar_clip.tif"
# ms_path = r"C:\development\coastseg-planet\downloads\UNALAKLEET_pier_cloud_0.7_TOAR_enabled_2020-06-01_to_2023-08-01\e2821741-0677-435a-a93a-d4f060f3adf4\coregistered\PS\ms\2020-06-11-21-52-01_3B_AnalyticMS_toar_clip.tif"

# print(np.all(read_cloud_mask_planet(udm_path))) # it works!
# print(read_ms_planet(ms_path)) # it works!

Loading

0 comments on commit 8b2c126

Please sign in to comment.