diff --git a/8_extract_shorelines_planet.py b/8_extract_shorelines_planet.py
new file mode 100644
index 0000000..57e8ffe
--- /dev/null
+++ b/8_extract_shorelines_planet.py
@@ -0,0 +1,160 @@
+from coastseg import zoo_model
+from coastseg.tide_correction import compute_tidal_corrections
+from coastseg import file_utilities
+from coastseg import classifier
+import os
+from coastseg import raster_utils
+import numpy as np
+
+# The Zoo Model is a machine learning model that can be used to extract shorelines from satellite imagery.
+# This script runs a single ROI at a time. To process multiple ROIs, run this script once per ROI.
+
+# Extract Shoreline Settings
+settings = {
+    'min_length_sl': 100,       # minimum length (m) of a shoreline perimeter to be valid
+    'max_dist_ref': 500,        # maximum distance (m) from the reference shoreline to search for valid shorelines; this determines the width of the buffer around the reference shoreline
+    'cloud_thresh': 0.5,        # threshold on maximum cloud cover (0-1); if the cloud cover is above this threshold, no shorelines will be extracted from that image
+    'dist_clouds': 100,         # distance (m) around clouds where the shoreline will not be mapped
+    'min_beach_area': 50,       # minimum area (m^2) for an object to be labelled as a beach
+    'sand_color': 'default',    # 'default', 'latest', 'dark' (for grey/black sand beaches) or 'bright' (for white sand beaches)
+    "apply_cloud_mask": True,   # apply the cloud mask to the imagery; if False, the cloud mask will not be applied
+}
+
+# The model can be run using the following settings:
+model_setting = {
+    "sample_direc": None,       # directory of jpgs, e.g. "C:/Users/username/CoastSeg/data/ID_lla12_datetime11-07-23__08_14_11/jpg_files/preprocessed/RGB/"
+    "use_GPU": "0",             # "0" or "1"; "0" means no GPU
+    "implementation": "BEST",   # BEST or ENSEMBLE
+    "model_type": "global_segformer_RGB_4class_14036903",  # model name from the zoo
+    "otsu": False,              # Otsu Thresholding
+    "tta": False,               # Test Time Augmentation
+}
+
+# Available models accept "RGB", "MNDWI" or "NDWI" as input
+img_type = "RGB"  # make sure the model name is compatible with the image type
+# percentage of no data allowed in the image, e.g. 0.75 means 75% of the image can be no data
+percent_no_data = 0.75
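+
+# For context, filter_no_data_pixels in zoo_model.py enforces this limit by measuring the
+# fraction of pure-black pixels in each preprocessed JPEG. An illustrative, self-contained
+# version of that check (the helper name is ours, not part of the CoastSeg API):
+# from PIL import Image
+# def fraction_no_data(jpg_path: str) -> float:
+#     img = np.array(Image.open(jpg_path))
+#     black = np.all(img == 0, axis=-1)  # a pixel counts as no data if it is black in every band
+#     return black.sum() / black.size
+# an image would then be skipped whenever fraction_no_data(path) > percent_no_data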
+
+# Assume the planet data is in this format:
+# sitename
+# ├── jpg_files
+# │   └── preprocessed
+# │       ├── RGB
+# │       │   └── 2020-06-01-21-16-34_RGB_PS.jpg
+# │       └── NIR
+# │           └── 2020-06-01-21-16-34_NIR_PS.jpg
+# └── PS
+#     ├── meta
+#     │   └── 20200601_211634_44_2277_3B_AnalyticMS_toar_clip.txt
+#     ├── ms
+#     │   └── 20200601_211634_44_2277_3B_AnalyticMS_toar_clip.tif
+#     └── udm2
+#         └── 20200601_211634_44_2277_3B_udm2_clip.tif
+
+# This script assumes the provided site directory follows the structure above
+session_dir = r'C:\development\coastseg-planet\downloads\UNALAKLEET_pier_cloud_0.7_TOAR_enabled_2020-06-01_to_2023-08-01\e2821741-0677-435a-a93a-d4f060f3adf4\coregistered'
+
+# Make a simpler version of run model that doesn't need config
+
+# Make a simpler version of extract shorelines that doesn't need config
+
+zoo_model_instance = zoo_model.Zoo_Model()
+
+# merge the model settings into the shoreline settings, then save them all to the model instance
+settings.update(model_setting)
+zoo_model_instance.set_settings(**settings)
+
+model_implementation = settings.get('implementation', "BEST")
+model_name = settings.get('model_type', None)
+RGB_directory = os.path.join(session_dir, 'jpg_files', 'preprocessed', 'RGB')
+
+output_directory = os.path.join(session_dir, 'segmentations')
+os.makedirs(output_directory, exist_ok=True)
+
+zoo_model_instance.run_model_on_directory(output_directory, RGB_directory, model_name)
+
+# Now run the segmentation classifier on the output directory
+classifier.filter_segmentations(output_directory, threshold=0.50)
+
+# Extract shorelines from the directory
+satellite = 'PS'  # planet
+meta_dir = os.path.join(session_dir, satellite, 'meta')
+ms_dir = os.path.join(session_dir, satellite, 'ms')
+udm2_dir = os.path.join(session_dir, satellite, 'udm2')
+filepath = [ms_dir, udm2_dir]  # the directories of the ms and udm2 files
+
+pixel_size = 3.0  # meters, for PlanetScope
+
+# TODO: get the minimum beach area in number of pixels depending on the satellite
+# (see the conversion sketch after this script)
+settings["min_length_sl"]
+
+# whatever file is currently being processed
+filename = ''
+
+# get the date
+date = filename[:19]
+
+# get the filepaths of the ms and udm2 files for this particular filename
+fn = [os.path.join(ms_dir, filename), os.path.join(udm2_dir, filename)]
+
+# basically this function should do what preprocess_single does
+
+# filepaths to .tif files
+fn_ms = fn[0]
+fn_mask = fn[1]
+
+im_ms, georef = raster_utils.read_ms_planet(fn_ms)
+cloud_mask = raster_utils.read_cloud_mask_planet(fn_mask)
+im_nodata = raster_utils.read_nodata_mask_planet(fn_mask)
+combined_mask = np.logical_or(cloud_mask, im_nodata)
+
+# Note: may need to add code later that adds 0 pixels to the no data mask
+
+# Only do this if apply_cloud_mask is True
+cloud_mask = np.logical_or(cloud_mask, im_nodata)
+
+# Here is where we would get rid of the image if the cloud cover or no data fraction was above a certain threshold
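+# An illustrative version of that screen (assumptions: the masks are boolean arrays and the
+# cloud fraction is measured over the usable, non-no-data area; this helper is not part of the diff):
+# def exceeds_cloud_threshold(cloud_mask, im_nodata, cloud_thresh):
+#     valid = ~im_nodata.astype(bool)
+#     if valid.sum() == 0:
+#         return True  # nothing usable in the scene
+#     return (cloud_mask.astype(bool) & valid).sum() / valid.sum() > cloud_thresh
+# if exceeds_cloud_threshold(cloud_mask, im_nodata, settings['cloud_thresh']):
+#     skip this image and move on to the next filename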
+
+# this is the next step in the process: find the shorelines
+# ref_shoreline_buffer = SDS_shoreline.create_shoreline_buffer(
+#     cloud_mask.shape, georef, image_epsg, pixel_size, settings
+# )
+# # read the model outputs from the npz file for this image
+# npz_file = find_matching_npz(filename, os.path.join(session_path, "good"))
+# if npz_file is None:
+#     npz_file = find_matching_npz(filename, session_path)
+# if npz_file is None:
+#     logger.warning(f"npz file not found for {filename}")
+#     return None
+
+# # get the labels for water and land
+# land_mask = load_merged_image_labels(npz_file, class_indices=class_indices)
+# all_labels = load_image_labels(npz_file)
+
+# min_beach_area = settings["min_beach_area"]
+# land_mask = remove_small_objects_and_binarize(land_mask, min_beach_area)
+
+# # get the shoreline from the image
+# shoreline = find_shoreline(
+#     fn,
+#     image_epsg,
+#     settings,
+#     np.logical_xor(cloud_mask, im_nodata),
+#     cloud_mask,
+#     im_nodata,
+#     georef,
+#     land_mask,
+#     ref_shoreline_buffer,
+# )
+
+# zoo_model_instance.run_model_and_extract_shorelines(
+#     model_setting["sample_direc"],
+#     session_name=model_session_name,
+#     shoreline_path=shoreline_path,
+#     transects_path=transects_path,
+#     shoreline_extraction_area_path=shoreline_extraction_area_path,
+#     coregistered=True,
+# )
\ No newline at end of file
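One loose end in the script above: the bare settings["min_length_sl"] statement does nothing yet, and the metric settings still need converting to pixel units before they are usable on PlanetScope rasters. A minimal sketch of that conversion, following the usual CoastSat convention (our assumption; this diff does not implement it):

    import numpy as np
    pixel_size = 3.0  # meters, PlanetScope
    # minimum beach area in pixels: each pixel covers pixel_size**2 square meters
    min_beach_area_pixels = int(np.ceil(settings["min_beach_area"] / pixel_size ** 2))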
diff --git a/src/coastseg/classifier.py b/src/coastseg/classifier.py
index b0e45b9..8fe6789 100644
--- a/src/coastseg/classifier.py
+++ b/src/coastseg/classifier.py
@@ -11,6 +11,31 @@
 # Some of these functions were originally written by Mark Lundine and have been modified for this project.
 
 
+def filter_segmentations(
+    session_path: str,
+    threshold: float = 0.40,
+) -> str:
+    """
+    Sort model output files into "good" and "bad" folders based on the segmentation classifier's score.
+    Applies the land mask to the model output files in the "good" folder.
+
+    Args:
+        session_path (str): The path to the session directory containing the model output files.
+        threshold (float, optional): Minimum classifier score for a segmentation to be kept as "good". Defaults to 0.40.
+
+    Returns:
+        str: The path to the "good" folder containing the sorted model output files.
+    """
+    segmentation_classifier = get_segmentation_classifier()
+    good_path = os.path.join(session_path, "good")
+    csv_path, good_path, bad_path = run_inference_segmentation_classifier(
+        segmentation_classifier,
+        session_path,
+        session_path,
+        good_path=good_path,
+        threshold=threshold,
+    )
+    # if the good folder does not exist, the classifier could not find any png files at the session path and something went wrong
+    if not os.path.exists(good_path):
+        raise FileNotFoundError(f"No model output files found at {session_path}. Shoreline Filtering failed.")
+    return good_path
+
 
 def move_matching_files(input_image_path, search_string, file_exts, target_dir):
     """
diff --git a/src/coastseg/extracted_shoreline.py b/src/coastseg/extracted_shoreline.py
index a7acf24..34c4176 100644
--- a/src/coastseg/extracted_shoreline.py
+++ b/src/coastseg/extracted_shoreline.py
@@ -1486,30 +1486,6 @@ def extract_shorelines_with_dask(
     return combine_satellite_data(shoreline_dict)
 
 
-def filter_segmentations(
-    session_path: str,
-) -> str:
-    """
-    Sort model output files into "good" and "bad" folders based on the satellite name in the filename.
-    Applies the land mask to the model output files in the "good" folder.
-
-    Args:
-        session_path (str): The path to the session directory containing the model output files.
-
-    Returns:
-        str: The path to the "good" folder containing the sorted model output files.
-    """
-    segmentation_classifier = classifier.get_segmentation_classifier()
-    good_path = os.path.join(session_path, "good")
-    csv_path,good_path,bad_path = classifier.run_inference_segmentation_classifier(segmentation_classifier,
-        session_path,
-        session_path,
-        good_path=good_path,
-        threshold=0.40)
-    # if the good folder does not exist then this means the classifier could not find any png files at the session path and something went wrong
-    if not os.path.exists(good_path):
-        raise FileNotFoundError(f"No model output files found at {session_path}. Shoreline Filtering failed.")
-    return good_path
 
 
 def get_min_shoreline_length(satname: str, default_min_length_sl: float) -> int:
@@ -1972,7 +1948,7 @@ def create_extracted_shorelines_from_session(
         return self
 
     # Filter the segmentations to only include the good segmentations, then update the metadata to only include the files with the good segmentations
-    good_directory = filter_segmentations(session_path)
+    good_directory = classifier.filter_segmentations(session_path)
     metadata= common.filter_metadata_with_dates(metadata,good_directory,file_type="npz")
 
     extracted_shorelines_dict = extract_shorelines_with_dask(
diff --git a/src/coastseg/file_utilities.py b/src/coastseg/file_utilities.py
index 7fdf3b8..30d6a62 100644
--- a/src/coastseg/file_utilities.py
+++ b/src/coastseg/file_utilities.py
@@ -325,8 +325,8 @@ def find_parent_directory(
         continue until the top-level directory is reached.
 
     Returns:
-        str or None: The path to the parent directory containing the directory with
-            the specified name, or None if the directory is not found.
+        str: The path to the parent directory containing the directory with
+            the specified name, or "" if the directory is not found.
     """
     while True:
         # check if the current directory name contains the target directory name
@@ -341,7 +341,7 @@ def find_parent_directory(
             print(
                 f"Reached top-level directory without finding '{directory_name}':", path
            )
-            return None
+            return ""
 
         # update the path to the parent directory and continue the loop
         path = parent_dir
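Returning "" rather than None matches the annotated str return type and lets callers rely on a plain falsy check. A small illustration of the pattern the zoo_model.py change below builds on (the caller code here is hypothetical):

    roi_directory = file_utilities.find_parent_directory(src_directory, "ID_", "data")
    if not roi_directory:  # "" is falsy, so this catches the not-found case
        raise FileNotFoundError(f"No 'ID_' directory found above {src_directory}")
    # from here on, roi_directory is a non-empty str and safe to pass to os.path.join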
diff --git a/src/coastseg/raster_utils.py b/src/coastseg/raster_utils.py
new file mode 100644
index 0000000..2bc908a
--- /dev/null
+++ b/src/coastseg/raster_utils.py
@@ -0,0 +1,73 @@
+from osgeo import gdal
+import numpy as np
+# import rasterio
+
+def read_bands(filename: str, satname: str = "") -> list:
+    """
+    Read all the raster bands of a geospatial image file using GDAL.
+
+    This function opens the provided geospatial file using GDAL in read-only mode
+    and extracts each of the raster bands as a separate array. Each array represents
+    the values of the raster band across the entire spatial extent of the image.
+
+    Parameters:
+    -----------
+    filename : str
+        The path to the geospatial image file (e.g., a GeoTIFF) to be read.
+    satname : str, optional
+        The satellite name. For "S2" imagery with 5 bands, the last band is skipped.
+
+    Returns:
+    --------
+    list of np.ndarray
+        A list containing 2D numpy arrays. Each array in the list represents
+        one band from the geospatial image. The order of the arrays in the list
+        corresponds to the order of the bands in the original image.
+
+    Notes:
+    ------
+    This function relies on GDAL for reading geospatial data. Ensure that GDAL
+    is properly installed and available in the Python environment.
+
+    Example:
+    --------
+    bands = read_bands('path/to/image.tif')
+    """
+    data = gdal.Open(filename, gdal.GA_ReadOnly)
+    # save a separate NumPy array for each raster band in the dataset, with each array
+    # representing the pixel values of the corresponding band
+    if satname == "S2" and data.RasterCount == 5:
+        bands = [
+            data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount - 1)
+        ]
+    else:
+        bands = [
+            data.GetRasterBand(k + 1).ReadAsArray() for k in range(data.RasterCount)
+        ]
+    return bands
+
+def read_ms_planet(fn_ms):
+    # read the ms bands and the georeferencing transform
+    data = gdal.Open(fn_ms, gdal.GA_ReadOnly)
+    georef = np.array(data.GetGeoTransform())
+    bands = read_bands(fn_ms)
+    im_ms = np.stack(bands, 2)
+    return im_ms, georef
+
+def read_cloud_mask_planet(filepath: str, cloud_mask_band=6) -> np.ndarray:
+    """The Planet UDM2 raster carries several masks in separate bands, so the cloud mask is read from its own band."""
+    data = gdal.Open(filepath, gdal.GA_ReadOnly)
+    cloud_mask = data.GetRasterBand(cloud_mask_band).ReadAsArray()
+    return cloud_mask
+
+def read_nodata_mask_planet(filepath: str, nodata_band=8) -> np.ndarray:
+    """The Planet UDM2 raster carries several masks in separate bands, so the no data mask is read from its own band."""
+    data = gdal.Open(filepath, gdal.GA_ReadOnly)
+    im_nodata = data.GetRasterBand(nodata_band).ReadAsArray()
+    return im_nodata
+
+# Example usage:
+# udm_path = r"C:\development\coastseg-planet\downloads\UNALAKLEET_pier_cloud_0.7_TOAR_enabled_2020-06-01_to_2023-08-01\e2821741-0677-435a-a93a-d4f060f3adf4\coregistered\PS\udm2\2020-06-11-21-52-01_3B_AnalyticMS_toar_clip.tif"
+# ms_path = r"C:\development\coastseg-planet\downloads\UNALAKLEET_pier_cloud_0.7_TOAR_enabled_2020-06-01_to_2023-08-01\e2821741-0677-435a-a93a-d4f060f3adf4\coregistered\PS\ms\2020-06-11-21-52-01_3B_AnalyticMS_toar_clip.tif"
+
+# print(np.all(read_cloud_mask_planet(udm_path)))  # it works!
+# print(read_ms_planet(ms_path))  # it works!
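Taken together, the three readers cover the Planet side of what preprocess_single does for the other satellites. A short usage sketch (the astype(bool) casts are our assumption, since the UDM2 bands come back as raw integer arrays):

    import numpy as np
    from coastseg import raster_utils

    im_ms, georef = raster_utils.read_ms_planet(ms_path)
    cloud_mask = raster_utils.read_cloud_mask_planet(udm_path).astype(bool)
    im_nodata = raster_utils.read_nodata_mask_planet(udm_path).astype(bool)
    # pixels to keep out of shoreline mapping: cloudy or no data
    combined_mask = np.logical_or(cloud_mask, im_nodata)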
diff --git a/src/coastseg/zoo_model.py b/src/coastseg/zoo_model.py
index f3de248..db0f359 100644
--- a/src/coastseg/zoo_model.py
+++ b/src/coastseg/zoo_model.py
@@ -48,6 +48,19 @@
 logger = logging.getLogger(__name__)
 
 
+def move_model_outputs_to_directory(input_directory: str, output_directory: str, preprocessed_data: dict) -> None:
+    """Copies the model outputs (png + npz files) from the input directory to the output directory. Saves the model settings to the output directory."""
+    if not os.path.exists(input_directory):
+        logger.warning("No model outputs were generated")
+        print("No model outputs were generated")
+        raise Exception("No model outputs were generated. Check if the directory contained enough data to run the model or try raising the percentage of no data allowed.")
+
+    model_settings_path = os.path.join(output_directory, "model_settings.json")
+    file_utilities.write_to_json(model_settings_path, preprocessed_data)
+
+    # copy the files from the "out" folder to the session folder
+    file_utilities.move_files(input_directory, output_directory, delete_src=True)
+
 
 def filter_no_data_pixels(files: list[str], percent_no_data: float = 0.50) -> list[str]:
     def percentage_of_black_pixels(img: "PIL.Image") -> float:
@@ -166,10 +179,14 @@ def get_imagery_directory(img_type: str, RGB_path: str) -> str:
     output_path = os.path.dirname(RGB_path)
     if img_type == "RGB":
         output_path = RGB_path
-    elif img_type == "RGB+MNDWI+NDWI":
+    elif img_type == "RGB+MNDWI+NDWI":  # this is for 5 band imagery
         NIR_path = os.path.join(output_path, "NIR")
+        if not os.path.exists(NIR_path):
+            raise FileNotFoundError(f"{NIR_path} not found at {output_path}")
         NDWI_path = RGB_to_infrared(RGB_path, NIR_path, output_path, "NDWI")
         SWIR_path = os.path.join(output_path, "SWIR")
+        if not os.path.exists(SWIR_path):
+            raise FileNotFoundError(f"{SWIR_path} not found at {output_path}")
         MNDWI_path = RGB_to_infrared(RGB_path, SWIR_path, output_path, "MNDWI")
         five_band_path = file_utilities.create_directory(output_path, "five_band")
         output_path = get_five_band_imagery(
@@ -178,9 +195,13 @@ def get_imagery_directory(img_type: str, RGB_path: str) -> str:
     # default filetype is NIR and if NDWI is selected else filetype to SWIR
     elif img_type == "NDWI":
         NIR_path = os.path.join(output_path, "NIR")
+        if not os.path.exists(NIR_path):
+            raise FileNotFoundError(f"{NIR_path} not found at {output_path}")
         output_path = RGB_to_infrared(RGB_path, NIR_path, output_path, "NDWI")
     elif img_type == "MNDWI":
         SWIR_path = os.path.join(output_path, "SWIR")
+        if not os.path.exists(SWIR_path):
+            raise FileNotFoundError(f"{SWIR_path} not found at {output_path}")
         output_path = RGB_to_infrared(RGB_path, SWIR_path, output_path, "MNDWI")
     else:
         raise ValueError(
@@ -670,17 +691,9 @@ def preprocess_data(
     Returns:
         dict: The updated model dictionary containing the paths to the processed data.
     """
-    # if configs do not exist then raise an error and do not save the session
-    if not file_utilities.validate_config_files_exist(src_directory):
-        logger.warning(
-            f"Config files config.json or config_gdf.geojson do not exist in roi directory { src_directory}\n This means that the download did not complete successfully."
-        )
-        raise FileNotFoundError(
-            f"Config files config.json or config_gdf.geojson do not exist in roi directory { src_directory}\n This means that the download did not complete successfully."
-        )
     # get full path to directory named 'RGB' containing RGBs
     RGB_path = file_utilities.find_directory_recursively(src_directory, name="RGB")
-    # convert RGB to MNDWI, NDWI,or 5 band
+    # create a new directory called "MNDWI", "NDWI" or "five_band" from the RGB directory, depending on the img_type
     model_dict["sample_direc"] = get_imagery_directory(img_type, RGB_path)
     return model_dict
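The new existence checks in get_imagery_directory turn a confusing downstream failure inside RGB_to_infrared into an early, explicit error. The expectation they encode is that the infrared folders sit beside RGB under the preprocessed directory:

    preprocessed/
    ├── RGB/    # input imagery
    ├── NIR/    # required for "NDWI" and "RGB+MNDWI+NDWI"
    └── SWIR/   # required for "MNDWI" and "RGB+MNDWI+NDWI"

A caller therefore sees the missing folder immediately (illustrative snippet, not from the diff):

    try:
        sample_direc = get_imagery_directory("NDWI", RGB_path)
    except FileNotFoundError as err:
        print(f"Missing band directory: {err}")  # e.g. the NIR folder was never generated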
@@ -958,12 +971,9 @@ def postprocess_data(
     """
     # get roi_ids
     session_path = session.path
+    # The "out" directory is by default where the model outputs are saved
     outputs_path = os.path.join(preprocessed_data["sample_direc"], "out")
-    if not os.path.exists(outputs_path):
-        logger.warning(f"No model outputs were generated")
-        print(f"No model outputs were generated")
-        raise Exception(f"No model outputs were generated. Check if {roi_directory} contained enough data to run the model or try raising the percentage of no data allowed.")
-    logger.info(f"Moving from {outputs_path} files to {session_path}")
+    move_model_outputs_to_directory(outputs_path, session_path, preprocessed_data)
 
     # if configs do not exist then raise an error and do not save the session
     if not file_utilities.validate_config_files_exist(roi_directory):
@@ -980,7 +990,7 @@ def postprocess_data(
             roi_id,
             os.path.join(session_path, "config.json"),
         )
-    # Copy over the config_gdf.geojson file
+    # Copy over the config_gdf.geojson file to the session directory
     config_gdf_path = os.path.join(roi_directory, "config_gdf.geojson")
     if os.path.exists(config_gdf_path):
         # Read in the GeoJSON file using geopandas
@@ -993,11 +1003,6 @@ def postprocess_data(
         gdf_4326.to_file(
             os.path.join(session_path, "config_gdf.geojson"), driver="GeoJSON"
         )
-    model_settings_path = os.path.join(session_path, "model_settings.json")
-    file_utilities.write_to_json(model_settings_path, preprocessed_data)
-
-    # copy files from out to session folder
-    file_utilities.move_files(outputs_path, session_path, delete_src=True)
 
     session.save(session.path)
 
@@ -1025,6 +1030,8 @@ def get_weights_directory(self,model_implementation:str, model_id: str) -> str:
         # check if a local model should be loaded or not
         if USE_LOCAL_MODEL == False or LOCAL_MODEL_PATH == "":
             # create the model directory & download the model
+            if not model_id:
+                raise ValueError("Please select a model.")
             weights_directory = self.get_model_directory(model_id)
             self.download_model(model_implementation, model_id, weights_directory)
         else:
@@ -1106,6 +1113,7 @@ def run_model(
         use_tta: bool,
         percent_no_data: float,
         coregistered: bool = False,
+        roi_directory: str = "",
     ):
         """
         Runs the model for image segmentation.
@@ -1121,7 +1129,7 @@ def run_model(
             use_tta (bool): Whether to use test-time augmentation or not.
             percent_no_data (float): The percentage of no data allowed in the image.
             coregistered (bool, optional): Whether the images are coregistered or not. Defaults to False.
-
+            roi_directory (str, optional): The directory containing the ROI data. Defaults to "".
         Returns:
             None
         """
@@ -1153,10 +1161,16 @@ def run_model(
             "tta": use_tta,
             "percent_no_data": percent_no_data,
         }
-        # get parent roi_directory from the selected imagery directory
-        roi_directory = file_utilities.find_parent_directory(
-            src_directory, "ID_", "data"
-        )
+        # If the ROI directory is not provided, check the parent folders of the imagery directory until either the ROI directory is found or the data directory is reached
+        if not roi_directory:
+            # get the parent ROI directory (the one that contains jpg_files and the satellite directories) from the selected imagery directory
+            roi_directory = file_utilities.find_parent_directory(
+                src_directory, "ID_", "data"
+            )
+            if not roi_directory:
+                raise FileNotFoundError(
+                    f"Could not find the ROI directory that contained 'ID' in the name in {src_directory}"
+                )
 
         if coregistered:
             roi_directory = os.path.join(roi_directory, "coregistered")
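With the shared helper in place, postprocess_data leaves the session folder looking roughly like this (contents follow from the code above; the individual filenames are illustrative):

    sessions/<session_name>/
    ├── model_settings.json    # written by move_model_outputs_to_directory
    ├── config.json            # copied from the ROI directory
    ├── config_gdf.geojson     # re-projected to EPSG:4326 before saving
    └── *.png / *.npz          # model outputs moved out of the "out" folder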
@@ -1171,6 +1185,70 @@ def run_model(
         session.add_roi_ids([file_utilities.extract_roi_id(roi_directory)])
 
         print(f"\n Model results saved to {session.path}")
 
+    def run_model_on_directory(
+        self,
+        output_directory: str,
+        src_directory: str,
+        model_name: str,
+        img_type: str = "RGB",
+        model_implementation: str = "BEST",
+        use_GPU: str = "0",
+        use_otsu: bool = False,
+        use_tta: bool = False,
+        percent_no_data: float = 0.90,
+        coregistered: bool = False,
+    ):
+        """
+        Runs the model for image segmentation on a directory of images.
+
+        Args:
+            output_directory (str): The directory to save the model outputs to.
+            src_directory (str): The directory of RGB images.
+            model_name (str): The name of the model.
+            img_type (str): The type of image. Defaults to "RGB".
+            model_implementation (str): The implementation of the model, BEST or ENSEMBLE. Defaults to "BEST".
+            use_GPU (str): Whether to use the GPU or not. Defaults to "0" (no GPU).
+            use_otsu (bool): Whether to use Otsu thresholding or not. Defaults to False.
+            use_tta (bool): Whether to use test-time augmentation or not. Defaults to False.
+            percent_no_data (float): The percentage of no data allowed in the image. Defaults to 0.90.
+            coregistered (bool, optional): Whether the images are coregistered or not. Defaults to False.
+
+        Returns:
+            None
+        """
+        logger.info(f"Selected directory of RGBs: {src_directory}")
+        logger.info(f"model_name: {model_name}")
+        logger.info(f"model_implementation: {model_implementation}")
+        logger.info(f"use_GPU: {use_GPU}")
+        logger.info(f"use_otsu: {use_otsu}")
+        logger.info(f"use_tta: {use_tta}")
+
+        print(f"Running model {model_name}")
+        # print(f"self.settings: {self.settings}")
+        self.prepare_model(model_implementation, model_name)
+
+        model_dict = {
+            "use_GPU": use_GPU,
+            "sample_direc": "",
+            "implementation": model_implementation,
+            "model_type": model_name,
+            "otsu": use_otsu,
+            "tta": use_tta,
+            "percent_no_data": percent_no_data,
+        }
+
+        # add back coregistered ??
+
+        model_dict = self.preprocess_data(src_directory, model_dict, img_type)
+        logger.info(f"model_dict: {model_dict}")
+
+        self.compute_segmentation(model_dict, percent_no_data)
+
+        # The "out" directory is by default where the model outputs are saved
+        model_outputs_path = os.path.join(model_dict["sample_direc"], "out")
+        move_model_outputs_to_directory(model_outputs_path, output_directory, model_dict)
+        print(f"\n Model results saved to {output_directory}")
+
     def get_model_directory(self, model_id: str):
         # Create a directory to hold the downloaded models
         downloaded_models_path = common.get_downloaded_models_dir()
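For reference, a call to the new entry point with its defaults spelled out; the values mirror the script at the top of this diff:

    zoo_model_instance.run_model_on_directory(
        output_directory=output_directory,
        src_directory=RGB_directory,
        model_name="global_segformer_RGB_4class_14036903",
        img_type="RGB",
        model_implementation="BEST",
        use_GPU="0",           # "0" disables the GPU
        use_otsu=False,
        use_tta=False,
        percent_no_data=0.75,  # stricter than the 0.90 default
    )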