From f0e15ba1903b4010bda1a7405aa935fd26a8f559 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Fri, 13 Dec 2024 11:07:43 -0800 Subject: [PATCH] add new versions of classifier models for the shoreline segmentation filters and image filters --- run_image_classifier.py | 43 +++ run_shoreline_segmentation_classifier.py | 31 ++ src/coastseg/classifier.py | 354 ++++++++++++++++++++++- src/coastseg/coastseg_map.py | 8 +- src/coastseg/downloads.py | 45 --- 5 files changed, 416 insertions(+), 65 deletions(-) create mode 100644 run_image_classifier.py create mode 100644 run_shoreline_segmentation_classifier.py diff --git a/run_image_classifier.py b/run_image_classifier.py new file mode 100644 index 00000000..6146036d --- /dev/null +++ b/run_image_classifier.py @@ -0,0 +1,43 @@ +from coastseg import classifier +import os + +input_path =r'C:\development\doodleverse\coastseg\CoastSeg\data\ID_1_datetime06-04-24__12_09_54\jpg_files\preprocessed\RGB' +output_path = input_path +output_csv=os.path.join(input_path,'classification_results.csv') + +# classifier_path = classifier.get_image_classifier('RGB') +classifier_path = classifier.get_image_classifier('rgb') +print(f"Classifier path: {classifier_path}") +classifier.run_inference_rgb_image_classifier(classifier_path, + input_path, + output_path, + output_csv, + threshold=0.40) + +# try the gray +# classifier_path = classifier.get_image_classifier('gray') +# print(f"Classifier path: {classifier_path}") +# classifier.run_inference_gray_image_classifier(classifier_path, +# input_path, +# output_path, +# output_csv, +# threshold=0.40) + + + + +# apply good bad classifier to the downloaded imagery +# for key in roi_settings.keys(): +# data_path = os.path.join(roi_settings[key]['filepath'],roi_settings[key]['sitename']) +# RGB_path = os.path.join(data_path,'jpg_files','preprocessed','RGB') +# print(f"Sorting images in {RGB_path}") +# input_path =RGB_path +# output_path = RGB_path +# output_csv=os.path.join(RGB_path,'classification_results.csv') +# # model_path = os.path.join(r'C:\development\doodleverse\coastseg\CoastSeg\src\coastseg\classifier_model','best.h5') +# model_path = classifier.get_classifier() +# classifier.run_inference(model_path, +# input_path, +# output_path, +# output_csv, +# threshold=0.10) \ No newline at end of file diff --git a/run_shoreline_segmentation_classifier.py b/run_shoreline_segmentation_classifier.py new file mode 100644 index 00000000..bd299878 --- /dev/null +++ b/run_shoreline_segmentation_classifier.py @@ -0,0 +1,31 @@ +from coastseg import classifier +import os + +input_path =r'C:\development\doodleverse\coastseg\CoastSeg\sessions\coreg_session2\good' +output_path = input_path +output_csv=os.path.join(input_path,'classification_results.csv') + +segmentation_classifier = classifier.get_segmentation_classifier() +classifier.run_inference_segmentation_classifier(segmentation_classifier, + input_path, + output_path, + output_csv, + threshold=0.40) + + + +# apply good bad classifier to the downloaded imagery +# for key in roi_settings.keys(): +# data_path = os.path.join(roi_settings[key]['filepath'],roi_settings[key]['sitename']) +# RGB_path = os.path.join(data_path,'jpg_files','preprocessed','RGB') +# print(f"Sorting images in {RGB_path}") +# input_path =RGB_path +# output_path = RGB_path +# output_csv=os.path.join(RGB_path,'classification_results.csv') +# # model_path = os.path.join(r'C:\development\doodleverse\coastseg\CoastSeg\src\coastseg\classifier_model','best.h5') +# model_path = classifier.get_classifier() +# classifier.run_inference(model_path, +# input_path, +# output_path, +# output_csv, +# threshold=0.10) \ No newline at end of file diff --git a/src/coastseg/classifier.py b/src/coastseg/classifier.py index c57700d7..df6e6a68 100644 --- a/src/coastseg/classifier.py +++ b/src/coastseg/classifier.py @@ -1,19 +1,56 @@ import os import glob -import tensorflow as tf -from tensorflow import keras import pandas as pd import shutil +import pooch +import tensorflow as tf +from tensorflow import keras +from tensorflow.keras import layers +from coastseg import common +from coastseg import file_utilities + +def move_matching_files(input_image_path, search_string, file_exts, target_dir): + """ + Move files matching the given search string and file extensions to the target directory. + + Example: + input_image_path = 'C:/path/to/image.jpg' + search_string = '2021-01-01' + file_exts = ['.jpg', '.jpeg', '.png'] + target_dir = 'C:/path/to/target_dir' + move_matching_files(input_image_path, search_string, file_exts, target_dir) + All of the files matching the search string and file extensions will be moved to the target directory. + + Args: + input_image_path (str): Path to the original input image. + search_string (str): The string to look for in filenames. + file_exts (list): List of file extensions to match. + target_dir (str): Directory where matching files should be moved. + """ + for ext in file_exts: + # Create the search pattern + pattern = os.path.join(os.path.dirname(input_image_path), f"*{search_string}*{ext}") + matching_files = glob.glob(pattern) + for matching_file in matching_files: + if os.path.exists(matching_file): + output_image_path = os.path.join(target_dir, os.path.basename(matching_file)) + shutil.move(matching_file, output_image_path) + def sort_images(inference_df_path, output_folder, - threshold=0.40): + threshold=0.40, + file_exts:list=None): """ Using model results to sort the images the model was run on into good and bad folders inputs: inference_df_path (str): path to the csv containing model results output_folder (str): path to the directory containing the inference images """ + if not file_exts: + file_exts = [] + + bad_dir = os.path.join(output_folder, 'bad') dirs = [output_folder, bad_dir] for d in dirs: @@ -25,11 +62,15 @@ def sort_images(inference_df_path, for i in range(len(inference_df)): input_image_path = inference_df['im_paths'].iloc[i] im_name = os.path.basename(input_image_path) + if inference_df['model_scores'].iloc[i] < threshold: + date = common.extract_date_from_filename(im_name) + # for each file extentsion in the list get the matching file that match the im_name date + move_matching_files(input_image_path, date, file_exts, bad_dir) output_image_path = os.path.join(bad_dir, im_name) shutil.move(input_image_path, output_image_path) -def run_inference(path_to_model_ckpt, +def run_inference_image_classifier(path_to_model_ckpt, path_to_inference_imgs, output_folder, result_path, @@ -80,22 +121,303 @@ def run_inference(path_to_model_ckpt, threshold=threshold) return result_path -def get_classifier() -> str: +def run_inference_rgb_image_classifier(path_to_model_ckpt, + path_to_inference_imgs, + output_folder, + result_path, + threshold): + """ + Runs the trained model on images, classifying them either as good or bad + Saves the results to a csv (image_path, class (good or bad), score (0 to 1) + Sorts the images into good or bad folders + Images should be '.jpg' + inputs: + path_to_model_ckpt (str): path to the saved keras model + path_to_inference_imgs (str): path to the folder containing images to run the model on + output_folder (str): path to save outputs to + result_path (str): csv path to save results to + threshold (float): threshold on sigmoid of model output (ex: 0.6 means mark images as good if model output is >= 0.6, or 60% sure it's a good image) + returns: + result_path (str): csv path of saved results + """ + try: + os.mkdir(output_folder) + except: + pass + image_size = (128, 128) + model = define_RGB_image_classifier_model(input_shape=image_size + (3,), num_classes=2) + model.load_weights(path_to_model_ckpt) + types = ('*.jpg', '*.jpeg', '*.png') + im_paths = [] + for files in types: + im_paths.extend(glob.glob(os.path.join(path_to_inference_imgs, files))) + model_scores = [None]*len(im_paths) + im_classes = [None]*len(im_paths) + i=0 + for im_path in im_paths: + print(im_path) + img = keras.utils.load_img(im_path, color_mode='rgb',target_size=image_size) + img_array = keras.utils.img_to_array(img) + img_array = tf.expand_dims(img_array, 0) + predictions = model.predict(img_array) + score = float(keras.activations.sigmoid(predictions[0][0])) + model_scores[i] = score + i=i+1 + ##save results to a csv + df = pd.DataFrame({'im_paths':im_paths, + 'model_scores':model_scores + } + ) + print(result_path) + + df.to_csv(result_path) + sort_images(result_path, + output_folder, + threshold=threshold) + return result_path + +def run_inference_gray_image_classifier(path_to_model_ckpt, + path_to_inference_imgs, + output_folder, + result_path, + threshold): + """ + Runs the trained model on images, classifying them either as good or bad + Saves the results to a csv (image_path, class (good or bad), score (0 to 1) + Sorts the images into good or bad folders + Images should be '.jpg' + inputs: + path_to_model_ckpt (str): path to the saved keras model + path_to_inference_imgs (str): path to the folder containing images to run the model on + output_folder (str): path to save outputs to + result_path (str): csv path to save results to + threshold (float): threshold on sigmoid of model output (ex: 0.6 means mark images as good if model output is >= 0.6, or 60% sure it's a good image) + returns: + result_path (str): csv path of saved results + """ + try: + os.mkdir(output_folder) + except: + pass + image_size = (128, 128) + model = define_RGB_image_classifier_model(input_shape=image_size + (1,), num_classes=2) + model.load_weights(path_to_model_ckpt) + types = ('*.jpg', '*.jpeg', '*.png') + im_paths = [] + for files in types: + im_paths.extend(glob.glob(os.path.join(path_to_inference_imgs, files))) + model_scores = [None]*len(im_paths) + im_classes = [None]*len(im_paths) + i=0 + for im_path in im_paths: + img = keras.utils.load_img(im_path, color_mode='grayscale',target_size=image_size) + img_array = keras.utils.img_to_array(img) + img_array = tf.expand_dims(img_array, 0) + predictions = model.predict(img_array) + score = float(keras.activations.sigmoid(predictions[0][0])) + model_scores[i] = score + i=i+1 + ##save results to a csv + df = pd.DataFrame({'im_paths':im_paths, + 'model_scores':model_scores + } + ) + df.to_csv(result_path) + sort_images(result_path, + output_folder, + threshold=threshold) + return result_path + +def define_RGB_image_classifier_model(input_shape, num_classes=2): + """ + Defines the classification model + inputs: + input_shape (tuple (xdim, ydim)): shape of images for model + num_classes (int, optional): number of classes for the model + """ + inputs = keras.Input(shape=input_shape) + + # Entry block + x = inputs + # Entry block + x = layers.Rescaling(1.0 / 255)(inputs) + x = layers.Conv2D(128, 3, strides=2, padding="same")(x) + x = layers.BatchNormalization()(x) + x = layers.Activation("relu")(x) + + previous_block_activation = x # Set aside residual + + for size in [256, 512, 728]: + x = layers.Activation("relu")(x) + x = layers.SeparableConv2D(size, 3, padding="same")(x) + x = layers.BatchNormalization()(x) + + x = layers.Activation("relu")(x) + x = layers.SeparableConv2D(size, 3, padding="same")(x) + x = layers.BatchNormalization()(x) + + x = layers.MaxPooling2D(3, strides=2, padding="same")(x) + + # Project residual + residual = layers.Conv2D(size, 1, strides=2, padding="same")( + previous_block_activation + ) + x = layers.add([x, residual]) # Add back residual + previous_block_activation = x # Set aside next residual + + x = layers.SeparableConv2D(1024, 3, padding="same")(x) + x = layers.BatchNormalization()(x) + x = layers.Activation("relu")(x) + + x = layers.GlobalAveragePooling2D()(x) + if num_classes == 2: + units = 1 + else: + units = num_classes + + x = layers.Dropout(0.5)(x) + outputs = layers.Dense(units, activation=None)(x) + + return keras.Model(inputs, outputs) + +def get_image_classifier(type:str='rgb') -> str: """returns full path to the good/bad classifier model Returns: str: full path to downloaded_models directory """ - # directory to hold downloaded models from Zenodo - script_dir = os.path.dirname(os.path.abspath(__file__)) + downloaded_models_path = common.get_downloaded_models_dir() + + if type.lower() == 'rgb': + model_name ='ImageRGBClassifier' + model_directory = file_utilities.create_directory( + downloaded_models_path, model_name + ) + + # directory to hold downloaded models from Zenodo + file_path = pooch.retrieve( + # URL to one of Pooch's test files + url="https://github.com/mlundine/ShorelineFilter/raw/refs/heads/main/models/image_rgb/best.h5", + known_hash=None, + progressbar=True, + path= model_directory, + ) + else: # get the grayscale model + model_name ='ImageGrayClassifier' + print(model_name) + model_directory = file_utilities.create_directory( + downloaded_models_path, model_name + ) + file_path = pooch.retrieve( + # URL to one of Pooch's test files + url="https://github.com/mlundine/ShorelineFilter/raw/refs/heads/main/models/image_gray/best.h5", + known_hash=None, + progressbar=True, + fname='best_gray.h5', + path= model_directory, + ) + return file_path - downloaded_models_path = os.path.abspath( - os.path.join(script_dir, "classifier_model") +def get_segmentation_classifier() -> str: + """returns full path to the good/bad classifier model + Returns: + str: full path to downloaded_models directory + """ + model_name ='ShorelineFilter' + downloaded_models_path = common.get_downloaded_models_dir() + model_directory = file_utilities.create_directory( + downloaded_models_path, model_name ) - if not os.path.exists(downloaded_models_path): - os.mkdir(downloaded_models_path) - - model_path = os.path.join(downloaded_models_path, "best.h5") - if not os.path.exists(model_path): - raise Exception(f"Classifier model not found at {model_path}") - return model_path \ No newline at end of file + # directory to hold downloaded models from Zenodo + file_path = pooch.retrieve( + # URL to one of Pooch's test files + url="https://github.com/mlundine/ShorelineFilter/raw/refs/heads/main/models/segmentation_rgb/best_seg.h5", + known_hash=None, + progressbar=True, + path= model_directory, + ) + return file_path + +def define_segmentation_classifier_model(input_shape, num_classes=2): + """ + Defines the segmentation classification model + inputs: + input_shape (tuple (xdim, ydim)): shape of images for model + num_classes (int, optional): number of classes for the model + """ + inputs = keras.Input(shape=input_shape) + + # Entry block + x = inputs + # Entry block + x = layers.Rescaling(1.0 / 255)(inputs) + x = layers.Conv2D(16, 3, padding='same', activation='relu')(x) + x = layers.BatchNormalization()(x) + x = layers.MaxPooling2D()(x) + x = layers.Conv2D(32, 3, padding='same', activation='relu')(x) + x = layers.BatchNormalization()(x) + x = layers.MaxPooling2D()(x) + x = layers.Conv2D(64, 3, padding='same', activation='relu')(x) + x = layers.BatchNormalization()(x) + x = layers.GlobalAveragePooling2D()(x) + x = layers.Dropout(0.5)(x) + outputs = layers.Dense(1 if num_classes == 2 else num_classes, activation=None)(x) + + return keras.Model(inputs, outputs) + +def run_inference_segmentation_classifier(path_to_model_ckpt, + path_to_inference_imgs, + output_folder, + result_path, + threshold): + """ + Runs the trained model on segmentation images, classifying them either as good or bad + Saves the results to a csv (image_path, class (good or bad), score (0 to 1) + Sorts the images into good or bad folders + Images should be '.jpg' + inputs: + path_to_model_ckpt (str): path to the saved keras model + path_to_inference_imgs (str): path to the folder containing images to run the model on + output_folder (str): path to save outputs to + result_path (str): csv path to save results to + threshold (float): threshold on sigmoid of model output (ex: 0.6 means mark images as good if model output is >= 0.6, or 60% sure it's a good image) + returns: + result_path (str): csv path of saved results + """ + try: + os.mkdir(output_folder) + except: + pass + image_size = (512, 512) + model = define_segmentation_classifier_model(input_shape=image_size + (3,), num_classes=2) + # model.load_weights(resource_path, by_name=True, skip_mismatch=True) # this was temporary code to get it to work when the layers did not match saved file compare to layeres in define model + # model.save_weights("corrected_weights.h5") # this was temporary to get it work + # model.load_weights(path_to_model_ckpt) #original line did not wor + model.load_weights(path_to_model_ckpt) + types = ('*.jpg', '*.jpeg', '*.png') + im_paths = [] + for files in types: + im_paths.extend(glob.glob(os.path.join(path_to_inference_imgs, files))) + model_scores = [None]*len(im_paths) + im_classes = [None]*len(im_paths) + i=0 + for im_path in im_paths: + img = keras.utils.load_img(im_path, color_mode='rgb',target_size=image_size) + img_array = keras.utils.img_to_array(img) + img_array = tf.expand_dims(img_array, 0) + predictions = model.predict(img_array) + score = float(keras.activations.sigmoid(predictions[0][0])) + model_scores[i] = score + i=i+1 + ##save results to a csv + df = pd.DataFrame({'im_paths':im_paths, + 'model_scores':model_scores + } + ) + + df.to_csv(result_path) + sort_images(result_path, + output_folder, + threshold=threshold, + file_exts=['npz'],) + return result_path \ No newline at end of file diff --git a/src/coastseg/coastseg_map.py b/src/coastseg/coastseg_map.py index 0b7a77ad..a4f94835 100644 --- a/src/coastseg/coastseg_map.py +++ b/src/coastseg/coastseg_map.py @@ -1980,7 +1980,7 @@ def extract_all_shorelines(self,roi_ids:list=None) -> None: shoreline_extraction_area_gdf = getattr(self.shoreline_extraction_area, "gdf", None) if self.shoreline_extraction_area else None # apply good bad classifier to the downloaded imagery - from coastseg.classifier import run_inference,get_classifier + from coastseg.classifier import run_inference_rgb_image_classifier,get_image_classifier for key in roi_settings.keys(): data_path = os.path.join(roi_settings[key]['filepath'],roi_settings[key]['sitename']) @@ -1990,12 +1990,12 @@ def extract_all_shorelines(self,roi_ids:list=None) -> None: output_path = RGB_path output_csv=os.path.join(RGB_path,'classification_results.csv') # model_path = os.path.join(r'C:\development\doodleverse\coastseg\CoastSeg\src\coastseg\classifier_model','best.h5') - model_path = get_classifier() - run_inference(model_path, + model_path = get_image_classifier('rgb') + run_inference_rgb_image_classifier(model_path, input_path, output_path, output_csv, - threshold=0.10) + threshold=0.40) #3. get selected ROIs on map and extract shoreline for each of them diff --git a/src/coastseg/downloads.py b/src/coastseg/downloads.py index 5a1406a0..6b1e2b31 100644 --- a/src/coastseg/downloads.py +++ b/src/coastseg/downloads.py @@ -294,51 +294,6 @@ def session_creator(): # start all the tasks at once await tqdm.asyncio.tqdm.gather(*tasks) - -async def async_download_url_dict(url_dict: dict = {}): - """ - Asynchronously downloads files from a given dictionary of URLs and save locations. - - Parameters - ---------- - url_dict : dict, optional - A dictionary where the keys represent local save paths and the values are the corresponding URLs of the files to be downloaded. Default is an empty dictionary. - - Usage - ----- - url_dict = { - "/path/to/save/file1.h5": "https://zenodo.org/record/7574784/file1.h5", - "/path/to/save/file2.json": "https://zenodo.org/record/7574784/file2.json", - "/path/to/save/file3.txt": "https://zenodo.org/record/7574784/file3.txt", - } - - await async_download_url_dict(url_dict) - """ - - def session_creator(): - # Set the custom timeout value (in seconds) - keepalive_timeout = 100 - # Configure the timeout - connector = aiohttp.TCPConnector(keepalive_timeout=keepalive_timeout) - # Create and return the session with the configured timeout - return aiohttp.ClientSession( - connector=connector, timeout=aiohttp.ClientTimeout(total=600) - ) - - # allow 1 concurrent downloads - semaphore = asyncio.Semaphore(1) - tasks = [] - for save_path, url in url_dict.items(): - task = asyncio.create_task( - download_zenodo_file( - semaphore, session_creator, url, save_path, max_retries=0 - ) - ) - tasks.append(task) - # start all the tasks at once - await tqdm.asyncio.tqdm.gather(*tasks) - - async def download_zenodo_file( semaphore: asyncio.Semaphore, session_creator: callable,