From f0e15ba1903b4010bda1a7405aa935fd26a8f559 Mon Sep 17 00:00:00 2001
From: Sharon Fitzpatrick <SF2309@Gmail.Com>
Date: Fri, 13 Dec 2024 11:07:43 -0800
Subject: [PATCH] add new versions of classifier models for the shoreline
 segmentation filters and image filters

---
 run_image_classifier.py                  |  43 +++
 run_shoreline_segmentation_classifier.py |  31 ++
 src/coastseg/classifier.py               | 354 ++++++++++++++++++++++-
 src/coastseg/coastseg_map.py             |   8 +-
 src/coastseg/downloads.py                |  45 ---
 5 files changed, 416 insertions(+), 65 deletions(-)
 create mode 100644 run_image_classifier.py
 create mode 100644 run_shoreline_segmentation_classifier.py

diff --git a/run_image_classifier.py b/run_image_classifier.py
new file mode 100644
index 00000000..6146036d
--- /dev/null
+++ b/run_image_classifier.py
@@ -0,0 +1,43 @@
+from coastseg import classifier
+import os
+
+input_path =r'C:\development\doodleverse\coastseg\CoastSeg\data\ID_1_datetime06-04-24__12_09_54\jpg_files\preprocessed\RGB'  # NOTE(review): hard-coded developer path; edit before running
+output_path = input_path
+output_csv=os.path.join(input_path,'classification_results.csv')
+
+# get_image_classifier lower-cases its argument, so 'RGB' and 'rgb' select the same model
+classifier_path = classifier.get_image_classifier('rgb')
+print(f"Classifier path: {classifier_path}")
+classifier.run_inference_rgb_image_classifier(classifier_path,
+                input_path,
+                output_path,
+                output_csv,
+                threshold=0.40)
+
+# Alternative: run the grayscale classifier instead (uncomment the lines below)
+# classifier_path = classifier.get_image_classifier('gray')
+# print(f"Classifier path: {classifier_path}")
+# classifier.run_inference_gray_image_classifier(classifier_path,
+#                 input_path,
+#                 output_path,
+#                 output_csv,
+#                 threshold=0.40)
+
+
+
+
+# apply good bad classifier to the downloaded imagery (stale example: classifier.get_classifier and classifier.run_inference are removed/renamed by this same change)
+# for key in roi_settings.keys():
+#     data_path = os.path.join(roi_settings[key]['filepath'],roi_settings[key]['sitename'])
+#     RGB_path = os.path.join(data_path,'jpg_files','preprocessed','RGB')
+#     print(f"Sorting images in {RGB_path}")
+#     input_path =RGB_path
+#     output_path = RGB_path
+#     output_csv=os.path.join(RGB_path,'classification_results.csv')
+#     # model_path = os.path.join(r'C:\development\doodleverse\coastseg\CoastSeg\src\coastseg\classifier_model','best.h5')
+#     model_path = classifier.get_classifier()
+#     classifier.run_inference(model_path,
+#                 input_path,
+#                 output_path,
+#                 output_csv,
+#                 threshold=0.10)
\ No newline at end of file
diff --git a/run_shoreline_segmentation_classifier.py b/run_shoreline_segmentation_classifier.py
new file mode 100644
index 00000000..bd299878
--- /dev/null
+++ b/run_shoreline_segmentation_classifier.py
@@ -0,0 +1,31 @@
+from coastseg import classifier
+import os
+
+input_path =r'C:\development\doodleverse\coastseg\CoastSeg\sessions\coreg_session2\good'  # NOTE(review): hard-coded developer path; edit before running
+output_path = input_path
+output_csv=os.path.join(input_path,'classification_results.csv')
+
+segmentation_classifier = classifier.get_segmentation_classifier()  # path to the downloaded weights file
+classifier.run_inference_segmentation_classifier(segmentation_classifier,
+                input_path,
+                output_path,
+                output_csv,
+                threshold=0.40)
+
+
+
+# apply good bad classifier to the downloaded imagery (stale example: classifier.get_classifier and classifier.run_inference are removed/renamed by this same change)
+# for key in roi_settings.keys():
+#     data_path = os.path.join(roi_settings[key]['filepath'],roi_settings[key]['sitename'])
+#     RGB_path = os.path.join(data_path,'jpg_files','preprocessed','RGB')
+#     print(f"Sorting images in {RGB_path}")
+#     input_path =RGB_path
+#     output_path = RGB_path
+#     output_csv=os.path.join(RGB_path,'classification_results.csv')
+#     # model_path = os.path.join(r'C:\development\doodleverse\coastseg\CoastSeg\src\coastseg\classifier_model','best.h5')
+#     model_path = classifier.get_classifier()
+#     classifier.run_inference(model_path,
+#                 input_path,
+#                 output_path,
+#                 output_csv,
+#                 threshold=0.10)
\ No newline at end of file
diff --git a/src/coastseg/classifier.py b/src/coastseg/classifier.py
index c57700d7..df6e6a68 100644
--- a/src/coastseg/classifier.py
+++ b/src/coastseg/classifier.py
@@ -1,19 +1,56 @@
 import os
 import glob
-import tensorflow as tf
-from tensorflow import keras
 import pandas as pd
 import shutil
+import pooch
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras import layers
+from coastseg import common
+from coastseg import file_utilities
+
+def move_matching_files(input_image_path, search_string, file_exts, target_dir):
+    """
+    Move every file in the directory of input_image_path whose name matches '*{search_string}*{ext}' into target_dir.
+
+    Example:
+    input_image_path = 'C:/path/to/image.jpg'
+    search_string = '2021-01-01'
+    file_exts = ['.jpg', '.jpeg', '.png']
+    target_dir = 'C:/path/to/target_dir'
+    move_matching_files(input_image_path, search_string, file_exts, target_dir)
+    All files in 'C:/path/to' whose names contain '2021-01-01' and end with one of the extensions will be moved to the target directory.
+    
+    Args:
+        input_image_path (str): Path to the original input image; only its parent directory is searched.
+        search_string (str): The string to look for in filenames. NOTE(review): an empty string makes the pattern match every file with the extension.
+        file_exts (list): List of file extensions to match; each is appended directly after a wildcard, so 'npz' matches any name ending in 'npz'.
+        target_dir (str): Directory where matching files should be moved. It is not created by this function.
+    """
+    for ext in file_exts:
+        # Glob pattern: <directory of the input image>/*<search_string>*<ext>
+        pattern = os.path.join(os.path.dirname(input_image_path), f"*{search_string}*{ext}")
+        matching_files = glob.glob(pattern)
+        for matching_file in matching_files:
+            if os.path.exists(matching_file):  # defensive re-check before moving
+                output_image_path = os.path.join(target_dir, os.path.basename(matching_file))
+                shutil.move(matching_file, output_image_path)
+
 
 def sort_images(inference_df_path,
                 output_folder,
-                threshold=0.40):
+                threshold=0.40,
+                file_exts:list=None):
     """
     Using model results to sort the images the model was run on into good and bad folders
     inputs:
     inference_df_path (str): path to the csv containing model results
     output_folder (str): path to the directory containing the inference images
     """
+    if not file_exts:
+        file_exts = []
+    
+
     bad_dir = os.path.join(output_folder, 'bad')
     dirs = [output_folder, bad_dir]
     for d in dirs:
@@ -25,11 +62,15 @@ def sort_images(inference_df_path,
     for i in range(len(inference_df)):
         input_image_path = inference_df['im_paths'].iloc[i]
         im_name = os.path.basename(input_image_path) 
+
         if inference_df['model_scores'].iloc[i] < threshold:
+            date = common.extract_date_from_filename(im_name)
+            # for each file extension in the list, move the files whose names contain the date extracted from im_name
+            move_matching_files(input_image_path, date, file_exts, bad_dir)
             output_image_path = os.path.join(bad_dir, im_name)
             shutil.move(input_image_path, output_image_path)
             
-def run_inference(path_to_model_ckpt,
+def run_inference_image_classifier(path_to_model_ckpt,
                   path_to_inference_imgs,
                   output_folder,
                   result_path,
@@ -80,22 +121,303 @@ def run_inference(path_to_model_ckpt,
                 threshold=threshold)
     return result_path
 
-def get_classifier() -> str:
+def run_inference_rgb_image_classifier(path_to_model_ckpt,
+                      path_to_inference_imgs,
+                      output_folder,
+                      result_path,
+                      threshold):
+    """
+    Runs the RGB image classifier on every image in a folder and scores each one with the sigmoid of the model output (0 to 1)
+    Saves the results to a csv with the columns im_paths and model_scores (plus the default pandas index)
+    Calls sort_images, which moves images scoring below the threshold into the 'bad' subfolder of output_folder
+    Images are found with the '*.jpg', '*.jpeg' and '*.png' glob patterns and loaded at 128x128
+    inputs:
+    path_to_model_ckpt (str): path to the saved weights file, loaded with load_weights into a freshly defined model
+    path_to_inference_imgs (str): path to the folder containing images to run the model on
+    output_folder (str): path to save outputs to
+    result_path (str): csv path to save results to
+    threshold (float): threshold on sigmoid of model output (ex: 0.6 means mark images as good if model output is >= 0.6, or 60% sure it's a good image)
+    returns:
+    result_path (str): csv path of saved results
+    """
+    try:
+        os.mkdir(output_folder)
+    except:  # NOTE(review): bare except also hides real failures (e.g. permissions); os.makedirs(..., exist_ok=True) would be safer
+        pass
+    image_size = (128, 128)
+    model = define_RGB_image_classifier_model(input_shape=image_size + (3,), num_classes=2)
+    model.load_weights(path_to_model_ckpt)
+    types = ('*.jpg', '*.jpeg', '*.png') 
+    im_paths = []
+    for files in types:
+        im_paths.extend(glob.glob(os.path.join(path_to_inference_imgs, files)))
+    model_scores = [None]*len(im_paths)
+    im_classes = [None]*len(im_paths)  # NOTE(review): never read after this line
+    i=0
+    for im_path in im_paths:
+        print(im_path)
+        img = keras.utils.load_img(im_path, color_mode='rgb',target_size=image_size)
+        img_array = keras.utils.img_to_array(img)
+        img_array = tf.expand_dims(img_array, 0)  # add a leading batch axis of size 1
+        predictions = model.predict(img_array)
+        score = float(keras.activations.sigmoid(predictions[0][0]))  # the model's Dense head has no activation, so the sigmoid is applied here
+        model_scores[i] = score
+        i=i+1
+    ## save results to a csv
+    df = pd.DataFrame({'im_paths':im_paths,
+                       'model_scores':model_scores
+                       }
+                      )
+    print(result_path)
+
+    df.to_csv(result_path)
+    sort_images(result_path,
+                output_folder,
+                threshold=threshold)
+    return result_path
+
+def run_inference_gray_image_classifier(path_to_model_ckpt,
+                       path_to_inference_imgs,
+                       output_folder,
+                       result_path,
+                       threshold):
+    """
+    Runs the grayscale image classifier on every image in a folder and scores each one with the sigmoid of the model output (0 to 1)
+    Saves the results to a csv with the columns im_paths and model_scores (plus the default pandas index)
+    Calls sort_images, which moves images scoring below the threshold into the 'bad' subfolder of output_folder
+    Images are found with the '*.jpg', '*.jpeg' and '*.png' glob patterns and loaded as grayscale at 128x128
+    inputs:
+    path_to_model_ckpt (str): path to the saved weights file, loaded with load_weights into a freshly defined model
+    path_to_inference_imgs (str): path to the folder containing images to run the model on
+    output_folder (str): path to save outputs to
+    result_path (str): csv path to save results to
+    threshold (float): threshold on sigmoid of model output (ex: 0.6 means mark images as good if model output is >= 0.6, or 60% sure it's a good image)
+    returns:
+    result_path (str): csv path of saved results
+    """
+    try:
+        os.mkdir(output_folder)
+    except:  # NOTE(review): bare except also hides real failures (e.g. permissions); os.makedirs(..., exist_ok=True) would be safer
+        pass
+    image_size = (128, 128)
+    model = define_RGB_image_classifier_model(input_shape=image_size + (1,), num_classes=2)  # same architecture as the RGB model, built with one input channel
+    model.load_weights(path_to_model_ckpt)
+    types = ('*.jpg', '*.jpeg', '*.png') 
+    im_paths = []
+    for files in types:
+        im_paths.extend(glob.glob(os.path.join(path_to_inference_imgs, files)))
+    model_scores = [None]*len(im_paths)
+    im_classes = [None]*len(im_paths)  # NOTE(review): never read after this line
+    i=0
+    for im_path in im_paths:
+        img = keras.utils.load_img(im_path, color_mode='grayscale',target_size=image_size)
+        img_array = keras.utils.img_to_array(img)
+        img_array = tf.expand_dims(img_array, 0)  # add a leading batch axis of size 1
+        predictions = model.predict(img_array)
+        score = float(keras.activations.sigmoid(predictions[0][0]))  # the model's Dense head has no activation, so the sigmoid is applied here
+        model_scores[i] = score
+        i=i+1
+    ## save results to a csv
+    df = pd.DataFrame({'im_paths':im_paths,
+                       'model_scores':model_scores
+                       }
+                      )
+    df.to_csv(result_path)
+    sort_images(result_path,
+                output_folder,
+                threshold=threshold)
+    return result_path
+
+def define_RGB_image_classifier_model(input_shape, num_classes=2):
+    """
+    Defines the image classification model: rescaling, a strided convolution, three residual separable-convolution blocks, global average pooling, dropout and a dense head
+    inputs:
+    input_shape (tuple): shape passed to keras.Input; the callers in this module pass image_size + (channels,), e.g. (128, 128, 3)
+    num_classes (int, optional): number of classes; 2 gives a single output unit, otherwise one unit per class. The dense head has no activation. Returns the keras.Model
+    """
+    inputs = keras.Input(shape=input_shape)
+
+    # NOTE(review): the assignment below is overwritten two lines later by the Rescaling call
+    x = inputs
+    # Entry block: rescale pixel values by 1/255, then a stride-2 convolution
+    x = layers.Rescaling(1.0 / 255)(inputs)
+    x = layers.Conv2D(128, 3, strides=2, padding="same")(x)
+    x = layers.BatchNormalization()(x)
+    x = layers.Activation("relu")(x)
+
+    previous_block_activation = x  # Set aside residual
+
+    for size in [256, 512, 728]:
+        x = layers.Activation("relu")(x)
+        x = layers.SeparableConv2D(size, 3, padding="same")(x)
+        x = layers.BatchNormalization()(x)
+
+        x = layers.Activation("relu")(x)
+        x = layers.SeparableConv2D(size, 3, padding="same")(x)
+        x = layers.BatchNormalization()(x)
+
+        x = layers.MaxPooling2D(3, strides=2, padding="same")(x)
+
+        # Project residual with a stride-2 1x1 convolution so it can be added to the pooled x
+        residual = layers.Conv2D(size, 1, strides=2, padding="same")(
+            previous_block_activation
+        )
+        x = layers.add([x, residual])  # Add back residual
+        previous_block_activation = x  # Set aside next residual
+
+    x = layers.SeparableConv2D(1024, 3, padding="same")(x)
+    x = layers.BatchNormalization()(x)
+    x = layers.Activation("relu")(x)
+
+    x = layers.GlobalAveragePooling2D()(x)
+    if num_classes == 2:
+        units = 1  # binary case: one output unit
+    else:
+        units = num_classes
+
+    x = layers.Dropout(0.5)(x)
+    outputs = layers.Dense(units, activation=None)(x)  # no activation here; the inference functions in this module apply the sigmoid
+
+    return keras.Model(inputs, outputs)
+
+def get_image_classifier(type:str='rgb') -> str:
     """returns full path to the good/bad classifier model
     Returns:
         str: full path to downloaded_models directory
     """
-    # directory to hold downloaded models from Zenodo
-    script_dir = os.path.dirname(os.path.abspath(__file__))
+    downloaded_models_path = common.get_downloaded_models_dir()
+
+    if type.lower() == 'rgb':  # NOTE(review): the parameter name 'type' shadows the builtin
+        model_name ='ImageRGBClassifier'
+        model_directory = file_utilities.create_directory(
+            downloaded_models_path, model_name
+        )
+
+        # fetch the RGB classifier weights into model_directory; the value returned by pooch.retrieve is what this function returns
+        file_path = pooch.retrieve(
+            # RGB image classifier weights hosted in the mlundine/ShorelineFilter GitHub repository
+            url="https://github.com/mlundine/ShorelineFilter/raw/refs/heads/main/models/image_rgb/best.h5", 
+            known_hash=None,  # NOTE(review): no checksum given, so the download's integrity is not verified
+            progressbar=True,
+            path= model_directory,
+            )
+    else: # any value other than 'rgb' (case-insensitive) selects the grayscale model
+        model_name ='ImageGrayClassifier'
+        print(model_name)
+        model_directory = file_utilities.create_directory(
+            downloaded_models_path, model_name
+        )
+        file_path = pooch.retrieve(
+            # grayscale image classifier weights hosted in the mlundine/ShorelineFilter GitHub repository
+            url="https://github.com/mlundine/ShorelineFilter/raw/refs/heads/main/models/image_gray/best.h5", 
+            known_hash=None,  # NOTE(review): no checksum given, so the download's integrity is not verified
+            progressbar=True,
+            fname='best_gray.h5',
+            path= model_directory,
+            )
+    return file_path
 
-    downloaded_models_path = os.path.abspath(
-        os.path.join(script_dir, "classifier_model")
+def get_segmentation_classifier() -> str:
+    """fetches (via pooch.retrieve) the weights of the good/bad shoreline segmentation classifier
+    Returns:
+        str: the path returned by pooch.retrieve for the weights file (not the downloaded_models directory itself)
+    """
+    model_name ='ShorelineFilter'
+    downloaded_models_path = common.get_downloaded_models_dir()
+    model_directory = file_utilities.create_directory(
+        downloaded_models_path, model_name
     )
-    if not os.path.exists(downloaded_models_path):
-        os.mkdir(downloaded_models_path)
-    
-    model_path = os.path.join(downloaded_models_path, "best.h5")
-    if not os.path.exists(model_path):
-        raise Exception(f"Classifier model not found at {model_path}")
 
-    return model_path
\ No newline at end of file
+    # fetch the segmentation classifier weights into model_directory
+    file_path = pooch.retrieve(
+        # segmentation classifier weights hosted in the mlundine/ShorelineFilter GitHub repository
+        url="https://github.com/mlundine/ShorelineFilter/raw/refs/heads/main/models/segmentation_rgb/best_seg.h5",
+        known_hash=None,  # NOTE(review): no checksum given, so the download's integrity is not verified
+        progressbar=True,
+        path= model_directory,
+        )
+    return file_path
+
+def define_segmentation_classifier_model(input_shape, num_classes=2):
+    """
+    Defines the segmentation classification model: rescaling, three Conv2D + BatchNormalization stages (16, 32, 64 filters), global average pooling, dropout and a dense head
+    inputs:
+    input_shape (tuple): shape passed to keras.Input; the caller in this module passes image_size + (3,), i.e. (512, 512, 3)
+    num_classes (int, optional): number of classes; 2 gives a single output unit, otherwise one unit per class. The dense head has no activation. Returns the keras.Model
+    """
+    inputs = keras.Input(shape=input_shape)
+
+    # NOTE(review): the assignment below is overwritten two lines later by the Rescaling call
+    x =  inputs
+    # Entry block: rescale pixel values by 1/255 before the convolution stages
+    x = layers.Rescaling(1.0 / 255)(inputs)
+    x = layers.Conv2D(16, 3, padding='same', activation='relu')(x)
+    x = layers.BatchNormalization()(x)
+    x = layers.MaxPooling2D()(x)
+    x = layers.Conv2D(32, 3, padding='same', activation='relu')(x)
+    x = layers.BatchNormalization()(x) 
+    x = layers.MaxPooling2D()(x)
+    x = layers.Conv2D(64, 3, padding='same', activation='relu')(x)
+    x = layers.BatchNormalization()(x)
+    x = layers.GlobalAveragePooling2D()(x)
+    x = layers.Dropout(0.5)(x)
+    outputs = layers.Dense(1 if num_classes == 2 else num_classes, activation=None)(x)  # no activation here; the inference function applies the sigmoid
+
+    return keras.Model(inputs, outputs)
+
+def run_inference_segmentation_classifier(path_to_model_ckpt,
+                      path_to_inference_imgs,
+                      output_folder,
+                      result_path,
+                      threshold):
+    """
+    Runs the segmentation classifier on every image in a folder and scores each one with the sigmoid of the model output (0 to 1)
+    Saves the results to a csv with the columns im_paths and model_scores (plus the default pandas index)
+    Calls sort_images with file_exts=['npz'], which moves images scoring below the threshold, and files matched by date and 'npz', into the 'bad' subfolder
+    Images are found with the '*.jpg', '*.jpeg' and '*.png' glob patterns and loaded as RGB at 512x512
+    inputs:
+    path_to_model_ckpt (str): path to the saved weights file, loaded with load_weights into a freshly defined model
+    path_to_inference_imgs (str): path to the folder containing images to run the model on
+    output_folder (str): path to save outputs to
+    result_path (str): csv path to save results to
+    threshold (float): threshold on sigmoid of model output (ex: 0.6 means mark images as good if model output is >= 0.6, or 60% sure it's a good image)
+    returns:
+    result_path (str): csv path of saved results
+    """
+    try:
+        os.mkdir(output_folder)
+    except:  # NOTE(review): bare except also hides real failures (e.g. permissions); os.makedirs(..., exist_ok=True) would be safer
+        pass
+    image_size = (512, 512)
+    model = define_segmentation_classifier_model(input_shape=image_size + (3,), num_classes=2)
+    # model.load_weights(resource_path, by_name=True, skip_mismatch=True) # this was temporary code to get it to work when the layers in the saved file did not match the layers in the defined model
+    # model.save_weights("corrected_weights.h5")  # this was temporary to get it to work
+    # model.load_weights(path_to_model_ckpt) # original line did not work
+    model.load_weights(path_to_model_ckpt)
+    types = ('*.jpg', '*.jpeg', '*.png') 
+    im_paths = []
+    for files in types:
+        im_paths.extend(glob.glob(os.path.join(path_to_inference_imgs, files)))
+    model_scores = [None]*len(im_paths)
+    im_classes = [None]*len(im_paths)  # NOTE(review): never read after this line
+    i=0
+    for im_path in im_paths:
+        img = keras.utils.load_img(im_path, color_mode='rgb',target_size=image_size)
+        img_array = keras.utils.img_to_array(img)
+        img_array = tf.expand_dims(img_array, 0)  # add a leading batch axis of size 1
+        predictions = model.predict(img_array)
+        score = float(keras.activations.sigmoid(predictions[0][0]))  # the model's Dense head has no activation, so the sigmoid is applied here
+        model_scores[i] = score
+        i=i+1
+    ## save results to a csv
+    df = pd.DataFrame({'im_paths':im_paths,
+                       'model_scores':model_scores
+                       }
+                      )
+
+    df.to_csv(result_path)
+    sort_images(result_path,
+                output_folder,
+                threshold=threshold,
+                file_exts=['npz'],)  # also move files matching each bad image's date and the 'npz' suffix
+    return result_path
\ No newline at end of file
diff --git a/src/coastseg/coastseg_map.py b/src/coastseg/coastseg_map.py
index 0b7a77ad..a4f94835 100644
--- a/src/coastseg/coastseg_map.py
+++ b/src/coastseg/coastseg_map.py
@@ -1980,7 +1980,7 @@ def extract_all_shorelines(self,roi_ids:list=None) -> None:
         shoreline_extraction_area_gdf = getattr(self.shoreline_extraction_area, "gdf", None) if self.shoreline_extraction_area else None
 
         # apply good bad classifier to the downloaded imagery
-        from coastseg.classifier import run_inference,get_classifier
+        from coastseg.classifier import run_inference_rgb_image_classifier,get_image_classifier
         
         for key in roi_settings.keys():
             data_path = os.path.join(roi_settings[key]['filepath'],roi_settings[key]['sitename'])
@@ -1990,12 +1990,12 @@ def extract_all_shorelines(self,roi_ids:list=None) -> None:
             output_path = RGB_path
             output_csv=os.path.join(RGB_path,'classification_results.csv')
             # model_path = os.path.join(r'C:\development\doodleverse\coastseg\CoastSeg\src\coastseg\classifier_model','best.h5')
-            model_path = get_classifier()
-            run_inference(model_path,
+            model_path = get_image_classifier('rgb')
+            run_inference_rgb_image_classifier(model_path,
                         input_path,
                         output_path,
                         output_csv,
-                        threshold=0.10)
+                        threshold=0.40)
 
 
         #3. get selected ROIs on map and extract shoreline for each of them
diff --git a/src/coastseg/downloads.py b/src/coastseg/downloads.py
index 5a1406a0..6b1e2b31 100644
--- a/src/coastseg/downloads.py
+++ b/src/coastseg/downloads.py
@@ -294,51 +294,6 @@ def session_creator():
     # start all the tasks at once
     await tqdm.asyncio.tqdm.gather(*tasks)
 
-
-async def async_download_url_dict(url_dict: dict = {}):
-    """
-    Asynchronously downloads files from a given dictionary of URLs and save locations.
-
-    Parameters
-    ----------
-    url_dict : dict, optional
-        A dictionary where the keys represent local save paths and the values are the corresponding URLs of the files to be downloaded. Default is an empty dictionary.
-
-    Usage
-    -----
-    url_dict = {
-        "/path/to/save/file1.h5": "https://zenodo.org/record/7574784/file1.h5",
-        "/path/to/save/file2.json": "https://zenodo.org/record/7574784/file2.json",
-        "/path/to/save/file3.txt": "https://zenodo.org/record/7574784/file3.txt",
-    }
-
-    await async_download_url_dict(url_dict)
-    """
-
-    def session_creator():
-        # Set the custom timeout value (in seconds)
-        keepalive_timeout = 100
-        # Configure the timeout
-        connector = aiohttp.TCPConnector(keepalive_timeout=keepalive_timeout)
-        # Create and return the session with the configured timeout
-        return aiohttp.ClientSession(
-            connector=connector, timeout=aiohttp.ClientTimeout(total=600)
-        )
-
-    # allow 1 concurrent downloads
-    semaphore = asyncio.Semaphore(1)
-    tasks = []
-    for save_path, url in url_dict.items():
-        task = asyncio.create_task(
-            download_zenodo_file(
-                semaphore, session_creator, url, save_path, max_retries=0
-            )
-        )
-        tasks.append(task)
-    # start all the tasks at once
-    await tqdm.asyncio.tqdm.gather(*tasks)
-
-
 async def download_zenodo_file(
     semaphore: asyncio.Semaphore,
     session_creator: callable,