Skip to content

Commit

Permalink
overhaul classifier to include new function to run image classifier o…
Browse files Browse the repository at this point in the history
…n directory, have sort images create good directory if one is not provided
  • Loading branch information
2320sharon committed Dec 19, 2024
1 parent ff8b749 commit 5bf323a
Showing 1 changed file with 139 additions and 100 deletions.
239 changes: 139 additions & 100 deletions src/coastseg/classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,28 +36,82 @@ def move_matching_files(input_image_path, search_string, file_exts, target_dir):
output_image_path = os.path.join(target_dir, os.path.basename(matching_file))
shutil.move(matching_file, output_image_path)

def sort_images_with_model(input_directory:str,type:str='rgb', threshold:float=0.40):
    """
    Run the good/bad image classifier over a directory and sort the images in place.

    The directory is used both as the inference input and as the output folder,
    so images scoring below the threshold are moved into a 'bad' sub-directory
    while the good images stay where they are.

    Example:
        sort_images_with_model(type='rgb', input_directory='C:/Coastseg/data/ID_1_datetime06-04-24__12_09_54/jpg_files/preprocessed/RGB', threshold=0.40)

    Parameters:
        input_directory (str): Directory holding the images to classify.
            Should contain jpg, jpeg, or png files.
        type (str): Which model to use, 'rgb' or 'gray'. Default is 'rgb'.
            The RGB model is for color images; the gray model is for grayscale
            images or RGB images. Any value other than 'rgb' (compared
            case-insensitively) selects the gray inference path.
        threshold (float): Threshold on the sigmoid of the model output
            (ex: 0.6 means an image is marked good if the model output is >= 0.6,
            i.e. the model is 60% sure it is a good image).

    Returns:
        None
    """
    # Resolve the model weights first; the raw `type` value is forwarded unchanged.
    model_path = get_image_classifier(type)

    # Select the inference routine that matches the requested model flavour.
    if type.lower() == 'rgb':
        classify = run_inference_rgb_image_classifier
    else:
        classify = run_inference_gray_image_classifier

    # The input directory doubles as the output folder so sorting happens in place.
    classify(model_path,
             input_directory,
             input_directory,
             threshold=threshold)

def sort_images(inference_df_path,
output_folder,
good_path="",
bad_path="",
threshold=0.40,
file_exts:list=None):
"""
Using model results to sort the images the model was run on into good and bad folders
Using model results to sort the images the model was run on into good and bad folders.
Put the matching files with the corresponding file extensions into the good or bad directories based on the threshold.
inputs:
inference_df_path (str): path to the csv containing model results
output_folder (str): path to the directory containing the inference images
threshold (float): threshold of model output (ex: 0.6 means mark images as good if model output is >= 0.6, or 60% sure it's a good image)
file_exts (list, optional): list of file extensions to match when moving files to the good or bad directories
returns:
good_path (str): path to the directory containing the good images
bad_path (str): path to the directory containing the bad images
Example:
inference_df_path = 'C:/path/to/inference_results.csv'
output_folder = 'C:/path/to/output_folder'
threshold = 0.40
file_exts = ['.jpg', '.jpeg', '.png']
sort_images(inference_df_path, output_folder, threshold=threshold, file_exts=file_exts)
This will sort the npz files as well as matching files with the extensions in ['.jpg', '.jpeg', '.png'] from the inference results
into good and bad folders based on the threshold.
"""

if not file_exts:
file_exts = []

if not good_path:
good_path = os.path.join(output_folder, 'good')
if not bad_path:
bad_path = os.path.join(output_folder, 'bad')

bad_dir = os.path.join(output_folder, 'bad')
dirs = [output_folder, bad_dir]
dirs = [output_folder, bad_path, good_path]
for d in dirs:
try:
os.mkdir(d)
except:
pass
os.makedirs(d, exist_ok=True)

inference_df = pd.read_csv(inference_df_path)
for i in range(len(inference_df)):
input_image_path = inference_df['im_paths'].iloc[i]
Expand All @@ -66,66 +120,21 @@ def sort_images(inference_df_path,
if inference_df['model_scores'].iloc[i] < threshold:
date = common.extract_date_from_filename(im_name)
# for each file extension in the list, get the files that match the im_name date
move_matching_files(input_image_path, date, file_exts, bad_dir)
output_image_path = os.path.join(bad_dir, im_name)
move_matching_files(input_image_path, date, file_exts, bad_path)
output_image_path = os.path.join(bad_path, im_name)
shutil.move(input_image_path, output_image_path)
else: # if it was higher than the threshold it was a good image and should be moved to the good directory
date = common.extract_date_from_filename(im_name)
move_matching_files(input_image_path, date, file_exts, good_path)
output_image_path = os.path.join(good_path, im_name)
shutil.move(input_image_path, output_image_path)
return good_path, bad_path

def run_inference_image_classifier(path_to_model_ckpt,
                                   path_to_inference_imgs,
                                   output_folder,
                                   result_path,
                                   threshold):
    """
    Runs the trained model on images, scoring each one as good or bad.
    Saves the results to a csv with the columns 'im_paths' and 'model_scores'
    (score is the sigmoid of the model output, 0 to 1), then calls sort_images
    to sort the images based on the threshold.
    Images may be '.jpg', '.jpeg', or '.png'; each one is loaded as grayscale
    and resized to 128x128 before being passed to the model.
    inputs:
    path_to_model_ckpt (str): path to the saved keras model
    path_to_inference_imgs (str): path to the folder containing images to run the model on
    output_folder (str): path to save outputs to (created, with parents, if it does not exist)
    result_path (str): csv path to save results to
    threshold (float): threshold on sigmoid of model output (ex: 0.6 means mark images as good if model output is >= 0.6, or 60% sure it's a good image)
    returns:
    result_path (str): csv path of saved results
    """
    # Was a bare `try: os.mkdir(...) except: pass`, which hid real failures
    # (e.g. permission errors). makedirs only tolerates "already exists".
    os.makedirs(output_folder, exist_ok=True)

    image_size = (128, 128)
    model = keras.models.load_model(path_to_model_ckpt)

    # Collect every supported image in the inference folder.
    types = ('*.jpg', '*.jpeg', '*.png')
    im_paths = []
    for pattern in types:
        im_paths.extend(glob.glob(os.path.join(path_to_inference_imgs, pattern)))

    # Score the images one at a time; model_scores stays aligned with im_paths.
    model_scores = []
    for im_path in im_paths:
        img = keras.utils.load_img(im_path, color_mode='grayscale', target_size=image_size)
        img_array = keras.utils.img_to_array(img)
        img_array = tf.expand_dims(img_array, 0)  # add the batch dimension
        predictions = model.predict(img_array, verbose=False)
        model_scores.append(float(keras.activations.sigmoid(predictions[0][0])))

    ##save results to a csv
    df = pd.DataFrame({'im_paths': im_paths,
                       'model_scores': model_scores
                       }
                      )
    df.to_csv(result_path)
    sort_images(result_path,
                output_folder,
                threshold=threshold)
    return result_path

def run_inference_rgb_image_classifier(path_to_model_ckpt,
path_to_inference_imgs,
output_folder,
result_path,
threshold):
csv_path="",
threshold=0.40):
"""
Runs the trained model on images, classifying them either as good or bad
Saves the results to a csv (image_path, class (good or bad), score (0 to 1)
Expand All @@ -135,15 +144,16 @@ def run_inference_rgb_image_classifier(path_to_model_ckpt,
path_to_model_ckpt (str): path to the saved keras model
path_to_inference_imgs (str): path to the folder containing images to run the model on
output_folder (str): path to save outputs to
result_path (str): csv path to save results to
csv_path (str): csv path to save results to. If not provided, the results will be saved to output_folder/classification_results.csv
threshold (float): threshold on sigmoid of model output (ex: 0.6 means mark images as good if model output is >= 0.6, or 60% sure it's a good image)
returns:
result_path (str): csv path of saved results
csv_path (str): csv path of saved results
"""
try:
os.mkdir(output_folder)
except:
pass
if not csv_path:
csv_path = os.path.join(output_folder, 'classification_results.csv')

os.makedirs(output_folder,exist_ok=True)

image_size = (128, 128)
model = define_RGB_image_classifier_model(input_shape=image_size + (3,), num_classes=2)
model.load_weights(path_to_model_ckpt)
Expand All @@ -168,19 +178,19 @@ def run_inference_rgb_image_classifier(path_to_model_ckpt,
'model_scores':model_scores
}
)
print(result_path)

df.to_csv(result_path)
sort_images(result_path,
df.to_csv(csv_path)
sort_images(csv_path,
output_folder,
good_path=output_folder,
threshold=threshold)
return result_path
return csv_path

def run_inference_gray_image_classifier(path_to_model_ckpt,
path_to_inference_imgs,
output_folder,
result_path,
threshold):
csv_path="",
threshold=0.40):
"""
Runs the trained model on images, classifying them either as good or bad
Saves the results to a csv (image_path, class (good or bad), score (0 to 1)
Expand All @@ -190,15 +200,15 @@ def run_inference_gray_image_classifier(path_to_model_ckpt,
path_to_model_ckpt (str): path to the saved keras model
path_to_inference_imgs (str): path to the folder containing images to run the model on
output_folder (str): path to save outputs to
result_path (str): csv path to save results to
csv_path (str): csv path to save results to. If not provided, the results will be saved to output_folder/classification_results.csv
threshold (float): threshold on sigmoid of model output (ex: 0.6 means mark images as good if model output is >= 0.6, or 60% sure it's a good image)
returns:
result_path (str): csv path of saved results
csv_path (str): csv path of saved results
"""
try:
os.mkdir(output_folder)
except:
pass
if not csv_path:
csv_path = os.path.join(output_folder, 'classification_results.csv')

os.makedirs(output_folder,exist_ok=True)
image_size = (128, 128)
model = define_RGB_image_classifier_model(input_shape=image_size + (1,), num_classes=2)
model.load_weights(path_to_model_ckpt)
Expand All @@ -222,11 +232,12 @@ def run_inference_gray_image_classifier(path_to_model_ckpt,
'model_scores':model_scores
}
)
df.to_csv(result_path)
sort_images(result_path,
df.to_csv(csv_path)
sort_images(csv_path,
output_folder,
good_path=output_folder,
threshold=threshold)
return result_path
return csv_path

def define_RGB_image_classifier_model(input_shape, num_classes=2):
"""
Expand Down Expand Up @@ -281,7 +292,13 @@ def define_RGB_image_classifier_model(input_shape, num_classes=2):
return keras.Model(inputs, outputs)

def get_image_classifier(type:str='rgb') -> str:
"""returns full path to the good/bad classifier model
"""
Downloads the image classifier model from Zenodo and returns the path to the downloaded model
Args:
type (str, optional): type of model to download. Options are 'rgb' or 'gray'. Defaults to 'rgb'.
Returns:
str: full path to downloaded_models directory
"""
Expand Down Expand Up @@ -365,30 +382,45 @@ def define_segmentation_classifier_model(input_shape, num_classes=2):

return keras.Model(inputs, outputs)

def run_inference_segmentation_classifier(path_to_model_ckpt,
path_to_inference_imgs,
output_folder,
result_path="",
def run_inference_segmentation_classifier(path_to_model_ckpt:str,
path_to_inference_imgs:str,
output_folder:str,
csv_path="",
good_path="",
bad_path="",
threshold=0.10):
"""
Runs the trained model on segmentation images, classifying them either as good or bad
Saves the results to a csv (image_path, class (good or bad), score (0 to 1)
Sorts the images into good or bad folders
Images should be '.jpg'
inputs:
path_to_model_ckpt (str): path to the saved keras model
path_to_inference_imgs (str): path to the folder containing images to run the model on
output_folder (str): path to save outputs to
result_path (str): csv path to save results to
csv_path (str): csv path to save results to
If not provided, the results will be saved to output_folder/classification_results.csv
threshold (float): threshold on sigmoid of model output (ex: 0.6 means mark images as good if model output is >= 0.6, or 60% sure it's a good image)
returns:
result_path (str): csv path of saved results
csv_path (str): csv path of saved results
good_path (str): path to the directory containing the good images
bad_path (str): path to the directory containing the bad images
"""
try:
os.mkdir(output_folder)
except:
pass

os.makedirs(output_folder,exist_ok=True)

if not good_path:
good_path = os.path.join(output_folder, 'good')
if not bad_path:
bad_path = os.path.join(output_folder, 'bad')

dirs = [output_folder, bad_path, good_path]
for d in dirs:
os.makedirs(d, exist_ok=True)


image_size = (512, 512)
model = define_segmentation_classifier_model(input_shape=image_size + (3,), num_classes=2)
# model.load_weights(resource_path, by_name=True, skip_mismatch=True) # this was temporary code to get it to work when the layers in the saved file did not match the layers in the defined model
Expand All @@ -399,6 +431,11 @@ def run_inference_segmentation_classifier(path_to_model_ckpt,
im_paths = []
for files in types:
im_paths.extend(glob.glob(os.path.join(path_to_inference_imgs, files)))

# If no files exist, return the csv, good, and bad paths. This assumes the files were previously sorted
if im_paths == []:
return csv_path,good_path,bad_path

model_scores = [None]*len(im_paths)
im_classes = [None]*len(im_paths)
i=0
Expand All @@ -416,12 +453,14 @@ def run_inference_segmentation_classifier(path_to_model_ckpt,
}
)

if not result_path:
result_path = os.path.join(output_folder, 'classification_results.csv')
if not csv_path:
csv_path = os.path.join(output_folder, 'classification_results.csv')

df.to_csv(result_path)
sort_images(result_path,
df.to_csv(csv_path)
good_path,bad_path=sort_images(csv_path,
output_folder,
good_path=good_path,
bad_path=bad_path,
threshold=threshold,
file_exts=['npz'],)
return result_path
return csv_path,good_path,bad_path

0 comments on commit 5bf323a

Please sign in to comment.