diff --git a/code/chapter-2/visualization.py b/code/chapter-2/visualization.py
index 6e9bb8b..6b4f947 100644
--- a/code/chapter-2/visualization.py
+++ b/code/chapter-2/visualization.py
@@ -2,7 +2,6 @@ import tensorflow
 from tensorflow.keras.preprocessing import image
 from tensorflow.keras.utils import get_file
-import json
 from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
 from tf_explain.core.grad_cam import GradCAM
@@ -10,133 +9,108 @@ import PIL
 from PIL import Image, ImageDraw, ImageFont
+import matplotlib.pyplot as plt
 import matplotlib.cm as cm
+import matplotlib.image as mpimg
 from argparse import ArgumentParser
 import glob
 import os
 
-CLASS_INDEX = None
-CLASS_INDEX_PATH = '../imagenet_class_index.json'
-
 model = VGG16(weights='imagenet', include_top=True, input_tensor=None, input_shape=None, pooling=None, classes=1000)
+# Check the layer names with 'model.summary()'
+last_conv_layer_name = "block5_conv3"
+# Include all layers between the last convolutional layer and the prediction layer
+classifier_layer_names = ["block5_pool", "flatten", "fc1", "fc2", "predictions"]
+
+def get_img_array(img_path, size):
+    img = tensorflow.keras.preprocessing.image.load_img(img_path, target_size=size)
+    # `array` is a float32 Numpy array
+    array = tensorflow.keras.preprocessing.image.img_to_array(img)
+    # We add a dimension to transform our array into a "batch"
+    # of size (1, 224, 224, 3)
+    array = np.expand_dims(array, axis=0)
+    return array
+
+
+def make_gradcam_heatmap(
+    img_path, model, last_conv_layer_name, classifier_layer_names, output_path
+):
+    img_array = preprocess_input(get_img_array(img_path, size=(224, 224)))
+    # First, we create a model that maps the input image to the activations
+    # of the last conv layer
+    last_conv_layer = model.get_layer(last_conv_layer_name)
+    last_conv_layer_model = tensorflow.keras.Model(model.inputs, last_conv_layer.output)
+
+    # Second, we create a model that maps the activations of the last conv
+    # layer to the final class predictions
+    classifier_input = tensorflow.keras.Input(shape=last_conv_layer.output.shape[1:])
+    x = classifier_input
+    for layer_name in classifier_layer_names:
+        x = model.get_layer(layer_name)(x)
+    classifier_model = tensorflow.keras.Model(classifier_input, x)
+
+    # Then, we compute the gradient of the top predicted class for our input image
+    # with respect to the activations of the last conv layer
+    with tensorflow.GradientTape() as tape:
+        # Compute activations of the last conv layer and make the tape watch it
+        last_conv_layer_output = last_conv_layer_model(img_array)
+        tape.watch(last_conv_layer_output)
+        # Compute class predictions
+        preds = classifier_model(last_conv_layer_output)
+        top_pred_index = tensorflow.argmax(preds[0])
+        top_class_channel = preds[:, top_pred_index]
+
+    # This is the gradient of the top predicted class with regard to
+    # the output feature map of the last conv layer
+    grads = tape.gradient(top_class_channel, last_conv_layer_output)
+
+    # This is a vector where each entry is the mean intensity of the gradient
+    # over a specific feature map channel
+    pooled_grads = tensorflow.reduce_mean(grads, axis=(0, 1, 2))
+
+    # We multiply each channel in the feature map array
+    # by "how important this channel is" with regard to the top predicted class
+    last_conv_layer_output = last_conv_layer_output.numpy()[0]
+    pooled_grads = pooled_grads.numpy()
+    for i in range(pooled_grads.shape[-1]):
+        last_conv_layer_output[:, :, i] *= pooled_grads[i]
+
+    # The channel-wise mean of the resulting feature map
+    # is our heatmap of class activation
+    heatmap = np.mean(last_conv_layer_output, axis=-1)
+
+    # For visualization purposes, we also normalize the heatmap between 0 & 1
+    heatmap = np.maximum(heatmap, 0) / np.max(heatmap)
+
+    # We load the original image
+    img = tensorflow.keras.preprocessing.image.load_img(img_path)
+    img = tensorflow.keras.preprocessing.image.img_to_array(img)
+
+    # We rescale the heatmap to the range 0-255
+    heatmap = np.uint8(255 * heatmap)
+
+    # We use the jet colormap to colorize the heatmap
+    jet = cm.get_cmap("jet")
+
+    # We use the RGB values of the colormap
+    jet_colors = jet(np.arange(256))[:, :3]
+    jet_heatmap = jet_colors[heatmap]
+
+    # We create an image with the RGB-colorized heatmap
+    jet_heatmap = tensorflow.keras.preprocessing.image.array_to_img(jet_heatmap)
+    jet_heatmap = jet_heatmap.resize((img.shape[1], img.shape[0]))
+    jet_heatmap = tensorflow.keras.preprocessing.image.img_to_array(jet_heatmap)
+
+    # Superimpose the heatmap on the original image
+    superimposed_img = jet_heatmap * 0.4 + img
+    superimposed_img = tensorflow.keras.preprocessing.image.array_to_img(superimposed_img)
+
+    # Save the superimposed image to the output path
+    superimposed_img.save(output_path)
 
-# Note: decode_predictions(preds, top) is originally a keras function.
-# We have modified it here so that it returns the index of the class label along with the predictions.
-# The results are assimilated based on the assumption that there is only one top 1% prediction.
-
-def decode_predictions_modified(preds, top=1):
-    global CLASS_INDEX
-    if len(preds.shape) != 2 or preds.shape[1] != 1000:
-        raise ValueError(
-            '`decode_predictions` expects ' 'a batch of predictions ''(i.e. a 2D array of shape (samples, 1000)). ' 'Found array with shape: ' + str(preds.shape))
-    if CLASS_INDEX is None:
-        fpath = get_file('imagenet_class_index.json',
-                         CLASS_INDEX_PATH, cache_subdir='models')
-        CLASS_INDEX = json.load(open(fpath))
-    results = []
-    for pred in preds:
-        top_indices = pred.argsort()[-top:][::-1]
-        for i in top_indices:
-            results = [i, tuple(CLASS_INDEX[str(i)]), (pred[i],)]
-    return results
-
-# Function that takes an image and model and produces the predictions
-
-def get_predictions(img, model):
-    x = image.img_to_array(img)
-    x = np.expand_dims(x, axis=0)
-    x = preprocess_input(x)
-    preds = model.predict(x)
-    return decode_predictions_modified(preds, top=1)
-
-# NOTE: These two functions are taken from Shao-Chuan Wang
-# as per the copyright on http://code.activestate.com/recipes/577591-conversion-of-pil-image-and-numpy-array/
-
-"""
-    Copyright 2011 Shao-Chuan Wang
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy
-    of this software and associated documentation files (the "Software"), to deal
-    in the Software without restriction, including without limitation the rights
-    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-    copies of the Software, and to permit persons to whom the Software is
-    furnished to do so, subject to the following conditions:
-
-    The above copyright notice and this permission notice shall be included in
-    all copies or substantial portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-    THE SOFTWARE.
-"""
-# Modified from the original format to take only the array as input and calculate the size on the fly
-def array_to_PIL(arr):
-    mode = 'RGBA'
-    # Use only the height and width for further processing
-    size = (arr.shape[0], arr.shape[1])  # (224,224)
-    arr = arr.reshape(arr.shape[0]*arr.shape[1], arr.shape[2])
-    if len(arr[0]) == 3:
-        arr = np.c_[arr, 255*np.ones((len(arr), 1), np.uint8)]
-    return Image.frombuffer(mode, size, arr.tostring(), 'raw', mode, 0, 1)
-
-# Function that puts text based prediction and class name on top of the image
-def overlay_prediction_on_image(img, prediction_class, prediction_probability, width, height):
-    img = img.resize((width, height), Image.ANTIALIAS)
-    draw = ImageDraw.Draw(img)
-    l = len(prediction_class)
-    # Place a black rectangle to provide a background for the text
-    # The size of the rectangle should change with respect to the image
-    draw.rectangle([int(width*0.05), int(width*0.05),
-                    int(width*0.5), int(width*0.11)], fill=(0, 0, 0))
-    draw.text((int(width*0.06), int(width*0.06)), '{0:.0f}'.format(
-        prediction_probability) + "% " + prediction_class, fill=(255, 255, 255))
-    return img
-
-# Based on StackOverflow code by user DTing
-# https://stackoverflow.com/questions/30227466/combine-several-images-horizontally-with-python
-
-
-def join_images(img1, img2):
-    widths, heights = zip(*(i.size for i in [img1, img2]))
-    total_width = sum(widths)
-    max_height = max(heights)
-    new_img = Image.new('RGB', (total_width, max_height))
-    x_offset = 0
-    for img in [img1, img2]:
-        new_img.paste(img, (x_offset, 0))
-        x_offset += img.size[0]
-    return new_img
-
-
-def process_image(image_path, output_path):
-    explainer = GradCAM()
-
-    img = image.load_img(image_path, target_size=(224, 224))
-    img = image.img_to_array(img)
-    data = ([img], None)
-
-    original_image = Image.open(image_path)
-    width = int(original_image.size[0]/4)
-    height = int(original_image.size[1]/4)
-    original_image.thumbnail((width, height), Image.ANTIALIAS)
-
-    class_index, class_name, prob_value = get_predictions(img, model)
-    heatmap = explainer.explain(data, model, "block5_conv3", class_index)
-
-    # overlay the text prediction on the heatmap overlay
-    heatmap_with_prediction_overlayed = overlay_prediction_on_image(
-        array_to_PIL(heatmap), class_name[-1], prob_value[0] * 100, width, height)
-
-    # place the images side by side
-    joined_image = join_images(
-        original_image, heatmap_with_prediction_overlayed)
-    joined_image.save(output_path)
 
 def process_video(videoframes_path, output_prefix):
     counter = 0
@@ -146,8 +120,8 @@ def process_video(videoframes_path, output_prefix):
     for input_path in sorted(glob.glob(videoframes_path + "/*.jpg")):
         counter += 1
         output_path = output_dir + "/result-" + str(counter).zfill(4) + '.jpg'
-        process_image(input_path, output_path)
-
+
+        make_gradcam_heatmap(input_path, model, last_conv_layer_name, classifier_layer_names, output_path)
 
 def get_command_line_arguments():
     parser = ArgumentParser()
@@ -163,8 +137,14 @@ def get_command_line_arguments():
     if args.process_type == "image":
         image_path = args.path
         output_prefix = os.path.splitext(os.path.basename(image_path))[0]
-        process_image(image_path, output_prefix + "_output.jpg")
+        make_gradcam_heatmap(image_path, model, last_conv_layer_name,
+                             classifier_layer_names, output_prefix + "_output.jpg")
+
+        # Plot the superimposed image
+        img = mpimg.imread(output_prefix + "_output.jpg")
+        plt.imshow(img)
+        plt.show()
+
     elif args.process_type == "video":
         videoframes_path = args.path
         output_prefix = os.path.dirname(videoframes_path)
         process_video(videoframes_path, output_prefix)
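
For quick verification of the patch, a minimal usage sketch of the new entry point follows. It assumes the definitions above (model, last_conv_layer_name, classifier_layer_names, make_gradcam_heatmap) are already in scope, e.g. in an interactive session after running the module's top section; "sample.jpg" is a placeholder path, not a file shipped with the chapter.

# Minimal sketch: invoke the new Grad-CAM entry point directly.
# "sample.jpg" is a hypothetical input; substitute any RGB image.
make_gradcam_heatmap(
    "sample.jpg",              # input image
    model,                     # pretrained VGG16 created at module level
    last_conv_layer_name,      # "block5_conv3" for VGG16
    classifier_layer_names,    # layers between the last conv layer and the predictions
    "sample_output.jpg",       # superimposed heatmap is written here
)

The script's own CLI path does the same thing for single images and frame directories; the exact flags registered in get_command_line_arguments() are elided from this hunk, but they populate args.process_type and args.path as used above.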