From a80c7fcf60ce8a69f567d17bc380c141ad194a52 Mon Sep 17 00:00:00 2001
From: shokiami <kiami.sho@gmail.com>
Date: Sun, 28 Nov 2021 19:10:49 -0800
Subject: [PATCH 1/3] Implemented multicropping.

---
 CropRunner.py | 56 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 34 insertions(+), 22 deletions(-)

diff --git a/CropRunner.py b/CropRunner.py
index 982111c..4c80df0 100644
--- a/CropRunner.py
+++ b/CropRunner.py
@@ -41,6 +41,12 @@
 # Mark the center of the crop?
 mark_center = True
 
+# Number of crops generated per label when multicrop is enabled (each one larger than the last)
+MULTICROP_COUNT = 3
+
+# Factor by which the crop width and height grow from one multicrop to the next
+MULTICROP_SCALE_FACTOR = 1.25
+
 logging.basicConfig(filename='crop.log', level=logging.DEBUG)
 
 def predict_crop_size(sv_image_y):
@@ -70,18 +76,20 @@ def predict_crop_size(sv_image_y):
 
     return crop_size
 
-def make_single_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, crop_destination, draw_mark=False):
+def make_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, destination_dir, label_name, multicrop=True, draw_mark=False):
     """
     Makes a crop around the object of interest
     :param path_to_image: where the GSV pano is stored
     :param sv_image_x: position
     :param sv_image_y: position
     :param PanoYawDeg: heading
-    :param output_filename: name of file for saving
+    :param destination_dir: path of the crop directory
+    :param label_name: label name
+    :param multicrop: whether or not to make multiple crops for the label
     :param draw_mark: if a dot should be drawn in the centre of the object/image
     :return: none
     """
-    try: 
+    try:
         im = Image.open(pano_img_path)
         # draw = ImageDraw.Draw(im)
 
@@ -108,10 +116,22 @@ def make_single_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, crop_d
         # print("Plotting at " + str(x) + "," + str(y) + " using yaw " + str(pano_yaw_deg))
 
         # print(x, y)
-        top_left_x = x - crop_width / 2
-        top_left_y = y - crop_height / 2
-        cropped_square = im.crop((top_left_x, top_left_y, top_left_x + crop_width, top_left_y + crop_height))
-        cropped_square.save(crop_destination)
+        for i in range(MULTICROP_COUNT):
+            top_left_x = x - crop_width / 2
+            top_left_y = y - crop_height / 2
+            cropped_square = im.crop((top_left_x, top_left_y, top_left_x + crop_width, top_left_y + crop_height))
+            crop_name = label_name +  "_" + chr(ord('a') + i) + ".jpg"
+            crop_destination = os.path.join(destination_dir, crop_name)
+            if not os.path.exists(crop_destination):
+                cropped_square.save(crop_destination)
+                print("Successfully extracted crop to " + crop_name)
+                logging.info(label_name + " " + pano_img_path + " " + str(sv_image_x)
+                             + " " + str(sv_image_y) + " " + str(pano_yaw_deg))
+                logging.info("---------------------------------------------------")
+            if not multicrop:
+                break
+            crop_width *= MULTICROP_SCALE_FACTOR
+            crop_height *= MULTICROP_SCALE_FACTOR
         im.close()
     except Exception as e:
         print(e)
@@ -199,26 +219,18 @@ def crop_label_subset(input_rows, output_rows, path_to_gsv_scrapes, destination_
                 os.makedirs(destination_folder)
 
             if not label_type == 0:
-                label_id = int(row[7])
-                crop_name = str(label_id) + ".jpg"  
+                label_name = str(row[7])
+                make_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, destination_dir, label_name, True)
             else:
                 # In order to uniquely identify null crops, we concatenate the pid of process they
                 # were generated on and the counter within the process to the name of the null crop.
-                crop_name = "null_" + str(process_pid) + "_" +  str(counter) + ".jpg"
-
-            crop_destination = os.path.join(destination_dir, crop_name)
-
-            if not os.path.exists(crop_destination):
-                make_single_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, crop_destination, False)
-                print("Successfully extracted crop to " + crop_name)
-                logging.info(crop_name + " " + pano_id + " " + str(sv_image_x)
-                             + " " + str(sv_image_y) + " " + str(pano_yaw_deg))
-                logging.info("---------------------------------------------------")
+                label_name = "null_" + str(process_pid) + "_" +  str(counter)
+                make_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, destination_dir, label_name, False)
 
-            output_rows.append([crop_name, label_type])
+            output_rows.append([label_name, label_type])
         else:
             print("Panorama image not found.")
             try:
-                logging.warning("Skipped label id " + str(label_id) + " due to missing image.")
+                logging.warning("Skipped label id " + label_name + " due to missing image.")
             except NameError:
-                logging.warning("Skipped null crop " + str(process_pid) + " " + str(counter) + " due to missing image.")
\ No newline at end of file
+                logging.warning("Skipped null crop " + str(process_pid) + " " + str(counter) + " due to missing image.")

From d1f7badc9adcf9dea037a831f423ebb2ec602298 Mon Sep 17 00:00:00 2001
From: shokiami <kiami.sho@gmail.com>
Date: Sun, 28 Nov 2021 19:55:11 -0800
Subject: [PATCH 2/3] Added bounds checking.

---
 CropRunner.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/CropRunner.py b/CropRunner.py
index 4c80df0..ff939eb 100644
--- a/CropRunner.py
+++ b/CropRunner.py
@@ -109,7 +109,7 @@ def make_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, destination_d
         x = ((float(pano_yaw_deg) / 360) * im_width + sv_image_x) % im_width
         y = im_height / 2 - sv_image_y
 
-        r = 10
+        # r = 10
         # if draw_mark:
         #     draw.ellipse((x - r, y - r, x + r, y + r), fill=128)
 
@@ -119,14 +119,16 @@ def make_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, destination_d
         for i in range(MULTICROP_COUNT):
             top_left_x = x - crop_width / 2
             top_left_y = y - crop_height / 2
-            cropped_square = im.crop((top_left_x, top_left_y, top_left_x + crop_width, top_left_y + crop_height))
-            crop_name = label_name +  "_" + chr(ord('a') + i) + ".jpg"
+            if multicrop:
+                crop_name = label_name + "_" + str(i) + ".jpg"
+            else:
+                crop_name = label_name + ".jpg"
             crop_destination = os.path.join(destination_dir, crop_name)
-            if not os.path.exists(crop_destination):
+            if not os.path.exists(crop_destination) and 0 <= top_left_x and top_left_x + crop_width <= im_width and 0 <= top_left_y and top_left_y + crop_height <= im_height:
+                cropped_square = im.crop((top_left_x, top_left_y, top_left_x + crop_width, top_left_y + crop_height))
                 cropped_square.save(crop_destination)
                 print("Successfully extracted crop to " + crop_name)
-                logging.info(label_name + " " + pano_img_path + " " + str(sv_image_x)
-                             + " " + str(sv_image_y) + " " + str(pano_yaw_deg))
+                logging.info(label_name + " " + pano_img_path + " " + str(sv_image_x) + " " + str(sv_image_y) + " " + str(pano_yaw_deg))
                 logging.info("---------------------------------------------------")
             if not multicrop:
                 break

From 99e39fadf7072534a548c2a06186b679e9fc1003 Mon Sep 17 00:00:00 2001
From: shokiami <kiami.sho@gmail.com>
Date: Sun, 28 Nov 2021 20:06:57 -0800
Subject: [PATCH 3/3] Merge branch 'main' of
 github.com:ThatOneGoat/sidewalk-cv-2021 into 22-multiple-crops-per-label

---
 analyze_results.py      | 10 ++---
 eval.py                 | 50 +++++++++++++++---------
 train.py                | 64 ++++++++++++++++++-------------
 utils/training_utils.py | 85 ++++++++++++++++++++++++++++++++++++-----
 4 files changed, 151 insertions(+), 58 deletions(-)

diff --git a/analyze_results.py b/analyze_results.py
index 9bdd8e6..d00a6c9 100644
--- a/analyze_results.py
+++ b/analyze_results.py
@@ -9,8 +9,8 @@
 if not os.path.isdir(VISUALIZATIONS_PATH):
     os.makedirs(VISUALIZATIONS_PATH)
 
-SESSION_NAME = 'regnet_save.pt'
-TRAIN_SAVE_PATH = "./datasets/" + SESSION_NAME
+SESSION_NAME = 'efficientnetb3_weighted_loss_save'
+TRAIN_SAVE_PATH = "./datasets/" + SESSION_NAME + ".pt"
 label_types = {
     0: "null",
     1: "curb ramp",
@@ -34,7 +34,7 @@ def plot_label_metric(metric_name):
     plt.xlabel("epoch", fontsize=16)
     plt.ylabel(metric_name, fontsize=16)
     plt.legend(prop={'size': 16})
-    plt.savefig(VISUALIZATIONS_PATH + metric_name)
+    plt.savefig(VISUALIZATIONS_PATH + metric_name + "_" + SESSION_NAME)
 
 plot_label_metric('precision_validation')
 plot_label_metric('precision_train')
@@ -48,7 +48,7 @@ def plot_label_metric(metric_name):
 plt.xlabel("epoch", fontsize=16)
 plt.ylabel("accuracy", fontsize=16)
 plt.legend(prop={'size': 16})
-plt.savefig(VISUALIZATIONS_PATH + "accuracies")
+plt.savefig(VISUALIZATIONS_PATH + "accuracies_" + SESSION_NAME)
 
 figure(figsize=(16, 12))
 plt.plot(np.arange(epochs), metrics['loss_train'], label = 'train loss')
@@ -57,4 +57,4 @@ def plot_label_metric(metric_name):
 plt.xlabel("epoch", fontsize=16)
 plt.ylabel("loss", fontsize=16)
 plt.legend(prop={'size': 16})
-plt.savefig(VISUALIZATIONS_PATH + "losses")
+plt.savefig(VISUALIZATIONS_PATH + "losses_" + SESSION_NAME)
diff --git a/eval.py b/eval.py
index 69cdc0e..7b373d7 100644
--- a/eval.py
+++ b/eval.py
@@ -3,7 +3,7 @@
 import torch.nn as nn
 import torchvision
 from datatypes.dataset import SidewalkCropsDataset
-from utils.training_utils import load_best_weights, evaluate
+from utils.training_utils import get_pretrained_model, load_best_weights, evaluate
 from utils.visualization_utils import plot_confusion_matrix
 from torchvision import transforms
 
@@ -12,6 +12,21 @@
     print("made visualization folder")
     os.makedirs(VISUALIZATIONS_PATH)
 
+# set base path to test data folder
+BASE_PATH = "./datasets/"
+
+# name of model architecture
+MODEL_NAME = "efficientnet"
+
+# number of output classes
+NUM_CLASSES = 5  # (1,2,3,4) for label types, 0 for null crops
+
+# the actual classes
+CLASSES = ["null", "curb ramp", "missing ramp", "obstruction", "sfc problem"]
+
+# name of training session for loading purposes
+SESSION_NAME = "efficientnet-no-pretrained-weights"
+
 # check for GPU
 if torch.cuda.is_available():  
   dev = "cuda" 
@@ -20,15 +35,25 @@
 device = torch.device(dev) 
 print(device)
 
+# =================================================================================================
+# load model for evaluation
+model, input_size = get_pretrained_model(MODEL_NAME, NUM_CLASSES, False)
+model.to(device)
+
+pretrained_save_path = BASE_PATH + SESSION_NAME + ".pt"
+load_best_weights(model, pretrained_save_path)
+
+loss_func = nn.CrossEntropyLoss()
+
+# =================================================================================================
 # load our custom test sidewalk crops dataset
 image_transform = transforms.Compose([
   transforms.Resize(256),
-  transforms.CenterCrop(224),
+  transforms.CenterCrop(input_size),
   transforms.ToTensor(),
   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
 ])
 
-BASE_PATH = "./datasets/"
 test_labels_csv_path = BASE_PATH + "test_crop_info.csv"
 test_img_dir = BASE_PATH + "test_crops/"
 test_dataset = SidewalkCropsDataset(test_labels_csv_path, test_img_dir, image_transform)
@@ -37,19 +62,10 @@
 
 test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=8)
 
-# load model for evaluation
-efficientnetb3 = torchvision.models.resnet50(pretrained = True)
-num_ftrs = efficientnetb3.fc.in_features
-efficientnetb3.fc = nn.Linear(num_ftrs, 5) # (1,2,3,4) for label types, 0 for null crops 
-efficientnetb3.to(device)
-loss_func = nn.CrossEntropyLoss()
-
-pretrained_save_path = BASE_PATH + "resnet50_weighted_loss.pt"
-load_best_weights(efficientnetb3, pretrained_save_path)
-
+# =================================================================================================
 # evaluate loaded model on test set
-test_accuracy, test_loss, cm = evaluate(efficientnetb3, loss_func, test_dataloader, True, device)
-print("Test accuracy for ResNet50 as FT: ", test_accuracy)
-print("Test loss for ResNet50 as FT: ", test_loss)
+test_accuracy, test_loss, cm = evaluate(model, (MODEL_NAME == "inception"), loss_func, test_dataloader, True, device)
+print("Test accuracy for {} as FT: ".format(MODEL_NAME), test_accuracy)
+print("Test loss for {} as FT: ".format(MODEL_NAME), test_loss)
 if cm is not None:
-  plot_confusion_matrix(VISUALIZATIONS_PATH, "resnet50-weighted-loss", cm, ["null", "curb ramp", "missing ramp", "obstruction", "sfc problem"], normalize=True)
+  plot_confusion_matrix(VISUALIZATIONS_PATH, SESSION_NAME, cm, CLASSES, normalize=True)
diff --git a/train.py b/train.py
index aa8a2e3..40a45c0 100644
--- a/train.py
+++ b/train.py
@@ -11,6 +11,15 @@
 # set base path to training/test data folder
 BASE_PATH = "./datasets/"
 
+# name of model architecture
+MODEL_NAME = "efficientnet"
+
+# number of output classes
+NUM_CLASSES = 5  # (1,2,3,4) for label types, 0 for null crops
+
+# name of training session for saving purposes
+TRAIN_SESSION_NAME = "efficientnet-no-pretrained-weights"
+
 # check for GPU
 if torch.cuda.is_available():  
   dev = "cuda" 
@@ -19,10 +28,32 @@
 device = torch.device(dev) 
 print(device)
 
+# =================================================================================================
+# setup model for fine tuning
+model, input_size = get_pretrained_model(MODEL_NAME, NUM_CLASSES, False)
+model.to(device)
+
+lr = 0.01
+
+# weight using inverse of each sample size
+# acquire label sample sizes from train csv
+# samples_per_class = np.array([10000, 11187, 8788, 2678, 7204])
+# weights = 1.0 / samples_per_class
+# norm = np.linalg.norm(weights)
+# normalized_weights = weights / norm
+# normalized_weights_tensor = torch.from_numpy(normalized_weights).float().to(device)
+
+# add normalized_weights_tensor as input to loss_func if weighted loss is desired
+loss_func = nn.CrossEntropyLoss()
+optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-6)
+scheduler = lr_scheduler.StepLR(optimizer, 10, gamma=0.3)
+checkpoint_save_path = BASE_PATH + TRAIN_SESSION_NAME + ".pt"
+
+# =================================================================================================
 # load train datasets
 image_transform = transforms.Compose([
   transforms.Resize(256),
-  transforms.CenterCrop(224),
+  transforms.CenterCrop(input_size),
   transforms.ToTensor(),
   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
 ])
@@ -30,10 +61,9 @@
 # having issues with CUDA running out of memory, so lowering batch size
 batch_size = 16
 
-train_labels_csv_path = BASE_PATH + "train_non_null_crop_info.csv"
+train_labels_csv_path = BASE_PATH + "train_subset_crop_info.csv"
 train_img_dir = BASE_PATH + "train_crops/"
 
-
 # load our custom train/val sidewalk crops dataset
 train_val_dataset = SidewalkCropsDataset(train_labels_csv_path, train_img_dir, image_transform)
 
@@ -53,20 +83,8 @@
 print(len(train_dataset))
 print(len(val_dataset))
 
-# get resnet50 for fine tuning
-model = get_pretrained_model()
-num_ftrs = model.fc.in_features
-model.fc = nn.Linear(num_ftrs, 5) # (1,2,3,4) for label types, 0 for null crops 
-model.to(device)
-
-lr = 0.01
-
-loss_func = nn.CrossEntropyLoss()
-optimizer = torch.optim.Adam(model.parameters(), lr=lr)
-scheduler = lr_scheduler.StepLR(optimizer, 10, gamma=0.1)
-checkpoint_save_path = BASE_PATH + "regnet_save.pt"
-
-# train for 20 epochs
+# =================================================================================================
+# train for n epochs
 epochs = 50
 dataLoaders = {
   "training": train_dataloader,
@@ -76,13 +94,5 @@
 print("next epoch: " + str(last_epoch + 1))
 print("resuming training...\n")
 
-train(model, optimizer, scheduler, loss_func, epochs, dataLoaders, checkpoint_save_path, metrics, last_epoch + 1, device)
-# print("Best validation accuracy: ", best_validation_accuracy)
-
-# visualization of training and validation loss over epochs
-plt.plot(np.arange(epochs), metrics['loss_train'], label="training loss")
-plt.plot(np.arange(epochs), metrics['loss_validation'], label="validation loss")
-plt.title("Training/Validation loss for FT model")
-plt.xlabel("epoch")
-plt.ylabel("loss")
-plt.legend()
+train(model, (MODEL_NAME == "inception"), optimizer, scheduler, loss_func, epochs, dataLoaders,
+      checkpoint_save_path, metrics, last_epoch + 1, device)
diff --git a/utils/training_utils.py b/utils/training_utils.py
index 1d3d847..5d5dd24 100644
--- a/utils/training_utils.py
+++ b/utils/training_utils.py
@@ -2,12 +2,60 @@
 import numpy as np
 import os
 import torch
+import torch.nn as nn
 import torchvision
 from sklearn.metrics import confusion_matrix
 from time import perf_counter
 
-def get_pretrained_model():
-  return torchvision.models.regnet_y_8gf(pretrained = True)
+def get_pretrained_model(model_name, num_classes, use_pretrained=True):
+  """Build a torchvision classifier whose final layer outputs num_classes logits.
+
+  :param model_name: one of "resnet", "inception", "efficientnet" or "regnet"
+  :param num_classes: number of output classes for the replaced classifier head
+  :param use_pretrained: forwarded to torchvision as its ``pretrained`` argument
+  :return: tuple (model, input_size) where input_size is the square crop side
+           length (in pixels) the caller should feed to the model
+  :raises ValueError: if model_name is not one of the supported names
+  """
+  if model_name == "resnet":
+    # ResNet50
+    model_ft = torchvision.models.resnet50(pretrained=use_pretrained)
+    model_ft.fc = nn.Linear(model_ft.fc.in_features, num_classes)
+    input_size = 224
+
+  elif model_name == "inception":
+    # Inception v3: expects (299, 299) images and, in training mode, also
+    # returns an auxiliary output, so both classifier heads are replaced.
+    model_ft = torchvision.models.inception_v3(pretrained=use_pretrained)
+    # Handle the auxiliary net
+    aux_ftrs = model_ft.AuxLogits.fc.in_features
+    model_ft.AuxLogits.fc = nn.Linear(aux_ftrs, num_classes)
+    # Handle the primary net
+    model_ft.fc = nn.Linear(model_ft.fc.in_features, num_classes)
+    input_size = 299
+
+  elif model_name == "efficientnet":
+    # EfficientNetB3: the output layer lives at classifier[1]
+    model_ft = torchvision.models.efficientnet_b3(pretrained=use_pretrained)
+    num_ftrs = model_ft.classifier[1].in_features
+    model_ft.classifier[1] = nn.Linear(num_ftrs, num_classes)
+    input_size = 224
+
+  elif model_name == "regnet":
+    # RegNet-y, 8gF
+    model_ft = torchvision.models.regnet_y_8gf(pretrained=use_pretrained)
+    model_ft.fc = nn.Linear(model_ft.fc.in_features, num_classes)
+    input_size = 224
+
+  else:
+    # Raise rather than print + exit(): a bare exit() ends the run with a
+    # success status, which hides a misconfigured MODEL_NAME from callers.
+    raise ValueError("Invalid model name: {!r}".format(model_name))
+
+  return model_ft, input_size
+
+# # Initialize the model for this run
+# model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)
 
 def save_training_checkpoint(training_states, best_model_state, metrics, epoch, path):
   # add things like TPR, FPR later when we start evaluating them
@@ -48,7 +96,7 @@ def load_best_weights(model, path):
   model.load_state_dict(checkpoint['best_model_state'])
 
 
-def train(model, optimizer, scheduler, loss_func, epochs, datasetLoaders, save_path, metrics, start_epoch, device):
+def train(model, is_inception, optimizer, scheduler, loss_func, epochs, datasetLoaders, save_path, metrics, start_epoch, device):
   t_start = perf_counter()
 
   best_model_state = copy.deepcopy(model.state_dict())
@@ -91,10 +139,19 @@ def train(model, optimizer, scheduler, loss_func, epochs, datasetLoaders, save_p
         optimizer.zero_grad()
         use_grad = (mode == 'training')
         with torch.set_grad_enabled(use_grad):
-          outputs = model(inputs)
-          loss = loss_func(outputs, labels)
+          if is_inception and use_grad:
+            # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
+            outputs, aux_outputs = model(inputs)
+            loss1 = loss_func(outputs, labels)
+            loss2 = loss_func(aux_outputs, labels)
+            loss = loss1 + 0.4*loss2
+          else:
+            outputs = model(inputs)
+            loss = loss_func(outputs, labels)
+
           _, preds = torch.max(outputs, 1)
           preds.to(device)
+
           if use_grad:
             # We are training, so make sure to actually
             # train by using loss/stepping.
@@ -136,7 +193,7 @@ def train(model, optimizer, scheduler, loss_func, epochs, datasetLoaders, save_p
         metrics['loss_train'].append(loss_avg)
         metrics['accuracy_train'].append(accuracy)
         # make sure to step through lr update schedule
-        #scheduler.step()
+        scheduler.step()
 
     training_states = {'model': model, 'optimizer': optimizer, 'scheduler': scheduler}
     save_training_checkpoint(training_states, best_model_state, metrics, epoch, save_path)
@@ -149,7 +206,7 @@ def train(model, optimizer, scheduler, loss_func, epochs, datasetLoaders, save_p
   print("Elapsed time during training in seconds",
                                         t_stop-t_start)
 
-def evaluate(model, loss_func, dataset_loader, test, device):
+def evaluate(model, is_inception, loss_func, dataset_loader, test, device):
   # put model into eval mode
   model.eval()
   
@@ -161,13 +218,23 @@ def evaluate(model, loss_func, dataset_loader, test, device):
   lbllist=torch.zeros(0, dtype=torch.long, device='cpu')
   conf_mat = None
 
+  epoch_count = 0
+
   # correct predictions.
   correct = 0
   total_loss = 0
   with torch.no_grad():
     for inputs, labels in dataset_loader:
       inputs, labels = inputs.to(device), labels.to(device)
-      outputs = model(inputs)
+      # epoch_count tracks samples processed so far; report progress as a real percentage
+      epoch_count += inputs.size(0)
+      print("percent {:.1f}".format(100 * epoch_count / n))
+
+      # model.eval() was called above, and in eval mode torchvision's inception_v3
+      # returns only the main logits (the aux head is training-only), so no tuple
+      # unpacking is needed; is_inception is kept for signature parity with train().
+      outputs = model(inputs)
+
       loss = loss_func(outputs, labels)
       _, predictions = torch.max(outputs, 1)
 
@@ -183,4 +250,4 @@ def evaluate(model, loss_func, dataset_loader, test, device):
     conf_mat = confusion_matrix(lbllist.numpy(), predlist.numpy())
     print(conf_mat)
 
-  return  correct / n, total_loss / n, conf_mat
\ No newline at end of file
+  return  correct / n, total_loss / n, conf_mat