From a80c7fcf60ce8a69f567d17bc380c141ad194a52 Mon Sep 17 00:00:00 2001 From: shokiami Date: Sun, 28 Nov 2021 19:10:49 -0800 Subject: [PATCH 1/3] Implemented multicropping. --- CropRunner.py | 56 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/CropRunner.py b/CropRunner.py index 982111c..4c80df0 100644 --- a/CropRunner.py +++ b/CropRunner.py @@ -41,6 +41,12 @@ # Mark the center of the crop? mark_center = True +# The number of crops per multicrop +MULTICROP_COUNT = 3 + +# The scale factor for each multicrop +MULTICROP_SCALE_FACTOR = 1.25 + logging.basicConfig(filename='crop.log', level=logging.DEBUG) def predict_crop_size(sv_image_y): @@ -70,18 +76,20 @@ def predict_crop_size(sv_image_y): return crop_size -def make_single_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, crop_destination, draw_mark=False): +def make_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, destination_dir, label_name, multicrop=True, draw_mark=False): """ Makes a crop around the object of interest :param path_to_image: where the GSV pano is stored :param sv_image_x: position :param sv_image_y: position :param PanoYawDeg: heading - :param output_filename: name of file for saving + :param destination_dir: path of the crop directory + :param label_name: label name + :param multicrop: whether or not to make multiple crops for the label :param draw_mark: if a dot should be drawn in the centre of the object/image :return: none """ - try: + try: im = Image.open(pano_img_path) # draw = ImageDraw.Draw(im) @@ -108,10 +116,22 @@ def make_single_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, crop_d # print("Plotting at " + str(x) + "," + str(y) + " using yaw " + str(pano_yaw_deg)) # print(x, y) - top_left_x = x - crop_width / 2 - top_left_y = y - crop_height / 2 - cropped_square = im.crop((top_left_x, top_left_y, top_left_x + crop_width, top_left_y + crop_height)) - cropped_square.save(crop_destination) + for i 
in range(MULTICROP_COUNT): + top_left_x = x - crop_width / 2 + top_left_y = y - crop_height / 2 + cropped_square = im.crop((top_left_x, top_left_y, top_left_x + crop_width, top_left_y + crop_height)) + crop_name = label_name + "_" + chr(ord('a') + i) + ".jpg" + crop_destination = os.path.join(destination_dir, crop_name) + if not os.path.exists(crop_destination): + cropped_square.save(crop_destination) + print("Successfully extracted crop to " + crop_name) + logging.info(label_name + " " + pano_img_path + " " + str(sv_image_x) + + " " + str(sv_image_y) + " " + str(pano_yaw_deg)) + logging.info("---------------------------------------------------") + if not multicrop: + break + crop_width *= MULTICROP_SCALE_FACTOR + crop_height *= MULTICROP_SCALE_FACTOR im.close() except Exception as e: print(e) @@ -199,26 +219,18 @@ def crop_label_subset(input_rows, output_rows, path_to_gsv_scrapes, destination_ os.makedirs(destination_folder) if not label_type == 0: - label_id = int(row[7]) - crop_name = str(label_id) + ".jpg" + label_name = str(row[7]) + make_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, destination_dir, label_name, True) else: # In order to uniquely identify null crops, we concatenate the pid of process they # were generated on and the counter within the process to the name of the null crop. 
- crop_name = "null_" + str(process_pid) + "_" + str(counter) + ".jpg" - - crop_destination = os.path.join(destination_dir, crop_name) - - if not os.path.exists(crop_destination): - make_single_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, crop_destination, False) - print("Successfully extracted crop to " + crop_name) - logging.info(crop_name + " " + pano_id + " " + str(sv_image_x) - + " " + str(sv_image_y) + " " + str(pano_yaw_deg)) - logging.info("---------------------------------------------------") + label_name = "null_" + str(process_pid) + "_" + str(counter) + make_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, destination_dir, label_name, False) - output_rows.append([crop_name, label_type]) + output_rows.append([label_name, label_type]) else: print("Panorama image not found.") try: - logging.warning("Skipped label id " + str(label_id) + " due to missing image.") + logging.warning("Skipped label id " + label_name + " due to missing image.") except NameError: - logging.warning("Skipped null crop " + str(process_pid) + " " + str(counter) + " due to missing image.") \ No newline at end of file + logging.warning("Skipped null crop " + str(process_pid) + " " + str(counter) + " due to missing image.") From d1f7badc9adcf9dea037a831f423ebb2ec602298 Mon Sep 17 00:00:00 2001 From: shokiami Date: Sun, 28 Nov 2021 19:55:11 -0800 Subject: [PATCH 2/3] Added bounds checking. 
--- CropRunner.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/CropRunner.py b/CropRunner.py index 4c80df0..ff939eb 100644 --- a/CropRunner.py +++ b/CropRunner.py @@ -109,7 +109,7 @@ def make_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, destination_d x = ((float(pano_yaw_deg) / 360) * im_width + sv_image_x) % im_width y = im_height / 2 - sv_image_y - r = 10 + # r = 10 # if draw_mark: # draw.ellipse((x - r, y - r, x + r, y + r), fill=128) @@ -119,14 +119,16 @@ def make_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, destination_d for i in range(MULTICROP_COUNT): top_left_x = x - crop_width / 2 top_left_y = y - crop_height / 2 - cropped_square = im.crop((top_left_x, top_left_y, top_left_x + crop_width, top_left_y + crop_height)) - crop_name = label_name + "_" + chr(ord('a') + i) + ".jpg" + if multicrop: + crop_name = label_name + "_" + str(i) + ".jpg" + else: + crop_name = label_name + ".jpg" crop_destination = os.path.join(destination_dir, crop_name) - if not os.path.exists(crop_destination): + if not os.path.exists(crop_destination) and 0 <= top_left_x and top_left_x + crop_width <= im_width and 0 <= top_left_y and top_left_y + crop_height <= im_height: + cropped_square = im.crop((top_left_x, top_left_y, top_left_x + crop_width, top_left_y + crop_height)) cropped_square.save(crop_destination) print("Successfully extracted crop to " + crop_name) - logging.info(label_name + " " + pano_img_path + " " + str(sv_image_x) - + " " + str(sv_image_y) + " " + str(pano_yaw_deg)) + logging.info(label_name + " " + pano_img_path + " " + str(sv_image_x) + " " + str(sv_image_y) + " " + str(pano_yaw_deg)) logging.info("---------------------------------------------------") if not multicrop: break From 99e39fadf7072534a548c2a06186b679e9fc1003 Mon Sep 17 00:00:00 2001 From: shokiami Date: Sun, 28 Nov 2021 20:06:57 -0800 Subject: [PATCH 3/3] Merge branch 'main' of github.com:ThatOneGoat/sidewalk-cv-2021 into 
22-multiple-crops-per-label --- analyze_results.py | 10 ++--- eval.py | 50 +++++++++++++++--------- train.py | 64 ++++++++++++++++++------------- utils/training_utils.py | 85 ++++++++++++++++++++++++++++++++++++----- 4 files changed, 151 insertions(+), 58 deletions(-) diff --git a/analyze_results.py b/analyze_results.py index 9bdd8e6..d00a6c9 100644 --- a/analyze_results.py +++ b/analyze_results.py @@ -9,8 +9,8 @@ if not os.path.isdir(VISUALIZATIONS_PATH): os.makedirs(VISUALIZATIONS_PATH) -SESSION_NAME = 'regnet_save.pt' -TRAIN_SAVE_PATH = "./datasets/" + SESSION_NAME +SESSION_NAME = 'efficientnetb3_weighted_loss_save' +TRAIN_SAVE_PATH = "./datasets/" + SESSION_NAME + ".pt" label_types = { 0: "null", 1: "curb ramp", @@ -34,7 +34,7 @@ def plot_label_metric(metric_name): plt.xlabel("epoch", fontsize=16) plt.ylabel(metric_name, fontsize=16) plt.legend(prop={'size': 16}) - plt.savefig(VISUALIZATIONS_PATH + metric_name) + plt.savefig(VISUALIZATIONS_PATH + metric_name + "_" + SESSION_NAME) plot_label_metric('precision_validation') plot_label_metric('precision_train') @@ -48,7 +48,7 @@ def plot_label_metric(metric_name): plt.xlabel("epoch", fontsize=16) plt.ylabel("accuracy", fontsize=16) plt.legend(prop={'size': 16}) -plt.savefig(VISUALIZATIONS_PATH + "accuracies") +plt.savefig(VISUALIZATIONS_PATH + "accuracies_" + SESSION_NAME) figure(figsize=(16, 12)) plt.plot(np.arange(epochs), metrics['loss_train'], label = 'train loss') @@ -57,4 +57,4 @@ def plot_label_metric(metric_name): plt.xlabel("epoch", fontsize=16) plt.ylabel("loss", fontsize=16) plt.legend(prop={'size': 16}) -plt.savefig(VISUALIZATIONS_PATH + "losses") +plt.savefig(VISUALIZATIONS_PATH + "losses_" + SESSION_NAME) diff --git a/eval.py b/eval.py index 69cdc0e..7b373d7 100644 --- a/eval.py +++ b/eval.py @@ -3,7 +3,7 @@ import torch.nn as nn import torchvision from datatypes.dataset import SidewalkCropsDataset -from utils.training_utils import load_best_weights, evaluate +from utils.training_utils import 
get_pretrained_model, load_best_weights, evaluate from utils.visualization_utils import plot_confusion_matrix from torchvision import transforms @@ -12,6 +12,21 @@ print("made visualization folder") os.makedirs(VISUALIZATIONS_PATH) +# set base path to test data folder +BASE_PATH = "./datasets/" + +# name of model architecture +MODEL_NAME = "efficientnet" + +# number of output classes +NUM_CLASSES = 5 # (1,2,3,4) for label types, 0 for null crops + +# the actual classes +CLASSES = ["null", "curb ramp", "missing ramp", "obstruction", "sfc problem"] + +# name of training session for loading purposes +SESSION_NAME = "efficientnet-no-pretrained-weights" + # check for GPU if torch.cuda.is_available(): dev = "cuda" @@ -20,15 +35,25 @@ device = torch.device(dev) print(device) +# ================================================================================================= +# load model for evaluation +model, input_size = get_pretrained_model(MODEL_NAME, NUM_CLASSES, False) +model.to(device) + +pretrained_save_path = BASE_PATH + SESSION_NAME + ".pt" +load_best_weights(model, pretrained_save_path) + +loss_func = nn.CrossEntropyLoss() + +# ================================================================================================= # load our custom test sidewalk crops dataset image_transform = transforms.Compose([ transforms.Resize(256), - transforms.CenterCrop(224), + transforms.CenterCrop(input_size), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) -BASE_PATH = "./datasets/" test_labels_csv_path = BASE_PATH + "test_crop_info.csv" test_img_dir = BASE_PATH + "test_crops/" test_dataset = SidewalkCropsDataset(test_labels_csv_path, test_img_dir, image_transform) @@ -37,19 +62,10 @@ test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=8) -# load model for evaluation -efficientnetb3 = torchvision.models.resnet50(pretrained = True) -num_ftrs = efficientnetb3.fc.in_features 
-efficientnetb3.fc = nn.Linear(num_ftrs, 5) # (1,2,3,4) for label types, 0 for null crops -efficientnetb3.to(device) -loss_func = nn.CrossEntropyLoss() - -pretrained_save_path = BASE_PATH + "resnet50_weighted_loss.pt" -load_best_weights(efficientnetb3, pretrained_save_path) - +# ================================================================================================= # evaluate loaded model on test set -test_accuracy, test_loss, cm = evaluate(efficientnetb3, loss_func, test_dataloader, True, device) -print("Test accuracy for ResNet50 as FT: ", test_accuracy) -print("Test loss for ResNet50 as FT: ", test_loss) +test_accuracy, test_loss, cm = evaluate(model, (MODEL_NAME == "inception"), loss_func, test_dataloader, True, device) +print("Test accuracy for {} as FT: ".format(MODEL_NAME), test_accuracy) +print("Test loss for {} as FT: ".format(MODEL_NAME), test_loss) if cm is not None: - plot_confusion_matrix(VISUALIZATIONS_PATH, "resnet50-weighted-loss", cm, ["null", "curb ramp", "missing ramp", "obstruction", "sfc problem"], normalize=True) + plot_confusion_matrix(VISUALIZATIONS_PATH, SESSION_NAME, cm, CLASSES, normalize=True) diff --git a/train.py b/train.py index aa8a2e3..40a45c0 100644 --- a/train.py +++ b/train.py @@ -11,6 +11,15 @@ # set base path to training/test data folder BASE_PATH = "./datasets/" +# name of model architecture +MODEL_NAME = "efficientnet" + +# number of output classes +NUM_CLASSES = 5 # (1,2,3,4) for label types, 0 for null crops + +# name of training session for saving purposes +TRAIN_SESSION_NAME = "efficientnet-no-pretrained-weights" + # check for GPU if torch.cuda.is_available(): dev = "cuda" @@ -19,10 +28,32 @@ device = torch.device(dev) print(device) +# ================================================================================================= +# setup model for fine tuning +model, input_size = get_pretrained_model(MODEL_NAME, NUM_CLASSES, False) +model.to(device) + +lr = 0.01 + +# weight using inverse of each sample size 
+# acquire label sample sizes from train csv +# samples_per_class = np.array([10000, 11187, 8788, 2678, 7204]) +# weights = 1.0 / samples_per_class +# norm = np.linalg.norm(weights) +# normalized_weights = weights / norm +# normalized_weights_tensor = torch.from_numpy(normalized_weights).float().to(device) + +# add normalized_weights_tensor as input to loss_func if weighted loss is desired +loss_func = nn.CrossEntropyLoss() +optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-6) +scheduler = lr_scheduler.StepLR(optimizer, 10, gamma=0.3) +checkpoint_save_path = BASE_PATH + TRAIN_SESSION_NAME + ".pt" + +# ================================================================================================= # load train datasets image_transform = transforms.Compose([ transforms.Resize(256), - transforms.CenterCrop(224), + transforms.CenterCrop(input_size), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) @@ -30,10 +61,9 @@ # having issues with CUDA running out of memory, so lowering batch size batch_size = 16 -train_labels_csv_path = BASE_PATH + "train_non_null_crop_info.csv" +train_labels_csv_path = BASE_PATH + "train_subset_crop_info.csv" train_img_dir = BASE_PATH + "train_crops/" - # load our custom train/val sidewalk crops dataset train_val_dataset = SidewalkCropsDataset(train_labels_csv_path, train_img_dir, image_transform) @@ -53,20 +83,8 @@ print(len(train_dataset)) print(len(val_dataset)) -# get resnet50 for fine tuning -model = get_pretrained_model() -num_ftrs = model.fc.in_features -model.fc = nn.Linear(num_ftrs, 5) # (1,2,3,4) for label types, 0 for null crops -model.to(device) - -lr = 0.01 - -loss_func = nn.CrossEntropyLoss() -optimizer = torch.optim.Adam(model.parameters(), lr=lr) -scheduler = lr_scheduler.StepLR(optimizer, 10, gamma=0.1) -checkpoint_save_path = BASE_PATH + "regnet_save.pt" - -# train for 20 epochs +# 
================================================================================================= +# train for n epochs epochs = 50 dataLoaders = { "training": train_dataloader, @@ -76,13 +94,5 @@ print("next epoch: " + str(last_epoch + 1)) print("resuming training...\n") -train(model, optimizer, scheduler, loss_func, epochs, dataLoaders, checkpoint_save_path, metrics, last_epoch + 1, device) -# print("Best validation accuracy: ", best_validation_accuracy) - -# visualization of training and validation loss over epochs -plt.plot(np.arange(epochs), metrics['loss_train'], label="training loss") -plt.plot(np.arange(epochs), metrics['loss_validation'], label="validation loss") -plt.title("Training/Validation loss for FT model") -plt.xlabel("epoch") -plt.ylabel("loss") -plt.legend() +train(model, (MODEL_NAME == "inception"), optimizer, scheduler, loss_func, epochs, dataLoaders, + checkpoint_save_path, metrics, last_epoch + 1, device) diff --git a/utils/training_utils.py b/utils/training_utils.py index 1d3d847..5d5dd24 100644 --- a/utils/training_utils.py +++ b/utils/training_utils.py @@ -2,12 +2,60 @@ import numpy as np import os import torch +import torch.nn as nn import torchvision from sklearn.metrics import confusion_matrix from time import perf_counter -def get_pretrained_model(): - return torchvision.models.regnet_y_8gf(pretrained = True) +def get_pretrained_model(model_name, num_classes, use_pretrained=True): + model_ft = None + input_size = 0 + + if model_name == "resnet": + """ Resnet50 + """ + model_ft = torchvision.models.resnet50(pretrained=use_pretrained) + num_ftrs = model_ft.fc.in_features + model_ft.fc = nn.Linear(num_ftrs, num_classes) + input_size = 224 + + elif model_name == "inception": + """ Inception v3 + Be careful, expects (299,299) sized images and has auxiliary output + """ + model_ft = torchvision.models.inception_v3(pretrained=use_pretrained) + # Handle the auxiliary net + num_ftrs = model_ft.AuxLogits.fc.in_features + model_ft.AuxLogits.fc = 
nn.Linear(num_ftrs, num_classes) + # Handle the primary net + num_ftrs = model_ft.fc.in_features + model_ft.fc = nn.Linear(num_ftrs,num_classes) + input_size = 299 + + elif model_name == "efficientnet": + """ EfficientNetB3 + """ + model_ft = torchvision.models.efficientnet_b3(pretrained=use_pretrained) + num_ftrs = model_ft.classifier[1].in_features + model_ft.classifier[1] = nn.Linear(num_ftrs, num_classes) + input_size = 224 + + elif model_name == "regnet": + """ RegNet-y, 8gF + """ + model_ft = torchvision.models.regnet_y_8gf(pretrained=use_pretrained) + num_ftrs = model_ft.fc.in_features + model_ft.fc = nn.Linear(num_ftrs, num_classes) + input_size = 224 + + else: + print("Invalid model name, exiting...") + exit() + + return model_ft, input_size + +# # Initialize the model for this run +# model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True) def save_training_checkpoint(training_states, best_model_state, metrics, epoch, path): # add things like TPR, FPR later when we start evaluating them @@ -48,7 +96,7 @@ def load_best_weights(model, path): model.load_state_dict(checkpoint['best_model_state']) -def train(model, optimizer, scheduler, loss_func, epochs, datasetLoaders, save_path, metrics, start_epoch, device): +def train(model, is_inception, optimizer, scheduler, loss_func, epochs, datasetLoaders, save_path, metrics, start_epoch, device): t_start = perf_counter() best_model_state = copy.deepcopy(model.state_dict()) @@ -91,10 +139,19 @@ def train(model, optimizer, scheduler, loss_func, epochs, datasetLoaders, save_p optimizer.zero_grad() use_grad = (mode == 'training') with torch.set_grad_enabled(use_grad): - outputs = model(inputs) - loss = loss_func(outputs, labels) + if is_inception and use_grad: + # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958 + outputs, aux_outputs = model(inputs) + loss1 = loss_func(outputs, labels) + loss2 = loss_func(aux_outputs, 
labels) + loss = loss1 + 0.4*loss2 + else: + outputs = model(inputs) + loss = loss_func(outputs, labels) + _, preds = torch.max(outputs, 1) preds.to(device) + if use_grad: # We are training, so make sure to actually # train by using loss/stepping. @@ -136,7 +193,7 @@ def train(model, optimizer, scheduler, loss_func, epochs, datasetLoaders, save_p metrics['loss_train'].append(loss_avg) metrics['accuracy_train'].append(accuracy) # make sure to step through lr update schedule - #scheduler.step() + scheduler.step() training_states = {'model': model, 'optimizer': optimizer, 'scheduler': scheduler} save_training_checkpoint(training_states, best_model_state, metrics, epoch, save_path) @@ -149,7 +206,7 @@ def train(model, optimizer, scheduler, loss_func, epochs, datasetLoaders, save_p print("Elapsed time during training in seconds", t_stop-t_start) -def evaluate(model, loss_func, dataset_loader, test, device): +def evaluate(model, is_inception, loss_func, dataset_loader, test, device): # put model into eval mode model.eval() @@ -161,13 +218,23 @@ def evaluate(model, loss_func, dataset_loader, test, device): lbllist=torch.zeros(0, dtype=torch.long, device='cpu') conf_mat = None + epoch_count = 0 + # correct predictions. 
correct = 0 total_loss = 0 with torch.no_grad(): for inputs, labels in dataset_loader: inputs, labels = inputs.to(device), labels.to(device) - outputs = model(inputs) + epoch_count += inputs.size(0) + print("percent {}".format(epoch_count / n)) + if is_inception: + outputs, _ = model(inputs) + else: + outputs = model(inputs) + + # we ignore aux output in test loss calculation + # since we aren't updating weights loss = loss_func(outputs, labels) _, predictions = torch.max(outputs, 1) @@ -183,4 +250,4 @@ def evaluate(model, loss_func, dataset_loader, test, device): conf_mat = confusion_matrix(lbllist.numpy(), predlist.numpy()) print(conf_mat) - return correct / n, total_loss / n, conf_mat \ No newline at end of file + return correct / n, total_loss / n, conf_mat