Merge pull request #36 from ThatOneGoat/33_rescale_panos
Implemented pano cleaning v1.0: panos whose imagery fills only the smaller GSV dimensions (13312 x 6656) of a larger, black-padded canvas are cropped to the content region and resized back up to the full canvas size (see the sketch below).
shokiami authored Dec 1, 2021
2 parents f7ec7b6 + 18ac5f8 commit ecc942e
Showing 5 changed files with 84 additions and 19 deletions.
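In essence, the new cleaning pass detects panos whose imagery fills only the top-left GSV_IMAGE_WIDTH x GSV_IMAGE_HEIGHT (13312 x 6656) region of a larger, black-padded canvas, crops away the padding, and scales the content back up to the original canvas size. A minimal single-image sketch of that logic (assuming, as the PanoScraper.py implementation below does, that the stored canvas is strictly larger than the content region):

    from PIL import Image

    GSV_IMAGE_WIDTH = 13312
    GSV_IMAGE_HEIGHT = 6656

    def clean_pano(pano_path):
        with Image.open(pano_path) as p:
            # a black pixel just past the GSV content area means the imagery
            # fills only the top-left region of the canvas
            if p.load()[GSV_IMAGE_WIDTH, GSV_IMAGE_HEIGHT] == (0, 0, 0):
                content = p.crop((0, 0, GSV_IMAGE_WIDTH, GSV_IMAGE_HEIGHT))
                content.resize(p.size).save(pano_path)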
2 changes: 1 addition & 1 deletion .gitignore
@@ -5,7 +5,7 @@ alphie-sftp
 rawdata
 
 # Ignore generated batch sftp commands files
-batch.txt
+batch*.txt
 
 # Ignore pano-downloads folder
 pano-downloads
28 changes: 16 additions & 12 deletions CropRunner.py
@@ -42,10 +42,10 @@
 mark_center = True
 
 # The number of crops per multicrop
-MULTICROP_COUNT = 3
+MULTICROP_COUNT = 2
 
 # The scale factor for each multicrop
-MULTICROP_SCALE_FACTOR = 1.25
+MULTICROP_SCALE_FACTOR = 1.5
 
 logging.basicConfig(filename='crop.log', level=logging.DEBUG)

@@ -87,8 +87,9 @@ def make_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, destination_dir
     :param label_name: label name
     :param multicrop: whether or not to make multiple crops for the label
     :param draw_mark: if a dot should be drawn in the centre of the object/image
-    :return: none
+    :return: crop_names: a list of names of the generated crops
     """
+    crop_names = []
     try:
         im = Image.open(pano_img_path)
         # draw = ImageDraw.Draw(im)
@@ -130,6 +131,7 @@ def make_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, destination_dir
             print("Successfully extracted crop to " + crop_name)
             logging.info(label_name + " " + pano_img_path + " " + str(sv_image_x) + " " + str(sv_image_y) + " " + str(pano_yaw_deg))
             logging.info("---------------------------------------------------")
+            crop_names.append(crop_name)
             if not multicrop:
                 break
             crop_width *= MULTICROP_SCALE_FACTOR
@@ -139,7 +141,7 @@ def make_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, destination_dir
         print(e)
         print("Error for {}".format(pano_img_path))
 
-    return
+    return crop_names

 def bulk_extract_crops(path_to_db_export, path_to_gsv_scrapes, destination_dir, mark_label=False):
     t_start = perf_counter()
@@ -149,6 +151,10 @@ def bulk_extract_crops(path_to_db_export, path_to_gsv_scrapes, destination_dir, mark_label=False):
         label_list = list(csv_f)
         row_count = len(label_list)
 
+    # make the output directory if needed
+    if not os.path.isdir(destination_dir):
+        os.makedirs(destination_dir)
+
     with mp.Manager() as manager:
         # get cpu core count
         cpu_count = mp.cpu_count()
@@ -186,7 +192,7 @@ def bulk_extract_crops(path_to_db_export, path_to_gsv_scrapes, destination_dir, mark_label=False):
         successful_crop_count = len(output_rows)
         # no_metadata_fail = 0
         # don't count header row as a failed crop
-        no_pano_fail = row_count - successful_crop_count - 1
+        no_pano_fail = ((row_count - 1) * MULTICROP_COUNT) - successful_crop_count
 
         for row in output_rows:
             csv_w.writerow(row)
@@ -216,20 +222,18 @@ def crop_label_subset(input_rows, output_rows, path_to_gsv_scrapes, destination_dir
 
         # Extract the crop
         if os.path.exists(pano_img_path):
-            destination_folder = os.path.join(destination_dir)
-            if not os.path.isdir(destination_folder):
-                os.makedirs(destination_folder)
-
+            crop_names = []
             if not label_type == 0:
                 label_name = str(row[7])
-                make_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, destination_dir, label_name, True)
+                crop_names = make_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, destination_dir, label_name, True)
             else:
                 # In order to uniquely identify null crops, we concatenate the pid of the process they
                 # were generated on and the counter within the process to the name of the null crop.
                 label_name = "null_" + str(process_pid) + "_" + str(counter)
-                make_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, destination_dir, label_name, False)
+                crop_names = make_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, destination_dir, label_name, False)
 
-            output_rows.append([label_name, label_type])
+            for crop_name in crop_names:
+                output_rows.append([crop_name, label_type])
         else:
             print("Panorama image not found.")
             try:
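Two of the changes above interact: each label now produces MULTICROP_COUNT = 2 crops (the second 1.5x the width of the first, replacing three crops at 1.25x steps), and make_crop now returns the names of the crops it actually wrote, so bulk_extract_crops counts failures in expected crops rather than labels. A small worked example of the new accounting, using hypothetical counts and assuming every label takes the multicrop path (which holds in this commit, since NULLS_PER_PANO is set to 0 in PanoScraper.py):

    MULTICROP_COUNT = 2

    row_count = 101              # hypothetical: 100 labels plus the CSV header row
    successful_crop_count = 188  # hypothetical: crops actually written

    expected_crops = (row_count - 1) * MULTICROP_COUNT     # 200
    no_pano_fail = expected_crops - successful_crop_count  # 12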
59 changes: 56 additions & 3 deletions PanoScraper.py
@@ -1,19 +1,22 @@
 import csv
+import glob
+import multiprocessing as mp
 import os
+import random
 import subprocess
-import multiprocessing as mp
 from time import perf_counter
+from itertools import islice
 from datatypes.label import Label
 from datatypes.panorama import Panorama
 from datatypes.point import Point
-import random
+from PIL import Image, ImageFile
+ImageFile.LOAD_TRUNCATED_IMAGES = True
 
 GSV_IMAGE_WIDTH = 13312
 GSV_IMAGE_HEIGHT = 6656
 
 # null crops per pano
-NULLS_PER_PANO = 2
+NULLS_PER_PANO = 0

 def bulk_scrape_panos(n, start_row, path_to_labeldata_csv, local_dir, remote_dir, output_csv_name):
     # TODO: find a way to clear the pano-downloads folder and batch.txt file
@@ -123,3 +126,53 @@ def acquire_n_panos(remote_dir, local_dir, pano_ids, thread_id):
     print(result)
     if sftp.returncode != 0:
         print("sftp failed on one or more commands: {0}".format(sftp_command_list))

+def clean_panos(path_to_panos):
+    t_start = perf_counter()
+
+    # get list of pano paths
+    panos = glob.glob(path_to_panos + "/*.jpg")
+
+    # get available cpu core count, capped at 8
+    cpu_count = min(mp.cpu_count(), 8)
+
+    # split pano set into chunks for multiprocessing
+    pano_set_size = len(panos)
+    i = 0
+    processes = []
+    while i < pano_set_size:
+        # never let the chunk size hit zero when there are fewer panos than workers
+        chunk_size = max(1, (pano_set_size - i) // cpu_count)
+        pano_ids = set(islice(panos, i, i + chunk_size))
+        process = mp.Process(target=clean_n_panos, args=(pano_ids,))
+        processes.append(process)
+        cpu_count -= 1
+        i += chunk_size
+
+    # start processes
+    for p in processes:
+        p.start()
+
+    # join processes once finished
+    for p in processes:
+        p.join()
+
+    t_stop = perf_counter()
+    execution_time = t_stop - t_start
+    return execution_time
+
+def clean_n_panos(panos):
+    for pano_path in panos:
+        with Image.open(pano_path) as p:
+            # check whether the pano needs cleaning by looking for black space just
+            # past the GSV content area; guard the pixel access so panos already at
+            # GSV_IMAGE_WIDTH x GSV_IMAGE_HEIGHT don't raise an IndexError
+            pix = p.load()
+            if p.size[0] > GSV_IMAGE_WIDTH and p.size[1] > GSV_IMAGE_HEIGHT \
+                    and pix[GSV_IMAGE_WIDTH, GSV_IMAGE_HEIGHT] == (0, 0, 0):
+                print("resizing", pano_path)
+                original_size = p.size
+                im = p.crop((0, 0, GSV_IMAGE_WIDTH, GSV_IMAGE_HEIGHT))
+                im = im.resize(original_size)
+                im.save(pano_path)
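As a design note, the chunk/Process/join bookkeeping in clean_panos could also be expressed with multiprocessing.Pool, which handles the fan-out and joining itself. A minimal sketch of that alternative (not what this commit does), reusing clean_n_panos from PanoScraper.py:

    import glob
    import multiprocessing as mp

    from PanoScraper import clean_n_panos

    if __name__ == '__main__':
        panos = glob.glob('pano-downloads/*.jpg')
        n_procs = min(mp.cpu_count(), 8)
        # stride-slice the paths into one roughly even chunk per worker
        chunks = [panos[i::n_procs] for i in range(n_procs)]
        with mp.Pool(processes=n_procs) as pool:
            pool.map(clean_n_panos, chunks)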
1 change: 1 addition & 0 deletions requirements.txt
@@ -11,3 +11,4 @@ backoff>=1.10.0
 torch
 scikit-learn
 torchvision
+# note: glob is part of the Python standard library, so no pip package is needed
13 changes: 10 additions & 3 deletions scrape_and_crop_labels.py
@@ -1,12 +1,12 @@
-from PanoScraper import bulk_scrape_panos
+from PanoScraper import bulk_scrape_panos, clean_panos
 from CropRunner import bulk_extract_crops
 
 import multiprocessing as mp
 import os
 
 if __name__ == '__main__':
     # scrape panos from SFTP server
-    n = 50
+    n = 20
     start_row = 1
     path_to_labeldata_csv = "rawdata/seattle-labels-cv-10-29-2021.csv"

@@ -21,9 +21,12 @@
     output_csv_name = 'gathered_panos.csv'
     pano_set_size, scraper_exec_time = bulk_scrape_panos(n, start_row, path_to_labeldata_csv, local_dir, remote_dir, output_csv_name)
 
+    # clean panos
+    gsv_pano_path = 'pano-downloads'
+    clean_time = clean_panos(gsv_pano_path)
+
     # crop labels with scraped panos
     csv_export_path = 'pano-downloads/gathered_panos.csv'
-    gsv_pano_path = 'pano-downloads'
     destination_path = 'crops'
     metrics = bulk_extract_crops(csv_export_path, gsv_pano_path, destination_path, mark_label=False)

@@ -33,6 +36,10 @@
     print("Elapsed time scraping {} panos for {} labels in seconds:".format(pano_set_size, n),
           scraper_exec_time)
     print()
+    print("Pano Cleaning metrics:")
+    print("Elapsed time cleaning {} panos in seconds:".format(pano_set_size),
+          clean_time)
+    print()
     print("Label Cropping metrics:")
     print(str(metrics[1]) + " successful crop extractions")
     print(str(metrics[2]) + " extractions failed because panorama image was not found.")
