Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

data/weight > data/weights #69

Open
wants to merge 29 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
*.pyc
*.swp

data/*

10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
## YOLO_tensorflow
## YOLO-tensorflow

TensorFlow implementation of [YOLO](https://arxiv.org/pdf/1506.02640.pdf), including the training and test phases.

### Installation

1. Clone yolo_tensorflow repository
1. Clone YOLO-tensorflow repository
```Shell
$ git clone https://github.com/hizhangp/yolo_tensorflow.git
$ cd yolo_tensorflow
$ git clone https://github.com/sungjunhong/YOLO-tensorflow.git
$ cd YOLO-tensorflow
```

2. Download Pascal VOC dataset, and create correct directories
Expand All @@ -16,7 +16,7 @@ Tensorflow implementation of [YOLO](https://arxiv.org/pdf/1506.02640.pdf), inclu
```

3. Download [YOLO_small](https://drive.google.com/file/d/0B5aC8pI-akZUNVFZMmhmcVRpbTA/view?usp=sharing)
weight file and put it in `data/weight`
weight file and put it in `data/weights`

4. Modify configuration in `yolo/config.py`

Expand Down
File renamed without changes
File renamed without changes
176 changes: 104 additions & 72 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,110 +15,123 @@ def __init__(self, net, weight_file):
self.weights_file = weight_file

self.classes = cfg.CLASSES
self.num_class = len(self.classes)
self.image_size = cfg.IMAGE_SIZE
self.cell_size = cfg.CELL_SIZE
self.boxes_per_cell = cfg.BOXES_PER_CELL
self.threshold = cfg.THRESHOLD
self.iou_threshold = cfg.IOU_THRESHOLD
self.boundary1 = self.cell_size * self.cell_size * self.num_class
self.num_class = len(self.classes) # C = 20
self.image_size = cfg.IMAGE_SIZE # 448
self.cell_size = cfg.CELL_SIZE # S = 7
self.boxes_per_cell = cfg.BOXES_PER_CELL # B = 2
self.threshold = cfg.THRESHOLD # 0.2
self.iou_threshold = cfg.IOU_THRESHOLD # 0.5
self.boundary1 = self.cell_size * self.cell_size * self.num_class
# S x S x C = 980
self.boundary2 = self.boundary1 +\
self.cell_size * self.cell_size * self.boxes_per_cell
# S x S x C + S x S x B = 980 + 98 = 1078

self.sess = tf.Session()
self.sess.run(tf.global_variables_initializer())

# restore weights file
print('Restoring weights from: ' + self.weights_file)
self.saver = tf.train.Saver()
self.saver.restore(self.sess, self.weights_file)

def draw_result(self, img, result):
    """Draw a box and a caption on ``img`` for every detection.

    Args:
        img: image handed to ``cv2.rectangle`` / ``cv2.putText``.
        result: sequence of detections, each indexed as
            ``[label, x_center, y_center, width, height, score]``.
    """
    # Feature-detect the anti-aliased line flag. Comparing version
    # strings is lexicographic ('10.0' > '3' is False), so it cannot be
    # trusted to pick the right constant.
    line_type = getattr(cv2, 'LINE_AA', None)
    if line_type is None:
        line_type = cv2.CV_AA

    for det in result:
        x = int(det[1])
        y = int(det[2])
        half_w = int(det[3] / 2)
        half_h = int(det[4] / 2)
        left, right = x - half_w, x + half_w
        top, bottom = y - half_h, y + half_h

        # Green outline around the detection.
        cv2.rectangle(img, (left, top), (right, bottom), (0, 255, 0), 2)
        # Filled grey strip above the box that holds the caption.
        cv2.rectangle(img, (left, top - 20), (right, top),
                      (125, 125, 125), -1)
        cv2.putText(
            img, det[0] + ' : %.2f' % det[5],
            (left + 5, top - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
            (0, 0, 0), 1, line_type)

def detect(self, img):
    """Run the network on one image and return boxes in image coordinates.

    Args:
        img: image with shape ``(height, width, channels)`` in BGR channel
            order (it is converted with ``cv2.COLOR_BGR2RGB``).

    Returns:
        The detections produced by ``detect_from_cvmat`` for this image,
        with fields 1-4 (x, y, w, h) rescaled from the network input size
        back to the size of ``img``.
    """
    img_h, img_w, _ = img.shape

    inputs = cv2.resize(img, (self.image_size, self.image_size))
    inputs = cv2.cvtColor(inputs, cv2.COLOR_BGR2RGB).astype(np.float32)
    # Map pixel values to [-1, 1]; assumes 8-bit input in [0, 255].
    inputs = (inputs / 255.0) * 2.0 - 1.0
    # Add the leading batch dimension expected by the network.
    inputs = np.reshape(inputs, (1, self.image_size, self.image_size, 3))

    result = self.detect_from_cvmat(inputs)[0]

    # float() keeps this a true division even where ``/`` on two ints
    # truncates (Python 2 without ``from __future__ import division``).
    scale_w = float(img_w) / self.image_size
    scale_h = float(img_h) / self.image_size
    for det in result:
        # Scale each box exactly once.
        det[1] *= scale_w
        det[2] *= scale_h
        det[3] *= scale_w
        det[4] *= scale_h

    return result

def detect_from_cvmat(self, inputs):
    """Evaluate the network on a batch and decode every output row.

    Args:
        inputs: batch fed to the ``self.net.images`` placeholder.

    Returns:
        A list with one ``interpret_output`` result per row of the
        network output.
    """
    feed = {self.net.images: inputs}
    # Per the original note each row has S x S x (B * 5 + C) = 1470 values.
    net_output = self.sess.run(self.net.logits, feed_dict=feed)

    batch_size = net_output.shape[0]
    return [self.interpret_output(net_output[idx])
            for idx in range(batch_size)]

def interpret_output(self, output):

probs = np.zeros((self.cell_size, self.cell_size,
self.boxes_per_cell, self.num_class))
# (S, S, B, C)

# Conditional Class Probability, Pr(Class_i|Object)
class_probs = np.reshape(
output[0:self.boundary1],
(self.cell_size, self.cell_size, self.num_class))
# (S, S, C)

# Confidence Score, Pr(Object)
scales = np.reshape(
output[self.boundary1:self.boundary2],
(self.cell_size, self.cell_size, self.boxes_per_cell))
# (S, S, B)

# Bounding Box, (x, y, w, h) in the range [0, 1]
boxes = np.reshape(
output[self.boundary2:],
(self.cell_size, self.cell_size, self.boxes_per_cell, 4))
offset = np.array(
[np.arange(self.cell_size)] * self.cell_size * self.boxes_per_cell)
offset = np.transpose(
np.reshape(
offset,
[self.boxes_per_cell, self.cell_size, self.cell_size]),
(1, 2, 0))

# (S, S, B, 4)

# interpret network output (x, y, w, h) using offset
offset = np.transpose(np.reshape(np.array(
[np.arange(self.cell_size)] * self.cell_size * self.boxes_per_cell),
(self.boxes_per_cell, self.cell_size, self.cell_size)), (1, 2, 0))
offset_tran = np.transpose(offset, (1, 0, 2))
# (S, S, B)

boxes[:, :, :, 0] += offset
boxes[:, :, :, 1] += np.transpose(offset, (1, 0, 2))
boxes[:, :, :, :2] = 1.0 * boxes[:, :, :, 0:2] / self.cell_size
boxes[:, :, :, 1] += offset_tran
boxes[:, :, :, :2] /= self.cell_size
boxes[:, :, :, 2:] = np.square(boxes[:, :, :, 2:])

boxes *= self.image_size

# Class Specific Confidence Score
for i in range(self.boxes_per_cell):
for j in range(self.num_class):
probs[:, :, i, j] = np.multiply(
class_probs[:, :, j], scales[:, :, i])

# filtering via class specific confidence score
filter_mat_probs = np.array(probs >= self.threshold, dtype='bool')
filter_mat_boxes = np.nonzero(filter_mat_probs)

boxes_filtered = boxes[filter_mat_boxes[0],
filter_mat_boxes[1], filter_mat_boxes[2]]
probs_filtered = probs[filter_mat_probs]
classes_num_filtered = np.argmax(
filter_mat_probs, axis=3)[
probs, axis=3)[
filter_mat_boxes[0], filter_mat_boxes[1], filter_mat_boxes[2]]

argsort = np.array(np.argsort(probs_filtered))[::-1]

# filter_mat_probs is boolean, so argmax over it returns the first class above the threshold rather than the most probable one.
# classes_num_filtered = np.argmax(
# filter_mat_probs, axis=3)[
# filter_mat_boxes[0], filter_mat_boxes[1], filter_mat_boxes[2]]

# non-maximal suppression
# step-1: performing descending sort along class specific confidence score
argsort = np.argsort(probs_filtered)[::-1]
boxes_filtered = boxes_filtered[argsort]
probs_filtered = probs_filtered[argsort]
classes_num_filtered = classes_num_filtered[argsort]

# step-2: filtering via iou
for i in range(len(boxes_filtered)):
if probs_filtered[i] == 0:
continue
Expand All @@ -140,16 +153,47 @@ def interpret_output(self, output):
boxes_filtered[i][2],
boxes_filtered[i][3],
probs_filtered[i]])
# (class, x, y, w, h, score)

return result

def iou(self, box1, box2):
    """Return the intersection-over-union of two boxes.

    Args:
        box1: box indexed as ``(x_center, y_center, width, height)``.
        box2: second box in the same layout.

    Returns:
        Intersection area divided by union area, or ``0.0`` when the
        union area is not positive (e.g. both boxes have zero area).
    """
    # Overlap along x (left/right) and along y (top/bottom); a negative
    # value means the boxes are disjoint on that axis.
    overlap_w = (min(box1[0] + 0.5 * box1[2], box2[0] + 0.5 * box2[2]) -
                 max(box1[0] - 0.5 * box1[2], box2[0] - 0.5 * box2[2]))
    overlap_h = (min(box1[1] + 0.5 * box1[3], box2[1] + 0.5 * box2[3]) -
                 max(box1[1] - 0.5 * box1[3], box2[1] - 0.5 * box2[3]))

    if overlap_w < 0 or overlap_h < 0:
        inter = 0
    else:
        inter = overlap_w * overlap_h

    union = box1[2] * box1[3] + box2[2] * box2[3] - inter
    if union <= 0:
        # Degenerate boxes: avoid dividing by zero.
        return 0.0
    return inter / union

def display_result(self, img, result):
    """Draw a box and a caption on ``img`` for every detection.

    Args:
        img: image handed to ``cv2.rectangle`` / ``cv2.putText``.
        result: sequence of detections, each indexed as
            ``[label, x_center, y_center, width, height, score]``.
    """
    # Feature-detect the anti-aliased line flag. Comparing version
    # strings is lexicographic ('10.0' > '3' is False), so it cannot be
    # trusted to pick the right constant.
    line_type = getattr(cv2, 'LINE_AA', None)
    if line_type is None:
        line_type = cv2.CV_AA

    for det in result:
        half_w = int(det[3] / 2)
        half_h = int(det[4] / 2)
        x1 = int(det[1]) - half_w
        y1 = int(det[2]) - half_h
        x2 = int(det[1]) + half_w
        y2 = int(det[2]) + half_h

        # Green outline around the detection.
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Filled grey strip above the box that holds the caption.
        cv2.rectangle(img, (x1, y1 - 20), (x2, y1), (125, 125, 125), -1)
        cv2.putText(
            img, det[0] + ' : %.3f' % det[5],
            (x1 + 5, y1 - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
            (0, 0, 0), 1, line_type)

def image_detector(self, imname, wait=0):
    """Detect objects in an image file and show the annotated image.

    Args:
        imname: path passed to ``cv2.imread``.
        wait: delay passed to ``cv2.waitKey`` after the window is shown.

    Raises:
        IOError: if ``cv2.imread`` could not load ``imname``.
    """
    detect_timer = Timer()
    image = cv2.imread(imname)
    if image is None:
        # cv2.imread reports failure by returning None instead of
        # raising; fail here with the offending path rather than later
        # on ``img.shape``.
        raise IOError('Cannot read image: {}'.format(imname))

    detect_timer.tic()
    result = self.detect(image)
    detect_timer.toc()
    print('Average detecting time: {:.3f}s'.format(
        detect_timer.average_time))

    self.display_result(image, result)
    cv2.imshow('Image', image)
    cv2.waitKey(wait)

def camera_detector(self, cap, wait=10):
detect_timer = Timer()
Expand All @@ -163,48 +207,36 @@ def camera_detector(self, cap, wait=10):
print('Average detecting time: {:.3f}s'.format(
detect_timer.average_time))

self.draw_result(frame, result)
self.display_result(frame, result)
cv2.imshow('Camera', frame)
cv2.waitKey(wait)

ret, frame = cap.read()

def image_detector(self, imname, wait=0):
    """Detect objects in an image file and show the annotated image.

    Args:
        imname: path passed to ``cv2.imread``.
        wait: delay passed to ``cv2.waitKey`` after the window is shown.

    Raises:
        IOError: if ``cv2.imread`` could not load ``imname``.
    """
    detect_timer = Timer()
    image = cv2.imread(imname)
    if image is None:
        # cv2.imread reports failure by returning None instead of
        # raising; fail here with the offending path rather than later
        # on ``img.shape``.
        raise IOError('Cannot read image: {}'.format(imname))

    detect_timer.tic()
    result = self.detect(image)
    detect_timer.toc()
    print('Average detecting time: {:.3f}s'.format(
        detect_timer.average_time))

    self.draw_result(image, result)
    cv2.imshow('Image', image)
    cv2.waitKey(wait)


def main():
    """Parse the command line, build the detector and run it on one image.

    Options:
        --data_dir, --weight_dir, --weights: joined with ``os.path.join``
            to form the checkpoint path.
        --gpu: value exported as ``CUDA_VISIBLE_DEVICES``.
    """
    # Register each option exactly once; argparse raises on a duplicate
    # option string.
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', default="data", type=str)
    parser.add_argument('--weight_dir', default='weights', type=str)
    parser.add_argument('--weights', default="YOLO_small.ckpt", type=str)
    parser.add_argument('--gpu', default='', type=str)
    args = parser.parse_args()

    if args.gpu is not None:
        cfg.GPU = args.gpu
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ['CUDA_VISIBLE_DEVICES'] = cfg.GPU

    yolo = YOLONet(False)
    weight_file = os.path.join(args.data_dir, args.weight_dir, args.weights)
    detector = Detector(yolo, weight_file)

    # detect from image file
    imname = 'person.jpg'
    detector.image_detector(imname)

    # detect from camera
    # cap = cv2.VideoCapture(-1)
    # detector.camera_detector(cap)


if __name__ == '__main__':
Expand Down
Loading