-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreader.py
253 lines (207 loc) · 8.51 KB
/
reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
# -*- coding: utf-8 -*-
# 此文件中定义数据读取相关的函数
import os
import cv2
import numpy as np
import paddle
import functools
from image_utils import image_augment
from insects_reader import get_insect_names, get_annotations
def get_bbox(gt_bbox, gt_class):
# 对于一般的检测任务来说,一张图片上往往会有多个目标物体
# 设置参数MAX_NUM = 50, 即一张图片最多取50个真实框;如果真实
# 框的数目少于50个,则将不足部分的gt_bbox, gt_class和gt_score的各项数值全设置为0
MAX_NUM = 50
gt_bbox2 = np.zeros((MAX_NUM, 4))
gt_class2 = np.zeros((MAX_NUM,))
for i in range(len(gt_bbox)):
gt_bbox2[i, :] = gt_bbox[i, :]
gt_class2[i] = gt_class[i]
if i >= MAX_NUM:
break
return gt_bbox2, gt_class2
# 根据record里面保存的信息,获取单张图片及图片里面的物体真实框和标签等信息
def get_img_data_from_file(record):
"""
record is a dict as following,
record = {
'im_file': img_file,
'im_id': im_id,
'h': im_h,
'w': im_w,
'is_crowd': is_crowd,
'gt_class': gt_class,
'gt_bbox': gt_bbox,
'gt_poly': [],
'difficult': difficult
}
"""
im_file = record['im_file']
h = record['h']
w = record['w']
is_crowd = record['is_crowd']
gt_class = record['gt_class']
gt_bbox = record['gt_bbox']
difficult = record['difficult']
img = cv2.imread(im_file)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# check if h and w in record equals that read from img
assert img.shape[0] == int(h), \
"image height of {} inconsistent in record({}) and img file({})".format(
im_file, h, img.shape[0])
assert img.shape[1] == int(w), \
"image width of {} inconsistent in record({}) and img file({})".format(
im_file, w, img.shape[1])
gt_boxes, gt_labels = get_bbox(gt_bbox, gt_class)
# gt_bbox 用相对值
gt_boxes[:, 0] = gt_boxes[:, 0] / float(w)
gt_boxes[:, 1] = gt_boxes[:, 1] / float(h)
gt_boxes[:, 2] = gt_boxes[:, 2] / float(w)
gt_boxes[:, 3] = gt_boxes[:, 3] / float(h)
return img, gt_boxes, gt_labels, (h, w)
# 读取图片信息,并且对图片做图像增广,调用image_augment完成
def get_img_data(record, size=640):
img, gt_boxes, gt_labels, scales = get_img_data_from_file(record)
img, gt_boxes, gt_labels = image_augment(img, gt_boxes, gt_labels, size)
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
mean = np.array(mean).reshape((1, 1, -1))
std = np.array(std).reshape((1, 1, -1))
img = (img / 255.0 - mean) / std
img = img.astype('float32').transpose((2, 0, 1))
return img, gt_boxes, gt_labels, scales
# 获取图片缩放的尺寸
def get_img_size(mode):
if (mode == 'train') or (mode == 'valid'):
inds = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
ii = np.random.choice(inds)
img_size = 320 + ii * 32
else:
img_size = 608
return img_size
# 将 list形式的batch数据 转化成多个array构成的tuple
def make_array(batch_data):
img_array = np.array([item[0] for item in batch_data], dtype='float32')
gt_box_array = np.array([item[1] for item in batch_data], dtype='float32')
gt_labels_array = np.array([item[2] for item in batch_data], dtype='int32')
img_scale = np.array([item[3] for item in batch_data], dtype='int32')
return img_array, gt_box_array, gt_labels_array, img_scale
# 批量读取数据,同一批次内图像的尺寸大小必须是一样的,
# 不同批次之间的大小是随机的,
# 由上面定义的get_img_size函数产生
def data_loader(datadir, batch_size=10, mode='train'):
cname2cid = get_insect_names()
records = get_annotations(cname2cid, datadir)
def reader():
if mode == 'train':
np.random.shuffle(records)
batch_data = []
img_size = get_img_size(mode)
for record in records:
# print(record)
img, gt_bbox, gt_labels, im_shape = get_img_data(record,
size=img_size)
batch_data.append((img, gt_bbox, gt_labels, im_shape))
if len(batch_data) == batch_size:
yield make_array(batch_data)
batch_data = []
img_size = get_img_size(mode)
if len(batch_data) > 0:
yield make_array(batch_data)
return reader
# 使用paddle.reader.xmap_readers实现多线程读取数据
def multithread_loader(datadir, batch_size=10, mode='train'):
cname2cid = get_insect_names()
records = get_annotations(cname2cid, datadir)
def reader():
if mode == 'train':
np.random.shuffle(records)
img_size = get_img_size(mode)
batch_data = []
for record in records:
batch_data.append((record, img_size))
if len(batch_data) == batch_size:
yield batch_data
batch_data = []
img_size = get_img_size(mode)
if len(batch_data) > 0:
yield batch_data
def get_data(samples):
batch_data = []
for sample in samples:
record = sample[0]
img_size = sample[1]
img, gt_bbox, gt_labels, im_shape = get_img_data(record, size=img_size)
batch_data.append((img, gt_bbox, gt_labels, im_shape))
return make_array(batch_data)
mapper = functools.partial(get_data, )
return paddle.reader.xmap_readers(mapper, reader, 8, 10)
# 将 list形式的测试batch数据 转化成多个array构成的tuple
def make_test_array(batch_data):
img_name_array = np.array([item[0] for item in batch_data])
img_data_array = np.array([item[1] for item in batch_data], dtype='float32')
img_scale_array = np.array([item[2] for item in batch_data], dtype='int32')
return img_name_array, img_data_array, img_scale_array
# 测试数据读取
def test_data_loader(datadir, batch_size=10, test_image_size=608, mode='test'):
"""
加载测试用的图片,测试数据没有groundtruth标签
"""
image_names = os.listdir(datadir)
def reader():
batch_data = []
img_size = test_image_size
for image_name in image_names:
file_path = os.path.join(datadir, image_name)
img = cv2.imread(file_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
H = img.shape[0]
W = img.shape[1]
img = cv2.resize(img, (img_size, img_size))
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
mean = np.array(mean).reshape((1, 1, -1))
std = np.array(std).reshape((1, 1, -1))
out_img = (img / 255.0 - mean) / std
out_img = out_img.astype('float32').transpose((2, 0, 1))
img = out_img # np.transpose(out_img, (2,0,1))
im_shape = [H, W]
batch_data.append((image_name.split('.')[0], img, im_shape))
if len(batch_data) == batch_size:
yield make_test_array(batch_data)
batch_data = []
if len(batch_data) > 0:
yield make_test_array(batch_data)
return reader
# 读取单张测试图片
def single_image_data_loader(filename, test_image_size=608, mode='test'):
"""
加载测试用的图片,测试数据没有groundtruth标签
"""
batch_size = 1
def reader():
batch_data = []
img_size = test_image_size
file_path = os.path.join(filename)
img = cv2.imread(file_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
H = img.shape[0]
W = img.shape[1]
img = cv2.resize(img, (img_size, img_size))
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
mean = np.array(mean).reshape((1, 1, -1))
std = np.array(std).reshape((1, 1, -1))
out_img = (img / 255.0 - mean) / std
out_img = out_img.astype('float32').transpose((2, 0, 1))
img = out_img # np.transpose(out_img, (2,0,1))
im_shape = [H, W]
batch_data.append((filename.split('.')[0], img, im_shape))
if len(batch_data) == batch_size:
yield make_test_array(batch_data)
batch_data = []
return reader
if __name__ == '__main__':
d = multithread_loader('./insects/train', batch_size=2, mode='train')
img, gt_boxes, gt_labels, im_shape = next(d())
print(img.shape, gt_boxes.shape, gt_labels.shape, im_shape.shape)