# prepare_data.py
# Resizes the images found under data/raw/<folder>/ and saves them as batched
# NumPy arrays under data/prepared/<folder>/.
import cv2
import numpy as np
import os
# Maximum number of images stacked into one saved array.
batch_size = 1000
# Target image size; the resize step passes it to OpenCV as
# (inp_shape[1], inp_shape[0]), i.e. this tuple is (height, width).
inp_shape = (256, 256)

data_path = "data/"
raw_data_path = os.path.join(data_path, "raw")
prepared_data_path = os.path.join(data_path, "prepared")

# exist_ok avoids the check-then-create race of a separate isdir() test.
os.makedirs(prepared_data_path, exist_ok=True)

# Only sub-directories of the raw data directory are processed; loose files
# sitting directly inside it are ignored.
folders = [
    name
    for name in os.listdir(raw_data_path)
    if os.path.isdir(os.path.join(raw_data_path, name))
]

# Running index used to name the dumped batches; it is shared across all
# folders and incremented by dump_np after every save.
counter = 1
def dump_np(data, folder_name):
    """Save one batch to ``<prepared_data_path>/<folder_name>/<counter>.npy``.

    The output directory is created if it does not exist yet. The file name is
    the current value of the module-level ``counter``, which is incremented
    after the save so that successive batches never overwrite each other.

    Args:
        data: Array to store; it is handed to ``np.save`` unchanged.
        folder_name: Name of the sub-directory of ``prepared_data_path`` that
            receives the file.
    """
    global counter

    directory = os.path.join(prepared_data_path, folder_name)
    # exist_ok avoids the check-then-create race of a separate isdir() test.
    os.makedirs(directory, exist_ok=True)

    # np.save appends the ".npy" extension because the path has none.
    file_path = os.path.join(directory, str(counter))
    np.save(file_path, data)
    counter += 1
# Convert every raw folder: read each file, resize it to inp_shape and save
# the images in batches of at most batch_size via dump_np.
for folder in folders:
    folder_path = os.path.join(raw_data_path, folder)
    files = os.listdir(folder_path)
    bulk = []

    for i, file in enumerate(files, 1):
        file_path = os.path.join(folder_path, file)
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread reports failure (non-image file, sub-directory,
            # corrupt data) by returning None instead of raising; passing
            # that on to cv2.resize would abort the whole run.
            print("Skipping unreadable file: %s" % file_path)
        else:
            # cv2.resize expects the target size as (width, height).
            bulk.append(cv2.resize(image, (inp_shape[1], inp_shape[0])))

        # Flush when the batch is full or the folder is exhausted, but never
        # write an empty batch (possible when trailing files were skipped).
        if bulk and (len(bulk) >= batch_size or i == len(files)):
            print("dumping")
            dump_np(np.asarray(bulk), folder)
            bulk = []

        print("Folder: %s, Files: %s/%s" % (folder, i, len(files)))

print("Completed")