forked from mikesj-public/dcgan-autoencoder
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdataprocessing.py
executable file
·99 lines (75 loc) · 2.31 KB
/
dataprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python
from PIL import Image
from os import listdir
from os.path import isfile, join
import numpy as np
import pickle
from time import time
import sys
import h5py
from tqdm import tqdm
# Directory the script expects to contain the aligned images (see README).
image_dir = 'img_align_celeba/'
try:
    # Keep regular files only; anything else in the directory is skipped.
    image_locs = [join(image_dir, f) for f in listdir(image_dir)
                  if isfile(join(image_dir, f))]
except OSError:
    # listdir() raises OSError when the directory is missing or unreadable.
    # Stop here: nothing below can run without image_locs, and continuing
    # would only fail with a confusing NameError on the next statement.
    print("expected aligned images directory, see README")
    sys.exit(1)
total_imgs = len(image_locs)
print("found %i images in directory" % total_imgs)
def process_image(im):
    """Build a (low-res input, high-res target) array pair from one image.

    The image is converted to RGB if necessary, shrunk to fit within its
    size divided by 1.3, and cropped by 3 rows at the top and bottom and
    4 columns at the left and right to form the target. The target is then
    shrunk to fit within a quarter of its size to form the input.

    NOTE(review): the scale and crop constants look tuned so that the
    result matches the 160x128 / 40x32 dataset shapes used elsewhere in
    this script -- confirm against the source image size.

    Args:
        im: a PIL image. ``thumbnail`` resizes in place, so an image that
            is already RGB is modified by this call.

    Returns:
        ``(input, target)``: numpy arrays of the small image and of the
        cropped image respectively.
    """
    if im.mode != "RGB":
        im = im.convert("RGB")

    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
    # under its current name. Fall back for builds that predate LANCZOS.
    resample = getattr(Image, "LANCZOS", None)
    if resample is None:
        resample = Image.ANTIALIAS

    new_size = [int(i / 1.3) for i in im.size]
    im.thumbnail(new_size, resample)
    target = np.array(im)[3:-3, 4:-4, :]

    im = Image.fromarray(target)
    # Floor division keeps the sizes integral on Python 3 as well as 2.
    new_size = [i // 4 for i in im.size]
    im.thumbnail(new_size, resample)
    small = np.array(im)
    return small, target
def proc_loc(loc):
    """Load the image at ``loc`` and return its ``(input, target)`` pair.

    This is best-effort: any ordinary failure while opening or processing
    the file yields ``None`` so that the caller can skip that file.

    Args:
        loc: path of the image file to open.

    Returns:
        The ``(input, target)`` tuple produced by ``process_image``, or
        ``None`` if the file could not be opened or processed.
    """
    try:
        small, target = process_image(Image.open(loc))
        return (small, target)
    except Exception:
        # Unlike a bare ``except``, this lets KeyboardInterrupt and
        # SystemExit propagate, so Ctrl-C still stops a long run.
        return None
# Mode 'a' opens an existing faces.hdf5 read/write and creates it when it
# is absent. The previous "try 'r+', else 'w'" fallback would truncate an
# existing file whenever the first open failed for any other reason.
hf = h5py.File('faces.hdf5', 'a')

# Both datasets start with a single row and are grown along axis 0 as
# images are written, one image per gzip-compressed chunk. maxshape uses an
# integer row limit (dimensions are counts, not floats).
if 'target' in hf:
    dset_t = hf['target']
else:
    dset_t = hf.create_dataset("target", (1, 160, 128, 3),
                               maxshape=(1000000, 160, 128, 3),
                               chunks=(1, 160, 128, 3),
                               compression="gzip")

if 'input' in hf:
    dset_i = hf['input']
else:
    dset_i = hf.create_dataset("input", (1, 40, 32, 3),
                               maxshape=(1000000, 40, 32, 3),
                               chunks=(1, 40, 32, 3),
                               compression="gzip")
batch_size = 1024
# Ceiling division on batch_size so the final, partially filled batch is
# processed too (flooring by a literal 1024 silently dropped it).
num_iter = (total_imgs + batch_size - 1) // batch_size
# Row index at which the next batch is written.
# NOTE(review): always starts at 0, so re-running against an existing
# faces.hdf5 overwrites (and, via resize, may shrink) what is already
# stored -- confirm that is intended.
insert_point = 0
print("STARTING PROCESSING IN BATCHES OF %i" % batch_size)
try:
    for i in tqdm(range(num_iter)):
        sys.stdout.flush()
        locs = image_locs[i * batch_size:(i + 1) * batch_size]
        # proc_loc returns None for files it could not process; drop those.
        pairs = [pair for pair in (proc_loc(loc) for loc in locs)
                 if pair is not None]
        if not pairs:
            # Nothing usable in this batch: leave the datasets untouched.
            continue
        X_in = np.array([small for small, _ in pairs])
        X_ta = np.array([target for _, target in pairs])
        end_point = insert_point + len(pairs)
        # Grow both datasets just enough to hold this batch, then write it.
        dset_i.resize((end_point, 40, 32, 3))
        dset_t.resize((end_point, 160, 128, 3))
        dset_i[insert_point:end_point] = X_in
        dset_t[insert_point:end_point] = X_ta
        insert_point = end_point
finally:
    # Close the file even if a batch fails, so written data is flushed.
    hf.close()