-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodelnet_h5_dataset.py
executable file
·141 lines (123 loc) · 5.04 KB
/
modelnet_h5_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
'''
ModelNet dataset. Support ModelNet40, XYZ channels. Up to 2048 points.
Faster IO than ModelNetDataset in the first epoch.
'''
import os
import sys
import numpy as np
import h5py
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
ROOT_DIR = BASE_DIR
sys.path.append(os.path.join(ROOT_DIR, 'utils'))
import provider
# Download dataset for point cloud classification
# DATA_DIR = '/data'
# if not os.path.exists(DATA_DIR):
# os.mkdir(DATA_DIR)
# if not os.path.exists(os.path.join(DATA_DIR, 'modelnet40_ply_hdf5_2048')):
# www = 'https://shapenet.cs.stanford.edu/media/modelnet40_ply_hdf5_2048.zip'
# zipfile = os.path.basename(www)
# os.system('wget %s; unzip %s' % (www, zipfile))
# os.system('mv %s %s' % (zipfile[:-4], DATA_DIR))
# os.system('rm %s' % (zipfile))
def pc_normalize(pc):
l = pc.shape[0]
centroid = np.mean(pc, axis=1)
pc = pc - centroid[:,np.newaxis,:]
m = np.max(np.sqrt(np.sum(np.power(pc,2), axis=2)),axis=1)
pc = pc / m[:,np.newaxis,np.newaxis]
return pc
def shuffle_data(data, labels):
""" Shuffle data and labels.
Input:
data: B,N,... numpy array
label: B,... numpy array
Return:
shuffled data, label and shuffle indices
"""
idx = np.arange(len(labels))
np.random.shuffle(idx)
return data[idx, ...], labels[idx], idx
def getDataFiles(list_filename):
return [line.rstrip() for line in open(list_filename)]
def load_h5(h5_filename):
f = h5py.File(h5_filename)
data = f['data'][:]
normal = f['normal'][:]
label = f['label'][:]
return (np.concatenate([data,normal],axis=-1), label)
def loadDataFile(filename):
return load_h5(filename)
class ModelNetH5Dataset(object):
def __init__(self, root,list_filename, batch_size = 32, npoints = 1024, shuffle=False,normalize=True):
self.list_filename = list_filename
self.batch_size = batch_size
self.npoints = npoints
self.shuffle = shuffle
self.normalize = normalize
self.h5_files = getDataFiles(os.path.join(root,list_filename))
self.root=root
self.reset()
@staticmethod
def augment_batch_data(batch_data, augment, rotate=0):
if augment:
# augment points
jittered_data = provider.random_scale_point_cloud(batch_data[:, :, 0:3])
jittered_data = provider.shift_point_cloud(jittered_data)
jittered_data = provider.jitter_point_cloud(jittered_data)
batch_data[:, :, 0:3] = jittered_data
if rotate == 2:
# rotated points and normal
batch_data = provider.rotate_point_cloud_with_normal(batch_data)
elif rotate == 3:
batch_data = provider.rotate_perturbation_point_cloud_with_normal(batch_data)
return provider.shuffle_points(batch_data)
def reset(self):
''' reset order of h5 files '''
self.file_idxs = np.arange(0, len(self.h5_files))
# if self.shuffle: np.random.shuffle(self.file_idxs)
self.current_data = None
self.current_label = None
self.current_file_idx = 0
self.batch_idx = 0
def _get_data_filename(self):
return os.path.join(self.root,self.h5_files[self.file_idxs[self.current_file_idx]])
def _load_data_file(self, filename):
self.current_data,self.current_label = loadDataFile(filename)
self.current_label = np.squeeze(self.current_label)
self.batch_idx = 0
if self.shuffle:
self.current_data, self.current_label,_ = shuffle_data(self.current_data,self.current_label)
def _has_next_batch_in_file(self):
return self.batch_idx*self.batch_size < self.current_data.shape[0]
def num_channel(self):
return 3
def has_next_batch(self):
# TODO: add backend thread to load data
if (self.current_data is None) or (not self._has_next_batch_in_file()):
if self.current_file_idx >= len(self.h5_files):
return False
self._load_data_file(self._get_data_filename())
self.batch_idx = 0
self.current_file_idx += 1
return self._has_next_batch_in_file()
def next_batch(self, augment=False,rotate=0):
''' returned dimension may be smaller than self.batch_size '''
start_idx = self.batch_idx * self.batch_size
end_idx = min((self.batch_idx+1) * self.batch_size, self.current_data.shape[0])
bsize = end_idx - start_idx
data_batch = self.current_data[start_idx:end_idx, :, :].copy()
label_batch = self.current_label[start_idx:end_idx].copy()
self.batch_idx += 1
batch_data = self.augment_batch_data(data_batch,augment,rotate)
if self.normalize:
batch_data[:,:,:3]=pc_normalize(batch_data[:,:,:3])
return batch_data, label_batch
if __name__=='__main__':
d = ModelNetH5Dataset('/data/modelnet40_ply_hdf5_2048/test_files.txt')
print(d.shuffle)
print(d.has_next_batch())
ps_batch, cls_batch = d.next_batch(True)
print(ps_batch.shape)
print(cls_batch.shape)