forked from tensorpack/tensorpack
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtimitdata.py
58 lines (46 loc) · 1.52 KB
/
timitdata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# -*- coding: utf-8 -*-
# File: timitdata.py
# Author: Yuxin Wu
import numpy as np
from six.moves import range
from tensorpack import ProxyDataFlow
__all__ = ['TIMITBatch']
def batch_feature(feats):
    """Stack variable-length feature matrices into one zero-padded array.

    Args:
        feats: non-empty list of 2-D arrays. Each one is indexed as
            (time, feature); the feature width of the output is taken
            from ``feats[0].shape[1]``.

    Returns:
        An array of shape ``(len(feats), longest_time, feature_width)``
        created with ``np.zeros`` (default dtype), where row ``i`` holds
        ``feats[i]`` followed by zeros up to the longest sequence.
    """
    lengths = [f.shape[0] for f in feats]
    # pad every sequence to the longest one in the batch
    padded = np.zeros((len(feats), max(lengths), feats[0].shape[1]))
    for row, (n_frames, feat) in enumerate(zip(lengths, feats)):
        padded[row, :n_frames, :] = feat
    return padded
def sparse_label(labels):
    """Convert a batch of label sequences into sparse (COO-style) components.

    Args:
        labels: non-empty list of 1-D arrays, one label sequence per
            batch element. Sequences may have different lengths.

    Returns:
        A tuple ``(indices, values, shape)``:

        * ``indices``: int64 array of shape ``(N, 2)`` whose rows are
          ``[batch_index, time_index]`` for every label, in batch-major
          order. It is always 2-D, even when ``N == 0``.
        * ``values``: array of the ``N`` label values in the same order.
        * ``shape``: ``[len(labels), longest_sequence_length]``.
    """
    maxlen = max([lab.shape[0] for lab in labels])
    shape = [len(labels), maxlen]   # bxt
    indices = []
    values = []
    for bid, lab in enumerate(labels):
        for tid, c in enumerate(lab):
            indices.append([bid, tid])
            values.append(c)
    # Force an (N, 2) integer layout: without the explicit dtype/reshape an
    # all-empty batch would produce a float array of shape (0,), which is
    # inconsistent with the non-empty case.
    indices = np.asarray(indices, dtype=np.int64).reshape(-1, 2)
    values = np.asarray(values)
    return (indices, values, shape)
class TIMITBatch(ProxyDataFlow):
    """Group (feature, label) datapoints from ``ds`` into padded batches.

    Each yielded datapoint is a list
    ``[batchfeat, label_indices, label_values, label_shape, seqlen]``
    built by ``batch_feature`` and ``sparse_label`` from ``batch``
    consecutive datapoints of the wrapped dataflow.
    """

    def __init__(self, ds, batch):
        """
        Args:
            ds: the wrapped dataflow; each of its datapoints is unpacked
                into exactly two components ``(feat, lab)``.
            batch: number of datapoints per batch.
        """
        self.ds = ds
        self.batch = batch

    def __len__(self):
        """Number of full batches; leftover datapoints are not counted."""
        return len(self.ds) // self.batch

    def __iter__(self):
        """Yield one batched datapoint per full batch of the wrapped dataflow."""
        source = self.ds.__iter__()
        for _ in range(self.__len__()):
            feats, labs = [], []
            for _ in range(self.batch):
                feat, lab = next(source)
                feats.append(feat)
                labs.append(lab)
            batchfeat = batch_feature(feats)
            label_indices, label_values, label_shape = sparse_label(labs)
            # unpadded length of each sequence in the batch
            seqlen = np.asarray([f.shape[0] for f in feats])
            yield [batchfeat, label_indices, label_values, label_shape, seqlen]