data_loader.py
import os
import os.path
import csv
import sys

import numpy as np
import librosa as lr
import torch
import torch.utils.data
import torch.nn.functional as F

from third_octave import ThirdOctaveTransform

np.set_printoptions(threshold=sys.maxsize)  # print full arrays when debugging

class PresPredDataset(torch.utils.data.Dataset):
    """Dataset of third-octave spectral frames paired with per-class presence profiles."""

    def __init__(self, settings, evalSet=False, subset='train'):
        self.datasetName = settings['eval_dataset_name'] if evalSet else settings['dataset_name']
        self.datasetPath = os.path.join(settings['root_dir'], self.datasetName)
        self.sr = settings['sr']
        self.tLen = settings['texture_length']
        self.fLen = settings['frame_length']
        self.hLen = settings['eval_hop_length'] if evalSet else settings['hop_length']
        self.lvlOffset = settings['level_offset_db']
        self.seqLen = settings['eval_seq_length'] if evalSet else settings['seq_length']
        self.classes = settings['classes']
        self.zero_bands_lower = settings.get('zero_bands_lower')
        self.nClasses = len(self.classes)
        # Build the .npy caches from the raw audio on first use.
        if not os.path.isfile(self.datasetPath + '_' + subset + '_tob.npy'):
            assert settings['audio_dir'] is not None, 'No location for dataset audio files specified.'
            if not os.path.exists(settings['root_dir']):
                os.makedirs(settings['root_dir'])
            self.create_dataset(os.path.join(settings['audio_dir'], self.datasetName + '_' + subset),
                                settings['root_dir'], subset=subset, padExamples=settings['pad_examples'])
        self.data_tob = np.load(self.datasetPath + '_' + subset + '_tob.npy', mmap_mode='r')
        self.data_pres = np.load(self.datasetPath + '_' + subset + '_pres.npy', mmap_mode='r')
        self.nFrames = self.data_pres.shape[1]
        self.allowExampleOverlap = settings['allow_example_overlap']
        print('{} dataset {} split length: {}'.format(self.datasetName, subset, self.__len__()))

    def create_dataset(self, location, outputDir, subset='train', padExamples=False):
        print('create dataset from audio files at', location)
        files, pres_files = list_all_audio_files(os.path.join(location, 'sound'),
                                                 os.path.join(location, 'pres_profile'))
        tob_transform = ThirdOctaveTransform(self.sr, self.fLen, self.hLen)
        # Dummy extraction on the first file to get the output dimensions;
        # all files are assumed to have the same length.
        x, _ = lr.load(path=files[0], sr=self.sr, mono=True)
        if padExamples:
            nFrames = int(np.ceil(((x.shape[0] - (self.tLen * self.fLen)) / self.hLen) + 1))
        else:
            nFrames = int(np.floor(((x.shape[0] - (self.tLen * self.fLen)) / self.hLen) + 1))
        nBands = 29
        nFiles = len(files)
        # nFrames + (tLen - 1) is the number of 125 ms third-octave frames per file.
        x_tob = np.zeros((nFiles, nFrames + (self.tLen - 1), nBands))
        x_pres = np.zeros((nFiles, nFrames, self.nClasses))
        for iF, file in enumerate(files):
            print(' -> Processing file ' + str(iF + 1) + ' of ' + str(nFiles))
            print(pres_files[iF])
            x, _ = lr.load(path=file, sr=self.sr, mono=True)
            x_pres_temp = np.zeros((self.nClasses, nFrames))
            # Presence profile: one CSV per class, one value per frame.
            for iC in range(self.nClasses):
                with open(pres_files[iF] + '_pp_' + self.classes[iC] + '.csv', newline='') as csvfile:
                    csvreader = csv.reader(csvfile, delimiter=',')
                    for row in csvreader:
                        if len(row) == nFrames:
                            x_pres_temp[iC, :] = np.asarray([float(l) for l in row])
            x_tob[iF, :, :] = tob_transform.wave_to_third_octave(x, padExamples).T
            x_pres[iF, :, :] = x_pres_temp.T
        np.save(os.path.join(outputDir, self.datasetName + '_' + subset + '_tob.npy'), x_tob)
        np.save(os.path.join(outputDir, self.datasetName + '_' + subset + '_pres.npy'), x_pres)

    def __getitem__(self, idx):
        if self.seqLen == 1:  # CNN: one texture window per example
            iFile = int(np.floor(idx / self.nFrames))  # index of file
            iEx = np.mod(idx, self.nFrames)  # index of example within file
            input_x = torch.unsqueeze(torch.from_numpy(self.data_tob[iFile, iEx:iEx + self.tLen, :]), 0)
            pres = torch.unsqueeze(torch.from_numpy(self.data_pres[iFile, iEx, :self.nClasses]), 0)
        else:  # RNN: a sequence of seqLen texture windows per example
            step = int(np.round(self.fLen / self.hLen))  # frame step between sequence elements
            if self.allowExampleOverlap:
                nEx = self.nFrames - step * (self.seqLen - 1)  # examples per file
                iFile = int(np.floor(idx / nEx))
                iEx = np.mod(idx, nEx)  # index of example within file
                input_x = torch.unsqueeze(torch.from_numpy(
                    self.data_tob[iFile, iEx:iEx + step * (self.seqLen - 1) + 1:step, :]), 0)
                pres = torch.unsqueeze(torch.from_numpy(
                    self.data_pres[iFile, iEx:iEx + step * (self.seqLen - 1) + 1:step, :self.nClasses]), 0)
            else:
                nEx = int(np.floor(self.nFrames / self.seqLen))  # examples per file
                iFile = int(np.floor(idx / nEx))
                iEx = np.mod(idx, nEx)  # index of example within file
                input_x = torch.unsqueeze(torch.from_numpy(
                    self.data_tob[iFile, iEx * self.seqLen:(iEx + 1) * self.seqLen + (self.tLen - 1), :]), 0)
                pres = torch.unsqueeze(torch.from_numpy(
                    self.data_pres[iFile, iEx * self.seqLen:(iEx + 1) * self.seqLen, :self.nClasses]), 0)
        # ----- TODO: zero out bands below 100 Hz -----
        # input_x = input_x.type(torch.FloatTensor)
        # input_x[:, :, :self.zero_bands_lower] = -self.lvlOffset
        # Pad the last (frequency) dimension from 29 to 32 bands for the encoder,
        # and apply the level correction.
        return F.pad(input_x + self.lvlOffset, (0, 3)), pres

    def __len__(self):
        if self.seqLen == 1:  # CNN: every frame of every file is an example
            return self.data_pres.shape[0] * self.nFrames
        else:  # RNN: number of files times examples per file
            step = int(np.round(self.fLen / self.hLen))
            if self.allowExampleOverlap:
                return self.data_pres.shape[0] * (self.nFrames - step * (self.seqLen - 1))
            else:
                return self.data_pres.shape[0] * int(np.floor(self.nFrames / self.seqLen))


def wav_to_npy_no_labels(settings, dataPath, datasetName):
    """Convert a folder of unlabelled audio files to third-octave spectral data."""
    files, _ = list_all_audio_files(os.path.join(dataPath, datasetName, 'sound'))
    tob_transform = ThirdOctaveTransform(settings['sr'], settings['frame_length'], settings['eval_hop_length'])
    x_tob = []
    for iF, file in enumerate(files):
        print(files[iF])
        x, _ = lr.load(path=file, sr=settings['sr'], mono=True)
        print(x.shape)
        x_tob.append(tob_transform.wave_to_third_octave(x, True).T)
        print(' -> Processed ' + str(iF + 1) + ' of ' + str(len(files)) + ' files')
    np.save(os.path.join(dataPath, datasetName + '_spectralData.npy'), x_tob, allow_pickle=True)


def list_all_audio_files(audio_location, pres_location=None):
    """List audio files under audio_location, with matching presence-profile stems if given."""
    pres_files = []
    audio_files = []
    for dirpath, dirnames, filenames in os.walk(audio_location):
        for filename in [f for f in sorted(filenames)
                         if f.endswith(('.mp3', '.wav', '.aif', '.aiff')) and 'channel' not in f]:
            if pres_location is not None:
                pres_files.append(os.path.join(pres_location, os.path.splitext(filename)[0]))
            audio_files.append(os.path.join(dirpath, filename))
    if len(audio_files) == 0:
        print('found no audio files in ' + audio_location)
    return audio_files, pres_files
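

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original pipeline). The settings keys
# match those read in PresPredDataset.__init__, but every value below is an
# illustrative assumption, and 'demo' is a hypothetical dataset name; adjust
# them to your own configuration and data layout.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    example_settings = {
        'dataset_name': 'demo',            # hypothetical dataset name
        'eval_dataset_name': 'demo',
        'root_dir': 'data',                # where the .npy caches are stored
        'audio_dir': 'audio',              # raw audio root, used on first run
        'sr': 32000,                       # assumed sample rate
        'texture_length': 8,               # third-octave frames per texture window
        'frame_length': 4096,              # samples per third-octave frame
        'hop_length': 4096,
        'eval_hop_length': 4096,
        'seq_length': 1,                   # 1 -> CNN-style single-window examples
        'eval_seq_length': 1,
        'level_offset_db': 0,
        'classes': ['t', 'v', 'b'],        # assumed class labels
        'pad_examples': True,
        'allow_example_overlap': False,
    }
    dataset = PresPredDataset(example_settings, evalSet=False, subset='train')
    loader = torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=True)
    x, pres = next(iter(loader))
    # x: (batch, 1, texture_length, 32) spectral input; pres: (batch, 1, nClasses)
    print(x.shape, pres.shape)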