-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpreproc.py
131 lines (98 loc) · 3.5 KB
/
preproc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import argparse
import os
from multiprocessing import Pool, RLock, current_process, freeze_support
from pathlib import Path
from random import shuffle
import librosa
import numpy as np
import torch
from omegaconf import DictConfig, OmegaConf
from torchaudio.functional import highpass_biquad
from tqdm import tqdm
from pitch import BasePE
from vuv import VUVEstimator
def process(
    config: DictConfig,
    audio_path: Path,
    pitch_extractor: BasePE,
    vuv_extractor: VUVEstimator,
):
    """Extract features for one audio file and cache them beside it as ``.npy``.

    The saved object is a dict with the keys ``"path"``, ``"audio"`` (the
    waveform as loaded, i.e. before high-pass filtering), ``"pitch"`` and,
    when ``config.preprocessing.vuv`` is truthy, ``"vuv"``. If the ``.npy``
    sibling of ``audio_path`` already exists the file is skipped.

    Args:
        config: Reads ``sample_rate``, ``f_min``, ``preprocessing.vuv`` and
            ``preprocessing.oversampling``.
        audio_path: Audio file to load; the output path is the same path with
            its suffix replaced by ``.npy``.
        pitch_extractor: Called as ``pitch_extractor(audio, None)``; it must
            return three values, of which the first (f0) and third are used.
        vuv_extractor: Object providing ``get_vuv(audio, f0_0)``; only touched
            when ``config.preprocessing.vuv`` is truthy.

    Returns:
        None. The result is written to disk with ``np.save``.
    """
    save_path = audio_path.with_suffix(".npy")
    if save_path.exists():
        return

    audio, _ = librosa.load(audio_path, sr=config.sample_rate, mono=True)
    data = {"path": str(audio_path), "audio": audio}

    # Add a leading dimension and high-pass the signal before analysis.
    audio = torch.from_numpy(audio).unsqueeze(0)
    audio = highpass_biquad(audio, config.sample_rate, config.f_min)

    f0, _, f0_0 = pitch_extractor(audio, None)
    f0 = f0.cpu().numpy()

    if config.preprocessing.vuv:
        data["vuv"] = vuv_extractor.get_vuv(audio, f0_0)

    oversampling = config.preprocessing.oversampling
    if oversampling > 1:
        # Resample the contour down to len(f0) // oversampling points.
        # The interpolation grid is built from frame indices rather than from
        # the f0 value range: np.interp needs increasing sample points, which
        # a min/max-of-values grid cannot guarantee for a flat (or nearly
        # flat) contour, and any NaN in f0 would poison such a grid.
        n_frames = len(f0)
        f0 = np.interp(
            np.linspace(0, n_frames - 1, n_frames // oversampling),
            np.arange(n_frames),
            f0,
        )

    data["pitch"] = f0
    np.save(save_path, data)
def chunks(lst, n):
    """Generate consecutive slices of ``lst``, each at most ``n`` items long.

    The final slice is shorter when ``len(lst)`` is not a multiple of ``n``.
    """
    starts = range(0, len(lst), n)
    yield from (lst[start : start + n] for start in starts)
def run(config, files):
    """Process a batch of audio files, showing a per-worker progress bar.

    Builds the optional VUV estimator and the pitch extractor named by
    ``config.preprocessing.pitch_extractor.name`` (looked up in the ``pitch``
    module), then calls ``process`` on every entry of ``files``.

    Args:
        config: Reads ``hop_length``, ``sample_rate`` and, under
            ``preprocessing``: ``vuv``, ``oversampling``, ``f0_min``,
            ``f0_max``, ``pitch_extractor.name`` and
            ``pitch_extractor.keep_zeros``.
        files: Iterable of audio paths handed to ``process`` one by one.
    """
    # ``_identity`` is a private multiprocessing attribute; its first entry is
    # used here as a 1-based worker index to give each worker its own bar row.
    # It is empty in the main process, so fall back to row 0 instead of
    # raising IndexError when run() is called without a Pool.
    identity = getattr(current_process(), "_identity", ())
    pos = identity[0] - 1 if identity else 0

    vuv_extractor = VUVEstimator(config) if config.preprocessing.vuv else None

    # With oversampling, pitch is extracted at a proportionally shorter hop.
    oversampling = config.preprocessing.oversampling
    hop_length = config.hop_length
    if oversampling > 1:
        hop_length = hop_length // oversampling

    # Resolve the extractor class by name from the ``pitch`` module.
    extractor_name = config.preprocessing.pitch_extractor.name
    pitch_module = __import__("pitch", fromlist=[extractor_name])
    pitch_extractor_cls = getattr(pitch_module, extractor_name)
    pitch_extractor = pitch_extractor_cls(
        sample_rate=config.sample_rate,
        hop_length=hop_length,
        keep_zeros=config.preprocessing.pitch_extractor.keep_zeros,
        f0_min=config.preprocessing.f0_min,
        f0_max=config.preprocessing.f0_max,
    )

    for af in tqdm(files, position=pos):
        process(config, af, pitch_extractor, vuv_extractor)
if __name__ == "__main__":
    # Script entry point: parse the CLI, optionally delete cached *.npy files,
    # then distribute every *.wav file under --path across a worker pool.
    freeze_support()

    parser = argparse.ArgumentParser()
    parser.add_argument("--config", type=str, required=True)
    parser.add_argument("--path", type=str, required=True)
    parser.add_argument("--clean", action="store_true")
    args = parser.parse_args()

    config = OmegaConf.load(args.config)

    if args.clean:
        print("Cleaning *.npy files...")
        # os.walk yields (dirpath, dirnames, filenames); only the file names
        # are needed here.
        for root, _, filenames in os.walk(args.path):
            for filename in filenames:
                if not filename.endswith(".npy"):
                    continue
                os.remove(Path(root, filename))
        print("Done!")

    audio_files = [
        Path(root, filename)
        for root, _, filenames in os.walk(args.path)
        for filename in filenames
        if filename.endswith(".wav")
    ]
    shuffle(audio_files)

    # One (config, files) argument tuple per pool worker.
    worker_count = config.preprocessing.threads
    batches = np.array_split(np.array(audio_files), worker_count)
    jobs = [(config, batch) for batch in batches]

    # Share a single lock between workers so their tqdm bars do not interleave.
    with Pool(worker_count, initializer=tqdm.set_lock, initargs=(RLock(),)) as pool:
        pool.starmap(run, jobs)