Skip to content

Commit 5f15e35

Browse files
committed
Do not overwrite audio
also fixes a bug for r > 1
1 parent 5aea7cd commit 5f15e35

File tree

1 file changed

+9
-5
lines changed

1 file changed

+9
-5
lines changed

generate_aligned_predictions.py

+9-5
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,12 @@ def preprocess(model, in_dir, out_dir, text, audio_filename, mel_filename,
4949
model.make_generation_fast_()
5050

5151
mel_org = np.load(join(in_dir, mel_filename))
52+
# zero pad
53+
b_pad = r # imitates initial state
54+
e_pad = r - len(mel_org) % r if len(mel_org) % r > 0 else 0
55+
mel_org = np.pad(mel_org, [(b_pad, e_pad), (0, 0)],
56+
mode="constant", constant_values=0)
57+
5258
mel = Variable(torch.from_numpy(mel_org)).unsqueeze(0).contiguous()
5359

5460
# Downsample mel spectrogram
@@ -78,10 +84,10 @@ def preprocess(model, in_dir, out_dir, text, audio_filename, mel_filename,
7884
frame_positions=frame_positions, speaker_ids=speaker_ids)
7985

8086
mel_output = mel_outputs[0].data.cpu().numpy()
81-
8287
# **Time resolution adjustment**
83-
# remove beginning audio used for first mel prediction
84-
wav = np.load(join(in_dir, audio_filename))[hparams.hop_size * downsample_step:]
88+
mel_output = mel_output[:-(b_pad + e_pad)]
89+
90+
wav = np.load(join(in_dir, audio_filename))
8591
assert len(wav) % hparams.hop_size == 0
8692

8793
# Coarse upsample just for convenience
@@ -102,8 +108,6 @@ def preprocess(model, in_dir, out_dir, text, audio_filename, mel_filename,
102108
timesteps = len(wav)
103109

104110
# save
105-
np.save(join(out_dir, audio_filename), wav.astype(np.int16),
106-
allow_pickle=False)
107111
np.save(join(out_dir, mel_filename), mel_output.astype(np.float32),
108112
allow_pickle=False)
109113

0 commit comments

Comments
 (0)