Skip to content

Commit

Permalink
Merge pull request #870 from AznamirWoW/fixed_reference
Browse files Browse the repository at this point in the history
Added a fixed reference for the TensorBoard audio section.
  • Loading branch information
blaisewf authored Nov 13, 2024
2 parents ea7c629 + 2fed6f6 commit 76bef85
Show file tree
Hide file tree
Showing 13 changed files with 30 additions and 8 deletions.
Binary file added logs/reference/ref32000.wav
Binary file not shown.
Binary file added logs/reference/ref32000_f0c.npy
Binary file not shown.
Binary file added logs/reference/ref32000_f0f.npy
Binary file not shown.
Binary file added logs/reference/ref32000_feats.npy
Binary file not shown.
Binary file added logs/reference/ref40000.wav
Binary file not shown.
Binary file added logs/reference/ref40000_f0c.npy
Binary file not shown.
Binary file added logs/reference/ref40000_f0f.npy
Binary file not shown.
Binary file added logs/reference/ref40000_feats.npy
Binary file not shown.
Binary file added logs/reference/ref48000.wav
Binary file not shown.
Binary file added logs/reference/ref48000_f0c.npy
Binary file not shown.
Binary file added logs/reference/ref48000_f0f.npy
Binary file not shown.
Binary file added logs/reference/ref48000_feats.npy
Binary file not shown.
38 changes: 30 additions & 8 deletions rvc/train/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,16 +439,38 @@ def run(

cache = []
# get the first sample as reference for tensorboard evaluation
for info in train_loader:
phone, phone_lengths, pitch, pitchf, _, _, _, _, sid = info
if os.path.isfile(os.path.join("logs", "reference", f"ref{sample_rate}.wav")):
import numpy as np
phone = np.load(os.path.join("logs", "reference", f"ref{sample_rate}_feats.npy"))
# repeat each feature row twice along axis 0 so the features match the pitch size
phone = np.repeat(phone, 2, axis=0)
phone = torch.FloatTensor(phone).unsqueeze(0).to(device)
phone_lengths = torch.LongTensor(phone.size(0)).to(device)
pitch = np.load(os.path.join("logs", "reference", f"ref{sample_rate}_f0c.npy"))
# drop the last coarse-pitch frame so the pitch matches the features
pitch = torch.LongTensor(pitch[:-1]).unsqueeze(0).to(device)
pitchf = np.load(os.path.join("logs", "reference", f"ref{sample_rate}_f0f.npy"))
# drop the last fine-pitch frame so the pitch matches the features
pitchf = torch.FloatTensor(pitchf[:-1]).unsqueeze(0).to(device)
sid = torch.LongTensor([0]).to(device)
reference = (
phone.to(device),
phone_lengths.to(device),
pitch.to(device) if pitch_guidance else None,
pitchf.to(device) if pitch_guidance else None,
sid.to(device),
phone,
phone_lengths,
pitch if pitch_guidance else None,
pitchf if pitch_guidance else None,
sid
)
break
else:
for info in train_loader:
phone, phone_lengths, pitch, pitchf, _, _, _, _, sid = info
reference = (
phone.to(device),
phone_lengths.to(device),
pitch.to(device) if pitch_guidance else None,
pitchf.to(device) if pitch_guidance else None,
sid.to(device),
)
break

for epoch in range(epoch_str, total_epoch + 1):
train_and_evaluate(
Expand Down

0 comments on commit 76bef85

Please sign in to comment.