Merge pull request #870 from AznamirWoW/fixed_reference

Added a fixed reference sample for the TensorBoard audio section
IAHispano · Nov 13, 2024 · 76bef85 · 76bef85
2 parents ea7c629 + 2fed6f6
commit 76bef85
Show file tree

Hide file tree

Showing 13 changed files with 30 additions and 8 deletions.
diff --git a/logs/reference/ref32000.wav b/logs/reference/ref32000.wav
diff --git a/logs/reference/ref32000_f0c.npy b/logs/reference/ref32000_f0c.npy
diff --git a/logs/reference/ref32000_f0f.npy b/logs/reference/ref32000_f0f.npy
diff --git a/logs/reference/ref32000_feats.npy b/logs/reference/ref32000_feats.npy
diff --git a/logs/reference/ref40000.wav b/logs/reference/ref40000.wav
diff --git a/logs/reference/ref40000_f0c.npy b/logs/reference/ref40000_f0c.npy
diff --git a/logs/reference/ref40000_f0f.npy b/logs/reference/ref40000_f0f.npy
diff --git a/logs/reference/ref40000_feats.npy b/logs/reference/ref40000_feats.npy
diff --git a/logs/reference/ref48000.wav b/logs/reference/ref48000.wav
diff --git a/logs/reference/ref48000_f0c.npy b/logs/reference/ref48000_f0c.npy
diff --git a/logs/reference/ref48000_f0f.npy b/logs/reference/ref48000_f0f.npy
diff --git a/logs/reference/ref48000_feats.npy b/logs/reference/ref48000_feats.npy
diff --git a/rvc/train/train.py b/rvc/train/train.py
@@ -439,16 +439,38 @@ def run(
 
     cache = []
     # get the first sample as reference for tensorboard evaluation
-    for info in train_loader:
-        phone, phone_lengths, pitch, pitchf, _, _, _, _, sid = info
+    if os.path.isfile(os.path.join("logs", "reference", f"ref{sample_rate}.wav")):
+        import numpy as np
+        phone = np.load(os.path.join("logs", "reference", f"ref{sample_rate}_feats.npy"))
+        # repeat every feature frame twice so the frame count matches the pitch arrays
+        phone = np.repeat(phone, 2, axis=0)
+        phone = torch.FloatTensor(phone).unsqueeze(0).to(device)
+        phone_lengths = torch.LongTensor([phone.size(1)]).to(device)  # frame count; LongTensor(int) only allocates uninitialized storage
+        pitch = np.load(os.path.join("logs", "reference", f"ref{sample_rate}_f0c.npy"))
+        # drop the last coarse-pitch frame so its length lines up with the features
+        pitch = torch.LongTensor(pitch[:-1]).unsqueeze(0).to(device)
+        pitchf = np.load(os.path.join("logs", "reference", f"ref{sample_rate}_f0f.npy"))
+        # drop the last fine-pitch frame so its length lines up with the features
+        pitchf = torch.FloatTensor(pitchf[:-1]).unsqueeze(0).to(device)
+        sid = torch.LongTensor([0]).to(device)
         reference = (
-            phone.to(device),
-            phone_lengths.to(device),
-            pitch.to(device) if pitch_guidance else None,
-            pitchf.to(device) if pitch_guidance else None,
-            sid.to(device),
+            phone,
+            phone_lengths,
+            pitch if pitch_guidance else None,
+            pitchf if pitch_guidance else None,
+            sid
         )
-        break
+    else:
+        for info in train_loader:
+            phone, phone_lengths, pitch, pitchf, _, _, _, _, sid = info
+            reference = (
+                phone.to(device),
+                phone_lengths.to(device),
+                pitch.to(device) if pitch_guidance else None,
+                pitchf.to(device) if pitch_guidance else None,
+                sid.to(device),
+            )
+            break
 
     for epoch in range(epoch_str, total_epoch + 1):
         train_and_evaluate(