From 365a19bb8cc555be221cc28fe57127ca9a6edb5b Mon Sep 17 00:00:00 2001 From: Alexey Shmelev Date: Thu, 17 Oct 2024 05:38:09 -0400 Subject: [PATCH] moved audio normalization to pre-splitting to avoid an unexpected volume boost of the quiet split parts --- rvc/train/preprocess/preprocess.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/rvc/train/preprocess/preprocess.py b/rvc/train/preprocess/preprocess.py index d27f623f5..f6c030b4d 100644 --- a/rvc/train/preprocess/preprocess.py +++ b/rvc/train/preprocess/preprocess.py @@ -63,15 +63,11 @@ def _normalize_audio(self, audio: np.ndarray): def process_audio_segment( self, - audio_segment: np.ndarray, + normalized_audio: np.ndarray, sid: int, idx0: int, idx1: int, - process_effects: bool, ): - normalized_audio = ( - self._normalize_audio(audio_segment) if process_effects else audio_segment - ) if normalized_audio is None: print(f"{sid}-{idx0}-{idx1}-filtered") return @@ -105,6 +101,7 @@ def process_audio( audio_length = librosa.get_duration(y=audio, sr=self.sr) if process_effects: audio = signal.lfilter(self.b_high, self.a_high, audio) + audio = self._normalize_audio(audio) if noise_reduction: audio = nr.reduce_noise( y=audio, sr=self.sr, prop_decrease=reduction_strength @@ -121,18 +118,18 @@ def process_audio( start : start + int(self.per * self.sr) ] self.process_audio_segment( - tmp_audio, sid, idx0, idx1, process_effects + tmp_audio, sid, idx0, idx1, ) idx1 += 1 else: tmp_audio = audio_segment[start:] self.process_audio_segment( - tmp_audio, sid, idx0, idx1, process_effects + tmp_audio, sid, idx0, idx1, ) idx1 += 1 break else: - self.process_audio_segment(audio, sid, idx0, idx1, process_effects) + self.process_audio_segment(audio, sid, idx0, idx1,) except Exception as error: print(f"Error processing audio: {error}") return audio_length