From b664bc77771d3a84d931a57d0cdd701ca925860a Mon Sep 17 00:00:00 2001 From: Yushen CHEN <45333109+SWivid@users.noreply.github.com> Date: Fri, 1 Nov 2024 18:20:39 +0800 Subject: [PATCH] Update finetune_gradio.py --- src/f5_tts/train/finetune_gradio.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/f5_tts/train/finetune_gradio.py b/src/f5_tts/train/finetune_gradio.py index beb509a83..9b46b0102 100644 --- a/src/f5_tts/train/finetune_gradio.py +++ b/src/f5_tts/train/finetune_gradio.py @@ -28,7 +28,7 @@ from scipy.io import wavfile from transformers import pipeline from cached_path import cached_path -from f5_tts.api import F5TTS, target_sample_rate +from f5_tts.api import F5TTS from f5_tts.model.utils import convert_char_to_pinyin from importlib.resources import files @@ -174,15 +174,7 @@ def load_settings(project_name): def get_audio_duration(audio_path): """Calculate the duration mono of an audio file.""" audio, sample_rate = torchaudio.load(audio_path) - - if audio.shape[0] > 1: - audio = torch.mean(audio, dim=0, keepdim=True) - - if sample_rate != target_sample_rate: - audio = torchaudio.transforms.Resample(sample_rate, target_sample_rate) - - num_channels = audio.shape[0] - return audio.shape[1] / (sample_rate * num_channels) + return audio.shape[1] / sample_rate def clear_text(text):