From dfb130ac0889a953ff865cd47beac0f5b184da8f Mon Sep 17 00:00:00 2001
From: Daniel McKnight
Date: Wed, 8 Nov 2023 18:07:26 -0800
Subject: [PATCH] Remove audio resampling moved to neon-speech
 https://github.com/NeonGeckoCom/neon_speech/pull/180

Closes #28
---
 neon_iris/web_client.py | 39 ---------------------------------------
 1 file changed, 39 deletions(-)

diff --git a/neon_iris/web_client.py b/neon_iris/web_client.py
index b9a0db2..68a63e6 100644
--- a/neon_iris/web_client.py
+++ b/neon_iris/web_client.py
@@ -118,45 +118,6 @@ def update_profile(self, stt_lang: str, tts_lang: str, tts_lang_2: str,
         LOG.info(f"Updated profile for: {session_id}")
         return session_id
 
-    def send_audio(self, audio_file: str, lang: str = "en-us",
-                   username: Optional[str] = None,
-                   user_profiles: Optional[list] = None,
-                   context: Optional[dict] = None):
-        """
-        @param audio_file: path to wav audio file to send to speech module
-        @param lang: language code associated with request
-        @param username: username associated with request
-        @param user_profiles: user profiles expecting a response
-        """
-        # TODO: Audio conversion is really slow here. check ovos-stt-http-server
-        audio_file = self.convert_audio(audio_file)
-        self._send_audio(audio_file, lang, username, user_profiles, context)
-
-    def convert_audio(self, audio_file: str, target_sr=16000, target_channels=1,
-                      dtype='int16') -> str:
-        """
-        @param audio_file: path to audio file to convert for speech model
-        @returns: path to converted audio file
-        """
-        # Load the audio file
-        y, sr = librosa.load(audio_file, sr=None, mono=False)  # Load without changing sample rate or channels
-
-        # If the file has more than one channel, mix it down to one channel
-        if y.ndim > 1 and target_channels == 1:
-            y = librosa.to_mono(y)
-
-        # Resample the audio to the target sample rate
-        y_resampled = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
-
-        # Ensure the audio array is in the correct format (int16 for 2-byte samples)
-        y_resampled = (y_resampled * (2 ** (8 * 2 - 1))).astype(dtype)
-
-        output_path = join(self._audio_path, f"{time()}.wav")
-        # Save the audio file with the new sample rate and sample width
-        sf.write(output_path, y_resampled, target_sr, format='WAV', subtype='PCM_16')
-        LOG.info(f"Converted audio file to {output_path}")
-        return output_path
-
     def on_user_input(self, utterance: str, *args, **kwargs) -> str:
         """
         Callback to handle textual user input