Skip to content

Commit

Permalink
Remove audio resampling moved to neon-speech NeonGeckoCom/neon_speech#180
Browse files Browse the repository at this point in the history

Closes #28
  • Loading branch information
NeonDaniel committed Nov 9, 2023
1 parent 68378bd commit dfb130a
Showing 1 changed file with 0 additions and 39 deletions.
39 changes: 0 additions & 39 deletions neon_iris/web_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,45 +118,6 @@ def update_profile(self, stt_lang: str, tts_lang: str, tts_lang_2: str,
LOG.info(f"Updated profile for: {session_id}")
return session_id

def send_audio(self, audio_file: str, lang: str = "en-us",
               username: Optional[str] = None,
               user_profiles: Optional[list] = None,
               context: Optional[dict] = None):
    """
    Convert an audio file with `convert_audio` and forward the converted
    file to `_send_audio` together with the request metadata.
    @param audio_file: path to wav audio file to send to speech module
    @param lang: language code associated with request
    @param username: username associated with request
    @param user_profiles: user profiles expecting a response
    @param context: optional context dict passed through to `_send_audio`
    """
    # TODO: Audio conversion is really slow here. check ovos-stt-http-server
    converted_path = self.convert_audio(audio_file)
    self._send_audio(converted_path, lang, username, user_profiles,
                     context)

def convert_audio(self, audio_file: str, target_sr=16000, target_channels=1,
                  dtype='int16') -> str:
    """
    Convert an audio file to a 16-bit PCM WAV file at `target_sr`.

    The input is loaded at its native sample rate, mixed down to mono when
    requested, resampled, scaled to the 16-bit integer range and written to
    a new timestamp-named file under `self._audio_path`.
    @param audio_file: path to audio file to convert for speech model
    @param target_sr: sample rate (Hz) of the converted file
    @param target_channels: 1 to mix multi-channel input down to mono; any
        other value leaves the input's channel layout unchanged
    @param dtype: numpy dtype the scaled samples are cast to before writing
    @returns: path to converted audio file
    """
    # Load without changing sample rate or channels
    y, sr = librosa.load(audio_file, sr=None, mono=False)

    # If the file has more than one channel, mix it down to one channel
    if y.ndim > 1 and target_channels == 1:
        y = librosa.to_mono(y)

    # Resample the audio to the target sample rate
    y_resampled = librosa.resample(y, orig_sr=sr, target_sr=target_sr)

    # Scale float samples to the 2-byte integer range. Clip before casting:
    # a full-scale sample (1.0) maps to 32768, which would wrap around to
    # -32768 as int16, and resampling may overshoot past full scale.
    full_scale = 2 ** (8 * 2 - 1)
    y_resampled = (y_resampled * full_scale).clip(
        -full_scale, full_scale - 1).astype(dtype)

    # librosa keeps multi-channel audio as (channels, samples) while
    # soundfile expects (frames, channels); transpose when not mixed down.
    if y_resampled.ndim > 1:
        y_resampled = y_resampled.T

    output_path = join(self._audio_path, f"{time()}.wav")
    # Save the audio file with the new sample rate and sample width
    sf.write(output_path, y_resampled, target_sr, format='WAV',
             subtype='PCM_16')
    LOG.info(f"Converted audio file to {output_path}")
    return output_path

def on_user_input(self, utterance: str, *args, **kwargs) -> str:
"""
Callback to handle textual user input
Expand Down

0 comments on commit dfb130a

Please sign in to comment.