Skip to content

Commit

Permalink
Split audio into segments and process it step by step
Browse files Browse the repository at this point in the history
  • Loading branch information
arhihihipov committed Apr 9, 2024
1 parent 305a1fa commit 52192e1
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 14 deletions.
2 changes: 1 addition & 1 deletion app/audio_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ class StuckAudioResender:
Class to resend stuck raw audio files.
"""

def __init__(self, resend_stuck_audio_timeout_seconds=300, is_stuck_predicate=default_is_stuck_predicate):
def __init__(self, resend_stuck_audio_timeout_seconds=30, is_stuck_predicate=default_is_stuck_predicate):
self._resend_stuck_audio_timeout_seconds = resend_stuck_audio_timeout_seconds
self._is_stuck_predicate = is_stuck_predicate

Expand Down
44 changes: 31 additions & 13 deletions app/audio_recognizer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import asyncio
import json
import wave
from pydub import AudioSegment
from io import BytesIO

import requests

Expand Down Expand Up @@ -44,7 +46,22 @@ def recognize(self, audio):
recognized_words = list(map(self.parse_recognizer_result, recognizer_results))
return RecognizedAudio(recognized_words)

def send_audio_to_recognizer(self, audio, language='ru'):
def send_audio_to_recognizer(self, audio_file, language='ru'):
audio_data = audio_file.read()
audio_file.close()

audio = AudioSegment.from_file(BytesIO(audio_data), format="mp3")
duration_seconds = audio.duration_seconds

segments = []
start_time = 0
while start_time < duration_seconds:
end_time = min(start_time + 10, duration_seconds)
segment = audio[start_time * 1000: end_time * 1000]
segments.append((segment, start_time))
start_time = end_time

# Request parameters
params = {
'task': 'transcribe',
'language': language,
Expand All @@ -53,18 +70,19 @@ def send_audio_to_recognizer(self, audio, language='ru'):
}
headers = {'accept': 'application/json'}

audio_to_recognize_buffer = audio.read()
audio.close()

try:
files = {'audio_file': ("student_speech", audio_to_recognize_buffer, 'audio/mpeg')}
response = requests.post(self._url, params=params, headers=headers, files=files)
response.raise_for_status()
except requests.exceptions.RequestException as e:
logger.info(f"Recognition error occurred while processing audio file: {e}")
return []

data = response.json()
# Speech recognition, segment by segment
recognizer_results = []
for segment, segment_start_time in segments:
audio_to_recognize_buffer = segment.export(format="mp3").read()
try:
files = {'audio_file': ("student_speech", audio_to_recognize_buffer, 'audio/mpeg')}
response = requests.post(self._url, params=params, headers=headers, files=files)
response.raise_for_status()
except requests.exceptions.RequestException as e:
logger.info(f"Recognition error occurred while processing audio file: {e}")
return []

data = response.json()

recognizer_results = []
for segment in data["segments"]:
Expand Down

0 comments on commit 52192e1

Please sign in to comment.