-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
387 deploy whisper #388
387 deploy whisper #388
Changes from 9 commits
a539494
d4a47ec
5358360
23c579a
f9d5b9d
142111c
3bc3398
9e34833
6884eab
4301952
0a46c88
a5af166
15c8182
edd5157
57aeccd
6620f9c
7895f4d
81763d7
2b2af94
7444699
1fb986b
d84fc9e
7330454
7ccda4c
2525f08
0adc439
d9c2a9f
f6f707e
0a53514
9a92766
95624bb
17c68ac
1dacb4d
5728e9c
4bd9c00
48fb3f9
07ee0f6
2f4cabf
9740468
496c079
034351a
3384b42
234fad6
45b70dd
7e0a4dd
5dd6866
1a901b0
6af5a48
d84ae55
25f5068
5be8e9c
90684a6
4e91e9f
c82a185
0dbb056
fbec381
a67df4f
cf032d2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
import json | ||
import wave | ||
|
||
import websockets | ||
import requests | ||
|
||
from app import utils | ||
from app.recognized_audio import RecognizedAudio | ||
|
@@ -25,23 +25,17 @@ def recognize(self, audio): | |
return RecognizedAudio(recognized_words) | ||
|
||
|
||
class VoskAudioRecognizer(AudioRecognizer): | ||
HadronCollider marked this conversation as resolved.
Show resolved
Hide resolved
|
||
def __init__(self, host): | ||
self._host = host | ||
self._event_loop = asyncio.get_event_loop() | ||
|
||
class WhisperAudioRecognizer(AudioRecognizer): | ||
def parse_recognizer_result(self, recognizer_result): | ||
return RecognizedWord( | ||
word=Word(recognizer_result['word']), | ||
begin_timestamp=recognizer_result['start'], | ||
end_timestamp=recognizer_result['end'], | ||
probability=recognizer_result['conf'], | ||
probability=recognizer_result['probability'], | ||
) | ||
|
||
def recognize_wav(self, audio): | ||
recognizer_results = self._event_loop.run_until_complete( | ||
self.send_audio_to_recognizer(audio.name) | ||
) | ||
recognizer_results = self.send_audio_to_recognizer(audio.name) | ||
recognized_words = list(map(self.parse_recognizer_result, recognizer_results)) | ||
return RecognizedAudio(recognized_words) | ||
|
||
|
@@ -50,19 +44,22 @@ def recognize(self, audio): | |
Denoiser.process_wav_to_wav(temp_wav_file, temp_wav_file, noise_length=3) | ||
return self.recognize_wav(temp_wav_file) | ||
|
||
async def send_audio_to_recognizer(self, file_name): | ||
def send_audio_to_recognizer(self, file_name, language='ru'): | ||
url = 'http://whisper:9000/asr' | ||
HadronCollider marked this conversation as resolved.
Show resolved
Hide resolved
|
||
params = { | ||
'task': 'transcribe', | ||
'language': language, | ||
'word_timestamps': 'true', | ||
'output': 'json' | ||
} | ||
headers = {'accept': 'application/json'} | ||
files = {'audio_file': (file_name, open(file_name, 'rb'), 'audio/mpeg')} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Не будет ли проблем с размером файла, если кто-то наговорит на пару часов? (условная 413) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Не совсем понимаю, как это можно проверить в коде, и нужно ли? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Тут скорее вопрос (на исследование/проверку) — какого максимального размера аудио можно отправить в asr? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Пока что удалось установить, что верхний предел у Whisper всё же существует.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Вынесите свои исследования в https://github.com/OSLL/web_speech_trainer/wiki |
||
response = requests.post(url, params=params, headers=headers, files=files) | ||
|
||
data = response.json() | ||
HadronCollider marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
recognizer_results = [] | ||
async with websockets.connect(self._host) as websocket: | ||
wf = wave.open(file_name, "rb") | ||
await websocket.send('''{"config" : { "sample_rate" : 8000.0 }}''') | ||
while True: | ||
data = wf.readframes(1000) | ||
if len(data) == 0: | ||
break | ||
await websocket.send(data) | ||
json_data = json.loads(await websocket.recv()) | ||
if 'result' in json_data: | ||
recognizer_results += json_data['result'] | ||
await websocket.send('{"eof" : 1}') | ||
await websocket.recv() | ||
return recognizer_results | ||
for segment in data["segments"]: | ||
for recognized_word in segment["words"]: | ||
recognizer_results.append(recognized_word) | ||
return recognizer_results |
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Как вы предполагаете автоматизированный запуск подобного теста?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Соглашусь, такой код для тестирования не подойдёт. Хотелось бы узнать, стоит ли вообще писать тест для Whisper, и если да, то что именно нужно протестировать? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Добавьте тестирование своего класса WhisperAudioRecognizer (пусть даже он частично будет общаться с whisper-контейнером, и это будет уже не совсем unit-тестирование).
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Давайте пока оставим тесты в том виде, в котором есть (с печатью распознанных слов). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. А по поводу wav и denoiser — посмотрите, насколько они нужны при работе с whisper (создал задачу — #401): wav нужен был для vosk, возможно, сейчас от него можно избавиться? |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import requests | ||
import time | ||
import librosa | ||
|
||
def test_whisper(file):
    """Send one audio file to the Whisper ASR service and print the results.

    Posts ``file`` to the ``/asr`` endpoint requesting a transcription in
    Russian (``language='ru'``) with word-level timestamps, then prints the
    audio duration, the request processing time, the real-time factor
    (processing time divided by audio duration) and every word structure
    found in the ``segments`` of the JSON response.

    Args:
        file: Path to the audio file to transcribe.

    Raises:
        requests.HTTPError: If the service answers with an error status
            (for example when the upload is rejected).
    """
    url = "http://whisper:9000/asr"
    params = {
        'task': 'transcribe',
        'language': 'ru',
        'word_timestamps': 'true',
        'output': 'json'
    }
    headers = {'accept': 'application/json'}
    print(f"Processing file \"{file}\"")

    audio_length = librosa.get_duration(filename=file)

    # Keep the handle inside a context manager so it is closed even when the
    # request fails; the original left the file open.
    with open(file, 'rb') as audio_file:
        files = {'audio_file': (file, audio_file, 'audio/mpeg')}
        # perf_counter is monotonic, so the interval cannot be skewed by
        # system clock adjustments during a long recognition request.
        start_time = time.perf_counter()
        response = requests.post(url, params=params, headers=headers, files=files)
        end_time = time.perf_counter()

    # Fail with the HTTP error itself instead of an opaque JSON/KeyError below.
    response.raise_for_status()

    processing_time = end_time - start_time
    print(f"Audio length = {audio_length} s, processing time = {processing_time} s")
    if audio_length > 0:
        RTF = processing_time / audio_length
        print(f"RTF = {RTF}")
    else:
        # An empty recording has no meaningful real-time factor.
        print("RTF is undefined for zero-length audio")

    # Flatten the per-segment word lists into one list, printing each word.
    data = response.json()
    words = []
    for segment in data["segments"]:
        for word_structure in segment["words"]:
            words.append(word_structure)
            print(word_structure)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Добавьте в сообщение длительность аудио (вы её получаете выше) - так будут более полезные логи (длина записи + время её обработки)