-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspeech_to_text.py
executable file
·61 lines (48 loc) · 1.44 KB
/
speech_to_text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python3
from record import record
from faster_whisper import WhisperModel
from torch import device
def speech_to_text(model_size: str, language: str) -> str:
    """
    Transcribe speech from microphone to text

    The audio is obtained by calling record(), whose return value is passed
    to the Whisper model as the input to transcribe.

    Parameters:
        - model_size: Model size to use for transcription
        - language: Language to use for transcription

    Returns:
        str: Transcribed text, or "" if a ValueError is raised while
        loading the model or transcribing (the error is printed).

    Raises:
        ValueError: if model_size or language is not a non-empty string
    """
    if not isinstance(model_size, str) or model_size == "":
        raise ValueError(
            "speech_to_text: model_size must be a non-empty string"
        )
    if not isinstance(language, str) or language == "":
        raise ValueError(
            "speech_to_text: language must be a non-empty string"
        )

    filename = record()

    # Imported here so the block stands on its own: the file only imports the
    # `device` class from torch, and `device.type` read on the class is an
    # attribute descriptor that never equals "cuda", so the GPU path was
    # unreachable. Ask torch directly whether CUDA can be used instead.
    import torch

    target_device = "cuda" if torch.cuda.is_available() else "cpu"

    try:
        model = WhisperModel(
            model_size,
            device=target_device,
            compute_type="int8",
        )
        segments, _ = model.transcribe(
            filename,
            beam_size=5,
            language=language,
        )
        # `segments` is consumed lazily, so the join is where decoding really
        # runs; keep it inside the try so its ValueErrors are handled too.
        text = "".join(segment.text for segment in segments)
    except ValueError as e:
        print(e)
        return ""

    print(f"\nUSER: {text}")
    return text
# Script entry point: transcribe one recording with the "tiny" model in
# French and echo the returned text.
if __name__ == "__main__":
    transcript = speech_to_text("tiny", "fr")
    print(transcript)