-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathspeech_to_text.py
175 lines (142 loc) · 4.83 KB
/
speech_to_text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
"""
This module contains the Recording, Speech_To_Text and GPT_Speech_to_Text classes, which record audio from the microphone and convert it to text
"""
import openai
import wave
import logging
import numpy as np
import pyaudio
import os
from API.api import GPT_API
import json
logging.basicConfig(level=logging.INFO)
class Recording:
    """
    Capture one utterance from the default input device and save it as WAV.

    Audio is read chunk by chunk; capture starts with the first chunk whose
    mean absolute sample value exceeds ``threshold`` and ends when a chunk
    falls below it or ``max_duration`` is reached.
    """

    def __init__(self, threshold=0.01, chunk_size=1024, format=pyaudio.paInt16,
                 channels=1, rate=44100, max_duration=6):
        """
        :param threshold: minimum amplitude to trigger recording. It is compared
            with the mean absolute value of the raw int16 samples of a chunk
            (scale 0..32768).
            NOTE(review): on that scale the default 0.01 is exceeded by any
            chunk that is not pure digital silence - confirm the intended unit.
        :param chunk_size: number of samples to read at a time
        :param format: audio format (check pyaudio docs).
            NOTE(review): chunks are always decoded as int16 in ``start``.
        :param channels: number of audio channels
        :param rate: sampling frequency
        :param max_duration: maximum duration of the recording, in seconds
        """
        self.threshold = threshold
        self.chunk_size = chunk_size
        self.format = format
        self.channels = channels
        self.rate = rate
        self.max_duration = max_duration
        self.frames = []
        self.recording_started = False
        self.audio = None
        self.stream = None
        self._open_stream()

    def _open_stream(self):
        """
        Create a PyAudio session and an input stream from the stored settings
        :return: None
        """
        audio = pyaudio.PyAudio()
        try:
            stream = audio.open(format=self.format,
                                channels=self.channels,
                                rate=self.rate,
                                input=True,
                                frames_per_buffer=self.chunk_size)
        except Exception:
            # Do not leave a half-initialised PortAudio session behind.
            audio.terminate()
            raise
        self.audio = audio
        self.stream = stream

    def start(self):
        """
        Start recording: block until the utterance is captured in ``self.frames``
        :return: None
        """
        if self.stream is None:
            # The recorder was stopped earlier; open a new stream so it can be reused.
            self._open_stream()

        logging.info("Listening...")
        max_samples = self.rate * self.max_duration
        while True:
            data = self.stream.read(self.chunk_size)
            samples = np.frombuffer(data, dtype=np.int16)
            # Widen before abs(): abs(-32768) wraps around in int16.
            amplitude = np.abs(samples.astype(np.int32)).mean()

            if not self.recording_started and amplitude > self.threshold:
                logging.info("Recording started.")
                self.recording_started = True

            if not self.recording_started:
                continue

            self.frames.append(data)
            reached_limit = len(self.frames) * self.chunk_size >= max_samples
            went_quiet = amplitude < self.threshold
            if reached_limit or went_quiet:
                logging.info("Recording stopped.")
                break

    def stop(self):
        """
        Stop recording and release the audio device. Safe to call repeatedly.
        :return: None
        """
        stream, self.stream = self.stream, None
        audio, self.audio = self.audio, None
        try:
            if stream is not None:
                stream.stop_stream()
                stream.close()
        finally:
            if audio is not None:
                audio.terminate()

    def save_as_wav(self, filename):
        """
        Save the recorded audio as a WAV file
        :param filename: name of the file
        :return: None
        :raises ValueError: if the file cannot be written
        """
        try:
            # The context manager closes the file even when a write fails.
            with wave.open(filename, 'wb') as wf:
                wf.setnchannels(self.channels)
                # Module-level helper: works even after stop() released the session.
                wf.setsampwidth(pyaudio.get_sample_size(self.format))
                wf.setframerate(self.rate)
                wf.writeframes(b''.join(self.frames))
            logging.info("Audio saved as %s", filename)
        except Exception as e:
            raise ValueError("Error saving audio: " + str(e)) from e

    def run(self):
        """
        Run the recording: capture one utterance with this instance's settings
        and write it to "audio.wav". Errors are logged, not raised.
        :return: None
        """
        try:
            # Begin every take on a fresh stream, dropping the one that may have
            # been sitting idle since construction or a previous take.
            self.stop()
            self.frames = []
            self.recording_started = False
            try:
                self.start()
            finally:
                # Release the device even if reading from the stream failed.
                self.stop()
            self.save_as_wav("audio.wav")
        except ValueError as ve:
            logging.error(ve)
        except Exception as e:
            logging.error("An error occurred: " + str(e))
class Speech_To_Text:
    """
    Transcribe an audio file to text through the OpenAI audio API.
    """

    def __init__(self, audio="audio.wav"):
        """
        Initialize the API
        :param audio: path of the audio file to convert to text
        :return: None
        """
        self._audio = audio
        self._api = GPT_API()
        self._api.init_api()
        logging.info("api initialize correctly")

    def convert_audio_text(self) -> str:
        """
        Convert audio to text
        :return: the "text" field of the transcription returned by the OpenAI API
        :raises ValueError: if the file cannot be read or the transcription fails
        """
        try:
            # Open in binary mode and close the handle as soon as the request
            # is done, whether it succeeded or not.
            with open(self._audio, "rb") as audio_file:
                transcript = openai.Audio.transcribe("whisper-1", audio_file)
            return transcript["text"]
        except Exception as e:
            raise ValueError("\nError converting audio to text: " + str(e)) from e
class GPT_Speech_to_Text:
    """
    Record an utterance, transcribe it, and store the transcript on disk.
    """

    def __init__(self):
        """
        Create the recorder and the transcriber
        :return: None
        """
        self._recording = Recording()
        self._speech_to_text = Speech_To_Text()

    def run(self) -> str:
        """
        Record audio, convert it to text and write the text to "answer.txt"
        :return: answer of the api of openAI
        :raises ValueError: if any step fails
        """
        try:
            self._recording.run()
            answer = self._speech_to_text.convert_audio_text()
            # Write the transcript as plain text. An explicit UTF-8 encoding
            # keeps non-ASCII transcripts from failing or being garbled on
            # platforms whose default encoding is not UTF-8.
            with open("answer.txt", "w", encoding="utf-8") as f:
                f.write(answer)
            return answer
        except Exception as e:
            raise ValueError("Error " + str(e)) from e
if __name__ == "__main__":
    # Script entry point: record one utterance, transcribe it, and keep the
    # returned text in ``e``.
    gpt_speech_to_text = GPT_Speech_to_Text()
    e = gpt_speech_to_text.run()