# voice_transcription.py
"""
Whisper Voice Transcription
Author: Salman Mian
Created: 11/12/2023
Description:
This script records audio through the microphone, transcribes it using OpenAI's API,
and pastes the transcription into the current active window. It's designed to be controlled
through keyboard shortcuts, specifically using Ctrl+R to start and stop recording.
"""
#using API key file
import pyaudio
import wave
import threading
from pynput import keyboard
from openai import OpenAI # Importing OpenAI for transcription
import pyperclip # Import pyperclip
import pyautogui # allows you to programmatically control the mouse and keyboard.
import os
#import datetime # Used for timestamping events
# Path to the API key file, resolved relative to the current working directory
API_KEY_FILE = 'openai_api_key.txt'


def get_api_key():
    """Read the OpenAI API key from ``API_KEY_FILE``.

    Returns:
        str: The file's contents with surrounding whitespace removed.

    Raises:
        FileNotFoundError: If ``API_KEY_FILE`` is not an existing file in the
            current working directory.
        ValueError: If the file exists but holds no key (empty or only
            whitespace).
    """
    if not os.path.isfile(API_KEY_FILE):
        raise FileNotFoundError(f"API key file not found in the current directory. Please create a file named '{API_KEY_FILE}' with your OpenAI API key.")

    # 'utf-8-sig' decodes plain ASCII/UTF-8 and also drops a leading BOM that
    # some editors add, which would otherwise become part of the key.
    with open(API_KEY_FILE, 'r', encoding='utf-8-sig') as file:
        api_key = file.read().strip()

    # Fail here with a clear message rather than sending an empty key along.
    if not api_key:
        raise ValueError(f"API key file '{API_KEY_FILE}' is empty. Please put your OpenAI API key in it.")
    return api_key
# Recording parameters (passed to the PyAudio stream and the WAV writer)
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100  # Used as the stream rate and the WAV frame rate
CHUNK = 1024  # Frames requested per stream read
FILE_NAME = "recording.wav"

# Global variables to control recording state
is_recording = False
recording_thread = None
ctrl_pressed = False  # To track the state of Ctrl key
# Set once the current transcription has been pasted. Initialised here so the
# functions that declare it `global` can read it before any recording has run.
paste_done = False

# Initialize PyAudio
audio = pyaudio.PyAudio()
# Function to start recording
def start_recording():
    """Record from the microphone until ``is_recording`` is cleared, then transcribe.

    Opens an input stream on the module-level ``audio`` object, reads
    ``CHUNK``-frame buffers while the ``is_recording`` flag stays true, writes
    the captured frames to ``FILE_NAME`` as a WAV file and finally calls
    ``transcribe_audio()``.

    The loop only ends when something else sets ``is_recording`` to False
    (``stop_recording`` does this), so this function blocks its caller for the
    whole recording; ``toggle_recording`` runs it on a separate thread.

    Any exception raised while opening or reading the stream propagates to the
    caller after the stream has been released and the flag reset.
    """
    global is_recording, paste_done
    paste_done = False  # Reset the flag when starting a new recording
    is_recording = True
    print("Recording started")

    frames = []
    stream = None
    try:
        stream = audio.open(format=FORMAT, channels=CHANNELS,
                            rate=RATE, input=True,
                            frames_per_buffer=CHUNK)
        while is_recording:
            frames.append(stream.read(CHUNK))
    finally:
        # Reset the flag even on failure so the next toggle starts a fresh
        # recording instead of "stopping" one that is no longer running.
        is_recording = False
        if stream is not None:
            stream.stop_stream()
            stream.close()

    # Save the recording; the context manager closes the file even if a
    # write fails part-way through.
    with wave.open(FILE_NAME, 'wb') as wave_file:
        wave_file.setnchannels(CHANNELS)
        wave_file.setsampwidth(audio.get_sample_size(FORMAT))
        wave_file.setframerate(RATE)
        wave_file.writeframes(b''.join(frames))

    # After recording, transcribe the audio
    transcribe_audio()
def transcribe_audio():
    """Transcribe ``FILE_NAME`` with OpenAI and paste the text into the active window.

    Reads the API key via ``get_api_key()``, sends the recording to the
    ``whisper-1`` transcription model, prints the resulting text, copies it to
    the clipboard and, unless ``paste_done`` is already set, simulates the
    platform's paste shortcut and sets ``paste_done``.

    Prints a failure message and returns without touching the clipboard when
    the response carries no ``text`` value. Exceptions from reading the key,
    opening the recording or calling the API are not caught here.
    """
    import sys  # Local import: only needed to pick the paste modifier key

    global paste_done
    print("Transcribe audio called")  # Debug print

    client = OpenAI(api_key=get_api_key())
    with open(FILE_NAME, "rb") as audio_file:
        transcript_response = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )

    # A missing attribute and an explicit None are both treated as failure;
    # copying None to the clipboard would raise instead of reporting it.
    transcript = getattr(transcript_response, 'text', None)
    if transcript is None:
        print("Transcription failed or the response format is not as expected.")
        return

    print(transcript)
    pyperclip.copy(transcript)  # Copy to clipboard
    print("Transcription copied to clipboard.")

    if not paste_done:  # Check if paste hasn't been done yet
        # Paste is Cmd+V on macOS and Ctrl+V elsewhere.
        paste_modifier = 'command' if sys.platform == 'darwin' else 'ctrl'
        pyautogui.hotkey(paste_modifier, 'v')
        paste_done = True  # Set the flag to True after pasting
# Function to stop recording
def stop_recording():
    """Clear the ``is_recording`` flag and report that recording stopped.

    This only flips the module-level flag; the loop in ``start_recording``
    checks that flag between reads and ends on its own once it sees the change.
    """
    global is_recording
    is_recording = False
    print("Recording stopped")
# Toggle recording state
def toggle_recording():
    """Stop the current recording, or start a new one on a background thread.

    When ``is_recording`` is set, this calls ``stop_recording()``. Otherwise it
    starts ``start_recording`` on a new daemon thread stored in
    ``recording_thread``, unless the previous worker thread is still alive, in
    which case the request is ignored with a message.
    """
    global recording_thread

    if is_recording:
        stop_recording()
        return

    # The worker keeps running after recording stops (it saves the file and
    # calls transcribe_audio). A second worker would write the same
    # FILE_NAME while the first is still using it, so refuse to start one.
    if recording_thread is not None and recording_thread.is_alive():
        print("Still processing the previous recording; please wait.")
        return

    # Daemon thread so a pending recording does not keep the script alive.
    recording_thread = threading.Thread(target=start_recording, daemon=True)
    recording_thread.start()
# Listener for keyboard events
def _is_ctrl(key):
    """Return True when ``key`` is the generic, left or right Control key."""
    return key in (keyboard.Key.ctrl, keyboard.Key.ctrl_l, keyboard.Key.ctrl_r)


def on_press(key):
    """Handle a key press: track Ctrl and toggle recording on Ctrl+R.

    Args:
        key: The key object handed to the listener callback.
    """
    global ctrl_pressed

    if _is_ctrl(key):
        ctrl_pressed = True
        return
    if not ctrl_pressed:
        return

    # Special keys have no `char` attribute, so read it defensively instead of
    # wrapping the whole handler in `except AttributeError`, which would also
    # hide errors raised inside toggle_recording().
    char = getattr(key, 'char', None)
    # With Ctrl held, the key may be reported as the control character
    # '\x12' (Ctrl-R) rather than the letter 'r'.
    if char in ('r', '\x12'):
        print("Ctrl+R pressed")  # Debug print
        toggle_recording()


def on_release(key):
    """Handle a key release: clear the Ctrl state when a Control key goes up.

    Args:
        key: The key object handed to the listener callback.
    """
    global ctrl_pressed
    if _is_ctrl(key):
        ctrl_pressed = False
# Main function
def main():
    """Run the keyboard listener until it ends or the user interrupts the script.

    Starts a ``keyboard.Listener`` wired to ``on_press`` / ``on_release`` and
    blocks on it. On ``KeyboardInterrupt`` it prints an exit message and calls
    ``stop_recording()`` to clear the recording flag.
    """
    try:
        with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:
            listener.join()
    except KeyboardInterrupt:
        print("Exiting script...")
        stop_recording()


if __name__ == "__main__":
    main()