Websocket transport #81
The README diff adds `--max-line-length=100` to the `py-autopep8` options:

````diff
@@ -127,7 +127,7 @@ You can use [use-package](https://github.com/jwiegley/use-package) to install [p
   :defer t
   :hook ((python-mode . py-autopep8-mode))
   :config
-  (setq py-autopep8-options '("-a" "-a")))
+  (setq py-autopep8-options '("-a" "-a" "--max-line-length=100")))
 ```
````

`autopep8` was installed in the `venv` environment described before, so you should be able to use [pyvenv-auto](https://github.com/ryotaro612/pyvenv-auto) to automatically load that environment inside Emacs.
The matching change in the editor settings JSON:

````diff
@@ -152,6 +152,7 @@ Install the
   },
   "autopep8.args": [
     "-a",
-    "-a"
+    "-a",
+    "--max-line-length=100"
   ],
 ```
````

> Does the …
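In both editors these options amount to running `autopep8 --aggressive --aggressive --max-line-length=100`: `-a` is short for `--aggressive`, and repeating it enables autopep8's second, more aggressive level of fixes.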
A new protobuf schema defines the frames that cross the websocket:

```proto
syntax = "proto3";

package dailyai_proto;

message TextFrame {
  string text = 1;
}

message AudioFrame {
  bytes audio = 1;
}

message TranscriptionFrame {
  string text = 1;
  string participant_id = 2;
  string timestamp = 3;
}

message Frame {
  oneof frame {
    TextFrame text = 1;
    AudioFrame audio = 2;
    TranscriptionFrame transcription = 3;
  }
}
```
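For reference, a minimal sketch of how a `Frame` round-trips in Python. It assumes the schema has been compiled with `protoc --python_out=. frames.proto`, which by protoc convention produces a `frames_pb2` module; none of this tooling is part of the PR itself.

```python
import frames_pb2  # assumption: generated via `protoc --python_out=. frames.proto`

# Wrap a TextFrame in the Frame envelope and serialize it to bytes.
out = frames_pb2.Frame()
out.text.text = "Hello there!"
payload = out.SerializeToString()

# On the receiving side, parse the bytes and dispatch on the oneof.
incoming = frames_pb2.Frame()
incoming.ParseFromString(payload)
kind = incoming.WhichOneof("frame")  # "text", "audio", or "transcription"
if kind == "text":
    print(incoming.text.text)
```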
The browser demo page streams microphone audio to the server as protobuf frames and plays back the audio frames it receives:

```html
<!DOCTYPE html>
<html lang="en">

<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <script src="//cdn.jsdelivr.net/npm/[email protected]/dist/protobuf.min.js"></script>
  <title>WebSocket Audio Stream</title>
</head>

<body>
  <h1>WebSocket Audio Stream</h1>
  <button id="startAudioBtn">Start Audio</button>
  <button id="stopAudioBtn">Stop Audio</button>
  <script>
    const SAMPLE_RATE = 16000;   // playback/target sample rate
    const BUFFER_SIZE = 8192;    // ScriptProcessor buffer size, in samples
    const MIN_AUDIO_SIZE = 6400; // queued samples needed before an AudioFrame is flushed

    let audioContext;
    let microphoneStream;
    let scriptProcessor;
    let source;
    let frame;
    let audioChunks = [];
    let isPlaying = false;
    let ws;

    // Load the schema and keep the Frame type around for encoding/decoding.
    protobuf.load("frames.proto", (err, root) => {
      if (err) throw err;
      frame = root.lookupType("dailyai_proto.Frame");
    });

    function initWebSocket() {
      ws = new WebSocket('ws://localhost:8765');

      ws.addEventListener('open', () => console.log('WebSocket connection established.'));
      ws.addEventListener('message', handleWebSocketMessage);
      ws.addEventListener('close', (event) => console.log("WebSocket connection closed.", event.code, event.reason));
      ws.addEventListener('error', (event) => console.error('WebSocket error:', event));
    }

    async function handleWebSocketMessage(event) {
      const arrayBuffer = await event.data.arrayBuffer();
      enqueueAudioFromProto(arrayBuffer);
    }

    function enqueueAudioFromProto(arrayBuffer) {
      const parsedFrame = frame.decode(new Uint8Array(arrayBuffer));
      if (!parsedFrame?.audio) return false;

      // The AudioFrame payload is raw 16-bit PCM (the `audio` bytes field in frames.proto).
      const frameCount = parsedFrame.audio.audio.length / 2;
      const audioOutBuffer = audioContext.createBuffer(1, frameCount, SAMPLE_RATE);
      const nowBuffering = audioOutBuffer.getChannelData(0);
      const view = new Int16Array(parsedFrame.audio.audio.buffer);

      // Scale each signed 16-bit sample to the [-1, 1) range Web Audio expects.
      for (let i = 0; i < frameCount; i++) {
        const word = view[i];
        nowBuffering[i] = ((word + 32768) % 65536 - 32768) / 32768.0;
      }

      audioChunks.push(audioOutBuffer);
      if (!isPlaying) playNextChunk();
    }

    function playNextChunk() {
      if (audioChunks.length === 0) {
        isPlaying = false;
        return;
      }

      isPlaying = true;
      const audioOutBuffer = audioChunks.shift();
      const source = audioContext.createBufferSource();
      source.buffer = audioOutBuffer;
      source.connect(audioContext.destination);
      source.onended = playNextChunk;
      source.start();
    }

    function startAudio() {
      if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
        alert('getUserMedia is not supported in your browser.');
        return;
      }

      navigator.mediaDevices.getUserMedia({ audio: true })
        .then((stream) => {
          microphoneStream = stream;
          audioContext = new (window.AudioContext || window.webkitAudioContext)();
          scriptProcessor = audioContext.createScriptProcessor(BUFFER_SIZE, 1, 1);
          source = audioContext.createMediaStreamSource(stream);
          source.connect(scriptProcessor);
          scriptProcessor.connect(audioContext.destination);

          const audioBuffer = [];
          // Crude downsampling: keep every skipRatio-th input sample.
          const skipRatio = Math.floor(audioContext.sampleRate / (SAMPLE_RATE * 2));

          scriptProcessor.onaudioprocess = (event) => {
            const rawLeftChannelData = event.inputBuffer.getChannelData(0);
            for (let i = 0; i < rawLeftChannelData.length; i += skipRatio) {
              // Convert the float sample to a signed 16-bit value, then swap its bytes.
              const normalized = ((rawLeftChannelData[i] * 32768.0) + 32768) % 65536 - 32768;
              const swappedBytes = ((normalized & 0xff) << 8) | ((normalized >> 8) & 0xff);
              audioBuffer.push(swappedBytes);
            }

            // Flush a Frame whenever enough samples have accumulated.
            if (audioBuffer.length >= MIN_AUDIO_SIZE) {
              const audioFrame = frame.create({ audio: { audio: audioBuffer.slice(0, MIN_AUDIO_SIZE) } });
              const encodedFrame = new Uint8Array(frame.encode(audioFrame).finish());
              ws.send(encodedFrame);
              audioBuffer.splice(0, MIN_AUDIO_SIZE);
            }
          };

          initWebSocket();
        })
        .catch((error) => console.error('Error accessing microphone:', error));
    }

    function stopAudio() {
      if (ws) {
        ws.close();
        scriptProcessor.disconnect();
        source.disconnect();
        ws = undefined;
      }
    }

    document.getElementById('startAudioBtn').addEventListener('click', startAudio);
    document.getElementById('stopAudioBtn').addEventListener('click', stopAudio);
  </script>
</body>

</html>
```
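To poke at the server without a browser, a small Python client can speak the same wire format. This is a sketch under assumptions: the third-party `websockets` package and a protoc-generated `frames_pb2` module (neither ships with this PR), field names as in the first `frames.proto` above, and the server listening on `ws://localhost:8765` as the page assumes.

```python
import asyncio

import websockets  # assumption: `pip install websockets`, not part of this PR

import frames_pb2  # assumption: generated via `protoc --python_out=. frames.proto`


async def main():
    async with websockets.connect("ws://localhost:8765") as ws:
        # Send 6400 bytes of silence, mirroring the page's MIN_AUDIO_SIZE payload.
        out = frames_pb2.Frame()
        out.audio.audio = b"\x00" * 6400
        await ws.send(out.SerializeToString())

        # Print the type of every frame the server pushes back.
        async for message in ws:
            incoming = frames_pb2.Frame()
            incoming.ParseFromString(message)
            print(incoming.WhichOneof("frame"))


asyncio.run(main())
```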
The server-side example wires the new `WebsocketTransport` into a pipeline with Whisper STT and ElevenLabs TTS:

```python
import asyncio
import aiohttp
import logging
import os

from dailyai.pipeline.frame_processor import FrameProcessor
from dailyai.pipeline.frames import TextFrame, TranscriptionQueueFrame
from dailyai.pipeline.pipeline import Pipeline
from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
from dailyai.services.websocket_transport_service import WebsocketTransport
from dailyai.services.whisper_ai_services import WhisperSTTService

logging.basicConfig(format="%(levelno)s %(asctime)s %(message)s")
logger = logging.getLogger("dailyai")
logger.setLevel(logging.DEBUG)


class WhisperTranscriber(FrameProcessor):
    async def process_frame(self, frame):
        # Log transcriptions here; pass every other frame downstream.
        if isinstance(frame, TranscriptionQueueFrame):
            print(f"Transcribed: {frame.text}")
        else:
            yield frame


async def main():
    async with aiohttp.ClientSession() as session:
        transport = WebsocketTransport(
            mic_enabled=True,
            speaker_enabled=True,
        )
        tts = ElevenLabsTTSService(
            aiohttp_session=session,
            api_key=os.getenv("ELEVENLABS_API_KEY"),
            voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
        )

        pipeline = Pipeline([
            WhisperSTTService(),
            WhisperTranscriber(),
            tts,
        ])

        @transport.on_connection
        async def queue_frame():
            # Greet each client as soon as it connects.
            await pipeline.queue_frames([TextFrame("Hello there!")])

        await transport.run(pipeline)


if __name__ == "__main__":
    asyncio.run(main())
```
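To try the demo end to end, a typical setup would be: serve the HTML page and `frames.proto` from the same directory (for example with `python -m http.server`), since the page fetches the schema at runtime via `protobuf.load("frames.proto", …)`; export `ELEVENLABS_API_KEY` and `ELEVENLABS_VOICE_ID`; start the Python script; then open the page and click Start Audio. The page connects to `ws://localhost:8765`, so the transport is assumed to listen there by default.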
The PR also ships a second copy of the schema with slightly different field names (`data` for `audio`, `participantId` for `participant_id`); since the field numbers are identical, the two files are wire-compatible:

```proto
syntax = "proto3";

package dailyai_proto;

message TextFrame {
  string text = 1;
}

message AudioFrame {
  bytes data = 1;
}

message TranscriptionFrame {
  string text = 1;
  string participantId = 2;
  string timestamp = 3;
}

message Frame {
  oneof frame {
    TextFrame text = 1;
    AudioFrame audio = 2;
    TranscriptionFrame transcription = 3;
  }
}
```
From the review thread:

> Hahahaha. OK.... but what will we do in 2040?

> by then my vision will be bad enough that I'll lobby we go back to 80 columns 😂

> 😂

> Oh... we should update the README with this setting.