Skip to content

Commit

Permalink
Merge branch 'juberti-patch-1' of https://github.com/fixie-ai/fixie-s…
Browse files Browse the repository at this point in the history
…dk-js into juberti-patch-1
  • Loading branch information
juberti committed Dec 12, 2023
2 parents de68eac + cfed9dd commit 495547c
Showing 1 changed file with 41 additions and 29 deletions.
70 changes: 41 additions & 29 deletions packages/fixie/src/voice.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
/** This file defines the Fixie Voice SDK. */

import { AgentId, ConversationId } from './types.js';
import {
createLocalTracks,
Expand All @@ -11,10 +13,11 @@ import {
TrackEvent,
} from 'livekit-client';

/**
* Represents the state of a VoiceSession.
*/
/** Represents the state of a VoiceSession. */
export enum VoiceSessionState {
DISCONNECTED = 'disconnected',
CONNECTING = 'connecting',
CONNECTED = 'connected',
IDLE = 'idle',
LISTENING = 'listening',
THINKING = 'thinking',
Expand All @@ -32,9 +35,7 @@ export interface VoiceSessionInit {
webrtcUrl?: string;
}

/**
* Web Audio AnalyserNode for an audio stream.
*/
/** Web Audio AnalyserNode for an audio stream. */
export class StreamAnalyzer {
source: MediaStreamAudioSourceNode;
analyzer: AnalyserNode;
Expand All @@ -48,15 +49,20 @@ export class StreamAnalyzer {
}
}

/**
* Manages a single voice session with a Fixie agent.
*/
export class VoiceSessionError extends Error {
constructor(message: string) {
super(message);
this.name = 'VoiceSessionError';
}
}

/** Manages a single voice session with a Fixie agent. */
export class VoiceSession {
private readonly audioContext = new AudioContext();
private readonly audioElement = new Audio();
private readonly textEncoder = new TextEncoder();
private readonly textDecoder = new TextDecoder();
private _state = VoiceSessionState.IDLE;
private _state = VoiceSessionState.DISCONNECTED;
private socket?: WebSocket;
private audioStarted = false;
private started = false;
Expand All @@ -81,15 +87,10 @@ export class VoiceSession {
onLatencyChange?: (metric: string, value: number) => void;

/** Called when an error occurs. */
onError?: () => void;
onError?: (error: VoiceSessionError) => void;

constructor(
private readonly agentId: AgentId,
private readonly conversationId?: ConversationId,
private readonly params?: VoiceSessionInit
) {
constructor(readonly agentId: AgentId, public conversationId?: ConversationId, readonly params?: VoiceSessionInit) {
console.log('[voiceSession] creating VoiceSession');
this.warmup();
}

/** Returns the current state of this VoiceSession. */
Expand All @@ -110,11 +111,18 @@ export class VoiceSession {
/** Warm up the VoiceSession by making network connections. This is called automatically when the VoiceSession object is created. */
warmup(): void {
console.log('[voiceSession] warming up');
const url = this.params?.webrtcUrl ?? 'wss://wsapi.fixie.ai';
this.socket = new WebSocket(url);
this.socket.onopen = () => this.handleSocketOpen();
this.socket.onmessage = (event) => this.handleSocketMessage(event);
this.socket.onclose = (event) => this.handleSocketClose(event);
try {
const url = this.params?.webrtcUrl ?? 'wss://wsapi.fixie.ai';
this.socket = new WebSocket(url);
this.socket.onopen = () => this.handleSocketOpen();
this.socket.onmessage = (event) => this.handleSocketMessage(event);
this.socket.onclose = (event) => this.handleSocketClose(event);
this.changeState(VoiceSessionState.CONNECTING);
} catch (e) {
const err = e as Error;
console.error('[voiceSession] failed to create socket', e);
this.onError?.(new VoiceSessionError(`Failed to create socket: ${err.message}`));
}
}

/** Start the audio channels associated with this VoiceSession. This will request microphone permissions from the user. */
Expand Down Expand Up @@ -158,9 +166,9 @@ export class VoiceSession {
this.localAudioTrack = undefined;
this.socket?.close();
this.socket = undefined;
this.changeState(VoiceSessionState.IDLE);
this.audioStarted = false;
this.started = false;
this.changeState(VoiceSessionState.DISCONNECTED);
}

/** Interrupt this VoiceSession. */
Expand Down Expand Up @@ -194,6 +202,7 @@ export class VoiceSession {
console.log('[voiceSession] publishing local audio track');
const opts = { name: 'audio', simulcast: false, source: Track.Source.Microphone };
this.room.localParticipant.publishTrack(this.localAudioTrack, opts);
this.changeState(VoiceSessionState.IDLE);
} else {
console.log(
`[voiceSession] not publishing local audio track - room state is ${this.room?.state}, local audio is ${
Expand All @@ -209,6 +218,7 @@ export class VoiceSession {

private handleSocketOpen() {
console.log('[voiceSession] socket opened');
this.changeState(VoiceSessionState.CONNECTED);
const obj = {
type: 'init',
params: {
Expand Down Expand Up @@ -252,28 +262,28 @@ export class VoiceSession {

private handleSocketClose(event: CloseEvent) {
console.log(`[voiceSession] socket closed: ${event.code} ${event.reason}`);
this.changeState(VoiceSessionState.DISCONNECTED);
if (event.code === 1000) {
// We initiated this shutdown, so we've already cleaned up.
// Reconnect to prepare for the next session.
console.log('[voiceSession] socket closed normally - calling warmup again');
this.warmup();
console.log('[voiceSession] socket closed normally');
} else if (event.code === 1006) {
// This occurs when running a Next.js app in debug mode and the VoiceSession is
// initialized twice, the first socket will receive this error that we can ignore.
console.log('[voiceSession] got event 1006');
} else {
console.warn(`[voiceSession] socket closed unexpectedly: ${event.code} ${event.reason}`);
this.onError?.();
this.onError?.(new VoiceSessionError(`Socket closed unexpectedly: ${event.code} ${event.reason}`));
}
}

private handleTrackSubscribed(track: RemoteTrack) {
console.log(`[voiceSession] subscribed to remote audio track ${track.sid}`);
const audioTrack = track as RemoteAudioTrack;
audioTrack.on(TrackEvent.AudioPlaybackStarted, () => console.log('[voiceSession] audio playback started'));
audioTrack.on(TrackEvent.AudioPlaybackFailed, (err: any) =>
console.error('[voiceSession] audio playback failed', err)
);
audioTrack.on(TrackEvent.AudioPlaybackFailed, (err: any) => {
console.error('[voiceSession] audio playback failed', err);
});
audioTrack.attach(this.audioElement);
this.outAnalyzer = new StreamAnalyzer(this.audioContext, track.mediaStream!);
if (this.delayedSpeakingState) {
Expand Down Expand Up @@ -306,6 +316,8 @@ export class VoiceSession {
this.handleOutputChange(msg.text, msg.final);
} else if (msg.type == 'latency') {
this.handleLatency(msg.kind, msg.value);
} else if (msg.type == 'conversation_created') {
this.conversationId = msg.conversationId;
}
}

Expand Down

0 comments on commit 495547c

Please sign in to comment.