-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
658 additions
and
186 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
import { useCallback, useMemo, useRef } from 'react'; | ||
import { ElevenLabsVoiceData } from '../model/eleven-labs'; | ||
import { mapPronounsToGender, ttsInputToText } from '../utils/chat-messages'; | ||
import useSWR, { useSWRConfig } from 'swr'; | ||
import { ELEVEN_LABS_SUBSCRIPTION_ENDPOINT } from '../chat/TtsHealth'; | ||
import { TtsApi, TtsHookOptions, TtsInput, TtsLimits } from '../model/tts'; | ||
import { useSettings } from '../contexts/settings-context'; | ||
import { SettingName } from '../model/settings'; | ||
|
||
/**
 * Provides the ElevenLabs character quota for the API token stored in settings.
 *
 * The subscription endpoint is polled through SWR once a minute (focus
 * revalidation is disabled and the previous data is kept while reloading).
 * Any field that is absent or not a number is reported as `0`.
 *
 * @returns The character limit (`maxChars`) and the characters consumed so far (`usedChars`).
 */
const useElevenLabsLimits = (): TtsLimits => {
  const { settings } = useSettings();
  const token = settings[SettingName.ELEVEN_LABS_TOKEN];

  // Without a token there is nothing to ask the API for, so the fetcher yields `null`
  const fetchSubscription = async (endpoint: string) => {
    if (!token) {
      return null;
    }

    const response = await fetch(endpoint, {
      method: 'GET',
      headers: {
        'xi-api-key': token,
      },
    });
    return await response.json();
  };

  const { data: subscriptionData } = useSWR(ELEVEN_LABS_SUBSCRIPTION_ENDPOINT, fetchSubscription, {
    refreshInterval: 60e3,
    revalidateOnFocus: false,
    keepPreviousData: true,
  });

  return useMemo((): TtsLimits => {
    // Reads a numeric field from the response, falling back to 0 for anything unexpected
    const readCount = (field: 'character_limit' | 'character_count'): number => {
      if (typeof subscriptionData !== 'object' || subscriptionData === null) {
        return 0;
      }
      if (!(field in subscriptionData)) {
        return 0;
      }
      const value = subscriptionData[field];
      return typeof value === 'number' ? value : 0;
    };

    return {
      maxChars: readCount('character_limit'),
      usedChars: readCount('character_count'),
    };
  }, [subscriptionData]);
};
|
||
/**
 * Text-to-speech provider hook backed by the ElevenLabs HTTP API.
 *
 * Queue handling and audio playback are delegated to the callbacks received
 * through {@link TtsHookOptions}; this hook only selects a voice, requests the
 * audio for the next queued item and hands the resulting blob URL to the player.
 *
 * @returns The TTS API: `readText`, `fetchVoices`, `clearQueue`, `clearIds` and
 * the hook that provides the character limits for this provider.
 */
export const useElevenLabsTts = ({
  token,
  enabled,
  lastReadTextRef,
  currentlyReadingRef,
  pickQueueItem,
  requestPlayer,
  readFirstInQueue,
  setAudioSource,
  clearPlayingAudio,
  clearQueue,
  clearIdsFromQueue,
  queueText,
}: TtsHookOptions): TtsApi => {
  // Voices are kept in a ref so that loading them does not trigger a re-render
  const voicesRef = useRef<ElevenLabsVoiceData['voices']>([]);

  /**
   * Picks the first known voice labelled as a young narration voice whose
   * gender matches the one derived from the input's pronouns.
   */
  const getVoiceId = useCallback((ttsInput?: TtsInput): string | undefined => {
    const targetGender = mapPronounsToGender(ttsInput?.pronouns);
    const matchingVoice = voicesRef.current.find(voice => {
      const { age, gender, 'use case': useCase } = voice.labels;
      return age === 'young' && gender === targetGender && useCase === 'narration';
    });
    return matchingVoice?.voice_id;
  }, []);

  const { mutate } = useSWRConfig();

  /**
   * Reads the next queued item, if the hook is enabled, a token and a matching
   * voice are available and the player could be acquired.
   *
   * @param debugSource Label of the caller, included in error logs.
   */
  const processQueue = useCallback(async (debugSource: string): Promise<void> => {
    if (!enabled) return;

    if (!token) {
      console.error('Token is missing (%s)', debugSource);
      return;
    }

    const firstQueueItem = pickQueueItem();
    if (!firstQueueItem) {
      return;
    }

    const voiceId = getVoiceId(firstQueueItem);
    if (!voiceId) {
      console.error('No voice found (%s)', debugSource);
      return;
    }

    if (!requestPlayer()) {
      return;
    }

    const ttsInput = readFirstInQueue();
    const textToRead = ttsInputToText(ttsInput, lastReadTextRef.current);

    let audioUrl: string | undefined;
    let played = false;
    try {
      const response = await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${voiceId}/stream`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'xi-api-key': token,
          'accept': 'audio/mpeg'
        },
        body: JSON.stringify({ text: textToRead })
      });
      // Refresh the cached subscription data, the request may have consumed characters
      void mutate(ELEVEN_LABS_SUBSCRIPTION_ENDPOINT);

      // An error response carries no audio, its body must not be handed to the player
      if (!response.ok) {
        throw new Error(`Unexpected response: ${response.status} ${response.statusText}`);
      }

      const audioBlob = await response.blob();
      audioUrl = URL.createObjectURL(audioBlob);
      await setAudioSource(audioUrl);
      played = true;
    } catch (error) {
      console.error('Error generating audio (%s):', debugSource, error);
    } finally {
      // The blob URL is only needed during playback, release it to avoid leaking the blob
      if (audioUrl) {
        URL.revokeObjectURL(audioUrl);
      }
    }

    // NOTE(review): on failure this is called so the dequeued item does not stay
    // marked as playing; presumably that is what releases the player. Confirm
    // against the implementation of `clearPlayingAudio`.
    clearPlayingAudio(ttsInput);

    // Only continue after a successful playback, a failing API must not drain the queue
    if (played) {
      await processQueue('ended handler');
    }
  }, [clearPlayingAudio, enabled, getVoiceId, lastReadTextRef, mutate, pickQueueItem, readFirstInQueue, requestPlayer, setAudioSource, token]);

  /** Queues the given input and starts processing the queue. Does nothing while disabled. */
  const readText = useCallback((text: TtsInput) => {
    if (!enabled) return;

    queueText(text);
    void processQueue('readText');
  }, [enabled, queueText, processQueue]);

  /** Removes the given IDs from the queue and resumes processing if nothing is being read. */
  const clearIds = useCallback((clearedIds: string[]) => {
    clearIdsFromQueue(clearedIds);

    if (!currentlyReadingRef.current) {
      void processQueue('clearIds');
    }
  }, [clearIdsFromQueue, currentlyReadingRef, processQueue]);

  /** Loads the list of voices once, then starts processing the queue. */
  const fetchVoices = useCallback(() => {
    if (!enabled || !token || voicesRef.current.length) return;

    fetch('https://api.elevenlabs.io/v1/voices', {
      method: 'GET',
      headers: { accept: 'application/json' },
    }).then(async (r) => {
      if (!r.ok) {
        throw new Error(`Unexpected response: ${r.status} ${r.statusText}`);
      }

      const voiceData: unknown = await r.json();
      const voices = (voiceData as { voices?: unknown } | null)?.voices;
      // Keep the ref an array no matter what the API returns, other code calls `.length` and `.find` on it
      if (!Array.isArray(voices)) {
        throw new Error('Response does not contain a list of voices');
      }

      voicesRef.current = voices;
      void processQueue('voices fetching');
    }).catch((error) => {
      console.error('Error fetching voices:', error);
    });
  }, [enabled, processQueue, token]);

  return {
    readText,
    fetchVoices,
    clearQueue,
    clearIds,
    limitProviderHook: useElevenLabsLimits,
  };
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
import { useCallback, useMemo, useRef } from 'react'; | ||
import { TtsApi, TtsHookOptions, TtsInput } from '../model/tts'; | ||
import { mapPronounsToGender, ttsInputToText } from '../utils/chat-messages'; | ||
import { PlayHtVoiceData } from '../model/play-ht'; | ||
|
||
/**
 * Options accepted by the Play.ht TTS hook: the shared TTS hook options
 * extended with the Play.ht user identifier.
 */
export interface PlayHtTtsParams extends TtsHookOptions {
  /**
   * Value sent to the Play.ht API in the `X-USER-ID` header.
   * `null` is accepted, but the queue is not processed without a value.
   */
  userId: string | null;
}
|
||
/**
 * Text-to-speech provider hook backed by the Play.ht HTTP API.
 *
 * Queue handling and audio playback are delegated to the callbacks received
 * through {@link PlayHtTtsParams}; this hook only selects a voice, requests the
 * audio for the next queued item and hands the returned audio URL to the player.
 *
 * @returns The TTS API: `readText`, `fetchVoices`, `clearQueue` and `clearIds`.
 */
export const usePlayHtTts = ({
  token,
  enabled,
  userId,
  lastReadTextRef,
  requestPlayer,
  setAudioSource,
  readFirstInQueue,
  pickQueueItem,
  clearPlayingAudio,
  clearQueue,
  clearIdsFromQueue,
  queueText,
}: PlayHtTtsParams): TtsApi => {
  // Voices are kept in a ref so that loading them does not trigger a re-render
  const voicesRef = useRef<PlayHtVoiceData[]>([]);

  /**
   * Picks the first known neutral-loudness, narrative-style youth voice whose
   * gender matches the one derived from the input's pronouns.
   */
  const getVoiceId = useCallback((ttsInput?: TtsInput): string | undefined => {
    const targetGender = mapPronounsToGender(ttsInput?.pronouns);
    const matchingVoice = voicesRef.current.find(voice => {
      const { age, gender, style, loudness } = voice;
      return age === 'youth' && gender === targetGender && style === 'narrative' && loudness === 'neutral';
    });
    return matchingVoice?.id;
  }, []);

  // Authentication headers shared by every request, only the available values are included
  const apiAuthHeaders = useMemo(() => {
    const authHeaders: Record<string, string> = {};
    if (token) authHeaders['Authorization'] = token;
    if (userId) authHeaders['X-USER-ID'] = userId;
    return authHeaders;
  }, [token, userId]);

  /**
   * Reads the next queued item, if the hook is enabled, the credentials and a
   * matching voice are available and the player could be acquired.
   *
   * @param debugSource Label of the caller, included in error logs.
   */
  const processQueue = useCallback(async (debugSource: string): Promise<void> => {
    if (!enabled) return;

    if (!token) {
      console.error('Token is missing (%s)', debugSource);
      return;
    }

    if (!userId) {
      console.error('User ID is missing (%s)', debugSource);
      return;
    }

    const firstQueueItem = pickQueueItem();
    if (!firstQueueItem) {
      return;
    }

    const voiceId = getVoiceId(firstQueueItem);
    if (!voiceId) {
      console.error('No voice found (%s)', debugSource);
      return;
    }

    if (!requestPlayer()) {
      return;
    }

    const ttsInput = readFirstInQueue();
    const textToRead = ttsInputToText(ttsInput, lastReadTextRef.current);

    let played = false;
    try {
      // Make API request to Play.ht (adjust URL and headers)
      const response = await fetch('https://api.play.ht/api/v2/tts', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Accept': 'audio/mpeg',
          ...apiAuthHeaders,
        },
        body: JSON.stringify({
          text: textToRead,
          // Reuse the voice validated above instead of looking it up a second time
          voice: voiceId,
          output_format: 'mp3',
          speed: 1,
          sample_rate: 44100,
          voice_engine: 'PlayHT2.0-turbo'
        }),
      });

      if (!response.ok) {
        throw new Error(`Unexpected response: ${response.status} ${response.statusText}`);
      }

      const audioUrl = response.headers.get('Location');
      if (!audioUrl) {
        throw new Error('Response is missing the Location header');
      }

      await setAudioSource(audioUrl);
      played = true;
    } catch (error) {
      console.error('Error generating audio:', error);
    }

    // NOTE(review): mirrors the ElevenLabs hook, which clears the played item once
    // `setAudioSource` settles; presumably this also releases the player after a
    // failure. Confirm against the implementation of `clearPlayingAudio`.
    clearPlayingAudio(ttsInput);

    // Continue with the remaining items, but do not drain the queue while the API is failing
    if (played) {
      await processQueue('ended handler');
    }
  }, [apiAuthHeaders, clearPlayingAudio, enabled, getVoiceId, lastReadTextRef, pickQueueItem, readFirstInQueue, requestPlayer, setAudioSource, token, userId]);

  /** Loads the list of voices once, then starts processing the queue. */
  const fetchVoices = useCallback(() => {
    // The queue cannot be processed without both credentials, so do not request voices without them either
    if (!enabled || !token || !userId || voicesRef.current.length) return;

    fetch('https://api.play.ht/api/v2/voices', {
      method: 'GET',
      headers: { accept: 'application/json', ...apiAuthHeaders },
    }).then(async (r) => {
      if (!r.ok) {
        throw new Error(`Unexpected response: ${r.status} ${r.statusText}`);
      }

      const voiceData: unknown = await r.json();
      // Keep the ref an array no matter what the API returns, other code calls `.length` and `.find` on it
      if (!Array.isArray(voiceData)) {
        throw new Error('Response is not a list of voices');
      }

      voicesRef.current = voiceData;
      void processQueue('voices fetching');
    }).catch((error) => {
      console.error('Error fetching voices:', error);
    });
  }, [apiAuthHeaders, enabled, processQueue, token, userId]);

  /** Queues the given input and starts processing the queue. Does nothing while disabled. */
  const readText = useCallback((text: TtsInput) => {
    if (!enabled) return;

    queueText(text);
    void processQueue('readText');
  }, [enabled, queueText, processQueue]);

  return {
    readText,
    fetchVoices,
    clearQueue,
    clearIds: clearIdsFromQueue,
  };
};
Oops, something went wrong.