Skip to content

Commit

Permalink
Add theoretical PlayHT TTS support
Browse files Browse the repository at this point in the history
  • Loading branch information
DJDavid98 committed Mar 4, 2024
1 parent 8b2fb5a commit df29230
Show file tree
Hide file tree
Showing 12 changed files with 658 additions and 186 deletions.
9 changes: 4 additions & 5 deletions src/js/chat/Chat.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,23 @@ import { ChatMessage } from './ChatMessage';
import DurationUnitFormat from 'intl-unofficial-duration-unit-format';
import { useSettings } from '../contexts/settings-context';
import { SettingName } from '../model/settings';
import { useTts } from '../hooks/use-tts';
import { TtsHealth } from './TtsHealth';
import { RemovableElement } from '../RemovableElement';
import { RemovableElementId } from '../model/removable-element-id';
import { useTts } from '../hooks/use-tts';

const MAX_MESSAGE_COUNT = 12;

export const Chat: FC = () => {
const {
settings: {
[SettingName.ELEVEN_LABS_TOKEN]: elevenLabsToken,
[SettingName.TTS_ENABLED]: ttsEnabled,
[SettingName.CHAT_SONG_PREVIEWS]: chatSongPreviews,
}
} = useSettings();
const [messages, setMessages] = useState<Array<DisplayableMessage>>(() => []);
const socket = useSocket();
const tts = useTts(elevenLabsToken, ttsEnabled);
const tts = useTts();
const df = useMemo(() => new DurationUnitFormat('en-US', {
style: DurationUnitFormat.styles.LONG,
format: '{days} {hour} {minutes} {seconds}'
Expand Down Expand Up @@ -202,8 +201,8 @@ export const Chat: FC = () => {
}, [addMessage, chatSongPreviews, df, socket, tts]);

return <Fragment>
{ttsEnabled && elevenLabsToken &&
<RemovableElement id={RemovableElementId.TTS_HEALTH}><TtsHealth token={elevenLabsToken} /></RemovableElement>}
{ttsEnabled && tts.limitProviderHook &&
<RemovableElement id={RemovableElementId.TTS_HEALTH}><TtsHealth useLimitProvider={tts.limitProviderHook} /></RemovableElement>}
{messages.map(message => <ChatMessage key={message.id} message={message} />)}
</Fragment>;
};
31 changes: 4 additions & 27 deletions src/js/chat/TtsHealth.tsx
Original file line number Diff line number Diff line change
@@ -1,45 +1,22 @@
import { FC, useMemo } from 'react';
import useSWR from 'swr';
import * as styles from '../../scss/modules/TtsHealth.module.scss';
import { TtsLimitProviderHook } from '../model/tts';

export const ELEVEN_LABS_SUBSCRIPTION_ENDPOINT = 'https://api.elevenlabs.io/v1/user/subscription';

export interface TtsHealthProps {
token: string;
useLimitProvider: TtsLimitProviderHook;
}

export const TtsHealth: FC<TtsHealthProps> = ({ token }) => {
export const TtsHealth: FC<TtsHealthProps> = ({ useLimitProvider }) => {
const pf = useMemo(() => new Intl.NumberFormat('en-US', {
style: 'percent',
minimumFractionDigits: 0,
maximumFractionDigits: 0
}), []);
const nf = useMemo(() => new Intl.NumberFormat('en-US'), []);

const { data: subscriptionData } = useSWR(ELEVEN_LABS_SUBSCRIPTION_ENDPOINT, (key: string) => fetch(key, {
method: 'GET',
headers: {
'xi-api-key': token,
},
}).then(r => r.json()), {
refreshInterval: 60e3,
revalidateOnFocus: false,
keepPreviousData: true,
});

const limits = useMemo(() => {
let maxChars = 0;
let usedChars = 0;
if (typeof subscriptionData === 'object' && subscriptionData !== null) {
if ('character_limit' in subscriptionData && typeof subscriptionData.character_limit === 'number') {
maxChars = subscriptionData.character_limit;
}
if ('character_count' in subscriptionData && typeof subscriptionData.character_count === 'number') {
usedChars = subscriptionData.character_count;
}
}
return { maxChars, usedChars };
}, [subscriptionData]);
const limits = useLimitProvider();

const ttsUsedPercent = limits.maxChars > 0 ? limits.usedChars / limits.maxChars : 1;
const charsAvailable = limits.maxChars - limits.usedChars;
Expand Down
151 changes: 151 additions & 0 deletions src/js/hooks/use-eleven-labs-tts.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
import { useCallback, useMemo, useRef } from 'react';
import { ElevenLabsVoiceData } from '../model/eleven-labs';
import { mapPronounsToGender, ttsInputToText } from '../utils/chat-messages';
import useSWR, { useSWRConfig } from 'swr';
import { ELEVEN_LABS_SUBSCRIPTION_ENDPOINT } from '../chat/TtsHealth';
import { TtsApi, TtsHookOptions, TtsInput, TtsLimits } from '../model/tts';
import { useSettings } from '../contexts/settings-context';
import { SettingName } from '../model/settings';

/**
 * Limit provider hook for ElevenLabs.
 *
 * Requests the subscription endpoint through SWR (refreshed every 60 seconds,
 * not revalidated on focus, previous data kept while reloading) using the
 * token stored in the settings, and extracts the character quota from the
 * response.
 *
 * @returns `maxChars` (from `character_limit`) and `usedChars` (from
 * `character_count`); either value is 0 when the response does not contain a
 * numeric field of that name.
 */
const useElevenLabsLimits = (): TtsLimits => {
  const { settings } = useSettings();
  const token = settings[SettingName.ELEVEN_LABS_TOKEN];

  const fetchSubscription = async (endpoint: string) => {
    // Without a token no request is made; resolving to null makes both limits 0
    if (!token) {
      return null;
    }

    const response = await fetch(endpoint, {
      method: 'GET',
      headers: {
        'xi-api-key': token,
      },
    });
    return await response.json();
  };

  const { data: subscriptionData } = useSWR(ELEVEN_LABS_SUBSCRIPTION_ENDPOINT, fetchSubscription, {
    refreshInterval: 60e3,
    revalidateOnFocus: false,
    keepPreviousData: true,
  });

  return useMemo((): TtsLimits => {
    // Reads a single field from the response, falling back to 0 unless it is present and numeric
    const numericField = (field: 'character_limit' | 'character_count'): number => {
      if (typeof subscriptionData !== 'object' || subscriptionData === null) {
        return 0;
      }
      if (!(field in subscriptionData)) {
        return 0;
      }
      const value = subscriptionData[field];
      return typeof value === 'number' ? value : 0;
    };

    return {
      maxChars: numericField('character_limit'),
      usedChars: numericField('character_count'),
    };
  }, [subscriptionData]);
};

/**
 * TTS provider hook that reads queued inputs aloud through the ElevenLabs
 * streaming text-to-speech endpoint.
 *
 * The queue and player primitives are supplied by the caller through
 * {@link TtsHookOptions}; this hook only decides which voice to use, requests
 * the audio and hands it over to the player.
 *
 * @returns The provider API: `readText` queues an input and starts processing,
 * `fetchVoices` loads the voice list once, `clearQueue` is passed through
 * unchanged, `clearIds` removes queued items and restarts processing when
 * nothing is currently being read, and `limitProviderHook` exposes the
 * ElevenLabs character quota hook.
 */
export const useElevenLabsTts = ({
  token,
  enabled,
  lastReadTextRef,
  currentlyReadingRef,
  pickQueueItem,
  requestPlayer,
  readFirstInQueue,
  setAudioSource,
  clearPlayingAudio,
  clearQueue,
  clearIdsFromQueue,
  queueText,
}: TtsHookOptions): TtsApi => {
  const voicesRef = useRef<ElevenLabsVoiceData['voices']>([]);

  // Picks the first young narration voice whose gender label matches the input's pronouns
  const getVoiceId = useCallback((ttsInput?: TtsInput): string | undefined => {
    const targetGender = mapPronounsToGender(ttsInput?.pronouns);
    const matchingVoice = voicesRef.current.find(voice => {
      const { age, gender, 'use case': useCase } = voice.labels;
      return age === 'young' && gender === targetGender && useCase === 'narration';
    });
    return matchingVoice ? matchingVoice.voice_id : undefined;
  }, []);

  const { mutate } = useSWRConfig();

  /**
   * Reads the next queued item, then calls itself again once playback settles.
   *
   * @param debugSource Label of the call site, only used in log messages.
   */
  const processQueue = useCallback(async (debugSource: string): Promise<void> => {
    if (!enabled) return;

    if (!token) {
      console.error('Token is missing (%s)', debugSource);
      return;
    }

    const firstQueueItem = pickQueueItem();
    if (!firstQueueItem) {
      return;
    }

    const voiceId = getVoiceId(firstQueueItem);
    if (!voiceId) {
      console.error('No voice found (%s)', debugSource);
      return;
    }

    if (!requestPlayer()) {
      return;
    }

    const ttsInput = readFirstInQueue();
    const textToRead = ttsInputToText(ttsInput, lastReadTextRef.current);
    let audioUrl: string | undefined;
    try {
      const response = await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${voiceId}/stream`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'xi-api-key': token,
          'accept': 'audio/mpeg'
        },
        body: JSON.stringify({ text: textToRead })
      });
      // Refresh the displayed quota whether or not the request succeeded
      void mutate(ELEVEN_LABS_SUBSCRIPTION_ENDPOINT);
      if (!response.ok) {
        // An error response carries no audio; do not hand its body to the player
        throw new Error(`Request failed with status ${response.status} ${response.statusText}`);
      }

      const audioBlob = await response.blob();
      audioUrl = URL.createObjectURL(audioBlob);
      await setAudioSource(audioUrl);
    } catch (error) {
      console.error('Error generating audio (%s):', debugSource, error);
      // NOTE(review): assumes clearPlayingAudio also releases the player claimed by
      // requestPlayer, so a failed request cannot block later items - confirm in use-tts
      clearPlayingAudio(ttsInput);
      return;
    } finally {
      // Object URLs keep their blob alive until revoked; release it once the player is done
      if (audioUrl) URL.revokeObjectURL(audioUrl);
    }

    clearPlayingAudio(ttsInput);
    return processQueue('ended handler');
  }, [clearPlayingAudio, enabled, getVoiceId, lastReadTextRef, mutate, pickQueueItem, readFirstInQueue, requestPlayer, setAudioSource, token]);

  const readText = useCallback((text: TtsInput) => {
    if (!enabled) return;

    queueText(text);
    void processQueue('readText');
  }, [enabled, queueText, processQueue]);

  const clearIds = useCallback((clearedIds: string[]) => {
    clearIdsFromQueue(clearedIds);

    // Only restart processing when nothing is being read at the moment
    if (!currentlyReadingRef.current) {
      void processQueue('clearIds');
    }
  }, [clearIdsFromQueue, currentlyReadingRef, processQueue]);

  const fetchVoices = useCallback(() => {
    // Voices are only loaded once; a non-empty list short-circuits further requests
    if (!enabled || !token || voicesRef.current.length) return;

    fetch('https://api.elevenlabs.io/v1/voices', {
      method: 'GET',
      headers: { accept: 'application/json' },
    }).then(async (r) => {
      if (!r.ok) {
        throw new Error(`Request failed with status ${r.status} ${r.statusText}`);
      }

      const voiceData = await r.json();
      const voices: unknown = voiceData?.voices;
      // A non-array value here would make the lookup in getVoiceId throw
      // TODO validate the individual voice entries
      if (!Array.isArray(voices)) {
        throw new Error('Unexpected voice list format');
      }

      voicesRef.current = voices;
      void processQueue('voices fetching');
    }).catch((error: unknown) => {
      console.error('Error fetching voices:', error);
    });
  }, [enabled, processQueue, token]);

  return {
    readText,
    fetchVoices,
    clearQueue,
    clearIds,
    limitProviderHook: useElevenLabsLimits,
  };
};
125 changes: 125 additions & 0 deletions src/js/hooks/use-play-ht-tts.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import { useCallback, useMemo, useRef } from 'react';
import { TtsApi, TtsHookOptions, TtsInput } from '../model/tts';
import { mapPronounsToGender, ttsInputToText } from '../utils/chat-messages';
import { PlayHtVoiceData } from '../model/play-ht';

/**
 * Options accepted by the Play.ht TTS hook: the shared TTS hook options plus
 * the Play.ht user ID.
 */
export interface PlayHtTtsParams extends TtsHookOptions {
// Sent as the `X-USER-ID` request header when set; the hook refuses to process the queue without it
userId: string | null;
}

/**
 * TTS provider hook that reads queued inputs aloud through the Play.ht v2 API.
 *
 * The queue and player primitives are supplied by the caller through
 * {@link PlayHtTtsParams}; this hook only decides which voice to use, requests
 * the audio and hands the resulting URL over to the player.
 *
 * @returns The provider API: `readText` queues an input and starts processing,
 * `fetchVoices` loads the voice list once, while `clearQueue` and `clearIds`
 * are the caller-supplied queue functions passed through unchanged.
 */
export const usePlayHtTts = ({
  token,
  enabled,
  userId,
  lastReadTextRef,
  requestPlayer,
  setAudioSource,
  readFirstInQueue,
  pickQueueItem,
  clearPlayingAudio,
  clearQueue,
  clearIdsFromQueue,
  queueText,
}: PlayHtTtsParams): TtsApi => {
  const voicesRef = useRef<PlayHtVoiceData[]>([]);

  // Picks the first neutral-loudness youth narrative voice whose gender matches the input's pronouns
  const getVoiceId = useCallback((ttsInput?: TtsInput): string | undefined => {
    const targetGender = mapPronounsToGender(ttsInput?.pronouns);
    const matchingVoice = voicesRef.current.find(voice => {
      const { age, gender, style, loudness } = voice;
      return age === 'youth' && gender === targetGender && style === 'narrative' && loudness === 'neutral';
    });
    return matchingVoice ? matchingVoice.id : undefined;
  }, []);

  // Credential headers shared by every request; a header is omitted while its value is unset
  const apiAuthHeaders = useMemo(() => {
    const authHeaders: Record<string, string> = {};
    if (token) authHeaders['Authorization'] = token;
    if (userId) authHeaders['X-USER-ID'] = userId;
    return authHeaders;
  }, [token, userId]);

  /**
   * Reads the next queued item, then calls itself again once playback settles,
   * mirroring the ElevenLabs provider.
   *
   * @param debugSource Label of the call site, only used in log messages.
   */
  const processQueue = useCallback(async (debugSource: string): Promise<void> => {
    if (!enabled) return;

    if (!token) {
      console.error('Token is missing (%s)', debugSource);
      return;
    }

    if (!userId) {
      console.error('User ID is missing (%s)', debugSource);
      return;
    }

    const firstQueueItem = pickQueueItem();
    if (!firstQueueItem) {
      return;
    }

    const voiceId = getVoiceId(firstQueueItem);
    if (!voiceId) {
      console.error('No voice found (%s)', debugSource);
      return;
    }

    if (!requestPlayer()) {
      return;
    }

    const ttsInput = readFirstInQueue();
    const textToRead = ttsInputToText(ttsInput, lastReadTextRef.current);
    try {
      // Make API request to Play.ht (adjust URL and headers)
      const response = await fetch('https://api.play.ht/api/v2/tts', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Accept': 'audio/mpeg',
          ...apiAuthHeaders,
        },
        body: JSON.stringify({
          text: textToRead,
          // Reuse the voice resolved above instead of searching the voice list a second time
          voice: voiceId,
          output_format: 'mp3',
          speed: 1,
          sample_rate: 44100,
          voice_engine: 'PlayHT2.0-turbo'
        }),
      });

      if (!response.ok) {
        // statusText can be empty, so always include the numeric status
        throw new Error(`Request failed with status ${response.status} ${response.statusText}`);
      }
      const audioUrl = response.headers.get('Location');
      if (!audioUrl) {
        throw new Error('Response is missing the Location header');
      }

      await setAudioSource(audioUrl);
    } catch (error) {
      console.error('Error generating audio:', error);
      // NOTE(review): assumes clearPlayingAudio also releases the player claimed by
      // requestPlayer, so a failed request cannot block later items - confirm in use-tts
      clearPlayingAudio(ttsInput);
      return;
    }

    // Same follow-up as the ElevenLabs provider: mark the item as done and continue with the queue
    clearPlayingAudio(ttsInput);
    return processQueue('ended handler');
  }, [apiAuthHeaders, clearPlayingAudio, enabled, getVoiceId, lastReadTextRef, pickQueueItem, readFirstInQueue, requestPlayer, setAudioSource, token, userId]);

  const fetchVoices = useCallback(() => {
    // Both credentials are needed for processing, so do not request voices without them;
    // a non-empty list short-circuits further requests
    if (!enabled || !token || !userId || voicesRef.current.length) return;

    fetch('https://api.play.ht/api/v2/voices', {
      method: 'GET',
      headers: { accept: 'application/json', ...apiAuthHeaders },
    }).then(async (r) => {
      if (!r.ok) {
        throw new Error(`Request failed with status ${r.status} ${r.statusText}`);
      }

      const voiceData: unknown = await r.json();
      // A non-array value here would make the lookup in getVoiceId throw
      // TODO validate the individual voice entries
      if (!Array.isArray(voiceData)) {
        throw new Error('Unexpected voice list format');
      }

      voicesRef.current = voiceData;
      void processQueue('voices fetching');
    }).catch((error: unknown) => {
      console.error('Error fetching voices:', error);
    });
  }, [apiAuthHeaders, enabled, processQueue, token, userId]);

  const readText = useCallback((text: TtsInput) => {
    if (!enabled) return;

    queueText(text);
    void processQueue('readText');
  }, [enabled, queueText, processQueue]);

  return {
    readText,
    fetchVoices,
    clearQueue,
    clearIds: clearIdsFromQueue,
  };
};
Loading

0 comments on commit df29230

Please sign in to comment.