From 313d23f2c9726ca2eb5b23068155d0c6ff15add5 Mon Sep 17 00:00:00 2001 From: nzambello Date: Tue, 15 Oct 2024 16:06:27 +0300 Subject: [PATCH] Revert "feat: improve lip sync (#20)" This reverts commit e13f520eb91bd8af1cc42578da29316f0b25033e. --- src/components/Avatar/Avatar.tsx | 10 +- .../AvatarComponent/avatarComponent.tsx | 51 +-- .../components/fullbodyAvatar.tsx | 352 +++++++----------- .../components/halfbodyAvatar.tsx | 8 +- src/components/Avatar/AvatarView/index.tsx | 17 +- src/components/MemoriWidget/MemoriWidget.tsx | 300 ++++++++------- src/components/layouts/ZoomedFullBody.tsx | 67 ++-- .../ZoomedFullBody.test.tsx.snap | 46 ++- src/components/layouts/zoomed-full-body.css | 16 - src/context/visemeContext.tsx | 350 ++++++++++++----- src/helpers/utils.ts | 17 +- src/styles.css | 1 - 12 files changed, 657 insertions(+), 578 deletions(-) delete mode 100644 src/components/layouts/zoomed-full-body.css diff --git a/src/components/Avatar/Avatar.tsx b/src/components/Avatar/Avatar.tsx index 54e9beb8..64f1dedf 100644 --- a/src/components/Avatar/Avatar.tsx +++ b/src/components/Avatar/Avatar.tsx @@ -57,7 +57,7 @@ const Avatar: React.FC = ({ const { t } = useTranslation(); const [isClient, setIsClient] = useState(false); - const { stopProcessing, updateCurrentViseme, resetVisemeQueue } = useViseme(); + const { setMeshRef, clearVisemes, setEmotion } = useViseme(); useEffect(() => { setIsClient(true); @@ -132,7 +132,6 @@ const Avatar: React.FC = ({ } > = ({ speaking={isPlayingAudio} loading={loading} style={getAvatarStyle()} - stopProcessing={stopProcessing} - resetVisemeQueue={resetVisemeQueue} - isZoomed={isZoomed} + clearVisemes={clearVisemes} + setMeshRef={setMeshRef} + isZoomed={isZoomed} chatEmission={chatProps?.dialogState?.emission} + setEmotion={setEmotion} /> ); diff --git a/src/components/Avatar/AvatarView/AvatarComponent/avatarComponent.tsx b/src/components/Avatar/AvatarView/AvatarComponent/avatarComponent.tsx index 402d6f35..3f6e43b0 100644 --- a/src/components/Avatar/AvatarView/AvatarComponent/avatarComponent.tsx +++ b/src/components/Avatar/AvatarView/AvatarComponent/avatarComponent.tsx @@ -14,9 +14,9 @@ interface Props { speaking: boolean; isZoomed: boolean; chatEmission: any; - stopProcessing: () => void; - resetVisemeQueue: () => void; - updateCurrentViseme: (currentTime: number) => { name: string; weight: number } | null; + setMeshRef: any; + clearVisemes: () => void; + setEmotion: (emotion: string) => void; } interface BaseAction { @@ -50,9 +50,9 @@ const baseActions: Record = { Loading3: { weight: 0 }, }; - export const AvatarView: React.FC = ({ - stopProcessing, + setMeshRef, + clearVisemes, chatEmission, showControls, animation, @@ -64,8 +64,7 @@ export const AvatarView: React.FC = ({ halfBody, loading, isZoomed, - updateCurrentViseme, - resetVisemeQueue, + setEmotion, }) => { const [currentBaseAction, setCurrentBaseAction] = useState({ action: animation || 'Idle1', @@ -78,9 +77,6 @@ export const AvatarView: React.FC = ({ const [morphTargetDictionary, setMorphTargetDictionary] = useState<{ [key: string]: number; }>({}); - const [emotionMorphTargets, setEmotionMorphTargets] = useState<{ - [key: string]: number; - }>({}); const [timeScale, setTimeScale] = useState(0.8); @@ -97,7 +93,7 @@ export const AvatarView: React.FC = ({ //remove the last character from the action const newEmotion = action.slice(0, -1); - // setEmotion(newEmotion); + setEmotion(newEmotion); const defaultEmotions = Object.keys(emotionMap).reduce((acc, key) => { acc[key] = 0; @@ -109,8 +105,9 @@ export const AvatarView: React.FC = ({ const emotionValues = emotion === 'default' ? defaultEmotions : emotionMap[emotion]; - setEmotionMorphTargets(prevEmotions => ({ - ...prevEmotions, + setMorphTargetInfluences(prevInfluences => ({ + ...prevInfluences, + ...defaultEmotions, ...emotionValues, })); }, []); @@ -165,11 +162,6 @@ export const AvatarView: React.FC = ({ const emotion = `${outputContent}${randomNumber}`; onBaseActionChange(emotion); - } else { - //Set a random idle animation - const randomNumber = Math.floor(Math.random() * 5) + 1; - const animation = `Idle${randomNumber === 3 ? 4 : randomNumber}`; - onBaseActionChange(animation); } }, [chatEmission]); @@ -182,13 +174,6 @@ export const AvatarView: React.FC = ({ } }, [loading]); - // useEffect(() => { - // if (speaking && currentBaseAction.action !== 'Idle1') { - // const animation = `Idle1`; - // onBaseActionChange(animation); - // } - // }, [speaking]); - return ( <> {showControls && ( @@ -206,28 +191,30 @@ export const AvatarView: React.FC = ({ {halfBody ? ( ) : ( )} diff --git a/src/components/Avatar/AvatarView/AvatarComponent/components/fullbodyAvatar.tsx b/src/components/Avatar/AvatarView/AvatarComponent/components/fullbodyAvatar.tsx index 34341a2e..e11e2bc6 100644 --- a/src/components/Avatar/AvatarView/AvatarComponent/components/fullbodyAvatar.tsx +++ b/src/components/Avatar/AvatarView/AvatarComponent/components/fullbodyAvatar.tsx @@ -1,17 +1,21 @@ -import React, { useEffect, useRef, useState } from 'react'; +import React, { useEffect, useRef, useState, useCallback } from 'react'; import { Vector3, Euler, AnimationMixer, SkinnedMesh, Object3D, - MathUtils, AnimationAction, - LoopOnce, } from 'three'; import { useAnimations, useGLTF } from '@react-three/drei'; -import { useGraph, useFrame } from '@react-three/fiber'; +import { useGraph, dispose, useFrame } from '@react-three/fiber'; import { correctMaterials, isSkinnedMesh } from '../../../../../helpers/utils'; +import { useAvatarBlink } from '../../utils/useEyeBlink'; +import { useViseme } from '../../../../../context/visemeContext'; + +const lerp = (start: number, end: number, alpha: number): number => { + return start * (1 - alpha) + end * alpha; +}; interface FullbodyAvatarProps { url: string; @@ -22,23 +26,16 @@ interface FullbodyAvatarProps { weight: number; }; timeScale: number; + loading?: boolean; + speaking?: boolean; isZoomed?: boolean; + setMorphTargetInfluences: (influences: { [key: string]: number }) => void; + setMorphTargetDictionary: (dictionary: { [key: string]: number }) => void; + morphTargetInfluences: { [key: string]: number }; + morphTargetDictionary: { [key: string]: number }; + setMeshRef: any; eyeBlink?: boolean; - stopProcessing: () => void; - resetVisemeQueue: () => void; - updateCurrentViseme: ( - currentTime: number - ) => { name: string; weight: number } | null; - smoothMorphTarget?: boolean; - morphTargetSmoothing?: number; - morphTargetInfluences: Record; - setMorphTargetDictionary: ( - morphTargetDictionary: Record - ) => void; - setMorphTargetInfluences: ( - morphTargetInfluences: Record - ) => void; - emotionMorphTargets: Record; + clearVisemes: () => void; } const AVATAR_POSITION = new Vector3(0, -1, 0); @@ -46,19 +43,11 @@ const AVATAR_ROTATION = new Euler(0.175, 0, 0); const AVATAR_POSITION_ZOOMED = new Vector3(0, -1.45, 0); const ANIMATION_URLS = { - MALE: 'https://assets.memori.ai/api/v2/asset/2c5e88a4-cf62-408b-9ef0-518b099dfcb2.glb', + MALE: 'https://assets.memori.ai/api/v2/asset/1c350a21-97d8-4add-82cc-9dc10767a26b.glb', FEMALE: - 'https://assets.memori.ai/api/v2/asset/0e49aa5d-f757-4292-a170-d843c2839a41.glb', -}; - -// Blink configuration -const BLINK_CONFIG = { - minInterval: 1000, - maxInterval: 5000, - blinkDuration: 150, + 'https://assets.memori.ai/api/v2/asset/c2b07166-de10-4c66-918b-7b7cd380cca7.glb', }; - -const EMOTION_TRANSITION_SPEED = 0.1; // Adjust this value to control emotion transition speed +const ANIMATION_DURATION = 3000; // Duration in milliseconds for non-idle animations export default function FullbodyAvatar({ url, @@ -67,69 +56,85 @@ export default function FullbodyAvatar({ currentBaseAction, timeScale, isZoomed, - eyeBlink, - stopProcessing, - morphTargetSmoothing = 0.5, - updateCurrentViseme, - setMorphTargetDictionary, setMorphTargetInfluences, - resetVisemeQueue, - emotionMorphTargets, + setMorphTargetDictionary, + morphTargetInfluences, + eyeBlink, + setMeshRef, + clearVisemes, }: FullbodyAvatarProps) { const { scene } = useGLTF(url); const { animations } = useGLTF(ANIMATION_URLS[sex]); const { nodes, materials } = useGraph(scene); const { actions } = useAnimations(animations, scene); + const [mixer] = useState(() => new AnimationMixer(scene)); - const mixer = useRef(new AnimationMixer(scene)); - const headMeshRef = useRef(); + const avatarMeshRef = useRef(); const currentActionRef = useRef(null); - const [isTransitioningToIdle, setIsTransitioningToIdle] = useState(false); + const isTransitioningRef = useRef(false); + + // Blink animation + useAvatarBlink({ + enabled: eyeBlink || false, + setMorphTargetInfluences, + config: { + minInterval: 1500, + maxInterval: 4000, + blinkDuration: 120, + }, + }); - // Blink state - const lastBlinkTime = useRef(0); - const nextBlinkTime = useRef(0); - const isBlinking = useRef(false); - const blinkStartTime = useRef(0); + // Idle animation when emotion animation is finished + const transitionToIdle = useCallback(() => { + if (!actions || isTransitioningRef.current) return; - // Morph targets - const currentEmotionRef = useRef>({}); - const previousEmotionKeysRef = useRef>(new Set()); + isTransitioningRef.current = true; - useEffect(() => { - correctMaterials(materials); + const finishCurrentAnimation = () => { + if (currentActionRef.current && !currentActionRef.current.paused) { + const remainingTime = (currentActionRef.current.getClip().duration - currentActionRef.current.time) * 1000; + setTimeout(() => { + startIdleAnimation(); + }, remainingTime); + } else { + startIdleAnimation(); + } + }; - scene.traverse((object: Object3D) => { - if (object instanceof SkinnedMesh) { - if (object.name === 'GBNL__Head' || object.name === 'Wolf3D_Avatar') { - headMeshRef.current = object; - if (object.morphTargetDictionary && object.morphTargetInfluences) { - setMorphTargetDictionary(object.morphTargetDictionary); + const startIdleAnimation = () => { + const idleAnimations = Object.keys(actions).filter(key => + key.startsWith('Idle') + ); + const randomIdle = + idleAnimations[Math.floor(Math.random() * idleAnimations.length)]; - const initialInfluences = Object.keys( - object.morphTargetDictionary - ).reduce((acc, key) => ({ ...acc, [key]: 0 }), {}); - setMorphTargetInfluences(initialInfluences); - } - } + const idleAction = actions[randomIdle]; + const fadeOutDuration = 0.5; + const fadeInDuration = 0.5; + + if (currentActionRef.current) { + currentActionRef.current.fadeOut(fadeOutDuration); } - }); - onLoaded?.(); + idleAction?.reset().fadeIn(fadeInDuration).play(); + currentActionRef.current = idleAction; - return () => { - Object.values(materials).forEach(material => material.dispose()); - Object.values(nodes) - .filter(isSkinnedMesh) - .forEach(mesh => mesh.geometry.dispose()); - stopProcessing(); - resetVisemeQueue(); + setTimeout(() => { + isTransitioningRef.current = false; + }, (fadeOutDuration + fadeInDuration) * 1000); }; - }, [materials, nodes, url, onLoaded, stopProcessing, resetVisemeQueue, scene]); - // Handle base animation changes + if (currentActionRef.current && !currentActionRef.current.getClip().name.startsWith('Idle')) { + finishCurrentAnimation(); + } else { + startIdleAnimation(); + } + }, [actions]); + + // Base animation useEffect(() => { - if (!actions || !currentBaseAction.action) return; + if (!actions || !currentBaseAction.action || isTransitioningRef.current) + return; const newAction = actions[currentBaseAction.action]; if (!newAction) { @@ -142,156 +147,79 @@ export default function FullbodyAvatar({ const fadeOutDuration = 0.8; const fadeInDuration = 0.8; + if (!currentBaseAction.action.startsWith('Idle')) { + setTimeout(() => { + transitionToIdle(); + }, ANIMATION_DURATION); + } + if (currentActionRef.current) { currentActionRef.current.fadeOut(fadeOutDuration); } - - console.log(newAction); - newAction.reset().fadeIn(fadeInDuration).play(); - currentActionRef.current = newAction; - // Set the time scale for the new action newAction.timeScale = timeScale; + newAction.reset().fadeIn(fadeInDuration).play(); + currentActionRef.current = newAction; + }, [currentBaseAction, timeScale, actions, transitionToIdle]); - // If it's an emotion animation, set it to play once and then transition to idle - if ( - currentBaseAction.action.startsWith('Gioia') || - currentBaseAction.action.startsWith('Rabbia') || - currentBaseAction.action.startsWith('Sorpresa') || - currentBaseAction.action.startsWith('Timore') || - currentBaseAction.action.startsWith('Tristezza') - ) { - newAction.setLoop(LoopOnce, 1); - newAction.clampWhenFinished = true; - setIsTransitioningToIdle(true); - } - }, [actions, currentBaseAction, timeScale]); - - useFrame(state => { - if ( - headMeshRef.current && - headMeshRef.current.morphTargetDictionary && - headMeshRef.current.morphTargetInfluences - ) { - const currentTime = state.clock.getElapsedTime() * 1000; // Convert to milliseconds - - // Handle blinking - let blinkValue = 0; - if (eyeBlink) { - if (currentTime >= nextBlinkTime.current && !isBlinking.current) { - isBlinking.current = true; - blinkStartTime.current = currentTime; - lastBlinkTime.current = currentTime; - nextBlinkTime.current = - currentTime + - Math.random() * - (BLINK_CONFIG.maxInterval - BLINK_CONFIG.minInterval) + - BLINK_CONFIG.minInterval; - } + // Set up the mesh reference and morph target influences + useEffect(() => { + correctMaterials(materials); - if (isBlinking.current) { - const blinkProgress = - (currentTime - blinkStartTime.current) / BLINK_CONFIG.blinkDuration; - if (blinkProgress <= 0.5) { - // Eyes closing - blinkValue = blinkProgress * 2; - } else if (blinkProgress <= 1) { - // Eyes opening - blinkValue = 2 - blinkProgress * 2; - } else { - // Blink finished - isBlinking.current = false; - blinkValue = 0; - } + scene.traverse((object: Object3D) => { + if ( + object instanceof SkinnedMesh && + (object.name === 'GBNL__Head' || object.name === 'Wolf3D_Avatar') + ) { + avatarMeshRef.current = object; + setMeshRef(object); + + if (object.morphTargetDictionary && object.morphTargetInfluences) { + setMorphTargetDictionary(object.morphTargetDictionary); + + const initialInfluences = Object.keys( + object.morphTargetDictionary + ).reduce((acc, key) => ({ ...acc, [key]: 0 }), {}); + setMorphTargetInfluences(initialInfluences); } } + }); - const currentViseme = updateCurrentViseme(currentTime / 1000); - - // Create a set of current emotion keys - const currentEmotionKeys = new Set(Object.keys(emotionMorphTargets)); + onLoaded?.(); - // Reset old emotion morph targets - previousEmotionKeysRef.current.forEach(key => { - if (!currentEmotionKeys.has(key)) { - const index = headMeshRef.current!.morphTargetDictionary![key]; - if (typeof index === 'number') { - currentEmotionRef.current[key] = 0; - if (headMeshRef.current && headMeshRef.current.morphTargetInfluences) { - headMeshRef.current.morphTargetInfluences[index] = 0; - } - } + return () => { + Object.values(materials).forEach(dispose); + Object.values(nodes).filter(isSkinnedMesh).forEach(dispose); + clearVisemes(); + }; + }, [ + materials, + nodes, + url, + onLoaded, + setMorphTargetDictionary, + setMorphTargetInfluences, + setMeshRef, + clearVisemes, + ]); + + // Update morph target influences + useFrame((_, delta) => { + if (avatarMeshRef.current && avatarMeshRef.current.morphTargetDictionary) { + updateMorphTargetInfluences(); + } + mixer.update(delta * 0.001); + + function updateMorphTargetInfluences() { + Object.entries(morphTargetInfluences).forEach(([key, value]) => { + const index = avatarMeshRef.current!.morphTargetDictionary![key]; + if (typeof index === 'number' && + avatarMeshRef.current!.morphTargetInfluences) { + const currentValue = avatarMeshRef.current!.morphTargetInfluences[index]; + const smoothValue = lerp(currentValue, value, 0.1); + avatarMeshRef.current!.morphTargetInfluences[index] = smoothValue; } }); - - // Update morph targets - Object.entries(headMeshRef.current.morphTargetDictionary).forEach( - ([key, index]) => { - if (typeof index === 'number') { - let targetValue = 0; - - // Handle emotions (base layer) - if (Object.prototype.hasOwnProperty.call(emotionMorphTargets, key)) { - const targetEmotionValue = emotionMorphTargets[key]; - const currentEmotionValue = currentEmotionRef.current[key] || 0; - const newEmotionValue = MathUtils.lerp( - currentEmotionValue, - targetEmotionValue * 2, - EMOTION_TRANSITION_SPEED - ); - currentEmotionRef.current[key] = newEmotionValue; - targetValue += newEmotionValue; - } - - // Handle visemes (additive layer) - if (currentViseme && key === currentViseme.name) { - targetValue += currentViseme.weight * 1.2; // Amplify the effect - } - - // Handle blinking (additive layer, only for 'eyesClosed') - if (key === 'eyesClosed' && eyeBlink) { - targetValue += blinkValue; - } - - // Clamp the final value between 0 and 1 - targetValue = MathUtils.clamp(targetValue, 0, 1); - - // Apply smoothing - if (headMeshRef.current && headMeshRef.current.morphTargetInfluences) { - headMeshRef.current.morphTargetInfluences[index] = MathUtils.lerp( - headMeshRef.current.morphTargetInfluences[index], - targetValue, - morphTargetSmoothing - ); - } - } - } - ); - - // Update the set of previous emotion keys for the next frame - previousEmotionKeysRef.current = currentEmotionKeys; - - // Handle transition from emotion animation to idle - if (isTransitioningToIdle && currentActionRef.current) { - if ( - currentActionRef.current.time >= - currentActionRef.current.getClip().duration - ) { - // Transition to the idle animation - const idleNumber = Math.floor(Math.random() * 5) + 1; // Randomly choose 1, 2, 3, 4 or 5 - const idleAction = actions[`Idle${idleNumber == 3 ? 4 : idleNumber}`]; - - if (idleAction) { - currentActionRef.current.fadeOut(0.5); - idleAction.reset().fadeIn(0.5).play(); - currentActionRef.current = idleAction; - setIsTransitioningToIdle(false); - } - } - } - - // Update the animation mixer - mixer.current.update(0.01); // Fixed delta time for consistent animation speed } }); @@ -303,4 +231,4 @@ export default function FullbodyAvatar({ ); -} +} \ No newline at end of file diff --git a/src/components/Avatar/AvatarView/AvatarComponent/components/halfbodyAvatar.tsx b/src/components/Avatar/AvatarView/AvatarComponent/components/halfbodyAvatar.tsx index 279f0bd2..4f0ad93f 100644 --- a/src/components/Avatar/AvatarView/AvatarComponent/components/halfbodyAvatar.tsx +++ b/src/components/Avatar/AvatarView/AvatarComponent/components/halfbodyAvatar.tsx @@ -14,6 +14,8 @@ interface HalfBodyAvatarProps { headMovement?: boolean; speaking?: boolean; onLoaded?: () => void; + setMeshRef: (mesh: Object3D) => void; + clearVisemes: () => void; setMorphTargetDictionary: (morphTargetDictionary: any) => void; eyeBlink?: boolean; morphTargetInfluences: any; @@ -29,7 +31,9 @@ export default function HalfBodyAvatar({ setMorphTargetDictionary, headMovement, eyeBlink, + setMeshRef, onLoaded, + clearVisemes, morphTargetInfluences, }: HalfBodyAvatarProps) { const { scene } = useGLTF(url); @@ -54,6 +58,7 @@ export default function HalfBodyAvatar({ // Set mesh reference for the first SkinnedMesh found const firstSkinnedMesh = Object.values(nodes).find(isSkinnedMesh) as SkinnedMesh; if (firstSkinnedMesh) { + setMeshRef(firstSkinnedMesh); avatarMeshRef.current = firstSkinnedMesh; if (firstSkinnedMesh.morphTargetDictionary && firstSkinnedMesh.morphTargetInfluences) { setMorphTargetDictionary(firstSkinnedMesh.morphTargetDictionary); @@ -72,11 +77,12 @@ export default function HalfBodyAvatar({ const disposeObjects = () => { Object.values(materials).forEach(dispose); Object.values(nodes).filter(isSkinnedMesh).forEach(dispose); + clearVisemes(); }; disposeObjects(); }; - }, [materials, nodes, url, onLoaded]); + }, [materials, nodes, url, onLoaded, clearVisemes]); const skinnedMeshes = useMemo( () => Object.values(nodes).filter(isSkinnedMesh), diff --git a/src/components/Avatar/AvatarView/index.tsx b/src/components/Avatar/AvatarView/index.tsx index cd9bac69..0c3be35f 100644 --- a/src/components/Avatar/AvatarView/index.tsx +++ b/src/components/Avatar/AvatarView/index.tsx @@ -23,9 +23,8 @@ export interface Props { isZoomed?: boolean; chatEmission?: any; setMeshRef?: any; - stopProcessing: () => void; - resetVisemeQueue: () => void; - updateCurrentViseme: (currentTime: number) => { name: string; weight: number } | null; + clearVisemes: () => void; + setEmotion: (emotion: string) => void; } const defaultStyles = { @@ -89,9 +88,9 @@ export default function ContainerAvatarView({ showControls = false, isZoomed, chatEmission, - stopProcessing, - resetVisemeQueue, - updateCurrentViseme, + setMeshRef, + clearVisemes, + setEmotion, }: Props) { return ( diff --git a/src/components/MemoriWidget/MemoriWidget.tsx b/src/components/MemoriWidget/MemoriWidget.tsx index e6257c63..bc9df956 100644 --- a/src/components/MemoriWidget/MemoriWidget.tsx +++ b/src/components/MemoriWidget/MemoriWidget.tsx @@ -543,19 +543,12 @@ const MemoriWidget = ({ const [hideEmissions, setHideEmissions] = useState(false); const { - startProcessing, - stopProcessing, - addViseme, - resetVisemeQueue, - isProcessing, + addVisemeToQueue, + processVisemeQueue, + clearVisemes, + emotion, + getAzureStyleForEmotion, } = useViseme(); - const audioContextRef = useRef(null); - const speechSynthesizerRef = useRef(null); - const audioDestinationRef = useRef( - null - ); - const currentSpeechRef = useRef<{ cancel: () => void } | null>(null); - useEffect(() => { setIsPlayingAudio(!!speechSynthesizer); @@ -1890,167 +1883,200 @@ const MemoriWidget = ({ const e = new CustomEvent('MemoriEndSpeak'); document.dispatchEvent(e); }; - const initializeAudioContext = useCallback(() => { - if (!audioContextRef.current || audioContextRef.current.state === 'closed') { - audioContextRef.current = new (window.AudioContext || (window as any).webkitAudioContext)() as unknown as IAudioContext; - } - return audioContextRef.current; - }, []); - - const initializeSpeechSynthesizer = useCallback((audioConfig: speechSdk.AudioConfig) => { - if (!speechSynthesizerRef.current && AZURE_COGNITIVE_SERVICES_TTS_KEY) { - const speechConfig = speechSdk.SpeechConfig.fromSubscription( - AZURE_COGNITIVE_SERVICES_TTS_KEY, - 'eastus' - ); - speechSynthesizerRef.current = new speechSdk.SpeechSynthesizer(speechConfig, audioConfig); - } - return speechSynthesizerRef.current; - }, []); - const stopCurrentSpeech = useCallback(() => { - if (currentSpeechRef.current) { - currentSpeechRef.current.cancel(); - currentSpeechRef.current = null; - } - if (audioContextRef.current) { - audioContextRef.current.suspend(); - } - if (audioDestinationRef.current) { - audioDestinationRef.current.pause(); - } - setIsPlayingAudio(false); - stopProcessing(); - resetVisemeQueue(); - }, []); - - const speak = useCallback(async (text: string): Promise => { + const speak = (text: string): void => { if (!AZURE_COGNITIVE_SERVICES_TTS_KEY || preview) { emitEndSpeakEvent(); return; } - stopListening(); - stopCurrentSpeech(); // Stop any ongoing speech + // stopAudio(); - if (preview || muteSpeaker || speakerMuted) { - setIsPlayingAudio(false); + if (preview) return; + + if (muteSpeaker || speakerMuted) { + memoriSpeaking = false; setMemoriTyping(false); + emitEndSpeakEvent(); + + // trigger start continuous listening if set, see MemoriChat if (continuousSpeech) { setListeningTimeout(); } return; } - try { - const audioContext = initializeAudioContext(); - await audioContext.resume(); + if (audioDestination) audioDestination.pause(); + + let isSafari = + window.navigator.userAgent.includes('Safari') && + !window.navigator.userAgent.includes('Chrome'); + let isIOS = /iPad|iPhone|iPod/.test(navigator.userAgent); + if ((audioContext.state as string) === 'interrupted') { + audioContext.resume().then(() => speak(text)); + return; + } + if (audioContext.state === 'closed') { + audioContext = new AudioContext(); + let buffer = audioContext.createBuffer(1, 10000, 22050); + let source = audioContext.createBufferSource(); + source.buffer = buffer; + source.connect(audioContext.destination); + } else if (audioContext.state === 'suspended') { + stopAudio(); - if (!audioDestinationRef.current) { - audioDestinationRef.current = new speechSdk.SpeakerAudioDestination(); + audioContext = new AudioContext(); + let buffer = audioContext.createBuffer(1, 10000, 22050); + let source = audioContext.createBufferSource(); + source.buffer = buffer; + source.connect(audioContext.destination); + } + + if (!speechSynthesizer) { + if (!isIOS) { + audioDestination = new speechSdk.SpeakerAudioDestination(); } + let audioConfig = + speechSdk.AudioConfig.fromSpeakerOutput(audioDestination); + speechSynthesizer = new speechSdk.SpeechSynthesizer( + speechConfig, + audioConfig + ); + } - const audioConfig = speechSdk.AudioConfig.fromSpeakerOutput(audioDestinationRef.current); - const speechSynthesizer = initializeSpeechSynthesizer(audioConfig); + const source = audioContext.createBufferSource(); + source.addEventListener('ended', () => { + setIsPlayingAudio(false); + memoriSpeaking = false; + }); + audioDestination.onAudioEnd = () => { + setIsPlayingAudio(false); + memoriSpeaking = false; + source.disconnect(); - if (speechSynthesizer) { + emitEndSpeakEvent(); - // Add the new visemeReceived event listener - speechSynthesizer.visemeReceived = (_, e) => { - addViseme(e.visemeId, e.audioOffset); - console.log('viseme added') - }; - } - startProcessing(); - - const textToSpeak = escapeHTML(stripMarkdown(stripEmojis(stripHTML(stripOutputTags(text))))); - - const ssml = ` - - - ${replaceTextWithPhonemes(textToSpeak, userLang.toLowerCase())} - - - `; - - const speakPromise = new Promise((resolve, reject) => { - speechSynthesizer?.speakSsmlAsync( - ssml, - result => resolve(result), - error => reject(error) - ); + // trigger start continuous listening if set + onEndSpeakStartListen(); + }; + + // Clear any existing visemes before starting new speech + clearVisemes(); + + // Set up the viseme event handler + speechSynthesizer.visemeReceived = function (_, e) { + addVisemeToQueue({ + visemeId: e.visemeId, + audioOffset: e.audioOffset, }); + }; - currentSpeechRef.current = { - cancel: () => { - speechSynthesizer?.close(); - audioDestinationRef.current?.pause(); - } - }; + const textToSpeak = escapeHTML( + stripMarkdown(stripEmojis(stripHTML(stripOutputTags(text)))) + ); - const result = await speakPromise; + speechSynthesizer.speakSsmlAsync( + `${replaceTextWithPhonemes( + textToSpeak, + userLang.toLowerCase() + )}`, + result => { + if (result) { + setIsPlayingAudio(true); + memoriSpeaking = true; + + // Process the viseme data + processVisemeQueue(); - setIsPlayingAudio(true); + try { + // Decode the audio data + audioContext.decodeAudioData(result.audioData, function (buffer) { + source.buffer = buffer; + source.connect(audioContext.destination); - if (audioContext && result) { - const audioBuffer = await audioContext.decodeAudioData(result.audioData); - const source = audioContext.createBufferSource(); - source.buffer = audioBuffer; - source.connect(audioContext.destination); + if (history.length < 1 || (isSafari && isIOS)) { + source.start(0); + } + }); + + // Handle the audio context state changes + audioContext.onstatechange = () => { + if ( + audioContext.state === 'suspended' || + audioContext.state === 'closed' + ) { + source.disconnect(); + setIsPlayingAudio(false); + memoriSpeaking = false; + } else if ((audioContext.state as string) === 'interrupted') { + audioContext.resume(); + } + }; + + audioContext.resume(); - source.onended = () => { + if (speechSynthesizer) { + speechSynthesizer.close(); + speechSynthesizer = null; + } + } catch (e) { + console.warn('speak error: ', e); + window.speechSynthesis.speak(new SpeechSynthesisUtterance(text)); + clearVisemes(); + setIsPlayingAudio(false); + memoriSpeaking = false; + + if (speechSynthesizer) { + speechSynthesizer.close(); + speechSynthesizer = null; + } + emitEndSpeakEvent(); + } + } else { + audioContext.resume(); + clearVisemes(); setIsPlayingAudio(false); - stopProcessing(); - resetVisemeQueue(); - currentSpeechRef.current = null; + memoriSpeaking = false; emitEndSpeakEvent(); - onEndSpeakStartListen(); - }; - - await audioContext.resume(); - source.start(0); - } else { - stopProcessing(); - resetVisemeQueue(); - throw new Error('No result from speech synthesis'); + } + }, + error => { + console.error('speak:', error); + window.speechSynthesis.speak(new SpeechSynthesisUtterance(text)); + setIsPlayingAudio(false); + memoriSpeaking = false; + emitEndSpeakEvent(); } - } catch (error) { - console.error('Speech synthesis error:', error); - stopProcessing(); - resetVisemeQueue(); - // Fallback to browser's speech synthesis - const utterance = new SpeechSynthesisUtterance(text); - window.speechSynthesis.speak(utterance); - } finally { - setMemoriTyping(false); - } - }, [initializeAudioContext, initializeSpeechSynthesizer, stopCurrentSpeech]); + ); - const stopAudio = useCallback(() => { + setMemoriTyping(false); + }; + const stopAudio = () => { setIsPlayingAudio(false); memoriSpeaking = false; try { - if (speechSynthesizerRef.current) { - speechSynthesizerRef.current.close(); - speechSynthesizerRef.current = null; + if (speechSynthesizer) { + speechSynthesizer.close(); + speechSynthesizer = null; } - if (audioContextRef.current && audioContextRef.current.state !== 'closed') { - audioContextRef.current.close(); + if (audioContext.state !== 'closed') { + audioContext.close(); } - if (audioDestinationRef.current) { - audioDestinationRef.current.pause(); - audioDestinationRef.current.close(); + if (audioDestination) { + audioDestination.pause(); + audioDestination.close(); } - stopCurrentSpeech(); } catch (e) { console.debug('stopAudio error: ', e); } - }, [stopCurrentSpeech]); - + }; /** * Focus on the chat input on mount @@ -2214,12 +2240,6 @@ const MemoriWidget = ({ useEffect(() => { return () => { resetUIEffects(); - if (speechSynthesizerRef.current) { - speechSynthesizerRef.current.close(); - } - if (audioContextRef.current) { - audioContextRef.current.close(); - } }; // eslint-disable-next-line react-hooks/exhaustive-deps }, []); diff --git a/src/components/layouts/ZoomedFullBody.tsx b/src/components/layouts/ZoomedFullBody.tsx index 488e9324..77fd0cfe 100644 --- a/src/components/layouts/ZoomedFullBody.tsx +++ b/src/components/layouts/ZoomedFullBody.tsx @@ -1,8 +1,8 @@ -import React, { useEffect, useCallback } from 'react'; +import React from 'react'; import Spin from '../ui/Spin'; import { LayoutProps } from '../MemoriWidget/MemoriWidget'; -const ZoomedFullBodyLayout: React.FC = ({ +const FullPageLayout: React.FC = ({ Header, headerProps, Avatar, @@ -20,45 +20,36 @@ const ZoomedFullBodyLayout: React.FC = ({ showInstruct = false, loading = false, poweredBy, -}) => { - useEffect(() => { - document.body.style.overflow = 'hidden'; - return () => { - document.body.style.overflow = ''; - }; - }, []); +}) => ( + <> + {integrationStyle} + {integrationBackground} - return ( - <> - {integrationStyle} - {integrationBackground} + + {showInstruct && ChangeMode && changeModeProps && ( + + )} - - {showInstruct && ChangeMode && changeModeProps && } + {Header && headerProps &&
} - {Header && headerProps &&
} +
+
+ {Avatar && avatarProps && } -
-
- {Avatar && avatarProps && ( - - )} - -
-
-
- {sessionId && hasUserActivatedSpeak && Chat && chatProps ? ( - - ) : startPanelProps ? ( - - ) : null} -
- -
{poweredBy}
+
- - - ); -}; +
+ {sessionId && hasUserActivatedSpeak && Chat && chatProps ? ( + + ) : startPanelProps ? ( + + ) : null} +
+ + {poweredBy} +
+ + +); -export default ZoomedFullBodyLayout; +export default FullPageLayout; diff --git a/src/components/layouts/__snapshots__/ZoomedFullBody.test.tsx.snap b/src/components/layouts/__snapshots__/ZoomedFullBody.test.tsx.snap index b5162ce9..c0d7a523 100644 --- a/src/components/layouts/__snapshots__/ZoomedFullBody.test.tsx.snap +++ b/src/components/layouts/__snapshots__/ZoomedFullBody.test.tsx.snap @@ -36,7 +36,7 @@ exports[`renders ZOOMED_FULL_BODY layout unchanged 1`] = ` />
-
- -

- - Powered by - - - Memori.AI - -

-
+ +

+ + Powered by + + + Memori.AI + +

void; - updateCurrentViseme: (currentTime: number) => Viseme | null; - startProcessing: () => void; - stopProcessing: () => void; - resetVisemeQueue: () => void; - isProcessing: boolean; + setMeshRef: (mesh: SkinnedMesh | null) => void; + addVisemeToQueue: (viseme: AzureViseme) => void; + processVisemeQueue: () => ProcessedViseme[]; + clearVisemes: () => void; + isMeshSet: boolean; + setEmotion: (emotion: string) => void; + emotion: string; + getAzureStyleForEmotion: (emotion: string) => string; } const VisemeContext = createContext(undefined); +const VISEME_SMOOTHING = 0.5; +const DEFAULT_VISEME_DURATION = 0.1; +const MINIMUM_ELAPSED_TIME = 0.01; +const VISEME_SPEED_FACTOR = 1.0; +const AUDIO_PLAYBACK_RATE = 1.0; +const VISEME_BASE_SPEED = 1.0; + const VISEME_MAP: { [key: number]: string } = { 0: 'viseme_sil', // silence 1: 'viseme_PP', // p, b, m @@ -44,102 +63,253 @@ const VISEME_MAP: { [key: number]: string } = { 21: 'viseme_PP', // y (closest match, could be debated) }; -const DEFAULT_VISEME_DURATION = 0.04 //0; // Reduced from 0.4 for smoother transitions -const VISEME_OVERLAP = 0.35; // Slightly increased from 0.04 for more overlap -const SMOOTHING_FACTOR = 0.35 // New constant for weight smoothing -const TIME_OFFSET =-0.25; // Adjust this value as needed (in seconds) -const PRELOAD_TIME = 0.525; // Preload visemes 0.5 seconds in advance - -export const VisemeProvider: React.FC<{ children: React.ReactNode }> = ({ children }) => { - const visemeQueueRef = useRef([]); +export const VisemeProvider: React.FC<{ children: React.ReactNode }> = ({ + children, +}) => { + const [isMeshSet, setIsMeshSet] = useState(false); + const [emotion, setEmotion] = useState('Neutral'); + const isAnimatingRef = useRef(false); + const currentVisemesRef = useRef([]); + const visemeQueueRef = useRef([]); + const animationFrameRef = useRef(null); const startTimeRef = useRef(null); - const [isProcessing, setIsProcessing] = useState(false); - const lastVisemeRef = useRef(null); - - const addViseme = useCallback((visemeId: number, audioOffset: number) => { - const visemeName = VISEME_MAP[visemeId] || 'viseme_sil'; - const startTime = audioOffset / 10000000 + TIME_OFFSET; - const endTime = startTime + DEFAULT_VISEME_DURATION; - const newViseme: Viseme = { - name: visemeName, - weight: 0, - startTime, - endTime, - }; - visemeQueueRef.current.push(newViseme); + const currentVisemeWeightRef = useRef<{ [key: string]: number }>({}); + const meshRef = useRef(null); - if (!isProcessing) { - startProcessing(); - } - }, [isProcessing]); + const lerp = (start: number, end: number, alpha: number): number => { + return start * (1 - alpha) + end * alpha; + }; - const updateCurrentViseme = useCallback((currentTime: number): Viseme | null => { - if (!isProcessing || startTimeRef.current === null) { - console.log('StartTimeRef not set'); - return null; - } + const easeInOutCubic = (x: number): number => { + return x < 0.5 ? 4 * x * x * x : 1 - Math.pow(-2 * x + 2, 3) / 2; + }; - const elapsedTime = currentTime - startTimeRef.current + PRELOAD_TIME + const setMeshRef = useCallback( + (mesh: SkinnedMesh | null) => { + if (mesh && mesh.morphTargetDictionary && mesh.morphTargetInfluences) { + meshRef.current = mesh; + setIsMeshSet(true); + // console.log('Mesh set successfully:', mesh); + } else { + console.error('Invalid mesh provided:', mesh); + } + }, + [meshRef] + ); - // Remove expired visemes - visemeQueueRef.current = visemeQueueRef.current.filter(v => v.endTime > elapsedTime); + const addVisemeToQueue = useCallback((viseme: AzureViseme) => { + visemeQueueRef.current.push(viseme); + // console.log('Viseme added to queue:', viseme); + }, []); + + const getCurrentViseme = useCallback((elapsedTime: number) => { + if (elapsedTime < MINIMUM_ELAPSED_TIME) return null; + + return currentVisemesRef.current.find((viseme, index) => { + const nextViseme = currentVisemesRef.current[index + 1]; + return ( + elapsedTime >= viseme.startTime && + (!nextViseme || elapsedTime < nextViseme.startTime) + ); + }); + }, []); - const currentViseme = visemeQueueRef.current.find(v => - v.startTime <= elapsedTime && v.endTime > elapsedTime - VISEME_OVERLAP + const getDynamicSpeedFactor = (visemeDuration: number): number => { + const baseDuration = 0.1; // Average expected viseme duration + return ( + VISEME_BASE_SPEED * (baseDuration / visemeDuration) * AUDIO_PLAYBACK_RATE ); + }; + + const applyViseme = useCallback( + (viseme: ProcessedViseme, elapsedTime: number) => { + if (!meshRef.current) { + console.error('Mesh not set'); + return; + } + + const visemeProgress = Math.min( + (elapsedTime - viseme.startTime) / viseme.duration, + 1 + ); + + const dynamicSpeedFactor = getDynamicSpeedFactor(viseme.duration); + const adjustedProgress = visemeProgress * dynamicSpeedFactor; + + // Use a cubic easing function for smoother transitions + const easedProgress = easeInOutCubic(adjustedProgress); + const targetWeight = Math.sin(easedProgress * Math.PI) * viseme.weight; + + currentVisemeWeightRef.current[viseme.name] = lerp( + currentVisemeWeightRef.current[viseme.name] || 0, + targetWeight, + VISEME_SMOOTHING + ); + + const visemeIndex = meshRef.current.morphTargetDictionary?.[viseme.name]; + if ( + typeof visemeIndex === 'number' && + meshRef.current.morphTargetInfluences + ) { + meshRef.current.morphTargetInfluences[visemeIndex] = + currentVisemeWeightRef.current[viseme.name]; + // console.log(`Applied viseme: ${viseme.name}, weight: ${currentVisemeWeightRef.current[viseme.name]}`); + } else { + console.error( + `Viseme not found in morph target dictionary: ${viseme.name}` + ); + } + }, + [] + ); + + const animate = useCallback( + (time: number) => { + if (startTimeRef.current === null) { + startTimeRef.current = time; + } + + const elapsedTime = + ((time - startTimeRef.current) / 1000) * VISEME_SPEED_FACTOR; + + const currentViseme = getCurrentViseme(elapsedTime); - if (currentViseme) { - console.log('CurrentViseme Found!') - const visemeProgress = (elapsedTime - currentViseme.startTime) / (currentViseme.endTime - currentViseme.startTime); - const targetWeight = Math.sin(Math.PI * Math.min(visemeProgress, 1)); - - // Smooth the weight transition - const smoothedWeight = lastVisemeRef.current - ? lastVisemeRef.current.weight + (targetWeight - lastVisemeRef.current.weight) * SMOOTHING_FACTOR - : targetWeight; - - const updatedViseme = { ...currentViseme, weight: smoothedWeight }; - lastVisemeRef.current = updatedViseme; - return updatedViseme; + if (currentViseme) { + applyViseme(currentViseme, elapsedTime); + } + + if ( + currentVisemesRef.current.length > 0 && + elapsedTime < + currentVisemesRef.current[currentVisemesRef.current.length - 1] + .startTime + + currentVisemesRef.current[currentVisemesRef.current.length - 1] + .duration + ) { + animationFrameRef.current = requestAnimationFrame(animate); + } else { + clearVisemes(); + } + }, + [getCurrentViseme, applyViseme] + ); + + const processVisemeQueue = useCallback(() => { + const azureVisemes = [...visemeQueueRef.current]; + visemeQueueRef.current = []; + + if (azureVisemes.length === 0) { + // console.log('No visemes to process'); + return []; } - // Gradually reduce weight when no viseme is active - if (lastVisemeRef.current) { - const reducedWeight = lastVisemeRef.current.weight * (1 - SMOOTHING_FACTOR); - if (reducedWeight > 0.01) { - lastVisemeRef.current = { ...lastVisemeRef.current, weight: reducedWeight }; - return lastVisemeRef.current; + const processedVisemes: ProcessedViseme[] = azureVisemes.map( + (currentViseme, i) => { + const nextViseme = azureVisemes[i + 1]; + const duration = nextViseme + ? (nextViseme.audioOffset - currentViseme.audioOffset) / 10000000 + : DEFAULT_VISEME_DURATION; + + const processedViseme = { + name: VISEME_MAP[currentViseme.visemeId] || 'viseme_sil', + duration, + weight: 1, + startTime: currentViseme.audioOffset / 10000000, + }; + //console.log('Processed viseme:', processedViseme); + return processedViseme; + } + ); + + currentVisemesRef.current = processedVisemes; + + // Start animation immediately if not already animating + if (!isAnimatingRef.current) { + isAnimatingRef.current = true; + startTimeRef.current = performance.now(); + // console.log('Starting animation'); + animationFrameRef.current = requestAnimationFrame(animate); + } else { + // If already animating, adjust the start time for the new visemes + if (startTimeRef.current !== null) { + const currentTime = performance.now(); + const elapsedTime = + ((currentTime - startTimeRef.current) / 1000) * VISEME_SPEED_FACTOR; + startTimeRef.current = + currentTime - (elapsedTime / VISEME_SPEED_FACTOR) * 1000; } } - lastVisemeRef.current = null; - return null; - }, [isProcessing]); + return processedVisemes; + }, [isMeshSet, animate]); + + const clearVisemes = useCallback(() => { + currentVisemesRef.current = []; + visemeQueueRef.current = []; + + if (animationFrameRef.current !== null) { + cancelAnimationFrame(animationFrameRef.current); + animationFrameRef.current = null; + } - const startProcessing = useCallback(() => { - if (isProcessing) return; - startTimeRef.current = performance.now() / 1000; - setIsProcessing(true); - }, [isProcessing]); + if ( + meshRef.current?.morphTargetDictionary && + meshRef.current?.morphTargetInfluences + ) { + Object.values(meshRef.current.morphTargetDictionary).forEach(index => { + if (typeof index === 'number') { + meshRef.current!.morphTargetInfluences![index] = 0; + } + }); + } - const stopProcessing = useCallback(() => { - setIsProcessing(false); + currentVisemeWeightRef.current = {}; startTimeRef.current = null; - lastVisemeRef.current = null; + isAnimatingRef.current = false; + // console.log('Visemes cleared'); }, []); - const resetVisemeQueue = useCallback(() => { - visemeQueueRef.current = []; - lastVisemeRef.current = null; + // Your existing emotion map + const emotionMap: Record> = { + Gioia: { Gioria: 1 }, + Rabbia: { Rabbia: 1 }, + Sorpresa: { Sorpresa: 1 }, + Tristezza: { Tristezza: 1 }, + Timore: { Timore: 1 }, + }; + + // Mapping from your emotions to Azure styles + const emotionToAzureStyleMap: Record = { + Gioia: 'cheerful', + Rabbia: 'angry', + Sorpresa: 'excited', + Tristezza: 'sad', + Timore: 'terrified', + }; + + // Function to get Azure style from emotion + function getAzureStyleForEmotion(emotion: string): string { + return emotionToAzureStyleMap[emotion] || 'neutral'; + } + + + useEffect(() => { + return () => { + if (animationFrameRef.current !== null) { + cancelAnimationFrame(animationFrameRef.current); + } + }; }, []); - const contextValue = { - addViseme, - updateCurrentViseme, - startProcessing, - stopProcessing, - resetVisemeQueue, - isProcessing, + const contextValue: VisemeContextType = { + setMeshRef, + addVisemeToQueue, + processVisemeQueue, + clearVisemes, + isMeshSet, + setEmotion, + emotion, + getAzureStyleForEmotion, }; return ( @@ -151,8 +321,8 @@ export const VisemeProvider: React.FC<{ children: React.ReactNode }> = ({ childr export const useViseme = (): VisemeContextType => { const context = useContext(VisemeContext); - if (!context) { + if (context === undefined) { throw new Error('useViseme must be used within a VisemeProvider'); } return context; -}; \ No newline at end of file +}; diff --git a/src/helpers/utils.ts b/src/helpers/utils.ts index 99a35432..01f53cb9 100644 --- a/src/helpers/utils.ts +++ b/src/helpers/utils.ts @@ -184,16 +184,15 @@ export const stripMarkdown = (text: string) => { }; export const stripOutputTags = (text: string): string => { - const outputTagRegex = //gs; - - if (!outputTagRegex.test(text)) { - return text; - } + let hasTags = text.includes(''); + + if (!hasTags) return text; + + let output = text.split(''); + let textBefore = output[0].split(' { diff --git a/src/styles.css b/src/styles.css index bafbabea..8df84b2c 100644 --- a/src/styles.css +++ b/src/styles.css @@ -52,7 +52,6 @@ @import url('./components/layouts/totem.css'); @import url('./components/layouts/website-assistant.css'); @import url('./components/layouts/chat.css'); -@import url('./components/layouts/zoomed-full-body.css'); @import url('https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/katex.min.css');