From 1bbdf02618cbebe95799b0b72908a459d0b8d735 Mon Sep 17 00:00:00 2001
From: lauridsdev <65713140+lauridsdev@users.noreply.github.com>
Date: Sat, 14 Sep 2024 21:51:55 +0200
Subject: [PATCH] add voice control for speed and emotions

Store a speed and an emotion list on the Cartesia TTS service, add
set_voice_controls() to change them, and send both with every TTS
request as the voice's "__experimental_controls".
---
 src/pipecat/services/cartesia.py | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/src/pipecat/services/cartesia.py b/src/pipecat/services/cartesia.py
index ac02ea469..aa27cb00b 100644
--- a/src/pipecat/services/cartesia.py
+++ b/src/pipecat/services/cartesia.py
@@ -102,6 +102,10 @@ def __init__(
         self._context_id = None
         self._receive_task = None
 
+        # Initialize default values for speed and emotion controls
+        self._speed = "normal"  # default speed
+        self._emotion = []  # default to no emotion
+
     def can_generate_metrics(self) -> bool:
         return True
 
@@ -200,8 +204,21 @@ async def _receive_task_handler(self):
         except Exception as e:
             logger.exception(f"{self} exception: {e}")
 
+    def set_voice_controls(self, speed: str = "normal", emotion: list[str] = None):
+        """Set the speed and emotion controls used by later TTS requests.
+
+        Args:
+            speed: Speed control stored as-is and sent with each request.
+            emotion: Emotion controls sent with each request; ``None`` or an
+                empty list means no emotion.
+        """
+        self._speed = speed
+        # Copy so later changes to the caller's list cannot alter our state.
+        self._emotion = list(emotion) if emotion else []
+
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
-        logger.debug(f"Generating TTS: [{text}]")
+        """Generate TTS for the text using the configured speed and emotion."""
+        logger.debug(f"Generating TTS: [{text}], Speed: [{self._speed}], Emotion: [{self._emotion}]")
 
         try:
             if not self._websocket:
@@ -212,6 +229,12 @@ async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
                 await self.start_ttfb_metrics()
                 self._context_id = str(uuid.uuid4())
 
+            # Speed/emotion controls taken from the stored instance settings.
+            voice_controls = {
+                "speed": self._speed,
+                "emotion": self._emotion,
+            }
+
             msg = {
                 "transcript": text + " ",
                 "continue": True,
@@ -224,7 +247,11 @@ async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
                 "output_format": self._output_format,
                 "language": self._language,
                 "add_timestamps": True,
             }
+            # Cartesia reads "__experimental_controls" from the "voice" object,
+            # not from the top level of the request (see Cartesia TTS API docs).
+            msg.setdefault("voice", {})["__experimental_controls"] = voice_controls
+
             try:
                 await self._websocket.send(json.dumps(msg))
                 await self.start_tts_usage_metrics(text)
@@ -234,6 +261,7 @@ async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
                 await self._disconnect()
                 await self._connect()
                 return
+
             yield None
         except Exception as e:
             logger.exception(f"{self} exception: {e}")