From d05717a1bd709ecbf475bb2ff410763bcd8783cd Mon Sep 17 00:00:00 2001
From: Jin Kim
Date: Thu, 26 Sep 2024 19:52:25 +0900
Subject: [PATCH] Apply Ruff formatter

---
 src/pipecat/services/cartesia.py | 41 ++++++++++++++++----------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/src/pipecat/services/cartesia.py b/src/pipecat/services/cartesia.py
index f08c06dea..c1c296046 100644
--- a/src/pipecat/services/cartesia.py
+++ b/src/pipecat/services/cartesia.py
@@ -71,15 +71,16 @@ class InputParams(BaseModel):
         emotion: Optional[List[str]] = []
     def __init__(
-            self,
-            *,
-            api_key: str,
-            voice_id: str,
-            cartesia_version: str = "2024-06-10",
-            url: str = "wss://api.cartesia.ai/tts/websocket",
-            model_id: str = "sonic-english",
-            params: InputParams = InputParams(),
-            **kwargs):
+        self,
+        *,
+        api_key: str,
+        voice_id: str,
+        cartesia_version: str = "2024-06-10",
+        url: str = "wss://api.cartesia.ai/tts/websocket",
+        model_id: str = "sonic-english",
+        params: InputParams = InputParams(),
+        **kwargs,
+    ):
         # Aggregating sentences still gives cleaner-sounding results and fewer
         # artifacts than streaming one word at a time. On average, waiting for a
         # full sentence should only "cost" us 15ms or so with GPT-4o or a Llama
         # 3 model, and it's worth it for the better audio quality.
@@ -91,7 +92,10 @@ def __init__(
         # can use those to generate text frames ourselves aligned with the
         # playout timing of the audio!
         super().__init__(
-            aggregate_sentences=True, push_text_frames=False, sample_rate=params.sample_rate, **kwargs
+            aggregate_sentences=True,
+            push_text_frames=False,
+            sample_rate=params.sample_rate,
+            **kwargs,
         )

         self._api_key = api_key
@@ -137,11 +141,10 @@ async def set_language(self, language: Language):
         logger.debug(f"Switching TTS language to: [{language}]")
         self._language = language_to_cartesia_language(language)

-    def _build_msg(self, text: str = "", continue_transcript: bool = True, add_timestamps: bool = True):
-        voice_config = {
-            "mode": "id",
-            "id": self._voice_id
-        }
+    def _build_msg(
+        self, text: str = "", continue_transcript: bool = True, add_timestamps: bool = True
+    ):
+        voice_config = {"mode": "id", "id": self._voice_id}

         if self._speed or self._emotion:
             voice_config["__experimental_controls"] = {}
@@ -236,8 +239,7 @@ async def _receive_task_handler(self):
                     await self.add_word_timestamps([("LLMFullResponseEndFrame", 0)])
                 elif msg["type"] == "timestamps":
                     await self.add_word_timestamps(
-                        list(zip(msg["word_timestamps"]["words"],
-                                 msg["word_timestamps"]["start"]))
+                        list(zip(msg["word_timestamps"]["words"], msg["word_timestamps"]["start"]))
                     )
                 elif msg["type"] == "chunk":
                     await self.stop_ttfb_metrics()
@@ -254,8 +256,7 @@
                     await self.stop_all_metrics()
                     await self.push_error(ErrorFrame(f'{self} error: {msg["error"]}'))
                 else:
-                    logger.error(
-                        f"Cartesia error, unknown message type: {msg}")
+                    logger.error(f"Cartesia error, unknown message type: {msg}")
         except asyncio.CancelledError:
             pass
         except Exception as e:
@@ -379,7 +380,7 @@ async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
             output_format=self._output_format,
             language=self._language,
             stream=False,
-            _experimental_voice_controls=voice_controls
+            _experimental_voice_controls=voice_controls,
         )

         await self.stop_ttfb_metrics()
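
The rewrapping in this patch is consistent with Ruff's formatter at a
100-character line length rather than the default 88: for example, the merged
list(zip(...)) call comes to 99 columns, and the one-line
super().__init__(...) call that gets split would exceed 100. A minimal sketch
of the configuration and command presumably used (assumed here, not taken
from this patch):

    # pyproject.toml (assumed Ruff config, not part of this diff)
    [tool.ruff]
    line-length = 100

Run from the repository root with:

    ruff format src/pipecat/services/cartesia.py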