From 900b95eb9286dd8b00346ae3c6ec16d40cc26ca0 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Fri, 20 Dec 2024 09:56:53 -0500 Subject: [PATCH] Update PlayHT to use the latest Websocket connection endpoint --- CHANGELOG.md | 3 +++ src/pipecat/services/playht.py | 28 +++++++++++++++++++++------- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0622d4c3a..12f631da4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- `PlayHTTTSService` uses the new v4 websocket API, which also fixes an issue + where text inputted to the TTS didn't return audio. + - The default model for `ElevenLabsTTSService` is now `eleven_flash_v2_5`. - `OpenAIRealtimeBetaLLMService` now takes a `model` parameter in the diff --git a/src/pipecat/services/playht.py b/src/pipecat/services/playht.py index 43e1739c9..115198c02 100644 --- a/src/pipecat/services/playht.py +++ b/src/pipecat/services/playht.py @@ -209,7 +209,7 @@ async def _disconnect_websocket(self): async def _get_websocket_url(self): async with aiohttp.ClientSession() as session: async with session.post( - "https://api.play.ht/api/v3/websocket-auth", + "https://api.play.ht/api/v4/websocket-auth", headers={ "Authorization": f"Bearer {self._api_key}", "X-User-Id": self._user_id, @@ -218,10 +218,19 @@ async def _get_websocket_url(self): ) as response: if response.status in (200, 201): data = await response.json() - if "websocket_url" in data and isinstance(data["websocket_url"], str): - self._websocket_url = data["websocket_url"] + # Handle the new response format with multiple URLs + if "websocket_urls" in data: + # Select URL based on voice_engine + if self._settings["voice_engine"] in data["websocket_urls"]: + self._websocket_url = data["websocket_urls"][ + self._settings["voice_engine"] + ] + else: + raise ValueError( + f"Unsupported voice engine: {self._settings['voice_engine']}" + ) else: - raise ValueError("Invalid or missing WebSocket URL in response") + raise ValueError("Invalid response: missing websocket_urls") else: raise Exception(f"Failed to get WebSocket URL: {response.status}") @@ -248,9 +257,14 @@ async def _receive_messages(self): logger.debug(f"Received text message: {message}") try: msg = json.loads(message) - if "request_id" in msg and msg["request_id"] == self._request_id: - await self.push_frame(TTSStoppedFrame()) - self._request_id = None + if msg.get("type") == "start": + # Handle start of stream + logger.debug(f"Started processing request: {msg.get('request_id')}") + elif msg.get("type") == "end": + # Handle end of stream + if "request_id" in msg and msg["request_id"] == self._request_id: + await self.push_frame(TTSStoppedFrame()) + self._request_id = None elif "error" in msg: logger.error(f"{self} error: {msg}") await self.push_error(ErrorFrame(f'{self} error: {msg["error"]}'))