Skip to content

Commit

Permalink
Merge pull request #208 from pipecat-ai/aleix/cartesia-voice-load-startup
Browse files Browse the repository at this point in the history

services(cartesia): load voices on startup
  • Loading branch information
aconchillo authored Jun 4, 2024
2 parents 20a5256 + 659ecee commit 9d36a48
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 13 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Fixed an issue with Deepgram TTS that was introduced in the previous release.

### Performance

- Load Cartesia voice on startup.

## [0.0.25] - 2024-05-31

### Added
Expand Down
23 changes: 10 additions & 13 deletions src/pipecat/services/cartesia.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,9 @@

from cartesia.tts import AsyncCartesiaTTS

import time
from typing import AsyncGenerator

from pipecat.frames.frames import AudioRawFrame, ErrorFrame, Frame
from pipecat.frames.frames import AudioRawFrame, Frame
from pipecat.services.ai_services import TTSService

from loguru import logger
Expand All @@ -28,18 +27,18 @@ def __init__(
self._api_key = api_key
self._voice_name = voice_name

self._client = None
try:
self._client = AsyncCartesiaTTS(api_key=self._api_key)
voices = self._client.get_voices()
voice_id = voices[self._voice_name]["id"]
self._voice = self._client.get_voice_embedding(voice_id=voice_id)
except Exception as e:
logger.error(f"Cartesia initialization error: {e}")

async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
logger.debug(f"Transcribing text: [{text}]")

try:
if self._client is None:
self._client = AsyncCartesiaTTS(api_key=self._api_key)
voices = self._client.get_voices()
self._voice_id = voices[self._voice_name]["id"]
self._voice = self._client.get_voice_embedding(voice_id=self._voice_id)

chunk_generator = await self._client.generate(
transcript=text, voice=self._voice, stream=True,
model_id="upbeat-moon", data_rtype='array', output_format='pcm_16000',
Expand All @@ -49,8 +48,6 @@ async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
)

async for chunk in chunk_generator:
# print(f"")
frame = AudioRawFrame(chunk['audio'], 16000, 1)
yield frame
yield AudioRawFrame(chunk['audio'], 16000, 1)
except Exception as e:
logger.error(f"Exception {e}")
logger.error(f"Cartesia error: {e}")

0 comments on commit 9d36a48

Please sign in to comment.