Skip to content

Commit

Permalink
Add voice options and make Cartesia use InputParams.
Browse files: browse the repository at this point in the history
  • Loading branch information
golbin committed Sep 9, 2024
1 parent 7c342f7 commit fa0deed
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 17 deletions.
4 changes: 3 additions & 1 deletion examples/foundational/07d-interruptible-cartesia.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ async def main():
tts = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="a0e99841-438c-4a64-b679-ae501e7d6091", # Barbershop Man
sample_rate=44100,
params=CartesiaTTSService.InputParams(
sample_rate=44100,
),
)

llm = OpenAILLMService(
Expand Down
4 changes: 3 additions & 1 deletion examples/foundational/12c-describe-video-anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,9 @@ async def main():
tts = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
sample_rate=16000,
params=CartesiaTTSService.InputParams(
sample_rate=16000,
),
)

@transport.event_handler("on_first_participant_joined")
Expand Down
4 changes: 3 additions & 1 deletion examples/studypal/studypal.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,9 @@ async def main():
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id=os.getenv("CARTESIA_VOICE_ID", "4d2fd738-3b3d-4368-957a-bb4805275bd9"),
# British Narration Lady: 4d2fd738-3b3d-4368-957a-bb4805275bd9
sample_rate=44100,
params=CartesiaTTSService.InputParams(
sample_rate=44100,
),
)

llm = OpenAILLMService(
Expand Down
45 changes: 31 additions & 14 deletions src/pipecat/services/cartesia.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
import asyncio
import time

from typing import AsyncGenerator, Mapping
from typing import AsyncGenerator, Optional
from pydantic.main import BaseModel

from pipecat.frames.frames import (
CancelFrame,
Expand Down Expand Up @@ -61,6 +62,14 @@ def language_to_cartesia_language(language: Language) -> str | None:


class CartesiaTTSService(TTSService):
class InputParams(BaseModel):
model_id: Optional[str] = "sonic-english"
encoding: Optional[str] = "pcm_s16le"
sample_rate: Optional[int] = 16000
container: Optional[str] = "raw"
language: Optional[str] = "en"
speed: Optional[str] = None
emotion: Optional[list[str]] = []

def __init__(
self,
Expand All @@ -69,10 +78,7 @@ def __init__(
voice_id: str,
cartesia_version: str = "2024-06-10",
url: str = "wss://api.cartesia.ai/tts/websocket",
model_id: str = "sonic-english",
encoding: str = "pcm_s16le",
sample_rate: int = 16000,
language: str = "en",
params: InputParams = InputParams(),
**kwargs):
super().__init__(**kwargs)

Expand All @@ -92,13 +98,15 @@ def __init__(
self._cartesia_version = cartesia_version
self._url = url
self._voice_id = voice_id
self._model_id = model_id
self._model_id = params.model_id
self._output_format = {
"container": "raw",
"encoding": encoding,
"sample_rate": sample_rate,
"container": params.container,
"encoding": params.encoding,
"sample_rate": params.sample_rate,
}
self._language = language
self._language = params.language
self._speed = params.speed
self._emotion = params.emotion

self._websocket = None
self._context_id = None
Expand Down Expand Up @@ -249,15 +257,24 @@ async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
await self.start_ttfb_metrics()
self._context_id = str(uuid.uuid4())

voice_config = {
"mode": "id",
"id": self._voice_id
}

if self._speed or self._emotion:
voice_config["__experimental_controls"] = {}
if self._speed:
voice_config["__experimental_controls"]["speed"] = self._speed
if self._emotion:
voice_config["__experimental_controls"]["emotion"] = self._emotion

msg = {
"transcript": text + " ",
"continue": True,
"context_id": self._context_id,
"model_id": self._model_id,
"voice": {
"mode": "id",
"id": self._voice_id
},
"voice": voice_config,
"output_format": self._output_format,
"language": self._language,
"add_timestamps": True,
Expand Down

0 comments on commit fa0deed

Please sign in to comment.