Skip to content

Commit

Permalink
PlayHTHttpTTSService fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
markbackman committed Jan 7, 2025
1 parent 386ba61 commit 8258a10
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 4 deletions.
17 changes: 17 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,23 @@ All notable changes to **Pipecat** will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased] - TBD

### Added

- Added a new foundational example `07e-interruptible-playht-http.py` for easy
testing of `PlayHTHttpTTSService`.

### Changed

- Changed the default model for `PlayHTHttpTTSService` to `Play3.0-mini-http`.

### Fixed

- Fixed an import issue for `PlayHTHttpTTSService`.

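- Fixed an issue where languages couldn't be used with `PlayHTHttpTTSService`:
  the configured language string is now converted to the PlayHT `Language` enum
  before being passed to `TTSOptions`.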

## [0.0.52] - 2024-12-24

### Added
Expand Down
101 changes: 101 additions & 0 deletions examples/foundational/07e-interruptible-playht-http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD-2-Clause
#

import asyncio
import os
import sys

import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure

from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.openai import OpenAILLMService
from pipecat.services.playht import PlayHTHttpTTSService
from pipecat.transcriptions.language import Language
from pipecat.transports.services.daily import DailyParams, DailyTransport

# Load variables from a local .env file; override=True lets values from the
# file replace variables that are already set in the process environment.
load_dotenv(override=True)

# Swap loguru's pre-installed handler (id 0) for a stderr sink at DEBUG level
# so the example prints verbose pipeline logs.
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")


async def main():
    """Run the interruptible PlayHT HTTP TTS example bot in a Daily room.

    The pipeline is wired as: Daily input -> user context aggregator ->
    OpenAI LLM -> PlayHT HTTP TTS -> Daily output -> assistant context
    aggregator. When the first participant joins, their transcription is
    captured and the LLM is asked to introduce itself.

    Credentials are read from the environment: PLAYHT_USER_ID,
    PLAYHT_API_KEY and OPENAI_API_KEY.
    """
    async with aiohttp.ClientSession() as http_session:
        room_url, token = await configure(http_session)

        # Transport: audio out, Daily transcription and Silero VAD enabled.
        daily_params = DailyParams(
            audio_out_enabled=True,
            transcription_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
        )
        daily = DailyTransport(room_url, token, "Respond bot", daily_params)

        # Text-to-speech through PlayHT's HTTP service (the subject of this example).
        speech = PlayHTHttpTTSService(
            user_id=os.getenv("PLAYHT_USER_ID"),
            api_key=os.getenv("PLAYHT_API_KEY"),
            voice_url="s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
        )

        chat_llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

        # Seed the conversation with the system prompt; the join handler below
        # appends to this same list.
        system_prompt = "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way."
        conversation = [
            {
                "role": "system",
                "content": system_prompt,
            },
        ]

        llm_context = OpenAILLMContext(conversation)
        aggregators = chat_llm.create_context_aggregator(llm_context)

        processors = [
            daily.input(),  # Transport user input
            aggregators.user(),  # User responses
            chat_llm,  # LLM
            speech,  # TTS
            daily.output(),  # Transport bot output
            aggregators.assistant(),  # Assistant spoken responses
        ]
        pipeline = Pipeline(processors)

        task_params = PipelineParams(
            allow_interruptions=True,
            enable_metrics=True,
            enable_usage_metrics=True,
            report_only_initial_ttfb=True,
        )
        pipeline_task = PipelineTask(pipeline, task_params)

        @daily.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            await transport.capture_participant_transcription(participant["id"])
            # Kick off the conversation.
            intro_request = {"role": "system", "content": "Please introduce yourself to the user."}
            conversation.append(intro_request)
            await pipeline_task.queue_frames([LLMMessagesFrame(conversation)])

        await PipelineRunner().run(pipeline_task)


if __name__ == "__main__":
    # Only start the bot when executed as a script, not when imported.
    asyncio.run(main())
17 changes: 13 additions & 4 deletions src/pipecat/services/playht.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@

try:
from pyht.async_client import AsyncClient
from pyht.client import TTSOptions
from pyht.protos.api_pb2 import Format
from pyht.client import Format, Language as PlayHTLanguage, TTSOptions
except ModuleNotFoundError as e:
logger.error(f"Exception: {e}")
logger.error(
Expand Down Expand Up @@ -363,7 +362,7 @@ def __init__(
api_key: str,
user_id: str,
voice_url: str,
voice_engine: str = "Play3.0-mini",
voice_engine: str = "Play3.0-mini-http", # Options: Play3.0-mini-ws, Play3.0-mini-http, Play3.0-mini-grpc
sample_rate: int = 24000,
params: InputParams = InputParams(),
**kwargs,
Expand All @@ -389,9 +388,19 @@ def __init__(
}
self.set_model_name(voice_engine)
self.set_voice(voice_url)

language_str = self._settings["language"]
playht_language = None
if language_str:
# Convert string to PlayHT Language enum
for lang in PlayHTLanguage:
if lang.value == language_str:
playht_language = lang
break

self._options = TTSOptions(
voice=self._voice_id,
language=self._settings["language"],
language=playht_language,
sample_rate=self._settings["sample_rate"],
format=self._settings["format"],
speed=self._settings["speed"],
Expand Down

0 comments on commit 8258a10

Please sign in to comment.