Skip to content

Commit

Permalink
add playback_rate in AzureTTSService
Browse files (browse the repository at this point in the history)
  • Loading branch information
duyalei committed Sep 24, 2024
1 parent 867ce87 commit 5a83f8c
Showing 1 changed file with 27 additions and 9 deletions.
36 changes: 27 additions & 9 deletions src/pipecat/services/azure.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -42,7 +42,10 @@
CancellationReason,
languageconfig,
)
from azure.cognitiveservices.speech.audio import AudioStreamFormat, PushAudioInputStream
from azure.cognitiveservices.speech.audio import (
AudioStreamFormat,
PushAudioInputStream,
)
from azure.cognitiveservices.speech.dialog import AudioConfig
except ModuleNotFoundError as e:
logger.error(f"Exception: {e}")
Expand All @@ -54,7 +57,12 @@

class AzureLLMService(BaseOpenAILLMService):
def __init__(
self, *, api_key: str, endpoint: str, model: str, api_version: str = "2023-12-01-preview"
self,
*,
api_key: str,
endpoint: str,
model: str,
api_version: str = "2023-12-01-preview",
):
# Initialize variables before calling parent __init__() because that
# will call create_client() and we need those values there.
Expand All @@ -78,6 +86,7 @@ def __init__(
region: str,
voice="en-US-SaraNeural",
sample_rate: int = 16000,
playback_rate: float = 1.0,
**kwargs,
):
super().__init__(sample_rate=sample_rate, **kwargs)
Expand All @@ -87,6 +96,7 @@ def __init__(

self._voice = voice
self._sample_rate = sample_rate
self._playback_rate = playback_rate

def can_generate_metrics(self) -> bool:
return True
Expand All @@ -106,7 +116,7 @@ async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
f"<voice name='{self._voice}'>"
"<mstts:silence type='Sentenceboundary' value='20ms' />"
"<mstts:express-as style='lyrical' styledegree='2' role='SeniorFemale'>"
"<prosody rate='1.05'>"
f"<prosody rate='{self._playback_rate}'>"
f"{text}"
"</prosody></mstts:express-as></voice></speak> "
)
Expand All @@ -119,7 +129,9 @@ async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
await self.push_frame(TTSStartedFrame())
# Azure always sends a 44-byte header. Strip it off.
yield TTSAudioRawFrame(
audio=result.audio_data[44:], sample_rate=self._sample_rate, num_channels=1
audio=result.audio_data[44:],
sample_rate=self._sample_rate,
num_channels=1,
)
await self.push_frame(TTSStoppedFrame())
elif result.reason == ResultReason.Canceled:
Expand Down Expand Up @@ -154,12 +166,15 @@ def __init__(
speech_config=speech_config,
audio_config=audio_config,
auto_detect_source_language_config=languageconfig.AutoDetectSourceLanguageConfig(
languages=language)
languages=language
),
)
else:
self._speech_recognizer = SpeechRecognizer(speech_config=speech_config,
audio_config=audio_config,
language=language)
self._speech_recognizer = SpeechRecognizer(
speech_config=speech_config,
audio_config=audio_config,
language=language,
)
self._speech_recognizer.recognized.connect(self._on_handle_recognized)

async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
Expand Down Expand Up @@ -253,6 +268,9 @@ async def run_image_gen(self, prompt: str) -> AsyncGenerator[Frame, None]:
image_stream = io.BytesIO(await response.content.read())
image = Image.open(image_stream)
frame = URLImageRawFrame(
url=image_url, image=image.tobytes(), size=image.size, format=image.format
url=image_url,
image=image.tobytes(),
size=image.size,
format=image.format,
)
yield frame

0 comments on commit 5a83f8c

Please sign in to comment.