Skip to content

Commit

Permalink
Live translation (#61)
Browse files Browse the repository at this point in the history
* added translator

* fixup
  • Loading branch information
chadbailey59 authored Mar 18, 2024
1 parent 141a5bb commit 78638d2
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 3 deletions.
5 changes: 3 additions & 2 deletions src/dailyai/services/azure_ai_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,21 @@


class AzureTTSService(TTSService):
def __init__(self, *, api_key, region):
def __init__(self, *, api_key, region, voice="en-US-SaraNeural"):
super().__init__()

self.speech_config = SpeechConfig(subscription=api_key, region=region)
self.speech_synthesizer = SpeechSynthesizer(
speech_config=self.speech_config, audio_config=None
)
self._voice = voice

async def run_tts(self, sentence) -> AsyncGenerator[bytes, None]:
self.logger.info("Running azure tts")
ssml = (
"<speak version='1.0' xml:lang='en-US' xmlns='http://www.w3.org/2001/10/synthesis' "
"xmlns:mstts='http://www.w3.org/2001/mstts'>"
"<voice name='en-US-SaraNeural'>"
f"<voice name='{self._voice}'>"
"<mstts:silence type='Sentenceboundary' value='20ms' />"
"<mstts:express-as style='lyrical' styledegree='2' role='SeniorFemale'>"
"<prosody rate='1.05'>"
Expand Down
4 changes: 3 additions & 1 deletion src/dailyai/services/elevenlabs_ai_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,18 @@ def __init__(
aiohttp_session: aiohttp.ClientSession,
api_key,
voice_id,
model="eleven_turbo_v2",
):
super().__init__()

self._api_key = api_key
self._voice_id = voice_id
self._aiohttp_session = aiohttp_session
self._model = model

async def run_tts(self, sentence) -> AsyncGenerator[bytes, None]:
url = f"https://api.elevenlabs.io/v1/text-to-speech/{self._voice_id}/stream"
payload = {"text": sentence, "model_id": "eleven_turbo_v2"}
payload = {"text": sentence, "model_id": self._model}
querystring = {"output_format": "pcm_16000", "optimize_streaming_latency": 2}
headers = {
"xi-api-key": self._api_key,
Expand Down
84 changes: 84 additions & 0 deletions src/examples/starter-apps/translator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import asyncio
import aiohttp
import logging
import os
from PIL import Image
from typing import AsyncGenerator

from dailyai.pipeline.aggregators import (
LLMResponseAggregator,
UserResponseAggregator,
SentenceAggregator,
)
from dailyai.pipeline.frames import Frame, LLMMessagesQueueFrame, TextFrame
from dailyai.pipeline.frame_processor import FrameProcessor
from dailyai.services.ai_services import AIService, FrameLogger
from dailyai.pipeline.pipeline import Pipeline
from dailyai.services.daily_transport_service import DailyTransportService
from dailyai.services.azure_ai_services import AzureTTSService
from dailyai.services.open_ai_services import OpenAILLMService
from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService
from examples.support.runner import configure

# Root handler format: numeric level, timestamp, then the message text.
logging.basicConfig(format="%(levelno)s %(asctime)s %(message)s")

# Turn on debug-level output for the "dailyai" logger used by this example.
logger = logging.getLogger("dailyai")
logger.setLevel(logging.DEBUG)

"""
This example differs from the chatbot example: it doesn't wait for the user to stop talking before it starts translating.
It also doesn't save what the user or bot says into the context object for use in subsequent interactions.
"""


# We need a custom frame processor here so that each piece of text is sent to
# the LLM as its own request, without saving any conversation context.
class TranslationProcessor(FrameProcessor):
    """Turn every incoming text frame into a standalone translation request.

    Each ``TextFrame`` is replaced by an ``LLMMessagesQueueFrame`` holding a
    freshly built two-message context (system instruction + the text to
    translate). Nothing is kept between frames.

    Args:
        language: Name of the target language; it is interpolated verbatim
            into the system prompt.
    """

    def __init__(self, language):
        # Chain to the base initializer so that any state the parent class
        # sets up exists before frames are processed.
        super().__init__()
        self._language = language

    async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
        """Yield a translation request for text frames; pass others through.

        Args:
            frame: The frame arriving from the previous pipeline stage.

        Yields:
            An ``LLMMessagesQueueFrame`` built from ``frame.text`` when
            ``frame`` is a ``TextFrame``; otherwise ``frame`` itself, unchanged.
        """
        if isinstance(frame, TextFrame):
            # A brand-new message list per frame: no history is carried over.
            context = [
                {
                    "role": "system",
                    "content": f"You will be provided with a sentence in English, and your task is to translate it into {self._language}.",
                },
                {"role": "user", "content": frame.text},
            ]
            yield LLMMessagesQueueFrame(context)
        else:
            yield frame


async def main(room_url: str, token):
    """Build the translation pipeline and run it on a Daily transport.

    The pipeline stages are, in order: a sentence aggregator, the
    ``TranslationProcessor`` targeting Spanish, the OpenAI LLM service and the
    Azure TTS service configured with the "es-ES-AlvaroNeural" voice.

    Args:
        room_url: Passed as the first argument to ``DailyTransportService``.
        token: Passed as the second argument to ``DailyTransportService``.
    """
    # NOTE(review): the HTTP session is opened for the lifetime of the run, but
    # none of the services constructed below receive it.
    async with aiohttp.ClientSession() as session:
        transport = DailyTransportService(
            room_url,
            token,
            "Translator",
            duration_minutes=5,
            start_transcription=True,
            mic_enabled=True,
            mic_sample_rate=16000,
            camera_enabled=False,
            vad_enabled=True,
        )

        # Credentials are read from the environment when each service is built.
        speech = AzureTTSService(
            api_key=os.getenv("AZURE_SPEECH_API_KEY"),
            region=os.getenv("AZURE_SPEECH_REGION"),
            voice="es-ES-AlvaroNeural",
        )
        translator_llm = OpenAILLMService(
            api_key=os.getenv("OPENAI_CHATGPT_API_KEY"),
            model="gpt-4-turbo-preview",
        )

        stages = [
            SentenceAggregator(),
            TranslationProcessor("Spanish"),
            translator_llm,
            speech,
        ]
        pipeline = Pipeline(stages)

        # Switch on endpointing and punctuation in the transcription extras
        # before the transport starts.
        extra_settings = transport.transcription_settings["extra"]
        extra_settings["endpointing"] = True
        extra_settings["punctuate"] = True

        await transport.run(pipeline)


if __name__ == "__main__":
    # configure() returns the (url, token) pair that main() takes.
    url, token = configure()
    asyncio.run(main(url, token))

0 comments on commit 78638d2

Please sign in to comment.