Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/vid 592 live translation #806

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 25 additions & 9 deletions examples/translation-chatbot/bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from pipecat.processors.aggregators.llm_response import LLMFullResponseAggregator
from pipecat.processors.aggregators.sentence import SentenceAggregator
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.services.azure import AzureTTSService
from pipecat.services.azure import AzureTTSService, AzureSTTService, language_to_azure_language
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import (
DailyParams,
Expand Down Expand Up @@ -46,9 +46,10 @@
# We need to use a custom service here to yield LLM frames without saving
# any context
class TranslationProcessor(FrameProcessor):
def __init__(self, language):
def __init__(self, source_language, language):
super().__init__()
self._language = language
self._source_language = source_language

async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)
Expand All @@ -57,7 +58,8 @@ async def process_frame(self, frame: Frame, direction: FrameDirection):
context = [
{
"role": "system",
"content": f"You will be provided with a sentence in English, and your task is to translate it into {self._language}.",
"content": f"You will be provided with a sentence in {self._source_language}, and your task is to only translate it into {self._language}.",
#"content": f"Translate the sentence from {self._source_language} into {self._language}.",
},
{"role": "user", "content": frame.text},
]
Expand Down Expand Up @@ -97,25 +99,39 @@ async def main():
"Translator",
DailyParams(
audio_out_enabled=True,
transcription_enabled=True,
transcription_settings=DailyTranscriptionSettings(extra={"interim_results": False}),
# transcription_enabled=True,
# transcription_settings=DailyTranscriptionSettings(extra={"interim_results": False}),
),
)

stt = AzureSTTService(
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
region=os.getenv("AZURE_SPEECH_REGION"),
#language="ko-KR" #azure language code
language="nl-NL" #azure language code
#language="en-US" #azure language code

)
#print("Debug: STT=", stt)

tts = AzureTTSService(
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
region=os.getenv("AZURE_SPEECH_REGION"),
voice="es-ES-AlvaroNeural",
#voice="es-ES-AlvaroNeural",
voice="en-US-AndrewMultilingualNeural"
#voice="nl-NL-MaartenNeural"
)

llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

sa = SentenceAggregator()
tp = TranslationProcessor("Spanish")
#tp = TranslationProcessor("Spanish")
tp = TranslationProcessor(source_language="Dutch", language="English") # LLM Prompt
lfra = LLMFullResponseAggregator()
ts = TranslationSubtitles("spanish")
ts = TranslationSubtitles("dutch")

pipeline = Pipeline([transport.input(), sa, tp, llm, lfra, ts, tts, transport.output()])
# pipeline = Pipeline([transport.input(), sa, tp, llm, lfra, ts, tts, transport.output()])
pipeline = Pipeline([transport.input(), stt, tp, llm, ts, tts, transport.output()])

task = PipelineTask(pipeline)

Expand Down