diff --git a/examples/translation-chatbot/bot.py b/examples/translation-chatbot/bot.py index e654c0159..94cc6e2ec 100644 --- a/examples/translation-chatbot/bot.py +++ b/examples/translation-chatbot/bot.py @@ -16,7 +16,7 @@ from pipecat.processors.aggregators.llm_response import LLMFullResponseAggregator from pipecat.processors.aggregators.sentence import SentenceAggregator from pipecat.processors.frame_processor import FrameDirection, FrameProcessor -from pipecat.services.azure import AzureTTSService +from pipecat.services.azure import AzureTTSService, AzureSTTService, language_to_azure_language from pipecat.services.openai import OpenAILLMService from pipecat.transports.services.daily import ( DailyParams, @@ -46,9 +46,10 @@ # We need to use a custom service here to yield LLM frames without saving # any context class TranslationProcessor(FrameProcessor): - def __init__(self, language): + def __init__(self, source_language, language): super().__init__() self._language = language + self._source_language = source_language async def process_frame(self, frame: Frame, direction: FrameDirection): await super().process_frame(frame, direction) @@ -57,7 +58,8 @@ async def process_frame(self, frame: Frame, direction: FrameDirection): context = [ { "role": "system", - "content": f"You will be provided with a sentence in English, and your task is to translate it into {self._language}.", + "content": f"You will be provided with a sentence in {self._source_language}, and your task is to only ßtranslate it into {self._language}.", + #"content": f"Translate the sentence from {self._source_language} into {self._language}.", }, {"role": "user", "content": frame.text}, ] @@ -97,25 +99,39 @@ async def main(): "Translator", DailyParams( audio_out_enabled=True, - transcription_enabled=True, - transcription_settings=DailyTranscriptionSettings(extra={"interim_results": False}), + # transcription_enabled=True, + # transcription_settings=DailyTranscriptionSettings(extra={"interim_results": False}), ), ) + stt = AzureSTTService( + api_key=os.getenv("AZURE_SPEECH_API_KEY"), + region=os.getenv("AZURE_SPEECH_REGION"), + #language="ko-KR" #azure language code + language="nl-NL" #azure language code + #language="en-US" #azure language code + + ) + #print("Debug: STT=", stt) + tts = AzureTTSService( api_key=os.getenv("AZURE_SPEECH_API_KEY"), region=os.getenv("AZURE_SPEECH_REGION"), - voice="es-ES-AlvaroNeural", + #voice="es-ES-AlvaroNeural", + voice="en-US-AndrewMultilingualNeural" + #voice="nl-NL-MaartenNeural" ) llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o") sa = SentenceAggregator() - tp = TranslationProcessor("Spanish") + #tp = TranslationProcessor("Spanish") + tp = TranslationProcessor(source_language="Dutch", language="English") # LLM Prompt lfra = LLMFullResponseAggregator() - ts = TranslationSubtitles("spanish") + ts = TranslationSubtitles("dutch") - pipeline = Pipeline([transport.input(), sa, tp, llm, lfra, ts, tts, transport.output()]) + # pipeline = Pipeline([transport.input(), sa, tp, llm, lfra, ts, tts, transport.output()]) + pipeline = Pipeline([transport.input(), stt, tp, llm, ts, tts, transport.output()]) task = PipelineTask(pipeline)