diff --git a/examples/foundational/26b-gemini-multimodal-live-function-calling.py b/examples/foundational/26b-gemini-multimodal-live-function-calling.py new file mode 100644 index 000000000..482cc7caf --- /dev/null +++ b/examples/foundational/26b-gemini-multimodal-live-function-calling.py @@ -0,0 +1,142 @@ +# +# Copyright (c) 2024, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os +import sys +from datetime import datetime + +import aiohttp +from dotenv import load_dotenv +from loguru import logger +from runner import configure + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.audio.vad.vad_analyzer import VADParams +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext +from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService +from pipecat.transports.services.daily import DailyParams, DailyTransport + +load_dotenv(override=True) + +logger.remove(0) +logger.add(sys.stderr, level="DEBUG") + + +async def fetch_weather_from_api(function_name, tool_call_id, args, llm, context, result_callback): + temperature = 75 if args["format"] == "fahrenheit" else 24 + await result_callback( + { + "conditions": "nice", + "temperature": temperature, + "format": args["format"], + "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"), + } + ) + + +tools = [ + { + "function_declarations": [ + { + "name": "get_current_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "format": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + "description": "The temperature unit to use. Infer this from the users location.", + }, + }, + "required": ["location", "format"], + }, + }, + ] + } +] + +system_instruction = """ +You are a helpful assistant who can answer questions and use tools. + +You have a tool called "get_current_weather" that can be used to get the current weather. If the user asks +for the weather, call this function. +""" + + +async def main(): + async with aiohttp.ClientSession() as session: + (room_url, token) = await configure(session) + + transport = DailyTransport( + room_url, + token, + "Respond bot", + DailyParams( + audio_in_sample_rate=16000, + audio_out_sample_rate=24000, + audio_out_enabled=True, + vad_enabled=True, + vad_audio_passthrough=True, + # set stop_secs to something roughly similar to the internal setting + # of the Multimodal Live api, just to align events. This doesn't really + # matter because we can only use the Multimodal Live API's phrase + # endpointing, for now. + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + ), + ) + + llm = GeminiMultimodalLiveLLMService( + api_key=os.getenv("GOOGLE_API_KEY"), + system_instruction=system_instruction, + tools=tools, + ) + + llm.register_function("get_current_weather", fetch_weather_from_api) + + context = OpenAILLMContext( + [{"role": "user", "content": "Say hello."}], + ) + context_aggregator = llm.create_context_aggregator(context) + + pipeline = Pipeline( + [ + transport.input(), + context_aggregator.user(), + llm, + context_aggregator.assistant(), + transport.output(), + ] + ) + + task = PipelineTask( + pipeline, + PipelineParams( + allow_interruptions=True, + enable_metrics=True, + enable_usage_metrics=True, + ), + ) + + @transport.event_handler("on_first_participant_joined") + async def on_first_participant_joined(transport, participant): + await task.queue_frames([context_aggregator.user().get_context_frame()]) + + runner = PipelineRunner() + + await runner.run(task) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/foundational/26b-gemini-multimodal-live-video.py b/examples/foundational/26c-gemini-multimodal-live-video.py similarity index 100% rename from examples/foundational/26b-gemini-multimodal-live-video.py rename to examples/foundational/26c-gemini-multimodal-live-video.py