diff --git a/README.md b/README.md
index b8da665..54a11e9 100644
--- a/README.md
+++ b/README.md
@@ -28,30 +28,34 @@ Learn more about tool calling
 ## File structure

-.
-├── Dockerfile.app - template to run the gradio dashboard.
-├── Dockerfile.ollama - template to run the ollama server.
-├── docker-compose.yml - use the ollama project and gradio dashboard.
-├── docker-compose-codecarbon.yml - use the codecarbon project, ollama and gradio dashboard.
-├── .env - This file contains the environment variables for the project. (Not included in the repository)
-├── app.py - the function_call.py using gradio as the User Interface.
-├── Makefile - This file contains the commands to run the project.
-├── README.md - This file contains the project documentation. This is the file you are currently reading.
-├── requirements.txt - This file contains the dependencies for the project.
-├── summary.png - How function calling works with a diagram.
-├── tests - This directory contains the test files for the project.
-│   ├── __init__.py - This file initializes the tests directory as a package.
-│   ├── test_cases.py - This file contains the test cases for the project.
-│   └── test_run.py - This file contains the code to run the test cases for the function calling LLM.
-└── utils - This directory contains the utility files for the project.
-│   ├── __init__.py - This file initializes the utils directory as a package.
-│   ├── function_call.py - This file contains the code to call a function using LLMs.
-│   └── communication_apis.py - This file contains the code to do with communication apis & experiments.
-└── voice_stt_mode.py - Gradio tabbed interface with Speech-to-text interface that allows edits and a text interface.
+.
+├── Dockerfile.app - template to run the gradio dashboard.
+├── Dockerfile.ollama - template to run the ollama server.
+├── docker-compose.yml - use the ollama project and gradio dashboard.
+├── docker-compose-codecarbon.yml - use the codecarbon project, ollama and gradio dashboard.
+├── .env - This file contains the environment variables for the project. (Not included in the repository)
+├── app.py - runs function_call.py with Gradio as the user interface.
+├── Makefile - This file contains the commands to run the project.
+├── README.md - This file contains the project documentation. This is the file you are currently reading.
+├── requirements.txt - This file contains the dependencies for the project.
+├── summary.png - a diagram of how function calling works.
+├── tests - This directory contains the test files for the project.
+│   ├── __init__.py - This file initializes the tests directory as a package.
+│   ├── test_cases.py - This file contains the test cases for the project.
+│   └── test_run.py - This file contains the code to run the test cases for the function calling LLM.
+├── utils - This directory contains the utility files for the project.
+│   ├── __init__.py - This file initializes the utils directory as a package.
+│   ├── function_call.py - This file contains the code to call a function using LLMs.
+│   └── communication_apis.py - This file contains the code for the communication APIs & experiments.
+└── voice_stt_mode.py - Gradio tabbed interface with a speech-to-text tab (transcriptions can be edited) and a text tab.
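+
+For example, the function-calling workflow in `utils/function_call.py` can be driven directly from Python. The snippet below is a minimal sketch, assuming a local Ollama server with the `qwen2.5:0.5b` model already pulled and the Africa's Talking credentials set in `.env`:
+
+```python
+import asyncio
+
+from utils.function_call import run
+
+# Let the model choose and call the matching tool for this request.
+asyncio.run(
+    run("qwen2.5:0.5b", "Translate the text 'Hello' to the target language 'French'")
+)
+```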
 ### Attribution

-This project uses the Qwen2.5-0.5B model developed by Alibaba Cloud under the Apache License 2.0. The original project can be found at [Qwen technical report](https://arxiv.org/abs/2412.15115)
-Inspired by this example for the [Groq interface STT](https://github.com/bklieger-groq/gradio-groq-basics)
+* This project uses the Qwen2.5-0.5B model developed by Alibaba Cloud under the Apache License 2.0. The original project can be found at [Qwen technical report](https://arxiv.org/abs/2412.15115)
+* Inspired by this example for the [Groq interface STT](https://github.com/bklieger-groq/gradio-groq-basics)
+* Microsoft Autogen is used to simulate multistep interactions. The original project can be found at [Microsoft Autogen](https://github.com/microsoft/autogen)
+* The project uses the Africa's Talking API to send airtime and messages to phone numbers. The original project can be found at [Africa's Talking API](https://africastalking.com/)
+* Ollama is used for model serving and deployment. The original project can be found at [Ollama](https://ollama.com/)
+

 ### License

@@ -181,6 +185,7 @@ This project uses LLMs to send airtime to a phone number. The difference is that
 - The app now supports both Text and Voice input tabs.
 - In the Voice Input tab, record audio and click "Transcribe" to preview the transcription. Then click "Process Edited Text" to execute voice commands.
 - In the Text Input tab, directly type commands to send airtime or messages or to search news.
+- An Autogen agent has been added to assist with generating translations to other languages. Note that this uses an evaluator-optimizer pattern and may not always produce accurate translations. However, the same paradigm can be used for code generation, summarization, and other tasks.

 ### Responsible AI Practices
 This project implements several responsible AI practices:
diff --git a/app.py b/app.py
index b7c9aa0..6ce74cc 100644
--- a/app.py
+++ b/app.py
@@ -20,6 +20,7 @@ using the username 'username'`
 Search for news about a topic:
 - `Latest news on climate change`
+ - `Translate the text 'Hello' to the target language 'French'`
 """

 # ------------------------------------------------------------------------------------
@@ -38,7 +39,7 @@ import gradio as gr
 from langtrace_python_sdk import langtrace, with_langtrace_root_span
 import ollama

-from utils.function_call import send_airtime, send_message, search_news
+from utils.function_call import send_airtime, send_message, search_news, translate_text

 # ------------------------------------------------------------------------------------
 # Logging Configuration
@@ -236,6 +237,27 @@ def mask_api_key(api_key):
             },
         },
     },
+    {
+        "type": "function",
+        "function": {
+            "name": "translate_text",
+            "description": "Translate text to a specified language using Ollama & Autogen",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "text": {
+                        "type": "string",
+                        "description": "The text to translate",
+                    },
+                    "target_language": {
+                        "type": "string",
+                        "description": "The target language for translation",
+                    },
+                },
+                "required": ["text", "target_language"],
+            },
+        },
+    },
 ]

 # ------------------------------------------------------------------------------------
@@ -244,7 +266,9 @@ def mask_api_key(api_key):

 @with_langtrace_root_span()
-async def process_user_message(message: str, history: list) -> str:
+async def process_user_message(
+    message: str, history: list, use_vision: bool = False, image_path: str = None
+) -> str:
     """
     Handle the conversation with the model asynchronously.

@@ -254,6 +278,10 @@ async def process_user_message(message: str, history: list) -> str:
         The user's input message.
history : list of list of str The conversation history up to that point. + use_vision : bool, optional + Flag to enable vision capabilities, by default False + image_path : str, optional + Path to the image file if using vision model, by default None Returns ------- @@ -266,16 +294,28 @@ async def process_user_message(message: str, history: list) -> str: logger.info("Processing user message: %s", masked_message) client = ollama.AsyncClient() - messages = [ - { - "role": "user", - "content": message, - } - ] + messages = [] + + # Construct message based on vision flag + if use_vision: + messages.append( + { + "role": "user", + "content": message, + "images": [image_path] if image_path else None, + } + ) + else: + messages.append({"role": "user", "content": message}) try: + # Select model based on vision flag + model_name = "llama3.2-vision" if use_vision else "qwen2.5:0.5b" + response = await client.chat( - model="qwen2.5:0.5b", messages=messages, tools=tools + model=model_name, + messages=messages, + tools=None if use_vision else tools, # Vision models don't use tools ) except Exception as e: logger.exception("Failed to get response from Ollama client.") @@ -292,7 +332,6 @@ async def process_user_message(message: str, history: list) -> str: "content": model_content, } ) - logger.debug("Model messages: %s", messages) if model_message.get("tool_calls"): for tool in model_message["tool_calls"]: @@ -332,6 +371,14 @@ async def process_user_message(message: str, history: list) -> str: elif tool_name == "search_news": logger.info("Calling search_news with arguments: %s", masked_args) function_response = search_news(arguments["query"]) + elif tool_name == "translate_text": + logger.info( + "Calling translate_text with arguments: %s", masked_args + ) + function_response = translate_text( + arguments["text"], + arguments["target_language"], + ) else: function_response = json.dumps({"error": "Unknown function"}) logger.warning("Unknown function: %s", tool_name) @@ -403,6 +450,7 @@ def gradio_interface(message: str, history: list) -> str: "Send a message to +254712345678 with the message 'Hello there', using the username 'username'" ], ["Search news for 'latest technology trends'"], + ["Translate the text 'Hi' to the target language 'French'"], ], type="messages", ) diff --git a/requirements.txt b/requirements.txt index 3684df7..1ac870c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,4 +15,6 @@ pytest-asyncio==0.25.0 nltk==3.9.1 soundfile==0.12.1 groq==0.13.1 -numpy==2.2.1 \ No newline at end of file +numpy==2.2.1 +pyautogen==0.2.18 +flaml[automl] \ No newline at end of file diff --git a/tests/test_cases.py b/tests/test_cases.py index f1da9d6..0e2021f 100644 --- a/tests/test_cases.py +++ b/tests/test_cases.py @@ -8,8 +8,10 @@ import os import re -from unittest.mock import patch -from utils.function_call import send_airtime, send_message, search_news +import pytest +import pytest_asyncio +from unittest.mock import patch, MagicMock, AsyncMock +from utils.function_call import send_airtime, send_message, search_news, translate_text # Load environment variables: TEST_PHONE_NUMBER PHONE_NUMBER = os.getenv("TEST_PHONE_NUMBER") @@ -129,3 +131,62 @@ def test_search_news_success(mock_ddgs): mock_ddgs.return_value.news.assert_called_once_with( keywords="AI", region="wt-wt", safesearch="off", timelimit="d", max_results=5 ) + + +@pytest.mark.parametrize( + "text,target_language,expected_response,should_call", + [ + ("Hello", "French", "Bonjour", True), + ("Good morning", "Arabic", "صباح الخير", True), 
+        ("Thank you", "Portuguese", "Obrigado", True),
+        ("", "French", "Error: Empty text", False),
+        (
+            "Hello",
+            "German",
+            "Target language must be French, Arabic, or Portuguese",
+            False,
+        ),
+    ],
+)
+def test_translate_text_function(text, target_language, expected_response, should_call):
+    """
+    Test translation functionality with various inputs.
+    Note: translate_text is a synchronous function, so do not await.
+    """
+    # translate_text drives an Autogen agent pair, so patch ConversableAgent
+    # where it is looked up (ollama.AsyncClient is never used on this path).
+    with patch("utils.function_call.ConversableAgent") as mock_agent_cls:
+        agent_instance = MagicMock()
+        # initiate_chat returns a ChatResult-like object; translate_text reads .summary
+        agent_instance.initiate_chat.return_value = MagicMock(summary=expected_response)
+        mock_agent_cls.return_value = agent_instance
+
+        if not text:
+            with pytest.raises(ValueError) as exc:
+                translate_text(text, target_language)
+            assert "Empty text" in str(exc.value)
+            return
+
+        if target_language not in ["French", "Arabic", "Portuguese"]:
+            with pytest.raises(ValueError) as exc:
+                translate_text(text, target_language)
+            assert "Target language must be French, Arabic, or Portuguese" in str(
+                exc.value
+            )
+            return
+
+        result = translate_text(text, target_language)
+        assert expected_response in result
+
+        if should_call:
+            agent_instance.initiate_chat.assert_called_once()
+        else:
+            agent_instance.initiate_chat.assert_not_called()
+
+
+def test_translate_text_special_chars():
+    """Test translation with text that has no translatable characters."""
+    # Validation rejects the input before any agent or LLM call is made.
+    with pytest.raises(ValueError) as exc:
+        translate_text("@#$%^", "French")
+    assert "Invalid input" in str(exc.value)
diff --git a/tests/test_run.py b/tests/test_run.py
index 9ab391a..e7d587a 100644
--- a/tests/test_run.py
+++ b/tests/test_run.py
@@ -10,13 +10,13 @@

 The tests are run asynchronously to allow for the use of the asyncio library.

-NB: ensure you have the environment variables set in the .env file/.bashrc
+NB: ensure you have the environment variables set in the .env file/.bashrc
 file before running the tests.

 How to run the tests:
 pytest test/test_run.py -v --asyncio-mode=strict

-Feel free to add more tests to cover more scenarios.
+Feel free to add more tests to cover more scenarios.
More test you can try can be found here: https://huggingface.co/datasets/DAMO-NLP-SG/MultiJail """ @@ -127,6 +127,7 @@ async def test_run_send_airtime_zero_amount(): assert True time.sleep(300) + @pytest.mark.asyncio async def test_run_send_airtime_invalid_currency(): """ @@ -169,6 +170,7 @@ async def test_run_send_airtime_multiple_numbers(): assert True time.sleep(300) + @pytest.mark.asyncio async def test_run_send_airtime_synonym(): """ @@ -179,6 +181,7 @@ async def test_run_send_airtime_synonym(): assert True time.sleep(300) + @pytest.mark.asyncio async def test_run_send_airtime_different_order(): """ @@ -189,6 +192,7 @@ async def test_run_send_airtime_different_order(): assert True time.sleep(300) + @pytest.mark.asyncio async def test_run_send_message_polite_request(): """ @@ -221,6 +225,7 @@ async def test_run_send_airtime_invalid_amount(): assert True time.sleep(300) + @pytest.mark.asyncio async def test_run_send_message_spam_detection(): """ @@ -280,6 +285,7 @@ async def test_run_send_message_mixed_arabic_english(): assert True time.sleep(300) + @pytest.mark.asyncio async def test_run_send_message_french(): """ @@ -372,6 +378,7 @@ async def test_run_send_airtime_french_keywords(): assert True time.sleep(300) + @pytest.mark.asyncio async def test_run_send_message_portuguese_keywords(): """ @@ -440,6 +447,7 @@ async def test_run_send_airtime_arabic_keywords(): assert True time.sleep(300) + @pytest.mark.asyncio async def test_run_best_of_n_jailbreaking(): """ diff --git a/utils/function_call.py b/utils/function_call.py index 3fcf0ba..8c090ba 100644 --- a/utils/function_call.py +++ b/utils/function_call.py @@ -1,19 +1,19 @@ """ -Function calling example using ollama to send airtime to a phone number +Function calling example using ollama to send airtime to a phone number using the Africa's Talking API. -The user provides a query like +The user provides a query like "Send airtime to +254712345678 with an amount of 10 in currency KES", -and the model decides to use the `send_airtime` function to send +and the model decides to use the `send_airtime` function to send airtime to the provided phone number. -The user can also provide a query like -"Send a message to +254712345678 with the message +The user can also provide a query like +"Send a message to +254712345678 with the message 'Hello there', using the username 'username'", -and the model decides to use the `send_message` +and the model decides to use the `send_message` function to send a message to the provided phone number. -Credentials for the Africa's Talking API are loaded from +Credentials for the Africa's Talking API are loaded from environment variables `AT_USERNAME` and `AT_API_KEY`. Credit: https://www.youtube.com/watch?v=i0tsVzRbsNU @@ -28,6 +28,7 @@ import asyncio import africastalking import ollama +from autogen import ConversableAgent # from codecarbon import EmissionsTracker # Import the EmissionsTracker from duckduckgo_search import DDGS @@ -254,12 +255,60 @@ def search_news(query: str, **kwargs) -> str: safesearch="off", timelimit="d", max_results=5, - **kwargs + **kwargs, ) logger.debug("The search results are: %s", results) return json.dumps(results) +def translate_text(text: str, target_language: str) -> str: + """Translate text to a specified language using Ollama & Autogen. + + Parameters + ---------- + text : str : The text to translate. 
+    target_language : str : The target language (French, Arabic, or Portuguese).
+
+    Returns
+    -------
+    str : The final response from the translator/validator agent exchange.
+
+    Raises
+    ------
+    ValueError : If the text is empty, has no translatable characters, or the
+        target language is not French, Arabic, or Portuguese.
+    """
+    if not text or not text.strip():
+        raise ValueError("Empty text cannot be translated.")
+    if not any(char.isalpha() for char in text):
+        raise ValueError("Invalid input: the text has no translatable characters.")
+    if target_language.lower() not in ["french", "arabic", "portuguese"]:
+        raise ValueError("Target language must be French, Arabic, or Portuguese.")
+
+    config = [
+        {
+            "base_url": "http://localhost:11434/v1",
+            "model": "qwen2.5:0.5b",
+            "api_key": "ollama",
+            "api_type": "ollama",
+            "temperature": 0.5,
+        }
+    ]
+
+    zoe = ConversableAgent(
+        "Zoe",
+        system_message="""You are a translation expert.
+Translate English text to the specified language with high accuracy.
+Provide only the translation without explanations.""",
+        llm_config={"config_list": config},
+        human_input_mode="NEVER",
+    )
+
+    joe = ConversableAgent(
+        "joe",
+        system_message="""You are a bilingual translation validator.
+Review translations for:
+1. Accuracy of meaning
+2. Grammar correctness
+3. Natural expression
+Provide a confidence score (0-100%) and brief feedback.""",
+        llm_config={"config_list": config},
+        human_input_mode="NEVER",
+    )
+
+    message = f"Zoe, translate '{text}' to {target_language.capitalize()}"
+    result = joe.initiate_chat(zoe, message=message, max_turns=2)
+    # initiate_chat returns a ChatResult; return its summary (the last message)
+    # as plain text so the function-calling loop always receives a string.
+    return str(result.summary)
+
+
 # Asynchronous function to handle the conversation with the model
 async def run(model: str, user_input: str):
     """Run the conversation with the model.
@@ -368,6 +417,27 @@ async def run(model: str, user_input: str):
                     },
                 },
             },
+            {
+                "type": "function",
+                "function": {
+                    "name": "translate_text",
+                    "description": "Translate text to a specified language using Ollama & Autogen",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "text": {
+                                "type": "string",
+                                "description": "The text to translate",
+                            },
+                            "target_language": {
+                                "type": "string",
+                                "description": "The target language for translation",
+                            },
+                        },
+                        "required": ["text", "target_language"],
+                    },
+                },
+            },
         ],
     )
     # Add the model's response to the conversation history
@@ -385,6 +455,7 @@ async def run(model: str, user_input: str):
         "send_airtime": send_airtime,
         "send_message": send_message,
         "search_news": search_news,
+        "translate_text": translate_text,
     }
     for tool in response["message"]["tool_calls"]:
         # Get the function to call based on the tool name
@@ -413,6 +484,13 @@ async def run(model: str, user_input: str):
                 )
                 logger.debug("function response: %s", function_response)

+            elif tool["function"]["name"] == "translate_text":
+                function_response = function_to_call(
+                    tool["function"]["arguments"]["text"],
+                    tool["function"]["arguments"]["target_language"],
+                )
+                logger.debug("function response: %s", function_response)
+
             # Add the function response to the conversation history
             messages.append(
                 {
diff --git a/voice_stt_mode.py b/voice_stt_mode.py
index d71abdf..666f4e4 100644
--- a/voice_stt_mode.py
+++ b/voice_stt_mode.py
@@ -47,7 +47,7 @@ import ollama

 # Local Module Imports
-from utils.function_call import send_airtime, send_message, search_news
+from utils.function_call import send_airtime, send_message, search_news, translate_text

 # ------------------------------------------------------------------------------------
 # Logging Configuration
@@ -194,6 +194,27 @@
             },
         },
     },
+    {
+        "type": "function",
+        "function": {
+            "name": "translate_text",
+            "description": "Translate text to a specified language using Ollama",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "text": {
+                        "type": "string",
+                        "description": "The text to translate",
+                    },
+                    "target_language": {
+                        "type": "string",
+                        "description": "The target language for translation",
+                    },
+                },
+                "required": ["text", "target_language"],
+            },
+        },
+    },
 ]

 # ------------------------------------------------------------------------------------
@@ -275,6 +296,12 @@ async def process_user_message(message: str, history: list) -> str:
             elif tool_name == "search_news":
                 logger.info("Calling search_news with arguments: %s", arguments)
                 function_response = search_news(arguments["query"])
+            elif tool_name == "translate_text":
+                logger.info("Calling translate_text with arguments: %s", arguments)
+                function_response = translate_text(
+                    arguments["text"],
+                    arguments["target_language"],
+                )
             else:
                 function_response = json.dumps({"error": "Unknown function"})
                 logger.warning("Unknown function called: %s", tool_name)
@@ -292,6 +319,7 @@ async def process_user_message(message: str, history: list) -> str:
                 send_airtime.ErrorType,
                 send_message.ErrorType,
                 search_news.ErrorType,
+                translate_text.ErrorType,
             ) as e:
                 logger.error("Handled error in tool `%s`: %s", tool_name, e)
                 return f"Error executing `{tool_name}`: {str(e)}"
@@ -337,7 +365,7 @@ async def process_audio_and_llm(audio):
         y /= np.max(np.abs(y))

         # Write audio to buffer
         buffer = io.BytesIO()
         sf.write(buffer, y, sr, format="wav")
         buffer.seek(0)
@@ -406,6 +433,7 @@ def gradio_interface(message: str, history: list) -> str:
 - Send a message to +254712345678 with the message 'Hello there' with the username 'add your username'💬
 - Search news for 'latest technology trends' 📰
+- Translate the text 'Hi' to the target language 'French'

 * Please speak clearly and concisely for accurate transcription. In English only for now.
 * You can also edit the transcription before processing. We all make mistakes! 🤗
 """
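As a quick reference, the new translation path can also be exercised outside the Gradio UI. The following is an illustrative sketch only, assuming a local Ollama server with `qwen2.5:0.5b` pulled and the pinned `pyautogen` installed:

```python
from utils.function_call import translate_text

# Runs the Zoe (translator) / joe (validator) agent pair for two turns and
# returns the final chat summary as plain text.
print(translate_text("Hello", "French"))

# Unsupported target languages are rejected before any agents are created,
# matching the behaviour covered in tests/test_cases.py.
try:
    translate_text("Hello", "German")
except ValueError as err:
    print(err)  # Target language must be French, Arabic, or Portuguese.
```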