crewAIInc · joaomdmoura · Dec 27, 2024 · Dec 23, 2024 · Dec 26, 2024 · Dec 26, 2024
diff --git a/src/crewai/agent.py b/src/crewai/agent.py
@@ -17,6 +17,7 @@
 from crewai.task import Task
 from crewai.tools import BaseTool
 from crewai.tools.agent_tools.agent_tools import AgentTools
+from crewai.tools.base_tool import Tool
 from crewai.utilities import Converter, Prompts
 from crewai.utilities.constants import TRAINED_AGENTS_DATA_FILE, TRAINING_DATA_FILE
 from crewai.utilities.converter import generate_model_description
@@ -114,6 +115,10 @@ class Agent(BaseAgent):
         default=2,
         description="Maximum number of retries for an agent to execute a task when an error occurs.",
     )
+    multimodal: bool = Field(
+        default=False,
+        description="Whether the agent is multimodal.",
+    )
     code_execution_mode: Literal["safe", "unsafe"] = Field(
         default="safe",
         description="Mode for code execution: 'safe' (using Docker) or 'unsafe' (direct execution).",
@@ -406,6 +411,10 @@ def get_delegation_tools(self, agents: List[BaseAgent]):
         tools = agent_tools.tools()
         return tools
 
+    def get_multimodal_tools(self) -> List[Tool]:
+        from crewai.tools.agent_tools.add_image_tool import AddImageTool  # function-scope import; NOTE(review): presumably avoids an import cycle — confirm
+        return [AddImageTool()]  # a new one-element list with a new tool instance on every call
+
     def get_code_execution_tools(self):
         try:
             from crewai_tools import CodeInterpreterTool

diff --git a/src/crewai/agents/crew_agent_executor.py b/src/crewai/agents/crew_agent_executor.py
@@ -143,10 +143,20 @@ def _invoke_loop(self, formatted_answer=None):
                         tool_result = self._execute_tool_and_check_finality(
                             formatted_answer
                         )
-                        if self.step_callback:
-                            self.step_callback(tool_result)
 
-                        formatted_answer.text += f"\nObservation: {tool_result.result}"
+                        # Directly append the result to the messages if the
+                        # tool is "Add image to content" in case of multimodal
+                        # agents
+                        if formatted_answer.tool == self._i18n.tools("add_image")["name"]:
+                            self.messages.append(tool_result.result)
+                            continue
+
+                        else:
+                            if self.step_callback:
+                                self.step_callback(tool_result)
+
+                            formatted_answer.text += f"\nObservation: {tool_result.result}"
+
                         formatted_answer.result = tool_result.result
                         if tool_result.result_as_answer:
                             return AgentFinish(

diff --git a/src/crewai/crew.py b/src/crewai/crew.py
@@ -35,6 +35,7 @@
 from crewai.tasks.task_output import TaskOutput
 from crewai.telemetry import Telemetry
 from crewai.tools.agent_tools.agent_tools import AgentTools
+from crewai.tools.base_tool import Tool
 from crewai.types.usage_metrics import UsageMetrics
 from crewai.utilities import I18N, FileHandler, Logger, RPMController
 from crewai.utilities.constants import TRAINING_DATA_FILE
@@ -533,9 +534,6 @@ def kickoff(
             if not agent.function_calling_llm:  # type: ignore # "BaseAgent" has no attribute "function_calling_llm"
                 agent.function_calling_llm = self.function_calling_llm  # type: ignore # "BaseAgent" has no attribute "function_calling_llm"
 
-            if agent.allow_code_execution:  # type: ignore # BaseAgent" has no attribute "allow_code_execution"
-                agent.tools += agent.get_code_execution_tools()  # type: ignore # "BaseAgent" has no attribute "get_code_execution_tools"; maybe "get_delegation_tools"?
-
             if not agent.step_callback:  # type: ignore # "BaseAgent" has no attribute "step_callback"
                 agent.step_callback = self.step_callback  # type: ignore # "BaseAgent" has no attribute "step_callback"
 
@@ -672,7 +670,6 @@ def _create_manager_agent(self):
                 )
                 manager.tools = []
                 raise Exception("Manager agent should not have tools")
-            manager.tools = self.manager_agent.get_delegation_tools(self.agents)
         else:
             self.manager_llm = (
                 getattr(self.manager_llm, "model_name", None)
@@ -684,6 +681,7 @@ def _create_manager_agent(self):
                 goal=i18n.retrieve("hierarchical_manager_agent", "goal"),
                 backstory=i18n.retrieve("hierarchical_manager_agent", "backstory"),
                 tools=AgentTools(agents=self.agents).tools(),
+                allow_delegation=True,
                 llm=self.manager_llm,
                 verbose=self.verbose,
             )
@@ -726,7 +724,14 @@ def _execute_tasks(
                     f"No agent available for task: {task.description}. Ensure that either the task has an assigned agent or a manager agent is provided."
                 )
 
-            self._prepare_agent_tools(task)
+            # Determine which tools to use - task tools take precedence over agent tools
+            tools_for_task = task.tools or agent_to_use.tools or []
+            tools_for_task = self._prepare_tools(
+                agent_to_use,
+                task,
+                tools_for_task
+            )
+
             self._log_task_start(task, agent_to_use.role)
 
             if isinstance(task, ConditionalTask):
@@ -743,7 +748,7 @@ def _execute_tasks(
                 future = task.execute_async(
                     agent=agent_to_use,
                     context=context,
-                    tools=agent_to_use.tools,
+                    tools=tools_for_task,
                 )
                 futures.append((task, future, task_index))
             else:
@@ -755,7 +760,7 @@ def _execute_tasks(
                 task_output = task.execute_sync(
                     agent=agent_to_use,
                     context=context,
-                    tools=agent_to_use.tools,
+                    tools=tools_for_task,
                 )
                 task_outputs = [task_output]
                 self._process_task_result(task, task_output)
@@ -792,60 +797,81 @@ def _handle_conditional_task(
             return skipped_task_output
         return None
 
-    def _prepare_agent_tools(self, task: Task):
-        if self.process == Process.hierarchical:
-            if self.manager_agent:
-                self._update_manager_tools(task)
-            else:
-                raise ValueError("Manager agent is required for hierarchical process.")
-        elif task.agent and task.agent.allow_delegation:
-            self._add_delegation_tools(task)
+    def _prepare_tools(self, agent: BaseAgent, task: Task, tools: List[Tool]) -> List[Tool]:
+        """Return `tools` plus the delegation, code-execution and multimodal
+        tools that `agent`'s flags enable (merged in by the helpers it calls).
+
+        Raises ValueError when delegation is requested in a hierarchical
+        process that has no manager agent.
+        """
+        if agent.allow_delegation:
+            if self.process != Process.hierarchical:
+                tools = self._add_delegation_tools(task, tools)
+            elif self.manager_agent:
+                tools = self._update_manager_tools(task, tools)
+            else:
+                raise ValueError("Manager agent is required for hierarchical process.")
+        # A BaseAgent may not declare these flags, so read them defensively.
+        if getattr(agent, "allow_code_execution", False):
+            tools = self._add_code_execution_tools(agent, tools)
+        if getattr(agent, "multimodal", False):
+            tools = self._add_multimodal_tools(agent, tools)
+        return tools
 
     def _get_agent_to_use(self, task: Task) -> Optional[BaseAgent]:
         if self.process == Process.hierarchical:
             return self.manager_agent
         return task.agent
 
-    def _add_delegation_tools(self, task: Task):
+    def _merge_tools(self, existing_tools: List[Tool], new_tools: List[Tool]) -> List[Tool]:
+        """Combine two tool lists; on a name clash the entry from `new_tools` wins.
+
+        Surviving `existing_tools` keep their order and come first, followed by
+        every item of `new_tools`. When `new_tools` is empty or None the
+        `existing_tools` object itself is returned, not a copy.
+        """
+        if not new_tools:
+            return existing_tools
+
+        incoming_names = {candidate.name for candidate in new_tools}
+        survivors = [
+            current for current in existing_tools if current.name not in incoming_names
+        ]
+        return [*survivors, *new_tools]
+
+    def _inject_delegation_tools(self, tools: List[Tool], task_agent: BaseAgent, agents: List[BaseAgent]):
+        """Merge the delegation tools `task_agent` builds for `agents` into `tools`."""
+        return self._merge_tools(tools, task_agent.get_delegation_tools(agents))
+
+    def _add_multimodal_tools(self, agent: BaseAgent, tools: List[Tool]):
+        """Merge the tools returned by `agent.get_multimodal_tools()` into `tools`."""
+        return self._merge_tools(tools, agent.get_multimodal_tools())
+
+    def _add_code_execution_tools(self, agent: BaseAgent, tools: List[Tool]):
+        """Merge the tools returned by `agent.get_code_execution_tools()` into `tools`."""
+        return self._merge_tools(tools, agent.get_code_execution_tools())
+
+    def _add_delegation_tools(self, task: Task, tools: List[Tool]) -> List[Tool]:
         agents_for_delegation = [agent for agent in self.agents if agent != task.agent]
         if len(self.agents) > 1 and len(agents_for_delegation) > 0 and task.agent:
-            delegation_tools = task.agent.get_delegation_tools(agents_for_delegation)
-
-            # Add tools if they are not already in task.tools
-            for new_tool in delegation_tools:
-                # Find the index of the tool with the same name
-                existing_tool_index = next(
-                    (
-                        index
-                        for index, tool in enumerate(task.tools or [])
-                        if tool.name == new_tool.name
-                    ),
-                    None,
-                )
-                if not task.tools:
-                    task.tools = []
-
-                if existing_tool_index is not None:
-                    # Replace the existing tool
-                    task.tools[existing_tool_index] = new_tool
-                else:
-                    # Add the new tool
-                    task.tools.append(new_tool)
+            if not tools:  # a missing or empty input is normalised to a new list before merging
+                tools = []
+            tools = self._inject_delegation_tools(tools, task.agent, agents_for_delegation)  # merged by tool name; the task's own agent is excluded from the targets
+        return tools  # returned unchanged when there is nobody to delegate to
 
     def _log_task_start(self, task: Task, role: str = "None"):
         if self.output_log_file:
             self._file_handler.log(
                 task_name=task.name, task=task.description, agent=role, status="started"
             )
 
-    def _update_manager_tools(self, task: Task):
+    def _update_manager_tools(self, task: Task, tools: List[Tool]) -> List[Tool]:
         if self.manager_agent:
             if task.agent:
-                self.manager_agent.tools = task.agent.get_delegation_tools([task.agent])
+                tools = self._inject_delegation_tools(tools, task.agent, [task.agent])  # delegation limited to the task's own agent
             else:
-                self.manager_agent.tools = self.manager_agent.get_delegation_tools(
-                    self.agents
-                )
+                tools = self._inject_delegation_tools(tools, self.manager_agent, self.agents)  # no task agent: delegation spans self.agents
+        return tools  # the input list is returned as-is when there is no manager agent
 
     def _get_context(self, task: Task, task_outputs: List[TaskOutput]):
         context = (

diff --git a/src/crewai/llm.py b/src/crewai/llm.py
@@ -64,6 +64,8 @@ def flush(self):
     "llama3-70b-8192": 8192,
     "llama3-8b-8192": 8192,
     "mixtral-8x7b-32768": 32768,
+    "llama-3.3-70b-versatile": 128000,
+    "llama-3.3-70b-instruct": 128000,
 }
 
 DEFAULT_CONTEXT_WINDOW_SIZE = 8192

diff --git a/src/crewai/tools/agent_tools/add_image_tool.py b/src/crewai/tools/agent_tools/add_image_tool.py
@@ -0,0 +1,45 @@
+from typing import Dict, Optional, Union
+
+from pydantic import BaseModel, Field
+
+from crewai.tools.base_tool import BaseTool
+from crewai.utilities import I18N
+
+i18n = I18N()
+
+# Argument schema for AddImageTool: an image location plus an optional prompt.
+class AddImageToolSchema(BaseModel):
+    image_url: str = Field(..., description="The URL or path of the image to add")
+    action: Optional[str] = Field(
+        default=None, description="Optional context or question about the image"
+    )
+
+
+class AddImageTool(BaseTool):
+    """Tool for adding images to the content"""
+
+    name: str = Field(default_factory=lambda: i18n.tools("add_image")["name"])  # type: ignore
+    description: str = Field(default_factory=lambda: i18n.tools("add_image")["description"])  # type: ignore
+    args_schema: type[BaseModel] = AddImageToolSchema
+
+    def _run(
+        self,
+        image_url: str,
+        action: Optional[str] = None,
+        **kwargs,
+    ) -> dict:
+        """Build a user-role chat message that pairs a text prompt with the image.
+
+        Args:
+            image_url: URL or path stored under the message's `image_url` part.
+            action: Prompt text; when falsy, the translated default is used.
+            **kwargs: Accepted but not used.
+
+        Returns:
+            A dict with `role` "user" and a `content` list of [text, image] parts.
+        """
+        # An empty or missing action falls back to the translated default prompt.
+        prompt = action or i18n.tools("add_image")["default_action"]  # type: ignore
+        text_part = {"type": "text", "text": prompt}
+        image_part = {"type": "image_url", "image_url": {"url": image_url}}
+        return {"role": "user", "content": [text_part, image_part]}
diff --git a/src/crewai/tools/agent_tools/agent_tools.py b/src/crewai/tools/agent_tools/agent_tools.py
@@ -20,13 +20,13 @@ def tools(self) -> list[BaseTool]:
         delegate_tool = DelegateWorkTool(
             agents=self.agents,
             i18n=self.i18n,
-            description=self.i18n.tools("delegate_work").format(coworkers=coworkers),
+            description=self.i18n.tools("delegate_work").format(coworkers=coworkers),  # type: ignore
         )
 
         ask_tool = AskQuestionTool(
             agents=self.agents,
             i18n=self.i18n,
-            description=self.i18n.tools("ask_question").format(coworkers=coworkers),
+            description=self.i18n.tools("ask_question").format(coworkers=coworkers),  # type: ignore
         )
 
         return [delegate_tool, ask_tool]
diff --git a/src/crewai/tools/tool_usage.py b/src/crewai/tools/tool_usage.py
@@ -10,6 +10,7 @@
 from crewai.task import Task
 from crewai.telemetry import Telemetry
 from crewai.tools import BaseTool
+from crewai.tools.structured_tool import CrewStructuredTool
 from crewai.tools.tool_calling import InstructorToolCalling, ToolCalling
 from crewai.tools.tool_usage_events import ToolUsageError, ToolUsageFinished
 from crewai.utilities import I18N, Converter, ConverterError, Printer
@@ -18,8 +19,7 @@
     import agentops  # type: ignore
 except ImportError:
     agentops = None
-
-OPENAI_BIGGER_MODELS = ["gpt-4", "gpt-4o", "o1-preview", "o1-mini"]
+OPENAI_BIGGER_MODELS = ["gpt-4", "gpt-4o", "o1-preview", "o1-mini", "o1", "o3", "o3-mini"]
 
 
 class ToolUsageErrorException(Exception):
@@ -103,6 +103,19 @@ def use(
             if self.agent.verbose:
                 self._printer.print(content=f"\n\n{error}\n", color="red")
             return error
+
+        if isinstance(tool, CrewStructuredTool) and tool.name == self._i18n.tools("add_image")["name"]:  # type: ignore
+            try:
+                result = self._use(tool_string=tool_string, tool=tool, calling=calling)
+                return result
+
+            except Exception as e:
+                error = getattr(e, "message", str(e))
+                self.task.increment_tools_errors()
+                if self.agent.verbose:
+                    self._printer.print(content=f"\n\n{error}\n", color="red")
+                return error
+
         return f"{self._use(tool_string=tool_string, tool=tool, calling=calling)}"  # type: ignore # BUG?: "_use" of "ToolUsage" does not return a value (it only ever returns None)
 
     def _use(

diff --git a/src/crewai/translations/en.json b/src/crewai/translations/en.json
@@ -37,6 +37,11 @@
   },
   "tools": {
     "delegate_work": "Delegate a specific task to one of the following coworkers: {coworkers}\nThe input to this tool should be the coworker, the task you want them to do, and ALL necessary context to execute the task, they know nothing about the task, so share absolute everything you know, don't reference things but instead explain them.",
-    "ask_question": "Ask a specific question to one of the following coworkers: {coworkers}\nThe input to this tool should be the coworker, the question you have for them, and ALL necessary context to ask the question properly, they know nothing about the question, so share absolute everything you know, don't reference things but instead explain them."
+    "ask_question": "Ask a specific question to one of the following coworkers: {coworkers}\nThe input to this tool should be the coworker, the question you have for them, and ALL necessary context to ask the question properly, they know nothing about the question, so share absolute everything you know, don't reference things but instead explain them.",
+    "add_image": {
+      "name": "Add image to content",
+      "description": "See image to understand its content, you can optionally ask a question about the image",
+      "default_action": "Please provide a detailed description of this image, including all visual elements, context, and any notable details you can observe."
+    }
   }
 }
diff --git a/src/crewai/utilities/i18n.py b/src/crewai/utilities/i18n.py
@@ -1,6 +1,6 @@
 import json
 import os
-from typing import Dict, Optional
+from typing import Dict, Optional, Union
 
 from pydantic import BaseModel, Field, PrivateAttr, model_validator
 
@@ -41,8 +41,8 @@ def slice(self, slice: str) -> str:
     def errors(self, error: str) -> str:
         return self.retrieve("errors", error)
 
-    def tools(self, error: str) -> str:
-        return self.retrieve("tools", error)
+    def tools(self, tool: str) -> Union[str, Dict[str, str]]:
+        return self.retrieve("tools", tool)  # looks `tool` up in the "tools" section, whose entries are plain strings or nested dicts (e.g. "add_image")
 
     def retrieve(self, kind, key) -> str:
         try: