crewAIInc · joaomdmoura · Dec 27, 2024 · Dec 23, 2024 · Dec 26, 2024 · Dec 26, 2024
diff --git a/src/crewai/agent.py b/src/crewai/agent.py
@@ -114,6 +114,10 @@ class Agent(BaseAgent):
         default=2,
         description="Maximum number of retries for an agent to execute a task when an error occurs.",
     )
+    multimodal: bool = Field(
+        default=False,
+        description="Whether the agent is multimodal.",
+    )
     code_execution_mode: Literal["safe", "unsafe"] = Field(
         default="safe",
         description="Mode for code execution: 'safe' (using Docker) or 'unsafe' (direct execution).",
@@ -406,6 +410,10 @@ def get_delegation_tools(self, agents: List[BaseAgent]):
         tools = agent_tools.tools()
         return tools
 
+    def get_multimodal_tools(self):
+        from crewai.tools.agent_tools.add_image_tool import AddImageTool  # function-scope import, resolved on each call
+        return [AddImageTool()]  # a new single-element list with a new tool instance per call
+
     def get_code_execution_tools(self):
         try:
             from crewai_tools import CodeInterpreterTool

diff --git a/src/crewai/agents/crew_agent_executor.py b/src/crewai/agents/crew_agent_executor.py
@@ -143,10 +143,20 @@ def _invoke_loop(self, formatted_answer=None):
                         tool_result = self._execute_tool_and_check_finality(
                             formatted_answer
                         )
-                        if self.step_callback:
-                            self.step_callback(tool_result)
 
-                        formatted_answer.text += f"\nObservation: {tool_result.result}"
+                        # Directly append the result to the messages if the
+                        # tool is "Add image to content" in case of multimodal
+                        # agents
+                        if formatted_answer.tool == "Add image to content":
+                            self.messages.append(tool_result.result)
+                            continue
+
+                        else:
+                            if self.step_callback:
+                                self.step_callback(tool_result)
+
+                            formatted_answer.text += f"\nObservation: {tool_result.result}"
+
                         formatted_answer.result = tool_result.result
                         if tool_result.result_as_answer:
                             return AgentFinish(

diff --git a/src/crewai/crew.py b/src/crewai/crew.py
@@ -35,6 +35,7 @@
 from crewai.tasks.task_output import TaskOutput
 from crewai.telemetry import Telemetry
 from crewai.tools.agent_tools.agent_tools import AgentTools
+from crewai.tools.base_tool import Tool
 from crewai.types.usage_metrics import UsageMetrics
 from crewai.utilities import I18N, FileHandler, Logger, RPMController
 from crewai.utilities.constants import TRAINING_DATA_FILE
@@ -533,9 +534,6 @@ def kickoff(
             if not agent.function_calling_llm:  # type: ignore # "BaseAgent" has no attribute "function_calling_llm"
                 agent.function_calling_llm = self.function_calling_llm  # type: ignore # "BaseAgent" has no attribute "function_calling_llm"
 
-            if agent.allow_code_execution:  # type: ignore # BaseAgent" has no attribute "allow_code_execution"
-                agent.tools += agent.get_code_execution_tools()  # type: ignore # "BaseAgent" has no attribute "get_code_execution_tools"; maybe "get_delegation_tools"?
-
             if not agent.step_callback:  # type: ignore # "BaseAgent" has no attribute "step_callback"
                 agent.step_callback = self.step_callback  # type: ignore # "BaseAgent" has no attribute "step_callback"
 
@@ -672,7 +670,6 @@ def _create_manager_agent(self):
                 )
                 manager.tools = []
                 raise Exception("Manager agent should not have tools")
-            manager.tools = self.manager_agent.get_delegation_tools(self.agents)
         else:
             self.manager_llm = (
                 getattr(self.manager_llm, "model_name", None)
@@ -684,6 +681,7 @@ def _create_manager_agent(self):
                 goal=i18n.retrieve("hierarchical_manager_agent", "goal"),
                 backstory=i18n.retrieve("hierarchical_manager_agent", "backstory"),
                 tools=AgentTools(agents=self.agents).tools(),
+                allow_delegation=True,
                 llm=self.manager_llm,
                 verbose=self.verbose,
             )
@@ -726,7 +724,14 @@ def _execute_tasks(
                     f"No agent available for task: {task.description}. Ensure that either the task has an assigned agent or a manager agent is provided."
                 )
 
-            self._prepare_agent_tools(task)
+            # Determine which tools to use - task tools take precedence over agent tools
+            tools_for_task = task.tools if task.tools else agent_to_use.tools or []
+            tools_for_task = self._prepare_tools(
+                agent_to_use,
+                task,
+                tools_for_task
+            )
+
             self._log_task_start(task, agent_to_use.role)
 
             if isinstance(task, ConditionalTask):
@@ -743,7 +748,7 @@ def _execute_tasks(
                 future = task.execute_async(
                     agent=agent_to_use,
                     context=context,
-                    tools=agent_to_use.tools,
+                    tools=tools_for_task,
                 )
                 futures.append((task, future, task_index))
             else:
@@ -755,7 +760,7 @@ def _execute_tasks(
                 task_output = task.execute_sync(
                     agent=agent_to_use,
                     context=context,
-                    tools=agent_to_use.tools,
+                    tools=tools_for_task,
                 )
                 task_outputs = [task_output]
                 self._process_task_result(task, task_output)
@@ -792,60 +797,82 @@ def _handle_conditional_task(
             return skipped_task_output
         return None
 
-    def _prepare_agent_tools(self, task: Task):
-        if self.process == Process.hierarchical:
-            if self.manager_agent:
-                self._update_manager_tools(task)
-            else:
-                raise ValueError("Manager agent is required for hierarchical process.")
-        elif task.agent and task.agent.allow_delegation:
-            self._add_delegation_tools(task)
+    def _prepare_tools(self, agent: BaseAgent, task: Task, tools: List[Tool]):
+        """Return `tools` merged with the extra tools enabled for `agent`.
+
+        Delegation tools come first (ValueError if delegation is allowed in a
+        hierarchical process without a manager agent), then code-execution
+        tools, then multimodal tools; each step goes through a merge helper.
+        """
+        if agent.allow_delegation:
+            if self.process != Process.hierarchical:
+                tools = self._add_delegation_tools(task, tools)
+            elif self.manager_agent:
+                tools = self._update_manager_tools(task, tools)
+            else:
+                raise ValueError("Manager agent is required for hierarchical process.")
+        # BaseAgent does not declare these flags; treat a missing one as False.
+        if getattr(agent, "allow_code_execution", False):
+            tools = self._add_code_execution_tools(agent, tools)
+        if getattr(agent, "multimodal", False):
+            tools = self._add_multimodal_tools(agent, tools)
+        return tools
 
     def _get_agent_to_use(self, task: Task) -> Optional[BaseAgent]:
         if self.process == Process.hierarchical:
             return self.manager_agent
         return task.agent
 
-    def _add_delegation_tools(self, task: Task):
+    def _merge_tools(self, existing_tools: List[Tool], new_tools: List[Tool]) -> List[Tool]:
+        """Combine two tool lists, letting `new_tools` win on name clashes.
+
+        Tools in `existing_tools` whose name also appears in `new_tools` are
+        dropped, then every new tool is appended after the survivors. When
+        `new_tools` is empty, `existing_tools` is returned unchanged.
+        """
+        if not new_tools:
+            return existing_tools
+
+        # Names claimed by the incoming tools; used to filter out stale ones.
+        replaced_names = {new_tool.name for new_tool in new_tools}
+        kept = [old for old in existing_tools if old.name not in replaced_names]
+
+        return kept + list(new_tools)
+
+    def _inject_delegation_tools(self, tools: List[Tool], task_agent: BaseAgent, agents: List[BaseAgent]):
+        # Merge the delegation tools `task_agent` builds for `agents` into `tools`.
+        return self._merge_tools(tools, task_agent.get_delegation_tools(agents))
+
+    def _add_multimodal_tools(self, agent: BaseAgent, tools: List[Tool]):
+        # Merge the agent's multimodal tools into `tools`; same-named entries are replaced.
+        return self._merge_tools(tools, agent.get_multimodal_tools())
+
+    def _add_code_execution_tools(self, agent: BaseAgent, tools: List[Tool]):
+        # Merge the agent's code-execution tools into `tools`; same-named entries are replaced.
+        return self._merge_tools(tools, agent.get_code_execution_tools())
+
+    def _add_delegation_tools(self, task: Task, tools: List[Tool]):
         agents_for_delegation = [agent for agent in self.agents if agent != task.agent]
         if len(self.agents) > 1 and len(agents_for_delegation) > 0 and task.agent:
-            delegation_tools = task.agent.get_delegation_tools(agents_for_delegation)
-
-            # Add tools if they are not already in task.tools
-            for new_tool in delegation_tools:
-                # Find the index of the tool with the same name
-                existing_tool_index = next(
-                    (
-                        index
-                        for index, tool in enumerate(task.tools or [])
-                        if tool.name == new_tool.name
-                    ),
-                    None,
-                )
-                if not task.tools:
-                    task.tools = []
-
-                if existing_tool_index is not None:
-                    # Replace the existing tool
-                    task.tools[existing_tool_index] = new_tool
-                else:
-                    # Add the new tool
-                    task.tools.append(new_tool)
+            if not tools:  # a falsy `tools` (None or empty) is replaced by a new list
+                tools = []
+            tools = self._inject_delegation_tools(tools, task.agent, agents_for_delegation)
+        return tools  # returned as received when the condition above is false
 
     def _log_task_start(self, task: Task, role: str = "None"):
         if self.output_log_file:
             self._file_handler.log(
                 task_name=task.name, task=task.description, agent=role, status="started"
             )
 
-    def _update_manager_tools(self, task: Task):
+    def _update_manager_tools(self, task: Task, tools: List[Tool]):
         if self.manager_agent:
             if task.agent:
-                self.manager_agent.tools = task.agent.get_delegation_tools([task.agent])
+                tools = self._inject_delegation_tools(tools, task.agent, [task.agent])
             else:
-                self.manager_agent.tools = self.manager_agent.get_delegation_tools(
-                    self.agents
-                )
+                tools = self._inject_delegation_tools(tools, self.manager_agent, self.agents)
+        # The merged list is only returned; self.manager_agent.tools is not reassigned here.
+        return tools
 
     def _get_context(self, task: Task, task_outputs: List[TaskOutput]):
         context = (

diff --git a/src/crewai/llm.py b/src/crewai/llm.py
@@ -64,6 +64,8 @@ def flush(self):
     "llama3-70b-8192": 8192,
     "llama3-8b-8192": 8192,
     "mixtral-8x7b-32768": 32768,
+    "llama-3.3-70b-versatile": 128000,
+    "llama-3.3-70b-instruct": 128000,
 }
 
 DEFAULT_CONTEXT_WINDOW_SIZE = 8192

diff --git a/src/crewai/memory/storage/mem0_storage.py b/src/crewai/memory/storage/mem0_storage.py
@@ -1,9 +1,8 @@
 import os
 from typing import Any, Dict, List
 
-from mem0 import MemoryClient
-
 from crewai.memory.storage.interface import Storage
+from mem0 import MemoryClient
 
 
 class Mem0Storage(Storage):

diff --git a/src/crewai/memory/storage/rag_storage.py b/src/crewai/memory/storage/rag_storage.py
@@ -7,7 +7,6 @@
 from typing import Any, Dict, List, Optional
 
 from chromadb.api import ClientAPI
-
 from crewai.memory.storage.base_rag_storage import BaseRAGStorage
 from crewai.utilities import EmbeddingConfigurator
 from crewai.utilities.constants import MAX_FILE_NAME_LENGTH

diff --git a/src/crewai/tools/agent_tools/add_image_tool.py b/src/crewai/tools/agent_tools/add_image_tool.py
@@ -0,0 +1,40 @@
+from crewai.tools.base_tool import BaseTool
+from pydantic import BaseModel, Field
+
+
+class AddImageToolSchema(BaseModel):
+    # Argument model for the image tool: a required image location plus an optional prompt.
+    image_url: str = Field(..., description="The URL or path of the image to add")
+    action: str = Field(
+        description="Optional context or question about the image", default="Please provide a detailed description of this image, including all visual elements, context, and any notable details you can observe."
+    )
+
+
+class AddImageTool(BaseTool):
+    """Tool for adding images to the content"""
+
+    name: str = "Add image to content"
+    description: str = "See image to understand its content, you can optionally ask a question about the image"
+    args_schema: type[BaseModel] = AddImageToolSchema
+
+    def _run(
+        self,
+        image_url: str,
+        action: str = None,
+        **kwargs,
+    ) -> dict:
+        """Build a message dict pairing a text prompt with the image.
+
+        `action` is the text part; when it is None or empty, a default
+        request for a detailed description is used instead. Extra keyword
+        arguments are accepted and ignored. Returns a dict with "role" set
+        to "user" and "content" holding the text and image_url parts.
+        """
+        action = action or "Please provide a detailed description of this image, including all visual elements, context, and any notable details you can observe."
+        return {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": action},
+                {"type": "image_url", "image_url": {"url": image_url}},
+            ],
+        }
diff --git a/src/crewai/tools/agent_tools/ask_question_tool.py b/src/crewai/tools/agent_tools/ask_question_tool.py
@@ -1,8 +1,7 @@
 from typing import Optional
 
-from pydantic import BaseModel, Field
-
 from crewai.tools.agent_tools.base_agent_tools import BaseAgentTool
+from pydantic import BaseModel, Field
 
 
 class AskQuestionToolSchema(BaseModel):

diff --git a/src/crewai/tools/agent_tools/base_agent_tools.py b/src/crewai/tools/agent_tools/base_agent_tools.py
@@ -1,11 +1,10 @@
 from typing import Optional, Union
 
-from pydantic import Field
-
 from crewai.agents.agent_builder.base_agent import BaseAgent
 from crewai.task import Task
 from crewai.tools.base_tool import BaseTool
 from crewai.utilities import I18N
+from pydantic import Field
 
 
 class BaseAgentTool(BaseTool):

diff --git a/src/crewai/tools/agent_tools/delegate_work_tool.py b/src/crewai/tools/agent_tools/delegate_work_tool.py
@@ -1,8 +1,7 @@
 from typing import Optional
 
-from pydantic import BaseModel, Field
-
 from crewai.tools.agent_tools.base_agent_tools import BaseAgentTool
+from pydantic import BaseModel, Field
 
 
 class DelegateWorkToolSchema(BaseModel):

diff --git a/src/crewai/tools/tool_usage.py b/src/crewai/tools/tool_usage.py
@@ -10,6 +10,7 @@
 from crewai.task import Task
 from crewai.telemetry import Telemetry
 from crewai.tools import BaseTool
+from crewai.tools.structured_tool import CrewStructuredTool
 from crewai.tools.tool_calling import InstructorToolCalling, ToolCalling
 from crewai.tools.tool_usage_events import ToolUsageError, ToolUsageFinished
 from crewai.utilities import I18N, Converter, ConverterError, Printer
@@ -103,6 +104,19 @@ def use(
             if self.agent.verbose:
                 self._printer.print(content=f"\n\n{error}\n", color="red")
             return error
+
+        if isinstance(tool, CrewStructuredTool) and tool.name == 'Add image to content':
+            try:
+                result = self._use(tool_string=tool_string, tool=tool, calling=calling)
+                return result
+
+            except Exception as e:
+                error = getattr(e, "message", str(e))
+                self.task.increment_tools_errors()
+                if self.agent.verbose:
+                    self._printer.print(content=f"\n\n{error}\n", color="red")
+                return error
+
         return f"{self._use(tool_string=tool_string, tool=tool, calling=calling)}"  # type: ignore # BUG?: "_use" of "ToolUsage" does not return a value (it only ever returns None)
 
     def _use(

diff --git a/src/crewai/utilities/evaluators/crew_evaluator_handler.py b/src/crewai/utilities/evaluators/crew_evaluator_handler.py
@@ -1,14 +1,13 @@
 from collections import defaultdict
 
-from pydantic import BaseModel, Field
-from rich.box import HEAVY_EDGE
-from rich.console import Console
-from rich.table import Table
-
 from crewai.agent import Agent
 from crewai.task import Task
 from crewai.tasks.task_output import TaskOutput
 from crewai.telemetry import Telemetry
+from pydantic import BaseModel, Field
+from rich.box import HEAVY_EDGE
+from rich.console import Console
+from rich.table import Table
 
 
 class TaskEvaluationPydanticOutput(BaseModel):

diff --git a/src/crewai/utilities/evaluators/task_evaluator.py b/src/crewai/utilities/evaluators/task_evaluator.py
@@ -1,9 +1,8 @@
 from typing import List
 
-from pydantic import BaseModel, Field
-
 from crewai.utilities import Converter
 from crewai.utilities.pydantic_schema_parser import PydanticSchemaParser
+from pydantic import BaseModel, Field
 
 agentops = None
 try: