From 5aa56b29d0a2051b3ae8ed43f9799c5d7dc9bedb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kaan=20=C3=87ayl=C4=B1?= <38523756+kaancayli@users.noreply.github.com> Date: Wed, 27 Nov 2024 23:56:57 +0100 Subject: [PATCH] Migrate the course-chat pipeline to native tool calling agent (#179) --- app/pipeline/chat/course_chat_pipeline.py | 67 +++++++------------ .../chat/exercise_chat_agent_pipeline.py | 24 +------ .../prompts/iris_course_chat_prompts.py | 48 ------------- .../iris_course_chat_prompts_elicit.py | 48 ------------- app/pipeline/shared/utils.py | 23 +++++++ 5 files changed, 50 insertions(+), 160 deletions(-) create mode 100644 app/pipeline/shared/utils.py diff --git a/app/pipeline/chat/course_chat_pipeline.py b/app/pipeline/chat/course_chat_pipeline.py index 31b5a8c3..d934222f 100644 --- a/app/pipeline/chat/course_chat_pipeline.py +++ b/app/pipeline/chat/course_chat_pipeline.py @@ -6,13 +6,13 @@ from typing import List, Optional, Union import pytz -from langchain.agents import create_structured_chat_agent, AgentExecutor -from langchain_core.output_parsers import StrOutputParser +from langchain.agents import create_tool_calling_agent, AgentExecutor +from langchain_core.messages import SystemMessage +from langchain_core.output_parsers import JsonOutputParser from langchain_core.prompts import ( ChatPromptTemplate, ) from langchain_core.runnables import Runnable -from langchain_core.tools import tool from langsmith import traceable from weaviate.collections.classes.filters import Filter @@ -21,6 +21,7 @@ ) from .lecture_chat_pipeline import LectureChatPipeline from ..shared.citation_pipeline import CitationPipeline +from ..shared.utils import generate_structured_tools_from_functions from ...common.message_converters import convert_iris_message_to_langchain_message from ...common.pyris_message import PyrisMessage from ...domain.data.metrics.competency_jol_dto import CompetencyJolDTO @@ -30,7 +31,6 @@ tell_begin_agent_prompt, tell_chat_history_exists_prompt, tell_no_chat_history_prompt, - tell_format_reminder_prompt, tell_begin_agent_jol_prompt, ) from ..prompts.iris_course_chat_prompts_elicit import ( @@ -38,7 +38,6 @@ elicit_begin_agent_prompt, elicit_chat_history_exists_prompt, elicit_no_chat_history_prompt, - elicit_format_reminder_prompt, elicit_begin_agent_jol_prompt, ) from ...domain import CourseChatPipelineExecutionDTO @@ -97,13 +96,9 @@ def __init__( request_handler = CapabilityRequestHandler( requirements=RequirementList( gpt_version_equivalent=4.5, - context_length=16385, - json_mode=True, ) ) - completion_args = CompletionArguments( - temperature=0, max_tokens=2000, response_format="JSON" - ) + completion_args = CompletionArguments(temperature=0.5, max_tokens=2000) self.llm = IrisLangchainChatModel( request_handler=request_handler, completion_args=completion_args ) @@ -115,7 +110,7 @@ def __init__( self.citation_pipeline = CitationPipeline() # Create the pipeline - self.pipeline = self.llm | StrOutputParser() + self.pipeline = self.llm | JsonOutputParser() self.tokens = [] def __repr__(self): @@ -134,7 +129,6 @@ def __call__(self, dto: CourseChatPipelineExecutionDTO, **kwargs): logger.debug(dto.model_dump_json(indent=4)) # Define tools - @tool def get_exercise_list() -> list[dict]: """ Get the list of exercises in the course. 
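Note on the `self.pipeline = self.llm | JsonOutputParser()` change above: the composed runnable now yields parsed Python objects instead of raw text. A minimal sketch of that composition, using a generic chat model as a stand-in for the project's `IrisLangchainChatModel` (model name and prompt below are illustrative, not from this patch):

```python
from langchain_core.output_parsers import JsonOutputParser
from langchain_openai import ChatOpenAI  # stand-in for IrisLangchainChatModel

llm = ChatOpenAI(model="gpt-4o", temperature=0.5)
pipeline = llm | JsonOutputParser()  # parses the model's text output into Python objects

# The parser only succeeds if the model emits JSON (bare or inside a fenced code block):
result = pipeline.invoke('Reply with a JSON object mapping "status" to "ok".')
print(result)  # e.g. {'status': 'ok'}
```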
@@ -158,7 +152,6 @@ def get_exercise_list():
                 exercises.append(exercise_dict)
             return exercises
 
-        @tool
         def get_course_details() -> dict:
             """
             Get the following course details: course name, course description, programming language, course start date,
@@ -191,7 +184,6 @@ def get_course_details() -> dict:
                 ),
             }
 
-        @tool
         def get_student_exercise_metrics(
             exercise_ids: typing.List[int],
         ) -> Union[dict[int, dict], str]:
@@ -232,7 +224,6 @@ def get_student_exercise_metrics(
             else:
                 return "No data available! Do not requery."
 
-        @tool
         def get_competency_list() -> list:
             """
             Get the list of competencies in the course.
@@ -243,25 +234,24 @@ def get_competency_list() -> list:
             regarding their progress overall or in a specific area.
             A competency has the following attributes: name, description, taxonomy, soft due date, optional,
             and mastery threshold.
-            The response may include metrics for each competency, such as progress and mastery (0%-100%).
+            The response may include metrics for each competency, such as progress and mastery (0% - 100%).
             These are system-generated.
-            The judgment of learning (JOL) values indicate the self-reported confidence by the student (0-5, 5 star).
-            The object describing it also indicates the system-computed confidence at the time when the student
+            The judgment of learning (JOL) values indicate the self-reported mastery by the student (0 - 5, on a 5-star scale).
+            The object describing it also indicates the system-computed mastery at the time when the student
             added their JoL assessment.
             """
             self.callback.in_progress("Reading competency list ...")
             if not dto.metrics or not dto.metrics.competency_metrics:
                 return dto.course.competencies
             competency_metrics = dto.metrics.competency_metrics
-            weight = 2.0 / 3.0
             return [
                 {
                     "info": competency_metrics.competency_information.get(comp, None),
                     "exercise_ids": competency_metrics.exercises.get(comp, []),
                     "progress": competency_metrics.progress.get(comp, 0),
-                    "mastery": (
-                        (1 - weight) * competency_metrics.progress.get(comp, 0)
-                        + weight * competency_metrics.confidence.get(comp, 0)
+                    "mastery": get_mastery(
+                        competency_metrics.progress.get(comp, 0),
+                        competency_metrics.confidence.get(comp, 0),
                    ),
                     "judgment_of_learning": (
                         competency_metrics.jol_values.get(comp).json()
@@ -273,7 +263,6 @@ def get_competency_list() -> list:
             for comp in competency_metrics.competency_information
         ]
 
-        @tool
        def lecture_content_retrieval() -> str:
             """
             Retrieve content from indexed lecture slides.
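Aside on the new `get_mastery` call above: the helper's definition is not part of this diff. Assuming it simply preserves the inline weighting that the patch removes (confidence weighted 2/3, progress 1/3), a sketch of it would be:

```python
def get_mastery(progress: float, confidence: float) -> float:
    """Blend progress and confidence into a single mastery score.

    Hypothetical sketch: mirrors the inline computation removed above,
    which weighted confidence at 2/3 and progress at 1/3.
    """
    weight = 2.0 / 3.0  # confidence weight, taken from the removed inline code
    return (1 - weight) * progress + weight * confidence
```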
@@ -309,14 +298,12 @@ def lecture_content_retrieval() -> str:
             begin_agent_prompt = tell_begin_agent_prompt
             chat_history_exists_prompt = tell_chat_history_exists_prompt
             no_chat_history_prompt = tell_no_chat_history_prompt
-            format_reminder_prompt = tell_format_reminder_prompt
             begin_agent_jol_prompt = tell_begin_agent_jol_prompt
         else:
             iris_initial_system_prompt = elicit_iris_initial_system_prompt
             begin_agent_prompt = elicit_begin_agent_prompt
             chat_history_exists_prompt = elicit_chat_history_exists_prompt
             no_chat_history_prompt = elicit_no_chat_history_prompt
-            format_reminder_prompt = elicit_format_reminder_prompt
             begin_agent_jol_prompt = elicit_begin_agent_jol_prompt
 
         try:
@@ -374,47 +361,43 @@ def lecture_content_retrieval() -> str:
                 ]
                 self.prompt = ChatPromptTemplate.from_messages(
                     [
-                        (
-                            "system",
+                        SystemMessage(
                             initial_prompt_with_date
                             + "\n"
                             + chat_history_exists_prompt
                             + "\n"
-                            + agent_prompt,
+                            + agent_prompt
                         ),
                         *chat_history_messages,
-                        ("system", format_reminder_prompt),
+                        ("placeholder", "{agent_scratchpad}"),
                     ]
                 )
             else:
                 self.prompt = ChatPromptTemplate.from_messages(
                     [
-                        (
-                            "system",
-                            initial_prompt_with_date
-                            + "\n"
-                            + agent_prompt
-                            + "\n"
-                            + format_reminder_prompt,
+                        SystemMessage(
+                            initial_prompt_with_date + "\n" + agent_prompt + "\n"
                         ),
+                        ("placeholder", "{agent_scratchpad}"),
                     ]
                 )
 
-            tools = [
+            tool_list = [
                 get_course_details,
                 get_exercise_list,
                 get_student_exercise_metrics,
                 get_competency_list,
             ]
             if self.should_allow_lecture_tool(dto.course.id):
-                tools.append(lecture_content_retrieval)
+                tool_list.append(lecture_content_retrieval)
 
-            agent = create_structured_chat_agent(
+            tools = generate_structured_tools_from_functions(tool_list)
+            # Unlike the exercise chat agent, this pipeline additionally needs the tools passed as a prompt parameter; the reason is unclear, but it resolves the issue.
+ params.update({"tools": tools}) + agent = create_tool_calling_agent( llm=self.llm, tools=tools, prompt=self.prompt ) - agent_executor = AgentExecutor( - agent=agent, tools=tools, verbose=True, max_iterations=5 - ) + agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=False) out = None self.callback.in_progress() diff --git a/app/pipeline/chat/exercise_chat_agent_pipeline.py b/app/pipeline/chat/exercise_chat_agent_pipeline.py index 12edac52..27e67262 100644 --- a/app/pipeline/chat/exercise_chat_agent_pipeline.py +++ b/app/pipeline/chat/exercise_chat_agent_pipeline.py @@ -2,7 +2,7 @@ import traceback from datetime import datetime from operator import attrgetter -from typing import List, Callable +from typing import List import pytz from langchain.agents import create_tool_calling_agent, AgentExecutor @@ -10,7 +10,6 @@ from langchain_core.output_parsers import StrOutputParser, JsonOutputParser from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate from langchain_core.runnables import Runnable -from langchain_core.tools import StructuredTool from langsmith import traceable from .code_feedback_pipeline import CodeFeedbackPipeline @@ -29,6 +28,7 @@ from ..shared.citation_pipeline import CitationPipeline from ..shared.reranker_pipeline import RerankerPipeline +from ..shared.utils import generate_structured_tools_from_functions from ...common.PipelineEnum import PipelineEnum from ...common.message_converters import convert_iris_message_to_langchain_human_message from ...common.pyris_message import PyrisMessage, IrisMessageRole @@ -65,26 +65,6 @@ def add_exercise_context_to_prompt( """ -def generate_structured_tool_from_function(tool_function: Callable) -> StructuredTool: - """ - Generates a structured tool from a function - :param tool_function: The tool function - :return: The structured tool - """ - return StructuredTool.from_function(tool_function) - - -def generate_structured_tools_from_functions( - tools: List[Callable], -) -> List[StructuredTool]: - """ - Generates a list of structured tools from a list of functions - :param tools: The list of tool functions - :return: The list of structured tools - """ - return [generate_structured_tool_from_function(_tool) for _tool in tools] - - def convert_chat_history_to_str(chat_history: List[PyrisMessage]) -> str: """ Converts the chat history to a string diff --git a/app/pipeline/prompts/iris_course_chat_prompts.py b/app/pipeline/prompts/iris_course_chat_prompts.py index aa228176..b836d227 100644 --- a/app/pipeline/prompts/iris_course_chat_prompts.py +++ b/app/pipeline/prompts/iris_course_chat_prompts.py @@ -29,37 +29,6 @@ * The mastery increases when the student proportionally achieved more points in exercises marked as hard compared to the distribution of points in the competency and vice versa. * A similar measurement applies to easy exercises, where the mastery is decreased for achieving proportionally more points in easy exercises. * If the student quickly solves programming exercises with a score of at least 80% based on the amount of pushes, the mastery increases. There is no decrease in mastery for slower students! - -Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input). -Valid "action" values: "Final Answer" or {tool_names} -Provide only ONE action per $JSON_BLOB, as shown: -``` -{{ - "thought": "(First|Next), I need to ... 
so ...", - "action": $TOOL_NAME, - "action_input": $INPUT -}} -``` - -Follow this format: - -Question: input to answer -Thought: consider previous and subsequent steps -Action: -``` -$JSON_BLOB -``` - -Observation: action result -... (repeat Thought/Action/Observation N times) -Thought: I know what to respond -Action: -``` -{{ - "thought": "I know what to respond", - "action": "Final Answer", - "action_input": "Final response to human" -}} """ tell_chat_history_exists_prompt = """ @@ -105,23 +74,6 @@ messages must ALWAYS BE NEW AND ORIGINAL. It MUST NOT be a copy of any previous message. Do not repeat yourself. Do not repeat yourself. Do not repeat yourself. """ -tell_format_reminder_prompt = """ -Reminder to ALWAYS respond with a valid json blob of a single action. -Respond directly if appropriate (with "Final Answer" as action). -You are not forced to use tools if the question is off-topic or chatter only. -Never invoke the same tool twice in a row with the same arguments - they will always return the same output. -Remember to ALWAYS respond with valid JSON in schema: -{{ - "thought": "Your thought process", - "action": $TOOL_NAME, - "action_input": $INPUT -}} -Valid "action" values: "Final Answer" or {tool_names} - -This is your thinking history to generate this answer, your "memory" while solving this task iteratively. If this is the first call to you it might be empty: -{agent_scratchpad} -""" - tell_course_system_prompt = """ These are the details about the course: - Course name: {course_name} diff --git a/app/pipeline/prompts/iris_course_chat_prompts_elicit.py b/app/pipeline/prompts/iris_course_chat_prompts_elicit.py index 03e07c2b..355f6982 100644 --- a/app/pipeline/prompts/iris_course_chat_prompts_elicit.py +++ b/app/pipeline/prompts/iris_course_chat_prompts_elicit.py @@ -37,37 +37,6 @@ * The mastery increases when the student proportionally achieved more points in exercises marked as hard compared to the distribution of points in the competency and vice versa. * A similar measurement applies to easy exercises, where the mastery is decreased for achieving proportionally more points in easy exercises. * If the student quickly solves programming exercises with a score of at least 80% based on the amount of pushes, the mastery increases. There is no decrease in mastery for slower students! - -Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input). -Valid "action" values: "Final Answer" or {tool_names} -Provide only ONE action per $JSON_BLOB, as shown: -``` -{{ - "thought": "(First|Next), I need to ... so ...", - "action": $TOOL_NAME, - "action_input": $INPUT -}} -``` - -Follow this format: - -Question: input to answer -Thought: consider previous and subsequent steps -Action: -``` -$JSON_BLOB -``` - -Observation: action result -... (repeat Thought/Action/Observation N times) -Thought: I know what to respond -Action: -``` -{{ - "thought": "I know what to respond", - "action": "Final Answer", - "action_input": "Final response to human" -}} """ elicit_chat_history_exists_prompt = """ @@ -109,23 +78,6 @@ messages must ALWAYS BE NEW AND ORIGINAL. It MUST NOT be a copy of any previous message. """ -elicit_format_reminder_prompt = """ -Reminder to ALWAYS respond with a valid json blob of a single action. -Respond directly if appropriate (with "Final Answer" as action). -You are not forced to use tools if the question is off-topic or chatter only. 
-Never invoke the same tool twice in a row with the same arguments - they will always return the same output. -Remember to ALWAYS respond with valid JSON in schema: -{{ - "thought": "Your thought process", - "action": $TOOL_NAME, - "action_input": $INPUT -}} -Valid "action" values: "Final Answer" or {tool_names} - - -{agent_scratchpad} -""" - elicit_course_system_prompt = """ These are the details about the course: - Course name: {course_name} diff --git a/app/pipeline/shared/utils.py b/app/pipeline/shared/utils.py new file mode 100644 index 00000000..b3e73236 --- /dev/null +++ b/app/pipeline/shared/utils.py @@ -0,0 +1,23 @@ +from typing import Callable, List + +from langchain_core.tools import StructuredTool + + +def generate_structured_tool_from_function(tool_function: Callable) -> StructuredTool: + """ + Generates a structured tool from a function + :param tool_function: The tool function + :return: The structured tool + """ + return StructuredTool.from_function(tool_function) + + +def generate_structured_tools_from_functions( + tools: List[Callable], +) -> List[StructuredTool]: + """ + Generates a list of structured tools from a list of functions + :param tools: The list of tool functions + :return: The list of structured tools + """ + return [generate_structured_tool_from_function(_tool) for _tool in tools]
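For context, here is a minimal, self-contained sketch of how the new helper feeds `create_tool_calling_agent`, mirroring the migrated pipelines. The tool body, prompt text, and `ChatOpenAI` model are placeholders; the pipelines themselves use `IrisLangchainChatModel` and their own prompts:

```python
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI  # stand-in for IrisLangchainChatModel

from app.pipeline.shared.utils import generate_structured_tools_from_functions


def get_course_details() -> dict:
    """Get the course name and description."""  # the docstring becomes the tool description
    return {"name": "Sample Course", "description": "An illustrative course."}


# Plain functions no longer carry the @tool decorator; the helper wraps them.
tools = generate_structured_tools_from_functions([get_course_details])

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a course assistant."),
        ("human", "{input}"),
        ("placeholder", "{agent_scratchpad}"),  # required by the tool-calling agent
    ]
)

llm = ChatOpenAI(model="gpt-4o")
agent = create_tool_calling_agent(llm=llm, tools=tools, prompt=prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=False)

result = agent_executor.invoke({"input": "What course is this?"})
print(result["output"])
```

With native tool calling, the tool-use protocol lives in the model API rather than in the prompt, which is why the JSON-blob instructions and `format_reminder` prompts are deleted wholesale above.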