Skip to content

Commit

Permalink
Lecture content first draft ready for review
Browse files Browse the repository at this point in the history
  • Loading branch information
yassinsws committed Mar 25, 2024
1 parent 738e7a0 commit 303f6d4
Show file tree
Hide file tree
Showing 5 changed files with 146 additions and 39 deletions.
11 changes: 5 additions & 6 deletions app/content_service/Retrieval/lecture_retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ def retrieve(
user_message: str,
hybrid_factor: float,
lecture_id: int = None,
message_vector: [float] = None,
) -> List[str]:
embedding_vector: [float] = None,
) -> List[dict]:
response = self.collection.query.hybrid(
query=user_message,
filters=(
Expand All @@ -32,13 +32,12 @@ def retrieve(
else None
),
alpha=hybrid_factor,
vector=message_vector,
vector=embedding_vector,
return_properties=[
LectureSchema.PAGE_TEXT_CONTENT,
LectureSchema.PAGE_IMAGE_DESCRIPTION,
LectureSchema.COURSE_NAME,
],
limit=5,
limit=3,
)
print(json.dumps(response, indent=2))
return response
return response["data"]["Get"][self.collection.name][0]
32 changes: 6 additions & 26 deletions app/pipeline/chat/exercise_chat_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,18 @@
from ...domain.data.build_log_entry import BuildLogEntryDTO
from ...domain.data.feedback_dto import FeedbackDTO
from ..prompts.iris_tutor_chat_prompts import (
iris_initial_system_prompt,
iris_exercise_initial_system_prompt,
chat_history_system_prompt,
final_system_prompt,
guide_system_prompt,
guide_exercise_system_prompt,
)
from ...domain import TutorChatPipelineExecutionDTO
from ...domain.data.submission_dto import SubmissionDTO
from ...domain.data.message_dto import MessageDTO
from ...web.status.status_update import TutorChatStatusCallback
from .file_selector_pipeline import FileSelectorPipeline
from ...llm.langchain import IrisLangchainChatModel
from tutor_chat_pipeline import _add_conversation_to_prompt

from ..pipeline import Pipeline

Expand Down Expand Up @@ -58,7 +59,7 @@ def __call__(self, dto: TutorChatPipelineExecutionDTO, **kwargs):
# Set up the initial prompt
self.prompt = ChatPromptTemplate.from_messages(
[
("system", iris_initial_system_prompt),
("system", iris_exercise_initial_system_prompt),
("system", chat_history_system_prompt),
]
)
Expand All @@ -80,7 +81,7 @@ def __call__(self, dto: TutorChatPipelineExecutionDTO, **kwargs):
programming_language = dto.exercise.programming_language.value.lower()

# Add the chat history and user question to the prompt
self._add_conversation_to_prompt(history, query)
self.prompt = _add_conversation_to_prompt(history, query, self.prompt)

self.callback.in_progress("Looking up files in the repository...")
# Create the file selection prompt based on the current prompt
Expand Down Expand Up @@ -121,35 +122,14 @@ def __call__(self, dto: TutorChatPipelineExecutionDTO, **kwargs):
response_draft = (self.prompt | self.pipeline).invoke({})
self.prompt += AIMessagePromptTemplate.from_template(f"{response_draft}")
self.prompt += SystemMessagePromptTemplate.from_template(
guide_system_prompt
guide_exercise_system_prompt
)
response = (self.prompt | self.pipeline).invoke({})
logger.info(f"Response from Exercise chat pipeline: {response}")
self.callback.done("Generated response", final_result=response)
except Exception as e:
self.callback.error(f"Failed to generate response: {e}")

def _add_conversation_to_prompt(
self,
chat_history: List[MessageDTO],
user_question: MessageDTO,
):
"""
Adds the chat history and user question to the prompt
:param chat_history: The chat history
:param user_question: The user question
:return: The prompt with the chat history
"""
if chat_history is not None and len(chat_history) > 0:
chat_history_messages = [
message.convert_to_langchain_message() for message in chat_history
]
self.prompt += chat_history_messages
self.prompt += SystemMessagePromptTemplate.from_template(
"Now, consider the student's newest and latest input:"
)
self.prompt += user_question.convert_to_langchain_message()

def _add_student_repository_to_prompt(
self, student_repository: Dict[str, str], selected_files: List[str]
):
Expand Down
78 changes: 74 additions & 4 deletions app/pipeline/chat/lecture_chat_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,23 @@
import logging
from typing import List

from langchain_core.prompts import (
ChatPromptTemplate,
ChatPromptTemplate, AIMessagePromptTemplate, SystemMessagePromptTemplate,
)
from langchain_core.runnables import Runnable

from ..prompts.iris_tutor_chat_prompts import iris_lecture_initial_system_prompt, chat_history_system_prompt, \
guide_lecture_system_prompt
from ...content_service.Retrieval.lecture_retrieval import LectureRetrieval
from ...domain import TutorChatPipelineExecutionDTO
from ...domain.data.message_dto import MessageDTO
from ...vector_database.lectureschema import LectureSchema
from ...web.status.status_update import TutorChatStatusCallback
from ...llm.langchain import IrisLangchainChatModel
from ...llm.langchain import IrisLangchainChatModel, IrisLangchainEmbeddingModel
from ..pipeline import Pipeline
from weaviate import WeaviateClient
from ...vector_database.db import VectorDatabase
from tutor_chat_pipeline import _add_conversation_to_prompt

logger = logging.getLogger(__name__)

Expand All @@ -16,16 +26,22 @@ class LectureChatPipeline(Pipeline):
"""Exercise chat pipeline that answers exercises related questions from students."""

llm: IrisLangchainChatModel
llm_embedding: IrisLangchainEmbeddingModel
pipeline: Runnable
callback: TutorChatStatusCallback
prompt: ChatPromptTemplate
db: WeaviateClient
retriever: LectureRetrieval

def __init__(self, callback: TutorChatStatusCallback, pipeline: Runnable, llm: IrisLangchainChatModel):
def __init__(self, callback: TutorChatStatusCallback, pipeline: Runnable, llm: IrisLangchainChatModel,
llm_embedding: IrisLangchainEmbeddingModel):
super().__init__(implementation_id="lecture_chat_pipeline")
self.llm = llm
self.llm_embedding = llm_embedding
self.callback = callback
self.pipeline = pipeline
self.db = VectorDatabase().client
self.retriever = LectureRetrieval(self.db)

def __repr__(self):
return f"{self.__class__.__name__}(llm={self.llm})"
Expand All @@ -38,4 +54,58 @@ def __call__(self, dto: TutorChatPipelineExecutionDTO, **kwargs):
Runs the pipeline
:param kwargs: The keyword arguments
"""
pass
# Set up the initial prompt
self.prompt = ChatPromptTemplate.from_messages(
[
("system", iris_lecture_initial_system_prompt),
("system", chat_history_system_prompt),
]
)
logger.info("Running tutor chat pipeline...")
history: List[MessageDTO] = dto.chat_history[:-1]
query: MessageDTO = dto.chat_history[-1]

# Add the chat history and user question to the prompt
self.prompt = _add_conversation_to_prompt(history, query, self.prompt)
self.callback.in_progress("Retrieve relevant chunks of the lectures...")
retrieved_lecture_chunks = self.retriever.retrieve(query.contents[0].text_content,
hybrid_factor=1,
embedding_vector=self.llm_embedding.embed_query(
query.contents[0].text_content))
self._add_relevant_chunks_to_prompt(retrieved_lecture_chunks)
self.prompt += SystemMessagePromptTemplate.from_template(
"Answer the user query based on the above provided Context"
)
# Retrieve relevant chunks of the lectures
self.callback.in_progress("Generating response...")

try:
response_draft = (self.prompt | self.pipeline).invoke({})
self.prompt += AIMessagePromptTemplate.from_template(f"{response_draft}")
self.prompt += SystemMessagePromptTemplate.from_template(
guide_lecture_system_prompt
)
response = (self.prompt | self.pipeline).invoke({})
logger.info(f"Response from Lecture chat pipeline: {response}")
self.callback.done("Generated response", final_result=response)
except Exception as e:
self.callback.error(f"Failed to generate response: {e}")

def _add_relevant_chunks_to_prompt(
    self,
    retrieved_lecture_chunks: List[dict],
):
    """
    Adds the relevant chunks of the lecture to the prompt.

    Each chunk is appended as two system messages: the page text content
    and the page image description, both labelled with their LectureSchema
    field name.

    :param retrieved_lecture_chunks: The retrieved lecture chunks; each is a
        dict keyed by LectureSchema fields — assumed to always contain
        PAGE_TEXT_CONTENT and PAGE_IMAGE_DESCRIPTION (TODO confirm against
        the retriever's return_properties).
    """
    # Nothing retrieved -> leave the prompt untouched.
    if not retrieved_lecture_chunks:
        return
    # Emit the introductory header exactly once, not once per chunk,
    # so the prompt is not bloated with repeated identical system messages.
    self.prompt += SystemMessagePromptTemplate.from_template(
        "Next you will find the relevant chunks of the lecture:"
    )
    for chunk in retrieved_lecture_chunks:
        # NOTE(review): from_template treats "{...}" in the chunk text as
        # template variables; lecture content containing braces would break
        # formatting — consider escaping or using plain messages. Verify.
        self.prompt += SystemMessagePromptTemplate.from_template(
            LectureSchema.PAGE_TEXT_CONTENT + ": " + chunk[LectureSchema.PAGE_TEXT_CONTENT]
        )
        self.prompt += SystemMessagePromptTemplate.from_template(
            LectureSchema.PAGE_IMAGE_DESCRIPTION + ": " + chunk[LectureSchema.PAGE_IMAGE_DESCRIPTION]
        )
27 changes: 26 additions & 1 deletion app/pipeline/chat/tutor_chat_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import logging
from typing import List

from exercise_chat_pipeline import ExerciseChatPipeline
from lecture_chat_pipeline import LectureChatPipeline
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import PromptTemplate, SystemMessagePromptTemplate, ChatPromptTemplate
from langchain_core.runnables import Runnable
from ...domain import TutorChatPipelineExecutionDTO
from ...domain.data.message_dto import MessageDTO
from ...web.status.status_update import TutorChatStatusCallback
from ...llm import BasicRequestHandler, CompletionArguments
from ...llm.langchain import IrisLangchainChatModel
Expand All @@ -13,6 +16,28 @@
logger = logging.getLogger(__name__)


def _add_conversation_to_prompt(
    chat_history: List[MessageDTO],
    user_question: MessageDTO,
    prompt: ChatPromptTemplate
):
    """
    Adds the chat history and user question to the prompt.

    :param chat_history: The chat history; may be None or empty, in which
        case only the user question is appended.
    :param user_question: The user question
    :param prompt: The prompt template to extend
    :return: The prompt with the chat history and user question appended
    """
    if chat_history is not None and len(chat_history) > 0:
        chat_history_messages = [
            message.convert_to_langchain_message() for message in chat_history
        ]
        prompt += chat_history_messages
        prompt += SystemMessagePromptTemplate.from_template(
            "Now, consider the student's newest and latest input:"
        )
    prompt += user_question.convert_to_langchain_message()
    # Callers assign the result back (self.prompt = _add_conversation_to_prompt(...));
    # without this return they would set their prompt to None.
    return prompt


class TutorChatPipeline(Pipeline):
"""Tutor chat pipeline that answers exercises related questions from students."""

Expand Down
37 changes: 35 additions & 2 deletions app/pipeline/prompts/iris_tutor_chat_prompts.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
iris_initial_system_prompt = """You're Iris, the AI programming tutor integrated into Artemis, the online learning
iris_exercise_initial_system_prompt = """You're Iris, the AI programming tutor integrated into Artemis, the online learning
platform of the Technical University of Munich (TUM).
You are a guide and an educator. Your main goal is to teach students problem-solving skills using a programming
Expand Down Expand Up @@ -53,6 +53,20 @@
A: I am Iris, the AI programming tutor integrated into Artemis, the online learning platform of the Technical
University of Munich (TUM)."""

iris_lecture_initial_system_prompt="""You're Iris, the AI tutor integrated into Artemis, the online learning
platform of the Technical University of Munich (TUM).
You are a guide and an educator. Your main goal is to help students understand different complex topics from their
lectures. You automatically get access to the lectures the students are asking about. If there is not enough context
about the student's question, ask for a more specific question; do not answer from your own knowledge.
An excellent educator doesn't guess, so if you don't know something, say "Sorry, I don't know" and tell
the student to ask a human tutor.
In German, you can address the student with the informal 'du'.
"""


chat_history_system_prompt = """This is the chat history of your conversation with the student so far. Read it so you
know what already happened, but never re-use any message you already wrote. Instead, always write new and original
responses."""
Expand All @@ -72,8 +86,27 @@
before.
- DO NOT UNDER ANY CIRCUMSTANCES repeat any message you have already sent before or send a similar message. Your
messages must ALWAYS BE NEW AND ORIGINAL. Think about alternative ways to guide the student in these cases."""
guide_lecture_system_prompt="""
Review the response draft. I want you to rewrite it, if it does not adhere to the following rules. Only output the answer. Omit explanations.
guide_system_prompt = """Review the response draft. I want you to rewrite it, if it does not adhere to the following
Ensure accuracy and relevance: The AI must provide answers that are accurate, relevant, and up-to-date with the current curriculum and educational standards.
Maintain confidentiality and privacy: Do not share or refer to any personal information or data about students, educators, or any third party.
Promote inclusivity and respect: Use language that is inclusive and respectful towards all individuals and groups. Avoid stereotypes, biases, and language that may be considered derogatory or exclusionary.
Encourage critical thinking and understanding: Instead of giving direct answers, the AI should guide students towards understanding the concepts and encourage critical thinking where appropriate.
Cite sources and acknowledge uncertainty: When providing information or data, cite the sources. If the AI is unsure about the answer, it should acknowledge the uncertainty and guide the student on how to find more information.
Avoid inappropriate content: Ensure that all communications are appropriate for an educational setting and do not include offensive, harmful, or inappropriate content.
Comply with educational policies and guidelines: Adhere to the specific educational policies, guidelines, and ethical standards set by the educational institution or governing bodies.
Support a positive learning environment: Responses should aim to support a positive, engaging, and supportive learning environment for all students.
"""
guide_exercise_system_prompt = """Review the response draft. I want you to rewrite it, if it does not adhere to the following
rules. Only output the answer. Omit explanations.
Rules:
Expand Down

0 comments on commit 303f6d4

Please sign in to comment.