Fix merge conflicts of main (#126)

ls1intum · Jun 21, 2024 · 0768e5a · 0768e5a
1 parent e572223
commit 0768e5a
Show file tree

Hide file tree

Showing 9 changed files with 86 additions and 65 deletions.
diff --git a/app/domain/data/lecture_unit_dto.py b/app/domain/data/lecture_unit_dto.py
@@ -1,15 +1,14 @@
-from typing import Optional
-
 from pydantic import BaseModel, Field
 
 
 class LectureUnitDTO(BaseModel):
-    to_update: Optional[bool] = Field(alias="toUpdate", default=None)
-    pdf_file_base64: Optional[str] = Field(alias="pdfFile", default=None)
-    lecture_unit_id: Optional[int] = Field(alias="lectureUnitId", default=None)
-    lecture_unit_name: Optional[str] = Field(alias="lectureUnitName", default=None)
-    lecture_id: Optional[int] = Field(alias="lectureId", default=None)
-    lecture_name: Optional[str] = Field(alias="lectureName", default=None)
-    course_id: Optional[int] = Field(alias="courseId", default=None)
-    course_name: Optional[str] = Field(alias="courseName", default=None)
-    course_description: Optional[str] = Field(alias="courseDescription", default=None)
+    to_update: bool = Field(alias="toUpdate")
+    base_url: str = Field(alias="artemisBaseUrl")
+    pdf_file_base64: str = Field(default="", alias="pdfFile")
+    lecture_unit_id: int = Field(alias="lectureUnitId")
+    lecture_unit_name: str = Field(default="", alias="lectureUnitName")
+    lecture_id: int = Field(alias="lectureId")
+    lecture_name: str = Field(default="", alias="lectureName")
+    course_id: int = Field(alias="courseId")
+    course_name: str = Field(default="", alias="courseName")
+    course_description: str = Field(default="", alias="courseDescription")
diff --git a/app/domain/pipeline_execution_settings_dto.py b/app/domain/pipeline_execution_settings_dto.py
@@ -1,11 +1,11 @@
-from typing import List, Optional
+from typing import List
 
 from pydantic import BaseModel, Field
 
 
 class PipelineExecutionSettingsDTO(BaseModel):
     authentication_token: str = Field(alias="authenticationToken")
-    allowed_model_identifiers: Optional[List[str]] = Field(
-        alias="allowedModelIdentifiers", default=[]
+    allowed_model_identifiers: List[str] = Field(
+        default=[], alias="allowedModelIdentifiers"
     )
     artemis_base_url: str = Field(alias="artemisBaseUrl")
diff --git a/app/pipeline/chat/exercise_chat_pipeline.py b/app/pipeline/chat/exercise_chat_pipeline.py
@@ -29,7 +29,9 @@
 from ...common import convert_iris_message_to_langchain_message
 from ...domain import ExerciseChatPipelineExecutionDTO
 from ...domain import PyrisMessage
-from ...domain.chat.lecture_chat.lecture_chat_pipeline_execution_dto import LectureChatPipelineExecutionDTO
+from ...domain.chat.lecture_chat.lecture_chat_pipeline_execution_dto import (
+    LectureChatPipelineExecutionDTO,
+)
 from ...domain.data.build_log_entry import BuildLogEntryDTO
 from ...domain.data.feedback_dto import FeedbackDTO
 from ...domain.data.programming_submission_dto import ProgrammingSubmissionDTO
@@ -46,7 +48,7 @@
 
 
 class ExerciseChatPipeline(Pipeline):
-    """Exercise chat pipeline that answers exercises related questions from students. """
+    """Exercise chat pipeline that answers exercises related questions from students."""
 
     llm: IrisLangchainChatModel
     pipeline: Runnable
@@ -99,7 +101,7 @@ def __call__(self, dto: ExerciseChatPipelineExecutionDTO):
                     settings=dto.settings,
                     course=dto.course,
                     chatHistory=dto.chat_history,
-                    user=dto.user
+                    user=dto.user,
                 )
                 lecture_chat_thread = threading.Thread(
                     target=self._run_lecture_chat_pipeline(execution_dto), args=(dto,)
@@ -122,7 +124,7 @@ def __call__(self, dto: ExerciseChatPipelineExecutionDTO):
             self.callback.error(f"Failed to generate response: {e}")
 
     def choose_best_response(
-            self, paragraphs: list[str], query: str, chat_history: List[PyrisMessage]
+        self, paragraphs: list[str], query: str, chat_history: List[PyrisMessage]
     ):
         """
         Chooses the best response from the reranker pipeline
@@ -163,7 +165,11 @@ def _run_lecture_chat_pipeline(self, dto: LectureChatPipelineExecutionDTO):
         pipeline = LectureChatPipeline()
         self.lecture_chat_response = pipeline(dto=dto)
 
-    def _run_exercise_chat_pipeline(self, dto: ExerciseChatPipelineExecutionDTO, should_execute_lecture_pipeline: bool = False):
+    def _run_exercise_chat_pipeline(
+        self,
+        dto: ExerciseChatPipelineExecutionDTO,
+        should_execute_lecture_pipeline: bool = False,
+    ):
         """
         Runs the pipeline
         :param dto:  execution data transfer object
@@ -204,7 +210,11 @@ def _run_exercise_chat_pipeline(self, dto: ExerciseChatPipelineExecutionDTO, sho
                     chat_history=history,
                     question=query,
                     repository=repository,
-                    feedbacks=(submission.latest_result.feedbacks if submission and submission.latest_result else [])
+                    feedbacks=(
+                        submission.latest_result.feedbacks
+                        if submission and submission.latest_result
+                        else []
+                    ),
                 )
                 self.callback.done()
             except Exception as e:
@@ -244,7 +254,11 @@ def _run_exercise_chat_pipeline(self, dto: ExerciseChatPipelineExecutionDTO, sho
         )
         self.prompt = ChatPromptTemplate.from_messages(prompt_val)
         try:
-            response_draft = (self.prompt | self.pipeline).with_config({"run_name": "Response Drafting"}).invoke({})
+            response_draft = (
+                (self.prompt | self.pipeline)
+                .with_config({"run_name": "Response Drafting"})
+                .invoke({})
+            )
             self.prompt = ChatPromptTemplate.from_messages(
                 [
                     SystemMessagePromptTemplate.from_template(guide_system_prompt),
@@ -253,7 +267,11 @@ def _run_exercise_chat_pipeline(self, dto: ExerciseChatPipelineExecutionDTO, sho
             prompt_val = self.prompt.format_messages(response=response_draft)
             self.prompt = ChatPromptTemplate.from_messages(prompt_val)
 
-            guide_response = (self.prompt | self.pipeline).with_config({"run_name": "Response Refining"}).invoke({})
+            guide_response = (
+                (self.prompt | self.pipeline)
+                .with_config({"run_name": "Response Refining"})
+                .invoke({})
+            )
 
             if "!ok!" in guide_response:
                 print("Response is ok and not rewritten!!!")
@@ -268,9 +286,9 @@ def _run_exercise_chat_pipeline(self, dto: ExerciseChatPipelineExecutionDTO, sho
             return "Failed to generate response"
 
     def _add_conversation_to_prompt(
-            self,
-            chat_history: List[PyrisMessage],
-            user_question: PyrisMessage,
+        self,
+        chat_history: List[PyrisMessage],
+        user_question: PyrisMessage,
     ):
         """
         Adds the chat history and user question to the prompt
@@ -290,7 +308,7 @@ def _add_conversation_to_prompt(
         self.prompt += convert_iris_message_to_langchain_message(user_question)
 
     def _add_student_repository_to_prompt(
-            self, student_repository: Dict[str, str], selected_files: List[str]
+        self, student_repository: Dict[str, str], selected_files: List[str]
     ):
         """Adds the student repository to the prompt
         :param student_repository: The student repository
@@ -306,9 +324,9 @@ def _add_student_repository_to_prompt(
                 )
 
     def _add_exercise_context_to_prompt(
-            self,
-            submission: ProgrammingSubmissionDTO,
-            selected_files: List[str],
+        self,
+        submission: ProgrammingSubmissionDTO,
+        selected_files: List[str],
     ):
         """Adds the exercise context to the prompt
         :param submission: The submission
@@ -330,22 +348,22 @@ def _add_feedbacks_to_prompt(self, feedbacks: List[FeedbackDTO]):
         """
         if feedbacks is not None and len(feedbacks) > 0:
             prompt = (
-                         "These are the feedbacks for the student's repository:\n%s"
-                     ) % "\n---------\n".join(str(log) for log in feedbacks)
+                "These are the feedbacks for the student's repository:\n%s"
+            ) % "\n---------\n".join(str(log) for log in feedbacks)
             self.prompt += SystemMessagePromptTemplate.from_template(prompt)
 
     def _add_build_logs_to_prompt(
-            self, build_logs: List[BuildLogEntryDTO], build_failed: bool
+        self, build_logs: List[BuildLogEntryDTO], build_failed: bool
     ):
         """Adds the build logs to the prompt
         :param build_logs: The build logs
         :param build_failed: Whether the build failed
         """
         if build_logs is not None and len(build_logs) > 0:
             prompt = (
-                         f"Last build failed: {build_failed}\n"
-                         "These are the build logs for the student's repository:\n%s"
-                     ) % "\n".join(str(log) for log in build_logs)
+                f"Last build failed: {build_failed}\n"
+                "These are the build logs for the student's repository:\n%s"
+            ) % "\n".join(str(log) for log in build_logs)
             self.prompt += SystemMessagePromptTemplate.from_template(prompt)
 
     def _add_relevant_chunks_to_prompt(self, retrieved_lecture_chunks: List[dict]):

diff --git a/app/pipeline/chat/lecture_chat_pipeline.py b/app/pipeline/chat/lecture_chat_pipeline.py
@@ -11,7 +11,9 @@
 from ..shared.citation_pipeline import CitationPipeline
 from ...common import convert_iris_message_to_langchain_message
 from ...domain import PyrisMessage
-from ...domain.chat.lecture_chat.lecture_chat_pipeline_execution_dto import LectureChatPipelineExecutionDTO
+from ...domain.chat.lecture_chat.lecture_chat_pipeline_execution_dto import (
+    LectureChatPipelineExecutionDTO,
+)
 from ...llm import CapabilityRequestHandler, RequirementList
 from ...retrieval.lecture_retrieval import LectureRetrieval
 from ...vector_database.database import VectorDatabase

diff --git a/app/pipeline/chat/output_models/output_models/selected_paragraphs.py b/app/pipeline/chat/output_models/output_models/selected_paragraphs.py
@@ -7,5 +7,5 @@ class SelectedParagraphs(BaseModel):
     selected_paragraphs: List[int] = Field(
         default=[],
         description="List of paragraphs sorted from most relevant to least relevant to the student question, "
-                    "each with a relevance score.",
+        "each with a relevance score.",
     )
diff --git a/app/pipeline/lecture_ingestion_pipeline.py b/app/pipeline/lecture_ingestion_pipeline.py
@@ -61,7 +61,9 @@ def save_pdf(pdf_file_base64):
     return temp_pdf_file_path
 
 
-def create_page_data(page_num, page_splits, lecture_unit_dto, course_language, base_url):
+def create_page_data(
+    page_num, page_splits, lecture_unit_dto, course_language, base_url
+):
     """
     Create and return a list of dictionaries to be ingested in the Vector Database.
     """
@@ -129,9 +131,11 @@ def __call__(self) -> bool:
             for i, lecture_unit in enumerate(self.dto.lecture_units):
                 pdf_path = save_pdf(lecture_unit.pdf_file_base64)
                 chunks.extend(
-                    self.chunk_data(lecture_pdf=pdf_path,
-                                    lecture_unit_dto=lecture_unit,
-                                    base_url=self.dto.settings.artemis_base_url)
+                    self.chunk_data(
+                        lecture_pdf=pdf_path,
+                        lecture_unit_dto=lecture_unit,
+                        base_url=self.dto.settings.artemis_base_url,
+                    )
                 )
                 cleanup_temporary_file(pdf_path)
             self.callback.done("Lecture Chunking and interpretation Finished")
@@ -223,10 +227,10 @@ def interpret_image(
         Interpret the image passed
         """
         image_interpretation_prompt = TextMessageContentDTO(
-            text_content=f"This page is part of the {name_of_lecture} university lecture,"
-            f" explain what is on the slide in an academic way,"
-            f" respond only with the explanation in {course_language}."
-            f" For more context here is the content of the previous slide: "
+            text_content=f"This page is part of the {name_of_lecture} university lecture, "
+            f" explain what is on the slide in an academic way, "
+            f"respond only with the explanation in {course_language}."
+            f"For more context here is the content of the previous slide: "
             f" {last_page_content}"
         )
         image = ImageMessageContentDTO(base64=img_base64)
@@ -296,7 +300,7 @@ def delete_old_lectures(self):
                     lecture_unit.course_id,
                     lecture_unit.lecture_id,
                     lecture_unit.lecture_unit_id,
-                    self.dto.settings.artemis_base_url
+                    self.dto.settings.artemis_base_url,
                 ):
                     logger.info("Lecture deleted successfully")
                 else:

diff --git a/app/pipeline/prompts/lecture_retrieval_prompts.py b/app/pipeline/prompts/lecture_retrieval_prompts.py
@@ -29,10 +29,10 @@
 
 write_hypothetical_answer_prompt = """
  Please provide a response in {course_language}.
+ You should create a slide like response to the student query.
  Craft your response to closely reflect the style and content of university lecture materials.
-  Do not exceed 300 words.
+ Do not exceed 350 words.
  Add keywords and phrases that are relevant to student intent.
- You should create a slide like response to the student query.
  """
 
 rewrite_student_query_prompt_with_exercise_context = """
@@ -48,5 +48,6 @@
                """
 
 write_hypothetical_answer_with_exercise_context_prompt = """ Please provide a response in {course_language}.
+ You should create a slide like response to the student query.
  Craft your response to closely reflect the style and content of university lecture materials.
-  Do not exceed 500 characters. Add keywords and phrases that are relevant to student intent."""
+ Do not exceed 350 words.. Add keywords and phrases that are relevant to student intent."""
diff --git a/app/pipeline/shared/reranker_pipeline.py b/app/pipeline/shared/reranker_pipeline.py
@@ -30,14 +30,11 @@ def __init__(self):
             requirements=RequirementList(
                 gpt_version_equivalent=3.5,
                 context_length=16385,
-                json_mode=True,
             )
         )
         self.llm = IrisLangchainChatModel(
             request_handler=request_handler,
-            completion_args=CompletionArguments(
-                temperature=0, max_tokens=4000, response_format="JSON"
-            ),
+            completion_args=CompletionArguments(temperature=0, max_tokens=4000),
         )
         dirname = os.path.dirname(__file__)
         prompt_file_path = os.path.join(dirname, "..", "prompts", "reranker_prompt.txt")
@@ -76,12 +73,12 @@ def __str__(self):
         return f"{self.__class__.__name__}(llm={self.llm})"
 
     def __call__(
-            self,
-            paragraphs: Union[List[dict], List[str]],
-            query: str,
-            prompt: Optional[PromptTemplate] = None,
-            chat_history: list[PyrisMessage] = None,
-            **kwargs,
+        self,
+        paragraphs: Union[List[dict], List[str]],
+        query: str,
+        prompt: Optional[PromptTemplate] = None,
+        chat_history: list[PyrisMessage] = None,
+        **kwargs,
     ) -> List[str]:
         """
         Runs the pipeline
@@ -106,11 +103,11 @@ def __call__(
                 "Invalid input type for paragraphs. Must be a list of dictionaries or a list of strings."
             )
         text_chat_history = [
-                                chat_history[-i - 1].contents[0].text_content
-                                for i in range(min(10, len(chat_history)))  # Ensure no out-of-bounds error
-                            ][
-                            ::-1
-                            ]  # Reverse to get the messages in chronological order of their appearance  data["question"] = query
+            chat_history[-i - 1].contents[0].text_content
+            for i in range(min(4, len(chat_history)))  # Ensure no out-of-bounds error
+        ][
+            ::-1
+        ]  # Reverse to get the messages in chronological order of their appearance
         data["chat_history"] = text_chat_history
         data["question"] = query
         if prompt is None:

diff --git a/app/retrieval/lecture_retrieval.py b/app/retrieval/lecture_retrieval.py
@@ -271,7 +271,7 @@ def rewrite_elaborated_query(
         prompt = ChatPromptTemplate.from_messages(prompt_val)
         try:
             response = (prompt | self.pipeline).invoke({})
-            logger.info(f"Response from exercise chat pipeline: {response}")
+            logger.info(f"Response from retirval pipeline: {response}")
             return response
         except Exception as e:
             raise e