From 0768e5adb581457fc4473cb66c62346f8a72d1e5 Mon Sep 17 00:00:00 2001 From: Yassine Souissi <74144843+yassinsws@users.noreply.github.com> Date: Fri, 21 Jun 2024 21:07:30 +0200 Subject: [PATCH] Fix merge conflicts of main (#126) --- app/domain/data/lecture_unit_dto.py | 21 ++++--- app/domain/pipeline_execution_settings_dto.py | 6 +- app/pipeline/chat/exercise_chat_pipeline.py | 60 ++++++++++++------- app/pipeline/chat/lecture_chat_pipeline.py | 4 +- .../output_models/selected_paragraphs.py | 2 +- app/pipeline/lecture_ingestion_pipeline.py | 22 ++++--- .../prompts/lecture_retrieval_prompts.py | 7 ++- app/pipeline/shared/reranker_pipeline.py | 27 ++++----- app/retrieval/lecture_retrieval.py | 2 +- 9 files changed, 86 insertions(+), 65 deletions(-) diff --git a/app/domain/data/lecture_unit_dto.py b/app/domain/data/lecture_unit_dto.py index d207166b..26ef1785 100644 --- a/app/domain/data/lecture_unit_dto.py +++ b/app/domain/data/lecture_unit_dto.py @@ -1,15 +1,14 @@ -from typing import Optional - from pydantic import BaseModel, Field class LectureUnitDTO(BaseModel): - to_update: Optional[bool] = Field(alias="toUpdate", default=None) - pdf_file_base64: Optional[str] = Field(alias="pdfFile", default=None) - lecture_unit_id: Optional[int] = Field(alias="lectureUnitId", default=None) - lecture_unit_name: Optional[str] = Field(alias="lectureUnitName", default=None) - lecture_id: Optional[int] = Field(alias="lectureId", default=None) - lecture_name: Optional[str] = Field(alias="lectureName", default=None) - course_id: Optional[int] = Field(alias="courseId", default=None) - course_name: Optional[str] = Field(alias="courseName", default=None) - course_description: Optional[str] = Field(alias="courseDescription", default=None) + to_update: bool = Field(alias="toUpdate") + base_url: str = Field(alias="artemisBaseUrl") + pdf_file_base64: str = Field(default="", alias="pdfFile") + lecture_unit_id: int = Field(alias="lectureUnitId") + lecture_unit_name: str = Field(default="", alias="lectureUnitName") + lecture_id: int = Field(alias="lectureId") + lecture_name: str = Field(default="", alias="lectureName") + course_id: int = Field(alias="courseId") + course_name: str = Field(default="", alias="courseName") + course_description: str = Field(default="", alias="courseDescription") diff --git a/app/domain/pipeline_execution_settings_dto.py b/app/domain/pipeline_execution_settings_dto.py index 86242d23..340e8783 100644 --- a/app/domain/pipeline_execution_settings_dto.py +++ b/app/domain/pipeline_execution_settings_dto.py @@ -1,11 +1,11 @@ -from typing import List, Optional +from typing import List from pydantic import BaseModel, Field class PipelineExecutionSettingsDTO(BaseModel): authentication_token: str = Field(alias="authenticationToken") - allowed_model_identifiers: Optional[List[str]] = Field( - alias="allowedModelIdentifiers", default=[] + allowed_model_identifiers: List[str] = Field( + default=[], alias="allowedModelIdentifiers" ) artemis_base_url: str = Field(alias="artemisBaseUrl") diff --git a/app/pipeline/chat/exercise_chat_pipeline.py b/app/pipeline/chat/exercise_chat_pipeline.py index e8025d98..7fc614ea 100644 --- a/app/pipeline/chat/exercise_chat_pipeline.py +++ b/app/pipeline/chat/exercise_chat_pipeline.py @@ -29,7 +29,9 @@ from ...common import convert_iris_message_to_langchain_message from ...domain import ExerciseChatPipelineExecutionDTO from ...domain import PyrisMessage -from ...domain.chat.lecture_chat.lecture_chat_pipeline_execution_dto import LectureChatPipelineExecutionDTO +from ...domain.chat.lecture_chat.lecture_chat_pipeline_execution_dto import ( + LectureChatPipelineExecutionDTO, +) from ...domain.data.build_log_entry import BuildLogEntryDTO from ...domain.data.feedback_dto import FeedbackDTO from ...domain.data.programming_submission_dto import ProgrammingSubmissionDTO @@ -46,7 +48,7 @@ class ExerciseChatPipeline(Pipeline): - """Exercise chat pipeline that answers exercises related questions from students. """ + """Exercise chat pipeline that answers exercises related questions from students.""" llm: IrisLangchainChatModel pipeline: Runnable @@ -99,7 +101,7 @@ def __call__(self, dto: ExerciseChatPipelineExecutionDTO): settings=dto.settings, course=dto.course, chatHistory=dto.chat_history, - user=dto.user + user=dto.user, ) lecture_chat_thread = threading.Thread( target=self._run_lecture_chat_pipeline(execution_dto), args=(dto,) @@ -122,7 +124,7 @@ def __call__(self, dto: ExerciseChatPipelineExecutionDTO): self.callback.error(f"Failed to generate response: {e}") def choose_best_response( - self, paragraphs: list[str], query: str, chat_history: List[PyrisMessage] + self, paragraphs: list[str], query: str, chat_history: List[PyrisMessage] ): """ Chooses the best response from the reranker pipeline @@ -163,7 +165,11 @@ def _run_lecture_chat_pipeline(self, dto: LectureChatPipelineExecutionDTO): pipeline = LectureChatPipeline() self.lecture_chat_response = pipeline(dto=dto) - def _run_exercise_chat_pipeline(self, dto: ExerciseChatPipelineExecutionDTO, should_execute_lecture_pipeline: bool = False): + def _run_exercise_chat_pipeline( + self, + dto: ExerciseChatPipelineExecutionDTO, + should_execute_lecture_pipeline: bool = False, + ): """ Runs the pipeline :param dto: execution data transfer object @@ -204,7 +210,11 @@ def _run_exercise_chat_pipeline(self, dto: ExerciseChatPipelineExecutionDTO, sho chat_history=history, question=query, repository=repository, - feedbacks=(submission.latest_result.feedbacks if submission and submission.latest_result else []) + feedbacks=( + submission.latest_result.feedbacks + if submission and submission.latest_result + else [] + ), ) self.callback.done() except Exception as e: @@ -244,7 +254,11 @@ def _run_exercise_chat_pipeline(self, dto: ExerciseChatPipelineExecutionDTO, sho ) self.prompt = ChatPromptTemplate.from_messages(prompt_val) try: - response_draft = (self.prompt | self.pipeline).with_config({"run_name": "Response Drafting"}).invoke({}) + response_draft = ( + (self.prompt | self.pipeline) + .with_config({"run_name": "Response Drafting"}) + .invoke({}) + ) self.prompt = ChatPromptTemplate.from_messages( [ SystemMessagePromptTemplate.from_template(guide_system_prompt), @@ -253,7 +267,11 @@ def _run_exercise_chat_pipeline(self, dto: ExerciseChatPipelineExecutionDTO, sho prompt_val = self.prompt.format_messages(response=response_draft) self.prompt = ChatPromptTemplate.from_messages(prompt_val) - guide_response = (self.prompt | self.pipeline).with_config({"run_name": "Response Refining"}).invoke({}) + guide_response = ( + (self.prompt | self.pipeline) + .with_config({"run_name": "Response Refining"}) + .invoke({}) + ) if "!ok!" in guide_response: print("Response is ok and not rewritten!!!") @@ -268,9 +286,9 @@ def _run_exercise_chat_pipeline(self, dto: ExerciseChatPipelineExecutionDTO, sho return "Failed to generate response" def _add_conversation_to_prompt( - self, - chat_history: List[PyrisMessage], - user_question: PyrisMessage, + self, + chat_history: List[PyrisMessage], + user_question: PyrisMessage, ): """ Adds the chat history and user question to the prompt @@ -290,7 +308,7 @@ def _add_conversation_to_prompt( self.prompt += convert_iris_message_to_langchain_message(user_question) def _add_student_repository_to_prompt( - self, student_repository: Dict[str, str], selected_files: List[str] + self, student_repository: Dict[str, str], selected_files: List[str] ): """Adds the student repository to the prompt :param student_repository: The student repository @@ -306,9 +324,9 @@ def _add_student_repository_to_prompt( ) def _add_exercise_context_to_prompt( - self, - submission: ProgrammingSubmissionDTO, - selected_files: List[str], + self, + submission: ProgrammingSubmissionDTO, + selected_files: List[str], ): """Adds the exercise context to the prompt :param submission: The submission @@ -330,12 +348,12 @@ def _add_feedbacks_to_prompt(self, feedbacks: List[FeedbackDTO]): """ if feedbacks is not None and len(feedbacks) > 0: prompt = ( - "These are the feedbacks for the student's repository:\n%s" - ) % "\n---------\n".join(str(log) for log in feedbacks) + "These are the feedbacks for the student's repository:\n%s" + ) % "\n---------\n".join(str(log) for log in feedbacks) self.prompt += SystemMessagePromptTemplate.from_template(prompt) def _add_build_logs_to_prompt( - self, build_logs: List[BuildLogEntryDTO], build_failed: bool + self, build_logs: List[BuildLogEntryDTO], build_failed: bool ): """Adds the build logs to the prompt :param build_logs: The build logs @@ -343,9 +361,9 @@ def _add_build_logs_to_prompt( """ if build_logs is not None and len(build_logs) > 0: prompt = ( - f"Last build failed: {build_failed}\n" - "These are the build logs for the student's repository:\n%s" - ) % "\n".join(str(log) for log in build_logs) + f"Last build failed: {build_failed}\n" + "These are the build logs for the student's repository:\n%s" + ) % "\n".join(str(log) for log in build_logs) self.prompt += SystemMessagePromptTemplate.from_template(prompt) def _add_relevant_chunks_to_prompt(self, retrieved_lecture_chunks: List[dict]): diff --git a/app/pipeline/chat/lecture_chat_pipeline.py b/app/pipeline/chat/lecture_chat_pipeline.py index 61fddde1..8699cd40 100644 --- a/app/pipeline/chat/lecture_chat_pipeline.py +++ b/app/pipeline/chat/lecture_chat_pipeline.py @@ -11,7 +11,9 @@ from ..shared.citation_pipeline import CitationPipeline from ...common import convert_iris_message_to_langchain_message from ...domain import PyrisMessage -from ...domain.chat.lecture_chat.lecture_chat_pipeline_execution_dto import LectureChatPipelineExecutionDTO +from ...domain.chat.lecture_chat.lecture_chat_pipeline_execution_dto import ( + LectureChatPipelineExecutionDTO, +) from ...llm import CapabilityRequestHandler, RequirementList from ...retrieval.lecture_retrieval import LectureRetrieval from ...vector_database.database import VectorDatabase diff --git a/app/pipeline/chat/output_models/output_models/selected_paragraphs.py b/app/pipeline/chat/output_models/output_models/selected_paragraphs.py index 54c1bcda..87bf0daa 100644 --- a/app/pipeline/chat/output_models/output_models/selected_paragraphs.py +++ b/app/pipeline/chat/output_models/output_models/selected_paragraphs.py @@ -7,5 +7,5 @@ class SelectedParagraphs(BaseModel): selected_paragraphs: List[int] = Field( default=[], description="List of paragraphs sorted from most relevant to least relevant to the student question, " - "each with a relevance score.", + "each with a relevance score.", ) diff --git a/app/pipeline/lecture_ingestion_pipeline.py b/app/pipeline/lecture_ingestion_pipeline.py index b720a888..2cbf629d 100644 --- a/app/pipeline/lecture_ingestion_pipeline.py +++ b/app/pipeline/lecture_ingestion_pipeline.py @@ -61,7 +61,9 @@ def save_pdf(pdf_file_base64): return temp_pdf_file_path -def create_page_data(page_num, page_splits, lecture_unit_dto, course_language, base_url): +def create_page_data( + page_num, page_splits, lecture_unit_dto, course_language, base_url +): """ Create and return a list of dictionnaries to be ingested in the Vector Database. """ @@ -129,9 +131,11 @@ def __call__(self) -> bool: for i, lecture_unit in enumerate(self.dto.lecture_units): pdf_path = save_pdf(lecture_unit.pdf_file_base64) chunks.extend( - self.chunk_data(lecture_pdf=pdf_path, - lecture_unit_dto=lecture_unit, - base_url=self.dto.settings.artemis_base_url) + self.chunk_data( + lecture_pdf=pdf_path, + lecture_unit_dto=lecture_unit, + base_url=self.dto.settings.artemis_base_url, + ) ) cleanup_temporary_file(pdf_path) self.callback.done("Lecture Chunking and interpretation Finished") @@ -223,10 +227,10 @@ def interpret_image( Interpret the image passed """ image_interpretation_prompt = TextMessageContentDTO( - text_content=f"This page is part of the {name_of_lecture} university lecture," - f" explain what is on the slide in an academic way," - f" respond only with the explanation in {course_language}." - f" For more context here is the content of the previous slide: " + text_content=f"This page is part of the {name_of_lecture} university lecture, " + f" explain what is on the slide in an academic way, " + f"respond only with the explanation in {course_language}." + f"For more context here is the content of the previous slide: " f" {last_page_content}" ) image = ImageMessageContentDTO(base64=img_base64) @@ -296,7 +300,7 @@ def delete_old_lectures(self): lecture_unit.course_id, lecture_unit.lecture_id, lecture_unit.lecture_unit_id, - self.dto.settings.artemis_base_url + self.dto.settings.artemis_base_url, ): logger.info("Lecture deleted successfully") else: diff --git a/app/pipeline/prompts/lecture_retrieval_prompts.py b/app/pipeline/prompts/lecture_retrieval_prompts.py index 2e0da5d2..0611e2f1 100644 --- a/app/pipeline/prompts/lecture_retrieval_prompts.py +++ b/app/pipeline/prompts/lecture_retrieval_prompts.py @@ -29,10 +29,10 @@ write_hypothetical_answer_prompt = """ Please provide a response in {course_language}. + You should create a slide like response to the student query. Craft your response to closely reflect the style and content of university lecture materials. - Do not exceed 300 words. + Do not exceed 350 words. Add keywords and phrases that are relevant to student intent. - You should create a slide like response to the student query. """ rewrite_student_query_prompt_with_exercise_context = """ @@ -48,5 +48,6 @@ """ write_hypothetical_answer_with_exercise_context_prompt = """ Please provide a response in {course_language}. + You should create a slide like response to the student query. Craft your response to closely reflect the style and content of university lecture materials. - Do not exceed 500 characters. Add keywords and phrases that are relevant to student intent.""" + Do not exceed 350 words.. Add keywords and phrases that are relevant to student intent.""" diff --git a/app/pipeline/shared/reranker_pipeline.py b/app/pipeline/shared/reranker_pipeline.py index ac9f0a6d..f3d3921c 100644 --- a/app/pipeline/shared/reranker_pipeline.py +++ b/app/pipeline/shared/reranker_pipeline.py @@ -30,14 +30,11 @@ def __init__(self): requirements=RequirementList( gpt_version_equivalent=3.5, context_length=16385, - json_mode=True, ) ) self.llm = IrisLangchainChatModel( request_handler=request_handler, - completion_args=CompletionArguments( - temperature=0, max_tokens=4000, response_format="JSON" - ), + completion_args=CompletionArguments(temperature=0, max_tokens=4000), ) dirname = os.path.dirname(__file__) prompt_file_path = os.path.join(dirname, "..", "prompts", "reranker_prompt.txt") @@ -76,12 +73,12 @@ def __str__(self): return f"{self.__class__.__name__}(llm={self.llm})" def __call__( - self, - paragraphs: Union[List[dict], List[str]], - query: str, - prompt: Optional[PromptTemplate] = None, - chat_history: list[PyrisMessage] = None, - **kwargs, + self, + paragraphs: Union[List[dict], List[str]], + query: str, + prompt: Optional[PromptTemplate] = None, + chat_history: list[PyrisMessage] = None, + **kwargs, ) -> List[str]: """ Runs the pipeline @@ -106,11 +103,11 @@ def __call__( "Invalid input type for paragraphs. Must be a list of dictionaries or a list of strings." ) text_chat_history = [ - chat_history[-i - 1].contents[0].text_content - for i in range(min(10, len(chat_history))) # Ensure no out-of-bounds error - ][ - ::-1 - ] # Reverse to get the messages in chronological order of their appearance data["question"] = query + chat_history[-i - 1].contents[0].text_content + for i in range(min(4, len(chat_history))) # Ensure no out-of-bounds error + ][ + ::-1 + ] # Reverse to get the messages in chronological order of their appearance data["question"] = query data["chat_history"] = text_chat_history data["question"] = query if prompt is None: diff --git a/app/retrieval/lecture_retrieval.py b/app/retrieval/lecture_retrieval.py index 6e0cdf2f..f1746d07 100644 --- a/app/retrieval/lecture_retrieval.py +++ b/app/retrieval/lecture_retrieval.py @@ -271,7 +271,7 @@ def rewrite_elaborated_query( prompt = ChatPromptTemplate.from_messages(prompt_val) try: response = (prompt | self.pipeline).invoke({}) - logger.info(f"Response from exercise chat pipeline: {response}") + logger.info(f"Response from retirval pipeline: {response}") return response except Exception as e: raise e