Skip to content

Commit

Permalink
Fix merge conflicts of main (#126)
Browse files Browse the repository at this point in the history
  • Loading branch information
yassinsws authored Jun 21, 2024
1 parent e572223 commit 0768e5a
Show file tree
Hide file tree
Showing 9 changed files with 86 additions and 65 deletions.
21 changes: 10 additions & 11 deletions app/domain/data/lecture_unit_dto.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
from typing import Optional

from pydantic import BaseModel, Field


class LectureUnitDTO(BaseModel):
to_update: Optional[bool] = Field(alias="toUpdate", default=None)
pdf_file_base64: Optional[str] = Field(alias="pdfFile", default=None)
lecture_unit_id: Optional[int] = Field(alias="lectureUnitId", default=None)
lecture_unit_name: Optional[str] = Field(alias="lectureUnitName", default=None)
lecture_id: Optional[int] = Field(alias="lectureId", default=None)
lecture_name: Optional[str] = Field(alias="lectureName", default=None)
course_id: Optional[int] = Field(alias="courseId", default=None)
course_name: Optional[str] = Field(alias="courseName", default=None)
course_description: Optional[str] = Field(alias="courseDescription", default=None)
to_update: bool = Field(alias="toUpdate")
base_url: str = Field(alias="artemisBaseUrl")
pdf_file_base64: str = Field(default="", alias="pdfFile")
lecture_unit_id: int = Field(alias="lectureUnitId")
lecture_unit_name: str = Field(default="", alias="lectureUnitName")
lecture_id: int = Field(alias="lectureId")
lecture_name: str = Field(default="", alias="lectureName")
course_id: int = Field(alias="courseId")
course_name: str = Field(default="", alias="courseName")
course_description: str = Field(default="", alias="courseDescription")
6 changes: 3 additions & 3 deletions app/domain/pipeline_execution_settings_dto.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from typing import List, Optional
from typing import List

from pydantic import BaseModel, Field


class PipelineExecutionSettingsDTO(BaseModel):
authentication_token: str = Field(alias="authenticationToken")
allowed_model_identifiers: Optional[List[str]] = Field(
alias="allowedModelIdentifiers", default=[]
allowed_model_identifiers: List[str] = Field(
default=[], alias="allowedModelIdentifiers"
)
artemis_base_url: str = Field(alias="artemisBaseUrl")
60 changes: 39 additions & 21 deletions app/pipeline/chat/exercise_chat_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@
from ...common import convert_iris_message_to_langchain_message
from ...domain import ExerciseChatPipelineExecutionDTO
from ...domain import PyrisMessage
from ...domain.chat.lecture_chat.lecture_chat_pipeline_execution_dto import LectureChatPipelineExecutionDTO
from ...domain.chat.lecture_chat.lecture_chat_pipeline_execution_dto import (
LectureChatPipelineExecutionDTO,
)
from ...domain.data.build_log_entry import BuildLogEntryDTO
from ...domain.data.feedback_dto import FeedbackDTO
from ...domain.data.programming_submission_dto import ProgrammingSubmissionDTO
Expand All @@ -46,7 +48,7 @@


class ExerciseChatPipeline(Pipeline):
"""Exercise chat pipeline that answers exercises related questions from students. """
"""Exercise chat pipeline that answers exercises related questions from students."""

llm: IrisLangchainChatModel
pipeline: Runnable
Expand Down Expand Up @@ -99,7 +101,7 @@ def __call__(self, dto: ExerciseChatPipelineExecutionDTO):
settings=dto.settings,
course=dto.course,
chatHistory=dto.chat_history,
user=dto.user
user=dto.user,
)
lecture_chat_thread = threading.Thread(
target=self._run_lecture_chat_pipeline(execution_dto), args=(dto,)
Expand All @@ -122,7 +124,7 @@ def __call__(self, dto: ExerciseChatPipelineExecutionDTO):
self.callback.error(f"Failed to generate response: {e}")

def choose_best_response(
self, paragraphs: list[str], query: str, chat_history: List[PyrisMessage]
self, paragraphs: list[str], query: str, chat_history: List[PyrisMessage]
):
"""
Chooses the best response from the reranker pipeline
Expand Down Expand Up @@ -163,7 +165,11 @@ def _run_lecture_chat_pipeline(self, dto: LectureChatPipelineExecutionDTO):
pipeline = LectureChatPipeline()
self.lecture_chat_response = pipeline(dto=dto)

def _run_exercise_chat_pipeline(self, dto: ExerciseChatPipelineExecutionDTO, should_execute_lecture_pipeline: bool = False):
def _run_exercise_chat_pipeline(
self,
dto: ExerciseChatPipelineExecutionDTO,
should_execute_lecture_pipeline: bool = False,
):
"""
Runs the pipeline
:param dto: execution data transfer object
Expand Down Expand Up @@ -204,7 +210,11 @@ def _run_exercise_chat_pipeline(self, dto: ExerciseChatPipelineExecutionDTO, sho
chat_history=history,
question=query,
repository=repository,
feedbacks=(submission.latest_result.feedbacks if submission and submission.latest_result else [])
feedbacks=(
submission.latest_result.feedbacks
if submission and submission.latest_result
else []
),
)
self.callback.done()
except Exception as e:
Expand Down Expand Up @@ -244,7 +254,11 @@ def _run_exercise_chat_pipeline(self, dto: ExerciseChatPipelineExecutionDTO, sho
)
self.prompt = ChatPromptTemplate.from_messages(prompt_val)
try:
response_draft = (self.prompt | self.pipeline).with_config({"run_name": "Response Drafting"}).invoke({})
response_draft = (
(self.prompt | self.pipeline)
.with_config({"run_name": "Response Drafting"})
.invoke({})
)
self.prompt = ChatPromptTemplate.from_messages(
[
SystemMessagePromptTemplate.from_template(guide_system_prompt),
Expand All @@ -253,7 +267,11 @@ def _run_exercise_chat_pipeline(self, dto: ExerciseChatPipelineExecutionDTO, sho
prompt_val = self.prompt.format_messages(response=response_draft)
self.prompt = ChatPromptTemplate.from_messages(prompt_val)

guide_response = (self.prompt | self.pipeline).with_config({"run_name": "Response Refining"}).invoke({})
guide_response = (
(self.prompt | self.pipeline)
.with_config({"run_name": "Response Refining"})
.invoke({})
)

if "!ok!" in guide_response:
print("Response is ok and not rewritten!!!")
Expand All @@ -268,9 +286,9 @@ def _run_exercise_chat_pipeline(self, dto: ExerciseChatPipelineExecutionDTO, sho
return "Failed to generate response"

def _add_conversation_to_prompt(
self,
chat_history: List[PyrisMessage],
user_question: PyrisMessage,
self,
chat_history: List[PyrisMessage],
user_question: PyrisMessage,
):
"""
Adds the chat history and user question to the prompt
Expand All @@ -290,7 +308,7 @@ def _add_conversation_to_prompt(
self.prompt += convert_iris_message_to_langchain_message(user_question)

def _add_student_repository_to_prompt(
self, student_repository: Dict[str, str], selected_files: List[str]
self, student_repository: Dict[str, str], selected_files: List[str]
):
"""Adds the student repository to the prompt
:param student_repository: The student repository
Expand All @@ -306,9 +324,9 @@ def _add_student_repository_to_prompt(
)

def _add_exercise_context_to_prompt(
self,
submission: ProgrammingSubmissionDTO,
selected_files: List[str],
self,
submission: ProgrammingSubmissionDTO,
selected_files: List[str],
):
"""Adds the exercise context to the prompt
:param submission: The submission
Expand All @@ -330,22 +348,22 @@ def _add_feedbacks_to_prompt(self, feedbacks: List[FeedbackDTO]):
"""
if feedbacks is not None and len(feedbacks) > 0:
prompt = (
"These are the feedbacks for the student's repository:\n%s"
) % "\n---------\n".join(str(log) for log in feedbacks)
"These are the feedbacks for the student's repository:\n%s"
) % "\n---------\n".join(str(log) for log in feedbacks)
self.prompt += SystemMessagePromptTemplate.from_template(prompt)

def _add_build_logs_to_prompt(
self, build_logs: List[BuildLogEntryDTO], build_failed: bool
self, build_logs: List[BuildLogEntryDTO], build_failed: bool
):
"""Adds the build logs to the prompt
:param build_logs: The build logs
:param build_failed: Whether the build failed
"""
if build_logs is not None and len(build_logs) > 0:
prompt = (
f"Last build failed: {build_failed}\n"
"These are the build logs for the student's repository:\n%s"
) % "\n".join(str(log) for log in build_logs)
f"Last build failed: {build_failed}\n"
"These are the build logs for the student's repository:\n%s"
) % "\n".join(str(log) for log in build_logs)
self.prompt += SystemMessagePromptTemplate.from_template(prompt)

def _add_relevant_chunks_to_prompt(self, retrieved_lecture_chunks: List[dict]):
Expand Down
4 changes: 3 additions & 1 deletion app/pipeline/chat/lecture_chat_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
from ..shared.citation_pipeline import CitationPipeline
from ...common import convert_iris_message_to_langchain_message
from ...domain import PyrisMessage
from ...domain.chat.lecture_chat.lecture_chat_pipeline_execution_dto import LectureChatPipelineExecutionDTO
from ...domain.chat.lecture_chat.lecture_chat_pipeline_execution_dto import (
LectureChatPipelineExecutionDTO,
)
from ...llm import CapabilityRequestHandler, RequirementList
from ...retrieval.lecture_retrieval import LectureRetrieval
from ...vector_database.database import VectorDatabase
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ class SelectedParagraphs(BaseModel):
selected_paragraphs: List[int] = Field(
default=[],
description="List of paragraphs sorted from most relevant to least relevant to the student question, "
"each with a relevance score.",
"each with a relevance score.",
)
22 changes: 13 additions & 9 deletions app/pipeline/lecture_ingestion_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@ def save_pdf(pdf_file_base64):
return temp_pdf_file_path


def create_page_data(page_num, page_splits, lecture_unit_dto, course_language, base_url):
def create_page_data(
page_num, page_splits, lecture_unit_dto, course_language, base_url
):
"""
Create and return a list of dictionaries to be ingested into the Vector Database.
"""
Expand Down Expand Up @@ -129,9 +131,11 @@ def __call__(self) -> bool:
for i, lecture_unit in enumerate(self.dto.lecture_units):
pdf_path = save_pdf(lecture_unit.pdf_file_base64)
chunks.extend(
self.chunk_data(lecture_pdf=pdf_path,
lecture_unit_dto=lecture_unit,
base_url=self.dto.settings.artemis_base_url)
self.chunk_data(
lecture_pdf=pdf_path,
lecture_unit_dto=lecture_unit,
base_url=self.dto.settings.artemis_base_url,
)
)
cleanup_temporary_file(pdf_path)
self.callback.done("Lecture Chunking and interpretation Finished")
Expand Down Expand Up @@ -223,10 +227,10 @@ def interpret_image(
Interpret the image passed
"""
image_interpretation_prompt = TextMessageContentDTO(
text_content=f"This page is part of the {name_of_lecture} university lecture,"
f" explain what is on the slide in an academic way,"
f" respond only with the explanation in {course_language}."
f" For more context here is the content of the previous slide: "
text_content=f"This page is part of the {name_of_lecture} university lecture, "
f" explain what is on the slide in an academic way, "
f"respond only with the explanation in {course_language}."
f"For more context here is the content of the previous slide: "
f" {last_page_content}"
)
image = ImageMessageContentDTO(base64=img_base64)
Expand Down Expand Up @@ -296,7 +300,7 @@ def delete_old_lectures(self):
lecture_unit.course_id,
lecture_unit.lecture_id,
lecture_unit.lecture_unit_id,
self.dto.settings.artemis_base_url
self.dto.settings.artemis_base_url,
):
logger.info("Lecture deleted successfully")
else:
Expand Down
7 changes: 4 additions & 3 deletions app/pipeline/prompts/lecture_retrieval_prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@

write_hypothetical_answer_prompt = """
Please provide a response in {course_language}.
You should create a slide like response to the student query.
Craft your response to closely reflect the style and content of university lecture materials.
Do not exceed 300 words.
Do not exceed 350 words.
Add keywords and phrases that are relevant to student intent.
You should create a slide like response to the student query.
"""

rewrite_student_query_prompt_with_exercise_context = """
Expand All @@ -48,5 +48,6 @@
"""

write_hypothetical_answer_with_exercise_context_prompt = """ Please provide a response in {course_language}.
You should create a slide like response to the student query.
Craft your response to closely reflect the style and content of university lecture materials.
Do not exceed 500 characters. Add keywords and phrases that are relevant to student intent."""
Do not exceed 350 words.. Add keywords and phrases that are relevant to student intent."""
27 changes: 12 additions & 15 deletions app/pipeline/shared/reranker_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,11 @@ def __init__(self):
requirements=RequirementList(
gpt_version_equivalent=3.5,
context_length=16385,
json_mode=True,
)
)
self.llm = IrisLangchainChatModel(
request_handler=request_handler,
completion_args=CompletionArguments(
temperature=0, max_tokens=4000, response_format="JSON"
),
completion_args=CompletionArguments(temperature=0, max_tokens=4000),
)
dirname = os.path.dirname(__file__)
prompt_file_path = os.path.join(dirname, "..", "prompts", "reranker_prompt.txt")
Expand Down Expand Up @@ -76,12 +73,12 @@ def __str__(self):
return f"{self.__class__.__name__}(llm={self.llm})"

def __call__(
self,
paragraphs: Union[List[dict], List[str]],
query: str,
prompt: Optional[PromptTemplate] = None,
chat_history: list[PyrisMessage] = None,
**kwargs,
self,
paragraphs: Union[List[dict], List[str]],
query: str,
prompt: Optional[PromptTemplate] = None,
chat_history: list[PyrisMessage] = None,
**kwargs,
) -> List[str]:
"""
Runs the pipeline
Expand All @@ -106,11 +103,11 @@ def __call__(
"Invalid input type for paragraphs. Must be a list of dictionaries or a list of strings."
)
text_chat_history = [
chat_history[-i - 1].contents[0].text_content
for i in range(min(10, len(chat_history))) # Ensure no out-of-bounds error
][
::-1
] # Reverse to get the messages in chronological order of their appearance data["question"] = query
chat_history[-i - 1].contents[0].text_content
for i in range(min(4, len(chat_history))) # Ensure no out-of-bounds error
][
::-1
] # Reverse to get the messages in chronological order of their appearance
data["chat_history"] = text_chat_history
data["question"] = query
if prompt is None:
Expand Down
2 changes: 1 addition & 1 deletion app/retrieval/lecture_retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ def rewrite_elaborated_query(
prompt = ChatPromptTemplate.from_messages(prompt_val)
try:
response = (prompt | self.pipeline).invoke({})
logger.info(f"Response from exercise chat pipeline: {response}")
logger.info(f"Response from retirval pipeline: {response}")
return response
except Exception as e:
raise e
Expand Down

0 comments on commit 0768e5a

Please sign in to comment.