From 811e37c7e87aa6eade80f4c090496fe9812ab1fb Mon Sep 17 00:00:00 2001 From: Yassine Souissi <74144843+yassinsws@users.noreply.github.com> Date: Thu, 11 Jul 2024 14:19:30 +0200 Subject: [PATCH] Bugfix/index and parenthesis bug fixes (#138) --- app/pipeline/chat/exercise_chat_pipeline.py | 4 +++- app/pipeline/prompts/reranker_prompt.txt | 2 +- app/retrieval/lecture_retrieval.py | 5 +++-- app/web/status/status_update.py | 4 ++-- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/app/pipeline/chat/exercise_chat_pipeline.py b/app/pipeline/chat/exercise_chat_pipeline.py index 3f87f686..8043e9ad 100644 --- a/app/pipeline/chat/exercise_chat_pipeline.py +++ b/app/pipeline/chat/exercise_chat_pipeline.py @@ -346,7 +346,9 @@ def _add_relevant_chunks_to_prompt(self, retrieved_lecture_chunks: List[dict]): ) txt += lct - self.prompt += SystemMessagePromptTemplate.from_template(txt) + self.prompt += SystemMessagePromptTemplate.from_template( + txt.replace("{", "{{").replace("}", "}}") + ) def should_execute_lecture_pipeline(self, course_id: int) -> bool: """ diff --git a/app/pipeline/prompts/reranker_prompt.txt b/app/pipeline/prompts/reranker_prompt.txt index c682c425..7bfea83d 100644 --- a/app/pipeline/prompts/reranker_prompt.txt +++ b/app/pipeline/prompts/reranker_prompt.txt @@ -3,7 +3,7 @@ Respond with the numbers of the paragraphs you should consult to answer the ques The relevance score is a number from 1 to 10 based on how relevant the paragraphs are to answer the question. Do not include any paragraphs that are not relevant to the question. Without any comment, return the result in the following JSON format, it is important to avoid giving -unnecessary information, only the number of the paragraph if it's really necessary for answering the student's question +unnecessary information, only the number of the paragraph if it's necessary for answering the student's question otherwise leave the array empty. 
{{"selected_paragraphs": [<number>, <number>, ...]}} diff --git a/app/retrieval/lecture_retrieval.py b/app/retrieval/lecture_retrieval.py index 566acb87..df832ebc 100644 --- a/app/retrieval/lecture_retrieval.py +++ b/app/retrieval/lecture_retrieval.py @@ -143,7 +143,8 @@ def __call__( selected_chunks_index = self.reranker_pipeline( paragraphs=merged_chunks, query=student_query, chat_history=chat_history ) - return [merged_chunks[int(i)] for i in selected_chunks_index] + if selected_chunks_index: + return [merged_chunks[int(i)] for i in selected_chunks_index] return [] @traceable(name="Basic Lecture Retrieval") @@ -467,7 +468,7 @@ def run_parallel_rewrite_tasks( response_future = executor.submit( self.search_in_db, query=rewritten_query, - hybrid_factor=0.7, + hybrid_factor=0.9, result_limit=result_limit, course_id=course_id, base_url=base_url, diff --git a/app/web/status/status_update.py b/app/web/status/status_update.py index 6ab91f4c..533047ca 100644 --- a/app/web/status/status_update.py +++ b/app/web/status/status_update.py @@ -127,7 +127,7 @@ def error(self, message: str, exception=None): """ self.stage.state = StageStateEnum.ERROR self.stage.message = message - self.stage.result = None + self.status.result = None self.stage.suggestions = None # Set all subsequent stages to SKIPPED if an error occurs rest_of_index = ( @@ -157,7 +157,7 @@ def skip(self, message: Optional[str] = None, start_next_stage: bool = True): """ self.stage.state = StageStateEnum.SKIPPED self.stage.message = message - self.stage.result = None + self.status.result = None self.stage.suggestions = None next_stage = self.get_next_stage() if next_stage is not None: