From 86e1550693e661359ebe25b7ddbc1d9bd25e03ab Mon Sep 17 00:00:00 2001 From: Yassine Souissi <74144843+yassinsws@users.noreply.github.com> Date: Sat, 22 Jun 2024 12:13:09 +0200 Subject: [PATCH] Replace gpt-4-vision with gpt-o (#127) --- app/pipeline/lecture_ingestion_pipeline.py | 26 ++++++++++++---------- app/pipeline/prompts/citation_prompt.txt | 1 - 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/app/pipeline/lecture_ingestion_pipeline.py b/app/pipeline/lecture_ingestion_pipeline.py index 2cbf629d..5deaecd2 100644 --- a/app/pipeline/lecture_ingestion_pipeline.py +++ b/app/pipeline/lecture_ingestion_pipeline.py @@ -96,10 +96,8 @@ def __init__( super().__init__() self.collection = init_lecture_schema(client) self.dto = dto - self.llm_vision = BasicRequestHandler("azure-gpt-4-vision") - self.llm_chat = BasicRequestHandler( - "azure-gpt-35-turbo" - ) # TODO change use langain model + self.llm_vision = BasicRequestHandler("azure-gpt-4-omni") + self.llm_chat = BasicRequestHandler("azure-gpt-35-turbo") self.llm_embedding = BasicRequestHandler("embedding-small") self.callback = callback request_handler = CapabilityRequestHandler( @@ -190,18 +188,19 @@ def chunk_data( text_splitter = RecursiveCharacterTextSplitter( chunk_size=512, chunk_overlap=102 ) + old_page_text = "" for page_num in range(doc.page_count): page = doc.load_page(page_num) page_text = page.get_text() if page.get_images(full=False): # more pixels thus more details and better quality - matrix = fitz.Matrix(20.0, 20.0) + matrix = fitz.Matrix(5, 5) pix = page.get_pixmap(matrix=matrix) img_bytes = pix.tobytes("jpg") img_base64 = base64.b64encode(img_bytes).decode("utf-8") image_interpretation = self.interpret_image( img_base64, - page_text, + old_page_text, lecture_unit_dto.lecture_name, course_language, ) @@ -214,6 +213,7 @@ def chunk_data( page_num, page_splits, lecture_unit_dto, course_language, base_url ) ) + old_page_text = page_text return data def interpret_image( @@ -227,11 +227,13 @@ 
def interpret_image( Interpret the image passed """ image_interpretation_prompt = TextMessageContentDTO( - text_content=f"This page is part of the {name_of_lecture} university lecture, " - f" explain what is on the slide in an academic way, " - f"respond only with the explanation in {course_language}." - f"For more context here is the content of the previous slide: " - f" {last_page_content}" + text_content=f"This page is part of the {name_of_lecture} university lecture." + f"I am the professor that created these slides, " + f" please interpret this slide in an academic way. " + f"For more context here is the content of the previous slide:\n " + f" {last_page_content} \n\n" + f" Only respond with the slide explanation and interpretation in {course_language}, " + f"do not add anything else to your response. Your explanation should not exceed 350 words." ) image = ImageMessageContentDTO(base64=img_base64) iris_message = PyrisMessage( ) try: response = self.llm_vision.chat( - [iris_message], CompletionArguments(temperature=0, max_tokens=400) + [iris_message], CompletionArguments(temperature=0, max_tokens=512) ) except Exception as e: logger.error(f"Error interpreting image: {e}") diff --git a/app/pipeline/prompts/citation_prompt.txt b/app/pipeline/prompts/citation_prompt.txt index 1c2b2fc0..11354ee2 100644 --- a/app/pipeline/prompts/citation_prompt.txt +++ b/app/pipeline/prompts/citation_prompt.txt @@ -2,7 +2,6 @@ In the paragraphs below you are provided with an answer to a question. Underneat Add citations of the paragraphs to the answer. Cite the paragraphs in brackets after the sentence where the information is used in the answer. At the end of the answer list each source with its corresponding number and provide the Lecture Title,as well as the page number in this format "[1] Lecture title, page number". Do not Include the Actual paragraphs, only the citations at the end. 
-If the question is not a question, or is a greeting, do not add any citations. Here is an example how to rewrite the answer with citations: " Lorem ipsum dolor sit amet, consectetur adipiscing elit.[1] Ded do eiusmod tempor incididunt ut labore et dolore magna aliqua.[2]