Replace gpt-4-vision with gpt-o (#127)

ls1intum · Jun 22, 2024 · 86e1550 · 86e1550
1 parent 77a985a
commit 86e1550
Show file tree

Hide file tree

Showing 2 changed files with 14 additions and 13 deletions.
diff --git a/app/pipeline/lecture_ingestion_pipeline.py b/app/pipeline/lecture_ingestion_pipeline.py
@@ -96,10 +96,8 @@ def __init__(
         super().__init__()
         self.collection = init_lecture_schema(client)
         self.dto = dto
-        self.llm_vision = BasicRequestHandler("azure-gpt-4-vision")
-        self.llm_chat = BasicRequestHandler(
-            "azure-gpt-35-turbo"
-        )  # TODO change use langain model
+        self.llm_vision = BasicRequestHandler("azure-gpt-4-omni")
+        self.llm_chat = BasicRequestHandler("azure-gpt-35-turbo")
         self.llm_embedding = BasicRequestHandler("embedding-small")
         self.callback = callback
         request_handler = CapabilityRequestHandler(
@@ -190,18 +188,19 @@ def chunk_data(
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=512, chunk_overlap=102
         )
+        old_page_text = ""
         for page_num in range(doc.page_count):
             page = doc.load_page(page_num)
             page_text = page.get_text()
             if page.get_images(full=False):
                 # more pixels thus more details and better quality
-                matrix = fitz.Matrix(20.0, 20.0)
+                matrix = fitz.Matrix(5, 5)
                 pix = page.get_pixmap(matrix=matrix)
                 img_bytes = pix.tobytes("jpg")
                 img_base64 = base64.b64encode(img_bytes).decode("utf-8")
                 image_interpretation = self.interpret_image(
                     img_base64,
-                    page_text,
+                    old_page_text,
                     lecture_unit_dto.lecture_name,
                     course_language,
                 )
@@ -214,6 +213,7 @@ def chunk_data(
                     page_num, page_splits, lecture_unit_dto, course_language, base_url
                 )
             )
+            old_page_text = page_text
         return data
 
     def interpret_image(
@@ -227,19 +227,21 @@ def interpret_image(
         Interpret the image passed
         """
         image_interpretation_prompt = TextMessageContentDTO(
-            text_content=f"This page is part of the {name_of_lecture} university lecture, "
-            f" explain what is on the slide in an academic way, "
-            f"respond only with the explanation in {course_language}."
-            f"For more context here is the content of the previous slide: "
-            f" {last_page_content}"
+            text_content=f"This page is part of the {name_of_lecture} university lecture."
+            f"I am the professor that created these slides, "
+            f" please interpret this slide in an academic way. "
+            f"For more context here is the content of the previous slide:\n "
+            f" {last_page_content} \n\n"
+            f" Only repond with the slide explanation and interpretation in {course_language}, "
+            f"do not add anything else to your response.Your explanation should not exceed 350 words."
         )
         image = ImageMessageContentDTO(base64=img_base64)
         iris_message = PyrisMessage(
             sender=IrisMessageRole.USER, contents=[image_interpretation_prompt, image]
         )
         try:
             response = self.llm_vision.chat(
-                [iris_message], CompletionArguments(temperature=0, max_tokens=400)
+                [iris_message], CompletionArguments(temperature=0, max_tokens=512)
             )
         except Exception as e:
             logger.error(f"Error interpreting image: {e}")

diff --git a/app/pipeline/prompts/citation_prompt.txt b/app/pipeline/prompts/citation_prompt.txt
@@ -2,7 +2,6 @@ In the paragraphs below you are provided with an answer to a question. Underneat
 Add citations of the paragraphs to the answer. Cite the paragraphs in brackets after the sentence where the information is used in the answer.
 At the end of the answer list each source with its corresponding number and provide the Lecture Title, as well as the page number in this format "[1] Lecture title, page number".
 Do not include the actual paragraphs, only the citations at the end.
-If the question is not a question, or is a greeting, do not add any citations.
 Here is an example how to rewrite the answer with citations:
 "
 Lorem ipsum dolor sit amet, consectetur adipiscing elit.[1] Ded do eiusmod tempor incididunt ut labore et dolore magna aliqua.[2]