Skip to content

Commit

Permalink
Replace gpt-4-vision with gpt-4o (#127)
Browse files Browse the repository at this point in the history
  • Loading branch information
yassinsws authored Jun 22, 2024
1 parent 0768e5a commit 41f3a66
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 13 deletions.
26 changes: 14 additions & 12 deletions app/pipeline/lecture_ingestion_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,8 @@ def __init__(
super().__init__()
self.collection = init_lecture_schema(client)
self.dto = dto
self.llm_vision = BasicRequestHandler("azure-gpt-4-vision")
self.llm_chat = BasicRequestHandler(
"azure-gpt-35-turbo"
) # TODO change use langain model
self.llm_vision = BasicRequestHandler("azure-gpt-4-omni")
self.llm_chat = BasicRequestHandler("azure-gpt-35-turbo")
self.llm_embedding = BasicRequestHandler("embedding-small")
self.callback = callback
request_handler = CapabilityRequestHandler(
Expand Down Expand Up @@ -190,18 +188,19 @@ def chunk_data(
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=512, chunk_overlap=102
)
old_page_text = ""
for page_num in range(doc.page_count):
page = doc.load_page(page_num)
page_text = page.get_text()
if page.get_images(full=False):
# more pixels thus more details and better quality
matrix = fitz.Matrix(20.0, 20.0)
matrix = fitz.Matrix(5, 5)
pix = page.get_pixmap(matrix=matrix)
img_bytes = pix.tobytes("jpg")
img_base64 = base64.b64encode(img_bytes).decode("utf-8")
image_interpretation = self.interpret_image(
img_base64,
page_text,
old_page_text,
lecture_unit_dto.lecture_name,
course_language,
)
Expand All @@ -214,6 +213,7 @@ def chunk_data(
page_num, page_splits, lecture_unit_dto, course_language, base_url
)
)
old_page_text = page_text
return data

def interpret_image(
Expand All @@ -227,19 +227,21 @@ def interpret_image(
Interpret the image passed
"""
image_interpretation_prompt = TextMessageContentDTO(
text_content=f"This page is part of the {name_of_lecture} university lecture, "
f" explain what is on the slide in an academic way, "
f"respond only with the explanation in {course_language}."
f"For more context here is the content of the previous slide: "
f" {last_page_content}"
text_content=f"This page is part of the {name_of_lecture} university lecture."
f"I am the professor that created these slides, "
f" please interpret this slide in an academic way. "
f"For more context here is the content of the previous slide:\n "
f" {last_page_content} \n\n"
f" Only repond with the slide explanation and interpretation in {course_language}, "
f"do not add anything else to your response.Your explanation should not exceed 350 words."
)
image = ImageMessageContentDTO(base64=img_base64)
iris_message = PyrisMessage(
sender=IrisMessageRole.USER, contents=[image_interpretation_prompt, image]
)
try:
response = self.llm_vision.chat(
[iris_message], CompletionArguments(temperature=0, max_tokens=400)
[iris_message], CompletionArguments(temperature=0, max_tokens=512)
)
except Exception as e:
logger.error(f"Error interpreting image: {e}")
Expand Down
1 change: 0 additions & 1 deletion app/pipeline/prompts/citation_prompt.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ In the paragraphs below you are provided with an answer to a question. Underneat
Add citations of the paragraphs to the answer. Cite the paragraphs in brackets after the sentence where the information is used in the answer.
At the end of the answer, list each source with its corresponding number and provide the Lecture Title, as well as the page number, in this format: "[1] Lecture title, page number".
Do not include the actual paragraphs, only the citations at the end.
If the question is not a question, or is a greeting, do not add any citations.
Here is an example of how to rewrite the answer with citations:
"
Lorem ipsum dolor sit amet, consectetur adipiscing elit.[1] Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.[2]
Expand Down

0 comments on commit 41f3a66

Please sign in to comment.