From d278008f726c1f00e1ee23bf00970cedbba07d08 Mon Sep 17 00:00:00 2001
From: = Enea_Gore <ge72git@mytum.de>
Date: Mon, 2 Dec 2024 12:03:16 +0100
Subject: [PATCH] ensure correct PDF retrieval

---
 .../retrieval_augmented_generation/agents.py  | 43 +++++++++++++++----
 .../prompt_generate_suggestions.py            |  6 +++
 modules/text/module_text_llm/poetry.lock      | 21 ++++++++-
 modules/text/module_text_llm/pyproject.toml   |  1 +
 4 files changed, 61 insertions(+), 10 deletions(-)

diff --git a/modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/agents.py b/modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/agents.py
index 2d056bc0..f42ab730 100644
--- a/modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/agents.py
+++ b/modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/agents.py
@@ -41,7 +41,7 @@ def __init__(self, session_id="test-session"):
         self.model = ChatOpenAI(model="gpt-4o-mini") #gpt-4o-2024-08-06 , gpt-4o-mini
         self.memory = InMemoryChatMessageHistory(session_id=session_id)
         all_docs = []
-        file_paths = glob.glob("pdfs/*.pdf")
+        file_paths = glob.glob("module_text_llm/retrieval_augmented_generation/pdfs/*.pdf")
 
         for file_path in file_paths:
             loader = PyPDFLoader(file_path)
@@ -56,18 +56,43 @@ def __init__(self, session_id="test-session"):
 
         retriever = vectorstore.as_retriever()
         retriever_tool = create_retriever_tool(retriever, name="retrieve_document", description="Retrieves the pdf documents from the relevant lecture")
+        system_message = """\
+        You are an AI tutor for text assessment at a prestigious university.
 
+        # Task
+        Create graded feedback suggestions for a student\'s text submission that a human tutor would accept. \
+        Meaning, the feedback you provide should be applicable to the submission with little to no modification.
+        You have access to the provided document lecture slides to help you provide feedback. If you do use them, please reference the title and the page on your feedback.
+
+        # Style
+        1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. Contextual
+
+        Make use of the lecture slides provided. State clearly on your feedback which lecture you are using.
+
+        # Problem statement
+        {problem_statement}
+
+        # Example solution
+        {example_solution}
+
+        # Grading instructions
+        {grading_instructions}
+        Max points: {max_points}, bonus points: {bonus_points}\
+            
+        Respond only in json with the provided Assessment Feedback schema.
+        """
         # Define the prompt template with a system message placeholder
         self.prompt = ChatPromptTemplate.from_messages(
             [
-                ("system", """ You are an AI tutor for text assessment at a prestigious university.
-
-# Task
-Create graded feedback suggestions for a student\'s text submission that a human tutor would accept. \
-Meaning, the feedback you provide should be applicable to the submission with little to no modification.
-You have access to the provided document lecture slides to help you provide feedback. If you do use them, please reference the title and the page on your feedback.
-ALWAYS RESPONG IN A PYDNATIC OBJECT WITH THE PROVIDED ASSESSMENT MODEL SCHEMA.
-problem satement: {problem_statement}, max points: {max_points}"""),
+                ("system", system_message),
+#                 ("system", """ You are an AI tutor for text assessment at a prestigious university.
+
+# # Task
+# Create graded feedback suggestions for a student\'s text submission that a human tutor would accept. \
+# Meaning, the feedback you provide should be applicable to the submission with little to no modification.
+# You have access to the provided document lecture slides to help you provide feedback. If you do use them, please reference the title and the page on your feedback.
+# ALWAYS RESPONG IN A PYDNATIC OBJECT WITH THE PROVIDED ASSESSMENT MODEL SCHEMA.
+# problem satement: {problem_statement}, max points: {max_points}"""),
                 # ("placeholder", "{chat_history}"),  # History of interactions
                 ("human", "{submission}"),
                 ("placeholder", "{agent_scratchpad}"),  # Internal for steps created through function calling
diff --git a/modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/prompt_generate_suggestions.py b/modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/prompt_generate_suggestions.py
index 29af0df7..dc35ab84 100644
--- a/modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/prompt_generate_suggestions.py
+++ b/modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/prompt_generate_suggestions.py
@@ -13,6 +13,12 @@
 # Style
 1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. Contextual
 
+Make use of the lecture slides provided. State clearly on your feedback which lecture you are using. If you
+believe that the student could benefit from the slide refer it on your feedback.
+
+The grading instructions are there to guide you on which criteria to give points. 
+You can comment with 0 points about grammar and spelling errors, but you should not give or remove points for them.
+
 # Problem statement
 {problem_statement}
 
diff --git a/modules/text/module_text_llm/poetry.lock b/modules/text/module_text_llm/poetry.lock
index 236b739b..66df8be1 100644
--- a/modules/text/module_text_llm/poetry.lock
+++ b/modules/text/module_text_llm/poetry.lock
@@ -1757,6 +1757,25 @@ files = [
 [package.dependencies]
 pylint = ">=1.7"
 
+[[package]]
+name = "pypdf"
+version = "5.1.0"
+description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "pypdf-5.1.0-py3-none-any.whl", hash = "sha256:3bd4f503f4ebc58bae40d81e81a9176c400cbbac2ba2d877367595fb524dfdfc"},
+    {file = "pypdf-5.1.0.tar.gz", hash = "sha256:425a129abb1614183fd1aca6982f650b47f8026867c0ce7c4b9f281c443d2740"},
+]
+
+[package.extras]
+crypto = ["cryptography"]
+cryptodome = ["PyCryptodome"]
+dev = ["black", "flit", "pip-tools", "pre-commit (<2.18.0)", "pytest-cov", "pytest-socket", "pytest-timeout", "pytest-xdist", "wheel"]
+docs = ["myst_parser", "sphinx", "sphinx_rtd_theme"]
+full = ["Pillow (>=8.0.0)", "cryptography"]
+image = ["Pillow (>=8.0.0)"]
+
 [[package]]
 name = "python-dotenv"
 version = "1.0.0"
@@ -2442,4 +2461,4 @@ propcache = ">=0.2.0"
 [metadata]
 lock-version = "2.0"
 python-versions = "3.11.*"
-content-hash = "f7ea4d2b80e6f70b76d43640120511ec255c3a7bf226592d711e142b9ece9192"
+content-hash = "6ef25141cac048e3a4583cbf742a3b5e17357dd5adf5a402d57e9f753d2ae742"
diff --git a/modules/text/module_text_llm/pyproject.toml b/modules/text/module_text_llm/pyproject.toml
index d0e73bd6..ce345bb1 100644
--- a/modules/text/module_text_llm/pyproject.toml
+++ b/modules/text/module_text_llm/pyproject.toml
@@ -15,6 +15,7 @@ gitpython = "3.1.41"
 nltk = "3.8.1"
 python-dotenv = "1.0.0"
 tiktoken = "0.7.0"
+pypdf = "5.1.0"
 
 [tool.poetry.dev-dependencies]
 pydantic = "1.10.17"