From d278008f726c1f00e1ee23bf00970cedbba07d08 Mon Sep 17 00:00:00 2001 From: = Enea_Gore Date: Mon, 2 Dec 2024 12:03:16 +0100 Subject: [PATCH] ensure correct pdf retrieval --- .../retrieval_augmented_generation/agents.py | 43 +++++++++++++++---- .../prompt_generate_suggestions.py | 6 +++ modules/text/module_text_llm/poetry.lock | 21 ++++++++- modules/text/module_text_llm/pyproject.toml | 1 + 4 files changed, 61 insertions(+), 10 deletions(-) diff --git a/modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/agents.py b/modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/agents.py index 2d056bc0..f42ab730 100644 --- a/modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/agents.py +++ b/modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/agents.py @@ -41,7 +41,7 @@ def __init__(self, session_id="test-session"): self.model = ChatOpenAI(model="gpt-4o-mini") #gpt-4o-2024-08-06 , gpt-4o-mini self.memory = InMemoryChatMessageHistory(session_id=session_id) all_docs = [] - file_paths = glob.glob("pdfs/*.pdf") + file_paths = glob.glob("module_text_llm/retrieval_augmented_generation/pdfs/*.pdf") for file_path in file_paths: loader = PyPDFLoader(file_path) @@ -56,18 +56,43 @@ def __init__(self, session_id="test-session"): retriever = vectorstore.as_retriever() retriever_tool = create_retriever_tool(retriever, name="retrieve_document", description="Retrieves the pdf documents from the relevant lecture") + system_message = """\ + You are an AI tutor for text assessment at a prestigious university. + # Task + Create graded feedback suggestions for a student\'s text submission that a human tutor would accept. \ + Meaning, the feedback you provide should be applicable to the submission with little to no modification. + You have access to the provided document lecture slides to help you provide feedback. If you do use them, please reference the title and the page on your feedback. 
+ + # Style + 1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. Contextual + + Make use of the lecture slides provided. State clearly on your feedback which lecture you are using. + + # Problem statement + {problem_statement} + + # Example solution + {example_solution} + + # Grading instructions + {grading_instructions} + Max points: {max_points}, bonus points: {bonus_points}\ + + Respond only in json with the provided Assessment Feedback schema. + """ # Define the prompt template with a system message placeholder self.prompt = ChatPromptTemplate.from_messages( [ - ("system", """ You are an AI tutor for text assessment at a prestigious university. - -# Task -Create graded feedback suggestions for a student\'s text submission that a human tutor would accept. \ -Meaning, the feedback you provide should be applicable to the submission with little to no modification. -You have access to the provided document lecture slides to help you provide feedback. If you do use them, please reference the title and the page on your feedback. -ALWAYS RESPONG IN A PYDNATIC OBJECT WITH THE PROVIDED ASSESSMENT MODEL SCHEMA. -problem satement: {problem_statement}, max points: {max_points}"""), + ("system", system_message), +# ("system", """ You are an AI tutor for text assessment at a prestigious university. + +# # Task +# Create graded feedback suggestions for a student\'s text submission that a human tutor would accept. \ +# Meaning, the feedback you provide should be applicable to the submission with little to no modification. +# You have access to the provided document lecture slides to help you provide feedback. If you do use them, please reference the title and the page on your feedback. +# ALWAYS RESPONG IN A PYDNATIC OBJECT WITH THE PROVIDED ASSESSMENT MODEL SCHEMA. 
+# problem satement: {problem_statement}, max points: {max_points}"""), # ("placeholder", "{chat_history}"), # History of interactions ("human", "{submission}"), ("placeholder", "{agent_scratchpad}"), # Internal for steps created through function calling diff --git a/modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/prompt_generate_suggestions.py b/modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/prompt_generate_suggestions.py index 29af0df7..dc35ab84 100644 --- a/modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/prompt_generate_suggestions.py +++ b/modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/prompt_generate_suggestions.py @@ -13,6 +13,12 @@ # Style 1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. Contextual +Make use of the lecture slides provided. State clearly on your feedback which lecture you are using. If you +believe that the student could benefit from the slide refer it on your feedback. + +The grading instructions are there to guide you on which criteria to give points. +You can comment with 0 points about grammar and spelling errors, but you should not give or remove points for them. 
+ # Problem statement {problem_statement} diff --git a/modules/text/module_text_llm/poetry.lock b/modules/text/module_text_llm/poetry.lock index 236b739b..66df8be1 100644 --- a/modules/text/module_text_llm/poetry.lock +++ b/modules/text/module_text_llm/poetry.lock @@ -1757,6 +1757,25 @@ files = [ [package.dependencies] pylint = ">=1.7" +[[package]] +name = "pypdf" +version = "5.1.0" +description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pypdf-5.1.0-py3-none-any.whl", hash = "sha256:3bd4f503f4ebc58bae40d81e81a9176c400cbbac2ba2d877367595fb524dfdfc"}, + {file = "pypdf-5.1.0.tar.gz", hash = "sha256:425a129abb1614183fd1aca6982f650b47f8026867c0ce7c4b9f281c443d2740"}, +] + +[package.extras] +crypto = ["cryptography"] +cryptodome = ["PyCryptodome"] +dev = ["black", "flit", "pip-tools", "pre-commit (<2.18.0)", "pytest-cov", "pytest-socket", "pytest-timeout", "pytest-xdist", "wheel"] +docs = ["myst_parser", "sphinx", "sphinx_rtd_theme"] +full = ["Pillow (>=8.0.0)", "cryptography"] +image = ["Pillow (>=8.0.0)"] + [[package]] name = "python-dotenv" version = "1.0.0" @@ -2442,4 +2461,4 @@ propcache = ">=0.2.0" [metadata] lock-version = "2.0" python-versions = "3.11.*" -content-hash = "f7ea4d2b80e6f70b76d43640120511ec255c3a7bf226592d711e142b9ece9192" +content-hash = "6ef25141cac048e3a4583cbf742a3b5e17357dd5adf5a402d57e9f753d2ae742" diff --git a/modules/text/module_text_llm/pyproject.toml b/modules/text/module_text_llm/pyproject.toml index d0e73bd6..ce345bb1 100644 --- a/modules/text/module_text_llm/pyproject.toml +++ b/modules/text/module_text_llm/pyproject.toml @@ -15,6 +15,7 @@ gitpython = "3.1.41" nltk = "3.8.1" python-dotenv = "1.0.0" tiktoken = "0.7.0" +pypdf = "5.1.0" [tool.poetry.dev-dependencies] pydantic = "1.10.17"