Commit fa6d65c
Enea_Gore committed on Dec 2, 2024 (1 parent: bbb2bb0)
Showing 9 changed files with 325 additions and 4 deletions.
modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/__init__.py
10 additions, 0 deletions
@@ -0,0 +1,10 @@
from pydantic import Field
from typing import Literal

from module_text_llm.approach_config import ApproachConfig
from module_text_llm.retrieval_augmented_generation.agents import TutorAgent
from module_text_llm.retrieval_augmented_generation.prompt_generate_suggestions import GenerateSuggestionsPrompt

# Module-level agent instance shared by the RAG approach.
tutor = TutorAgent()

class RAGApproachConfig(ApproachConfig):
    type: Literal['rag'] = 'rag'
    generate_suggestions_prompt: GenerateSuggestionsPrompt = Field(default=GenerateSuggestionsPrompt())
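A quick sanity check of this config (hypothetical usage, not part of this commit; note that importing the package also instantiates TutorAgent, which builds the vector store and therefore needs an OpenAI API key):

# Hypothetical usage sketch; RAGApproachConfig and its defaults are defined above.
from module_text_llm.retrieval_augmented_generation import RAGApproachConfig

config = RAGApproachConfig()
assert config.type == 'rag'  # Literal field acts as the approach discriminator
print(type(config.generate_suggestions_prompt).__name__)  # GenerateSuggestionsPrompt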
modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/agents.py
127 additions, 0 deletions
@@ -0,0 +1,127 @@
import glob
import json
from typing import List, Optional

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.tools.retriever import create_retriever_tool
from langchain_core.tools import tool
from pydantic import BaseModel, Field

# Output object
class FeedbackModel(BaseModel):
    title: str = Field(description="Very short title, i.e. feedback category or similar", example="Logic Error")
    description: str = Field(description="Feedback description")
    line_start: Optional[int] = Field(description="Referenced line number start, or empty if unreferenced")
    line_end: Optional[int] = Field(description="Referenced line number end, or empty if unreferenced")
    credits: float = Field(0.0, description="Number of points received/deducted")
    grading_instruction_id: Optional[int] = Field(
        description="ID of the grading instruction that was used to generate this feedback, or empty if no grading instruction was used"
    )

# Registered as a tool so the agent can emit its assessment via tool calling
@tool
class AssessmentModel(BaseModel):
    """Collection of feedbacks making up an assessment"""

    feedbacks: List[FeedbackModel] = Field(description="Assessment feedbacks")


# Plain pydantic model used to parse the agent's final JSON output
class AssessmentModelParse(BaseModel):
    """Collection of feedbacks making up an assessment"""

    feedbacks: List[FeedbackModel] = Field(description="Assessment feedbacks")

class TutorAgent:
    def __init__(self, session_id="test-session"):
        # Initialize model, memory, and tools
        self.model = ChatOpenAI(model="gpt-4o-2024-08-06")  # model options: gpt-4o-2024-08-06, gpt-4o-mini
        self.memory = InMemoryChatMessageHistory(session_id=session_id)
        self.approach_config = None

        # Load every lecture PDF and collect the pages into one list
        all_docs = []
        file_paths = glob.glob("module_text_llm/retrieval_augmented_generation/pdfs/*.pdf")
        for file_path in file_paths:
            loader = PyPDFLoader(file_path)
            docs = loader.load()
            all_docs += docs

        # Chunk the pages and index them in an in-memory vector store
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        splits = text_splitter.split_documents(all_docs)
        vectorstore = InMemoryVectorStore.from_documents(
            documents=splits, embedding=OpenAIEmbeddings()
        )

        # Expose the vector store to the agent as a retriever tool
        retriever = vectorstore.as_retriever()
        retriever_tool = create_retriever_tool(retriever, name="retrieve_document", description="Retrieves the pdf documents from the relevant lecture")

        # Define the tools; the prompt, agent, and executor are created in setConfig
        self.tools = [retriever_tool, AssessmentModel]
        # structured_llm = self.model.with_structured_output(AssessmentModel)

    def setConfig(self, approach_config):
        self.approach_config = approach_config
        self.prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self.approach_config.generate_suggestions_prompt.system_message),
                ("human", "{submission}"),
                ("placeholder", "{agent_scratchpad}"),  # Internal scratchpad for steps created through tool calling
            ])
        self.agent = create_tool_calling_agent(self.model, self.tools, self.prompt)
        self.agent_executor = AgentExecutor(agent=self.agent, tools=self.tools)

        # Default configuration for the agent
        self.config = {"configurable": {"session_id": "test-session"}}

    def call_agent(self, prompt):
        """Calls the agent with the prompt inputs and returns the parsed assessment."""
        response = self.agent_executor.invoke(input=prompt)
        # The executor returns the model output as a JSON string under "output";
        # parse it into the plain pydantic model (no @tool wrapper).
        res = AssessmentModelParse.parse_obj(json.loads(response["output"]))
        return res


# Reference system message, kept for documentation; the live prompt comes from the approach config:
# system_message = """You are an AI tutor for text assessment at a prestigious university.

# # Task
# Create graded feedback suggestions for a student's text submission that a human tutor would accept. Meaning, the feedback you provide should be applicable to the submission with little to no modification.

# You have access to the provided document lecture slides to help you provide feedback.
# If you do use them, please reference the title and the page in your feedback.
# Write it down explicitly when lecture slides or contents are relevant.

# # Style
# 1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. Contextual

# Make use of the lecture slides provided. State clearly in your feedback which lecture you are using. If you
# believe that the student could benefit from the slide, refer to it in your feedback.

# The grading instructions are there to guide you on which criteria to give points.
# You can comment with 0 points about grammar and spelling errors, but you should not give or remove points for them.

# # Problem statement
# {problem_statement}

# # Example solution
# {example_solution}

# # Grading instructions
# {grading_instructions}
# Max points: {max_points}, bonus points: {bonus_points}
# Respond only in json with the provided Assessment Feedback schema.
# """
modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/generate_suggestions.py
86 additions, 0 deletions
@@ -0,0 +1,86 @@
from typing import List

from athena import emit_meta
from athena.text import Exercise, Submission, Feedback
from athena.logger import logger
from llm_core.utils.llm_utils import (
    get_chat_prompt_with_formatting_instructions,
    check_prompt_length_and_omit_features_if_necessary,
    num_tokens_from_prompt,
)
from module_text_llm.config import BasicApproachConfig
from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, format_grading_instructions
from module_text_llm.basic_approach.prompt_generate_suggestions import AssessmentModel
from module_text_llm.retrieval_augmented_generation import tutor


async def generate_suggestions(exercise: Exercise, submission: Submission, config: BasicApproachConfig, debug: bool) -> List[Feedback]:
    model = config.model.get_model()  # type: ignore[attr-defined]
    prompt_input = {
        "max_points": exercise.max_points,
        "bonus_points": exercise.bonus_points,
        "grading_instructions": format_grading_instructions(exercise.grading_instructions, exercise.grading_criteria),
        "problem_statement": exercise.problem_statement or "No problem statement.",
        "example_solution": exercise.example_solution,
        "submission": add_sentence_numbers(submission.text)
    }

    chat_prompt = get_chat_prompt_with_formatting_instructions(
        model=model,
        system_message=config.generate_suggestions_prompt.system_message,
        human_message=config.generate_suggestions_prompt.human_message,
        pydantic_object=AssessmentModel
    )

    # Check if the prompt is too long and omit features if necessary (in order of importance)
    omittable_features = ["example_solution", "problem_statement", "grading_instructions"]
    prompt_input, should_run = check_prompt_length_and_omit_features_if_necessary(
        prompt=chat_prompt,
        prompt_input=prompt_input,
        max_input_tokens=config.max_input_tokens,
        omittable_features=omittable_features,
        debug=debug
    )

    if not should_run:
        logger.warning("Input too long. Skipping.")
        if debug:
            emit_meta("prompt", chat_prompt.format(**prompt_input))
            emit_meta("error", f"Input too long {num_tokens_from_prompt(chat_prompt, prompt_input)} > {config.max_input_tokens}")
        return []

    tutor.setConfig(config)
    result = tutor.call_agent(prompt_input)

    if debug:
        emit_meta("generate_suggestions", {
            "prompt": chat_prompt.format(**prompt_input),
            "result": result.dict() if result is not None else None
        })

    if result is None:
        return []

    grading_instruction_ids = set(
        grading_instruction.id
        for criterion in exercise.grading_criteria or []
        for grading_instruction in criterion.structured_grading_instructions
    )

    feedbacks = []
    for feedback in result.feedbacks:
        index_start, index_end = get_index_range_from_line_range(feedback.line_start, feedback.line_end, submission.text)
        grading_instruction_id = feedback.grading_instruction_id if feedback.grading_instruction_id in grading_instruction_ids else None
        feedbacks.append(Feedback(
            exercise_id=exercise.id,
            submission_id=submission.id,
            title=feedback.title,
            description=feedback.description,
            index_start=index_start,
            index_end=index_end,
            credits=feedback.credits,
            structured_grading_instruction_id=grading_instruction_id,
            meta={}
        ))

    return feedbacks
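The helpers add_sentence_numbers and get_index_range_from_line_range come from module_text_llm.helpers.utils: the first prefixes each sentence with a number so the model can cite ranges, the second maps a cited range back to character offsets in the raw text. A simplified, hypothetical re-implementation of the numbering contract (the real helper differs):

# Hypothetical sketch of the sentence-numbering contract, not the real helper.
def add_sentence_numbers_demo(text: str) -> str:
    sentences = [s.strip() for s in text.split(".") if s.strip()]
    return "\n".join(f"{i}: {s}." for i, s in enumerate(sentences, start=1))

print(add_sentence_numbers_demo("First point. Second point."))
# 1: First point.
# 2: Second point.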
modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/prompt_generate_suggestions.py
73 additions, 0 deletions
@@ -0,0 +1,73 @@
from typing import List, Optional
from pydantic import BaseModel, Field

system_message = """\
You are an AI tutor for text assessment at a prestigious university.
# Task
Create graded feedback suggestions for a student's text submission that a human tutor would accept. Meaning, the feedback you provide should be applicable to the submission with little to no modification.
You have access to the provided document lecture slides to help you provide feedback.
If you do use them, please reference the title and the page in your feedback.
You must explicitly use the lecture slides and cite them in your feedback.
# Style
1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. Contextual
Make use of the lecture slides provided. State clearly in your feedback which lecture you are using. If you
believe that the student could benefit from the slide, refer to it in your feedback.
The grading instructions are there to guide you on which criteria to give points.
You can comment with 0 points about grammar and spelling errors, but you should not give or remove points for them.
# Problem statement
{problem_statement}
# Example solution
{example_solution}
# You can use the following grading instructions as a baseline for how you distribute credits, but write your own feedback. Do not use the feedback provided to write your feedback.
{grading_instructions}
Max points: {max_points}, bonus points: {bonus_points}
Respond only in json with the provided Assessment Feedback schema, but do not prefix the json with "json".
"""

human_message = """\
Student\'s submission to grade (with sentence numbers <number>: <sentence>):
Respond in json.
\"\"\"
{submission}
\"\"\"\
"""


# Input Prompt
class GenerateSuggestionsPrompt(BaseModel):
    """\
Features available: **{problem_statement}**, **{example_solution}**, **{grading_instructions}**, **{max_points}**, **{bonus_points}**, **{submission}**
_Note: **{problem_statement}**, **{example_solution}**, or **{grading_instructions}** might be omitted if the input is too long._\
"""
    system_message: str = Field(default=system_message,
                                description="Message for priming AI behavior and instructing it what to do.")
    human_message: str = Field(default=human_message,
                               description="Message from a human. The input on which the AI is supposed to act.")

# Output Object
class FeedbackModel(BaseModel):
    title: str = Field(description="Very short title, i.e. feedback category or similar", example="Logic Error")
    description: str = Field(description="Feedback description")
    line_start: Optional[int] = Field(description="Referenced line number start, or empty if unreferenced")
    line_end: Optional[int] = Field(description="Referenced line number end, or empty if unreferenced")
    credits: float = Field(0.0, description="Number of points received/deducted")
    grading_instruction_id: Optional[int] = Field(
        description="ID of the grading instruction that was used to generate this feedback, or empty if no grading instruction was used"
    )


class AssessmentModel(BaseModel):
    """Collection of feedbacks making up an assessment"""

    feedbacks: List[FeedbackModel] = Field(description="Assessment feedbacks")
Some generated files are not rendered by default.