Commit fa6d65c: better rag
Enea_Gore committed Dec 2, 2024
1 parent bbb2bb0
Showing 9 changed files with 325 additions and 4 deletions.
@@ -6,7 +6,8 @@
class ApproachType(str, Enum):
    basic = "BasicApproach"
    chain_of_thought = "ChainOfThought"
    rag = "RagApproach"

class ApproachConfig(BaseModel, ABC):
    max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.")
    model: ModelConfigType = Field(default=DefaultModelConfig())
@@ -3,13 +3,16 @@
from module_text_llm.basic_approach import BasicApproachConfig
from module_text_llm.chain_of_thought_approach import ChainOfThoughtConfig
from module_text_llm.approach_config import ApproachConfig
from module_text_llm.retrieval_augmented_generation import RAGApproachConfig

from module_text_llm.basic_approach.generate_suggestions import generate_suggestions as generate_suggestions_basic
from module_text_llm.chain_of_thought_approach.generate_suggestions import generate_suggestions as generate_cot_suggestions
from module_text_llm.retrieval_augmented_generation.generate_suggestions import generate_suggestions as generate_rag_suggestions

async def generate_suggestions(exercise: Exercise, submission: Submission, config: ApproachConfig, debug: bool) -> List[Feedback]:
    if isinstance(config, BasicApproachConfig):
        return await generate_suggestions_basic(exercise, submission, config, debug)
    if isinstance(config, ChainOfThoughtConfig):
        return await generate_cot_suggestions(exercise, submission, config, debug)
    if isinstance(config, RAGApproachConfig):
        return await generate_rag_suggestions(exercise, submission, config, debug)
    raise ValueError("Unsupported config type provided.")
3 changes: 2 additions & 1 deletion modules/text/module_text_llm/module_text_llm/config.py
@@ -4,8 +4,9 @@

from module_text_llm.chain_of_thought_approach import ChainOfThoughtConfig
from module_text_llm.basic_approach import BasicApproachConfig
from module_text_llm.retrieval_augmented_generation import RAGApproachConfig

-ApproachConfigUnion = Union[BasicApproachConfig, ChainOfThoughtConfig]
+ApproachConfigUnion = Union[BasicApproachConfig, ChainOfThoughtConfig, RAGApproachConfig]

@config_schema_provider
class Configuration(BaseModel):
@@ -0,0 +1,10 @@
from pydantic import Field
from typing import Literal
from module_text_llm.approach_config import ApproachConfig
from module_text_llm.retrieval_augmented_generation.agents import TutorAgent
from module_text_llm.retrieval_augmented_generation.prompt_generate_suggestions import GenerateSuggestionsPrompt

# Module-level singleton: constructing the agent loads the lecture PDFs and builds the vector store once at import time.
tutor = TutorAgent()

class RAGApproachConfig(ApproachConfig):
    type: Literal['rag'] = 'rag'
    generate_suggestions_prompt: GenerateSuggestionsPrompt = Field(default=GenerateSuggestionsPrompt())
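
A minimal sketch (not part of this commit) of how the new type discriminator resolves: when a config payload is parsed against the union from config.py, the Literal field selects the matching approach. It assumes BasicApproachConfig and ChainOfThoughtConfig declare their own type literals, and pydantic v1 as pinned in pyproject.toml:

from typing import Union
from pydantic import parse_obj_as
from module_text_llm.basic_approach import BasicApproachConfig
from module_text_llm.chain_of_thought_approach import ChainOfThoughtConfig
from module_text_llm.retrieval_augmented_generation import RAGApproachConfig

# "rag" fails the Literal check of the other two configs, so the union resolves to RAGApproachConfig.
config = parse_obj_as(Union[BasicApproachConfig, ChainOfThoughtConfig, RAGApproachConfig], {"type": "rag"})
assert isinstance(config, RAGApproachConfig)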
@@ -0,0 +1,127 @@
import glob
import json
from typing import List, Optional

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.tools.retriever import create_retriever_tool
from langchain_core.tools import tool
from pydantic import BaseModel, Field

# Output object
class FeedbackModel(BaseModel):
    title: str = Field(description="Very short title, i.e. feedback category or similar", example="Logic Error")
    description: str = Field(description="Feedback description")
    line_start: Optional[int] = Field(description="Referenced line number start, or empty if unreferenced")
    line_end: Optional[int] = Field(description="Referenced line number end, or empty if unreferenced")
    credits: float = Field(0.0, description="Number of points received/deducted")
    grading_instruction_id: Optional[int] = Field(
        description="ID of the grading instruction that was used to generate this feedback, or empty if no grading instruction was used"
    )

# Registered as a tool so the agent can emit the structured assessment via function calling.
@tool
class AssessmentModel(BaseModel):
    """Collection of feedbacks making up an assessment"""

    feedbacks: List[FeedbackModel] = Field(description="Assessment feedbacks")

# Plain pydantic twin of AssessmentModel, used to parse the agent's JSON output.
class AssessmentModelParse(BaseModel):
    """Collection of feedbacks making up an assessment"""

    feedbacks: List[FeedbackModel] = Field(description="Assessment feedbacks")

class TutorAgent:
    def __init__(self, session_id="test-session"):
        # Initialize the model and per-session memory
        self.model = ChatOpenAI(model="gpt-4o-2024-08-06")  # alternative: gpt-4o-mini
        self.memory = InMemoryChatMessageHistory(session_id=session_id)
        self.approach_config = None

        # Load all lecture PDFs
        all_docs = []
        file_paths = glob.glob("module_text_llm/retrieval_augmented_generation/pdfs/*.pdf")
        for file_path in file_paths:
            loader = PyPDFLoader(file_path)
            docs = loader.load()
            all_docs += docs

        # Chunk the pages and index them in an in-memory vector store
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        splits = text_splitter.split_documents(all_docs)
        vectorstore = InMemoryVectorStore.from_documents(
            documents=splits, embedding=OpenAIEmbeddings()
        )

        retriever = vectorstore.as_retriever()
        retriever_tool = create_retriever_tool(retriever, name="retrieve_document", description="Retrieves the pdf documents from the relevant lecture")

        # The agent can either retrieve lecture material or emit the structured assessment
        self.tools = [retriever_tool, AssessmentModel]

    def setConfig(self, approach_config):
        self.approach_config = approach_config
        self.prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self.approach_config.generate_suggestions_prompt.system_message),
                ("human", "{submission}"),
                ("placeholder", "{agent_scratchpad}"),  # internal steps created through function calling
            ])
        self.agent = create_tool_calling_agent(self.model, self.tools, self.prompt)
        self.agent_executor = AgentExecutor(agent=self.agent, tools=self.tools)

        # Default configuration for the agent
        self.config = {"configurable": {"session_id": "test-session"}}

    def call_agent(self, prompt):
        """Calls the agent with the prompt inputs and returns the parsed assessment."""
        response = self.agent_executor.invoke(prompt)
        return AssessmentModelParse.parse_obj(json.loads(response["output"]))


@@ -0,0 +1,86 @@
from typing import List

from athena import emit_meta
from athena.text import Exercise, Submission, Feedback
from athena.logger import logger
from llm_core.utils.llm_utils import (
    get_chat_prompt_with_formatting_instructions,
    check_prompt_length_and_omit_features_if_necessary,
    num_tokens_from_prompt,
)
from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, format_grading_instructions
from module_text_llm.basic_approach.prompt_generate_suggestions import AssessmentModel
from module_text_llm.retrieval_augmented_generation import RAGApproachConfig, tutor

async def generate_suggestions(exercise: Exercise, submission: Submission, config: RAGApproachConfig, debug: bool) -> List[Feedback]:
    model = config.model.get_model()  # type: ignore[attr-defined]
    prompt_input = {
        "max_points": exercise.max_points,
        "bonus_points": exercise.bonus_points,
        "grading_instructions": format_grading_instructions(exercise.grading_instructions, exercise.grading_criteria),
        "problem_statement": exercise.problem_statement or "No problem statement.",
        "example_solution": exercise.example_solution,
        "submission": add_sentence_numbers(submission.text)
    }

    chat_prompt = get_chat_prompt_with_formatting_instructions(
        model=model,
        system_message=config.generate_suggestions_prompt.system_message,
        human_message=config.generate_suggestions_prompt.human_message,
        pydantic_object=AssessmentModel
    )

    # Check if the prompt is too long and omit features if necessary (in order of importance)
    omittable_features = ["example_solution", "problem_statement", "grading_instructions"]
    prompt_input, should_run = check_prompt_length_and_omit_features_if_necessary(
        prompt=chat_prompt,
        prompt_input=prompt_input,
        max_input_tokens=config.max_input_tokens,
        omittable_features=omittable_features,
        debug=debug
    )

    if not should_run:
        logger.warning("Input too long. Skipping.")
        if debug:
            emit_meta("prompt", chat_prompt.format(**prompt_input))
            emit_meta("error", f"Input too long {num_tokens_from_prompt(chat_prompt, prompt_input)} > {config.max_input_tokens}")
        return []

    tutor.setConfig(config)
    result = tutor.call_agent(prompt_input)

    if debug:
        emit_meta("generate_suggestions", {
            "prompt": chat_prompt.format(**prompt_input),
            "result": result.dict() if result is not None else None
        })

    if result is None:
        return []

    grading_instruction_ids = set(
        grading_instruction.id
        for criterion in exercise.grading_criteria or []
        for grading_instruction in criterion.structured_grading_instructions
    )

    feedbacks = []
    for feedback in result.feedbacks:
        index_start, index_end = get_index_range_from_line_range(feedback.line_start, feedback.line_end, submission.text)
        grading_instruction_id = feedback.grading_instruction_id if feedback.grading_instruction_id in grading_instruction_ids else None
        feedbacks.append(Feedback(
            exercise_id=exercise.id,
            submission_id=submission.id,
            title=feedback.title,
            description=feedback.description,
            index_start=index_start,
            index_end=index_end,
            credits=feedback.credits,
            structured_grading_instruction_id=grading_instruction_id,
            meta={}
        ))

    return feedbacks
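
For reference, a sketch (not part of this commit) of the JSON shape the agent must place in response["output"] for AssessmentModelParse to accept; the values are illustrative:

import json
from module_text_llm.retrieval_augmented_generation.agents import AssessmentModelParse

raw = '{"feedbacks": [{"title": "Missing stage", "description": "Condensation is not mentioned; see the water-cycle lecture slides.", "line_start": 2, "line_end": 2, "credits": -1.0, "grading_instruction_id": null}]}'
assessment = AssessmentModelParse.parse_obj(json.loads(raw))
print(assessment.feedbacks[0].credits)  # -1.0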
@@ -0,0 +1,73 @@
from typing import List, Optional
from pydantic import BaseModel, Field

system_message = """\
You are an AI tutor for text assessment at a prestigious university.

# Task
Create graded feedback suggestions for a student's text submission that a human tutor would accept. Meaning, the feedback you provide should be applicable to the submission with little to no modification.
You have access to the provided lecture slide documents to help you provide feedback.
If you do use them, please reference the title and the page in your feedback.
You must explicitly use the lecture slides and reference them in your feedback.

# Style
1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. Contextual
Make use of the lecture slides provided. State clearly in your feedback which lecture you are using. If you
believe that the student could benefit from a slide, reference it in your feedback.
The grading instructions are there to guide you on which criteria to award points for.
You can comment with 0 points about grammar and spelling errors, but you should not give or remove points for them.

# Problem statement
{problem_statement}

# Example solution
{example_solution}

# Grading instructions
You can use the following grading instructions as a baseline for how you distribute credits, but write your own feedback. Do not use the feedback provided to write your feedback.
{grading_instructions}
Max points: {max_points}, bonus points: {bonus_points}

Respond only in JSON with the provided Assessment Feedback schema, but do not prefix the JSON with "json".
"""

human_message = """\
Student's submission to grade (with sentence numbers <number>: <sentence>):
Respond in JSON.
\"\"\"
{submission}
\"\"\"\
"""

# Input prompt
class GenerateSuggestionsPrompt(BaseModel):
    """\
Features available: **{problem_statement}**, **{example_solution}**, **{grading_instructions}**, **{max_points}**, **{bonus_points}**, **{submission}**

_Note: **{problem_statement}**, **{example_solution}**, or **{grading_instructions}** might be omitted if the input is too long._\
"""
    system_message: str = Field(default=system_message,
                                description="Message for priming AI behavior and instructing it what to do.")
    human_message: str = Field(default=human_message,
                               description="Message from a human. The input on which the AI is supposed to act.")

# Output object
class FeedbackModel(BaseModel):
    title: str = Field(description="Very short title, i.e. feedback category or similar", example="Logic Error")
    description: str = Field(description="Feedback description")
    line_start: Optional[int] = Field(description="Referenced line number start, or empty if unreferenced")
    line_end: Optional[int] = Field(description="Referenced line number end, or empty if unreferenced")
    credits: float = Field(0.0, description="Number of points received/deducted")
    grading_instruction_id: Optional[int] = Field(
        description="ID of the grading instruction that was used to generate this feedback, or empty if no grading instruction was used"
    )

class AssessmentModel(BaseModel):
    """Collection of feedbacks making up an assessment"""

    feedbacks: List[FeedbackModel] = Field(description="Assessment feedbacks")
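
A quick sketch (not part of this commit) of how the default system message is filled before it reaches the agent; the values are illustrative:

from module_text_llm.retrieval_augmented_generation.prompt_generate_suggestions import GenerateSuggestionsPrompt

prompt = GenerateSuggestionsPrompt()
print(prompt.system_message.format(
    problem_statement="Explain the water cycle.",
    example_solution="Evaporation, condensation, precipitation.",
    grading_instructions="1 point per correctly named stage.",
    max_points=3,
    bonus_points=0,
))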

21 changes: 20 additions & 1 deletion modules/text/module_text_llm/poetry.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions modules/text/module_text_llm/pyproject.toml
@@ -15,6 +15,7 @@
gitpython = "3.1.41"
nltk = "3.8.1"
python-dotenv = "1.0.0"
tiktoken = "0.7.0"
pypdf = "5.1.0"

[tool.poetry.dev-dependencies]
pydantic = "1.10.17"