Commit fa6d65c
Enea_Gore committed on Dec 2, 2024 (1 parent: bbb2bb0)
Showing 9 changed files with 325 additions and 4 deletions.
modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/__init__.py
10 additions, 0 deletions
@@ -0,0 +1,10 @@
from pydantic import Field
from typing import Literal

from module_text_llm.approach_config import ApproachConfig
from module_text_llm.retrieval_augmented_generation.agents import TutorAgent
from module_text_llm.retrieval_augmented_generation.prompt_generate_suggestions import GenerateSuggestionsPrompt

# Module-level agent instance shared by the RAG approach.
tutor = TutorAgent()

class RAGApproachConfig(ApproachConfig):
    type: Literal['rag'] = 'rag'
    generate_suggestions_prompt: GenerateSuggestionsPrompt = Field(default=GenerateSuggestionsPrompt())
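A quick sanity check of this config (hypothetical usage, not part of this commit; note that importing the package also instantiates TutorAgent, which builds the vector store and therefore needs an OpenAI API key):

# Hypothetical usage sketch; RAGApproachConfig and its defaults are defined above.
from module_text_llm.retrieval_augmented_generation import RAGApproachConfig

config = RAGApproachConfig()
assert config.type == 'rag'  # Literal field acts as the approach discriminator
print(type(config.generate_suggestions_prompt).__name__)  # GenerateSuggestionsPrompt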
modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/agents.py
127 additions, 0 deletions
@@ -0,0 +1,127 @@
import glob
import json
from typing import List, Optional

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.tools.retriever import create_retriever_tool
from langchain_core.tools import tool
from pydantic import BaseModel, Field

# Output object
class FeedbackModel(BaseModel):
    title: str = Field(description="Very short title, i.e. feedback category or similar", example="Logic Error")
    description: str = Field(description="Feedback description")
    line_start: Optional[int] = Field(description="Referenced line number start, or empty if unreferenced")
    line_end: Optional[int] = Field(description="Referenced line number end, or empty if unreferenced")
    credits: float = Field(0.0, description="Number of points received/deducted")
    grading_instruction_id: Optional[int] = Field(
        description="ID of the grading instruction that was used to generate this feedback, or empty if no grading instruction was used"
    )

# Registered as a tool so the agent can emit its assessment via tool calling
@tool
class AssessmentModel(BaseModel):
    """Collection of feedbacks making up an assessment"""

    feedbacks: List[FeedbackModel] = Field(description="Assessment feedbacks")


# Plain pydantic model used to parse the agent's final JSON output
class AssessmentModelParse(BaseModel):
    """Collection of feedbacks making up an assessment"""

    feedbacks: List[FeedbackModel] = Field(description="Assessment feedbacks")

class TutorAgent:
    def __init__(self, session_id="test-session"):
        # Initialize model, memory, and tools
        self.model = ChatOpenAI(model="gpt-4o-2024-08-06")  # model options: gpt-4o-2024-08-06, gpt-4o-mini
        self.memory = InMemoryChatMessageHistory(session_id=session_id)
        self.approach_config = None

        # Load every lecture PDF and collect the pages into one list
        all_docs = []
        file_paths = glob.glob("module_text_llm/retrieval_augmented_generation/pdfs/*.pdf")
        for file_path in file_paths:
            loader = PyPDFLoader(file_path)
            docs = loader.load()
            all_docs += docs

        # Chunk the pages and index them in an in-memory vector store
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        splits = text_splitter.split_documents(all_docs)
        vectorstore = InMemoryVectorStore.from_documents(
            documents=splits, embedding=OpenAIEmbeddings()
        )

        # Expose the vector store to the agent as a retriever tool
        retriever = vectorstore.as_retriever()
        retriever_tool = create_retriever_tool(retriever, name="retrieve_document", description="Retrieves the pdf documents from the relevant lecture")

        # Define the tools; the prompt, agent, and executor are created in setConfig
        self.tools = [retriever_tool, AssessmentModel]
        # structured_llm = self.model.with_structured_output(AssessmentModel)

    def setConfig(self, approach_config):
        self.approach_config = approach_config
        self.prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self.approach_config.generate_suggestions_prompt.system_message),
                ("human", "{submission}"),
                ("placeholder", "{agent_scratchpad}"),  # Internal scratchpad for steps created through tool calling
            ])
        self.agent = create_tool_calling_agent(self.model, self.tools, self.prompt)
        self.agent_executor = AgentExecutor(agent=self.agent, tools=self.tools)

        # Default configuration for the agent
        self.config = {"configurable": {"session_id": "test-session"}}

    def call_agent(self, prompt):
        """Calls the agent with the prompt inputs and returns the parsed assessment."""
        response = self.agent_executor.invoke(input=prompt)
        # The executor returns the model output as a JSON string under "output";
        # parse it into the plain pydantic model (no @tool wrapper).
        res = AssessmentModelParse.parse_obj(json.loads(response["output"]))
        return res


# Reference system message, kept for documentation; the live prompt comes from the approach config:
# system_message = """You are an AI tutor for text assessment at a prestigious university.

# # Task
# Create graded feedback suggestions for a student's text submission that a human tutor would accept. Meaning, the feedback you provide should be applicable to the submission with little to no modification.

# You have access to the provided document lecture slides to help you provide feedback.
# If you do use them, please reference the title and the page in your feedback.
# Write it down explicitly when lecture slides or contents are relevant.

# # Style
# 1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. Contextual

# Make use of the lecture slides provided. State clearly in your feedback which lecture you are using. If you
# believe that the student could benefit from the slide, refer to it in your feedback.

# The grading instructions are there to guide you on which criteria to give points.
# You can comment with 0 points about grammar and spelling errors, but you should not give or remove points for them.

# # Problem statement
# {problem_statement}

# # Example solution
# {example_solution}

# # Grading instructions
# {grading_instructions}
# Max points: {max_points}, bonus points: {bonus_points}
# Respond only in json with the provided Assessment Feedback schema.
# """
modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/generate_suggestions.py
86 additions, 0 deletions
@@ -0,0 +1,86 @@
from typing import List

from athena import emit_meta
from athena.text import Exercise, Submission, Feedback
from athena.logger import logger
from llm_core.utils.llm_utils import (
    get_chat_prompt_with_formatting_instructions,
    check_prompt_length_and_omit_features_if_necessary,
    num_tokens_from_prompt,
)
from module_text_llm.config import BasicApproachConfig
from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, format_grading_instructions
from module_text_llm.basic_approach.prompt_generate_suggestions import AssessmentModel
from module_text_llm.retrieval_augmented_generation import tutor


async def generate_suggestions(exercise: Exercise, submission: Submission, config: BasicApproachConfig, debug: bool) -> List[Feedback]:
    model = config.model.get_model()  # type: ignore[attr-defined]
    prompt_input = {
        "max_points": exercise.max_points,
        "bonus_points": exercise.bonus_points,
        "grading_instructions": format_grading_instructions(exercise.grading_instructions, exercise.grading_criteria),
        "problem_statement": exercise.problem_statement or "No problem statement.",
        "example_solution": exercise.example_solution,
        "submission": add_sentence_numbers(submission.text)
    }

    chat_prompt = get_chat_prompt_with_formatting_instructions(
        model=model,
        system_message=config.generate_suggestions_prompt.system_message,
        human_message=config.generate_suggestions_prompt.human_message,
        pydantic_object=AssessmentModel
    )

    # Check if the prompt is too long and omit features if necessary (in order of importance)
    omittable_features = ["example_solution", "problem_statement", "grading_instructions"]
    prompt_input, should_run = check_prompt_length_and_omit_features_if_necessary(
        prompt=chat_prompt,
        prompt_input=prompt_input,
        max_input_tokens=config.max_input_tokens,
        omittable_features=omittable_features,
        debug=debug
    )

    if not should_run:
        logger.warning("Input too long. Skipping.")
        if debug:
            emit_meta("prompt", chat_prompt.format(**prompt_input))
            emit_meta("error", f"Input too long {num_tokens_from_prompt(chat_prompt, prompt_input)} > {config.max_input_tokens}")
        return []

    tutor.setConfig(config)
    result = tutor.call_agent(prompt_input)

    if debug:
        emit_meta("generate_suggestions", {
            "prompt": chat_prompt.format(**prompt_input),
            "result": result.dict() if result is not None else None
        })

    if result is None:
        return []

    grading_instruction_ids = set(
        grading_instruction.id
        for criterion in exercise.grading_criteria or []
        for grading_instruction in criterion.structured_grading_instructions
    )

    feedbacks = []
    for feedback in result.feedbacks:
        index_start, index_end = get_index_range_from_line_range(feedback.line_start, feedback.line_end, submission.text)
        grading_instruction_id = feedback.grading_instruction_id if feedback.grading_instruction_id in grading_instruction_ids else None
        feedbacks.append(Feedback(
            exercise_id=exercise.id,
            submission_id=submission.id,
            title=feedback.title,
            description=feedback.description,
            index_start=index_start,
            index_end=index_end,
            credits=feedback.credits,
            structured_grading_instruction_id=grading_instruction_id,
            meta={}
        ))

    return feedbacks
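The helpers add_sentence_numbers and get_index_range_from_line_range come from module_text_llm.helpers.utils: the first prefixes each sentence with a number so the model can cite ranges, the second maps a cited range back to character offsets in the raw text. A simplified, hypothetical re-implementation of the numbering contract (the real helper differs):

# Hypothetical sketch of the sentence-numbering contract, not the real helper.
def add_sentence_numbers_demo(text: str) -> str:
    sentences = [s.strip() for s in text.split(".") if s.strip()]
    return "\n".join(f"{i}: {s}." for i, s in enumerate(sentences, start=1))

print(add_sentence_numbers_demo("First point. Second point."))
# 1: First point.
# 2: Second point.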
modules/text/module_text_llm/module_text_llm/retrieval_augmented_generation/prompt_generate_suggestions.py
73 additions, 0 deletions
@@ -0,0 +1,73 @@
from typing import List, Optional
from pydantic import BaseModel, Field

system_message = """\
You are an AI tutor for text assessment at a prestigious university.
# Task
Create graded feedback suggestions for a student's text submission that a human tutor would accept. Meaning, the feedback you provide should be applicable to the submission with little to no modification.
You have access to the provided document lecture slides to help you provide feedback.
If you do use them, please reference the title and the page in your feedback.
You must explicitly use the lecture slides and cite them in your feedback.
# Style
1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. Contextual
Make use of the lecture slides provided. State clearly in your feedback which lecture you are using. If you
believe that the student could benefit from the slide, refer to it in your feedback.
The grading instructions are there to guide you on which criteria to give points.
You can comment with 0 points about grammar and spelling errors, but you should not give or remove points for them.
# Problem statement
{problem_statement}
# Example solution
{example_solution}
# You can use the following grading instructions as a baseline for how you distribute credits, but write your own feedback. Do not use the feedback provided to write your feedback.
{grading_instructions}
Max points: {max_points}, bonus points: {bonus_points}
Respond only in json with the provided Assessment Feedback schema, but do not prefix the json with "json".
"""

human_message = """\
Student\'s submission to grade (with sentence numbers <number>: <sentence>):
Respond in json.
\"\"\"
{submission}
\"\"\"\
"""


# Input Prompt
class GenerateSuggestionsPrompt(BaseModel):
    """\
Features available: **{problem_statement}**, **{example_solution}**, **{grading_instructions}**, **{max_points}**, **{bonus_points}**, **{submission}**
_Note: **{problem_statement}**, **{example_solution}**, or **{grading_instructions}** might be omitted if the input is too long._\
"""
    system_message: str = Field(default=system_message,
                                description="Message for priming AI behavior and instructing it what to do.")
    human_message: str = Field(default=human_message,
                               description="Message from a human. The input on which the AI is supposed to act.")

# Output Object
class FeedbackModel(BaseModel):
    title: str = Field(description="Very short title, i.e. feedback category or similar", example="Logic Error")
    description: str = Field(description="Feedback description")
    line_start: Optional[int] = Field(description="Referenced line number start, or empty if unreferenced")
    line_end: Optional[int] = Field(description="Referenced line number end, or empty if unreferenced")
    credits: float = Field(0.0, description="Number of points received/deducted")
    grading_instruction_id: Optional[int] = Field(
        description="ID of the grading instruction that was used to generate this feedback, or empty if no grading instruction was used"
    )


class AssessmentModel(BaseModel):
    """Collection of feedbacks making up an assessment"""

    feedbacks: List[FeedbackModel] = Field(description="Assessment feedbacks")
Some generated files are not rendered by default.