Commit
mvp multiple approaches and cot
Enea_Gore committed Oct 24, 2024
1 parent b336006 commit 4854b16
Showing 8 changed files with 662 additions and 351 deletions.
3 changes: 3 additions & 0 deletions llm_core/llm_core/models/__init__.py
@@ -6,6 +6,7 @@


DefaultModelConfig: Type[ModelConfig]
MiniModelConfig: ModelConfig
default_model_name = os.environ.get("LLM_DEFAULT_MODEL")
evaluation_model_name = os.environ.get("LLM_EVALUATION_MODEL")

@@ -18,6 +19,8 @@
types.append(openai_config.OpenAIModelConfig)
if default_model_name in openai_config.available_models:
DefaultModelConfig = openai_config.OpenAIModelConfig
if "openai_gpt-4o-mini" in openai_config.available_models:
MiniModelConfig = openai_config.OpenAIModelConfig(model_name="openai_gpt-4o-mini", max_tokens=3000, temperature=0, top_p=0.9, presence_penalty=0, frequency_penalty=0)
if evaluation_model_name in openai_config.available_models:
evaluation_model = openai_config.available_models[evaluation_model_name]
except AttributeError:
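Unlike DefaultModelConfig, which is bound to a class, MiniModelConfig is bound to a ready-made OpenAIModelConfig instance pinned to openai_gpt-4o-mini. A minimal usage sketch (illustrative only; it assumes the OpenAI provider is configured and the model is available, and get_model() mirrors how the config is consumed later in generate_cot_suggestions):

from llm_core.models import MiniModelConfig

# MiniModelConfig is already an instance, so it can be passed around directly
# wherever a model config is expected, or turned into a chat model:
chat_model = MiniModelConfig.get_model()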
7 changes: 4 additions & 3 deletions modules/text/module_text_llm/module_text_llm/__main__.py
@@ -4,7 +4,7 @@

import nltk
import tiktoken

from module_text_llm.approach_controller import generate
from athena import app, submission_selector, submissions_consumer, feedback_consumer, feedback_provider, evaluation_provider
from athena.text import Exercise, Submission, Feedback
from athena.logger import logger
@@ -13,6 +13,7 @@
from module_text_llm.evaluation import get_feedback_statistics, get_llm_statistics
from module_text_llm.generate_suggestions import generate_suggestions
from module_text_llm.generate_evaluation import generate_evaluation
from module_text_llm.generate_cot_suggestions import generate_cot_suggestions


@submissions_consumer
@@ -30,12 +31,12 @@ def select_submission(exercise: Exercise, submissions: List[Submission]) -> Subm
def process_incoming_feedback(exercise: Exercise, submission: Submission, feedbacks: List[Feedback]):
logger.info("process_feedback: Received %d feedbacks for submission %d of exercise %d.", len(feedbacks), submission.id, exercise.id)


# Entry point that dispatches between the available feedback generation approaches
@feedback_provider
async def suggest_feedback(exercise: Exercise, submission: Submission, is_graded: bool, module_config: Configuration) -> List[Feedback]:
logger.info("suggest_feedback: %s suggestions for submission %d of exercise %d were requested",
"Graded" if is_graded else "Non-graded", submission.id, exercise.id)
return await generate(exercise, submission, module_config.approach, module_config.debug)


@evaluation_provider
19 changes: 19 additions & 0 deletions modules/text/module_text_llm/module_text_llm/approach_controller.py
@@ -0,0 +1,19 @@

from typing import List, Optional, Sequence
from pydantic import BaseModel, Field

from athena import emit_meta
from athena.text import Exercise, Submission, Feedback
from athena.logger import logger
from module_text_llm.config import ApproachConfigUnion, BasicApproachConfig, ChainOfThoughtConfig


from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, format_grading_instructions
from module_text_llm.generate_suggestions import generate_suggestions
from module_text_llm.generate_cot_suggestions import generate_cot_suggestions

async def generate(exercise: Exercise, submission: Submission, config: ApproachConfigUnion, debug: bool) -> List[Feedback]:
if isinstance(config, BasicApproachConfig):
return await generate_suggestions(exercise, submission, config, debug)
if isinstance(config, ChainOfThoughtConfig):
return await generate_cot_suggestions(exercise, submission, config, debug)
raise ValueError(f"Unsupported approach config: {type(config).__name__}")
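A short usage sketch of the dispatcher (not part of the commit; exercise and submission stand for already-loaded Athena objects, the wrapper function names are hypothetical, and the default model configs are assumed to be available):

from typing import List

from athena.text import Exercise, Submission, Feedback
from module_text_llm.approach_controller import generate
from module_text_llm.config import BasicApproachConfig, ChainOfThoughtConfig


async def suggest_with_basic(exercise: Exercise, submission: Submission) -> List[Feedback]:
    # Single-prompt approach: one LLM call produces the assessment directly.
    return await generate(exercise, submission, BasicApproachConfig(), debug=False)


async def suggest_with_cot(exercise: Exercise, submission: Submission) -> List[Feedback]:
    # Chain-of-thought approach: an initial assessment is generated and then refined.
    return await generate(exercise, submission, ChainOfThoughtConfig(), debug=False)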
82 changes: 75 additions & 7 deletions modules/text/module_text_llm/module_text_llm/config.py
@@ -1,13 +1,23 @@
from pydantic import BaseModel, Field

from typing import Union
from athena import config_schema_provider
from llm_core.models import ModelConfigType, DefaultModelConfig
from llm_core.models import ModelConfigType, DefaultModelConfig, MiniModelConfig
from module_text_llm.prompts.generate_suggestions import (
system_message as generate_suggestions_system_message,
human_message as generate_suggestions_human_message
)
from enum import Enum
from pydantic import root_validator
from abc import ABC, abstractmethod
from module_text_llm.prompts.cot_suggestions import (
system_message as generate_cot_suggestions_system_message,
human_message as generate_cot_suggestions_human_message
)


from module_text_llm.prompts.refined_cot_suggestions import (
system_message as generate_refined_cot_suggestions_system_message,
human_message as generate_refined_cot_suggestions_human_message
)
class GenerateSuggestionsPrompt(BaseModel):
"""\
Features available: **{problem_statement}**, **{example_solution}**, **{grading_instructions}**, **{max_points}**, **{bonus_points}**, **{submission}**
@@ -19,15 +29,73 @@ class GenerateSuggestionsPrompt(BaseModel):
human_message: str = Field(default=generate_suggestions_human_message,
description="Message from a human. The input on which the AI is supposed to act.")

class CoTGenerateSuggestionsPrompt(BaseModel):
"""\
Features available: **{problem_statement}**, **{example_solution}**, **{grading_instructions}**, **{max_points}**, **{bonus_points}**, **{submission}**
_Note: **{problem_statement}**, **{example_solution}**, or **{grading_instructions}** might be omitted if the input is too long._\
"""
system_message: str = Field(default=generate_cot_suggestions_system_message,
description="Message for priming AI behavior and instructing it what to do.")
human_message: str = Field(default=generate_cot_suggestions_human_message,
description="Message from a human. The input on which the AI is supposed to act.")
second_system_message: str = Field(default=generate_refined_cot_suggestions_system_message,
description="Message for priming AI behavior and instructing it what to do.")
answer_message: str = Field(default=generate_refined_cot_suggestions_human_message,
description="Message from a human. The input on which the AI is supposed to act.")

class ApproachType(str, Enum):
basic = "BasicApproach"
chain_of_thought = "ChainOfThought"


class ApproachConfig(BaseModel, ABC):
max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.")
model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore

# @abstractmethod
# def get_prompt(self):
# """Abstract method to get the appropriate prompt configuration."""
# pass

class Config:
# Serialize enum fields (e.g. ApproachType) by value in the generated schema
use_enum_values = True

class BasicApproachConfig(ApproachConfig):
"""This approach uses a LLM with a single prompt to generate feedback in a single step."""
generate_suggestions_prompt: GenerateSuggestionsPrompt = Field(default=GenerateSuggestionsPrompt())

# def get_prompt(self):
# return self.generate_suggestions_prompt

class ChainOfThoughtConfig(ApproachConfig):
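"""Chain-of-thought approach: a first prompt produces an initial assessment with reasoning and improvement suggestions, which a second prompt refines into the final feedback."""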
model: ModelConfigType = Field(default=MiniModelConfig) # type: ignore
generate_suggestions_prompt: CoTGenerateSuggestionsPrompt = Field(default=CoTGenerateSuggestionsPrompt())

# def get_prompt(self):
# return self.generate_suggestions_prompt

# available_approaches = [BasicApproachConfig, ChainOfThoughtConfig]
ApproachConfigUnion = Union[BasicApproachConfig, ChainOfThoughtConfig]

# def approach_factory(approach_type: ApproachType) -> ApproachConfig:
# if approach_type == ApproachType.basic:
# return BasicApproachConfig()
# elif approach_type == ApproachType.chain_of_thought:
# return ChainOfThoughtConfig()
# else:
# raise ValueError(f"Unknown approach type: {approach_type}")

@config_schema_provider
class Configuration(BaseModel):
debug: bool = Field(default=False, description="Enable debug mode.")
approach: ApproachConfigUnion = Field(default_factory=BasicApproachConfig) # Default to BasicApproach
# approach_type: ApproachType = Field(default=ApproachType.basic, description="Type of approach to use.")

# @root_validator(pre=True)
# def populate_approach(cls, values):
# """Automatically instantiate the correct approach based on approach_type."""
# approach_type = values.get('approach_type', ApproachType.basic)
# values['approach'] = approach_factory(approach_type)
# return values
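A brief sketch of the resulting configuration objects (illustrative only; it exercises the defaults wired up above, does not go through Athena's schema machinery, and assumes the OpenAI provider is configured so that MiniModelConfig exists):

from module_text_llm.config import Configuration, BasicApproachConfig, ChainOfThoughtConfig

# The default configuration still uses the single-prompt approach.
config = Configuration()
assert isinstance(config.approach, BasicApproachConfig)

# The chain-of-thought approach defaults to the gpt-4o-mini MiniModelConfig
# and carries both prompt pairs needed for the two-step flow.
cot = ChainOfThoughtConfig()
assert cot.generate_suggestions_prompt.second_system_message
assert cot.generate_suggestions_prompt.answer_message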

159 changes: 159 additions & 0 deletions modules/text/module_text_llm/module_text_llm/generate_cot_suggestions.py
@@ -0,0 +1,159 @@
from typing import List, Optional, Sequence
from pydantic import BaseModel, Field

from athena import emit_meta
from athena.text import Exercise, Submission, Feedback
from athena.logger import logger

from module_text_llm.config import ChainOfThoughtConfig
from llm_core.utils.llm_utils import (
get_chat_prompt_with_formatting_instructions,
check_prompt_length_and_omit_features_if_necessary,
num_tokens_from_prompt,
)
from llm_core.utils.predict_and_parse import predict_and_parse

from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, format_grading_instructions

class FeedbackModel(BaseModel):
title: str = Field(description="Very short title, i.e. feedback category or similar", example="Logic Error")
description: str = Field(description="Feedback description")
line_start: Optional[int] = Field(description="Referenced line number start, or empty if unreferenced")
line_end: Optional[int] = Field(description="Referenced line number end, or empty if unreferenced")
credits: float = Field(0.0, description="Number of points received/deducted")
grading_instruction_id: Optional[int] = Field(
description="ID of the grading instruction that was used to generate this feedback, or empty if no grading instruction was used"
)

class Config:
title = "Feedback"


class AssessmentModel(BaseModel):
"""Collection of feedbacks making up an assessment"""

feedbacks: List[FeedbackModel] = Field(description="Assessment feedbacks")

class Config:
title = "Assessment"

class InitialAssessment(BaseModel):
title: str = Field(description="Very short title, i.e. feedback category or similar", example="Logic Error")
description: str = Field(description="Feedback description")
line_start: Optional[int] = Field(description="Referenced line number start, or empty if unreferenced")
line_end: Optional[int] = Field(description="Referenced line number end, or empty if unreferenced")
credits: float = Field(0.0, description="Number of points received/deducted")
reasoning: str = Field(description="Reasoning why the feedback was given")
improvement_suggestion: str = Field(description="Suggestion for improvement for the student")

class InitialAssessmentModel(BaseModel):
"""Collection of feedbacks making up an assessment"""

feedbacks: List[InitialAssessment] = Field(description="Assessment feedbacks")

async def generate_cot_suggestions(exercise: Exercise, submission: Submission, config: ChainOfThoughtConfig, debug: bool) -> List[Feedback]:
model = config.model.get_model() # type: ignore[attr-defined]

prompt_input = {
"max_points": exercise.max_points,
"bonus_points": exercise.bonus_points,
"grading_instructions": format_grading_instructions(exercise.grading_instructions, exercise.grading_criteria),
"problem_statement": exercise.problem_statement or "No problem statement.",
"example_solution": exercise.example_solution,
"submission": add_sentence_numbers(submission.text)
}

chat_prompt = get_chat_prompt_with_formatting_instructions(
model=model,
system_message=config.generate_suggestions_prompt.system_message,
human_message=config.generate_suggestions_prompt.human_message,
pydantic_object=InitialAssessmentModel
)



# Check if the prompt is too long and omit features if necessary (in order of importance)
omittable_features = ["example_solution", "problem_statement", "grading_instructions"]
prompt_input, should_run = check_prompt_length_and_omit_features_if_necessary(
prompt=chat_prompt,
prompt_input=prompt_input,
max_input_tokens=config.max_input_tokens,
omittable_features=omittable_features,
debug=debug
)

# Skip if the prompt is too long
if not should_run:
logger.warning("Input too long. Skipping.")
if debug:
emit_meta("prompt", chat_prompt.format(**prompt_input))
emit_meta("error", f"Input too long {num_tokens_from_prompt(chat_prompt, prompt_input)} > {config.max_input_tokens}")
return []

initial_result = await predict_and_parse(
model=model,
chat_prompt=chat_prompt,
prompt_input=prompt_input,
pydantic_object=InitialAssessmentModel,
tags=[
f"exercise-{exercise.id}",
f"submission-{submission.id}",
]
)

second_prompt_input = {
"answer": initial_result,
"submission": add_sentence_numbers(submission.text)
}

second_chat_prompt = get_chat_prompt_with_formatting_instructions(
model=model,
system_message=config.generate_suggestions_prompt.second_system_message,
human_message=config.generate_suggestions_prompt.answer_message,
pydantic_object=AssessmentModel)

result = await predict_and_parse(
model=model,
chat_prompt=second_chat_prompt,
prompt_input=second_prompt_input,
pydantic_object=AssessmentModel,
tags=[
f"exercise-{exercise.id}",
f"submission-{submission.id}",
]
)

if debug:
emit_meta("generate_suggestions", {
"prompt": chat_prompt.format(**prompt_input),
"result": result.dict() if result is not None else None
})


if result is None:
return []

grading_instruction_ids = set(
grading_instruction.id
for criterion in exercise.grading_criteria or []
for grading_instruction in criterion.structured_grading_instructions
)

feedbacks = []
for feedback in result.feedbacks:
index_start, index_end = get_index_range_from_line_range(feedback.line_start, feedback.line_end, submission.text)
grading_instruction_id = feedback.grading_instruction_id if feedback.grading_instruction_id in grading_instruction_ids else None
feedbacks.append(Feedback(
exercise_id=exercise.id,
submission_id=submission.id,
title=feedback.title,
description=feedback.description,
index_start=index_start,
index_end=index_end,
credits=feedback.credits,
structured_grading_instruction_id=grading_instruction_id,
meta={}
))

return feedbacks
28 changes: 28 additions & 0 deletions modules/text/module_text_llm/module_text_llm/prompts/cot_suggestions.py
@@ -0,0 +1,28 @@
system_message = """
You are a grading assistant at a prestigious university tasked with grading student submissions for text exercises.
Your goal is to be as helpful as possible to the student while providing constructive feedback without revealing the solution.
In order to successfully complete this task, you must:
1. Analyze the problem statement and the provided grading instructions to understand the requirements of the task.
2. The problem solution is an example of a solution that meets the requirements of the task. Analyze the solution to understand the logic and the approach used to solve the problem, keeping in mind that student solutions might diverge and still be correct.
3. Analyze the student's submission with regard to the problem statement, so that you can create chunks of the solution that relate to a part of the problem statement.
4. Use the information gathered from the previous steps to provide constructive feedback to the student, guiding them towards the correct solution without revealing it.
5. If you have additional comments, create an unreferenced feedback.
6. For each feedback, make sure that credits are given only on the basis of the grading instructions and the sample solution; the minimal student answer that satisfies them should receive the credits. If you have notes or additional comments, include them in a new feedback with 0 credits and no reference.
You are tasked with grading the following exercise. Your response is shown directly to the student, so address the student directly:
The maximal amount of points for this exercise is {max_points}.
# Problem Statement
{problem_statement}
# Sample Solution
{example_solution}
# Grading Instructions
{grading_instructions}
"""

human_message = """\
Student\'s submission to grade (with sentence numbers <number>: <sentence>):
\"\"\"
{submission}
\"\"\"\
"""
17 changes: 17 additions & 0 deletions modules/text/module_text_llm/module_text_llm/prompts/refined_cot_suggestions.py
@@ -0,0 +1,17 @@
system_message = """
You gave the following feedback on the first iteration: {answer}
In this step you need to refine your feedback.
Make sure to follow these steps to assess and improve your feedback:
It should follow the grading instructions and the sample solution; if it does not, consider improvements.
If you have your own additional improvements that are not present in the grading instructions, add them in a new feedback with 0 credits and no reference.
Remember that your response is seen directly by students and it should address them directly.
For each feedback where the student has room for improvement, think about how the student could improve their solution.
Once you have thought about how the student can improve the solution, formulate it in a way that guides the student towards the correct solution without revealing it directly.
Consider improvements to the feedback if any of these points is not satisfied."""

human_message = """\
Student\'s submission to grade (with sentence numbers <number>: <sentence>):
\"\"\"
{submission}
\"\"\"\
"""