-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
= Enea_Gore
committed
Oct 24, 2024
1 parent
b336006
commit 4854b16
Showing
8 changed files
with
662 additions
and
351 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
19 changes: 19 additions & 0 deletions
19
modules/text/module_text_llm/module_text_llm/approach_controller.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
|
||
from typing import List, Optional, Sequence | ||
from pydantic import BaseModel, Field | ||
|
||
from athena import emit_meta | ||
from athena.text import Exercise, Submission, Feedback | ||
from athena.logger import logger | ||
from module_text_llm.config import BasicApproachConfig, ChainOfThoughtConfig | ||
|
||
|
||
from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, format_grading_instructions | ||
from module_text_llm.generate_suggestions import generate_suggestions | ||
from module_text_llm.generate_cot_suggestions import generate_cot_suggestions | ||
|
||
async def generate(exercise: Exercise, submission: Submission, config: BasicApproachConfig, debug: bool) -> List[Feedback]:
    """Dispatch feedback generation to the approach selected by *config*.

    Args:
        exercise: The exercise the submission belongs to.
        submission: The student submission to generate feedback for.
        config: Approach configuration; its concrete type selects the strategy.
        debug: Whether the downstream generators should emit debug metadata.

    Returns:
        The generated feedback suggestions, or an empty list if the config
        type is not recognized.
    """
    # BasicApproachConfig is checked first to preserve the original dispatch
    # order (relevant if the config classes are related by inheritance).
    if isinstance(config, BasicApproachConfig):
        return await generate_suggestions(exercise, submission, config, debug)
    if isinstance(config, ChainOfThoughtConfig):
        return await generate_cot_suggestions(exercise, submission, config, debug)
    # Previously fell through and implicitly returned None, violating the
    # declared return type; return an empty list and log instead.
    logger.warning("Unknown approach config type: %s", type(config).__name__)
    return []
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
159 changes: 159 additions & 0 deletions
159
modules/text/module_text_llm/module_text_llm/generate_cot_suggestions.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
from typing import List, Optional, Sequence | ||
from pydantic import BaseModel, Field | ||
|
||
from athena import emit_meta | ||
from athena.text import Exercise, Submission, Feedback | ||
from athena.logger import logger | ||
|
||
from module_text_llm.config import ChainOfThoughtConfig | ||
from llm_core.utils.llm_utils import ( | ||
get_chat_prompt_with_formatting_instructions, | ||
check_prompt_length_and_omit_features_if_necessary, | ||
num_tokens_from_prompt, | ||
) | ||
from llm_core.utils.predict_and_parse import predict_and_parse | ||
|
||
from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, format_grading_instructions | ||
|
||
# Schema for a single feedback item in the final (refined) assessment parsed
# from the LLM response. Line numbers refer to the sentence numbering added
# to the submission text (see add_sentence_numbers in the generator).
# NOTE(review): intentionally no class docstring — pydantic would surface it
# as the schema description sent to the LLM, changing the prompt.
class FeedbackModel(BaseModel):
    title: str = Field(description="Very short title, i.e. feedback category or similar", example="Logic Error")
    description: str = Field(description="Feedback description")
    line_start: Optional[int] = Field(description="Referenced line number start, or empty if unreferenced")
    line_end: Optional[int] = Field(description="Referenced line number end, or empty if unreferenced")
    credits: float = Field(0.0, description="Number of points received/deducted")
    grading_instruction_id: Optional[int] = Field(
        description="ID of the grading instruction that was used to generate this feedback, or empty if no grading instruction was used"
    )

    class Config:
        # Title shown in the JSON schema given to the LLM.
        title = "Feedback"
|
||
|
||
# Top-level container parsed from the second (refinement) LLM call.
# The docstring below is kept verbatim: pydantic uses it as the schema
# description sent to the LLM.
class AssessmentModel(BaseModel):
    """Collection of feedbacks making up an assessment"""

    feedbacks: List[FeedbackModel] = Field(description="Assessment feedbacks")

    class Config:
        # Title shown in the JSON schema given to the LLM.
        title = "Assessment"
|
||
# Schema for a step-1 (chain-of-thought) feedback item: like FeedbackModel
# but with the model's reasoning and an improvement suggestion, which are
# consumed by the refinement step rather than shown to the student directly.
class InitialAssessment(BaseModel):
    title: str = Field(description="Very short title, i.e. feedback category or similar", example="Logic Error")
    description: str = Field(description="Feedback description")
    line_start: Optional[int] = Field(description="Referenced line number start, or empty if unreferenced")
    line_end: Optional[int] = Field(description="Referenced line number end, or empty if unreferenced")
    credits: float = Field(0.0, description="Number of points received/deducted")
    reasoning: str = Field(description="Reasoning why the feedback was given")
    # NOTE(review): field name has a typo ("impprovment"); renaming it would
    # change the JSON schema the LLM is asked to produce, so it is left as-is.
    impprovment_suggestion: str = Field(description="Suggestion for improvement for the student")
|
||
# Top-level container parsed from the first (initial assessment) LLM call.
# The docstring below is kept verbatim: pydantic uses it as the schema
# description sent to the LLM.
class InitialAssessmentModel(BaseModel):
    """Collection of feedbacks making up an assessment"""

    feedbacks: List[InitialAssessment] = Field(description="Assessment feedbacks")
|
||
async def generate_cot_suggestions(exercise: Exercise, submission: Submission, config: ChainOfThoughtConfig, debug: bool) -> List[Feedback]:
    """Generate feedback suggestions via a two-step chain-of-thought approach.

    Step 1 asks the model for an initial assessment (including reasoning and
    improvement suggestions); step 2 feeds that assessment back to the model
    to refine it into the final feedback shown to the student.

    Args:
        exercise: The exercise the submission belongs to.
        submission: The student submission to assess.
        config: Chain-of-thought configuration (model, prompts, token limits).
        debug: Whether to emit prompt/result metadata for debugging.

    Returns:
        The generated Feedback objects; empty if the prompt was too long or
        the model produced no parseable result.
    """
    model = config.model.get_model()  # type: ignore[attr-defined]

    prompt_input = {
        "max_points": exercise.max_points,
        "bonus_points": exercise.bonus_points,
        "grading_instructions": format_grading_instructions(exercise.grading_instructions, exercise.grading_criteria),
        "problem_statement": exercise.problem_statement or "No problem statement.",
        "example_solution": exercise.example_solution,
        "submission": add_sentence_numbers(submission.text)
    }

    chat_prompt = get_chat_prompt_with_formatting_instructions(
        model=model,
        system_message=config.generate_suggestions_prompt.system_message,
        human_message=config.generate_suggestions_prompt.human_message,
        pydantic_object=InitialAssessmentModel
    )

    # Check if the prompt is too long and omit features if necessary (in order of importance)
    omittable_features = ["example_solution", "problem_statement", "grading_instructions"]
    prompt_input, should_run = check_prompt_length_and_omit_features_if_necessary(
        prompt=chat_prompt,
        prompt_input=prompt_input,
        max_input_tokens=config.max_input_tokens,
        omittable_features=omittable_features,
        debug=debug
    )

    # Skip if the prompt is too long even after omitting optional features
    if not should_run:
        logger.warning("Input too long. Skipping.")
        if debug:
            emit_meta("prompt", chat_prompt.format(**prompt_input))
            emit_meta("error", f"Input too long {num_tokens_from_prompt(chat_prompt, prompt_input)} > {config.max_input_tokens}")
        return []

    # Step 1: initial assessment with reasoning.
    initial_result = await predict_and_parse(
        model=model,
        chat_prompt=chat_prompt,
        prompt_input=prompt_input,
        pydantic_object=InitialAssessmentModel,
        tags=[
            f"exercise-{exercise.id}",
            f"submission-{submission.id}",
        ]
    )

    # NOTE(review): initial_result may be None if parsing failed; it is then
    # interpolated into the second prompt as the string "None" — confirm this
    # fallback is intended.
    second_prompt_input = {
        "answer": initial_result,
        "submission": add_sentence_numbers(submission.text)
    }

    # Step 2: refine the initial assessment into the final feedback.
    second_chat_prompt = get_chat_prompt_with_formatting_instructions(
        model=model,
        system_message=config.generate_suggestions_prompt.second_system_message,
        human_message=config.generate_suggestions_prompt.answer_message,
        pydantic_object=AssessmentModel)

    result = await predict_and_parse(
        model=model,
        chat_prompt=second_chat_prompt,
        prompt_input=second_prompt_input,
        pydantic_object=AssessmentModel,
        tags=[
            f"exercise-{exercise.id}",
            f"submission-{submission.id}",
        ]
    )

    if debug:
        # Bug fix: previously the step-1 prompt was emitted alongside the
        # step-2 result. Emit both prompts; "prompt" now refers to the prompt
        # that actually produced "result".
        emit_meta("generate_suggestions", {
            "initial_prompt": chat_prompt.format(**prompt_input),
            "prompt": second_chat_prompt.format(**second_prompt_input),
            "result": result.dict() if result is not None else None
        })

    if result is None:
        return []

    # Only keep grading-instruction references that actually exist on the
    # exercise; anything else the model hallucinated is dropped to None.
    grading_instruction_ids = set(
        grading_instruction.id
        for criterion in exercise.grading_criteria or []
        for grading_instruction in criterion.structured_grading_instructions
    )

    feedbacks = []
    for feedback in result.feedbacks:
        # Map sentence-number ranges back to character indices in the raw text.
        index_start, index_end = get_index_range_from_line_range(feedback.line_start, feedback.line_end, submission.text)
        grading_instruction_id = feedback.grading_instruction_id if feedback.grading_instruction_id in grading_instruction_ids else None
        feedbacks.append(Feedback(
            exercise_id=exercise.id,
            submission_id=submission.id,
            title=feedback.title,
            description=feedback.description,
            index_start=index_start,
            index_end=index_end,
            credits=feedback.credits,
            structured_grading_instruction_id=grading_instruction_id,
            meta={}
        ))

    return feedbacks
28 changes: 28 additions & 0 deletions
28
modules/text/module_text_llm/module_text_llm/prompts/cot_suggestions.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
system_message = """ | ||
You are a grading assistant at a prestrigious university tasked with grading student submissions for text exercises. | ||
You goal is to be as helpful as possible to the student while providing constructive feedback without revealing the solution. | ||
In order to successfully complete this task, you must: | ||
1. Analyze the problem statement and the provided grading instructions to understand the requirements of the task. | ||
2. The problem solution is an example of a solution that meets the requirements of the task. Analyze the solution to understand the logic and the approach used to solve the problem, keeping in mind that the student solutions might diverge and still be correct. | ||
3. Analyze the student's submission in regards to the problem statement, so that you can create chunks of the solution that relate to a part of the problem statement. | ||
4. Use the information gathered from the previous steps to provide constructive feedback to the student, guiding them towards the correct solution without revealing it. | ||
5. If you have additional comments, create an unreferenced feedback. | ||
6. For each feedback make sure that the credits are given only on the basis of the grading instructions and soltuion, the minimal answer from a student that satisfies this should be given the credits. If you have notes or additional comments, make sure to include them in a new feedback with 0 credits and no reference. | ||
You are tasked with grading the following exercise, your response should take into account that you are directly responding to the student so you should adress the student: | ||
The maximal amount of points for this exercise is {max_points}. | ||
# Problem Statement | ||
{problem_statement} | ||
# Sample Solution | ||
{example_solution} | ||
# Grading Instructions | ||
{grading_instructions} | ||
""" | ||
|
||
human_message = """\ | ||
Student\'s submission to grade (with sentence numbers <number>: <sentence>): | ||
\"\"\" | ||
{submission} | ||
\"\"\"\ | ||
""" |
17 changes: 17 additions & 0 deletions
17
modules/text/module_text_llm/module_text_llm/prompts/refined_cot_suggestions.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
system_message = """ | ||
You gave the following feedback on the first iteration: {answer} | ||
On this step you need to refine your feedback. | ||
Make sure to follow the following steps to assess and improve your feedback: | ||
It shuold follow the grading instructions and the sample solution, if it doesn't, consider improvements. | ||
If you have your own additional improvements that are not present in the grading instructions, add them in a new feedback with 0 credits and no reference. | ||
Remember that your response is directly seen by students and it should adress them directly. | ||
For each feedback where the student has room for improvement, think about how the student could improve his solution. | ||
Once you have thought how the student can improve the solution, formulate it in a way that guides the student towards the correct solution without revealing it directly. | ||
Consider improvements to the feedback if any of this points is not satisfied.""" | ||
|
||
human_message = """\ | ||
Student\'s submission to grade (with sentence numbers <number>: <sentence>): | ||
\"\"\" | ||
{submission} | ||
\"\"\"\ | ||
""" |
Oops, something went wrong.