diff --git a/athena/athena/schemas/feedback.py b/athena/athena/schemas/feedback.py
index 4cf9c6a2..3cbd62ea 100644
--- a/athena/athena/schemas/feedback.py
+++ b/athena/athena/schemas/feedback.py
@@ -23,6 +23,9 @@ class Feedback(Schema, ABC):
     is_graded: Optional[bool] = Field(None, description="Graded or non graded.", example=False)
+    positive: Optional[bool] = Field(
+        None, description="Positive means that the student applied a principle correctly. Negative means that the "
+        "student needs to put in some work. Empty means the feedback is neither positive nor negative.")
     meta: dict = Field({}, example={})
diff --git a/modules/programming/module_programming_llm/module_programming_llm/generate_non_graded_suggestions_by_file.py b/modules/programming/module_programming_llm/module_programming_llm/generate_non_graded_suggestions_by_file.py
index b857a24f..1bc66691 100644
--- a/modules/programming/module_programming_llm/module_programming_llm/generate_non_graded_suggestions_by_file.py
+++ b/modules/programming/module_programming_llm/module_programming_llm/generate_non_graded_suggestions_by_file.py
@@ -8,6 +8,9 @@
 from module_programming_llm.config import NonGradedBasicApproachConfig
 from module_programming_llm.generate_summary_by_file import generate_summary_by_file
+from module_programming_llm.split_grading_instructions_by_file import (
+    split_grading_instructions_by_file,
+)
 from module_programming_llm.split_problem_statement_by_file import (
     split_problem_statement_by_file,
 )
@@ -36,6 +39,14 @@ class FeedbackModel(BaseModel):
     line_end: Optional[int] = Field(
         description="Referenced line number end, or empty if unreferenced"
     )
+    is_positive: Optional[bool] = Field(
+        description="Describes whether this feedback item requires no further work (positive), requires more work "
+                    "(negative), or is neutral (empty)")
+    credits: Optional[float] = Field(
+        description="The number of points you would give for this element if you were a human tutor. Remember that "
+                    "the total number of credits should not exceed the total number of points. Negative points mean "
+                    "that the student should put in more work."
+    )
 
     class Config:
         title = "Feedback"
@@ -108,6 +119,25 @@ async def generate_suggestions_by_file(
     )
 
     problem_statement_tokens = num_tokens_from_string(exercise.problem_statement or "")
+
+    # Get split problem statement and grading instructions by file (if necessary)
+    split_problem_statement, split_grading_instructions = await asyncio.gather(
+        split_problem_statement_by_file(
+            exercise=exercise,
+            submission=submission,
+            prompt=chat_prompt,
+            config=config,
+            debug=debug,
+        ),
+        split_grading_instructions_by_file(
+            exercise=exercise,
+            submission=submission,
+            prompt=chat_prompt,
+            config=config,
+            debug=debug,
+        ),
+    )
+
     is_short_problem_statement = (
         problem_statement_tokens
         <= config.split_problem_statement_by_file_prompt.tokens_before_split
@@ -121,6 +151,21 @@
         else {}
     )
 
+    is_short_grading_instructions = (
+        num_tokens_from_string(exercise.grading_instructions)
+        <= config.split_grading_instructions_by_file_prompt.tokens_before_split
+        if exercise.grading_instructions is not None
+        else True
+    )
+    file_grading_instructions = (
+        {
+            item.file_name: item.grading_instructions
+            for item in split_grading_instructions.items
+        }
+        if split_grading_instructions is not None
+        else {}
+    )
+
     # Gather prompt inputs for each changed file (independently)
     for file_path, file_content in changed_files.items():
         problem_statement = (
@@ -155,6 +200,20 @@
 
         template_to_submission_diff = "\n".join(diff_without_deletions)
 
+        grading_instructions = (
+            exercise.grading_instructions or ""
+            if is_short_grading_instructions
+            else file_grading_instructions.get(
+                file_path, "No relevant grading instructions found."
+            )
+        )
+        grading_instructions = (
+            grading_instructions
+            if grading_instructions.strip()
+            else "No grading instructions found."
+        )
+
         prompt_inputs.append(
             {
                 "submission_file": file_content,
                 "problem_statement": problem_statement,
                 "file_path": file_path,
                 "summary": summary_string,
+                "max_points": exercise.max_points,
+                "bonus_points": exercise.bonus_points,
+                "grading_instructions": grading_instructions,
             }
         )
diff --git a/modules/programming/module_programming_llm/module_programming_llm/prompts/generate_non_graded_suggestions_by_file.py b/modules/programming/module_programming_llm/module_programming_llm/prompts/generate_non_graded_suggestions_by_file.py
index 9b52fefc..69dc656e 100644
--- a/modules/programming/module_programming_llm/module_programming_llm/prompts/generate_non_graded_suggestions_by_file.py
+++ b/modules/programming/module_programming_llm/module_programming_llm/prompts/generate_non_graded_suggestions_by_file.py
@@ -5,11 +5,13 @@
 {problem_statement}
 
 # Task
-Create non graded improvement suggestions for a student\'s programming submission that a human tutor would recommend. \
+Create improvement suggestions for a student\'s programming submission that a human tutor would recommend. \
 Assume the tutor is not familiar with the solution.
 The feedback must contain only the feedback the student can learn from.
 Important: the answer you generate must not contain any solution suggestions or contain corrected errors.
 Rather concentrate on incorrectly applied principles or inconsistencies.
+Try to assess the submission the way a human tutor would (if grading instructions are present, follow them).
+The credits should be negative if the student needs to put in more work.
 Students can move some functionality to other files.
 Students can deviate to some degree from the problem statement or book unless they complete all tasks.
 Very important, the feedback must be balanced.
@@ -28,6 +30,9 @@ human_message = """\
 Path: {file_path}
 
+{grading_instructions}
+Max points: {max_points}, bonus points: {bonus_points} (whole assessment, not just this file)
+
 File(with line numbers <number>: <line>):
 \"\"\"
 {submission_file}