diff --git a/.vscode/settings.json b/.vscode/settings.json index 3b5da8376..1ceae8a76 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,4 +2,5 @@ "python.linting.enabled": true, "python.linting.prospectorEnabled": true, "python.analysis.typeCheckingMode": "basic", + "python.terminal.activateEnvironment": true } \ No newline at end of file diff --git a/assessment_module_manager/.vscode/settings.json b/assessment_module_manager/.vscode/settings.json deleted file mode 100644 index 2a843f56f..000000000 --- a/assessment_module_manager/.vscode/settings.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "python.pythonPath": "./.venv/bin/python", - "python.analysis.typeCheckingMode": "basic", - } \ No newline at end of file diff --git a/athena-workspace.code-workspace b/athena-workspace.code-workspace index 49b528cf6..4a7809f21 100644 --- a/athena-workspace.code-workspace +++ b/athena-workspace.code-workspace @@ -32,7 +32,11 @@ } ], "settings": { - "python.linting.prospectorEnabled": true, - "python.linting.mypyEnabled": true + "python.pythonPath": "./.venv/bin/python", + "python.linting.enabled": true, + "python.linting.prospectorEnabled": true, + "python.linting.mypyEnabled": true, + "python.analysis.typeCheckingMode": "basic", + "python.terminal.activateEnvironment": true } } diff --git a/athena/athena/models/model.py b/athena/athena/models/model.py index c3405d88c..6ce8d1293 100644 --- a/athena/athena/models/model.py +++ b/athena/athena/models/model.py @@ -1,8 +1,7 @@ import importlib -from pydantic import BaseModel, AnyUrl -from sqlalchemy import Column, String, UniqueConstraint, event -from sqlalchemy.orm import mapper +from pydantic import BaseModel +from sqlalchemy import Column, String, UniqueConstraint class Model: diff --git a/module_programming_llm/module_programming_llm/__main__.py b/module_programming_llm/module_programming_llm/__main__.py index c7031765f..9afe4444c 100644 --- a/module_programming_llm/module_programming_llm/__main__.py +++ b/module_programming_llm/module_programming_llm/__main__.py @@ -15,12 +15,10 @@ from athena.logger import logger from module_programming_llm.config import Configuration -from module_programming_llm.generate_graded_suggestions_by_file import ( - generate_suggestions_by_file as generate_graded_suggestions_by_file, -) -from module_programming_llm.generate_non_graded_suggestions_by_file import ( - generate_suggestions_by_file as generate_non_graded_suggestions_by_file, -) +from module_programming_llm.graded.basic_by_file.generate import generate_graded_basic_by_file_suggestions +from module_programming_llm.graded.zero_shot.generate import generate_graded_zero_shot_suggestions +from module_programming_llm.guided.basic_by_file.generate import generate_guided_basic_by_file_suggestions +from module_programming_llm.guided.zero_shot.generate import generate_guided_zero_shot_suggestions @submissions_consumer @@ -36,19 +34,34 @@ def select_submission(exercise: Exercise, submissions: List[Submission]) -> Subm @feedback_consumer def process_incoming_feedback(exercise: Exercise, submission: Submission, feedbacks: List[Feedback]): - logger.info("process_feedback: Received %d feedbacks for submission %d of exercise %d.", len(feedbacks), submission.id, exercise.id) + logger.info("process_feedback: Received %d feedbacks for submission %d of exercise %d.", len(feedbacks), + submission.id, exercise.id) @feedback_provider -async def suggest_feedback(exercise: Exercise, submission: Submission, is_graded: bool, module_config: Configuration) -> List[Feedback]: 
+async def suggest_feedback(exercise: Exercise, submission: Submission, is_graded: bool, module_config: Configuration) -> \ +List[Feedback]: logger.info("suggest_feedback: %s suggestions for submission %d of exercise %d were requested", "Graded" if is_graded else "Non-graded", submission.id, exercise.id) if is_graded: - return await generate_graded_suggestions_by_file(exercise, submission, module_config.graded_approach, - module_config.debug) - return await generate_non_graded_suggestions_by_file(exercise, submission, module_config.non_graded_approach, - module_config.debug) + if module_config.graded_basic_by_file: + return await generate_graded_basic_by_file_suggestions(exercise, submission, + module_config.graded_basic_by_file, + module_config.debug) + elif module_config.graded_zero_shot: + return await generate_graded_zero_shot_suggestions(exercise, submission, + module_config.graded_zero_shot, + module_config.debug) + else: + if module_config.guided_basic_by_file: + return await generate_guided_basic_by_file_suggestions(exercise, submission, + module_config.guided_basic_by_file, + module_config.debug) + elif module_config.guided_zero_shot: + return await generate_guided_zero_shot_suggestions(exercise, submission, module_config.guided_zero_shot, + module_config.debug) + return [] if __name__ == "__main__": @@ -56,7 +69,7 @@ async def suggest_feedback(exercise: Exercise, submission: Submission, is_graded tiktoken.get_encoding("cl100k_base") app.start() - enable_debug = os.getenv("ENABLE_DEBUGGING_INFO", False) + enable_debug = os.environ.get("ENABLE_DEBUGGING_INFO", False) if enable_debug: set_debug(True) - set_verbose(True) \ No newline at end of file + set_verbose(True) diff --git a/module_programming_llm/module_programming_llm/config.py b/module_programming_llm/module_programming_llm/config.py index 5edd58003..606f834c0 100644 --- a/module_programming_llm/module_programming_llm/config.py +++ b/module_programming_llm/module_programming_llm/config.py @@ -1,138 +1,12 @@ -from abc import ABC - from pydantic import BaseModel, Field from athena import config_schema_provider -from module_programming_llm.helpers.models import ModelConfigType, DefaultModelConfig -from module_programming_llm.prompts.generate_graded_suggestions_by_file import ( - system_message as generate_graded_suggestions_by_file_system_message, - human_message as generate_graded_suggestions_by_file_human_message, -) -from module_programming_llm.prompts.generate_non_graded_suggestions_by_file import ( - system_message as generate_non_graded_suggestions_by_file_system_message, - human_message as generate_non_graded_suggestions_by_file_human_message, -) -from module_programming_llm.prompts.split_grading_instructions_by_file import ( - system_message as split_grading_instructions_by_file_message, - human_message as split_grading_instructions_by_file_human_message, -) -from module_programming_llm.prompts.split_problem_non_grading_statement_by_file import ( - system_message as split_problem_statements_by_file_system_message_without_solution, - human_message as split_problem_statements_by_file_human_message_without_solution, -) -from module_programming_llm.prompts.split_problem_grading_statement_by_file import ( - system_message as split_problem_statements_by_file_system_message_with_solution, - human_message as split_problem_statements_by_file_human_message_with_solution, -) -from module_programming_llm.prompts.summarize_submission_by_file import ( - system_message as summarize_submission_by_file_system_message, - human_message as 
summarize_submission_by_file_human_message, -) - - -class SplitProblemStatementsBasePrompt(BaseModel): - """Base class for splitting problem statements into file-based ones, providing a structured approach for processing statements.""" - - system_message: str = Field(..., - description="Message for priming AI behavior and instructing it what to do.") - human_message: str = Field(..., - description="Message for priming AI behavior and instructing it what to do.") - tokens_before_split: int = Field(default=250, - description="Split the problem statement into file-based ones after this number of tokens.") - - -class SplitProblemStatementsWithSolutionByFilePrompt(SplitProblemStatementsBasePrompt): - """Specialized class for splitting problem statements with solutions, for cases where detailed solution information is available.""" - system_message: str = split_problem_statements_by_file_system_message_with_solution - human_message: str = split_problem_statements_by_file_human_message_with_solution - - -class SplitProblemStatementsWithoutSolutionByFilePrompt( - SplitProblemStatementsBasePrompt -): - """Specialized class for splitting problem statements without solutions, applicable when solution details are not provided.""" - system_message: str = split_problem_statements_by_file_system_message_without_solution - human_message: str = split_problem_statements_by_file_human_message_without_solution - - -class SplitGradingInstructionsByFilePrompt(BaseModel): - """\ -Features available: **{grading_instructions}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}**\ -""" - system_message: str = Field(default=split_grading_instructions_by_file_message, - description="Message for priming AI behavior and instructing it what to do.") - human_message: str = Field(default=split_grading_instructions_by_file_human_message, - description="Message from a human. The input on which the AI is supposed to act.") - tokens_before_split: int = Field(default=250, description="Split the grading instructions into file-based ones after this number of tokens.") - - -class FeedbackGenerationBasePrompt(BaseModel): - """Base class for feedback generation prompts, contains common definitions.""" - - system_message: str = Field(..., - description="Message for priming AI behavior and instructing it what to do.",) - human_message: str = Field(..., - description="Message from a human. The input on which the AI is supposed to act.",) - - -class GradedFeedbackGenerationPrompt(FeedbackGenerationBasePrompt): - """Generates graded feedback based on file submissions, tailored to provide detailed, evaluative comments and scores.""" - - system_message: str = generate_graded_suggestions_by_file_system_message - human_message: str = generate_graded_suggestions_by_file_human_message +from module_programming_llm.graded.zero_shot.config import GradedZeroShotConfig +from module_programming_llm.graded.basic_by_file.config import GradedBasicByFileConfig -class NonGradedFeedbackGenerationPrompt(FeedbackGenerationBasePrompt): - """\ -Features available: **{problem_statement}**, **{submission_file}** - -*Note: Prompt will be applied per file independently. Also, you don't have to include all features, -e.g. template_to_submission_diff. 
- """ - - system_message: str = generate_non_graded_suggestions_by_file_system_message - human_message: str = generate_non_graded_suggestions_by_file_human_message - - -class FileSummaryPrompt(BaseModel): - """Generates concise summaries of submission files, facilitating a quicker review and understanding of the content for AI processing.""" - - system_message: str = Field(summarize_submission_by_file_system_message, - description="Message for priming AI behavior and instructing it what to do.") - human_message: str = Field(summarize_submission_by_file_human_message, - description="Message from a human. The input on which the AI is supposed to act.") - - -class BasicApproachConfig(BaseModel): - """Defines a basic configuration for processing submissions, incorporating problem statement splitting, feedback generation, and file summarization.""" - - max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") - model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore - max_number_of_files: int = Field(default=25, description="Maximum number of files. If exceeded, it will prioritize the most important ones.") - split_problem_statement_by_file_prompt: SplitProblemStatementsBasePrompt = Field(description="To be defined in " "subclasses.") - generate_suggestions_by_file_prompt: SplitProblemStatementsBasePrompt = Field(description="To be defined in " "subclasses.") - generate_file_summary_prompt: FileSummaryPrompt = Field(default=FileSummaryPrompt(), description="Generates short summaries to be fed into the LLM with separate files.") - - -class GradedBasicApproachConfig(BasicApproachConfig, ABC): - """\ -This approach uses an LLM to split up the problem statement and grading instructions by file, if necessary. \ -Then, it generates graded suggestions for each file independently.\ -""" - - split_problem_statement_by_file_prompt: SplitProblemStatementsWithSolutionByFilePrompt = Field(default=SplitProblemStatementsWithSolutionByFilePrompt()) - split_grading_instructions_by_file_prompt: SplitGradingInstructionsByFilePrompt = (Field(default=SplitGradingInstructionsByFilePrompt())) - generate_suggestions_by_file_prompt: FeedbackGenerationBasePrompt = Field(default=GradedFeedbackGenerationPrompt()) - - -class NonGradedBasicApproachConfig(BasicApproachConfig, ABC): - """\ -This approach uses an LLM to split up the problem statement, if necessary. 
\ -Then, it generates non graded suggestions for each file independently.\ -""" - - split_problem_statement_by_file_prompt: SplitProblemStatementsWithoutSolutionByFilePrompt = Field(default=SplitProblemStatementsWithoutSolutionByFilePrompt()) - generate_suggestions_by_file_prompt: FeedbackGenerationBasePrompt = Field(default=NonGradedFeedbackGenerationPrompt()) +from module_programming_llm.guided.basic_by_file.config import GuidedBasicByFileConfig +from module_programming_llm.guided.zero_shot.config import GuidedZeroShotConfig @config_schema_provider @@ -140,5 +14,9 @@ class Configuration(BaseModel): """Configuration settings for the entire module, including debug mode and approach-specific configurations.""" debug: bool = Field(default=False, description="Enable debug mode.") - graded_approach: GradedBasicApproachConfig = Field(default=GradedBasicApproachConfig()) - non_graded_approach: NonGradedBasicApproachConfig = Field(default=NonGradedBasicApproachConfig()) \ No newline at end of file + + graded_zero_shot: GradedZeroShotConfig = Field(default=GradedZeroShotConfig()) + graded_basic_by_file: GradedBasicByFileConfig = Field(default=GradedBasicByFileConfig()) + + guided_zero_shot: GuidedZeroShotConfig = Field(default=GuidedZeroShotConfig()) + guided_basic_by_file: GuidedBasicByFileConfig = Field(default=GuidedBasicByFileConfig()) \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/graded/basic_by_file/config/__init__.py b/module_programming_llm/module_programming_llm/graded/basic_by_file/config/__init__.py new file mode 100644 index 000000000..97e162531 --- /dev/null +++ b/module_programming_llm/module_programming_llm/graded/basic_by_file/config/__init__.py @@ -0,0 +1,23 @@ +from abc import ABC +from pydantic import BaseModel, Field + +from module_programming_llm.helpers.models import ModelConfigType, DefaultModelConfig + +from .generate import GradedFeedbackGenerationPrompt +from .split_grading_instructions import SplitGradingInstructionsPrompt +from .split_problem_statement import SplitProblemStatementPrompt + + +class GradedBasicByFileConfig(BaseModel, ABC): + """\ +This approach uses an LLM to split up the problem statement and grading instructions by file, if necessary. \ +Then, it generates graded suggestions for each file independently.\ +""" + model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore + max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") + max_number_of_files: int = Field(default=25, description="Maximum number of files. If exceeded, it will prioritize the most important ones.") + + generate_prompt: GradedFeedbackGenerationPrompt = Field(default=GradedFeedbackGenerationPrompt()) + split_grading_instructions_prompt: SplitGradingInstructionsPrompt = (Field(default=SplitGradingInstructionsPrompt())) + split_problem_statement_prompt: SplitProblemStatementPrompt = Field(default=SplitProblemStatementPrompt()) + diff --git a/module_programming_llm/module_programming_llm/graded/basic_by_file/config/generate.py b/module_programming_llm/module_programming_llm/graded/basic_by_file/config/generate.py new file mode 100644 index 000000000..ba72202d4 --- /dev/null +++ b/module_programming_llm/module_programming_llm/graded/basic_by_file/config/generate.py @@ -0,0 +1,47 @@ +from pydantic import BaseModel, Field + + +system_message = """\ +You are an AI tutor for programming assessment at a prestigious university. 
+ +# Task +Create graded feedback suggestions for a student\'s programming submission that a human tutor would accept. \ +Meaning, the feedback you provide should be appliable to the submission with little to no modification. + +# Style +1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. Contextual + +# Problem statement +{problem_statement} + +# Grading instructions +{grading_instructions} +Max points: {max_points}, bonus points: {bonus_points} (whole assessment, not just this file) +""" + + +file_message = """\ +Student\'s submission file to grade (with line numbers : ): +\"\"\" +{submission_file} +\"\"\"\ + +# Diff between solution (deletions) and student\'s submission (additions): +{solution_to_submission_diff} + +# Diff between template (deletions) and student\'s submission (additions): +{template_to_submission_diff} +""" + + +class GradedFeedbackGenerationPrompt(BaseModel): + """\ + Generates graded feedback based on file submissions, tailored to provide detailed, evaluative comments and scores. + + Features available: **{problem_statement}**, **{grading_instructions}**, **{max_points}**, **{bonus_points}**, **{solution_to_submission_diff}**, **{template_to_submission_diff}**, **{submission_file}**\ + """ + + system_message: str = Field(default=system_message, + description="Message for priming AI behavior and instructing it what to do.") + file_message: str = Field(default=file_message, + description="Message containing the context of a single file submission.") diff --git a/module_programming_llm/module_programming_llm/graded/basic_by_file/config/split_grading_instructions.py b/module_programming_llm/module_programming_llm/graded/basic_by_file/config/split_grading_instructions.py new file mode 100644 index 000000000..79fbb1f61 --- /dev/null +++ b/module_programming_llm/module_programming_llm/graded/basic_by_file/config/split_grading_instructions.py @@ -0,0 +1,35 @@ +from pydantic import BaseModel, Field + + +system_message = """\ +You are an AI tutor for programming assessment at a prestigious university. + +# Task +Restructure the grading instructions by student changed file to show relevant information for each file to the tutor. \ +Make it as easy as possible for the tutor to grade the assignment when looking at the changed file. \ +Some instructions may be relevant for multiple files. 
+""" + + +human_message = """\ +Grading instructions: +{grading_instructions} + +Changed files from template to sample solution: +{changed_files_from_template_to_solution} + +Changed files from template to student submission (Pick from this list, very important!): +{changed_files_from_template_to_submission} + +Grading instructions by file: +""" + + +class SplitGradingInstructionsPrompt(BaseModel): + """Features available: **{grading_instructions}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}**""" + + system_message: str = Field(default=system_message, + description="Message for priming AI behavior and instructing it what to do.") + grading_instructions_message: str = Field(default=human_message, + description="Message containing the context needed to split the grading instructions by file.") + tokens_before_split: int = Field(default=250, description="Split the grading instructions into file-based ones after this number of tokens.") diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_grading_statement_by_file.py b/module_programming_llm/module_programming_llm/graded/basic_by_file/config/split_problem_statement.py similarity index 50% rename from module_programming_llm/module_programming_llm/prompts/split_problem_grading_statement_by_file.py rename to module_programming_llm/module_programming_llm/graded/basic_by_file/config/split_problem_statement.py index ce4620c7c..dbd95d880 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_grading_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/graded/basic_by_file/config/split_problem_statement.py @@ -1,3 +1,6 @@ +from pydantic import BaseModel, Field + + system_message = """\ You are an AI tutor for programming assessment at a prestigious university. @@ -8,6 +11,7 @@ For the file keys, include the full path. 
""" + human_message = """\ Problem statement: {problem_statement} @@ -20,3 +24,14 @@ Problem statement by file: """ + + +class SplitProblemStatementPrompt(BaseModel): + """Features available: **{problem_statement}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}**""" + + system_message: str = Field(default=system_message, + description="Message for priming AI behavior and instructing it what to do.") + human_message: str = Field(default=human_message, + description="Message for priming AI behavior and instructing it what to do.") + tokens_before_split: int = Field(default=250, + description="Split the problem statement into file-based ones after this number of tokens.") diff --git a/module_programming_llm/module_programming_llm/generate_graded_suggestions_by_file.py b/module_programming_llm/module_programming_llm/graded/basic_by_file/generate.py similarity index 92% rename from module_programming_llm/module_programming_llm/generate_graded_suggestions_by_file.py rename to module_programming_llm/module_programming_llm/graded/basic_by_file/generate.py index c87737c36..90ea176ae 100644 --- a/module_programming_llm/module_programming_llm/generate_graded_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/graded/basic_by_file/generate.py @@ -6,13 +6,10 @@ from athena import emit_meta from athena.programming import Exercise, Submission, Feedback -from module_programming_llm.config import GradedBasicApproachConfig -from module_programming_llm.split_grading_instructions_by_file import ( - split_grading_instructions_by_file, -) -from module_programming_llm.split_problem_statement_by_file import ( - split_problem_statement_by_file, -) +from .config import GradedBasicByFileConfig +from .split_grading_instructions import generate_split_grading_instructions_by_file +from .split_problem_statement import generate_split_problem_statement_by_file + from module_programming_llm.helpers.llm_utils import ( check_prompt_length_and_omit_features_if_necessary, get_chat_prompt_with_formatting_instructions, @@ -57,31 +54,31 @@ class Config: # pylint: disable=too-many-locals -async def generate_suggestions_by_file( +async def generate_graded_basic_by_file_suggestions( exercise: Exercise, submission: Submission, - config: GradedBasicApproachConfig, + config: GradedBasicByFileConfig, debug: bool, ) -> List[Feedback]: model = config.model.get_model() # type: ignore[attr-defined] chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, - system_message=config.generate_suggestions_by_file_prompt.system_message, - human_message=config.generate_suggestions_by_file_prompt.human_message, + system_message=config.generate_prompt.system_message, + human_message=config.generate_prompt.file_message, pydantic_object=AssessmentModel, ) # Get split problem statement and grading instructions by file (if necessary) split_problem_statement, split_grading_instructions = await asyncio.gather( - split_problem_statement_by_file( + generate_split_problem_statement_by_file( exercise=exercise, submission=submission, prompt=chat_prompt, config=config, debug=debug, ), - split_grading_instructions_by_file( + generate_split_grading_instructions_by_file( exercise=exercise, submission=submission, prompt=chat_prompt, @@ -93,7 +90,7 @@ async def generate_suggestions_by_file( problem_statement_tokens = num_tokens_from_string(exercise.problem_statement or "") is_short_problem_statement = ( problem_statement_tokens - <= 
config.split_problem_statement_by_file_prompt.tokens_before_split + <= config.split_problem_statement_prompt.tokens_before_split ) file_problem_statements = ( { @@ -106,7 +103,7 @@ async def generate_suggestions_by_file( is_short_grading_instructions = ( num_tokens_from_string(exercise.grading_instructions) - <= config.split_grading_instructions_by_file_prompt.tokens_before_split + <= config.split_grading_instructions_prompt.tokens_before_split if exercise.grading_instructions is not None else True ) @@ -203,7 +200,6 @@ async def generate_suggestions_by_file( "bonus_points": exercise.bonus_points, "solution_to_submission_diff": solution_to_submission_diff, "template_to_submission_diff": template_to_submission_diff, - "template_to_solution_diff": template_to_solution_diff, "grading_instructions": grading_instructions, "problem_statement": problem_statement, } @@ -271,7 +267,7 @@ async def generate_suggestions_by_file( f"exercise-{exercise.id}", f"submission-{submission.id}", f"file-{prompt_input['file_path']}", - "generate-suggestions-by-file", + "generate-graded-suggestions-by-file", ], ) for prompt_input in prompt_inputs @@ -280,7 +276,7 @@ async def generate_suggestions_by_file( if debug: emit_meta( - "generate_suggestions", + "generate-graded-suggestions-by-file", [ { "file_path": prompt_input["file_path"], @@ -310,6 +306,7 @@ async def generate_suggestions_by_file( ) feedbacks.append( Feedback( + id=None, exercise_id=exercise.id, submission_id=submission.id, title=feedback.title, diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/graded/basic_by_file/split_grading_instructions.py similarity index 92% rename from module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py rename to module_programming_llm/module_programming_llm/graded/basic_by_file/split_grading_instructions.py index 3ca91c901..d4fb85415 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/graded/basic_by_file/split_grading_instructions.py @@ -7,7 +7,7 @@ from athena import emit_meta from athena.programming import Exercise, Submission -from module_programming_llm.config import GradedBasicApproachConfig +from .config import GradedBasicByFileConfig from module_programming_llm.helpers.llm_utils import ( get_chat_prompt_with_formatting_instructions, num_tokens_from_string, @@ -28,11 +28,11 @@ class SplitGradingInstructions(BaseModel): # pylint: disable=too-many-locals -async def split_grading_instructions_by_file( +async def generate_split_grading_instructions_by_file( exercise: Exercise, submission: Submission, prompt: ChatPromptTemplate, - config: GradedBasicApproachConfig, + config: GradedBasicByFileConfig, debug: bool ) -> Optional[SplitGradingInstructions]: """Split the general grading instructions by file @@ -52,7 +52,7 @@ async def split_grading_instructions_by_file( # Return None if the grading instructions are too short if (grading_instructions is None or num_tokens_from_string( - grading_instructions) <= config.split_grading_instructions_by_file_prompt.tokens_before_split): + grading_instructions) <= config.split_grading_instructions_prompt.tokens_before_split): return None # Return None if the grading instructions are not in the prompt @@ -75,8 +75,8 @@ async def split_grading_instructions_by_file( chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, - 
system_message=config.split_grading_instructions_by_file_prompt.system_message, - human_message=config.split_grading_instructions_by_file_prompt.human_message, + system_message=config.split_grading_instructions_prompt.system_message, + human_message=config.split_grading_instructions_prompt.grading_instructions_message, pydantic_object=SplitGradingInstructions, ) diff --git a/module_programming_llm/module_programming_llm/graded/basic_by_file/split_problem_statement.py b/module_programming_llm/module_programming_llm/graded/basic_by_file/split_problem_statement.py new file mode 100644 index 000000000..823765579 --- /dev/null +++ b/module_programming_llm/module_programming_llm/graded/basic_by_file/split_problem_statement.py @@ -0,0 +1,136 @@ +from typing import Optional, Sequence +from collections import defaultdict + +from pydantic import BaseModel, Field +from langchain.prompts import ChatPromptTemplate + +from athena import emit_meta +from athena.programming import Exercise, Submission + +from module_programming_llm.helpers.llm_utils import ( + get_chat_prompt_with_formatting_instructions, + num_tokens_from_string, + num_tokens_from_prompt, + predict_and_parse +) +from module_programming_llm.helpers.utils import get_diff + +from .config import GradedBasicByFileConfig + +class FileProblemStatement(BaseModel): + file_name: str = Field(description="File name") + problem_statement: str = Field(description="Problem statement relevant for this file") + + +class SplitProblemStatement(BaseModel): + """Collection of problem statements split by file""" + + items: Sequence[FileProblemStatement] = Field(description="File problem statements") + + +# pylint: disable=too-many-locals +async def generate_split_problem_statement_by_file( + exercise: Exercise, + submission: Submission, + prompt: ChatPromptTemplate, + config: GradedBasicByFileConfig, + debug: bool + ) -> Optional[SplitProblemStatement]: + """Split the general problem statement by file + + Args: + exercise (Exercise): Exercise to split the problem statement for (respecting the changed files) + submission (Submission): Submission to split the problem statement for (respecting the changed files) + prompt (ChatPromptTemplate): Prompt template to check for problem_statement + config (GradedBasicApproachConfig): Configuration + + Returns: + Optional[SplitProblemStatement]: Split problem statement, None if it is too short or too long + """ + + # Return None if the problem statement is too short + if num_tokens_from_string(exercise.problem_statement or "") <= config.split_problem_statement_prompt.tokens_before_split: + return None + + # Return None if the problem statement not in the prompt + if "problem_statement" not in prompt.input_variables: + return None + + model = config.model.get_model() # type: ignore[attr-defined] + + template_repo = exercise.get_template_repository() + submission_repo = submission.get_repository() + + changed_files_from_template_to_submission = get_diff( + src_repo=template_repo, + dst_repo=submission_repo, + file_path=None, + name_only=True + ).split("\n") + + chat_prompt = get_chat_prompt_with_formatting_instructions( + model=model, + system_message=config.split_problem_statement_prompt.system_message, + human_message=config.split_problem_statement_prompt.human_message, + pydantic_object=SplitProblemStatement + ) + + prompt_input = { + "problem_statement": exercise.problem_statement or "No problem statement.", + "changed_files_from_template_to_submission": ", ".join(changed_files_from_template_to_submission) + } + + if 
"changed_files_from_template_to_solution" in chat_prompt.input_variables: + solution_repo = exercise.get_solution_repository() + changed_files_from_template_to_solution = get_diff( + src_repo=template_repo, + dst_repo=solution_repo, + file_path=None, + name_only=True, + ).split("\n") + prompt_input["changed_files_from_template_to_solution"] = ", ".join( + changed_files_from_template_to_solution + ) + + # Return None if the prompt is too long + if num_tokens_from_prompt(chat_prompt, prompt_input) > config.max_input_tokens: + return None + + split_problem_statement = await predict_and_parse( + model=model, + chat_prompt=chat_prompt, + prompt_input=prompt_input, + pydantic_object=SplitProblemStatement, + tags=[ + f"exercise-{exercise.id}", + f"submission-{submission.id}", + "split-problem-statement-by-file" + ] + ) + + if debug: + emit_meta("file_problem_statements", { + "prompt": chat_prompt.format(**prompt_input), + "result": split_problem_statement.dict() if split_problem_statement is not None else None + }) + + if split_problem_statement is None or not split_problem_statement.items: + return None + + # Join duplicate file names (some responses contain multiple problem statements for the same file) + file_problem_statements_by_file_name = defaultdict(list) + for file_problem_statement in split_problem_statement.items: + file_problem_statements_by_file_name[file_problem_statement.file_name].append(file_problem_statement) + + split_problem_statement.items = [ + FileProblemStatement( + file_name=file_name, + problem_statement="\n".join( + file_problem_statement.problem_statement + for file_problem_statement in file_problem_statements + ) + ) + for file_name, file_problem_statements in file_problem_statements_by_file_name.items() + ] + + return split_problem_statement diff --git a/module_programming_llm/module_programming_llm/graded/zero_shot/config/__init__.py b/module_programming_llm/module_programming_llm/graded/zero_shot/config/__init__.py new file mode 100644 index 000000000..04f4eeff4 --- /dev/null +++ b/module_programming_llm/module_programming_llm/graded/zero_shot/config/__init__.py @@ -0,0 +1,15 @@ +from abc import ABC +from pydantic import BaseModel, Field + +from module_programming_llm.helpers.models import ModelConfigType, DefaultModelConfig + +from .generate import GradedZeroShotPrompt + + +class GradedZeroShotConfig(BaseModel, ABC): + """This approach uses an LLM to just generate graded suggestions for all changed files at once.""" + + model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore + max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") + + prompt: GradedZeroShotPrompt = Field(default=GradedZeroShotPrompt()) diff --git a/module_programming_llm/module_programming_llm/prompts/generate_graded_suggestions_by_file.py b/module_programming_llm/module_programming_llm/graded/zero_shot/config/generate.py similarity index 57% rename from module_programming_llm/module_programming_llm/prompts/generate_graded_suggestions_by_file.py rename to module_programming_llm/module_programming_llm/graded/zero_shot/config/generate.py index 8b1f00832..efc85d9f9 100644 --- a/module_programming_llm/module_programming_llm/prompts/generate_graded_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/graded/zero_shot/config/generate.py @@ -1,9 +1,12 @@ +from pydantic import BaseModel, Field + + system_message = """\ You are an AI tutor for programming assessment at a prestigious university. 
# Task Create graded feedback suggestions for a student\'s programming submission that a human tutor would accept. \ -Meaning, the feedback you provide should be appliable to the submission with little to no modification. +Meaning, the feedback you provide should be applicable to the submission with little to no modification. # Style 1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. Contextual @@ -13,7 +16,15 @@ # Grading instructions {grading_instructions} -Max points: {max_points}, bonus points: {bonus_points} (whole assessment, not just this file) +Max points: {max_points}, bonus points: {bonus_points} (whole assessment, not just one file) +""" + + +file_message = """\ +Student\'s submission file to grade (with line numbers <number>: <line>): +\"\"\" +{submission_file} +\"\"\"\ # Diff between solution (deletions) and student\'s submission (additions): {solution_to_submission_diff} @@ -22,9 +33,11 @@ {template_to_submission_diff} """ -human_message = """\ -Student\'s submission file to grade (with line numbers : ): -\"\"\" -{submission_file} -\"\"\"\ -""" + +class GradedZeroShotPrompt(BaseModel): + """Prompt for the zero-shot graded feedback generation approach.""" + + system_message: str = Field(default=system_message, + description="Message for priming AI behavior and instructing it what to do.",) + file_message: str = Field(default=file_message, + description="Message for one file which contains **{submission_file}**, **{solution_to_submission_diff}**, **{template_to_submission_diff}**",) diff --git a/module_programming_llm/module_programming_llm/graded/zero_shot/generate.py b/module_programming_llm/module_programming_llm/graded/zero_shot/generate.py new file mode 100644 index 000000000..d46ce0f6a --- /dev/null +++ b/module_programming_llm/module_programming_llm/graded/zero_shot/generate.py @@ -0,0 +1,215 @@ +from typing import List, Optional, Sequence +import os +import asyncio + +from langchain_core.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate +from langchain_core.pydantic_v1 import BaseModel, Field + +from athena import emit_meta +from athena.programming import Exercise, Submission, Feedback +from module_programming_llm.graded.zero_shot.config import GradedZeroShotConfig + +from module_programming_llm.helpers.llm_utils import ( + check_prompt_length_and_omit_features_if_necessary, + get_chat_prompt_with_formatting_instructions, + num_tokens_from_string, + predict_and_parse, +) +from module_programming_llm.helpers.utils import ( + get_diff, + load_files_from_repo, + add_line_numbers, + get_programming_language_file_extension, format_grading_instructions, +) + + +class FeedbackModel(BaseModel): + title: str = Field( + description="Very short title, i.e.
feedback category", example="Logic Error" + ) + description: str = Field(description="Feedback description") + line_start: Optional[int] = Field( + description="Referenced line number start, or empty if unreferenced" + ) + line_end: Optional[int] = Field( + description="Referenced line number end, or empty if unreferenced" + ) + credits: float = Field(0.0, description="Number of points received/deducted") + grading_instruction_id: Optional[int] = Field( + description="ID of the grading instruction that was used to generate this feedback, or empty if no grading instruction was used" + ) + + class Config: + title = "Feedback" + + +class AssessmentModel(BaseModel): + """Collection of feedbacks making up an assessment""" + + feedbacks: Sequence[FeedbackModel] = Field(description="Assessment feedbacks") + + class Config: + title = "Assessment" + + +# pylint: disable=too-many-locals +async def generate_graded_zero_shot_suggestions( + exercise: Exercise, + submission: Submission, + config: GradedZeroShotConfig, + debug: bool, +) -> List[Feedback]: + model = config.model.get_model() # type: ignore[attr-defined] + + system_message_prompt = SystemMessagePromptTemplate.from_template(config.prompt.system_message) + file_message_prompt = HumanMessagePromptTemplate.from_template(config.prompt.file_message) + + prompt_inputs: List[dict] = [] + + # Feature extraction + template_repo = exercise.get_template_repository() + solution_repo = exercise.get_solution_repository() + submission_repo = submission.get_repository() + + changed_files_from_template_to_submission = get_diff( + src_repo=template_repo, dst_repo=submission_repo, file_path=None, name_only=True + ).split("\n") + changed_files_from_template_to_submission = [ + os.path.join(str(submission_repo.working_tree_dir or ""), file_path) + for file_path in changed_files_from_template_to_submission + ] + + # Changed text files + loaded_changed_files = load_files_from_repo( + submission_repo, + file_filter=lambda file_path: file_path + in changed_files_from_template_to_submission, + ) + + problem_statement = exercise.problem_statement or "" + problem_statement = ( + problem_statement + if problem_statement.strip() + else "No problem statement found." + ) + + programming_language_extension = get_programming_language_file_extension( + programming_language=exercise.programming_language + ) + + # Gather prompt inputs for each changed file (independently) + for file_path, file_content in loaded_changed_files.items(): + if programming_language_extension and not file_path.endswith(programming_language_extension): + continue + + file_content = add_line_numbers(file_content) + solution_to_submission_diff = get_diff( + src_repo=solution_repo, + dst_repo=submission_repo, + src_prefix="solution", + dst_prefix="submission", + file_path=file_path, + ) + template_to_submission_diff = get_diff( + src_repo=template_repo, + dst_repo=submission_repo, + src_prefix="template", + dst_prefix="submission", + file_path=file_path, + ) + template_to_solution_diff = get_diff( + src_repo=template_repo, + dst_repo=solution_repo, + src_prefix="template", + dst_prefix="solution", + file_path=file_path, + ) + + grading_instructions = format_grading_instructions(exercise.grading_instructions, exercise.grading_criteria) + + prompt_inputs.append( + { + "file_path": file_path, # Not really relevant for the prompt, but necessary for e.g. 
logging purposes + "submission_file": file_content, + "solution_to_submission_diff": solution_to_submission_diff, + "template_to_submission_diff": template_to_submission_diff, + "priority": len( + template_to_solution_diff + ), # Not really relevant for the prompt, necessary for filtering + } + ) + + prompt_input = { + "max_points": exercise.max_points, + "bonus_points": exercise.bonus_points, + "grading_instructions": grading_instructions, + "problem_statement": problem_statement, + "prompt_inputs": prompt_inputs, + } + + chat_prompt = ChatPromptTemplate.from_messages( + [system_message_prompt] + + [file_message_prompt.format(**prompt_input) for prompt_input in prompt_inputs] + ) + + # noinspection PyTypeChecker + results: List[Optional[AssessmentModel]] = await predict_and_parse( + model=model, + chat_prompt=chat_prompt, + prompt_input=prompt_input, + pydantic_object=AssessmentModel, + tags=[ + f"exercise-{exercise.id}", + f"submission-{submission.id}", + "one-shot-graded-suggestions", + ], + ) + + if debug: + emit_meta( + "one-shot-graded-suggestions", + [ + { + "file_path": prompt_input["file_path"], + "prompt": chat_prompt.format(**prompt_input), + "result": result.dict() if result is not None else None, + } + for prompt_input, result in zip(prompt_inputs, results) + ], + ) + + grading_instruction_ids = set( + grading_instruction.id + for criterion in exercise.grading_criteria or [] + for grading_instruction in criterion.structured_grading_instructions + ) + + feedbacks: List[Feedback] = [] + for prompt_input, result in zip(prompt_inputs, results): + file_path = prompt_input["file_path"] + if result is None: + continue + for feedback in result.feedbacks: + grading_instruction_id = ( + feedback.grading_instruction_id + if feedback.grading_instruction_id in grading_instruction_ids + else None + ) + feedbacks.append( + Feedback( + id=None, + exercise_id=exercise.id, + submission_id=submission.id, + title=feedback.title, + description=feedback.description, + file_path=file_path, + line_start=feedback.line_start, + line_end=feedback.line_end, + credits=feedback.credits, + structured_grading_instruction_id=grading_instruction_id, + is_graded=True, + meta={}, + ) + ) + + return feedbacks \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/guided/basic_by_file/config/__init__.py b/module_programming_llm/module_programming_llm/guided/basic_by_file/config/__init__.py new file mode 100644 index 000000000..89089b955 --- /dev/null +++ b/module_programming_llm/module_programming_llm/guided/basic_by_file/config/__init__.py @@ -0,0 +1,23 @@ +from abc import ABC +from pydantic import BaseModel, Field + +from module_programming_llm.helpers.models import ModelConfigType, DefaultModelConfig + +from .generate import GuidedFeedbackGenerationPrompt +from .split_problem_statement import SplitProblemStatementPrompt +from .summarize_submission import FileSummaryPrompt + + +class GuidedBasicByFileConfig(BaseModel, ABC): + """\ + This approach uses an LLM to split up the problem statement, if necessary. + Then, it generates non graded suggestions for each file independently.\ + """ + + model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore + max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") + max_number_of_files: int = Field(default=25, description="Maximum number of files. 
If exceeded, it will prioritize the most important ones.") + + generate_prompt: GuidedFeedbackGenerationPrompt = Field(default=GuidedFeedbackGenerationPrompt()) + split_problem_statement_prompt: SplitProblemStatementPrompt = Field(default=SplitProblemStatementPrompt()) + summarize_submission_prompt: FileSummaryPrompt = Field(default=FileSummaryPrompt()) diff --git a/module_programming_llm/module_programming_llm/prompts/generate_non_graded_suggestions_by_file.py b/module_programming_llm/module_programming_llm/guided/basic_by_file/config/generate.py similarity index 61% rename from module_programming_llm/module_programming_llm/prompts/generate_non_graded_suggestions_by_file.py rename to module_programming_llm/module_programming_llm/guided/basic_by_file/config/generate.py index 2fa45a31f..1ad15a6eb 100644 --- a/module_programming_llm/module_programming_llm/prompts/generate_non_graded_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/guided/basic_by_file/config/generate.py @@ -1,3 +1,6 @@ +from pydantic import BaseModel, Field + + system_message = """\ You are an AI tutor for programming assessment at a prestigious university. @@ -5,7 +8,7 @@ {problem_statement} # Task -Create non graded improvement suggestions for a student\'s programming submission that a human tutor would recommend. \ +Create non-graded improvement suggestions for a student\'s programming submission that a human tutor would recommend. Assume the tutor is not familiar with the solution. The feedback must contain only the feedback the student can learn from. Important: the answer you generate must not contain any solution suggestions or contain corrected errors. @@ -22,24 +25,38 @@ In git diff, lines marked with '-' were removed and with '+' were added by the student. -# Address your feedback to student +The student will be reading your response, so address them directly using "you".\ """ -human_message = """\ + +human_message = '''\ Path: {file_path} File(with line numbers : ): -\"\"\" +""" {submission_file} -\"\"\"\ +""" Summary of other files in the solution: -\"\"\" +""" {summary} -\"\"\" +""" The template->submission diff(only as reference): -\"\"\" +""" {template_to_submission_diff} -\"\"\" """ +''' + + +class GuidedFeedbackGenerationPrompt(BaseModel): + """\ + Generates guided, non-graded feedback based on file submissions, tailored to provide detailed, constructive improvement suggestions without scores. + + Features available: **{problem_statement}**, **{submission_file}**, **{template_to_submission_diff}**, **{file_path}**, **{summary}**\ + """ + + system_message: str = Field(default=system_message, + description="Message for priming AI behavior and instructing it what to do.") + human_message: str = Field(default=human_message, + description="Message containing the context of a single file submission.") diff --git a/module_programming_llm/module_programming_llm/guided/basic_by_file/config/split_problem_statement.py b/module_programming_llm/module_programming_llm/guided/basic_by_file/config/split_problem_statement.py new file mode 100644 index 000000000..695ba8739 --- /dev/null +++ b/module_programming_llm/module_programming_llm/guided/basic_by_file/config/split_problem_statement.py @@ -0,0 +1,35 @@ +from pydantic import BaseModel, Field + + +system_message = """\ +You are an AI tutor for programming assessment at a prestigious university. + +# Task +Restructure the problem statement by student changed files to gather work items for each file. \ +Some parts of the problem statement may be relevant for multiple files.
+Comments in the template solution can be relevant for some files, some might be not. +Include only those work items based on comments that make sense. +For the file keys, include the full path. +""" + + +human_message = """\ +Problem statement: +{problem_statement} + +Changed files from template to student submission (Pick from this list, very important!): +{changed_files_from_template_to_submission} + +Problem statement by file: +""" + + +class SplitProblemStatementPrompt(BaseModel): + """Features available: **{problem_statement}**, **{changed_files_from_template_to_submission}**""" + + system_message: str = Field(default=system_message, + description="Message for priming AI behavior and instructing it what to do.") + human_message: str = Field(default=human_message, + description="Message for priming AI behavior and instructing it what to do.") + tokens_before_split: int = Field(default=250, + description="Split the problem statement into file-based ones after this number of tokens.") diff --git a/module_programming_llm/module_programming_llm/prompts/summarize_submission_by_file.py b/module_programming_llm/module_programming_llm/guided/basic_by_file/config/summarize_submission.py similarity index 51% rename from module_programming_llm/module_programming_llm/prompts/summarize_submission_by_file.py rename to module_programming_llm/module_programming_llm/guided/basic_by_file/config/summarize_submission.py index 398078e60..ec3f98ad6 100644 --- a/module_programming_llm/module_programming_llm/prompts/summarize_submission_by_file.py +++ b/module_programming_llm/module_programming_llm/guided/basic_by_file/config/summarize_submission.py @@ -1,3 +1,6 @@ +from pydantic import BaseModel, Field + + system_message = """\ You are a very experienced software engineer. @@ -14,10 +17,21 @@ Include full path for files where necessary. """ + human_message = """\ -Path: {file_path} +File path: {file_path} File: \"\"\" {submission_file} \"\"\" """ + + +class FileSummaryPrompt(BaseModel): + """Generates concise summaries of submission files, facilitating a quicker review and understanding of the content for AI processing.""" + + system_message: str = Field(default=system_message, + description="Message for priming AI behavior and instructing it what to do.") + human_message: str = Field(default=human_message, + description="Message from a human. 
The input on which the AI is supposed to act.") + diff --git a/module_programming_llm/module_programming_llm/generate_non_graded_suggestions_by_file.py b/module_programming_llm/module_programming_llm/guided/basic_by_file/generate.py similarity index 89% rename from module_programming_llm/module_programming_llm/generate_non_graded_suggestions_by_file.py rename to module_programming_llm/module_programming_llm/guided/basic_by_file/generate.py index 569fb0d0c..b3a8f7eac 100644 --- a/module_programming_llm/module_programming_llm/generate_non_graded_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/guided/basic_by_file/generate.py @@ -5,11 +5,10 @@ from athena import emit_meta from athena.programming import Exercise, Submission, Feedback -from module_programming_llm.config import NonGradedBasicApproachConfig -from module_programming_llm.generate_summary_by_file import generate_summary_by_file -from module_programming_llm.split_problem_statement_by_file import ( - split_problem_statement_by_file, -) +from .config import GuidedBasicByFileConfig +from .summarize_submission import generate_summarize_submission +from .split_problem_statement import generate_split_problem_statement_by_file + from module_programming_llm.helpers.llm_utils import ( check_prompt_length_and_omit_features_if_necessary, get_chat_prompt_with_formatting_instructions, @@ -60,18 +59,18 @@ def generate_feedback_text(model: FeedbackModel, file: str): # pylint: disable=too-many-locals -async def generate_suggestions_by_file( - exercise: Exercise, - submission: Submission, - config: NonGradedBasicApproachConfig, - debug: bool, +async def generate_guided_basic_by_file_suggestions( + exercise: Exercise, + submission: Submission, + config: GuidedBasicByFileConfig, + debug: bool, ) -> List[Feedback]: model = config.model.get_model() # type: ignore[attr-defined] chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, - system_message=config.generate_suggestions_by_file_prompt.system_message, - human_message=config.generate_suggestions_by_file_prompt.human_message, + system_message=config.generate_prompt.system_message, + human_message=config.generate_prompt.human_message, pydantic_object=ImprovementModel, ) @@ -97,7 +96,7 @@ async def generate_suggestions_by_file( ) # Get solution summary by file (if necessary) - solution_summary = await generate_summary_by_file( + solution_summary = await generate_summarize_submission( exercise=exercise, submission=submission, config=config, @@ -106,7 +105,7 @@ async def generate_suggestions_by_file( summary_string = solution_summary.describe_solution_summary() if solution_summary is not None else "" # Get split problem statement by file (if necessary) - split_problem_statement = await split_problem_statement_by_file( + split_problem_statement = await generate_split_problem_statement_by_file( exercise=exercise, submission=submission, config=config, @@ -115,8 +114,8 @@ async def generate_suggestions_by_file( problem_statement_tokens = num_tokens_from_string(exercise.problem_statement or "") is_short_problem_statement = ( - problem_statement_tokens - <= config.split_problem_statement_by_file_prompt.tokens_before_split + problem_statement_tokens + <= config.split_problem_statement_prompt.tokens_before_split ) file_problem_statements = ( { @@ -228,7 +227,7 @@ async def generate_suggestions_by_file( f"exercise-{exercise.id}", f"submission-{submission.id}", f"file-{prompt_input['file_path']}", - "generate-suggestions-by-file", + "generate-guided-suggestions-by-file", 
], ) for prompt_input in prompt_inputs @@ -237,7 +236,7 @@ async def generate_suggestions_by_file( if debug: emit_meta( - "generate_suggestions", + "generate-guided-suggestions-by-file", [ { "file_path": prompt_input["file_path"], @@ -256,6 +255,7 @@ async def generate_suggestions_by_file( for feedback in result.feedbacks: feedbacks.append( Feedback( + id=None, exercise_id=exercise.id, submission_id=submission.id, title=generate_feedback_text(feedback, file_path), @@ -264,6 +264,8 @@ async def generate_suggestions_by_file( line_start=feedback.line_start, line_end=feedback.line_end, is_graded=False, + credits=0, + structured_grading_instruction_id=None, meta={}, ) ) diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/guided/basic_by_file/split_problem_statement.py similarity index 86% rename from module_programming_llm/module_programming_llm/split_problem_statement_by_file.py rename to module_programming_llm/module_programming_llm/guided/basic_by_file/split_problem_statement.py index 0aa5e6b1c..5a86378d3 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/guided/basic_by_file/split_problem_statement.py @@ -6,7 +6,6 @@ from athena import emit_meta from athena.programming import Exercise, Submission -from module_programming_llm.config import GradedBasicApproachConfig, BasicApproachConfig from module_programming_llm.helpers.llm_utils import ( get_chat_prompt_with_formatting_instructions, num_tokens_from_string, @@ -15,6 +14,7 @@ ) from module_programming_llm.helpers.utils import get_diff +from .config import GuidedBasicByFileConfig class FileProblemStatement(BaseModel): file_name: str = Field(description="File name") @@ -28,10 +28,10 @@ class SplitProblemStatement(BaseModel): # pylint: disable=too-many-locals -async def split_problem_statement_by_file( +async def generate_split_problem_statement_by_file( exercise: Exercise, submission: Submission, - config: BasicApproachConfig, + config: GuidedBasicByFileConfig, debug: bool ) -> Optional[SplitProblemStatement]: """Split the general problem statement by file @@ -49,13 +49,13 @@ async def split_problem_statement_by_file( prompt = get_chat_prompt_with_formatting_instructions( model=model, - system_message=config.split_problem_statement_by_file_prompt.system_message, - human_message=config.split_problem_statement_by_file_prompt.human_message, + system_message=config.split_problem_statement_prompt.system_message, + human_message=config.split_problem_statement_prompt.human_message, pydantic_object=SplitProblemStatement ) # Return None if the problem statement is too short - if num_tokens_from_string(exercise.problem_statement or "") <= config.split_problem_statement_by_file_prompt.tokens_before_split: + if num_tokens_from_string(exercise.problem_statement or "") <= config.split_problem_statement_prompt.tokens_before_split: return None # Return None if the problem statement not in the prompt @@ -72,12 +72,19 @@ async def split_problem_statement_by_file( name_only=True ).split("\n") + chat_prompt = get_chat_prompt_with_formatting_instructions( + model=model, + system_message=config.split_problem_statement_prompt.system_message, + human_message=config.split_problem_statement_prompt.human_message, + pydantic_object=SplitProblemStatement + ) + prompt_input = { "problem_statement": exercise.problem_statement or "No problem statement.", 
"changed_files_from_template_to_submission": ", ".join(changed_files_from_template_to_submission) } - if "changed_files_from_template_to_solution" in chat_prompt.input_variables: + if "changed_files_from_template_to_solution" in prompt.input_variables: solution_repo = exercise.get_solution_repository() changed_files_from_template_to_solution = get_diff( src_repo=template_repo, diff --git a/module_programming_llm/module_programming_llm/generate_summary_by_file.py b/module_programming_llm/module_programming_llm/guided/basic_by_file/summarize_submission.py similarity index 88% rename from module_programming_llm/module_programming_llm/generate_summary_by_file.py rename to module_programming_llm/module_programming_llm/guided/basic_by_file/summarize_submission.py index 30381cd2f..02d07a21a 100644 --- a/module_programming_llm/module_programming_llm/generate_summary_by_file.py +++ b/module_programming_llm/module_programming_llm/guided/basic_by_file/summarize_submission.py @@ -7,7 +7,7 @@ from athena import emit_meta from athena.programming import Exercise, Submission -from module_programming_llm.config import GradedBasicApproachConfig, BasicApproachConfig +from .config import GuidedBasicByFileConfig from module_programming_llm.helpers.llm_utils import ( get_chat_prompt_with_formatting_instructions, num_tokens_from_prompt, @@ -45,10 +45,10 @@ def describe_solution_summary(self) -> str: # pylint: disable=too-many-locals -async def generate_summary_by_file( +async def generate_summarize_submission( exercise: Exercise, submission: Submission, - config: BasicApproachConfig, + config: GuidedBasicByFileConfig, debug: bool, ) -> Optional[SolutionSummary]: """Generate summary for the submission file by file @@ -66,8 +66,8 @@ async def generate_summary_by_file( prompt = get_chat_prompt_with_formatting_instructions( model=model, - system_message=config.generate_file_summary_prompt.system_message, - human_message=config.generate_file_summary_prompt.human_message, + system_message=config.summarize_submission_prompt.system_message, + human_message=config.summarize_submission_prompt.human_message, pydantic_object=FileDescription, ) @@ -87,12 +87,6 @@ async def generate_summary_by_file( submission_repo, file_filter=lambda file_path: file_path in changed_files_from_template_to_submission, ) - chat_prompt = get_chat_prompt_with_formatting_instructions( - model=model, - system_message=config.generate_file_summary_prompt.system_message, - human_message=config.generate_file_summary_prompt.human_message, - pydantic_object=SolutionSummary, - ) prompt_inputs = [] diff --git a/module_programming_llm/module_programming_llm/guided/zero_shot/config/__init__.py b/module_programming_llm/module_programming_llm/guided/zero_shot/config/__init__.py new file mode 100644 index 000000000..d2b664a29 --- /dev/null +++ b/module_programming_llm/module_programming_llm/guided/zero_shot/config/__init__.py @@ -0,0 +1,15 @@ +from abc import ABC +from pydantic import BaseModel, Field + +from module_programming_llm.helpers.models import ModelConfigType, DefaultModelConfig + +from .generate import GuidedZeroShotPrompt + + +class GuidedZeroShotConfig(BaseModel, ABC): + """This approach uses an LLM to just generates non graded suggestions for all changed files at once.""" + + model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore + max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") + + prompt: GuidedZeroShotPrompt = Field(default=GuidedZeroShotPrompt()) diff --git 
a/module_programming_llm/module_programming_llm/guided/zero_shot/config/generate.py b/module_programming_llm/module_programming_llm/guided/zero_shot/config/generate.py new file mode 100644 index 000000000..afbf4b6f6 --- /dev/null +++ b/module_programming_llm/module_programming_llm/guided/zero_shot/config/generate.py @@ -0,0 +1,57 @@ +from pydantic import BaseModel, Field + + +system_message = """\ +You are an expert AI tutor for programming education at a prestigious university. + +## Task +Create minimal guided feedback to nudge a student towards improving their programming skills with didactically valuable feedback. +Act like a teacher who is encouraging and guiding a student to learn and improve without spoiling the solution. + +## Style +1. Constructive +2. Specific +3. Balanced +4. Clear and Concise +5. Actionable +6. Educational +7. Contextual + +Directly address the student, use "you" instead of "the student".\ +""" + + +problem_message = '''\ +Problem statement: +{problem_statement}\ +''' + + +file_message = '''\ +File Path: {file_path} +File with line numbers (: ): +""" +{submission_file} +""" + +Here is what the student changed (- removed, + added by the student): +""" +{template_to_submission_diff} +""" + +Here is the difference between the potential solution by the instructor and the student's submission (don't spoil the solution): +""" +{solution_to_submission_diff} +"""\ +''' + + +class GuidedZeroShotPrompt(BaseModel): + """Prompt for the one-shot guided feedback generation approach.""" + + system_message: str = Field(default=system_message, + description="Message for priming AI behavior and instructing it what to do.",) + problem_message: str = Field(default=problem_message, + description="Message which contains **{problem_statement}**",) + file_message: str = Field(default=file_message, + description="Message for one file which contains **{file_path}**, **{submission_file}** and potentially **{solution_to_submission_diff}**, **{template_to_submission_diff}**, **{template_to_solution_diff}**",) diff --git a/module_programming_llm/module_programming_llm/guided/zero_shot/generate.py b/module_programming_llm/module_programming_llm/guided/zero_shot/generate.py new file mode 100644 index 000000000..bc6dd3512 --- /dev/null +++ b/module_programming_llm/module_programming_llm/guided/zero_shot/generate.py @@ -0,0 +1,190 @@ +import os +from typing import List, Optional, Sequence +from pydantic import BaseModel, Field + +from langchain.prompts import ( + ChatPromptTemplate, + SystemMessagePromptTemplate, + HumanMessagePromptTemplate, +) + +from athena import emit_meta +from athena.programming import Exercise, Submission, Feedback + +from .config import GuidedZeroShotConfig + +from module_programming_llm.helpers.llm_utils import ( + num_tokens_from_string, + predict_and_parse, +) +from module_programming_llm.helpers.utils import ( + get_diff, + load_files_from_repo, + add_line_numbers, + get_programming_language_file_extension, +) + + +class FeedbackModel(BaseModel): + file_path: Optional[str] = Field(description="File path of the feedback, or empty if unreferenced") + line_start: Optional[int] = Field( + description="Referenced line number start, or empty if unreferenced" + ) + line_end: Optional[int] = Field( + description="Referenced line number end, or empty if unreferenced" + ) + description: str = Field(description="Guided feedback description") + + class Config: + title = "GuidedFeedback" + + +class GuidedFeedbackCompendiumModel(BaseModel): + """Compendium of guided feedbacks for 
a submission.""" + + guided_feedbacks: Sequence[FeedbackModel] = Field(description="Guided feedbacks") + + class Config: + title = "GuidedFeedbackCompendium" + + +# pylint: disable=too-many-locals +async def generate_guided_zero_shot_suggestions( + exercise: Exercise, + submission: Submission, + config: GuidedZeroShotConfig, + debug: bool, +) -> List[Feedback]: + model = config.model.get_model() # type: ignore[attr-defined] + + system_message_prompt = SystemMessagePromptTemplate.from_template(config.prompt.system_message) + problem_message_prompt = HumanMessagePromptTemplate.from_template(config.prompt.problem_message) + file_message_prompt = HumanMessagePromptTemplate.from_template(config.prompt.file_message) + + prompt_inputs: List[dict] = [] + + # Feature extraction + template_repo = exercise.get_template_repository() + solution_repo = exercise.get_solution_repository() + submission_repo = submission.get_repository() + + changed_files_from_template_to_submission = get_diff( + src_repo=template_repo, dst_repo=submission_repo, file_path=None, name_only=True + ).split("\n") + changed_files_from_template_to_submission = [ + os.path.join(str(submission_repo.working_tree_dir or ""), file_path) + for file_path in changed_files_from_template_to_submission + ] + + # Changed text files + changed_files = load_files_from_repo( + submission_repo, + file_filter=lambda file_path: file_path + in changed_files_from_template_to_submission, + ) + + problem_statement = exercise.problem_statement or "" + problem_statement = ( + problem_statement + if problem_statement.strip() + else "No problem statement found." + ) + + programming_language_extension = get_programming_language_file_extension( + programming_language=exercise.programming_language + ) + + # Gather prompt inputs for each changed file (independently) + for file_path, file_content in changed_files.items(): + if programming_language_extension and not file_path.endswith(programming_language_extension): + continue + + file_content = add_line_numbers(file_content) + solution_to_submission_diff = get_diff( + src_repo=solution_repo, + dst_repo=submission_repo, + src_prefix="solution", + dst_prefix="submission", + file_path=file_path, + ) + template_to_submission_diff = get_diff( + src_repo=template_repo, + dst_repo=submission_repo, + src_prefix="template", + dst_prefix="submission", + file_path=file_path, + ) + template_to_solution_diff = get_diff( + src_repo=template_repo, + dst_repo=solution_repo, + src_prefix="template", + dst_prefix="solution", + file_path=file_path, + ) + + prompt_inputs.append( + { + "file_path": file_path, + "submission_file": file_content, + "solution_to_submission_diff": solution_to_submission_diff, + "template_to_submission_diff": template_to_submission_diff, + "template_to_solution_diff": template_to_solution_diff, + } + ) + + prompt_input = { + "problem_statement": problem_statement, + "prompt_inputs": prompt_inputs, + } + + chat_prompt = ChatPromptTemplate.from_messages( + [system_message_prompt, problem_message_prompt] + + [file_message_prompt.format(**prompt_input) for prompt_input in prompt_inputs] + ) + + results: Optional[GuidedFeedbackCompendiumModel] = await predict_and_parse( + model=model, + chat_prompt=chat_prompt, + prompt_input=prompt_input, + pydantic_object=GuidedFeedbackCompendiumModel, + tags=[ + f"exercise-{exercise.id}", + f"submission-{submission.id}", + "one-shot-non-graded-suggestions", + ], + ) + + if debug: + emit_meta( + "one-shot-non-graded-suggestions", + [ + { + "file_path": 
prompt_input["file_path"], + "prompt": chat_prompt.format(**prompt_input), + "result": result.dict() if result is not None else None, + } + for prompt_input, result in zip(prompt_inputs, results) + ], + ) + + feedbacks: List[Feedback] = [] + if results is not None: + for feedback in results.guided_feedbacks: + feedbacks.append( + Feedback( + id=None, + exercise_id=exercise.id, + submission_id=submission.id, + title="Guided Feedback", + description=feedback.description, + file_path=feedback.file_path, + line_start=feedback.line_start, + line_end=feedback.line_end, + is_graded=False, + credits=0, + structured_grading_instruction_id=None, + meta={}, + ) + ) + + return feedbacks diff --git a/module_programming_llm/module_programming_llm/helpers/models/openai.py b/module_programming_llm/module_programming_llm/helpers/models/openai.py index ce4726a8f..819217a25 100644 --- a/module_programming_llm/module_programming_llm/helpers/models/openai.py +++ b/module_programming_llm/module_programming_llm/helpers/models/openai.py @@ -142,7 +142,7 @@ def _openai_client(use_azure_api: bool, is_preference: bool): yield -def _get_available_deployments(): +def _get_available_deployments() -> Dict[str, Dict[str, Any]]: available_deployments: Dict[str, Dict[str, Any]] = { "chat_completion": {}, "completion": {}, diff --git a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py deleted file mode 100644 index f8c971bce..000000000 --- a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py +++ /dev/null @@ -1,21 +0,0 @@ -system_message = """\ -You are an AI tutor for programming assessment at a prestigious university. - -# Task -Restructure the grading instructions by student changed file to show relevant information for each file to the tutor. \ -Make it as easy as possible for the tutor to grade the assignment when looking at the changed file. \ -Some instructions may be relevant for multiple files. -""" - -human_message = """\ -Grading instructions: -{grading_instructions} - -Changed files from template to sample solution: -{changed_files_from_template_to_solution} - -Changed files from template to student submission (Pick from this list, very important!): -{changed_files_from_template_to_submission} - -Grading instructions by file: -""" diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_non_grading_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_non_grading_statement_by_file.py deleted file mode 100644 index ff4f0a881..000000000 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_non_grading_statement_by_file.py +++ /dev/null @@ -1,19 +0,0 @@ -system_message = """\ -You are an AI tutor for programming assessment at a prestigious university. - -# Task -Restructure the problem statement by student changed files to gather work items for each file. \ -Some parts of the problem statement may be relevant for multiple files. -Include only those work items based on comments that make sense. -For the file keys, include the full path. 
-""" - -human_message = """\ -Problem statement: -{problem_statement} - -Changed files from template to student submission (Pick from this list, very important!): -{changed_files_from_template_to_submission} - -Problem statement by file: -""" diff --git a/playground/src/helpers/get_data.ts b/playground/src/helpers/get_data.ts index 58211c149..1fa327ab2 100644 --- a/playground/src/helpers/get_data.ts +++ b/playground/src/helpers/get_data.ts @@ -19,7 +19,7 @@ function replaceJsonPlaceholders( // 2. Replace a few placeholders. // Placeholders look like this: `{{placeholder}}` const jsonPlaceholders: { [key: string]: string } = { - exerciseDataUrl: `${athenaOrigin}${baseUrl}/api/data/${dataMode}/exercise/${exerciseId}/data`, + exerciseDataUri: `${athenaOrigin}${baseUrl}/api/data/${dataMode}/exercise/${exerciseId}/data`, }; const result: any = {}; for (const key in json) {