From 9478a930250e745172f8129310db410e5d1bdf46 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 9 Jun 2024 14:07:17 +0200 Subject: [PATCH 1/3] refactor and add one shot --- .vscode/settings.json | 1 + .../.vscode/settings.json | 4 - athena-workspace.code-workspace | 11 +- .../module_programming_llm/__main__.py | 22 +- .../module_programming_llm/config.py | 140 +- .../graded/basic_by_file/config/__init__.py | 23 + .../basic_by_file/config/generate.py} | 29 +- .../config/split_grading_instructions.py | 35 + .../config/split_problem_statement.py} | 15 + .../basic_by_file/generate.py} | 28 +- .../split_grading_instructions.py} | 12 +- .../basic_by_file/split_problem_statement.py} | 12 +- .../guided/basic_by_file/config/__init__.py | 23 + .../basic_by_file/config/generate.py} | 35 +- .../config/split_problem_statement.py | 35 + .../config/summarize_submission.py} | 16 +- .../basic_by_file/generate.py} | 26 +- .../basic_by_file/split_problem_statement.py | 136 + .../basic_by_file/summarize_submission.py} | 10 +- .../guided/one_shot/config/__init__.py | 15 + .../guided/one_shot/config/generate.py | 57 + .../guided/one_shot/generate.py | 176 + .../split_grading_instructions_by_file.py | 21 - ...t_problem_non_grading_statement_by_file.py | 20 - playground/data/example/exercise-1.json | 12 +- .../4_link_programming_repositories.mjs | 16 +- .../details/exercise_detail/programming.tsx | 6 +- .../details/submission_detail/programming.tsx | 2 +- .../selectors/submission_select.tsx | 2 +- playground/src/helpers/get_data.ts | 2 +- playground/src/model/exercise.ts | 6 +- playground/src/model/submission.ts | 2 +- playground/yarn.lock | 4910 ----------------- 33 files changed, 676 insertions(+), 5184 deletions(-) delete mode 100644 assessment_module_manager/.vscode/settings.json create mode 100644 module_programming_llm/module_programming_llm/graded/basic_by_file/config/__init__.py rename 
module_programming_llm/module_programming_llm/{prompts/generate_graded_suggestions_by_file.py => graded/basic_by_file/config/generate.py} (54%) create mode 100644 module_programming_llm/module_programming_llm/graded/basic_by_file/config/split_grading_instructions.py rename module_programming_llm/module_programming_llm/{prompts/split_problem_grading_statement_by_file.py => graded/basic_by_file/config/split_problem_statement.py} (50%) rename module_programming_llm/module_programming_llm/{generate_graded_suggestions_by_file.py => graded/basic_by_file/generate.py} (93%) rename module_programming_llm/module_programming_llm/{split_grading_instructions_by_file.py => graded/basic_by_file/split_grading_instructions.py} (92%) rename module_programming_llm/module_programming_llm/{split_problem_statement_by_file.py => graded/basic_by_file/split_problem_statement.py} (92%) create mode 100644 module_programming_llm/module_programming_llm/guided/basic_by_file/config/__init__.py rename module_programming_llm/module_programming_llm/{prompts/generate_non_graded_suggestions_by_file.py => guided/basic_by_file/config/generate.py} (64%) create mode 100644 module_programming_llm/module_programming_llm/guided/basic_by_file/config/split_problem_statement.py rename module_programming_llm/module_programming_llm/{prompts/summarize_submission_by_file.py => guided/basic_by_file/config/summarize_submission.py} (61%) rename module_programming_llm/module_programming_llm/{generate_non_graded_suggestions_by_file.py => guided/basic_by_file/generate.py} (91%) create mode 100644 module_programming_llm/module_programming_llm/guided/basic_by_file/split_problem_statement.py rename module_programming_llm/module_programming_llm/{generate_summary_by_file.py => guided/basic_by_file/summarize_submission.py} (94%) create mode 100644 module_programming_llm/module_programming_llm/guided/one_shot/config/__init__.py create mode 100644 module_programming_llm/module_programming_llm/guided/one_shot/config/generate.py 
create mode 100644 module_programming_llm/module_programming_llm/guided/one_shot/generate.py delete mode 100644 module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py delete mode 100644 module_programming_llm/module_programming_llm/prompts/split_problem_non_grading_statement_by_file.py delete mode 100644 playground/yarn.lock diff --git a/.vscode/settings.json b/.vscode/settings.json index 3b5da8376..1ceae8a76 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,4 +2,5 @@ "python.linting.enabled": true, "python.linting.prospectorEnabled": true, "python.analysis.typeCheckingMode": "basic", + "python.terminal.activateEnvironment": true } \ No newline at end of file diff --git a/assessment_module_manager/.vscode/settings.json b/assessment_module_manager/.vscode/settings.json deleted file mode 100644 index 2a843f56f..000000000 --- a/assessment_module_manager/.vscode/settings.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "python.pythonPath": "./.venv/bin/python", - "python.analysis.typeCheckingMode": "basic", - } \ No newline at end of file diff --git a/athena-workspace.code-workspace b/athena-workspace.code-workspace index a548d2212..f3350e64e 100644 --- a/athena-workspace.code-workspace +++ b/athena-workspace.code-workspace @@ -28,13 +28,18 @@ "path": "module_programming_themisml" }, { - "path": "module_programming_ast" + "path": "module_programming_ast" }, + { "path": "module_programming_code_embedding" }, ], "settings": { - "python.linting.prospectorEnabled": true, - "python.linting.mypyEnabled": true + "python.pythonPath": "./.venv/bin/python", + "python.linting.enabled": true, + "python.linting.prospectorEnabled": true, + "python.linting.mypyEnabled": true, + "python.analysis.typeCheckingMode": "basic", + "python.terminal.activateEnvironment": true } } \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/__main__.py b/module_programming_llm/module_programming_llm/__main__.py index 
e8356270f..ead227c7a 100644 --- a/module_programming_llm/module_programming_llm/__main__.py +++ b/module_programming_llm/module_programming_llm/__main__.py @@ -13,12 +13,9 @@ from athena.logger import logger from module_programming_llm.config import Configuration -from module_programming_llm.generate_graded_suggestions_by_file import ( - generate_suggestions_by_file as generate_graded_suggestions_by_file, -) -from module_programming_llm.generate_non_graded_suggestions_by_file import ( - generate_suggestions_by_file as generate_non_graded_suggestions_by_file, -) +from module_programming_llm.graded.basic_by_file.generate import generate_graded_basic_by_file_suggestions +from module_programming_llm.guided.basic_by_file.generate import generate_guided_basic_by_file_suggestions +from module_programming_llm.guided.one_shot.generate import generate_guided_one_shot_suggestions @submissions_consumer @@ -42,10 +39,15 @@ async def suggest_feedback(exercise: Exercise, submission: Submission, is_graded logger.info("suggest_feedback: %s suggestions for submission %d of exercise %d were requested", "Graded" if is_graded else "Non-graded", submission.id, exercise.id) if is_graded: - return await generate_graded_suggestions_by_file(exercise, submission, module_config.graded_approach, - module_config.debug) - return await generate_non_graded_suggestions_by_file(exercise, submission, module_config.non_graded_approach, - module_config.debug) + if module_config.graded_basic_by_file: + return await generate_graded_basic_by_file_suggestions(exercise, submission, module_config.graded_basic_by_file, module_config.debug) + else: + # if module_config.guided_basic_by_file: + # return await generate_guided_basic_by_file_suggestions(exercise, submission, module_config.guided_basic_by_file, module_config.debug) + if module_config.guided_one_shot: + return await generate_guided_one_shot_suggestions(exercise, submission, module_config.guided_one_shot, module_config.debug) + + return [] diff --git 
a/module_programming_llm/module_programming_llm/config.py b/module_programming_llm/module_programming_llm/config.py index 5edd58003..93db54d97 100644 --- a/module_programming_llm/module_programming_llm/config.py +++ b/module_programming_llm/module_programming_llm/config.py @@ -1,138 +1,11 @@ -from abc import ABC - from pydantic import BaseModel, Field from athena import config_schema_provider -from module_programming_llm.helpers.models import ModelConfigType, DefaultModelConfig -from module_programming_llm.prompts.generate_graded_suggestions_by_file import ( - system_message as generate_graded_suggestions_by_file_system_message, - human_message as generate_graded_suggestions_by_file_human_message, -) -from module_programming_llm.prompts.generate_non_graded_suggestions_by_file import ( - system_message as generate_non_graded_suggestions_by_file_system_message, - human_message as generate_non_graded_suggestions_by_file_human_message, -) -from module_programming_llm.prompts.split_grading_instructions_by_file import ( - system_message as split_grading_instructions_by_file_message, - human_message as split_grading_instructions_by_file_human_message, -) -from module_programming_llm.prompts.split_problem_non_grading_statement_by_file import ( - system_message as split_problem_statements_by_file_system_message_without_solution, - human_message as split_problem_statements_by_file_human_message_without_solution, -) -from module_programming_llm.prompts.split_problem_grading_statement_by_file import ( - system_message as split_problem_statements_by_file_system_message_with_solution, - human_message as split_problem_statements_by_file_human_message_with_solution, -) -from module_programming_llm.prompts.summarize_submission_by_file import ( - system_message as summarize_submission_by_file_system_message, - human_message as summarize_submission_by_file_human_message, -) - - -class SplitProblemStatementsBasePrompt(BaseModel): - """Base class for splitting problem statements into 
file-based ones, providing a structured approach for processing statements.""" - - system_message: str = Field(..., - description="Message for priming AI behavior and instructing it what to do.") - human_message: str = Field(..., - description="Message for priming AI behavior and instructing it what to do.") - tokens_before_split: int = Field(default=250, - description="Split the problem statement into file-based ones after this number of tokens.") - - -class SplitProblemStatementsWithSolutionByFilePrompt(SplitProblemStatementsBasePrompt): - """Specialized class for splitting problem statements with solutions, for cases where detailed solution information is available.""" - system_message: str = split_problem_statements_by_file_system_message_with_solution - human_message: str = split_problem_statements_by_file_human_message_with_solution - - -class SplitProblemStatementsWithoutSolutionByFilePrompt( - SplitProblemStatementsBasePrompt -): - """Specialized class for splitting problem statements without solutions, applicable when solution details are not provided.""" - system_message: str = split_problem_statements_by_file_system_message_without_solution - human_message: str = split_problem_statements_by_file_human_message_without_solution - - -class SplitGradingInstructionsByFilePrompt(BaseModel): - """\ -Features available: **{grading_instructions}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}**\ -""" - system_message: str = Field(default=split_grading_instructions_by_file_message, - description="Message for priming AI behavior and instructing it what to do.") - human_message: str = Field(default=split_grading_instructions_by_file_human_message, - description="Message from a human. 
The input on which the AI is supposed to act.") - tokens_before_split: int = Field(default=250, description="Split the grading instructions into file-based ones after this number of tokens.") - - -class FeedbackGenerationBasePrompt(BaseModel): - """Base class for feedback generation prompts, contains common definitions.""" - - system_message: str = Field(..., - description="Message for priming AI behavior and instructing it what to do.",) - human_message: str = Field(..., - description="Message from a human. The input on which the AI is supposed to act.",) - - -class GradedFeedbackGenerationPrompt(FeedbackGenerationBasePrompt): - """Generates graded feedback based on file submissions, tailored to provide detailed, evaluative comments and scores.""" - - system_message: str = generate_graded_suggestions_by_file_system_message - human_message: str = generate_graded_suggestions_by_file_human_message - - -class NonGradedFeedbackGenerationPrompt(FeedbackGenerationBasePrompt): - """\ -Features available: **{problem_statement}**, **{submission_file}** - -*Note: Prompt will be applied per file independently. Also, you don't have to include all features, -e.g. template_to_submission_diff. - """ - - system_message: str = generate_non_graded_suggestions_by_file_system_message - human_message: str = generate_non_graded_suggestions_by_file_human_message - - -class FileSummaryPrompt(BaseModel): - """Generates concise summaries of submission files, facilitating a quicker review and understanding of the content for AI processing.""" - - system_message: str = Field(summarize_submission_by_file_system_message, - description="Message for priming AI behavior and instructing it what to do.") - human_message: str = Field(summarize_submission_by_file_human_message, - description="Message from a human. 
The input on which the AI is supposed to act.") - - -class BasicApproachConfig(BaseModel): - """Defines a basic configuration for processing submissions, incorporating problem statement splitting, feedback generation, and file summarization.""" - - max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") - model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore - max_number_of_files: int = Field(default=25, description="Maximum number of files. If exceeded, it will prioritize the most important ones.") - split_problem_statement_by_file_prompt: SplitProblemStatementsBasePrompt = Field(description="To be defined in " "subclasses.") - generate_suggestions_by_file_prompt: SplitProblemStatementsBasePrompt = Field(description="To be defined in " "subclasses.") - generate_file_summary_prompt: FileSummaryPrompt = Field(default=FileSummaryPrompt(), description="Generates short summaries to be fed into the LLM with separate files.") - - -class GradedBasicApproachConfig(BasicApproachConfig, ABC): - """\ -This approach uses an LLM to split up the problem statement and grading instructions by file, if necessary. \ -Then, it generates graded suggestions for each file independently.\ -""" - - split_problem_statement_by_file_prompt: SplitProblemStatementsWithSolutionByFilePrompt = Field(default=SplitProblemStatementsWithSolutionByFilePrompt()) - split_grading_instructions_by_file_prompt: SplitGradingInstructionsByFilePrompt = (Field(default=SplitGradingInstructionsByFilePrompt())) - generate_suggestions_by_file_prompt: FeedbackGenerationBasePrompt = Field(default=GradedFeedbackGenerationPrompt()) - -class NonGradedBasicApproachConfig(BasicApproachConfig, ABC): - """\ -This approach uses an LLM to split up the problem statement, if necessary. 
\ -Then, it generates non graded suggestions for each file independently.\ -""" +from module_programming_llm.graded.basic_by_file.config import GradedBasicByFileConfig - split_problem_statement_by_file_prompt: SplitProblemStatementsWithoutSolutionByFilePrompt = Field(default=SplitProblemStatementsWithoutSolutionByFilePrompt()) - generate_suggestions_by_file_prompt: FeedbackGenerationBasePrompt = Field(default=NonGradedFeedbackGenerationPrompt()) +from module_programming_llm.guided.basic_by_file.config import GuidedBasicByFileConfig +from module_programming_llm.guided.one_shot.config import GuidedOneShotConfig @config_schema_provider @@ -140,5 +13,8 @@ class Configuration(BaseModel): """Configuration settings for the entire module, including debug mode and approach-specific configurations.""" debug: bool = Field(default=False, description="Enable debug mode.") - graded_approach: GradedBasicApproachConfig = Field(default=GradedBasicApproachConfig()) - non_graded_approach: NonGradedBasicApproachConfig = Field(default=NonGradedBasicApproachConfig()) \ No newline at end of file + + graded_basic_by_file: GradedBasicByFileConfig = Field(default=GradedBasicByFileConfig()) + + # GuidedBasicByFileConfig | GuidedOneShotConfig + guided_one_shot: GuidedOneShotConfig = Field(default=GuidedOneShotConfig()) \ No newline at end of file diff --git a/module_programming_llm/module_programming_llm/graded/basic_by_file/config/__init__.py b/module_programming_llm/module_programming_llm/graded/basic_by_file/config/__init__.py new file mode 100644 index 000000000..97e162531 --- /dev/null +++ b/module_programming_llm/module_programming_llm/graded/basic_by_file/config/__init__.py @@ -0,0 +1,23 @@ +from abc import ABC +from pydantic import BaseModel, Field + +from module_programming_llm.helpers.models import ModelConfigType, DefaultModelConfig + +from .generate import GradedFeedbackGenerationPrompt +from .split_grading_instructions import SplitGradingInstructionsPrompt +from 
.split_problem_statement import SplitProblemStatementPrompt + + +class GradedBasicByFileConfig(BaseModel, ABC): + """\ +This approach uses an LLM to split up the problem statement and grading instructions by file, if necessary. \ +Then, it generates graded suggestions for each file independently.\ +""" + model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore + max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") + max_number_of_files: int = Field(default=25, description="Maximum number of files. If exceeded, it will prioritize the most important ones.") + + generate_prompt: GradedFeedbackGenerationPrompt = Field(default=GradedFeedbackGenerationPrompt()) + split_grading_instructions_prompt: SplitGradingInstructionsPrompt = (Field(default=SplitGradingInstructionsPrompt())) + split_problem_statement_prompt: SplitProblemStatementPrompt = Field(default=SplitProblemStatementPrompt()) + diff --git a/module_programming_llm/module_programming_llm/prompts/generate_graded_suggestions_by_file.py b/module_programming_llm/module_programming_llm/graded/basic_by_file/config/generate.py similarity index 54% rename from module_programming_llm/module_programming_llm/prompts/generate_graded_suggestions_by_file.py rename to module_programming_llm/module_programming_llm/graded/basic_by_file/config/generate.py index 8b1f00832..ba72202d4 100644 --- a/module_programming_llm/module_programming_llm/prompts/generate_graded_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/graded/basic_by_file/config/generate.py @@ -1,3 +1,6 @@ +from pydantic import BaseModel, Field + + system_message = """\ You are an AI tutor for programming assessment at a prestigious university. 
@@ -14,6 +17,14 @@ # Grading instructions {grading_instructions} Max points: {max_points}, bonus points: {bonus_points} (whole assessment, not just this file) +""" + + +file_message = """\ +Student\'s submission file to grade (with line numbers : ): +\"\"\" +{submission_file} +\"\"\"\ # Diff between solution (deletions) and student\'s submission (additions): {solution_to_submission_diff} @@ -22,9 +33,15 @@ {template_to_submission_diff} """ -human_message = """\ -Student\'s submission file to grade (with line numbers : ): -\"\"\" -{submission_file} -\"\"\"\ -""" + +class GradedFeedbackGenerationPrompt(BaseModel): + """\ + Generates graded feedback based on file submissions, tailored to provide detailed, evaluative comments and scores. + + Features available: **{problem_statement}**, **{grading_instructions}**, **{max_points}**, **{bonus_points}**, **{solution_to_submission_diff}**, **{template_to_submission_diff}**, **{submission_file}**\ + """ + + system_message: str = Field(default=system_message, + description="Message for priming AI behavior and instructing it what to do.") + file_message: str = Field(default=file_message, + description="Message containing the context of a single file submission.") diff --git a/module_programming_llm/module_programming_llm/graded/basic_by_file/config/split_grading_instructions.py b/module_programming_llm/module_programming_llm/graded/basic_by_file/config/split_grading_instructions.py new file mode 100644 index 000000000..79fbb1f61 --- /dev/null +++ b/module_programming_llm/module_programming_llm/graded/basic_by_file/config/split_grading_instructions.py @@ -0,0 +1,35 @@ +from pydantic import BaseModel, Field + + +system_message = """\ +You are an AI tutor for programming assessment at a prestigious university. + +# Task +Restructure the grading instructions by student changed file to show relevant information for each file to the tutor. 
\ +Make it as easy as possible for the tutor to grade the assignment when looking at the changed file. \ +Some instructions may be relevant for multiple files. +""" + + +human_message = """\ +Grading instructions: +{grading_instructions} + +Changed files from template to sample solution: +{changed_files_from_template_to_solution} + +Changed files from template to student submission (Pick from this list, very important!): +{changed_files_from_template_to_submission} + +Grading instructions by file: +""" + + +class SplitGradingInstructionsPrompt(BaseModel): + """Features available: **{grading_instructions}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}**""" + + system_message: str = Field(default=system_message, + description="Message for priming AI behavior and instructing it what to do.") + grading_instructions_message: str = Field(default=human_message, + description="Message containing the context needed to split the grading instructions by file.") + tokens_before_split: int = Field(default=250, description="Split the grading instructions into file-based ones after this number of tokens.") diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_grading_statement_by_file.py b/module_programming_llm/module_programming_llm/graded/basic_by_file/config/split_problem_statement.py similarity index 50% rename from module_programming_llm/module_programming_llm/prompts/split_problem_grading_statement_by_file.py rename to module_programming_llm/module_programming_llm/graded/basic_by_file/config/split_problem_statement.py index ce4620c7c..dbd95d880 100644 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_grading_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/graded/basic_by_file/config/split_problem_statement.py @@ -1,3 +1,6 @@ +from pydantic import BaseModel, Field + + system_message = """\ You are an AI tutor for programming assessment at a 
prestigious university. @@ -8,6 +11,7 @@ For the file keys, include the full path. """ + human_message = """\ Problem statement: {problem_statement} @@ -20,3 +24,14 @@ Problem statement by file: """ + + +class SplitProblemStatementPrompt(BaseModel): + """Features available: **{problem_statement}**, **{changed_files_from_template_to_solution}**, **{changed_files_from_template_to_submission}**""" + + system_message: str = Field(default=system_message, + description="Message for priming AI behavior and instructing it what to do.") + human_message: str = Field(default=human_message, + description="Message for priming AI behavior and instructing it what to do.") + tokens_before_split: int = Field(default=250, + description="Split the problem statement into file-based ones after this number of tokens.") diff --git a/module_programming_llm/module_programming_llm/generate_graded_suggestions_by_file.py b/module_programming_llm/module_programming_llm/graded/basic_by_file/generate.py similarity index 93% rename from module_programming_llm/module_programming_llm/generate_graded_suggestions_by_file.py rename to module_programming_llm/module_programming_llm/graded/basic_by_file/generate.py index 1fe4dde47..0a0b56189 100644 --- a/module_programming_llm/module_programming_llm/generate_graded_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/graded/basic_by_file/generate.py @@ -6,13 +6,10 @@ from athena import emit_meta from athena.programming import Exercise, Submission, Feedback -from module_programming_llm.config import GradedBasicApproachConfig -from module_programming_llm.split_grading_instructions_by_file import ( - split_grading_instructions_by_file, -) -from module_programming_llm.split_problem_statement_by_file import ( - split_problem_statement_by_file, -) +from .config import GradedBasicByFileConfig +from .split_grading_instructions import generate_split_grading_instructions_by_file +from .split_problem_statement import 
generate_split_problem_statement_by_file + from module_programming_llm.helpers.llm_utils import ( check_prompt_length_and_omit_features_if_necessary, get_chat_prompt_with_formatting_instructions, @@ -57,31 +54,31 @@ class Config: # pylint: disable=too-many-locals -async def generate_suggestions_by_file( +async def generate_graded_basic_by_file_suggestions( exercise: Exercise, submission: Submission, - config: GradedBasicApproachConfig, + config: GradedBasicByFileConfig, debug: bool, ) -> List[Feedback]: model = config.model.get_model() # type: ignore[attr-defined] chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, - system_message=config.generate_suggestions_by_file_prompt.system_message, - human_message=config.generate_suggestions_by_file_prompt.human_message, + system_message=config.generate_prompt.system_message, + human_message=config.generate_prompt.file_message, pydantic_object=AssessmentModel, ) # Get split problem statement and grading instructions by file (if necessary) split_problem_statement, split_grading_instructions = await asyncio.gather( - split_problem_statement_by_file( + generate_split_problem_statement_by_file( exercise=exercise, submission=submission, prompt=chat_prompt, config=config, debug=debug, ), - split_grading_instructions_by_file( + generate_split_grading_instructions_by_file( exercise=exercise, submission=submission, prompt=chat_prompt, @@ -93,7 +90,7 @@ async def generate_suggestions_by_file( problem_statement_tokens = num_tokens_from_string(exercise.problem_statement or "") is_short_problem_statement = ( problem_statement_tokens - <= config.split_problem_statement_by_file_prompt.tokens_before_split + <= config.split_problem_statement_prompt.tokens_before_split ) file_problem_statements = ( { @@ -106,7 +103,7 @@ async def generate_suggestions_by_file( is_short_grading_instructions = ( num_tokens_from_string(exercise.grading_instructions) - <= config.split_grading_instructions_by_file_prompt.tokens_before_split + 
<= config.split_grading_instructions_prompt.tokens_before_split if exercise.grading_instructions is not None else True ) @@ -310,6 +307,7 @@ async def generate_suggestions_by_file( ) feedbacks.append( Feedback( + id=None, exercise_id=exercise.id, submission_id=submission.id, title=feedback.title, diff --git a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/graded/basic_by_file/split_grading_instructions.py similarity index 92% rename from module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py rename to module_programming_llm/module_programming_llm/graded/basic_by_file/split_grading_instructions.py index 0e402b378..46d3a85b3 100644 --- a/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py +++ b/module_programming_llm/module_programming_llm/graded/basic_by_file/split_grading_instructions.py @@ -7,7 +7,7 @@ from athena import emit_meta from athena.programming import Exercise, Submission -from module_programming_llm.config import GradedBasicApproachConfig +from .config import GradedBasicByFileConfig from module_programming_llm.helpers.llm_utils import ( get_chat_prompt_with_formatting_instructions, num_tokens_from_string, @@ -28,11 +28,11 @@ class SplitGradingInstructions(BaseModel): # pylint: disable=too-many-locals -async def split_grading_instructions_by_file( +async def generate_split_grading_instructions_by_file( exercise: Exercise, submission: Submission, prompt: ChatPromptTemplate, - config: GradedBasicApproachConfig, + config: GradedBasicByFileConfig, debug: bool ) -> Optional[SplitGradingInstructions]: """Split the general grading instructions by file @@ -52,7 +52,7 @@ async def split_grading_instructions_by_file( # Return None if the grading instructions are too short if (grading_instructions is None or num_tokens_from_string( - grading_instructions) <= 
config.split_grading_instructions_by_file_prompt.tokens_before_split): + grading_instructions) <= config.split_grading_instructions_prompt.tokens_before_split): return None # Return None if the grading instructions are not in the prompt @@ -75,8 +75,8 @@ async def split_grading_instructions_by_file( chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, - system_message=config.split_grading_instructions_by_file_prompt.system_message, - human_message=config.split_grading_instructions_by_file_prompt.human_message, + system_message=config.split_grading_instructions_prompt.system_message, + human_message=config.split_grading_instructions_prompt.grading_instructions_message, pydantic_object=SplitGradingInstructions, ) diff --git a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py b/module_programming_llm/module_programming_llm/graded/basic_by_file/split_problem_statement.py similarity index 92% rename from module_programming_llm/module_programming_llm/split_problem_statement_by_file.py rename to module_programming_llm/module_programming_llm/graded/basic_by_file/split_problem_statement.py index 86f841ee1..d97990659 100644 --- a/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py +++ b/module_programming_llm/module_programming_llm/graded/basic_by_file/split_problem_statement.py @@ -7,7 +7,6 @@ from athena import emit_meta from athena.programming import Exercise, Submission -from module_programming_llm.config import GradedBasicApproachConfig, BasicApproachConfig from module_programming_llm.helpers.llm_utils import ( get_chat_prompt_with_formatting_instructions, num_tokens_from_string, @@ -16,6 +15,7 @@ ) from module_programming_llm.helpers.utils import get_diff +from .config import GradedBasicByFileConfig class FileProblemStatement(BaseModel): file_name: str = Field(description="File name") @@ -29,11 +29,11 @@ class SplitProblemStatement(BaseModel): # pylint: disable=too-many-locals -async def 
split_problem_statement_by_file( +async def generate_split_problem_statement_by_file( exercise: Exercise, submission: Submission, prompt: ChatPromptTemplate, - config: BasicApproachConfig, + config: GradedBasicByFileConfig, debug: bool ) -> Optional[SplitProblemStatement]: """Split the general problem statement by file @@ -49,7 +49,7 @@ async def split_problem_statement_by_file( """ # Return None if the problem statement is too short - if num_tokens_from_string(exercise.problem_statement or "") <= config.split_problem_statement_by_file_prompt.tokens_before_split: + if num_tokens_from_string(exercise.problem_statement or "") <= config.split_problem_statement_prompt.tokens_before_split: return None # Return None if the problem statement not in the prompt @@ -70,8 +70,8 @@ async def split_problem_statement_by_file( chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, - system_message=config.split_problem_statement_by_file_prompt.system_message, - human_message=config.split_problem_statement_by_file_prompt.human_message, + system_message=config.split_problem_statement_prompt.system_message, + human_message=config.split_problem_statement_prompt.human_message, pydantic_object=SplitProblemStatement ) diff --git a/module_programming_llm/module_programming_llm/guided/basic_by_file/config/__init__.py b/module_programming_llm/module_programming_llm/guided/basic_by_file/config/__init__.py new file mode 100644 index 000000000..89089b955 --- /dev/null +++ b/module_programming_llm/module_programming_llm/guided/basic_by_file/config/__init__.py @@ -0,0 +1,23 @@ +from abc import ABC +from pydantic import BaseModel, Field + +from module_programming_llm.helpers.models import ModelConfigType, DefaultModelConfig + +from .generate import GuidedFeedbackGenerationPrompt +from .split_problem_statement import SplitProblemStatementPrompt +from .summarize_submission import FileSummaryPrompt + + +class GuidedBasicByFileConfig(BaseModel, ABC): + """\ + This approach uses an 
LLM to split up the problem statement, if necessary. + Then, it generates non graded suggestions for each file independently.\ + """ + + model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore + max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") + max_number_of_files: int = Field(default=25, description="Maximum number of files. If exceeded, it will prioritize the most important ones.") + + generate_prompt: GuidedFeedbackGenerationPrompt = Field(default=GuidedFeedbackGenerationPrompt()) + split_problem_statement_prompt: SplitProblemStatementPrompt = (Field(default=SplitProblemStatementPrompt())) + summarize_submission_prompt: FileSummaryPrompt = Field(default=FileSummaryPrompt()) diff --git a/module_programming_llm/module_programming_llm/prompts/generate_non_graded_suggestions_by_file.py b/module_programming_llm/module_programming_llm/guided/basic_by_file/config/generate.py similarity index 64% rename from module_programming_llm/module_programming_llm/prompts/generate_non_graded_suggestions_by_file.py rename to module_programming_llm/module_programming_llm/guided/basic_by_file/config/generate.py index 9b52fefcf..382be4c3f 100644 --- a/module_programming_llm/module_programming_llm/prompts/generate_non_graded_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/guided/basic_by_file/config/generate.py @@ -1,3 +1,6 @@ +from pydantic import BaseModel, Field + + system_message = """\ You are an AI tutor for programming assessment at a prestigious university. @@ -5,7 +8,7 @@ {problem_statement} # Task -Create non graded improvement suggestions for a student\'s programming submission that a human tutor would recommend. \ +Create non graded improvement suggestions for a student\'s programming submission that a human tutor would recommend. Assume the tutor is not familiar with the solution. The feedback must contain only the feedback the student can learn from. 
Important: the answer you generate must not contain any solution suggestions or contain corrected errors. @@ -22,24 +25,38 @@ In git diff, lines marked with '-' were removed and with '+' were added by the student. -# The student will be reading your response, use you instead of them +# The student will be reading your response, use you instead of them\ """ -human_message = """\ + +human_message = '''\ Path: {file_path} File(with line numbers : ): -\"\"\" +""" {submission_file} -\"\"\"\ +""" Summary of other files in the solution: -\"\"\" +""" {summary} -\"\"\" +""" The template->submission diff(only as reference): -\"\"\" +""" {template_to_submission_diff} -\"\"\" """ +''' + + +class GuidedFeedbackGenerationPrompt(BaseModel): + """\ + Generates guided feedback based on file submissions, tailored to provide non graded improvement suggestions the student can learn from. + + Features available: **{problem_statement}**, **{submission_file}**, **{template_to_submission_diff}**, **{file_path}**, **{summary}**\ + """ + + system_message: str = Field(default=system_message, + description="Message for priming AI behavior and instructing it what to do.") + human_message: str = Field(default=human_message, + description="Message containing the context of a single file submission.") diff --git a/module_programming_llm/module_programming_llm/guided/basic_by_file/config/split_problem_statement.py b/module_programming_llm/module_programming_llm/guided/basic_by_file/config/split_problem_statement.py new file mode 100644 index 000000000..695ba8739 --- /dev/null +++ b/module_programming_llm/module_programming_llm/guided/basic_by_file/config/split_problem_statement.py @@ -0,0 +1,35 @@ +from pydantic import BaseModel, Field + + +system_message = """\ +You are an AI tutor for programming assessment at a prestigious university. + +# Task +Restructure the problem statement by student changed files to gather work items for each file.
\ +Some parts of the problem statement may be relevant for multiple files. +Comments in the template solution can be relevant for some files, some might be not. +Include only those work items based on comments that make sense. +For the file keys, include the full path. +""" + + +human_message = """\ +Problem statement: +{problem_statement} + +Changed files from template to student submission (Pick from this list, very important!): +{changed_files_from_template_to_submission} + +Problem statement by file: +""" + + +class SplitProblemStatementPrompt(BaseModel): + """Features available: **{problem_statement}**, **{changed_files_from_template_to_submission}**""" + + system_message: str = Field(default=system_message, + description="Message for priming AI behavior and instructing it what to do.") + human_message: str = Field(default=human_message, + description="Message containing the problem statement and the changed files to split it by.") + tokens_before_split: int = Field(default=250, + description="Split the problem statement into file-based ones after this number of tokens.") diff --git a/module_programming_llm/module_programming_llm/prompts/summarize_submission_by_file.py b/module_programming_llm/module_programming_llm/guided/basic_by_file/config/summarize_submission.py similarity index 61% rename from module_programming_llm/module_programming_llm/prompts/summarize_submission_by_file.py rename to module_programming_llm/module_programming_llm/guided/basic_by_file/config/summarize_submission.py index 7ba2d1281..52741b7fd 100644 --- a/module_programming_llm/module_programming_llm/prompts/summarize_submission_by_file.py +++ b/module_programming_llm/module_programming_llm/guided/basic_by_file/config/summarize_submission.py @@ -1,3 +1,6 @@ +from pydantic import BaseModel, Field + + system_message = """\ You are a very experienced software engineer. @@ -19,10 +22,21 @@ It is absolutely unacceptable to include any free text that is not part of schema or any format violating response.
""" + human_message = """\ -Path: {file_path} +File path: {file_path} File: \"\"\" {submission_file} \"\"\" """ + + +class FileSummaryPrompt(BaseModel): + """Generates concise summaries of submission files, facilitating a quicker review and understanding of the content for AI processing.""" + + system_message: str = Field(default=system_message, + description="Message for priming AI behavior and instructing it what to do.") + human_message: str = Field(default=human_message, + description="Message from a human. The input on which the AI is supposed to act.") + diff --git a/module_programming_llm/module_programming_llm/generate_non_graded_suggestions_by_file.py b/module_programming_llm/module_programming_llm/guided/basic_by_file/generate.py similarity index 91% rename from module_programming_llm/module_programming_llm/generate_non_graded_suggestions_by_file.py rename to module_programming_llm/module_programming_llm/guided/basic_by_file/generate.py index b857a24f4..90fc48c4a 100644 --- a/module_programming_llm/module_programming_llm/generate_non_graded_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/guided/basic_by_file/generate.py @@ -6,11 +6,10 @@ from athena import emit_meta from athena.programming import Exercise, Submission, Feedback -from module_programming_llm.config import NonGradedBasicApproachConfig -from module_programming_llm.generate_summary_by_file import generate_summary_by_file -from module_programming_llm.split_problem_statement_by_file import ( - split_problem_statement_by_file, -) +from .config import GuidedBasicByFileConfig +from .summarize_submission import generate_summarize_submission +from .split_problem_statement import generate_split_problem_statement_by_file + from module_programming_llm.helpers.llm_utils import ( check_prompt_length_and_omit_features_if_necessary, get_chat_prompt_with_formatting_instructions, @@ -51,18 +50,18 @@ class Config: # pylint: disable=too-many-locals -async def 
generate_suggestions_by_file( +async def generate_guided_basic_by_file_suggestions( exercise: Exercise, submission: Submission, - config: NonGradedBasicApproachConfig, + config: GuidedBasicByFileConfig, debug: bool, ) -> List[Feedback]: model = config.model.get_model() # type: ignore[attr-defined] chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, - system_message=config.generate_suggestions_by_file_prompt.system_message, - human_message=config.generate_suggestions_by_file_prompt.human_message, + system_message=config.generate_prompt.system_message, + human_message=config.generate_prompt.human_message, pydantic_object=ImprovementModel, ) @@ -88,7 +87,7 @@ async def generate_suggestions_by_file( ) # Get solution summary by file (if necessary) - solution_summary = await generate_summary_by_file( + solution_summary = await generate_summarize_submission( exercise=exercise, submission=submission, prompt=chat_prompt, @@ -99,7 +98,7 @@ async def generate_suggestions_by_file( # Get split problem statement by file (if necessary) - split_problem_statement = await split_problem_statement_by_file( + split_problem_statement = await generate_split_problem_statement_by_file( exercise=exercise, submission=submission, prompt=chat_prompt, @@ -110,7 +109,7 @@ async def generate_suggestions_by_file( problem_statement_tokens = num_tokens_from_string(exercise.problem_statement or "") is_short_problem_statement = ( problem_statement_tokens - <= config.split_problem_statement_by_file_prompt.tokens_before_split + <= config.split_problem_statement_prompt.tokens_before_split ) file_problem_statements = ( { @@ -250,6 +249,7 @@ async def generate_suggestions_by_file( for feedback in result.feedbacks: feedbacks.append( Feedback( + id=None, exercise_id=exercise.id, submission_id=submission.id, title=feedback.title, @@ -258,6 +258,8 @@ async def generate_suggestions_by_file( line_start=feedback.line_start, line_end=feedback.line_end, is_graded=False, + credits=0, + 
structured_grading_instruction_id=None, meta={}, ) ) diff --git a/module_programming_llm/module_programming_llm/guided/basic_by_file/split_problem_statement.py b/module_programming_llm/module_programming_llm/guided/basic_by_file/split_problem_statement.py new file mode 100644 index 000000000..9ef8749c3 --- /dev/null +++ b/module_programming_llm/module_programming_llm/guided/basic_by_file/split_problem_statement.py @@ -0,0 +1,136 @@ +from typing import Optional, Sequence +from collections import defaultdict + +from pydantic import BaseModel, Field +from langchain.prompts import ChatPromptTemplate + +from athena import emit_meta +from athena.programming import Exercise, Submission + +from module_programming_llm.helpers.llm_utils import ( + get_chat_prompt_with_formatting_instructions, + num_tokens_from_string, + num_tokens_from_prompt, + predict_and_parse +) +from module_programming_llm.helpers.utils import get_diff + +from .config import GuidedBasicByFileConfig + +class FileProblemStatement(BaseModel): + file_name: str = Field(description="File name") + problem_statement: str = Field(description="Problem statement relevant for this file") + + +class SplitProblemStatement(BaseModel): + """Collection of problem statements split by file""" + + items: Sequence[FileProblemStatement] = Field(description="File problem statements") + + +# pylint: disable=too-many-locals +async def generate_split_problem_statement_by_file( + exercise: Exercise, + submission: Submission, + prompt: ChatPromptTemplate, + config: GuidedBasicByFileConfig, + debug: bool + ) -> Optional[SplitProblemStatement]: + """Split the general problem statement by file + + Args: + exercise (Exercise): Exercise to split the problem statement for (respecting the changed files) + submission (Submission): Submission to split the problem statement for (respecting the changed files) + prompt (ChatPromptTemplate): Prompt template to check for problem_statement + config (GuidedBasicByFileConfig): Configuration + +
Returns: + Optional[SplitProblemStatement]: Split problem statement, None if it is too short or too long + """ + + # Return None if the problem statement is too short + if num_tokens_from_string(exercise.problem_statement or "") <= config.split_problem_statement_prompt.tokens_before_split: + return None + + # Return None if the problem statement not in the prompt + if "problem_statement" not in prompt.input_variables: + return None + + model = config.model.get_model() # type: ignore[attr-defined] + + template_repo = exercise.get_template_repository() + submission_repo = submission.get_repository() + + changed_files_from_template_to_submission = get_diff( + src_repo=template_repo, + dst_repo=submission_repo, + file_path=None, + name_only=True + ).split("\n") + + chat_prompt = get_chat_prompt_with_formatting_instructions( + model=model, + system_message=config.split_problem_statement_prompt.system_message, + human_message=config.split_problem_statement_prompt.human_message, + pydantic_object=SplitProblemStatement + ) + + prompt_input = { + "problem_statement": exercise.problem_statement or "No problem statement.", + "changed_files_from_template_to_submission": ", ".join(changed_files_from_template_to_submission) + } + + if "changed_files_from_template_to_solution" in prompt.input_variables: + solution_repo = exercise.get_solution_repository() + changed_files_from_template_to_solution = get_diff( + src_repo=template_repo, + dst_repo=solution_repo, + file_path=None, + name_only=True, + ).split("\n") + prompt_input["changed_files_from_template_to_solution"] = ", ".join( + changed_files_from_template_to_solution + ) + + # Return None if the prompt is too long + if num_tokens_from_prompt(chat_prompt, prompt_input) > config.max_input_tokens: + return None + + split_problem_statement = await predict_and_parse( + model=model, + chat_prompt=chat_prompt, + prompt_input=prompt_input, + pydantic_object=SplitProblemStatement, + tags=[ + f"exercise-{exercise.id}", + 
f"submission-{submission.id}", + "split-problem-statement-by-file" + ] + ) + + if debug: + emit_meta("file_problem_statements", { + "prompt": chat_prompt.format(**prompt_input), + "result": split_problem_statement.dict() if split_problem_statement is not None else None + }) + + if split_problem_statement is None or not split_problem_statement.items: + return None + + # Join duplicate file names (some responses contain multiple problem statements for the same file) + file_problem_statements_by_file_name = defaultdict(list) + for file_problem_statement in split_problem_statement.items: + file_problem_statements_by_file_name[file_problem_statement.file_name].append(file_problem_statement) + + split_problem_statement.items = [ + FileProblemStatement( + file_name=file_name, + problem_statement="\n".join( + file_problem_statement.problem_statement + for file_problem_statement in file_problem_statements + ) + ) + for file_name, file_problem_statements in file_problem_statements_by_file_name.items() + ] + + return split_problem_statement diff --git a/module_programming_llm/module_programming_llm/generate_summary_by_file.py b/module_programming_llm/module_programming_llm/guided/basic_by_file/summarize_submission.py similarity index 94% rename from module_programming_llm/module_programming_llm/generate_summary_by_file.py rename to module_programming_llm/module_programming_llm/guided/basic_by_file/summarize_submission.py index a42315b34..a08f35d32 100644 --- a/module_programming_llm/module_programming_llm/generate_summary_by_file.py +++ b/module_programming_llm/module_programming_llm/guided/basic_by_file/summarize_submission.py @@ -8,7 +8,7 @@ from athena import emit_meta from athena.programming import Exercise, Submission -from module_programming_llm.config import GradedBasicApproachConfig, BasicApproachConfig +from .config import GuidedBasicByFileConfig from module_programming_llm.helpers.llm_utils import ( get_chat_prompt_with_formatting_instructions, 
num_tokens_from_prompt, @@ -46,11 +46,11 @@ def describe_solution_summary(self) -> str: # pylint: disable=too-many-locals -async def generate_summary_by_file( +async def generate_summarize_submission( exercise: Exercise, submission: Submission, prompt: ChatPromptTemplate, - config: BasicApproachConfig, + config: GuidedBasicByFileConfig, debug: bool, ) -> Optional[SolutionSummary]: """Generaty summary for the submission file by file @@ -89,8 +89,8 @@ async def generate_summary_by_file( ) chat_prompt = get_chat_prompt_with_formatting_instructions( model=model, - system_message=config.generate_file_summary_prompt.system_message, - human_message=config.generate_file_summary_prompt.human_message, + system_message=config.summarize_submission_prompt.system_message, + human_message=config.summarize_submission_prompt.human_message, pydantic_object=FileDescription, ) diff --git a/module_programming_llm/module_programming_llm/guided/one_shot/config/__init__.py b/module_programming_llm/module_programming_llm/guided/one_shot/config/__init__.py new file mode 100644 index 000000000..901e19fdb --- /dev/null +++ b/module_programming_llm/module_programming_llm/guided/one_shot/config/__init__.py @@ -0,0 +1,15 @@ +from abc import ABC +from pydantic import BaseModel, Field + +from module_programming_llm.helpers.models import ModelConfigType, DefaultModelConfig + +from .generate import GuidedOneShotPrompt + + +class GuidedOneShotConfig(BaseModel, ABC): + """This approach uses an LLM to generate non graded suggestions for all changed files at once.""" + + model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore + max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.") + + prompt: GuidedOneShotPrompt = Field(default=GuidedOneShotPrompt()) diff --git a/module_programming_llm/module_programming_llm/guided/one_shot/config/generate.py b/module_programming_llm/module_programming_llm/guided/one_shot/config/generate.py new
file mode 100644 index 000000000..7324385a6 --- /dev/null +++ b/module_programming_llm/module_programming_llm/guided/one_shot/config/generate.py @@ -0,0 +1,57 @@ +from pydantic import BaseModel, Field + + +system_message = """\ +You are an expert AI tutor for programming education at a prestigious university. + +## Task +Create minimal guided feedback to nudge a student towards improving their programming skills with didactically valuable feedback. +Act like a teacher who is encouraging and guiding a student to learn and improve without spoiling the solution. + +## Style +1. Constructive +2. Specific +3. Balanced +4. Clear and Concise +5. Actionable +6. Educational +7. Contextual + +Directly address the student, use "you" instead of "the student".\ +""" + + +problem_message = '''\ +Problem statement: +{problem_statement}\ +''' + + +file_message = '''\ +File Path: {file_path} +File with line numbers (: ): +""" +{submission_file} +""" + +Here is what the student changed (- removed, + added by the student): +""" +{template_to_submission_diff} +""" + +Here is the difference between the potential solution by the instructor and the student's submission (don't spoil the solution): +""" +{solution_to_submission_diff} +"""\ +''' + + +class GuidedOneShotPrompt(BaseModel): + """Prompt for the one-shot guided feedback generation approach.""" + + system_message: str = Field(default=system_message, + description="Message for priming AI behavior and instructing it what to do.",) + problem_message: str = Field(default=problem_message, + description="Message which contains **{problem_statement}**",) + file_message: str = Field(default=file_message, + description="Message for one file which contains **{file_path}**, **{submission_file}** and potentially **{solution_to_submission_diff}**, **{template_to_submission_diff}**, **{template_to_solution_diff}**",) diff --git a/module_programming_llm/module_programming_llm/guided/one_shot/generate.py 
b/module_programming_llm/module_programming_llm/guided/one_shot/generate.py new file mode 100644 index 000000000..a92ffa602 --- /dev/null +++ b/module_programming_llm/module_programming_llm/guided/one_shot/generate.py @@ -0,0 +1,176 @@ +import os +from typing import List, Optional, Sequence +from pydantic import BaseModel, Field + +from langchain.prompts import ( + ChatPromptTemplate, + SystemMessagePromptTemplate, + HumanMessagePromptTemplate, +) + +from athena.programming import Exercise, Submission, Feedback + +from .config import GuidedOneShotConfig + +from module_programming_llm.helpers.llm_utils import ( + num_tokens_from_string, + predict_and_parse, +) +from module_programming_llm.helpers.utils import ( + get_diff, + load_files_from_repo, + add_line_numbers, + get_programming_language_file_extension, +) + + +class FeedbackModel(BaseModel): + file_path: Optional[str] = Field(description="File path of the feedback, or empty if unreferenced") + line_start: Optional[int] = Field( + description="Referenced line number start, or empty if unreferenced" + ) + line_end: Optional[int] = Field( + description="Referenced line number end, or empty if unreferenced" + ) + description: str = Field(description="Guided feedback description") + + class Config: + title = "GuidedFeedback" + + +class GuidedFeedbackCompendiumModel(BaseModel): + """Compendium of guided feedbacks for a submission.""" + + guided_feedbacks: Sequence[FeedbackModel] = Field(description="Guided feedbacks") + + class Config: + title = "GuidedFeedbackCompendium" + + +# pylint: disable=too-many-locals +async def generate_guided_one_shot_suggestions( + exercise: Exercise, + submission: Submission, + config: GuidedOneShotConfig, + debug: bool, +) -> List[Feedback]: + model = config.model.get_model() # type: ignore[attr-defined] + + system_message_prompt = SystemMessagePromptTemplate.from_template(config.prompt.system_message) + problem_message_prompt = 
HumanMessagePromptTemplate.from_template(config.prompt.problem_message) + file_message_prompt = HumanMessagePromptTemplate.from_template(config.prompt.file_message) + + prompt_inputs: List[dict] = [] + + # Feature extraction + template_repo = exercise.get_template_repository() + solution_repo = exercise.get_solution_repository() + submission_repo = submission.get_repository() + + changed_files_from_template_to_submission = get_diff( + src_repo=template_repo, dst_repo=submission_repo, file_path=None, name_only=True + ).split("\n") + changed_files_from_template_to_submission = [ + os.path.join(str(submission_repo.working_tree_dir or ""), file_path) + for file_path in changed_files_from_template_to_submission + ] + + # Changed text files + changed_files = load_files_from_repo( + submission_repo, + file_filter=lambda file_path: file_path + in changed_files_from_template_to_submission, + ) + + problem_statement = exercise.problem_statement or "" + problem_statement = ( + problem_statement + if problem_statement.strip() + else "No problem statement found." 
+ ) + + programming_language_extension = get_programming_language_file_extension( + programming_language=exercise.programming_language + ) + + # Gather prompt inputs for each changed file (independently) + for file_path, file_content in changed_files.items(): + if programming_language_extension and not file_path.endswith(programming_language_extension): + continue + + file_content = add_line_numbers(file_content) + solution_to_submission_diff = get_diff( + src_repo=solution_repo, + dst_repo=submission_repo, + src_prefix="solution", + dst_prefix="submission", + file_path=file_path, + ) + template_to_submission_diff = get_diff( + src_repo=template_repo, + dst_repo=submission_repo, + src_prefix="template", + dst_prefix="submission", + file_path=file_path, + ) + template_to_solution_diff = get_diff( + src_repo=template_repo, + dst_repo=solution_repo, + src_prefix="template", + dst_prefix="solution", + file_path=file_path, + ) + + prompt_inputs.append( + { + "file_path": file_path, + "submission_file": file_content, + "solution_to_submission_diff": solution_to_submission_diff, + "template_to_submission_diff": template_to_submission_diff, + "template_to_solution_diff": template_to_solution_diff, + } + ) + + prompt_input = { + "problem_statement": problem_statement, + "prompt_inputs": prompt_inputs, + } + + chat_prompt = ChatPromptTemplate.from_messages( + [system_message_prompt, problem_message_prompt] + + [file_message_prompt.format(**prompt_input) for prompt_input in prompt_inputs] + ) + + results: Optional[GuidedFeedbackCompendiumModel] = await predict_and_parse( + model=model, + chat_prompt=chat_prompt, + prompt_input=prompt_input, + pydantic_object=GuidedFeedbackCompendiumModel, + tags=[ + f"exercise-{exercise.id}", + f"submission-{submission.id}", + "one-shot-non-graded-suggestions", + ], + ) + + feedbacks: List[Feedback] = [] + if results is not None: + for feedback in results.guided_feedbacks: + feedbacks.append( + Feedback( + id=None, + exercise_id=exercise.id, 
+ submission_id=submission.id, + title="Guided Feedback", + description=feedback.description, + file_path=feedback.file_path, + line_start=feedback.line_start, + line_end=feedback.line_end, + is_graded=False, + credits=0, + structured_grading_instruction_id=None, + meta={}, + ) + ) + + return feedbacks diff --git a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py deleted file mode 100644 index f8c971bce..000000000 --- a/module_programming_llm/module_programming_llm/prompts/split_grading_instructions_by_file.py +++ /dev/null @@ -1,21 +0,0 @@ -system_message = """\ -You are an AI tutor for programming assessment at a prestigious university. - -# Task -Restructure the grading instructions by student changed file to show relevant information for each file to the tutor. \ -Make it as easy as possible for the tutor to grade the assignment when looking at the changed file. \ -Some instructions may be relevant for multiple files. -""" - -human_message = """\ -Grading instructions: -{grading_instructions} - -Changed files from template to sample solution: -{changed_files_from_template_to_solution} - -Changed files from template to student submission (Pick from this list, very important!): -{changed_files_from_template_to_submission} - -Grading instructions by file: -""" diff --git a/module_programming_llm/module_programming_llm/prompts/split_problem_non_grading_statement_by_file.py b/module_programming_llm/module_programming_llm/prompts/split_problem_non_grading_statement_by_file.py deleted file mode 100644 index 5dc3024d7..000000000 --- a/module_programming_llm/module_programming_llm/prompts/split_problem_non_grading_statement_by_file.py +++ /dev/null @@ -1,20 +0,0 @@ -system_message = """\ -You are an AI tutor for programming assessment at a prestigious university. 
- -# Task -Restructure the problem statement by student changed files to gather work items for each file. \ -Some parts of the problem statement may be relevant for multiple files. -Comments in the template solution can be relevant for some files, some might be not. -Include only those work items based on comments that make sense. -For the file keys, include the full path. -""" - -human_message = """\ -Problem statement: -{problem_statement} - -Changed files from template to student submission (Pick from this list, very important!): -{changed_files_from_template_to_submission} - -Problem statement by file: -""" diff --git a/playground/data/example/exercise-1.json b/playground/data/example/exercise-1.json index d7ecfa463..991e09697 100644 --- a/playground/data/example/exercise-1.json +++ b/playground/data/example/exercise-1.json @@ -8,15 +8,15 @@ "problem_statement": "-> file:problem-statement.md", "grading_instructions": "-> file:grading-instructions.md", "programming_language": "java", - "solution_repository_url": "{{exerciseDataUrl}}/solution.zip", - "template_repository_url": "{{exerciseDataUrl}}/template.zip", - "tests_repository_url": "{{exerciseDataUrl}}/tests.zip", + "solution_repository_uri": "{{exerciseDataUri}}/solution.zip", + "template_repository_uri": "{{exerciseDataUri}}/template.zip", + "tests_repository_uri": "{{exerciseDataUri}}/tests.zip", "meta": {}, "submissions": [ { "id": 101, - "repository_url": "{{exerciseDataUrl}}/submissions/101.zip", + "repository_uri": "{{exerciseDataUri}}/submissions/101.zip", "meta": {}, "feedbacks": [ { @@ -43,12 +43,12 @@ }, { "id": 102, - "repository_url": "{{exerciseDataUrl}}/submissions/102.zip", + "repository_uri": "{{exerciseDataUri}}/submissions/102.zip", "meta": {} }, { "id": 103, - "repository_url": "{{exerciseDataUrl}}/submissions/103.zip", + "repository_uri": "{{exerciseDataUri}}/submissions/103.zip", "meta": {}, "feedbacks": [ { diff --git a/playground/scripts/artemis/4_link_programming_repositories.mjs 
b/playground/scripts/artemis/4_link_programming_repositories.mjs index 20583f5e5..502f9c04e 100644 --- a/playground/scripts/artemis/4_link_programming_repositories.mjs +++ b/playground/scripts/artemis/4_link_programming_repositories.mjs @@ -32,23 +32,23 @@ for (let exercise of exercises) { if (!fs.existsSync(path.join(exercisePath, "solution"))) { console.log(`Exercise ${exercise.id} has no solution at ${exercisePath}/solution`); - exercise.solution_repository_url = null; + exercise.solution_repository_uri = null; } else { - exercise.solution_repository_url = `{{exerciseDataUrl}}/solution.zip`; + exercise.solution_repository_uri = `{{exerciseDataUri}}/solution.zip`; } if (!fs.existsSync(path.join(exercisePath, "template"))) { console.log(`Exercise ${exercise.id} has no template at ${exercisePath}/template`); - exercise.template_repository_url = null; + exercise.template_repository_uri = null; } else { - exercise.template_repository_url = `{{exerciseDataUrl}}/template.zip`; + exercise.template_repository_uri = `{{exerciseDataUri}}/template.zip`; } if (!fs.existsSync(path.join(exercisePath, "tests"))) { console.log(`Exercise ${exercise.id} has no tests at ${exercisePath}/tests`); - exercise.tests_repository_url = null; + exercise.tests_repository_uri = null; } else { - exercise.tests_repository_url = `{{exerciseDataUrl}}/tests.zip`; + exercise.tests_repository_uri = `{{exerciseDataUri}}/tests.zip`; } const submissionsPath = path.join(exercisePath, "submissions"); @@ -56,9 +56,9 @@ for (let exercise of exercises) { const submissionPath = path.join(submissionsPath, `${submission.id}`); if (!fs.existsSync(submissionPath)) { console.log(`Submission ${submission.id} has no directory at ${submissionPath}`); - submission.repository_url = null; + submission.repository_uri = null; } else { - submission.repository_url = `{{exerciseDataUrl}}/submissions/${submission.id}.zip`; + submission.repository_uri = `{{exerciseDataUri}}/submissions/${submission.id}.zip`; } return 
submission; }); diff --git a/playground/src/components/details/exercise_detail/programming.tsx b/playground/src/components/details/exercise_detail/programming.tsx index 01d9a946f..e88bf1f33 100644 --- a/playground/src/components/details/exercise_detail/programming.tsx +++ b/playground/src/components/details/exercise_detail/programming.tsx @@ -7,13 +7,13 @@ export default function ProgrammingExerciseDetail({ exercise, openedInitially }: return ( <> - + - + - + ); diff --git a/playground/src/components/details/submission_detail/programming.tsx b/playground/src/components/details/submission_detail/programming.tsx index f63d223ae..4f99c5d9a 100644 --- a/playground/src/components/details/submission_detail/programming.tsx +++ b/playground/src/components/details/submission_detail/programming.tsx @@ -30,7 +30,7 @@ export default function ProgrammingSubmissionDetail({ createNewFeedback(submission)} diff --git a/playground/src/components/selectors/submission_select.tsx b/playground/src/components/selectors/submission_select.tsx index 4d38fa5dd..915bac023 100644 --- a/playground/src/components/selectors/submission_select.tsx +++ b/playground/src/components/selectors/submission_select.tsx @@ -57,7 +57,7 @@ export default function SubmissionSelect({ {data?.map((sub: Submission) => { const contentPreview = (sub as TextSubmission)?.text || - (sub as ProgrammingSubmission)?.repository_url || + (sub as ProgrammingSubmission)?.repository_uri || "?"; return (