Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ModelingLLM: Add Structured Grading Instruction Generation and Restructure Module #340

Merged
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
6211745
Rewrite Modeling Module to process all UML Diagram types
LeonWehrhahn Aug 31, 2024
8c86453
Update module to process non-graded feedback requests
LeonWehrhahn Sep 6, 2024
e426766
Merge develop
LeonWehrhahn Sep 6, 2024
93ab9bf
Merge develop
LeonWehrhahn Sep 6, 2024
b938606
Structured Grading
LeonWehrhahn Sep 6, 2024
6b6a060
Add structured grading instruction generation and restructure modelin…
LeonWehrhahn Sep 9, 2024
89208bd
Add structured grading instruction generation and restructure modelin…
LeonWehrhahn Sep 9, 2024
4ff35da
Ignore .gradle directories
LeonWehrhahn Sep 10, 2024
0af7b89
Merge branch 'features/modeling/structured-grading-instructions' of h…
LeonWehrhahn Sep 10, 2024
42db368
Refactor transform_grading_criteria to transform_grading_criterion
LeonWehrhahn Sep 10, 2024
1266b22
Use GradingCriterion instead of custom model
LeonWehrhahn Sep 10, 2024
112a964
Merge remote-tracking branch 'origin/develop' into features/modeling/…
LeonWehrhahn Sep 13, 2024
132b041
Remove duplicate configuration for Module Modeling LLM
LeonWehrhahn Sep 13, 2024
7678f60
Fix lint
LeonWehrhahn Sep 13, 2024
32f35b2
Update import statements for StructuredGradingCriterion
LeonWehrhahn Sep 13, 2024
2c8a0fc
Refactor convert_to_athana_feedback_model.py to set is_graded to Fals…
LeonWehrhahn Sep 13, 2024
1f2a73c
Merge branch 'develop' into features/modeling/structured-grading-inst…
dmytropolityka Sep 16, 2024
7eccf05
Refactor import statements for UMLParser and related classes
LeonWehrhahn Sep 18, 2024
ec7147b
Merge branch 'features/modeling/structured-grading-instructions' of h…
LeonWehrhahn Sep 18, 2024
7062c2d
Refactor convert_to_athana_feedback_model.py to remove unnecessary pa…
LeonWehrhahn Sep 18, 2024
8c83fd8
Refactor convert_to_athana_feedback_model.py to remove unnecessary pa…
LeonWehrhahn Sep 18, 2024
6538117
Refactor max_tokens parameter in openai.py to increase the limit for …
LeonWehrhahn Sep 18, 2024
c70f1db
Adjust prompts and apollon
LeonWehrhahn Sep 20, 2024
6e3e908
Update postcss version in package-lock.json
LeonWehrhahn Sep 23, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions athena/athena/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from . import contextvars
from .app import app
from .schemas import ExerciseType, GradingCriterion, StructuredGradingInstruction
from .schemas import ExerciseType, GradingCriterion, StructuredGradingInstruction, StructuredGradingCriterion
from .metadata import emit_meta, get_meta
from .experiment import get_experiment_environment
from .endpoints import submission_selector, submissions_consumer, feedback_consumer, feedback_provider, config_schema_provider, evaluation_provider # type: ignore
Expand Down Expand Up @@ -36,5 +36,6 @@ def run_module():
"get_experiment_environment",
"ExerciseType",
"GradingCriterion",
"StructuredGradingInstruction"
"StructuredGradingInstruction",
"StructuredGradingCriterion"
]
2 changes: 1 addition & 1 deletion athena/athena/schemas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
from .modeling_feedback import ModelingFeedback
from .modeling_exercise import ModelingExercise
from .modeling_submission import ModelingSubmission
from .grading_criterion import GradingCriterion, StructuredGradingInstruction
from .grading_criterion import GradingCriterion, StructuredGradingInstruction, StructuredGradingCriterion
5 changes: 4 additions & 1 deletion athena/athena/schemas/grading_criterion.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from abc import ABC
from typing import List, Optional

from pydantic import Field
from pydantic import BaseModel, Field

from .schema import Schema

Expand All @@ -24,3 +24,6 @@ class GradingCriterion(Schema, ABC):
structured_grading_instructions: List[StructuredGradingInstruction] = Field(
[], example=[{"credits": 1.0, "gradingScale": "Good", "instructionDescription": "Some instructions", "feedback": "Nicely done!", "usageCount": 1},
{"credits": 0.0, "gradingScale": "Bad", "instructionDescription": "Some instructions", "feedback": "Try again!", "usageCount": 0}])

# Wrapper model that groups several grading criteria under a single `criteria` field.
# NOTE(review): deliberately documented with comments rather than a docstring —
# pydantic copies class docstrings into the generated schema description.
class StructuredGradingCriterion(BaseModel):
    # The grading criteria contained in this wrapper.
    criteria: List[GradingCriterion]
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@

from athena import app, submission_selector, submissions_consumer, feedback_consumer, feedback_provider
from athena.logger import logger
from athena.modeling import Exercise, Submission, Feedback
from athena.modeling import Exercise, Feedback, Submission
from module_modeling_llm.config import Configuration
from module_modeling_llm.generate_suggestions import generate_suggestions
from module_modeling_llm.core.filter_feedback import filter_feedback
from module_modeling_llm.core.generate_suggestions import generate_suggestions
from module_modeling_llm.core.get_structured_grading_instructions import get_structured_grading_instructions
from module_modeling_llm.utils.convert_to_athana_feedback_model import convert_to_athana_feedback_model
from module_modeling_llm.utils.get_exercise_model import get_exercise_model


@submissions_consumer
Expand All @@ -31,7 +35,26 @@ def process_incoming_feedback(exercise: Exercise, submission: Submission, feedba
async def suggest_feedback(exercise: Exercise, submission: Submission, is_graded: bool, module_config: Configuration) -> List[Feedback]:
    """
    Produce feedback suggestions for a single modeling submission.

    The request is processed in up to four steps: build the internal exercise model,
    obtain the structured grading instructions, generate feedback, and (only for
    non-graded requests) filter that feedback before converting it to the Athena model.

    :param exercise: The exercise the submission belongs to
    :param submission: The submission for which suggestions are requested
    :param is_graded: When exactly ``False`` the generated feedback is additionally filtered
    :param module_config: Module configuration providing the approach settings and the debug flag
    :return: The feedback converted by ``convert_to_athana_feedback_model``
    """
    logger.info("suggest_feedback: Suggestions for submission %d of exercise %d were requested", submission.id,
                exercise.id)

    # First, we convert the incoming exercise and submission to our internal models and textual representations
    exercise_model = get_exercise_model(exercise, submission)

    # Next, we retrieve or generate the structured grading instructions for the exercise
    structured_grading_instructions = await get_structured_grading_instructions(
        exercise_model, module_config.approach, exercise.grading_instructions, exercise.grading_criteria, module_config.debug
    )

    # Finally, we generate feedback suggestions for the submission
    feedback = await generate_suggestions(
        exercise_model, structured_grading_instructions, module_config.approach, module_config.debug
    )

    # If the submission is not graded (Student is requesting feedback), we reformulate the feedback to not give away the solution
    if is_graded is False:
        feedback = await filter_feedback(exercise_model, feedback, module_config.approach, module_config.debug)

    return convert_to_athana_feedback_model(feedback, exercise_model)



if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from typing import Optional
from module_modeling_llm.helpers.serializers.parser.uml_parser import UMLParser
import json

from module_modeling_llm.apollon_transformer.parser.uml_parser import UMLParser

class DiagramModelSerializer:

class ApollonJSONTransformer:

@staticmethod
def serialize_model(model: dict) -> tuple[Optional[str], dict[str, str]]:
def transform_json(model: str) -> tuple[str, dict[str, str], str]:
"""
Serialize a given Apollon diagram model to a string representation.
This method converts the UML diagram model into a format similar to mermaid syntax, called "apollon".
Expand All @@ -14,7 +15,12 @@ def serialize_model(model: dict) -> tuple[Optional[str], dict[str, str]]:
:return: A tuple containing the serialized model as a string and a dictionary mapping element and relation names
to their corresponding IDs.
"""
parser = UMLParser(model)

model_dict = json.loads(model)

parser = UMLParser(model_dict)

diagram_type = model_dict.get("type", "unknown")

# Convert the UML diagram to the apollon representation
apollon_representation = parser.to_apollon()
Expand All @@ -25,5 +31,5 @@ def serialize_model(model: dict) -> tuple[Optional[str], dict[str, str]]:
**{relation['name']: relation['id'] for relation in parser.get_relations()}
}

return apollon_representation, names
return apollon_representation, names, diagram_type

Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,6 @@ def resolve_references(self, element_dict: Dict[str, Any]):
self.attributes = [element_dict[ref].get("name", "") for ref in self.attribute_refs if ref in element_dict]
self.methods = [element_dict[ref].get('name', '') for ref in self.method_refs if ref in element_dict]

for ref_list, target_list in [(self.attribute_refs, self.attributes), (self.method_refs, self.methods)]:
target_list.extend(
element_dict.get(ref, {}).get("name", "") for ref in ref_list if ref in element_dict
)

def to_apollon(self) -> str:
parts = [f"[{self.type}] {self.name}"]

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from typing import Dict, Any, List
from string import ascii_uppercase

from module_modeling_llm.helpers.serializers.parser.element import Element
from module_modeling_llm.helpers.serializers.parser.relation import Relation
from module_modeling_llm.apollon_transformer.parser.element import Element
from module_modeling_llm.apollon_transformer.parser.relation import Relation


class UMLParser:
Expand Down Expand Up @@ -42,9 +42,14 @@ def _parse(self) -> None:
for element_data in self.data['elements'].values():
if element_data.get('id') not in referenced_ids:
name = element_data.get('name')
if name_count[name] > 1:
suffix_index = name_suffix_counters[name]
element_data['name'] = f"{name}{ascii_uppercase[suffix_index]}"
suffix_index = name_suffix_counters[name]

if name == '':
element_data['name'] = f"##{ascii_uppercase[suffix_index]}"
if name_count[name] > 1:
name_suffix_counters[name] += 1
elif name_count[name] > 1:
element_data['name'] = f"{name}#{ascii_uppercase[suffix_index]}"
name_suffix_counters[name] += 1

element = Element(element_data, self.data['elements'])
Expand Down
49 changes: 30 additions & 19 deletions modules/modeling/module_modeling_llm/module_modeling_llm/config.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
from pydantic import BaseModel, Field

from athena import config_schema_provider
from module_modeling_llm.helpers.models import ModelConfigType, DefaultModelConfig
from module_modeling_llm.prompts.generate_suggestions import (
graded_feedback_system_message as default_graded_feedback_system_message,
graded_feedback_human_message as default_graded_feedback_human_message,
filter_feedback_system_message as default_filter_feedback_system_message,
filter_feedback_human_message as default_filter_feedback_human_message
from module_modeling_llm.models import ModelConfigType, DefaultModelConfig
from module_modeling_llm.prompts import (
graded_feedback_prompt,
filter_feedback_prompt,
structured_grading_instructions_prompt
)


class GenerateSuggestionsPrompt(BaseModel):
"""
Features available: **{problem_statement}**, **{example_solution}**, **{grading_instructions}**, **{max_points}**,
Expand All @@ -18,25 +16,38 @@ class GenerateSuggestionsPrompt(BaseModel):
_Note: **{problem_statement}**, **{example_solution}**, or **{grading_instructions}** might be omitted if the input
is too long._
"""
graded_feedback_system_message: str = Field(default=default_graded_feedback_system_message,
description="Message for priming AI behavior and instructing it what to do.")
graded_feedback_human_message: str = Field(default=default_graded_feedback_human_message,
description="Message from a human. The input on which the AI is supposed to act.")
filter_feedback_system_message: str = Field(default=default_filter_feedback_system_message,
description="Message for priming AI behavior for filtering ungraded feedback.")
filter_feedback_human_message: str = Field(default=default_filter_feedback_human_message,
description="Message for instructing AI to filter ungraded feedback.")


graded_feedback_system_message: str = Field(
default=graded_feedback_prompt.graded_feedback_system_message,
description="Message for priming AI behavior and instructing it what to do."
)
graded_feedback_human_message: str = Field(
default=graded_feedback_prompt.graded_feedback_human_message,
description="Message from a human. The input on which the AI is supposed to act."
)
filter_feedback_system_message: str = Field(
default=filter_feedback_prompt.filter_feedback_system_message,
description="Message for priming AI behavior for filtering ungraded feedback."
)
filter_feedback_human_message: str = Field(
default=filter_feedback_prompt.filter_feedback_human_message,
description="Message for instructing AI to filter ungraded feedback."
)
structured_grading_instructions_system_message: str = Field(
default=structured_grading_instructions_prompt.structured_grading_instructions_system_message,
description="Message for instructing AI to structure the Problem Statement"
)
# Human-side prompt for the structured-grading-instructions step; the description
# previously duplicated the filter-feedback wording by mistake.
structured_grading_instructions_human_message: str = Field(
    default=structured_grading_instructions_prompt.structured_grading_instructions_human_message,
    description="Message from a human. The input from which the AI is supposed to generate structured grading instructions."
)

class BasicApproachConfig(BaseModel):
    """This approach uses a LLM with a single prompt to generate feedback in a single step."""
    # Upper bound on the number of input-prompt tokens (defaults to 3000).
    max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.")
    # Model configuration; defaults to a fresh DefaultModelConfig instance.
    model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore
    # Prompt messages (system/human) used by this approach.
    generate_suggestions_prompt: GenerateSuggestionsPrompt = Field(default=GenerateSuggestionsPrompt())


# Top-level module configuration, registered through `config_schema_provider`.
# Documented with comments rather than a docstring because pydantic copies class
# docstrings into the generated schema description.
@config_schema_provider
class Configuration(BaseModel):
    # Toggles debug mode (off by default).
    debug: bool = Field(default=False, description="Enable debug mode.")
    # Settings of the feedback-generation approach; declared once (the duplicate declaration was redundant).
    approach: BasicApproachConfig = Field(default=BasicApproachConfig())
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate

from athena import emit_meta
from module_modeling_llm.config import BasicApproachConfig
from module_modeling_llm.utils.predict_and_parse import predict_and_parse
from module_modeling_llm.models.assessment_model import AssessmentModel
from module_modeling_llm.models.exercise_model import ExerciseModel
from module_modeling_llm.prompts.filter_feedback_prompt import FilterFeedbackInputs

async def filter_feedback(
    exercise: ExerciseModel,
    original_feedback: AssessmentModel,
    config: BasicApproachConfig,
    debug: bool,
) -> AssessmentModel:
    """
    Let the LLM rewrite an already generated assessment using the filter-feedback prompts.

    :param exercise: Internal exercise model; its exercise and submission ids are used to tag the LLM call
    :param original_feedback: The assessment that should be filtered
    :param config: Approach configuration providing the model and the filter prompt messages
    :param debug: If true, the formatted prompt and the result are emitted as metadata
    :return: The filtered assessment returned by the model
    :raises ValueError: If the model call yields no result
    """

    chat_prompt = ChatPromptTemplate.from_messages([
        ("system", config.generate_suggestions_prompt.filter_feedback_system_message),
        ("human", config.generate_suggestions_prompt.filter_feedback_human_message)
    ])

    prompt_inputs = FilterFeedbackInputs(
        original_feedback=original_feedback.json(),
        feedback_output_format=PydanticOutputParser(pydantic_object=AssessmentModel).get_format_instructions()
    )
    # Serialize once; the same mapping feeds the model call and the debug output.
    prompt_input = prompt_inputs.dict()

    feedback_result = await predict_and_parse(
        model=config.model.get_model(),  # type: ignore[attr-defined]
        chat_prompt=chat_prompt,
        prompt_input=prompt_input,
        pydantic_object=AssessmentModel,
        tags=[
            f"exercise-{exercise.exercise_id}-filter",
            f"submission-{exercise.submission_id}-filter",
        ]
    )

    # Debug information goes through emit_meta instead of being printed to stdout.
    if debug:
        emit_meta("filter_feedback", {
            "prompt": chat_prompt.format(**prompt_input),
            "result": feedback_result.dict() if feedback_result is not None else None
        })

    if feedback_result is None:
        raise ValueError("No feedback was returned by the model.")

    return feedback_result
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from athena.schemas.grading_criterion import StructuredGradingCriterion
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate

from athena import emit_meta
from module_modeling_llm.config import BasicApproachConfig
from module_modeling_llm.models.assessment_model import AssessmentModel
from module_modeling_llm.prompts.apollon_format_description import apollon_format_description
from module_modeling_llm.utils.predict_and_parse import predict_and_parse
from module_modeling_llm.prompts.graded_feedback_prompt import GradedFeedbackInputs
from module_modeling_llm.models.exercise_model import ExerciseModel

async def generate_suggestions(
        exercise_model: ExerciseModel,
        structured_grading_instructions: StructuredGradingCriterion,
        config: BasicApproachConfig,
        debug: bool) -> AssessmentModel:
    """
    Generate feedback suggestions for modeling exercise submissions
    :param exercise_model: Internal model holding the transformed submission, problem statement, points,
        UML type and example solution of the exercise being assessed
    :param structured_grading_instructions: The grading criteria passed to the prompt as JSON
    :param config: A configuration object for the feedback module
    :param debug: Indicates whether additional debugging information should be provided
    :return: The assessment produced by the model for the submission
    :raises ValueError: If the model call yields no result
    """

    prompt_inputs = GradedFeedbackInputs(
        submission=exercise_model.transformed_submission,
        problem_statement=exercise_model.problem_statement,
        max_points=exercise_model.max_points,
        bonus_points=exercise_model.bonus_points,
        structured_grading_instructions=structured_grading_instructions.json(),
        submission_uml_type=exercise_model.submission_uml_type,
        example_solution=exercise_model.transformed_example_solution,
        uml_diagram_format=apollon_format_description,
        feedback_output_format=PydanticOutputParser(pydantic_object=AssessmentModel).get_format_instructions()
    )
    # Serialize once; the same mapping feeds the model call and the debug output.
    prompt_input = prompt_inputs.dict()

    chat_prompt = ChatPromptTemplate.from_messages([
        ("system", config.generate_suggestions_prompt.graded_feedback_system_message),
        ("human", config.generate_suggestions_prompt.graded_feedback_human_message)])

    feedback_result = await predict_and_parse(
        model=config.model.get_model(),  # type: ignore[attr-defined]
        chat_prompt=chat_prompt,
        prompt_input=prompt_input,
        pydantic_object=AssessmentModel,
        tags=[
            f"exercise-{exercise_model.exercise_id}",
            f"submission-{exercise_model.submission_id}",
        ]
    )

    if debug:
        emit_meta("generate_suggestions", {
            "prompt": chat_prompt.format(**prompt_input),
            "result": feedback_result.dict() if feedback_result is not None else None
        })

    if feedback_result is None:
        raise ValueError("No feedback was generated")

    return feedback_result
Loading
Loading