Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modeling Exercises: Refactor Feedback Reference (#354) #354

Merged
merged 17 commits into from
Dec 8, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
f510596
Refactor feedback reference
LeonWehrhahn Nov 16, 2024
ba85feb
Merge branch 'develop' into feature/modeling/reference
LeonWehrhahn Nov 16, 2024
6b98bb8
Enhance Apollon JSON transformer and parser for improved element ID m…
LeonWehrhahn Nov 18, 2024
2bba32f
Merge branch 'feature/modeling/reference' of https://github.com/ls1in…
LeonWehrhahn Nov 18, 2024
ac31664
Merge branch 'develop' into feature/modeling/reference
LeonWehrhahn Nov 18, 2024
bf5a80d
Merge branch 'develop' into feature/modeling/reference
LeonWehrhahn Nov 22, 2024
9de3f10
Add element_ids field to ModelingFeedback and update feedback convers…
LeonWehrhahn Nov 27, 2024
e15d11c
Merge branch 'feature/modeling/reference' of https://github.com/ls1in…
LeonWehrhahn Nov 28, 2024
1e8e21d
Add element_ids field to DBModelingFeedback model
LeonWehrhahn Nov 28, 2024
a5203ff
Add JSON type import to db_modeling_feedback.py
LeonWehrhahn Nov 29, 2024
2a169b8
Merge branch 'develop' into feature/modeling/reference
LeonWehrhahn Nov 29, 2024
b6af29a
Merge branch 'develop' into feature/modeling/reference
LeonWehrhahn Dec 3, 2024
f2604c9
Increase default max_tokens to 4000 in OpenAI model configuration
LeonWehrhahn Dec 3, 2024
87a66a7
Increase max_input_tokens to 5000 and update element_ids assignment i…
LeonWehrhahn Dec 3, 2024
3a10e22
Add TODO comments to element_ids field for migration tracking
LeonWehrhahn Dec 6, 2024
31e8d59
Fix TODO comment formatting in element_ids field for clarity
LeonWehrhahn Dec 6, 2024
2657ff8
Merge branch 'develop' into feature/modeling/reference
LeonWehrhahn Dec 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions athena/athena/models/db_modeling_feedback.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Optional

from sqlalchemy import Column, ForeignKey, JSON
from sqlalchemy import Column, ForeignKey, JSON, String
from sqlalchemy.orm import relationship

from athena.database import Base
Expand All @@ -11,7 +11,8 @@
class DBModelingFeedback(DBFeedback, Base):
__tablename__ = "modeling_feedbacks"

element_ids: Optional[list[str]] = Column(JSON) # type: ignore
element_ids: Optional[list[str]] = Column(JSON) # type: ignore
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a todo comment to remove that in the future?

reference: Optional[str] = Column(String, nullable=True) # type: ignore

exercise_id = Column(BigIntegerWithAutoincrement, ForeignKey("modeling_exercises.id", ondelete="CASCADE"), index=True)
submission_id = Column(BigIntegerWithAutoincrement, ForeignKey("modeling_submissions.id", ondelete="CASCADE"), index=True)
Expand Down
5 changes: 2 additions & 3 deletions athena/athena/schemas/modeling_feedback.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
from typing import Optional, List

from typing import List, Optional
from pydantic import Field

from .feedback import Feedback


class ModelingFeedback(Feedback):
"""Feedback on a modeling exercise."""

element_ids: Optional[List[str]] = Field([], description="referenced diagram element IDs", example=["id_1"])
reference: Optional[str] = Field(None, description="reference to the diagram element", example="ClassAttribute:5a337bdf-da00-4bd0-a6f0-78ba5b84330e")
maximiliansoelch marked this conversation as resolved.
Show resolved Hide resolved
2 changes: 1 addition & 1 deletion llm_core/llm_core/models/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class OpenAIModelConfig(ModelConfig):

model_name: OpenAIModel = Field(default=default_openai_model, # type: ignore
description="The name of the model to use.")
max_tokens: PositiveInt = Field(1000, description="""\
max_tokens: PositiveInt = Field(4000, description="""\
The maximum number of [tokens](https://platform.openai.com/tokenizer) to generate in the chat completion.

The total length of input tokens and generated tokens is limited by the model's context length. \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
class ApollonJSONTransformer:

@staticmethod
def transform_json(model: str) -> tuple[str, dict[str, str], str]:
def transform_json(model: str) -> tuple[str, dict[str, str], str, dict[str, str]]:
"""
Serialize a given Apollon diagram model to a string representation.
This method converts the UML diagram model into a format similar to mermaid syntax, called "apollon".
Expand All @@ -25,11 +25,11 @@ def transform_json(model: str) -> tuple[str, dict[str, str], str]:
# Convert the UML diagram to the apollon representation
apollon_representation = parser.to_apollon()

# Extract elements and relations with their corresponding IDs and names
names = {
**{element['name']: element['id'] for element in parser.get_elements()},
**{relation['name']: relation['id'] for relation in parser.get_relations()}
}
return apollon_representation, names, diagram_type
# Get the mapping of element, method, and attribute names to their corresponding IDs
# This is used to resolve references, as the apollon representation only contains names and not IDs
names = parser.get_element_id_mapping()

id_type_mapping = parser.get_id_to_type_mapping()

return apollon_representation, names, diagram_type, id_type_mapping

Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from typing import Dict, Any, List, Optional
# apollon_transformer/parser/element.py

from typing import Dict, Any, List, Optional, Tuple
from string import ascii_uppercase

class Element:
"""
Expand All @@ -17,15 +20,61 @@ def __init__(self, data: Dict[str, Any], element_dict: Optional[Dict[str, Any]]
self.method_refs: List[str] = data.get('methods', [])
self.attributes: List[str] = []
self.methods: List[str] = []
self.attribute_id_mapping: Dict[str, str] = {}
self.method_id_mapping: Dict[str, str] = {}
if element_dict is not None:
self.resolve_references(element_dict)

def resolve_references(self, element_dict: Dict[str, Any]):
"""
Resolve attribute and method references using the provided element dictionary. The json data contains only references to other elements that represent attributes and methods. This method resolves these references to the actual names of the attributes and methods by looking up the corresponding elements via their IDs in the provided element dictionary.
Resolve attribute and method references using the provided element dictionary.
Ensures uniqueness among attribute and method names within the class.
"""
self.attributes = [element_dict[ref].get("name", "") for ref in self.attribute_refs if ref in element_dict]
self.methods = [element_dict[ref].get('name', '') for ref in self.method_refs if ref in element_dict]
# Resolve attributes
self.attributes, self.attribute_id_mapping = self._resolve_uniqueness(
self.attribute_refs, element_dict)

# Resolve methods
self.methods, self.method_id_mapping = self._resolve_uniqueness(
self.method_refs, element_dict)

def _resolve_uniqueness(
self, refs: List[str], element_dict: Dict[str, Any]
) -> Tuple[List[str], Dict[str, str]]:
name_counts: Dict[str, int] = {}
unique_full_names: List[str] = []
id_mapping: Dict[str, str] = {}
for ref in refs:
if ref in element_dict:
full_name = element_dict[ref].get("name", "")
simplified_name = self.extract_name(full_name)
count = name_counts.get(simplified_name, 0)
if count > 0:
suffix = f"#{ascii_uppercase[count - 1]}"
simplified_name_with_suffix = f"{simplified_name}{suffix}"
else:
simplified_name_with_suffix = simplified_name
name_counts[simplified_name] = count + 1
unique_full_names.append(full_name)
id_mapping[simplified_name_with_suffix] = ref
return unique_full_names, id_mapping

@staticmethod
def extract_name(full_name: str) -> str:
    """
    Extract the simplified name from a full attribute or method name.

    Leading visibility symbols (``+``, ``-``, ``#``, ``~``) and surrounding
    spaces are removed. Everything from the first ``(`` (parameter list) or
    ``:`` (type annotation) onwards is then dropped.

    Args:
        full_name: The attribute or method name as written in the diagram,
            e.g. ``"+ name: String"`` or ``"+ getName(id: int): String"``.

    Returns:
        The bare name, e.g. ``"name"`` or ``"getName"``. A name with neither
        delimiter is returned with only the visibility prefix stripped.
    """
    # Remove visibility symbols and leading/trailing spaces
    name = full_name.lstrip('+-#~ ').strip()
    # Both ':' and '(' can be present at once (typed parameters or a return
    # type on a method), so cut at whichever delimiter comes first instead of
    # treating them as mutually exclusive.
    cut_positions = [pos for pos in (name.find(':'), name.find('(')) if pos != -1]
    if cut_positions:
        name = name[:min(cut_positions)].strip()
    return name


def to_apollon(self) -> str:
parts = [f"[{self.type}] {self.name}"]
Expand All @@ -41,6 +90,6 @@ def to_apollon(self) -> str:
parts.append("{\n" + "\n".join(details) + "\n}")

return " ".join(parts)

def __getitem__(self, key):
return self.__dict__[key]
return self.__dict__[key]
Original file line number Diff line number Diff line change
Expand Up @@ -93,4 +93,30 @@ def get_elements(self) -> List[Element]:
return self.elements

def get_relations(self) -> List[Relation]:
return self.relations
return self.relations

def get_element_id_mapping(self) -> Dict[str, str]:
    """
    Build a lookup table from names to IDs.

    Each element is keyed by its name, each of its attributes and methods by
    ``<element name>.<member name>`` (using the keys of the element's
    ``attribute_id_mapping`` / ``method_id_mapping``), and each relation by
    its name. When two entries share a key, the later one wins.
    """
    name_to_id = {}
    for elem in self.elements:
        name_to_id[elem.name] = elem.id
        # Members are qualified with the owning element's name.
        name_to_id.update({
            f"{elem.name}.{member}": member_id
            for member, member_id in elem.attribute_id_mapping.items()
        })
        name_to_id.update({
            f"{elem.name}.{member}": member_id
            for member, member_id in elem.method_id_mapping.items()
        })
    name_to_id.update({rel.name: rel.id for rel in self.relations})
    return name_to_id

def get_id_to_type_mapping(self) -> Dict[str, str]:
    """
    Build a lookup table from IDs to types.

    Covers every entry under ``self.data['elements']`` followed by every
    entry under ``self.data['relationships']``, using each entry's ``'id'``
    and ``'type'`` fields. A relationship sharing an ID with an element
    overrides it.
    """
    id_to_type = {
        entry['id']: entry['type']
        for entry in self.data['elements'].values()
    }
    id_to_type.update(
        (entry['id'], entry['type'])
        for entry in self.data['relationships'].values()
    )
    return id_to_type
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class GenerateSuggestionsPrompt(BaseModel):

class BasicApproachConfig(BaseModel):
"""This approach uses a LLM with a single prompt to generate feedback in a single step."""
max_input_tokens: int = Field(default=3000, description="Maximum number of tokens in the input prompt.")
max_input_tokens: int = Field(default=5000, description="Maximum number of tokens in the input prompt.")
model: ModelConfigType = Field(default=DefaultModelConfig()) # type: ignore
generate_suggestions_prompt: GenerateSuggestionsPrompt = Field(default=GenerateSuggestionsPrompt())

Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from typing import List, Optional, Sequence
from typing import Optional, Sequence
from pydantic import BaseModel, Field

class FeedbackModel(BaseModel):
title: str = Field(description="Very short title, i.e. feedback category or similar", example="Logic Error")
description: str = Field(description="Feedback description")
element_names: Optional[List[str]] = Field(description="Referenced diagram element names, and relations (R<number>) or empty if unreferenced")
element_name: Optional[str] = Field(description="Referenced diagram element, attribute names, and relations (use format: <ClassName>, <ClassName>.<AttributeName>, <ClassName>.<MethodName>, R<number>), or leave empty if unreferenced")
credits: float = Field(0.0, description="Number of points received/deducted")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we don't have a list of feedbacks any more, do we use multiple instances of FeedbackModel somehow now?

grading_instruction_id: int = Field(
description="ID of the grading instruction that was used to generate this feedback"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional
from typing import Optional, Dict
from pydantic import BaseModel

class ExerciseModel(BaseModel):
Expand All @@ -11,4 +11,5 @@ class ExerciseModel(BaseModel):
grading_instructions: Optional[str] = None
submission_uml_type: str
transformed_example_solution: Optional[str] = None
element_id_mapping: dict[str, str]
element_id_mapping: Dict[str, str]
id_type_mapping: Dict[str, str]
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,15 @@ class GradedFeedbackInputs(BaseModel):
{example_solution}

Important:
Make sure to provide detailed feedback for each criterion. Always try to be as specific as possible.
Also make sure your feedback adds up to the correct number of points. If there are n points available and everything is correct, then the feedback should add up to n points.
Deeply think about the diagram and what the student potentially missed, misunderstood or mixed up.
- Make sure to provide detailed feedback for each criterion. Always try to be as specific as possible.
- Also make sure your feedback adds up to the correct number of points. If there are n points available and everything is correct, then the feedback should add up to n points.
- Deeply think about the diagram and what the student potentially missed, misunderstood, or mixed up.
- For the `element_name` field in the output, reference the specific diagram element, attribute, method, or relation related to the feedback. Use the following formats:
- For classes or elements: `<ClassName>`
- For attributes: `<ClassName>.<AttributeName>`
- For methods: `<ClassName>.<MethodName>`
- For relations: `R<number>` (e.g., `R1`, `R2`)
- If the feedback is not related to a specific element, leave the `element_name` field empty.

<UML Diagram Format>
The submission uses the following UML diagram format:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,38 +6,45 @@


def convert_to_athana_feedback_model(
feedback_result : AssessmentModel,
exercise_model: ExerciseModel,
feedback_result: AssessmentModel,
exercise_model: ExerciseModel,
manual_structured_grading_instructions: Optional[List[GradingCriterion]] = None
) -> List[Feedback]:

grading_instruction_ids = set(
grading_instruction_ids = {
grading_instruction.id
for criterion in manual_structured_grading_instructions or []
for criterion in (manual_structured_grading_instructions or [])
for grading_instruction in criterion.structured_grading_instructions
)
}

feedbacks = []
for feedback in feedback_result.feedbacks:
# Each feedback has a grading_instruction_id. However, we only want to keep the grading_instruction_id in the final feedback if it is associated with the manual structured grading instructions.
grading_instruction_id = feedback.grading_instruction_id if feedback.grading_instruction_id in grading_instruction_ids else None
element_ids = [
exercise_model.element_id_mapping[element]
for element in (feedback.element_names or [])
if element in exercise_model.element_id_mapping
]
grading_instruction_id = (
feedback.grading_instruction_id
if feedback.grading_instruction_id in grading_instruction_ids
else None
)

reference: Optional[str] = None
if feedback.element_name:
reference_id = exercise_model.element_id_mapping.get(feedback.element_name)
reference_type = exercise_model.id_type_mapping.get(reference_id) if reference_id else None

if reference_type and reference_id:
reference = f"{reference_type}:{reference_id}"

feedbacks.append(Feedback(
exercise_id=exercise_model.exercise_id,
submission_id=exercise_model.submission_id,
title=feedback.title,
description=feedback.description,
element_ids=element_ids,
credits=feedback.credits,
structured_grading_instruction_id=grading_instruction_id,
meta={},
id=None,
is_graded=False
is_graded=False,
reference=reference,
element_ids=[reference] if reference else [] # Todo: Remove after adding migrations to athena
))

return feedbacks
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ def get_exercise_model(exercise: Exercise, submission: Submission) -> ExerciseMo

serialized_example_solution = None
if exercise.example_solution:
serialized_example_solution, _, _ = ApollonJSONTransformer.transform_json(exercise.example_solution)
serialized_example_solution, _, _, _ = ApollonJSONTransformer.transform_json(exercise.example_solution)

transformed_submission, element_id_mapping, diagram_type = ApollonJSONTransformer.transform_json(submission.model)
transformed_submission, element_id_mapping, diagram_type, id_type_mapping = ApollonJSONTransformer.transform_json(submission.model)

return ExerciseModel(
submission_id=submission.id,
Expand All @@ -22,6 +22,7 @@ def get_exercise_model(exercise: Exercise, submission: Submission) -> ExerciseMo
submission_uml_type=diagram_type,
transformed_example_solution=serialized_example_solution,
element_id_mapping=element_id_mapping,
id_type_mapping=id_type_mapping
)


Loading