Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ModelingLLM: Add Structured Grading Instruction Generation and Restructure Module #340

Merged
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
6211745
Rewrite Modeling Module to process all UML Diagram types
LeonWehrhahn Aug 31, 2024
8c86453
Update module to process non-graded feedback requests
LeonWehrhahn Sep 6, 2024
e426766
Merge develop
LeonWehrhahn Sep 6, 2024
93ab9bf
Merge develop
LeonWehrhahn Sep 6, 2024
b938606
Structured Grading
LeonWehrhahn Sep 6, 2024
6b6a060
Add structured grading instruction generation and restructure modelin…
LeonWehrhahn Sep 9, 2024
89208bd
Add structured grading instruction generation and restructure modelin…
LeonWehrhahn Sep 9, 2024
4ff35da
Ignore .gradle directories
LeonWehrhahn Sep 10, 2024
0af7b89
Merge branch 'features/modeling/structured-grading-instructions' of h…
LeonWehrhahn Sep 10, 2024
42db368
Refactor transform_grading_criteria to transform_grading_criterion
LeonWehrhahn Sep 10, 2024
1266b22
Use GradingCriterion instead of custom model
LeonWehrhahn Sep 10, 2024
112a964
Merge remote-tracking branch 'origin/develop' into features/modeling/…
LeonWehrhahn Sep 13, 2024
132b041
Remove duplicate configuration for Module Modeling LLM
LeonWehrhahn Sep 13, 2024
7678f60
Fix lint
LeonWehrhahn Sep 13, 2024
32f35b2
Update import statements for StructuredGradingCriterion
LeonWehrhahn Sep 13, 2024
2c8a0fc
Refactor convert_to_athana_feedback_model.py to set is_graded to Fals…
LeonWehrhahn Sep 13, 2024
1f2a73c
Merge branch 'develop' into features/modeling/structured-grading-inst…
dmytropolityka Sep 16, 2024
7eccf05
Refactor import statements for UMLParser and related classes
LeonWehrhahn Sep 18, 2024
ec7147b
Merge branch 'features/modeling/structured-grading-instructions' of h…
LeonWehrhahn Sep 18, 2024
7062c2d
Refactor convert_to_athana_feedback_model.py to remove unnecessary pa…
LeonWehrhahn Sep 18, 2024
8c83fd8
Refactor convert_to_athana_feedback_model.py to remove unnecessary pa…
LeonWehrhahn Sep 18, 2024
6538117
Refactor max_tokens parameter in openai.py to increase the limit for …
LeonWehrhahn Sep 18, 2024
c70f1db
Adjust prompts and apollon
LeonWehrhahn Sep 20, 2024
6e3e908
Update postcss version in package-lock.json
LeonWehrhahn Sep 23, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,21 @@
"module": "module_example",
"justMyCode": true
},
{
"name": "Module Modeling LLM",
"type": "python",
"request": "launch",
"cwd": "${workspaceFolder}/modules/modeling/module_modeling_llm",
"module": "module_modeling_llm",
"justMyCode": false
},
{
"name": "Module Programming LLM",
"type": "python",
"request": "launch",
"cwd": "${workspaceFolder}/modules/programming/module_programming_llm",
"module": "module_programming_llm",
"justMyCode": true
"justMyCode": false
},
{
"name": "Module Programming ThemisML",
Expand Down
2 changes: 1 addition & 1 deletion assessment_module_manager/modules.docker.ini
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,6 @@ supports_graded_feedback_requests = false
url = http://module-modeling-llm:5008
type = modeling
supports_evaluation = false
supports_non_graded_feedback_requests = false
supports_non_graded_feedback_requests = true
supports_graded_feedback_requests = true

2 changes: 1 addition & 1 deletion assessment_module_manager/modules.ini
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,6 @@ supports_graded_feedback_requests = false
url = http://localhost:5008
type = modeling
supports_evaluation = false
supports_non_graded_feedback_requests = false
supports_non_graded_feedback_requests = true
supports_graded_feedback_requests = true

5 changes: 4 additions & 1 deletion athena/athena/schemas/grading_criterion.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from abc import ABC
from typing import List, Optional

from pydantic import Field
from pydantic import BaseModel, Field

from .schema import Schema

Expand All @@ -24,3 +24,6 @@ class GradingCriterion(Schema, ABC):
structured_grading_instructions: List[StructuredGradingInstruction] = Field(
[], example=[{"credits": 1.0, "gradingScale": "Good", "instructionDescription": "Some instructions", "feedback": "Nicely done!", "usageCount": 1},
{"credits": 0.0, "gradingScale": "Bad", "instructionDescription": "Some instructions", "feedback": "Try again!", "usageCount": 0}])

class StructuredGradingCriterion(BaseModel):
    """Container model grouping a list of GradingCriterion objects for structured grading."""
    criteria: List[GradingCriterion]
10 changes: 5 additions & 5 deletions modules/modeling/module_modeling_llm/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ DATABASE_URL=sqlite:///../data/data.sqlite

# Default model to use
# See below for options, available models are also logged on startup
LLM_DEFAULT_MODEL="azure_openai_gpt-35"
LLM_DEFAULT_MODEL="azure_openai_gpt-4o"

# Enable LLM-as-a-judge approach 0 = disabled, 1 = enabled
LLM_ENABLE_LLM_AS_A_JUDGE=1
Expand All @@ -23,13 +23,13 @@ LLM_EVALUATION_MODEL="azure_openai_gpt-4"
# Standard OpenAI (Non-Azure) [leave blank if not used]
# Model names prefixed with `openai_` followed by the model name, e.g. `openai_text-davinci-003`
# A list of models can be found in `module_text_llm/helpers/models/openai.py` (openai_models)
LLM_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

# Azure OpenAI [leave blank if not used]
# Model names prefixed with `azure_openai_` followed by the deployment id, e.g. `azure_openai_gpt-35`
LLM_AZURE_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
LLM_AZURE_OPENAI_API_BASE="https://ase-eu01.openai.azure.com/" # change base if needed
LLM_AZURE_OPENAI_API_VERSION="2023-07-01-preview" # change base if needed
AZURE_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
AZURE_OPENAI_ENDPOINT="https://ase-eu01.openai.azure.com/" # change base if needed
OPENAI_API_VERSION="2023-07-01-preview" # change version if needed

# Replicate [leave blank if not used]
# See https://replicate.com and adjust model config options in `module_text_llm/helpers/models/replicate.py`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@

from athena import app, submission_selector, submissions_consumer, feedback_consumer, feedback_provider
from athena.logger import logger
from athena.modeling import Exercise, Submission, Feedback
from athena.modeling import Exercise, Feedback, Submission
from module_modeling_llm.config import Configuration
from module_modeling_llm.generate_suggestions import generate_suggestions
from module_modeling_llm.core.filter_feedback import filter_feedback
from module_modeling_llm.core.generate_suggestions import generate_suggestions
from module_modeling_llm.core.get_structured_grading_instructions import get_structured_grading_instructions
from module_modeling_llm.utils.convert_to_athana_feedback_model import convert_to_athana_feedback_model
from module_modeling_llm.utils.get_exercise_model import get_exercise_model


@submissions_consumer
Expand All @@ -31,7 +35,26 @@ def process_incoming_feedback(exercise: Exercise, submission: Submission, feedba
async def suggest_feedback(exercise: Exercise, submission: Submission, is_graded: bool, module_config: Configuration) -> List[Feedback]:
    """
    Generate feedback suggestions for a modeling submission.

    Pipeline: convert the exercise/submission to the internal exercise model,
    obtain structured grading instructions, generate feedback suggestions, and
    (for non-graded requests) filter the feedback so it does not reveal the
    solution before converting everything back to Athena's Feedback model.

    :param exercise: The modeling exercise the submission belongs to.
    :param submission: The submission to generate feedback for.
    :param is_graded: True for graded feedback; False when a student requests
        preliminary (non-graded) feedback.
    :param module_config: Module configuration (approach, debug flag).
    :return: List of Feedback suggestions in Athena's schema.
    """
    logger.info("suggest_feedback: Suggestions for submission %d of exercise %d were requested", submission.id,
                exercise.id)

    # First, we convert the incoming exercise and submission to our internal models and textual representations
    exercise_model = get_exercise_model(exercise, submission)

    # Next, we retrieve or generate the structured grading instructions for the exercise
    structured_grading_instructions = await get_structured_grading_instructions(
        exercise_model, module_config.approach, exercise.grading_instructions, exercise.grading_criteria, module_config.debug
    )

    # Finally, we generate feedback suggestions for the submission
    feedback = await generate_suggestions(
        exercise_model, structured_grading_instructions, module_config.approach, module_config.debug
    )

    # If the submission is not graded (student is requesting feedback), we reformulate
    # the feedback so it does not give away the solution
    if not is_graded:
        feedback = await filter_feedback(exercise_model, feedback, module_config.approach, module_config.debug)

    return convert_to_athana_feedback_model(feedback, exercise_model, is_graded)



if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import json

from module_modeling_llm.apollon_transformer.parser.uml_parser import UMLParser


class ApollonJSONTransformer:
    """Transforms raw Apollon diagram JSON into the textual "apollon" representation."""

    @staticmethod
    def transform_json(model: str) -> tuple[str, dict[str, str], str]:
        """
        Serialize a given Apollon diagram model to a string representation.
        This method converts the UML diagram model into a format similar to mermaid syntax, called "apollon".

        :param model: The Apollon diagram model as a JSON string.
        :return: A tuple of three items: the serialized model as a string, a
            dictionary mapping element and relation names to their corresponding
            IDs, and the diagram type (the JSON "type" field, or "unknown" if
            missing). Note: if two elements/relations share a name, the later
            entry overwrites the earlier one in the name->ID mapping.
        """

        model_dict = json.loads(model)

        parser = UMLParser(model_dict)

        diagram_type = model_dict.get("type", "unknown")

        # Convert the UML diagram to the apollon representation
        apollon_representation = parser.to_apollon()

        # Extract elements and relations with their corresponding IDs and names
        names = {
            **{element['name']: element['id'] for element in parser.get_elements()},
            **{relation['name']: relation['id'] for relation in parser.get_relations()}
        }

        return apollon_representation, names, diagram_type

Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from typing import Dict, Any, List, Optional

class Element:
    """
    A single element (node) of an Apollon UML diagram.

    In the raw Apollon JSON, an element's attributes and methods are stored as
    ID references to other entries. When an ``element_dict`` (mapping element
    ID -> raw element data) is supplied, those references are resolved to the
    referenced entries' display names.
    """

    def __init__(self, data: Dict[str, Any], element_dict: Optional[Dict[str, Any]] = None):
        self.id: str = data.get('id', '')
        self.type: str = data.get('type', '')
        self.name: str = data.get('name', '')
        self.owner: str = data.get('owner', '')
        # Raw ID references into element_dict
        self.attribute_refs: List[str] = data.get('attributes', [])
        self.method_refs: List[str] = data.get('methods', [])
        # Resolved display names (filled by resolve_references)
        self.attributes: List[str] = []
        self.methods: List[str] = []
        if element_dict is not None:
            self.resolve_references(element_dict)

    def resolve_references(self, element_dict: Dict[str, Any]):
        """
        Resolve attribute and method ID references to their display names.

        Unresolvable references (IDs missing from element_dict) are skipped.
        Bug fix: the previous implementation populated both lists twice — once
        via the comprehensions and once more via a redundant extend loop —
        duplicating every attribute and method name.
        """
        self.attributes = [element_dict[ref].get("name", "") for ref in self.attribute_refs if ref in element_dict]
        self.methods = [element_dict[ref].get("name", "") for ref in self.method_refs if ref in element_dict]

    def to_apollon(self) -> str:
        """Render this element in the textual "apollon" diagram format."""
        parts = [f"[{self.type}] {self.name}"]

        if self.attributes or self.methods:
            details = []
            if self.attributes:
                details.append(" attributes:")
                details.extend(f" {attr}" for attr in self.attributes)
            if self.methods:
                details.append(" methods:")
                details.extend(f" {method}" for method in self.methods)
            parts.append("{\n" + "\n".join(details) + "\n}")

        return " ".join(parts)

    def __getitem__(self, key):
        # Allow dict-style access to instance attributes, so callers can treat
        # Element objects and raw dicts interchangeably.
        return self.__dict__[key]
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
from typing import Dict, Any, List, Optional

class Relation:
    """
    A single relation (edge) of an Apollon UML diagram.

    Relations get synthetic names "R1", "R2", ... from the 1-based index
    supplied by the caller (see UMLParser._parse).
    """

    def __init__(self, data: Dict[str, Any], element_dict: Optional[Dict[str, Any]], index: int):
        self.id: str = data.get('id', '')
        self.type: str = data.get('type', '')
        # Check if flowType exists, if so use that as the type
        self.type = data.get('flowType', self.type)
        self.label: str = data.get('name', '')
        self.source: Dict[str, Any] = data.get('source', {})
        self.target: Dict[str, Any] = data.get('target', {})
        # NOTE(review): reads the singular key 'message' — confirm against the
        # Apollon JSON schema that this is not meant to be 'messages'.
        self.messages: List[Dict[str, str]] = data.get('message', [])
        self.name = f"R{index}"
        if element_dict is not None:
            self.resolve_references(element_dict)

    def resolve_references(self, element_dict: Dict[str, Any]):
        # Replace source/target element IDs with the referenced elements' names.
        # Assumes source/target dicts contain an 'element' key — raises KeyError
        # otherwise (TODO confirm the Apollon schema guarantees this).
        if self.source['element'] in element_dict:
            self.source['element'] = element_dict[self.source['element']].get("name", "")
        if self.target['element'] in element_dict:
            self.target['element'] = element_dict[self.target['element']].get("name", "")

    def to_apollon(self) -> str:
        """Render this relation in the textual "apollon" diagram format."""
        # Header line: "R<n>: <source> <arrow> <target>[: <label>]"
        parts = [f"{self.name}: {self.source['element']} {get_relation_arrow(self.type)} {self.target['element']}"]

        if self.label:
            parts[0] += f": {self.label}"

        # Optional role/multiplicity details for each relation end.
        details = []
        for end in ['source', 'target']:
            end_data = getattr(self, end)
            if 'role' in end_data or 'multiplicity' in end_data:
                end_details = [f" {end_data['element']}: {{"]
                if 'role' in end_data:
                    end_details.append(f" role: {end_data['role']}")
                if 'multiplicity' in end_data:
                    end_details.append(f" multiplicity: {end_data['multiplicity']}")
                end_details.append(" }")
                details.extend(end_details)

        # Optional message list (e.g. for communication/sequence diagrams);
        # direction decides which end the message points to.
        if self.messages:
            details.append(" messages: [")
            for message in self.messages:
                to_element = self.target['element'] if message['direction'] == 'target' else self.source['element']
                details.append(f" {{ name: {message['name']}, to_direction: {to_element} }}")
            details.append(" ]")

        if details:
            parts.append("{\n" + "\n".join(details) + "\n}")

        return " ".join(parts)

    def __getitem__(self, key):
        # Allow dict-style access to instance attributes.
        return self.__dict__[key]

def get_relation_arrow(relation_type: str) -> str:
    """
    Returns the correct arrow based on the relation type or flow type using string containment.

    Parameters:
        relation_type (str): The type of the relation (e.g., "ClassAggregation", "BPMNFlow_sequence").

    Returns:
        str: The arrow representation for the given relation type in Mermaid syntax.
             Matched types yield "(<normalized type>) <arrow>"; unmatched types
             fall back to "-- <normalized type> --".
    """
    arrow_map = {
        # Keys sorted manually by length in descending order to ensure correct matching when using
        # endswith, e.g., "dataassociation" must be checked before "association".
        # Bug fix: the original literal listed the "message" key twice; the duplicate was removed.
        "interfacerequired": "--c",
        "interfaceprovided": "--",
        "dataassociation": "-->",
        "generalization": "<|--",
        "unidirectional": "-->",
        "bidirectional": "<-->",
        "association": "-->",
        "inheritance": "<|--",
        "composition": "*--",
        "aggregation": "o--",
        "realization": "..|>",
        "dependency": "..>",
        "sequence": "-->",
        "message": "-->",
        "include": "..>",
        "extend": "-->",
        "flow": "-->",
        "link": "-->",
        "arc": "-->",
    }

    # Normalize: strip spaces and lowercase before suffix matching.
    relation_type = relation_type.replace(" ", "").lower()

    for key in arrow_map:
        if relation_type.endswith(key):
            return f"({relation_type}) {arrow_map[key]}"

    return f"-- {relation_type} --"
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
from typing import Dict, Any, List
from string import ascii_uppercase

from module_modeling_llm.apollon_transformer.parser.element import Element
from module_modeling_llm.apollon_transformer.parser.relation import Relation


class UMLParser:
    """
    Parses raw Apollon diagram JSON into Element and Relation objects and
    renders them in the textual "apollon" format.

    Expects json_data to contain 'type', 'elements' (dict keyed by element ID),
    and 'relationships' (dict keyed by relation ID).
    """

    def __init__(self, json_data: Dict[str, Any]):
        self.data = json_data
        self.title = self.data['type']
        self.elements: List[Element] = []
        self.relations: List[Relation] = []
        # Maps an owning element's name to the names of the elements it contains.
        self.owners: Dict[str, List[str]] = {}
        self._parse()

    def _parse(self):
        """Populate elements, relations and the owner mapping from the raw JSON."""
        name_counts: Dict[str, int] = {}
        referenced_ids: List[str] = []
        name_suffix_counters: Dict[str, int] = {}

        # Collect IDs of entries referenced as attributes or methods of another
        # element — those are not standalone diagram elements.
        for element_data in self.data['elements'].values():
            referenced_ids.extend(element_data.get('attributes', []))
            referenced_ids.extend(element_data.get('methods', []))

        # Count occurrences of each name so duplicates can be disambiguated.
        for element_data in self.data['elements'].values():
            name = element_data.get('name')
            name_counts[name] = name_counts.get(name, 0) + 1
            name_suffix_counters[name] = 0

        # Build Element objects, skipping referenced (attribute/method) entries
        # and appending "A", "B", ... to names that occur more than once.
        # NOTE(review): more than 26 duplicates of one name would exhaust
        # ascii_uppercase and raise IndexError — presumably acceptable in practice.
        for element_data in self.data['elements'].values():
            if element_data.get('id') not in referenced_ids:
                name = element_data.get('name')
                if name_counts[name] > 1:
                    suffix_index = name_suffix_counters[name]
                    element_data['name'] = f"{name}{ascii_uppercase[suffix_index]}"
                    name_suffix_counters[name] += 1

                self.elements.append(Element(element_data, self.data['elements']))

        # Relations are numbered R1, R2, ... in their JSON order.
        for index, relation_data in enumerate(self.data['relationships'].values()):
            self.relations.append(Relation(relation_data, self.data['elements'], index + 1))

        # Group contained elements under their owner's name.
        for element in self.elements:
            owner_id = element.owner
            if owner_id:
                owner_element = next((el for el in self.elements if el.id == owner_id), None)
                if owner_element:
                    self.owners.setdefault(owner_element.name, []).append(element.name)

    def to_apollon(self) -> str:
        """Render the parsed diagram as an apollon-format string."""
        lines = [f"UML Diagram Type: {self.title}", ""]

        if self.elements:
            lines.append("@Elements:\n")
            lines.extend(element.to_apollon() for element in self.elements)

        if self.relations:
            lines.append("\n\n@Relations:\n")
            lines.extend(relation.to_apollon() for relation in self.relations)

        if self.owners:
            lines.append("\n\n@Owners:\n")
            for owner, children in self.owners.items():
                lines.append(f"{owner}: {', '.join(children)}")

        return "\n".join(lines)

    def get_elements(self) -> List[Element]:
        return self.elements

    def get_relations(self) -> List[Relation]:
        return self.relations
Loading
Loading