Merge remote-tracking branch 'origin/develop' into features/modeling/…

…structured-grading-instructions
ls1intum · Sep 13, 2024 · 112a964 · 112a964
2 parents 1266b22 + 2da2d33
commit 112a964
Show file tree

Hide file tree

Showing 17 changed files with 1,397 additions and 1,031 deletions.
diff --git a/.gitignore b/.gitignore
@@ -10,3 +10,4 @@ data/*
 !.idea/runConfigurations/
 .env
 **/.env
+**/data/*
diff --git a/.idea/runConfigurations/module_example.xml b/.idea/runConfigurations/module_example.xml
diff --git a/.idea/runConfigurations/module_programming_llm.xml b/.idea/runConfigurations/module_programming_llm.xml
diff --git a/.idea/runConfigurations/module_programming_themisml.xml b/.idea/runConfigurations/module_programming_themisml.xml
diff --git a/.idea/runConfigurations/module_text_cofee.xml b/.idea/runConfigurations/module_text_cofee.xml
diff --git a/.idea/runConfigurations/module_text_llm.xml b/.idea/runConfigurations/module_text_llm.xml
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -70,7 +70,7 @@
             "type": "python",
             "request": "launch",
             "cwd": "${workspaceFolder}/modules/modeling/module_modeling_llm",
-            "module": "module_text_cofee"
+            "module": "module_modeling_llm"
         }
     ]
 }

diff --git a/...es/modeling/module_modeling_llm/module_modeling_llm/helpers/serializers/parser/element.py b/...es/modeling/module_modeling_llm/module_modeling_llm/helpers/serializers/parser/element.py
@@ -0,0 +1,51 @@
+from typing import Dict, Any, List, Optional
+
+class Element:
+    """
+    Represents an element in a UML diagram.
+
+    This class encapsulates the properties and behavior of a UML element,
+    including its attributes and methods.
+    """
+
+    def __init__(self, data: Dict[str, Any], element_dict: Optional[Dict[str, Any]] = None):
+        """
+        Build an element from its raw JSON entry.
+
+        Parameters:
+        data (Dict[str, Any]): Raw element entry. Missing keys fall back to empty values.
+        element_dict (Optional[Dict[str, Any]]): Mapping of element ID to raw element entry.
+            When given, attribute and method references are resolved to names right away.
+        """
+        self.id: str = data.get('id', '')
+        self.type: str = data.get('type', '')
+        self.name: str = data.get('name', '')
+        self.owner: str = data.get('owner', '')
+        # IDs of the elements that represent this element's attributes and methods
+        self.attribute_refs: List[str] = data.get('attributes', [])
+        self.method_refs: List[str] = data.get('methods', [])
+        # Resolved names, filled in by resolve_references()
+        self.attributes: List[str] = []
+        self.methods: List[str] = []
+        if element_dict is not None:
+            self.resolve_references(element_dict)
+
+    def resolve_references(self, element_dict: Dict[str, Any]):
+        """
+        Resolve attribute and method references using the provided element dictionary.
+
+        The json data contains only references to other elements that represent attributes
+        and methods. This method resolves these references to the actual names of the
+        attributes and methods by looking up the corresponding elements via their IDs in the
+        provided element dictionary. References that are not present in the dictionary are
+        skipped.
+
+        Parameters:
+        element_dict (Dict[str, Any]): Mapping of element ID to raw element entry.
+        """
+        # Both lists are rebuilt from scratch so that every reference contributes exactly one
+        # name, also when this method is called more than once.
+        self.attributes = [element_dict[ref].get("name", "") for ref in self.attribute_refs if ref in element_dict]
+        self.methods = [element_dict[ref].get("name", "") for ref in self.method_refs if ref in element_dict]
+
+    def to_apollon(self) -> str:
+        """
+        Render the element as text.
+
+        Returns:
+        str: "[<type>] <name>", followed by a brace-delimited block listing the resolved
+        attributes and methods when there are any.
+        """
+        parts = [f"[{self.type}] {self.name}"]
+
+        if self.attributes or self.methods:
+            details = []
+            if self.attributes:
+                details.append("   attributes:")
+                details.extend(f"       {attr}" for attr in self.attributes)
+            if self.methods:
+                details.append("   methods:")
+                details.extend(f"       {method}" for method in self.methods)
+            parts.append("{\n" + "\n".join(details) + "\n}")
+
+        return " ".join(parts)
+
+    def __getitem__(self, key):
+        """
+        Allow dict-style read access to the instance attributes, e.g. element['name'].
+
+        Raises:
+        KeyError: If the instance has no attribute with that name.
+        """
+        return self.__dict__[key]
diff --git a/...s/modeling/module_modeling_llm/module_modeling_llm/helpers/serializers/parser/relation.py b/...s/modeling/module_modeling_llm/module_modeling_llm/helpers/serializers/parser/relation.py
@@ -0,0 +1,105 @@
+from typing import Dict, Any, List, Optional
+
+class Relation:
+    """
+    Represents a relationship between elements in a UML diagram.
+
+    This class encapsulates the properties and behavior of a UML relationship,
+    including its type, source and target elements, and associated messages.
+    """
+    def __init__(self, data: Dict[str, Any], element_dict: Optional[Dict[str, Any]], index: int):
+        """
+        Build a relation from its raw JSON entry.
+
+        Parameters:
+        data (Dict[str, Any]): Raw relationship entry. Missing keys fall back to empty values.
+        element_dict (Optional[Dict[str, Any]]): Mapping of element ID to raw element entry.
+            When given, the source and target references are resolved to names right away.
+        index (int): Number used to build the generated relation name ("R<index>").
+        """
+        self.id: str = data.get('id', '')
+        self.type: str = data.get('type', '')
+        # Check if flowType exists, if so use that as the type
+        self.type = data.get('flowType', self.type)
+        self.label: str = data.get('name', '')
+        # These are the very dicts stored in `data`, not copies; resolve_references()
+        # rewrites their 'element' entry in place.
+        self.source: Dict[str, Any] = data.get('source', {})
+        self.target: Dict[str, Any] = data.get('target', {})
+        # NOTE(review): this reads the 'message' key - confirm the key name against the diagram schema
+        self.messages: List[Dict[str, str]] = data.get('message', [])
+        self.name = f"R{index}"
+        if element_dict is not None:
+            self.resolve_references(element_dict)
+
+    def resolve_references(self, element_dict: Dict[str, Any]):
+        """
+        Resolve the source and target element references using the provided element dictionary.
+
+        The json data contains only references to other elements that represent the source and
+        target elements. This method replaces these references with the actual names of the
+        elements by looking up the corresponding elements via their IDs in the provided element
+        dictionary. An end without an 'element' entry, or with a reference that is not in the
+        dictionary, is left untouched.
+
+        Parameters:
+        element_dict (Dict[str, Any]): Mapping of element ID to raw element entry.
+        """
+        for end_data in (self.source, self.target):
+            # .get() keeps relations with an empty or partial end from raising KeyError
+            reference = end_data.get('element')
+            if reference in element_dict:
+                end_data['element'] = element_dict[reference].get("name", "")
+
+    def to_apollon(self) -> str:
+        """
+        Render the relation as text.
+
+        Returns:
+        str: "<name>: <source> <arrow> <target>", optionally followed by ": <label>" and a
+        brace-delimited block with the role/multiplicity of each end and the messages.
+        A missing source or target element is rendered as an empty string.
+        """
+        source_name = self.source.get('element', '')
+        target_name = self.target.get('element', '')
+        parts = [f"{self.name}: {source_name} {get_relation_arrow(self.type)} {target_name}"]
+
+        if self.label:
+            parts[0] += f": {self.label}"
+
+        details = []
+        for end_data in (self.source, self.target):
+            if 'role' in end_data or 'multiplicity' in end_data:
+                end_details = [f"    {end_data.get('element', '')}: {{"]
+                if 'role' in end_data:
+                    end_details.append(f"        role: {end_data['role']}")
+                if 'multiplicity' in end_data:
+                    end_details.append(f"        multiplicity: {end_data['multiplicity']}")
+                end_details.append("    }")
+                details.extend(end_details)
+
+        if self.messages:
+            details.append("    messages: [")
+            for message in self.messages:
+                # A message either points at the target end or back at the source end
+                to_element = target_name if message['direction'] == 'target' else source_name
+                details.append(f"        {{ name: {message['name']}, to_direction: {to_element} }}")
+            details.append("    ]")
+
+        if details:
+            parts.append("{\n" + "\n".join(details) + "\n}")
+
+        return " ".join(parts)
+
+    def __getitem__(self, key):
+        """
+        Allow dict-style read access to the instance attributes, e.g. relation['name'].
+
+        Raises:
+        KeyError: If the instance has no attribute with that name.
+        """
+        return self.__dict__[key]
+
+def get_relation_arrow(relation_type: str) -> str:
+    """
+    Returns the arrow for a relation type or flow type, chosen by suffix matching.
+
+    The type is normalized (spaces removed, lower-cased) and compared against the known
+    relation keywords with str.endswith; the longest matching keyword wins.
+
+    Parameters:
+    relation_type (str): The type of the relation (e.g., "ClassAggregation", "BPMNFlow_sequence").
+
+    Returns:
+    str: "(<normalized type>) <arrow>" when a keyword matches, otherwise "-- <normalized type> --".
+    """
+    arrow_map = {
+        "interfacerequired": "--c",
+        "interfaceprovided": "--",
+        "dataassociation": "-->",
+        "generalization": "<|--",
+        "unidirectional": "-->",
+        "bidirectional": "<-->",
+        "association": "-->",
+        "inheritance": "<|--",
+        "composition": "*--",
+        "aggregation": "o--",
+        "realization": "..|>",
+        "dependency": "..>",
+        "sequence": "-->",
+        "message": "-->",
+        "include": "..>",
+        "extend": "-->",
+        "flow": "-->",
+        "link": "-->",
+        "arc": "-->",
+    }
+
+    relation_type = relation_type.replace(" ", "").lower()
+
+    # Longer keywords must be checked first: "dataassociation" also ends with "association".
+    # Sorting here enforces that instead of relying on the order of the literal above.
+    for key in sorted(arrow_map, key=len, reverse=True):
+        if relation_type.endswith(key):
+            return f"({relation_type}) {arrow_map[key]}"
+
+    return f"-- {relation_type} --"
diff --git a/...modeling/module_modeling_llm/module_modeling_llm/helpers/serializers/parser/uml_parser.py b/...modeling/module_modeling_llm/module_modeling_llm/helpers/serializers/parser/uml_parser.py
@@ -0,0 +1,91 @@
+from typing import Dict, Any, List
+from string import ascii_uppercase
+
+from module_modeling_llm.helpers.serializers.parser.element import Element
+from module_modeling_llm.helpers.serializers.parser.relation import Relation
+
+
+class UMLParser:
+    """
+    A parser for UML diagrams
+
+    This class is responsible for parsing JSON data representing an Apollon UML diagram
+    and converting it into a mermaid like textual representation
+    """
+
+    def __init__(self, json_data: Dict[str, Any]):
+        """
+        Parse the given diagram.
+
+        Parameters:
+        json_data (Dict[str, Any]): Diagram data. The keys 'type', 'elements' and
+            'relationships' are read. The data is modified in place: elements that share a
+            name get a disambiguating suffix written into their 'name' entry.
+
+        Raises:
+        KeyError: If 'type', 'elements' or 'relationships' is missing.
+        """
+        self.data: Dict[str, Any] = json_data
+        self.title: str = self.data['type']
+        self.elements: List[Element] = []
+        self.relations: List[Relation] = []
+        # Maps the name of an owning element to the names of the elements it owns
+        self.owners: Dict[str, List[str]] = {}
+        self._parse()
+
+    @staticmethod
+    def _duplicate_suffix(index: int) -> str:
+        """
+        Return the suffix for the index-th (0-based) element sharing a name:
+        A, B, ..., Z, then AA, AB, ... so that more than 26 duplicates do not fail.
+        """
+        letters: List[str] = []
+        remaining = index + 1
+        while remaining > 0:
+            remaining, letter_index = divmod(remaining - 1, len(ascii_uppercase))
+            letters.append(ascii_uppercase[letter_index])
+        return "".join(reversed(letters))
+
+    def _parse(self) -> None:
+        """
+        Fill self.elements, self.relations and self.owners from self.data.
+        """
+        name_count: Dict[str, int] = {}
+        name_suffix_counters: Dict[str, int] = {}
+        raw_elements = self.data['elements']
+
+        # Get all referenced attributes and methods (a set, as it is only used for membership tests)
+        referenced_ids = set()
+        for element_data in raw_elements.values():
+            referenced_ids.update(element_data.get('attributes', []))
+            referenced_ids.update(element_data.get('methods', []))
+
+        # Count occurrences of each name; this includes the attribute and method elements
+        for element_data in raw_elements.values():
+            name = element_data.get('name')
+            name_count[name] = name_count.get(name, 0) + 1
+            name_suffix_counters[name] = 0
+
+        # Filter elements and ensure unique names for duplicates
+        # This filters out all Elements that are referenced by any other Element, as they are attributes or methods
+        for element_data in raw_elements.values():
+            if element_data.get('id') in referenced_ids:
+                continue
+
+            name = element_data.get('name')
+            if name_count[name] > 1:
+                # The new name is written back into the raw data so that relations resolving
+                # this element afterwards pick up the unique name as well
+                element_data['name'] = f"{name}{self._duplicate_suffix(name_suffix_counters[name])}"
+                name_suffix_counters[name] += 1
+
+            self.elements.append(Element(element_data, raw_elements))
+
+        # Parse relations
+        for index, relation_data in enumerate(self.data['relationships'].values(), start=1):
+            self.relations.append(Relation(relation_data, raw_elements, index))
+
+        # Index the parsed elements by ID; setdefault keeps the first element for a repeated ID
+        elements_by_id: Dict[str, Element] = {}
+        for element in self.elements:
+            elements_by_id.setdefault(element.id, element)
+
+        # Get all owners and their elements
+        for element in self.elements:
+            owner_id = element.owner
+            if not owner_id:
+                continue
+            owner_element = elements_by_id.get(owner_id)
+            if owner_element is not None:
+                self.owners.setdefault(owner_element.name, []).append(element.name)
+
+    def to_apollon(self) -> str:
+        """
+        Render the whole diagram as text.
+
+        Returns:
+        str: The diagram type followed by the "@Elements:", "@Relations:" and "@Owners:"
+        sections; a section is omitted when it has no entries.
+        """
+        lines: List[str] = [f"UML Diagram Type: {self.title}", ""]
+
+        if self.elements:
+            lines.append("@Elements:\n")
+            lines.extend(element.to_apollon() for element in self.elements)
+
+        if self.relations:
+            lines.append("\n\n@Relations:\n")
+            lines.extend(relation.to_apollon() for relation in self.relations)
+
+        if self.owners:
+            lines.append("\n\n@Owners:\n")
+            for owner, children in self.owners.items():
+                lines.append(f"{owner}: {', '.join(children)}")
+
+        return "\n".join(lines)
+
+    def get_elements(self) -> List[Element]:
+        """Return the parsed elements (attribute and method elements are not included)."""
+        return self.elements
+
+    def get_relations(self) -> List[Relation]:
+        """Return the parsed relations."""
+        return self.relations
diff --git a/modules/modeling/module_modeling_llm/module_modeling_llm/models/__init__.py b/modules/modeling/module_modeling_llm/module_modeling_llm/models/__init__.py
@@ -5,6 +5,7 @@
 from module_modeling_llm.models.model_config import ModelConfig
 
 
+
 DefaultModelConfig: Type[ModelConfig]
 default_model_name = os.environ.get("LLM_DEFAULT_MODEL")
 evaluation_model_name = os.environ.get("LLM_EVALUATION_MODEL")
-Original file line number
+Diff line change
@@ Expand Up / @@ -10,3 +10,4 @@ data/* @@
     !.idea/runConfigurations/
     .env
     **/.env
+    **/data/*