ls1intum · maximiliansoelch · Jun 6, 2024 · May 5, 2024 · May 9, 2024 · Jun 3, 2024
diff --git a/module_programming_ast/module_programming_ast/__main__.py b/module_programming_ast/module_programming_ast/__main__.py
@@ -2,13 +2,15 @@
 Entry point for the module_programming_ast module.
 """
 import random
-from typing import List, Any
+from typing import List, Any, cast
 from pydantic import BaseModel, Field
 
 from athena import app, config_schema_provider, submissions_consumer, submission_selector, feedback_consumer, feedback_provider, evaluation_provider, emit_meta
-from athena.programming import Exercise, Submission, Feedback
 from athena.logger import logger
 from athena.storage import store_exercise, store_submissions, store_feedback
+from athena.programming import Exercise, Submission, Feedback, get_stored_feedback_suggestions, count_stored_submissions
+from module_programming_ast.remove_overlapping import filter_overlapping_suggestions
+from module_programming_ast.remove_suspicious import filter_suspicious
 
 
 @config_schema_provider
@@ -73,71 +75,23 @@ def process_incoming_feedback(exercise: Exercise, submission: Submission, feedba
 def suggest_feedback(exercise: Exercise, submission: Submission, module_config: Configuration) -> List[Feedback]:
     logger.info("suggest_feedback: Suggestions for submission %d of exercise %d were requested", submission.id, exercise.id)
     # Do something with the submission and return a list of feedback
+    # ThemisML currently only works with Java
+    if exercise.programming_language.lower() != "java":
+        logger.info("ThemisML only works with Java. Returning no suggestions.")
+        return []
+
+    suggested_feedbacks = cast(List[Feedback], list(get_stored_feedback_suggestions(exercise.id, submission.id)))
+    logger.debug("Found %d feedback suggestions (unfiltered)", len(suggested_feedbacks))
+    suggested_feedbacks = filter_suspicious(suggested_feedbacks, count_stored_submissions(exercise.id))
+    logger.debug("Found %d feedback suggestions (removed suspicious suggestions)", len(suggested_feedbacks))
+    suggested_feedbacks = filter_overlapping_suggestions(suggested_feedbacks)
+    logger.debug("Found %d feedback suggestions (removed overlapping suggestions)", len(suggested_feedbacks))
+
+    logger.info("Suggesting %d filtered feedback suggestions", len(suggested_feedbacks))
+    logger.debug("Suggested Feedback suggestions: %s", suggested_feedbacks)
+
+    return suggested_feedbacks
 
-    # Example use of module config
-    # If you are not using module_config for your module, you can remove it from the function signature
-    logger.info("Config: %s", module_config)
-    if module_config.debug:
-        emit_meta("costs", "100.00€")
-
-    return [
-        # Referenced feedback, line 8-9 in BinarySearch.java
-        Feedback(
-            id=None,
-            exercise_id=exercise.id,
-            submission_id=submission.id,
-            title="This is a suggestion.",
-            description="There is something wrong here.",
-            credits=-1.0,
-            file_path="BinarySearch.java",
-            line_start=8,
-            line_end=9,
-            structured_grading_instruction_id=None,
-            meta={}
-        ),
-        # Referenced feedback, line 13-18 in BinarySearch.java
-        Feedback(
-            id=None,
-            exercise_id=exercise.id,
-            submission_id=submission.id,
-            title="This is a second suggestion.",
-            description="This is very good!",
-            credits=2.0,
-            file_path="BinarySearch.java",
-            line_start=13,
-            line_end=18,
-            structured_grading_instruction_id=None,
-            meta={}
-        ),
-        # Unreferenced feedback without file
-        Feedback(
-            id=None,
-            exercise_id=exercise.id,
-            submission_id=submission.id,
-            title="This is an unreferenced suggestion.",
-            description="General feedback without any reference to the submission.",
-            credits=0.0,
-            file_path=None,
-            line_start=None,
-            line_end=None,
-            structured_grading_instruction_id=None,
-            meta={}
-        ),
-        # Unreferenced feedback in BinarySearch.java
-        Feedback(
-            id=None,
-            exercise_id=exercise.id,
-            submission_id=submission.id,
-            title="This is an unreferenced suggestion in a file.",
-            description="General feedback with only the reference to a file (BinarySearch.java)",
-            credits=0.0,
-            file_path="BinarySearch.java",
-            line_start=None,
-            line_end=None,
-            structured_grading_instruction_id=None,
-            meta={}
-        )
-    ]
 
 
 # Only if it makes sense for a module (Optional)

diff --git a/module_programming_ast/module_programming_ast/remove_overlapping.py b/module_programming_ast/module_programming_ast/remove_overlapping.py
@@ -0,0 +1,40 @@
+"""
+Feedback suggestions can overlap each other, which is not ideal.
+This module removes overlapping suggestions.
+"""
+
+from typing import List
+
+from athena.programming import Feedback
+
+
+def is_overlapping(feedback1: Feedback, feedback2: Feedback) -> bool:
+    """Tell whether two feedbacks reference intersecting line ranges of the same file.
+
+    Feedback lacking a line reference (a start or end of None) never overlaps anything.
+    """
+    if feedback1.file_path != feedback2.file_path:
+        return False  # ranges in different files cannot intersect
+    start1, end1 = feedback1.line_start, feedback1.line_end
+    start2, end2 = feedback2.line_start, feedback2.line_end
+    if start1 is None or end1 is None or start2 is None or end2 is None:
+        return False  # at least one feedback is not tied to specific lines
+    # closed intervals intersect exactly when each one starts no later than the other ends
+    return start1 <= end2 and start2 <= end1
+
+
+def filter_overlapping_suggestions(suggestions: List[Feedback]) -> List[Feedback]:
+    """Filters out overlapping suggestions we don't want to suggest to tutors.
+
+    Arguments:
+        suggestions {list} -- List of suggestions to filter (left unmodified)
+    Returns the kept suggestions, ordered by descending meta["similarity_score"] (a missing score counts as 0).
+    """
+    # rank a sorted copy so the caller's list is not reordered as a side effect
+    ranked = sorted(suggestions, key=lambda s: s.meta.get("similarity_score", 0), reverse=True)
+    # greedily keep a suggestion only if it overlaps none of the better-ranked ones already kept
+    added_suggestions: List[Feedback] = []
+    for suggestion in ranked:
+        if not any(is_overlapping(suggestion, kept) for kept in added_suggestions):
+            added_suggestions.append(suggestion)
+    return added_suggestions
diff --git a/module_programming_ast/module_programming_ast/remove_suspicious.py b/module_programming_ast/module_programming_ast/remove_suspicious.py
@@ -0,0 +1,46 @@
+""" Like in ThemisML, the following problems with the suggestions exist, which is why the filter_suspicious function is needed:
+(1) Sometimes, there was a feedback on something banal like a getter, which was actually meant for another method.
+    This caused suggestions for almost all the other submissions, which were not helpful.
+    We therefore classify a suggestion as "suspicious" if it affects too many other submissions (> 10% and > 2).
+(2) However, this would also sometimes classify a suggestion as suspicious if it is actually helpful.
+    Therefore, we make a suggestion non-suspicious if there are at least 3 other suggestions for the same method.
+    This makes a mistake like described above unlikely.
+(3) Suggestions are also classified as suspicious if they include words that hint at other parts of the code, like
+    "again", "consequential error", "previous", "later", "earlier", "above", "below" (German equivalents are not checked yet).
+"""
+
+from typing import Dict, List, cast
+
+from athena.programming import Feedback
+
+
+def filter_suspicious(suggestions: List[Feedback], n_submissions: int) -> List[Feedback]:
+    """
+    Filters out suspicious suggestions we don't want to suggest to tutors.
+    suggestions: List of suggestions to filter
+    n_submissions: Number of submissions for the exercise
+    Returns the non-suspicious suggestions in their original order; the input list is not modified.
+    """
+    suspicious: Dict[int, bool] = {}  # feedback id: is suspicious
+    for suggestion in suggestions:
+        # (1) suspicious if it affects too many submissions; a missing count is treated as "too many"
+        n_feedback_suggestions = suggestion.meta.get("n_feedback_suggestions", 999999)
+        if n_feedback_suggestions > 2 and n_feedback_suggestions > 0.1 * n_submissions:
+            suspicious[cast(int, suggestion.id)] = True
+        # (2) non-suspicious again if at least 3 other suggestions target the same method in the same file
+        method_name = suggestion.meta.get("method_name")
+        n_others = sum(
+            1 for other in suggestions
+            if other.id != suggestion.id
+            and other.file_path == suggestion.file_path
+            and other.meta.get("method_name") == method_name
+        )
+        if n_others >= 3:
+            suspicious[cast(int, suggestion.id)] = False
+    # (3) suspicious (overriding (2)) if the text hints at other code; lowercased so "Again"/"Above" match too
+    suspicious_words = ["again", "consequential error", "previous", "later", "earlier", "above", "below"]
+    for suggestion in suggestions:
+        description = str(suggestion.description).lower()
+        if any(word in description for word in suspicious_words):
+            suspicious[cast(int, suggestion.id)] = True
+    return [s for s in suggestions if not suspicious.get(cast(int, s.id), False)]