Implement exercise export for expert evaluation
Allows exercises to be exported as JSON with either normal feedback or categorized feedback.
DominikRemo committed Dec 17, 2024
1 parent f6f0527 commit f980d52
Showing 5 changed files with 49 additions and 24 deletions.
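
The change supports two export modes: passing a `feedback_type_filter` such as `"Tutor"` to `group_exercise_data` produces a flat list of feedbacks per submission, while omitting the filter groups feedbacks into a dict keyed by feedback type. A sketch of both calls as the updated scripts use them, assuming `sampled_submissions` is the DataFrame those scripts load:

```python
import pandas as pd

from evaluation.service.json_service import group_exercise_data, exercises_to_json

sampled_submissions = pd.read_csv("../data/2_sampled_submissions.csv")

# Filtered export: each Submission.feedbacks is a flat List[Feedback]
# containing only tutor feedback.
tutor_exercises = group_exercise_data(sampled_submissions, "Tutor")
exercises_to_json(tutor_exercises, "../data/2_exercise_jsons")

# Categorized export: no filter, so each Submission.feedbacks becomes a
# Dict[str, List[Feedback]] keyed by the "feedback_type" column.
categorized_exercises = group_exercise_data(sampled_submissions)
exercises_to_json(categorized_exercises, "../data/3_submissions_with_categorized_feedback_jsons")
```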
7 changes: 4 additions & 3 deletions evaluation/model/model.py
@@ -1,5 +1,6 @@
from dataclasses import dataclass, asdict
-from typing import Optional, List
+from typing import Optional, List, Union, Dict


@dataclass
class Feedback:
@@ -32,9 +33,9 @@ class Submission:
    text: str
    language: str
    meta: dict
-    feedbacks: Optional[List[Feedback]]
+    feedbacks: Optional[Union[List[Feedback], Dict[str, List[Feedback]]]] = None

-    def __init__(self, id: int, text: str, language: str, feedbacks: List[Feedback]) -> None:
+    def __init__(self, id: int, text: str, language: str, feedbacks: Optional[Union[List[Feedback], Dict[str, List[Feedback]]]]) -> None:
        self.id = id
        self.text = text
        self.language = language
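Because `Submission.feedbacks` is now a union of two shapes, consumers have to branch on which one they received. A hypothetical helper (not part of this commit) illustrating the two cases:

```python
from evaluation.model.model import Submission

def count_feedbacks(submission: Submission) -> int:
    # Hypothetical helper: handle both shapes the widened field allows.
    if submission.feedbacks is None:
        return 0
    if isinstance(submission.feedbacks, dict):
        # Categorized: feedback_type -> list of feedbacks.
        return sum(len(fbs) for fbs in submission.feedbacks.values())
    # Flat list of feedbacks.
    return len(submission.feedbacks)
```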
16 changes: 1 addition & 15 deletions evaluation/scripts/2_sampling_submissions.py
@@ -31,19 +31,5 @@
# %%
from evaluation.service.json_service import group_exercise_data, exercises_to_json

-exercises = group_exercise_data(sampled_submissions)
+exercises = group_exercise_data(sampled_submissions, "Tutor")
exercises_to_json(exercises, "../data/2_exercise_jsons")

-# %% [markdown]
-# ## Upload the json exercises from data/2_exercise_jsons to the playground. In evaluation mode, generate feedback for each exercise and export the results. Make sure that the configuration names for feedback generation do not contain underscores '_'.
-#
-# The downloaded json files should have the following naming scheme:
-#
-# ```text_results_<Configuration name (e.g.: LLM)>_<...>```
-#
-# **Do not change the names of the downloaded files!**
-#
-# Save these files in the data/3_feedback_suggestions directory
-
-# %% [markdown]
-# # Continue in the next notebook: [3_feedback_generation.ipynb](3_feedback_generation.ipynb)
17 changes: 17 additions & 0 deletions evaluation/scripts/3_feedback_generation.py
@@ -18,6 +18,17 @@

sampled_submissions = pd.read_csv("../data/2_sampled_submissions.csv")

+# %% [markdown]
+# ### Upload the json files from data/2_exercise_jsons to the playground. In evaluation mode, generate feedback for each exercise and export the results. Make sure that the configuration names for feedback generation do not contain underscores '_'.
+#
+# The downloaded json files should have the following naming scheme:
+#
+# ```text_results_<Configuration name (e.g.: LLM)>_<...>```
+#
+# **Do not change the names of the downloaded files!**
+#
+# Save these files in the data/3_feedback_suggestions directory
+
# %%
from evaluation.service.json_service import read_result_files_to_dataframe, add_feedback_suggestions_to_data, fill_missing_feedback_with_tutor_feedback

@@ -28,6 +39,12 @@

sampled_submissions_with_feedback.to_csv("../data/3_sampled_submissions_with_feedback.csv", index=False)

+# %%
+from evaluation.service.json_service import group_exercise_data, exercises_to_json
+
+exercises = group_exercise_data(sampled_submissions)
+exercises_to_json(exercises, "../data/3_submissions_with_categorized_feedback_jsons")
+
# %%
# Group by 'exercise_id' and 'result_score' to calculate submission counts
grouped_data = (
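The repeated warning that configuration names must not contain underscores follows from the naming scheme above: if the loader recovers the configuration name by splitting the filename on `_`, an underscore inside the name shifts the fields. A hypothetical illustration of the failure mode (the actual parsing in `read_result_files_to_dataframe` may differ):

```python
def config_name_from_filename(filename: str) -> str:
    # Hypothetical parser for "text_results_<Configuration name>_<...>".
    return filename.split("_")[2]

print(config_name_from_filename("text_results_LLM_1.json"))     # "LLM" -- as intended
print(config_name_from_filename("text_results_my_llm_1.json"))  # "my"  -- truncated at the underscore
```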
13 changes: 13 additions & 0 deletions evaluation/scripts/4_expert_evaluation.py
@@ -14,3 +14,16 @@
# ---

# %%
+import pandas as pd
+
+sampled_submissions = pd.read_csv("../data/3_sampled_submissions_with_feedback.csv")
+
+# %% [markdown]
+# ### Import the json files from data/3_submissions_with_categorized_feedback_jsons into a new expert evaluation in the playground. Conduct the evaluation and download the results as well as the evaluation configuration.
+#
+# **Do not change the names of the downloaded files!**
+#
+# Save these files in the data/4_expert_evaluation directory
+
+# %%
+# TODO: Implement the expert evaluation import
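
The import itself is left as a TODO here; one possible shape for it, assuming the playground downloads land as JSON files in `data/4_expert_evaluation` (all names below are hypothetical, not part of this commit):

```python
import json
from pathlib import Path

# Hypothetical loader: collect every downloaded evaluation export.
evaluation_results = []
for path in Path("../data/4_expert_evaluation").glob("*.json"):
    with path.open() as file:
        evaluation_results.append(json.load(file))
```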
20 changes: 14 additions & 6 deletions evaluation/service/json_service.py
@@ -1,6 +1,6 @@
import json
import os
-from typing import List
+from typing import List, Dict, Union
import pandas as pd

from evaluation.model.model import Exercise, Feedback, Submission, GradingCriterion, StructuredGradingInstruction
@@ -26,7 +26,7 @@ def validate_columns(df: pd.DataFrame, required_columns: List[str]) -> None:
)


-def group_exercise_data(df: pd.DataFrame, feedback_type_filter: str = "Tutor") -> List[Exercise]:
+def group_exercise_data(df: pd.DataFrame, feedback_type_filter: str = None) -> List[Exercise]:
    """
    Groups exercises, submissions, grading instructions, and feedback of specified type into a structured format.
@@ -38,8 +38,10 @@ def group_exercise_data(df: pd.DataFrame, feedback_type_filter: str = "Tutor") -
        List[Exercise]: A list of Exercise objects.
    """

-    def process_feedbacks(submission_group: pd.DataFrame, exercise_id: int, submission_id: int) -> List[Feedback]:
+    def process_feedbacks(submission_group: pd.DataFrame, exercise_id: int, submission_id: int) -> Union[
+            List[Feedback], Dict[str, List[Feedback]]]:
        """Process feedbacks for a submission."""
+        categorized_feedbacks = {}
        feedbacks = []
        for feedback_id, feedback_group in submission_group.groupby("feedback_id"):
            feedback_details = feedback_group.iloc[0]
@@ -54,8 +56,13 @@ def process_feedbacks(submission_group: pd.DataFrame, exercise_id: int, submissi
                exercise_id=exercise_id,
                submission_id=submission_id
            )
-            feedbacks.append(feedback)
-        return feedbacks
+
+            if feedback_type_filter:  # Single feedback type
+                feedbacks.append(feedback)
+            else:  # Categorized feedback
+                categorized_feedbacks.setdefault(feedback_details["feedback_type"], []).append(feedback)
+
+        return feedbacks if feedback_type_filter else categorized_feedbacks

    def process_submissions(exercise_group: pd.DataFrame, exercise_id: int) -> List[Submission]:
        """Process submissions for an exercise."""
@@ -115,7 +122,8 @@ def process_grading_criteria(exercise_group: pd.DataFrame) -> List[GradingCriter
    ]
    validate_columns(df, required_columns)

-    df = df[df["feedback_type"] == feedback_type_filter]
+    if feedback_type_filter:
+        df = df[df["feedback_type"] == feedback_type_filter]

    exercises = []
    for exercise_id, exercise_group in df.groupby("exercise_id"):
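
The categorized branch relies on `dict.setdefault` to create the per-type list on first use and append to it afterwards. A standalone sketch of that grouping pattern, with plain strings standing in for `Feedback` objects:

```python
rows = [("Tutor", "feedback 1"), ("LLM", "feedback 2"), ("Tutor", "feedback 3")]

categorized = {}
for feedback_type, feedback in rows:
    # setdefault inserts an empty list the first time a key is seen,
    # then returns the stored list so the feedback can be appended.
    categorized.setdefault(feedback_type, []).append(feedback)

assert categorized == {"Tutor": ["feedback 1", "feedback 3"], "LLM": ["feedback 2"]}
```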
