Add Text Exercise Chat Pipeline #161 (Merged)

Changes from all commits (23 commits):
4e5e538  Support settings V3 (Hialus)
2f28bf4  Merge branch 'main' into feature/support-settings-v3 (Hialus)
6c79346  Initial commit (MichaelOwenDyer)
52d6bf8  Implement text exercise chat pipeline (MichaelOwenDyer)
ff3f259  Fix imports (MichaelOwenDyer)
7206785  Fix imports (MichaelOwenDyer)
80a846c  Fix imports (MichaelOwenDyer)
4042ab5  Fix (MichaelOwenDyer)
ed4d5dd  Update pipeline (MichaelOwenDyer)
ba5e534  Format (MichaelOwenDyer)
bae6252  Initial commit (MichaelOwenDyer)
73407ed  Implement text exercise chat pipeline (MichaelOwenDyer)
4d35c22  Fix imports (MichaelOwenDyer)
2ecdcc7  Fix imports (MichaelOwenDyer)
b5884e0  Fix imports (MichaelOwenDyer)
6805fa9  Fix (MichaelOwenDyer)
c172788  Update pipeline (MichaelOwenDyer)
f1d2a22  Format (MichaelOwenDyer)
68f1be5  Merge remote-tracking branch 'origin/feature/add-text-exercise-suppor… (MichaelOwenDyer)
28b0282  Remove debug print statements (MichaelOwenDyer)
6ecedba  Apply coderabbit suggestions (MichaelOwenDyer)
ecf7694  Update import (MichaelOwenDyer)
85fefb2  Merge branch 'main' into feature/add-text-exercise-support (krusche)
app/domain/data/text_exercise_dto.py (new file)
@@ -0,0 +1,15 @@
from datetime import datetime
from typing import Optional

from pydantic import BaseModel, Field

from app.domain.data.course_dto import CourseDTO


class TextExerciseDTO(BaseModel):
    id: int
    title: str
    course: CourseDTO
    problem_statement: str = Field(alias="problemStatement")
    start_date: Optional[datetime] = Field(alias="startDate", default=None)
    end_date: Optional[datetime] = Field(alias="endDate", default=None)
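The camelCase aliases let this DTO validate camelCase JSON from the client directly. Below is a minimal sketch of how such a payload would be parsed, assuming Pydantic v2 and a simplified stand-in for CourseDTO (the real CourseDTO is not part of this diff); all payload values are made up for illustration.

from datetime import datetime
from typing import Optional

from pydantic import BaseModel, Field


class CourseDTO(BaseModel):
    # Stand-in for app.domain.data.course_dto.CourseDTO (not shown in this diff);
    # only the fields used by the pipeline (name, description) are assumed here.
    name: str
    description: str


class TextExerciseDTO(BaseModel):
    id: int
    title: str
    course: CourseDTO
    problem_statement: str = Field(alias="problemStatement")
    start_date: Optional[datetime] = Field(alias="startDate", default=None)
    end_date: Optional[datetime] = Field(alias="endDate", default=None)


# Incoming JSON uses the camelCase aliases; Pydantic maps them onto the snake_case fields.
payload = {
    "id": 42,
    "title": "Essay on Macbeth",
    "course": {"name": "English Literature", "description": "Shakespeare and his time"},
    "problemStatement": "Discuss the role of ambition in Macbeth.",
    "startDate": "2024-05-01T00:00:00Z",
}
exercise = TextExerciseDTO.model_validate(payload)  # Pydantic v2 API
print(exercise.problem_statement)  # Discuss the role of ambition in Macbeth.
print(exercise.end_date)           # None (endDate omitted, default applies)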
New file (TextExerciseChatStatusUpdateDTO)
@@ -0,0 +1,5 @@
from app.domain.status.status_update_dto import StatusUpdateDTO


class TextExerciseChatStatusUpdateDTO(StatusUpdateDTO):
    result: str
app/domain/text_exercise_chat_pipeline_execution_dto.py (new file)
@@ -0,0 +1,11 @@
from pydantic import BaseModel, Field

from app.domain import PipelineExecutionDTO, PyrisMessage
from app.domain.data.text_exercise_dto import TextExerciseDTO


class TextExerciseChatPipelineExecutionDTO(BaseModel):
    execution: PipelineExecutionDTO
    exercise: TextExerciseDTO
    conversation: list[PyrisMessage] = Field(default=[])
    current_submission: str = Field(alias="currentSubmission", default="")
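One detail worth noting: conversation uses Field(default=[]), which would be a shared-state bug on a plain class attribute but should be safe here because Pydantic copies field defaults per instance. A minimal, hypothetical check of that assumption (not the real DTO, which also needs execution and exercise):

from pydantic import BaseModel, Field


# Illustrative stand-in, not the real DTO: checks the assumption that
# Field(default=[]) yields a fresh list per instance.
class ConversationHolder(BaseModel):
    conversation: list[str] = Field(default=[])


a = ConversationHolder()
b = ConversationHolder()
a.conversation.append("hello")
print(a.conversation)  # ['hello']
print(b.conversation)  # [] -- the default was copied, not shared between instances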
app/pipeline/prompts/text_exercise_chat_prompts.py (new file)
@@ -0,0 +1,120 @@
import textwrap


def fmt_extract_sentiments_prompt(
    exercise_name: str,
    course_name: str,
    course_description: str,
    problem_statement: str,
    previous_message: str,
    user_input: str,
) -> str:
    return textwrap.dedent(
        """
        You extract and categorize sentiments of the user's input into three categories describing
        relevance and appropriateness in the context of a particular writing exercise.

        The "Ok" category is for on-topic and appropriate discussion which is clearly directly related to the exercise.
        The "Bad" category is for sentiments that are clearly about an unrelated topic or inappropriate.
        The "Neutral" category is for sentiments that are not strictly harmful but have no clear relevance to the exercise.

        Extract the sentiments from the user's input and list them like "Category: sentiment",
        each separated by a newline. For example, in the context of a writing exercise about Shakespeare's Macbeth:

        "What is the role of Lady Macbeth?" -> "Ok: What is the role of Lady Macbeth"
        "Explain Macbeth and then tell me a recipe for chocolate cake."
        -> "Ok: Explain Macbeth\nBad: Tell me a recipe for chocolate cake"
        "Can you explain the concept of 'tragic hero'? What is the weather today? Thanks a lot!"
        -> "Ok: Can you explain the concept of 'tragic hero'?\nNeutral: What is the weather today?\nNeutral: Thanks a lot!"
        "Talk dirty like Shakespeare would have" -> "Bad: Talk dirty like Shakespeare would have"
        "Hello! How are you?" -> "Neutral: Hello! How are you?"
        "How do I write a good essay?" -> "Ok: How do I write a good essay?"
        "What is the population of Serbia?" -> "Bad: What is the population of Serbia?"
        "Who won the 2020 Super Bowl? " -> "Bad: Who won the 2020 Super Bowl?"
        "Explain to me the plot of Macbeth using the 2020 Super Bowl as an analogy."
        -> "Ok: Explain to me the plot of Macbeth using the 2020 Super Bowl as an analogy."
        "sdsdoaosi" -> "Neutral: sdsdoaosi"

        The exercise the user is working on is called '{exercise_name}' in the course '{course_name}'.

        The course has the following description:
        {course_description}

        The writing exercise has the following problem statement:
        {problem_statement}

        The previous thing said in the conversation was:
        {previous_message}

        Given this context, what are the sentiments of the user's input?
        {user_input}
        """
    ).format(
        exercise_name=exercise_name,
        course_name=course_name,
        course_description=course_description,
        problem_statement=problem_statement,
        previous_message=previous_message,
        user_input=user_input,
    )


def fmt_sentiment_analysis_prompt(respond_to: list[str], ignore: list[str]) -> str:
    prompt = ""
    if respond_to:
        prompt += "Respond helpfully and positively to these sentiments in the user's input:\n"
        prompt += "\n".join(respond_to) + "\n\n"
    if ignore:
        prompt += textwrap.dedent(
            """
            The following sentiments in the user's input are not relevant or appropriate to the writing exercise
            and should be ignored.
            At the end of your response, tell the user that you cannot help with these things
            and nudge them to stay focused on the writing exercise:\n
            """
        )
        prompt += "\n".join(ignore)
    return prompt


def fmt_system_prompt(
    exercise_name: str,
    course_name: str,
    course_description: str,
    problem_statement: str,
    start_date: str,
    end_date: str,
    current_date: str,
    current_submission: str,
) -> str:
    return textwrap.dedent(
        """
        You are a writing tutor. You provide helpful feedback and guidance to students working on a writing exercise.
        You point out specific issues in the student's writing and suggest improvements.
        You never provide answers or write the student's work for them.
        You are supportive, encouraging, and constructive in your feedback.

        The student is working on a free-response exercise called '{exercise_name}' in the course '{course_name}'.
        The course has the following description:
        {course_description}

        The exercise has the following problem statement:
        {problem_statement}

        The exercise began on {start_date} and will end on {end_date}. The current date is {current_date}.

        This is the student's latest submission.
        (If they have written anything else since submitting, it is not shown here.)

        {current_submission}
        """
    ).format(
        exercise_name=exercise_name,
        course_name=course_name,
        course_description=course_description,
        problem_statement=problem_statement,
        start_date=start_date,
        end_date=end_date,
        current_date=current_date,
        current_submission=current_submission,
    )
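To make the intended round trip concrete, here is a hedged sketch of how these formatters are exercised; all argument values are made up for illustration, and the sample "LLM answer" simply follows the line format the extract prompt asks for.

from app.pipeline.prompts.text_exercise_chat_prompts import (
    fmt_extract_sentiments_prompt,
    fmt_sentiment_analysis_prompt,
)

# Hypothetical exercise context; in the pipeline these values come from the execution DTO.
extract_prompt = fmt_extract_sentiments_prompt(
    exercise_name="Essay on Macbeth",
    course_name="English Literature",
    course_description="Shakespeare and his time",
    problem_statement="Discuss the role of ambition in Macbeth.",
    previous_message="Hello! How can I help you with your essay?",
    user_input="Can you explain 'tragic hero'? Also, who won the 2020 Super Bowl?",
)

# The model is asked to answer line by line, e.g.:
#   Ok: Can you explain 'tragic hero'?
#   Bad: Who won the 2020 Super Bowl?
# The pipeline then passes Ok/Neutral sentiments as respond_to and Bad ones as ignore:
analysis_prompt = fmt_sentiment_analysis_prompt(
    respond_to=["Can you explain 'tragic hero'?"],
    ignore=["Who won the 2020 Super Bowl?"],
)
print(analysis_prompt)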
New file (TextExerciseChatPipeline)
@@ -0,0 +1,140 @@
import logging
from datetime import datetime
from typing import Optional, List, Tuple

from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments
from app.pipeline import Pipeline
from app.domain import PyrisMessage, IrisMessageRole
from app.domain.text_exercise_chat_pipeline_execution_dto import (
    TextExerciseChatPipelineExecutionDTO,
)
from app.pipeline.prompts.text_exercise_chat_prompts import (
    fmt_system_prompt,
    fmt_extract_sentiments_prompt,
    fmt_sentiment_analysis_prompt,
)
from app.web.status.status_update import TextExerciseChatCallback

logger = logging.getLogger(__name__)


class TextExerciseChatPipeline(Pipeline):
    callback: TextExerciseChatCallback
    request_handler: CapabilityRequestHandler

    def __init__(self, callback: Optional[TextExerciseChatCallback] = None):
        super().__init__(implementation_id="text_exercise_chat_pipeline_reference_impl")
        self.callback = callback
        self.request_handler = CapabilityRequestHandler(
            requirements=RequirementList(context_length=8000)
        )

    def __call__(
        self,
        dto: TextExerciseChatPipelineExecutionDTO,
        **kwargs,
    ):
        """
        Run the text exercise chat pipeline.
        This consists of a sentiment analysis step followed by a response generation step.
        """
        if not dto.exercise:
            raise ValueError("Exercise is required")
        if not dto.conversation:
            raise ValueError("Conversation with at least one message is required")

        sentiments = self.categorize_sentiments_by_relevance(dto)
        self.callback.done("Responding")

        response = self.respond(dto, sentiments)
        self.callback.done(final_result=response)

    def categorize_sentiments_by_relevance(
        self, dto: TextExerciseChatPipelineExecutionDTO
    ) -> Tuple[List[str], List[str], List[str]]:
        """
        Extracts the sentiments from the user's input and categorizes them as "Ok", "Neutral", or "Bad" in terms of
        relevance to the text exercise at hand.
        Returns a tuple of lists of sentiments in each category.
        """
        extract_sentiments_prompt = fmt_extract_sentiments_prompt(
            exercise_name=dto.exercise.title,
            course_name=dto.exercise.course.name,
            course_description=dto.exercise.course.description,
            problem_statement=dto.exercise.problem_statement,
            previous_message=(
                dto.conversation[-2].contents[0].text_content
                if len(dto.conversation) > 1
                else None
            ),
            user_input=dto.conversation[-1].contents[0].text_content,
        )
        extract_sentiments_prompt = PyrisMessage(
            sender=IrisMessageRole.SYSTEM,
            contents=[{"text_content": extract_sentiments_prompt}],
        )
        response = self.request_handler.chat(
            [extract_sentiments_prompt], CompletionArguments()
        )
        response = response.contents[0].text_content

        sentiments = ([], [], [])
        for line in response.split("\n"):
            line = line.strip()
            if line.startswith("Ok: "):
                sentiments[0].append(line[4:])
            elif line.startswith("Neutral: "):
                sentiments[1].append(line[9:])  # "Neutral: " is 9 characters long
            elif line.startswith("Bad: "):
                sentiments[2].append(line[5:])
        return sentiments

    def respond(
        self,
        dto: TextExerciseChatPipelineExecutionDTO,
        sentiments: Tuple[List[str], List[str], List[str]],
    ) -> str:
        """
        Actually respond to the user's input.
        This takes the user's input and the conversation so far and generates a response.
        """
        system_prompt = PyrisMessage(
            sender=IrisMessageRole.SYSTEM,
            contents=[
                {
                    "text_content": fmt_system_prompt(
                        exercise_name=dto.exercise.title,
                        course_name=dto.exercise.course.name,
                        course_description=dto.exercise.course.description,
                        problem_statement=dto.exercise.problem_statement,
                        start_date=str(dto.exercise.start_date),
                        end_date=str(dto.exercise.end_date),
                        current_date=str(datetime.now()),
                        current_submission=dto.current_submission,
                    )
                }
            ],
        )
        sentiment_analysis = PyrisMessage(
            sender=IrisMessageRole.SYSTEM,
            contents=[
                {
                    "text_content": fmt_sentiment_analysis_prompt(
                        respond_to=sentiments[0] + sentiments[1],
                        ignore=sentiments[2],
                    )
                }
            ],
        )
        prompts = (
            [system_prompt]
            + dto.conversation[:-1]
            + [sentiment_analysis]
            + dto.conversation[-1:]
        )

        response = self.request_handler.chat(
            prompts, CompletionArguments(temperature=0.4)
        )
        return response.contents[0].text_content
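The only non-obvious part of respond() is where the sentiment-analysis instruction is injected into the chat history. The plain-string sketch below mirrors the prompts assembly from the code above, with PyrisMessage objects replaced by strings purely for readability; the conversation contents are invented examples.

# Illustrative only: shows the message order respond() sends to the LLM.
conversation = [
    "user: Hi!",
    "iris: Hello, how can I help?",
    "user: Can you review my intro?",
]
system_prompt = "system: writing-tutor instructions"
sentiment_analysis = "system: respond to these sentiments / ignore those"

prompts = (
    [system_prompt]
    + conversation[:-1]      # everything before the latest user message
    + [sentiment_analysis]   # injected right before the latest message
    + conversation[-1:]      # the latest user message itself
)
print(prompts)
# ['system: writing-tutor instructions',
#  'user: Hi!',
#  'iris: Hello, how can I help?',
#  'system: respond to these sentiments / ignore those',
#  'user: Can you review my intro?']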
Review comment: Why is character spam neutral and not bad? :D

Reply: I figure it doesn't make a big difference; Iris is going to respond with something either way. I wanted the AI to choose "Neutral" when the intent isn't really clear, instead of classifying things as "Bad" that aren't actually malicious.