diff --git a/app/domain/data/text_exercise_dto.py b/app/domain/data/text_exercise_dto.py new file mode 100644 index 00000000..098779f3 --- /dev/null +++ b/app/domain/data/text_exercise_dto.py @@ -0,0 +1,15 @@ +from datetime import datetime +from typing import Optional + +from pydantic import BaseModel, Field + +from app.domain.data.course_dto import CourseDTO + + +class TextExerciseDTO(BaseModel): + id: int + title: str + course: CourseDTO + problem_statement: str = Field(alias="problemStatement") + start_date: Optional[datetime] = Field(alias="startDate", default=None) + end_date: Optional[datetime] = Field(alias="endDate", default=None) diff --git a/app/domain/status/text_exercise_chat_status_update_dto.py b/app/domain/status/text_exercise_chat_status_update_dto.py new file mode 100644 index 00000000..a825e92f --- /dev/null +++ b/app/domain/status/text_exercise_chat_status_update_dto.py @@ -0,0 +1,5 @@ +from app.domain.status.status_update_dto import StatusUpdateDTO + + +class TextExerciseChatStatusUpdateDTO(StatusUpdateDTO): + result: str diff --git a/app/domain/text_exercise_chat_pipeline_execution_dto.py b/app/domain/text_exercise_chat_pipeline_execution_dto.py new file mode 100644 index 00000000..65e8871c --- /dev/null +++ b/app/domain/text_exercise_chat_pipeline_execution_dto.py @@ -0,0 +1,11 @@ +from pydantic import BaseModel, Field + +from app.domain import PipelineExecutionDTO, PyrisMessage +from app.domain.data.text_exercise_dto import TextExerciseDTO + + +class TextExerciseChatPipelineExecutionDTO(BaseModel): + execution: PipelineExecutionDTO + exercise: TextExerciseDTO + conversation: list[PyrisMessage] = Field(default=[]) + current_submission: str = Field(alias="currentSubmission", default="") diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py index 9ec9d0d1..8688149d 100644 --- a/app/llm/external/openai_chat.py +++ b/app/llm/external/openai_chat.py @@ -1,10 +1,16 @@ import logging import time -import traceback from datetime import datetime from typing import Literal, Any, Optional -from openai import OpenAI +from openai import ( + OpenAI, + APIError, + APITimeoutError, + RateLimitError, + InternalServerError, + ContentFilterFinishReasonError, +) from openai.lib.azure import AzureOpenAI from openai.types import CompletionUsage from openai.types.chat import ChatCompletionMessage, ChatCompletionMessageParam @@ -97,16 +103,19 @@ def chat( ) -> PyrisMessage: print("Sending messages to OpenAI", messages) # noinspection PyTypeChecker - retries = 10 + retries = 5 backoff_factor = 2 initial_delay = 1 + # Maximum wait time: 1 + 2 + 4 + 8 + 16 = 31 seconds + + messages = convert_to_open_ai_messages(messages) for attempt in range(retries): try: if arguments.response_format == "JSON": response = self._client.chat.completions.create( model=self.model, - messages=convert_to_open_ai_messages(messages), + messages=messages, temperature=arguments.temperature, max_tokens=arguments.max_tokens, response_format=ResponseFormatJSONObject(type="json_object"), @@ -114,20 +123,29 @@ def chat( else: response = self._client.chat.completions.create( model=self.model, - messages=convert_to_open_ai_messages(messages), + messages=messages, temperature=arguments.temperature, max_tokens=arguments.max_tokens, ) - return convert_to_iris_message( - response.choices[0].message, response.usage, response.model - ) - except Exception as e: + choice = response.choices[0] + if choice.finish_reason == "content_filter": + # I figured that an openai error would be automatically raised if the content filter activated, + # but it seems that that is not the case. + # We don't want to retry because the same message will likely be rejected again. + # Raise an exception to trigger the global error handler and report a fatal error to the client. + raise ContentFilterFinishReasonError() + return convert_to_iris_message(choice.message) + except ( + APIError, + APITimeoutError, + RateLimitError, + InternalServerError, + ): wait_time = initial_delay * (backoff_factor**attempt) - logging.warning(f"Exception on attempt {attempt + 1}: {e}") - traceback.print_exc() + logging.exception(f"OpenAI error on attempt {attempt + 1}:") logging.info(f"Retrying in {wait_time} seconds...") time.sleep(wait_time) - logging.error("Failed to interpret image after several attempts.") + raise Exception(f"Failed to get response from OpenAI after {retries} retries") class DirectOpenAIChatModel(OpenAIChatModel): diff --git a/app/llm/external/openai_embeddings.py b/app/llm/external/openai_embeddings.py index 243860df..1f9106e6 100644 --- a/app/llm/external/openai_embeddings.py +++ b/app/llm/external/openai_embeddings.py @@ -1,6 +1,12 @@ import logging from typing import Literal, Any -from openai import OpenAI +from openai import ( + OpenAI, + APIError, + APITimeoutError, + RateLimitError, + InternalServerError, +) from openai.lib.azure import AzureOpenAI from ...llm.external.model import EmbeddingModel @@ -13,9 +19,10 @@ class OpenAIEmbeddingModel(EmbeddingModel): _client: OpenAI def embed(self, text: str) -> list[float]: - retries = 10 + retries = 5 backoff_factor = 2 initial_delay = 1 + # Maximum wait time: 1 + 2 + 4 + 8 + 16 = 31 seconds for attempt in range(retries): try: @@ -25,15 +32,17 @@ def embed(self, text: str) -> list[float]: encoding_format="float", ) return response.data[0].embedding - except Exception as e: + except ( + APIError, + APITimeoutError, + RateLimitError, + InternalServerError, + ): wait_time = initial_delay * (backoff_factor**attempt) - logging.warning(f"Rate limit exceeded on attempt {attempt + 1}: {e}") + logging.exception(f"OpenAI error on attempt {attempt + 1}") logging.info(f"Retrying in {wait_time} seconds...") time.sleep(wait_time) - logging.error( - "Failed to get embedding after several attempts due to rate limit." - ) - return [] + raise Exception(f"Failed to get embedding from OpenAI after {retries} retries.") class DirectOpenAIEmbeddingModel(OpenAIEmbeddingModel): diff --git a/app/pipeline/prompts/text_exercise_chat_prompts.py b/app/pipeline/prompts/text_exercise_chat_prompts.py new file mode 100644 index 00000000..477e4c4e --- /dev/null +++ b/app/pipeline/prompts/text_exercise_chat_prompts.py @@ -0,0 +1,120 @@ +import textwrap + + +def fmt_extract_sentiments_prompt( + exercise_name: str, + course_name: str, + course_description: str, + problem_statement: str, + previous_message: str, + user_input: str, +) -> str: + return textwrap.dedent( + """ + You extract and categorize sentiments of the user's input into three categories describing + relevance and appropriateness in the context of a particular writing exercise. + + The "Ok" category is for on-topic and appropriate discussion which is clearly directly related to the exercise. + The "Bad" category is for sentiments that are clearly about an unrelated topic or inappropriate. + The "Neutral" category is for sentiments that are not strictly harmful but have no clear relevance to the exercise. + + Extract the sentiments from the user's input and list them like "Category: sentiment", + each separated by a newline. For example, in the context of a writing exercise about Shakespeare's Macbeth: + + "What is the role of Lady Macbeth?" -> "Ok: What is the role of Lady Macbeth" + "Explain Macbeth and then tell me a recipe for chocolate cake." + -> "Ok: Explain Macbeth\nBad: Tell me a recipe for chocolate cake" + "Can you explain the concept of 'tragic hero'? What is the weather today? Thanks a lot!" + -> "Ok: Can you explain the concept of 'tragic hero'?\nNeutral: What is the weather today?\nNeutral: Thanks a lot!" + "Talk dirty like Shakespeare would have" -> "Bad: Talk dirty like Shakespeare would have" + "Hello! How are you?" -> "Neutral: Hello! How are you?" + "How do I write a good essay?" -> "Ok: How do I write a good essay?" + "What is the population of Serbia?" -> "Bad: What is the population of Serbia?" + "Who won the 2020 Super Bowl? " -> "Bad: Who won the 2020 Super Bowl?" + "Explain to me the plot of Macbeth using the 2020 Super Bowl as an analogy." + -> "Ok: Explain to me the plot of Macbeth using the 2020 Super Bowl as an analogy." + "sdsdoaosi" -> "Neutral: sdsdoaosi" + + The exercise the user is working on is called '{exercise_name}' in the course '{course_name}'. + + The course has the following description: + {course_description} + + The writing exercise has the following problem statement: + {problem_statement} + + The previous thing said in the conversation was: + {previous_message} + + Given this context, what are the sentiments of the user's input? + {user_input} + """ + ).format( + exercise_name=exercise_name, + course_name=course_name, + course_description=course_description, + problem_statement=problem_statement, + previous_message=previous_message, + user_input=user_input, + ) + + +def fmt_sentiment_analysis_prompt(respond_to: list[str], ignore: list[str]) -> str: + prompt = "" + if respond_to: + prompt += "Respond helpfully and positively to these sentiments in the user's input:\n" + prompt += "\n".join(respond_to) + "\n\n" + if ignore: + prompt += textwrap.dedent( + """ + The following sentiments in the user's input are not relevant or appropriate to the writing exercise + and should be ignored. + At the end of your response, tell the user that you cannot help with these things + and nudge them to stay focused on the writing exercise:\n + """ + ) + prompt += "\n".join(ignore) + return prompt + + +def fmt_system_prompt( + exercise_name: str, + course_name: str, + course_description: str, + problem_statement: str, + start_date: str, + end_date: str, + current_date: str, + current_submission: str, +) -> str: + return textwrap.dedent( + """ + You are a writing tutor. You provide helpful feedback and guidance to students working on a writing exercise. + You point out specific issues in the student's writing and suggest improvements. + You never provide answers or write the student's work for them. + You are supportive, encouraging, and constructive in your feedback. + + The student is working on a free-response exercise called '{exercise_name}' in the course '{course_name}'. + The course has the following description: + {course_description} + + The exercise has the following problem statement: + {problem_statement} + + The exercise began on {start_date} and will end on {end_date}. The current date is {current_date}. + + This is the student's latest submission. + (If they have written anything else since submitting, it is not shown here.) + + {current_submission} + """ + ).format( + exercise_name=exercise_name, + course_name=course_name, + course_description=course_description, + problem_statement=problem_statement, + start_date=start_date, + end_date=end_date, + current_date=current_date, + current_submission=current_submission, + ) diff --git a/app/pipeline/text_exercise_chat_pipeline.py b/app/pipeline/text_exercise_chat_pipeline.py new file mode 100644 index 00000000..5d27fc71 --- /dev/null +++ b/app/pipeline/text_exercise_chat_pipeline.py @@ -0,0 +1,140 @@ +import logging +from datetime import datetime +from typing import Optional, List, Tuple + +from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments +from app.pipeline import Pipeline +from app.domain import PyrisMessage, IrisMessageRole +from app.domain.text_exercise_chat_pipeline_execution_dto import ( + TextExerciseChatPipelineExecutionDTO, +) +from app.pipeline.prompts.text_exercise_chat_prompts import ( + fmt_system_prompt, + fmt_extract_sentiments_prompt, +) +from app.web.status.status_update import TextExerciseChatCallback +from app.pipeline.prompts.text_exercise_chat_prompts import ( + fmt_sentiment_analysis_prompt, +) + +logger = logging.getLogger(__name__) + + +class TextExerciseChatPipeline(Pipeline): + callback: TextExerciseChatCallback + request_handler: CapabilityRequestHandler + + def __init__(self, callback: Optional[TextExerciseChatCallback] = None): + super().__init__(implementation_id="text_exercise_chat_pipeline_reference_impl") + self.callback = callback + self.request_handler = CapabilityRequestHandler( + requirements=RequirementList(context_length=8000) + ) + + def __call__( + self, + dto: TextExerciseChatPipelineExecutionDTO, + **kwargs, + ): + """ + Run the text exercise chat pipeline. + This consists of a sentiment analysis step followed by a response generation step. + """ + if not dto.exercise: + raise ValueError("Exercise is required") + if not dto.conversation: + raise ValueError("Conversation with at least one message is required") + + sentiments = self.categorize_sentiments_by_relevance(dto) + self.callback.done("Responding") + + response = self.respond(dto, sentiments) + self.callback.done(final_result=response) + + def categorize_sentiments_by_relevance( + self, dto: TextExerciseChatPipelineExecutionDTO + ) -> Tuple[List[str], List[str], List[str]]: + """ + Extracts the sentiments from the user's input and categorizes them as "Ok", "Neutral", or "Bad" in terms of + relevance to the text exercise at hand. + Returns a tuple of lists of sentiments in each category. + """ + extract_sentiments_prompt = fmt_extract_sentiments_prompt( + exercise_name=dto.exercise.title, + course_name=dto.exercise.course.name, + course_description=dto.exercise.course.description, + problem_statement=dto.exercise.problem_statement, + previous_message=( + dto.conversation[-2].contents[0].text_content + if len(dto.conversation) > 1 + else None + ), + user_input=dto.conversation[-1].contents[0].text_content, + ) + extract_sentiments_prompt = PyrisMessage( + sender=IrisMessageRole.SYSTEM, + contents=[{"text_content": extract_sentiments_prompt}], + ) + response = self.request_handler.chat( + [extract_sentiments_prompt], CompletionArguments() + ) + response = response.contents[0].text_content + sentiments = ([], [], []) + for line in response.split("\n"): + line = line.strip() + if line.startswith("Ok: "): + sentiments[0].append(line[4:]) + elif line.startswith("Neutral: "): + sentiments[1].append(line[10:]) + elif line.startswith("Bad: "): + sentiments[2].append(line[5:]) + return sentiments + + def respond( + self, + dto: TextExerciseChatPipelineExecutionDTO, + sentiments: Tuple[List[str], List[str], List[str]], + ) -> str: + """ + Actually respond to the user's input. + This takes the user's input and the conversation so far and generates a response. + """ + system_prompt = PyrisMessage( + sender=IrisMessageRole.SYSTEM, + contents=[ + { + "text_content": fmt_system_prompt( + exercise_name=dto.exercise.title, + course_name=dto.exercise.course.name, + course_description=dto.exercise.course.description, + problem_statement=dto.exercise.problem_statement, + start_date=str(dto.exercise.start_date), + end_date=str(dto.exercise.end_date), + current_date=str(datetime.now()), + current_submission=dto.current_submission, + ) + } + ], + ) + sentiment_analysis = PyrisMessage( + sender=IrisMessageRole.SYSTEM, + contents=[ + { + "text_content": fmt_sentiment_analysis_prompt( + respond_to=sentiments[0] + sentiments[1], + ignore=sentiments[2], + ) + } + ], + ) + prompts = ( + [system_prompt] + + dto.conversation[:-1] + + [sentiment_analysis] + + dto.conversation[-1:] + ) + + response = self.request_handler.chat( + prompts, CompletionArguments(temperature=0.4) + ) + return response.contents[0].text_content diff --git a/app/web/routers/pipelines.py b/app/web/routers/pipelines.py index eb198199..7bc047c0 100644 --- a/app/web/routers/pipelines.py +++ b/app/web/routers/pipelines.py @@ -21,6 +21,11 @@ from app.dependencies import TokenValidator from app.domain import FeatureDTO from app.pipeline.competency_extraction_pipeline import CompetencyExtractionPipeline +from app.domain.text_exercise_chat_pipeline_execution_dto import ( + TextExerciseChatPipelineExecutionDTO, +) +from app.pipeline.text_exercise_chat_pipeline import TextExerciseChatPipeline +from app.web.status.status_update import TextExerciseChatCallback router = APIRouter(prefix="/api/v1/pipelines", tags=["pipelines"]) logger = logging.getLogger(__name__) @@ -90,6 +95,44 @@ def run_course_chat_pipeline(variant: str, dto: CourseChatPipelineExecutionDTO): thread.start() +def run_text_exercise_chat_pipeline_worker(dto, variant): + try: + callback = TextExerciseChatCallback( + run_id=dto.execution.settings.authentication_token, + base_url=dto.execution.settings.artemis_base_url, + initial_stages=dto.execution.initial_stages, + ) + match variant: + case "default" | "text_exercise_chat_pipeline_reference_impl": + pipeline = TextExerciseChatPipeline(callback=callback) + case _: + raise ValueError(f"Unknown variant: {variant}") + except Exception as e: + logger.error(f"Error preparing text exercise chat pipeline: {e}") + logger.error(traceback.format_exc()) + capture_exception(e) + return + + try: + pipeline(dto=dto) + except Exception as e: + logger.error(f"Error running text exercise chat pipeline: {e}") + logger.error(traceback.format_exc()) + callback.error("Fatal error.", exception=e) + + +@router.post( + "/text-exercise-chat/{variant}/run", + status_code=status.HTTP_202_ACCEPTED, + dependencies=[Depends(TokenValidator())], +) +def run_text_exercise_chat_pipeline( + variant: str, dto: TextExerciseChatPipelineExecutionDTO +): + thread = Thread(target=run_text_exercise_chat_pipeline_worker, args=(dto, variant)) + thread.start() + + def run_competency_extraction_pipeline_worker( dto: CompetencyExtractionPipelineExecutionDTO, _variant: str ): @@ -150,6 +193,14 @@ def get_pipeline(feature: str): description="Default programming exercise chat variant.", ) ] + case "TEXT_EXERCISE_CHAT": + return [ + FeatureDTO( + id="default", + name="Default Variant", + description="Default text exercise chat variant.", + ) + ] case "COURSE_CHAT": return [ FeatureDTO( diff --git a/app/web/status/status_update.py b/app/web/status/status_update.py index 199b39d1..a122b6e7 100644 --- a/app/web/status/status_update.py +++ b/app/web/status/status_update.py @@ -6,18 +6,21 @@ from abc import ABC from app.common.token_usage_dto import TokenUsageDTO -from ...domain.status.competency_extraction_status_update_dto import ( +from app.domain.status.competency_extraction_status_update_dto import ( CompetencyExtractionStatusUpdateDTO, ) -from ...domain.chat.course_chat.course_chat_status_update_dto import ( +from app.domain.chat.course_chat.course_chat_status_update_dto import ( CourseChatStatusUpdateDTO, ) -from ...domain.status.stage_state_dto import StageStateEnum -from ...domain.status.stage_dto import StageDTO -from ...domain.chat.exercise_chat.exercise_chat_status_update_dto import ( +from app.domain.status.stage_state_dto import StageStateEnum +from app.domain.status.stage_dto import StageDTO +from app.domain.status.text_exercise_chat_status_update_dto import ( + TextExerciseChatStatusUpdateDTO, +) +from app.domain.chat.exercise_chat.exercise_chat_status_update_dto import ( ExerciseChatStatusUpdateDTO, ) -from ...domain.status.status_update_dto import StatusUpdateDTO +from app.domain.status.status_update_dto import StatusUpdateDTO import logging logger = logging.getLogger(__name__) @@ -224,6 +227,37 @@ def __init__( super().__init__(url, run_id, status, stage, current_stage_index) +class TextExerciseChatCallback(StatusCallback): + def __init__( + self, + run_id: str, + base_url: str, + initial_stages: List[StageDTO], + ): + url = f"{base_url}/api/public/pyris/pipelines/text-exercise-chat/runs/{run_id}/status" + stages = initial_stages or [] + stage = len(stages) + stages += [ + StageDTO( + weight=30, + state=StageStateEnum.NOT_STARTED, + name="Thinking", + ), + StageDTO( + weight=20, + state=StageStateEnum.NOT_STARTED, + name="Responding", + ), + ] + super().__init__( + url, + run_id, + TextExerciseChatStatusUpdateDTO(stages=stages), + stages[stage], + stage, + ) + + class CompetencyExtractionCallback(StatusCallback): def __init__( self, diff --git a/requirements.txt b/requirements.txt index 2f792ec3..4ea47bc0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,17 +1,17 @@ black==24.8.0 -fastapi==0.115.0 +fastapi==0.112.2 flake8==7.1.1 -langchain==0.3.2 -ollama==0.3.3 -openai==1.51.0 -pre-commit==4.0.0 +langchain==0.2.14 +ollama==0.3.1 +openai==1.42.0 +pre-commit==3.8.0 psutil==6.0.0 -pydantic==2.9.2 -PyMuPDF==1.24.11 -pytz==2024.2 +pydantic==2.8.2 +PyMuPDF==1.24.9 +pytz==2024.1 PyYAML==6.0.2 requests==2.32.3 -sentry-sdk[starlette,fastapi,openai]==2.15.0 -unstructured==0.15.13 -uvicorn==0.31.0 -weaviate-client==4.8.1 +sentry-sdk[starlette,fastapi,openai]==2.13.0 +unstructured==0.15.7 +uvicorn==0.30.6 +weaviate-client==4.7.1