Commit

Merge branch 'develop' into dependabot/npm_and_yarn/playground/next-14.2.10
laadvo authored Nov 19, 2024
2 parents 9ac4da5 + acb2929 commit 91e5102
Showing 133 changed files with 8,656 additions and 3,834 deletions.
7 changes: 7 additions & 0 deletions .github/workflows/build.yml
@@ -56,6 +56,13 @@ jobs:
docker build -t athena .
cd ..
- name: Build llm_core image
id: set-image-core_llm
run: |
cd ./llm_core
docker build -t llm_core .
cd ..
- name: Docker Login
id: docker-login
run: |
(file header not rendered)
@@ -33,6 +33,7 @@ async def find_module_by_name(module_name: str) -> Optional[Module]:
return None


# pylint: disable=too-many-positional-arguments
async def request_to_module(module: Module, headers: dict, path: str, lms_url: str, data: Optional[dict], method: str) -> ModuleResponse:
"""
Helper function to send a request to a module.
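For illustration, a hedged sketch of the call shape this helper expects, using the request_to_module signature above — the endpoint path, header, and payload are hypothetical, not taken from this diff:

from typing import Optional
from assessment_module_manager.module import Module  # import path assumed for this sketch

async def ask_module(module: Module, lms_url: str, payload: Optional[dict]):
    return await request_to_module(
        module=module,
        headers={"Authorization": "secret"},  # hypothetical header
        path="/feedback_suggestions",         # hypothetical module endpoint
        lms_url=lms_url,
        data=payload,
        method="POST",
    )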
497 changes: 193 additions & 304 deletions assessment_module_manager/poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions athena-docker.sh
@@ -40,7 +40,7 @@ function download_cofee_config {
mkdir -p ./module_text_cofee
for file in traefik.docker.yml node_config.docker.yml; do
echo " Downloading $file..."
curl -sSL -o ./module_text_cofee/$file https://raw.githubusercontent.com/ls1intum/Athena/"$pr_branch"/module_text_cofee/"$file"
curl -sSL -o ./module_text_cofee/$file https://raw.githubusercontent.com/ls1intum/Athena/"$pr_branch"/modules/text/module_text_cofee/"$file"
done
}

@@ -146,4 +146,4 @@ case "$subcommand" in
general_help
exit 1
;;
esac
5 changes: 3 additions & 2 deletions athena/athena/__init__.py
@@ -4,7 +4,7 @@

from . import contextvars
from .app import app
from .schemas import ExerciseType, GradingCriterion, StructuredGradingInstruction
from .schemas import ExerciseType, GradingCriterion, StructuredGradingInstruction, StructuredGradingCriterion
from .metadata import emit_meta, get_meta
from .experiment import get_experiment_environment
from .endpoints import submission_selector, submissions_consumer, feedback_consumer, feedback_provider, config_schema_provider, evaluation_provider # type: ignore
@@ -36,5 +36,6 @@ def run_module():
"get_experiment_environment",
"ExerciseType",
"GradingCriterion",
"StructuredGradingInstruction"
"StructuredGradingInstruction",
"StructuredGradingCriterion"
]
1 change: 1 addition & 0 deletions athena/athena/models/db_exercise.py
@@ -7,6 +7,7 @@

class DBExercise(Model):
id = Column(BigIntegerWithAutoincrement, primary_key=True, index=True, nullable=False)
lms_url = Column(String, index=True, nullable=False)
title = Column(String, index=True, nullable=False)
type = Column(SqlEnum(ExerciseType), index=True, nullable=False)
max_points = Column(Float, index=True, nullable=False)
1 change: 1 addition & 0 deletions athena/athena/models/db_feedback.py
@@ -8,6 +8,7 @@ class DBFeedback(Model):
__table_args__ = (UniqueConstraint('lms_id'),)

id = Column(BigIntegerWithAutoincrement, primary_key=True, index=True, autoincrement=True)
lms_url = Column(String, index=True, nullable=False)
lms_id = Column(BigInteger)
title = Column(String)
description = Column(String)
3 changes: 2 additions & 1 deletion athena/athena/models/db_submission.py
@@ -1,9 +1,10 @@
from sqlalchemy import Column, JSON
from sqlalchemy import Column, JSON, String

from .model import Model
from .big_integer_with_autoincrement import BigIntegerWithAutoincrement


class DBSubmission(Model):
id = Column(BigIntegerWithAutoincrement, primary_key=True, index=True, autoincrement=True,)
lms_url = Column(String, index=True, nullable=False)
meta = Column(JSON, nullable=False)
6 changes: 0 additions & 6 deletions athena/athena/models/model.py
@@ -6,12 +6,6 @@

class Model:

lms_url = Column(String, index=True, nullable=False)

__table_args__ = (
UniqueConstraint('id', 'lms_url'),
)

@classmethod
def get_schema_class(cls) -> BaseModel:
# The schema class has the same name as myself, but without the "DB" prefix.
2 changes: 1 addition & 1 deletion athena/athena/schemas/__init__.py
@@ -13,4 +13,4 @@
from .modeling_feedback import ModelingFeedback
from .modeling_exercise import ModelingExercise
from .modeling_submission import ModelingSubmission
from .grading_criterion import GradingCriterion, StructuredGradingInstruction
from .grading_criterion import GradingCriterion, StructuredGradingInstruction, StructuredGradingCriterion
5 changes: 4 additions & 1 deletion athena/athena/schemas/grading_criterion.py
@@ -1,7 +1,7 @@
from abc import ABC
from typing import List, Optional

from pydantic import Field
from pydantic import BaseModel, Field

from .schema import Schema

@@ -24,3 +24,6 @@ class GradingCriterion(Schema, ABC):
structured_grading_instructions: List[StructuredGradingInstruction] = Field(
[], example=[{"credits": 1.0, "gradingScale": "Good", "instructionDescription": "Some instructions", "feedback": "Nicely done!", "usageCount": 1},
{"credits": 0.0, "gradingScale": "Bad", "instructionDescription": "Some instructions", "feedback": "Try again!", "usageCount": 0}])

class StructuredGradingCriterion(BaseModel):
criteria: List[GradingCriterion]
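A minimal usage sketch for the new wrapper, assuming a list of already-constructed criteria (the wrapper itself is a plain pydantic model holding the list):

from typing import List
from athena.schemas import GradingCriterion, StructuredGradingCriterion

def wrap_criteria(criteria: List[GradingCriterion]) -> StructuredGradingCriterion:
    # Validation happens per criterion; the wrapper only aggregates them.
    return StructuredGradingCriterion(criteria=criteria)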
10 changes: 10 additions & 0 deletions docker-compose.yml
@@ -8,6 +8,13 @@ services:
image: athena
command: echo "Athena build succeeded, exiting (this is normal)"

llm_core:
build: ./llm_core
depends_on:
- athena
image: llm_core
command: echo "llm_core build succeeded, exiting (this is normal)"

assessment_module_manager:
build: ./assessment_module_manager
depends_on:
@@ -30,6 +37,7 @@ services:
build: modules/programming/module_programming_llm
depends_on:
- athena
- llm_core
ports:
- "5002:5002"

@@ -38,6 +46,7 @@ services:
build: modules/text/module_text_llm
depends_on:
- athena
- llm_core
ports:
- "5003:5003"

@@ -70,5 +79,6 @@ services:
build: modules/modeling/module_modeling_llm
depends_on:
- athena
- llm_core
ports:
- "5008:5008"
41 changes: 41 additions & 0 deletions llm_core/.env.example
@@ -0,0 +1,41 @@
# Comment out the variables that you define somewhere else
# Environment variables are overwritten by .env file

PRODUCTION=0
SECRET=12345abcdef
DATABASE_URL=sqlite:///../data/data.sqlite


################################################################
# LLM Credentials #
################################################################

# Default model to use
# See below for options, available models are also logged on startup
LLM_DEFAULT_MODEL="azure_openai_gpt-4o"
LLM_DEFAULT_MODEL_COST_PER_MILLION_INPUT_TOKEN=5
LLM_DEFAULT_MODEL_COST_PER_MILLION_OUTPUT_TOKEN=15

# Enable LLM-as-a-judge approach 0 = disabled, 1 = enabled
LLM_ENABLE_LLM_AS_A_JUDGE=1
# Evaluation model to use for the LLM-as-a-judge approach [Only important if you want to use it in the /evaluate endpoint]
# See below for options, available models are also logged on startup
LLM_EVALUATION_MODEL="azure_openai_gpt-4o"

# Standard OpenAI (Non-Azure) [leave blank if not used]
# Model names prefixed with `openai_` followed by the model name, e.g. `openai_text-davinci-003`
# A list of models can be found in `llm_core/models/openai.py` (openai_models)
OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

# Azure OpenAI [leave blank if not used]
# Model names prefixed with `azure_openai_` followed by the deployment id, e.g. `azure_openai_gpt-35`
AZURE_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
AZURE_OPENAI_ENDPOINT="https://ase-eu01.openai.azure.com/" # change base if needed
OPENAI_API_VERSION="2024-06-01" # change base if needed

# LangSmith (can be used for tracing LLMs) [leave blank if not used]
# See https://docs.smith.langchain.com
# LANGCHAIN_TRACING_V2=true
# LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
# LANGCHAIN_API_KEY="XXX"
# LANGCHAIN_PROJECT="XXX"
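As a sketch of how the cost variables above are consumed (this mirrors the UsageHandler added later in this commit; unset variables fall back to 0.0):

import os

input_rate = float(os.environ.get("LLM_DEFAULT_MODEL_COST_PER_MILLION_INPUT_TOKEN", 0.0))
output_rate = float(os.environ.get("LLM_DEFAULT_MODEL_COST_PER_MILLION_OUTPUT_TOKEN", 0.0))

# Worked example at the default rates above (5 and 15 per million tokens):
# 1,200 input tokens and 800 output tokens cost
# 1200 * 5 / 1_000_000 + 800 * 15 / 1_000_000 = 0.018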
24 changes: 24 additions & 0 deletions llm_core/Dockerfile
@@ -0,0 +1,24 @@
# syntax=docker/dockerfile:1

# This is the Dockerfile for the shared llm package.
# Its output is used as a dependency in the module_* Dockerfiles.

FROM python:3.11 as llm_core

WORKDIR /code

# Poetry
RUN pip install --no-cache-dir poetry==1.5.0

# Dependencies
COPY pyproject.toml poetry.lock ./
COPY --from=athena /code /athena

RUN poetry config virtualenvs.create false \
&& poetry install --no-interaction --no-ansi

# Project files
COPY . ./

# Build the package
RUN poetry build -f wheel
4 changes: 4 additions & 0 deletions llm_core/llm_core/__init__.py
@@ -0,0 +1,4 @@
import dotenv

# Load environment variables from .env file (for local development)
dotenv.load_dotenv(override=True)
(file header not rendered)
@@ -2,7 +2,7 @@
from typing import Type, Union, List, Optional
from langchain.base_language import BaseLanguageModel

from module_text_llm.helpers.models.model_config import ModelConfig
from llm_core.models.model_config import ModelConfig


DefaultModelConfig: Type[ModelConfig]
@@ -14,7 +14,7 @@

types: List[Type[ModelConfig]] = []
try:
import module_text_llm.helpers.models.openai as openai_config
import llm_core.models.openai as openai_config
types.append(openai_config.OpenAIModelConfig)
if default_model_name in openai_config.available_models:
DefaultModelConfig = openai_config.OpenAIModelConfig
@@ -35,4 +35,4 @@
ModelConfigType = type0
else:
type1 = types[1]
ModelConfigType = Union[type0, type1] # type: ignore
ModelConfigType = Union[type0, type1] # type: ignore
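A hypothetical consumer of the assembled union — the config class and field below are illustrative only, not part of this diff:

from pydantic import BaseModel, Field
from llm_core.models import DefaultModelConfig, ModelConfigType

class ApproachConfig(BaseModel):
    # Pydantic validates against whichever provider configs made it into the
    # Union; assumes DefaultModelConfig is constructible without arguments.
    model: ModelConfigType = Field(default_factory=DefaultModelConfig)  # type: ignore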
43 changes: 43 additions & 0 deletions llm_core/llm_core/models/callbacks.py
@@ -0,0 +1,43 @@
import os

from langchain.callbacks.base import BaseCallbackHandler
from langchain_core.outputs import LLMResult
from langchain_core.messages.ai import UsageMetadata

from athena import emit_meta, get_meta


class UsageHandler(BaseCallbackHandler):
def on_llm_end(self, response: LLMResult, **kwargs) -> None:
cost_per_million_input_tokens = float(os.environ.get("LLM_DEFAULT_MODEL_COST_PER_MILLION_INPUT_TOKEN", 0.0))
cost_per_million_output_tokens = float(os.environ.get("LLM_DEFAULT_MODEL_COST_PER_MILLION_OUTPUT_TOKEN", 0.0))

meta = get_meta()

total_usage = meta.get("totalUsage", {"numInputTokens": 0, "numOutputTokens": 0, "numTotalTokens": 0, "cost": 0 })
llm_calls = meta.get("llmRequests", [])

for generations in response.generations:
for generation in generations:
message = generation.dict()["message"]
generation_usage: UsageMetadata = message["usage_metadata"]
model_name = message["response_metadata"].get("model_name", None)

total_usage["numInputTokens"] += generation_usage["input_tokens"]
total_usage["numOutputTokens"] += generation_usage["output_tokens"]
total_usage["numTotalTokens"] += generation_usage["total_tokens"]

total_usage["cost"] += int(generation_usage["input_tokens"]) * cost_per_million_output_tokens / 1_000_000
total_usage["cost"] += int(generation_usage["output_tokens"]) * cost_per_million_output_tokens / 1_000_000

llm_calls.append({
"model": model_name,
"costPerMillionInputToken": cost_per_million_input_tokens,
"costPerMillionOutputToken": cost_per_million_output_tokens,
"numInputTokens": generation_usage["input_tokens"],
"numOutputTokens": generation_usage["output_tokens"],
"numTotalTokens": generation_usage["total_tokens"],
})

emit_meta("totalUsage", total_usage)
emit_meta("llmRequests", llm_calls)
File renamed without changes.
(file header not rendered)
@@ -10,6 +10,8 @@

from athena.logger import logger
from .model_config import ModelConfig
from .callbacks import UsageHandler


OPENAI_PREFIX = "openai_"
AZURE_OPENAI_PREFIX = "azure_openai_"
@@ -132,6 +134,7 @@ def get_model(self) -> BaseLanguageModel:
# Otherwise, add it to model_kwargs (necessary for chat models)
model_kwargs[attr] = value
kwargs["model_kwargs"] = model_kwargs
kwargs["callbacks"] = [UsageHandler()]

# Initialize a copy of the model using the config
model = model.__class__(**kwargs)
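A hedged usage sketch of the config above — the model_name field and its value are assumed for illustration, as the config's fields lie outside this hunk:

from llm_core.models.openai import OpenAIModelConfig

config = OpenAIModelConfig(model_name="azure_openai_gpt-4o")  # field name assumed
model = config.get_model()  # returned copy carries callbacks=[UsageHandler()]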
(file header not rendered)
@@ -1,20 +1,15 @@
from typing import Optional, Type, TypeVar, List
from pydantic import BaseModel, ValidationError
from typing import Type, TypeVar, List
from pydantic import BaseModel
import tiktoken

from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.base_language import BaseLanguageModel
from langchain.prompts import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
HumanMessagePromptTemplate,
)
from langchain.chains.openai_functions import create_structured_output_chain
from langchain.output_parsers import PydanticOutputParser
from langchain.schema import OutputParserException

from athena import emit_meta, get_experiment_environment
from athena import emit_meta

T = TypeVar("T", bound=BaseModel)

@@ -112,51 +107,4 @@ def get_chat_prompt_with_formatting_instructions(
system_message_prompt.prompt.partial_variables = {"format_instructions": output_parser.get_format_instructions()}
system_message_prompt.prompt.input_variables.remove("format_instructions")
human_message_prompt = HumanMessagePromptTemplate.from_template(human_message + "\n\nJSON response following the provided schema:")
return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])


async def predict_and_parse(
model: BaseLanguageModel,
chat_prompt: ChatPromptTemplate,
prompt_input: dict,
pydantic_object: Type[T],
tags: Optional[List[str]]
) -> Optional[T]:
"""Predicts an LLM completion using the model and parses the output using the provided Pydantic model
Args:
model (BaseLanguageModel): The model to predict with
chat_prompt (ChatPromptTemplate): Prompt to use
prompt_input (dict): Input parameters to use for the prompt
pydantic_object (Type[T]): Pydantic model to parse the output
tags (Optional[List[str]]): List of tags to tag the prediction with
Returns:
Optional[T]: Parsed output, or None if it could not be parsed
"""
experiment = get_experiment_environment()

tags = tags or []
if experiment.experiment_id is not None:
tags.append(f"experiment-{experiment.experiment_id}")
if experiment.module_configuration_id is not None:
tags.append(f"module-configuration-{experiment.module_configuration_id}")
if experiment.run_id is not None:
tags.append(f"run-{experiment.run_id}")

if supports_function_calling(model):
chain = create_structured_output_chain(pydantic_object, llm=model, prompt=chat_prompt, tags=tags)

try:
return await chain.arun(**prompt_input)
except (OutputParserException, ValidationError):
# In the future, we should probably have some recovery mechanism here (i.e. fix the output with another prompt)
return None

output_parser = PydanticOutputParser(pydantic_object=pydantic_object)
chain = LLMChain(llm=model, prompt=chat_prompt, output_parser=output_parser, tags=tags)
try:
return await chain.arun(**prompt_input)
except (OutputParserException, ValidationError):
# In the future, we should probably have some recovery mechanism here (i.e. fix the output with another prompt)
return None
