Commit 139cc19

Merge branch 'develop' into feature/revised-programming-feedback
# Conflicts:
#	llm_core/llm_core/utils/llm_utils.py
#	modules/modeling/module_modeling_llm/poetry.lock
#	modules/modeling/module_modeling_llm/pyproject.toml
#	modules/programming/module_programming_llm/module_programming_llm/config.py
#	modules/programming/module_programming_llm/module_programming_llm/generate_graded_suggestions_by_file.py
#	modules/programming/module_programming_llm/module_programming_llm/generate_non_graded_suggestions_by_file.py
#	modules/programming/module_programming_llm/module_programming_llm/generate_summary_by_file.py
#	modules/programming/module_programming_llm/module_programming_llm/helpers/models/model_config.py
#	modules/programming/module_programming_llm/module_programming_llm/helpers/models/openai.py
#	modules/programming/module_programming_llm/module_programming_llm/helpers/models/replicate.py
#	modules/programming/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py
#	modules/programming/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py
#	modules/programming/module_programming_llm/poetry.lock
#	modules/programming/module_programming_llm/pyproject.toml
#	modules/text/module_text_llm/poetry.lock
#	modules/text/module_text_llm/pyproject.toml
dmytropolityka committed Oct 29, 2024
2 parents 132214c + 9c49cc8 commit 139cc19
Showing 45 changed files with 4,177 additions and 2,262 deletions.
7 changes: 7 additions & 0 deletions .github/workflows/build.yml
@@ -56,6 +56,13 @@ jobs:
docker build -t athena .
cd ..
- name: Build llm_core image
id: set-image-core_llm
run: |
cd ./llm_core
docker build -t llm_core .
cd ..
- name: Docker Login
id: docker-login
run: |
@@ -33,6 +33,7 @@ async def find_module_by_name(module_name: str) -> Optional[Module]:
return None


# pylint: disable=too-many-positional-arguments
async def request_to_module(module: Module, headers: dict, path: str, lms_url: str, data: Optional[dict], method: str) -> ModuleResponse:
"""
Helper function to send a request to a module.
211 changes: 41 additions & 170 deletions assessment_module_manager/poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions athena-docker.sh
@@ -40,7 +40,7 @@ function download_cofee_config {
mkdir -p ./module_text_cofee
for file in traefik.docker.yml node_config.docker.yml; do
echo " Downloading $file..."
curl -sSL -o ./module_text_cofee/$file https://raw.githubusercontent.com/ls1intum/Athena/"$pr_branch"/module_text_cofee/"$file"
curl -sSL -o ./module_text_cofee/$file https://raw.githubusercontent.com/ls1intum/Athena/"$pr_branch"/modules/text/module_text_cofee/"$file"
done
}

@@ -146,4 +146,4 @@ case "$subcommand" in
general_help
exit 1
;;
esac
esac
10 changes: 10 additions & 0 deletions docker-compose.yml
@@ -8,6 +8,13 @@ services:
image: athena
command: echo "Athena build succeeded, exiting (this is normal)"

llm_core:
build: ./llm_core
depends_on:
- athena
image: llm_core
command: echo "llm_core build succeeded, exiting (this is normal)"

assessment_module_manager:
build: ./assessment_module_manager
depends_on:
@@ -30,6 +37,7 @@ services:
build: modules/programming/module_programming_llm
depends_on:
- athena
- llm_core
ports:
- "5002:5002"

@@ -38,6 +46,7 @@ services:
build: modules/text/module_text_llm
depends_on:
- athena
- llm_core
ports:
- "5003:5003"

@@ -70,5 +79,6 @@ services:
build: modules/modeling/module_modeling_llm
depends_on:
- athena
- llm_core
ports:
- "5008:5008"
41 changes: 41 additions & 0 deletions llm_core/.env.example
@@ -0,0 +1,41 @@
# Comment out the variables that you define somewhere else
# Environment variables are overwritten by .env file

PRODUCTION=0
SECRET=12345abcdef
DATABASE_URL=sqlite:///../data/data.sqlite


################################################################
# LLM Credentials #
################################################################

# Default model to use
# See below for options, available models are also logged on startup
LLM_DEFAULT_MODEL="azure_openai_gpt-4o"
LLM_DEFAULT_MODEL_COST_PER_MILLION_INPUT_TOKEN=5
LLM_DEFAULT_MODEL_COST_PER_MILLION_OUTPUT_TOKEN=15

# Enable LLM-as-a-judge approach 0 = disabled, 1 = enabled
LLM_ENABLE_LLM_AS_A_JUDGE=1
# Evaluation model to use for the LLM-as-a-judge approach [Only important if you want to use it in the /evaluate endpoint]
# See below for options, available models are also logged on startup
LLM_EVALUATION_MODEL="azure_openai_gpt-4o"

# Standard OpenAI (Non-Azure) [leave blank if not used]
# Model names prefixed with `openai_` followed by the model name, e.g. `openai_text-davinci-003`
# A list of models can be found in `module_text_llm/helpers/models/openai.py` (openai_models)
OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

# Azure OpenAI [leave blank if not used]
# Model names prefixed with `azure_openai_` followed by the deployment id, e.g. `azure_openai_gpt-35`
AZURE_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
AZURE_OPENAI_ENDPOINT="https://ase-eu01.openai.azure.com/" # change base if needed
OPENAI_API_VERSION="2024-06-01" # change base if needed

# LangSmith (can be used for tracing LLMs) [leave blank if not used]
# See https://docs.smith.langchain.com
# LANGCHAIN_TRACING_V2=true
# LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
# LANGCHAIN_API_KEY="XXX"
# LANGCHAIN_PROJECT="XXX"
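
As a quick illustration (not part of this commit), here is a minimal sketch of how these settings might be read once the llm_core package has loaded the .env file; the variable names come from the example above, everything else is assumed:

import os

import llm_core  # importing the package runs dotenv.load_dotenv(override=True), see llm_core/__init__.py below

# Read the default model and its pricing; the fallbacks here are placeholders
default_model = os.environ.get("LLM_DEFAULT_MODEL", "azure_openai_gpt-4o")
input_cost = float(os.environ.get("LLM_DEFAULT_MODEL_COST_PER_MILLION_INPUT_TOKEN", 0.0))
output_cost = float(os.environ.get("LLM_DEFAULT_MODEL_COST_PER_MILLION_OUTPUT_TOKEN", 0.0))
use_llm_as_a_judge = os.environ.get("LLM_ENABLE_LLM_AS_A_JUDGE", "0") == "1"

print(default_model, input_cost, output_cost, use_llm_as_a_judge)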
24 changes: 24 additions & 0 deletions llm_core/Dockerfile
@@ -0,0 +1,24 @@
# syntax=docker/dockerfile:1

# This is the Dockerfile for the shared llm package.
# Its output is used as a dependency in the module_* Dockerfiles.

FROM python:3.11 as llm_core

WORKDIR /code

# Poetry
RUN pip install --no-cache-dir poetry==1.5.0

# Dependencies
COPY pyproject.toml poetry.lock ./
COPY --from=athena /code /athena

RUN poetry config virtualenvs.create false \
&& poetry install --no-interaction --no-ansi

# Project files
COPY . ./

# Build the package
RUN poetry build -f wheel
4 changes: 4 additions & 0 deletions llm_core/llm_core/__init__.py
@@ -0,0 +1,4 @@
import dotenv

# Load environment variables from .env file (for local development)
dotenv.load_dotenv(override=True)
@@ -2,8 +2,7 @@
from typing import Type, Union, List, Optional
from langchain.base_language import BaseLanguageModel

from module_modeling_llm.models.model_config import ModelConfig

from llm_core.models.model_config import ModelConfig


DefaultModelConfig: Type[ModelConfig]
@@ -15,7 +14,7 @@

types: List[Type[ModelConfig]] = []
try:
import module_modeling_llm.models.openai as openai_config
import llm_core.models.openai as openai_config
types.append(openai_config.OpenAIModelConfig)
if default_model_name in openai_config.available_models:
DefaultModelConfig = openai_config.OpenAIModelConfig
@@ -36,4 +35,4 @@
ModelConfigType = type0
else:
type1 = types[1]
ModelConfigType = Union[type0, type1] # type: ignore
ModelConfigType = Union[type0, type1] # type: ignore
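
For orientation, a hedged sketch of how a module might consume these dynamically assembled types; the export location (llm_core.models) and the field usage are assumptions and are not shown in this diff:

from pydantic import BaseModel, Field

# Assumed import location for the names built above
from llm_core.models import DefaultModelConfig, ModelConfigType


class ApproachConfig(BaseModel):
    """Hypothetical module configuration carrying an LLM model config."""
    model: ModelConfigType = Field(default_factory=DefaultModelConfig)  # type: ignore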
43 changes: 43 additions & 0 deletions llm_core/llm_core/models/callbacks.py
@@ -0,0 +1,43 @@
import os

from langchain.callbacks.base import BaseCallbackHandler
from langchain_core.outputs import LLMResult
from langchain_core.messages.ai import UsageMetadata

from athena import emit_meta, get_meta


class UsageHandler(BaseCallbackHandler):
def on_llm_end(self, response: LLMResult, **kwargs) -> None:
cost_per_million_input_tokens = float(os.environ.get("LLM_DEFAULT_MODEL_COST_PER_MILLION_INPUT_TOKEN", 0.0))
cost_per_million_output_tokens = float(os.environ.get("LLM_DEFAULT_MODEL_COST_PER_MILLION_OUTPUT_TOKEN", 0.0))

meta = get_meta()

total_usage = meta.get("totalUsage", {"numInputTokens": 0, "numOutputTokens": 0, "numTotalTokens": 0, "cost": 0 })
llm_calls = meta.get("llmRequests", [])

for generations in response.generations:
for generation in generations:
message = generation.dict()["message"]
generation_usage: UsageMetadata = message["usage_metadata"]
model_name = message["response_metadata"].get("model_name", None)

total_usage["numInputTokens"] += generation_usage["input_tokens"]
total_usage["numOutputTokens"] += generation_usage["output_tokens"]
total_usage["numTotalTokens"] += generation_usage["total_tokens"]

total_usage["cost"] += int(generation_usage["input_tokens"]) * cost_per_million_input_tokens / 1_000_000
total_usage["cost"] += int(generation_usage["output_tokens"]) * cost_per_million_output_tokens / 1_000_000

llm_calls.append({
"model": model_name,
"costPerMillionInputToken": cost_per_million_input_tokens,
"costPerMillionOutputToken": cost_per_million_output_tokens,
"numInputTokens": generation_usage["input_tokens"],
"numOutputTokens": generation_usage["output_tokens"],
"numTotalTokens": generation_usage["total_tokens"],
})

emit_meta("totalUsage", total_usage)
emit_meta("llmRequests", llm_calls)
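
A hedged usage sketch (not from this commit): attaching the handler to a LangChain chat model so that on_llm_end aggregates token usage and cost into the module's metadata. The model class and prompt are placeholders, and the emit_meta/get_meta calls assume this runs inside an Athena module request:

from langchain_openai import ChatOpenAI  # any LangChain chat model should work here

from llm_core.models.callbacks import UsageHandler


async def summarize(text: str) -> str:
    # Attach the handler so on_llm_end adds "totalUsage" and "llmRequests" to the meta
    model = ChatOpenAI(model="gpt-4o", callbacks=[UsageHandler()])
    response = await model.ainvoke(f"Summarize the submission in one sentence:\n{text}")
    return response.content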
File renamed without changes.
File renamed without changes.
@@ -1,20 +1,15 @@
from typing import Optional, Type, TypeVar, List
from pydantic import BaseModel, ValidationError
from typing import Type, TypeVar, List
from pydantic import BaseModel
import tiktoken

from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.base_language import BaseLanguageModel
from langchain.prompts import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
HumanMessagePromptTemplate,
)
from langchain.chains.openai_functions import create_structured_output_chain
from langchain.output_parsers import PydanticOutputParser
from langchain.schema import OutputParserException

from athena import emit_meta, get_experiment_environment
from athena import emit_meta

T = TypeVar("T", bound=BaseModel)

@@ -31,9 +26,9 @@ def num_tokens_from_prompt(chat_prompt: ChatPromptTemplate, prompt_input: dict)
return num_tokens_from_string(chat_prompt.format(**prompt_input))


def check_prompt_length_and_omit_features_if_necessary(prompt: ChatPromptTemplate,
prompt_input: dict,
max_input_tokens: int,
def check_prompt_length_and_omit_features_if_necessary(prompt: ChatPromptTemplate,
prompt_input: dict,
max_input_tokens: int,
omittable_features: List[str],
debug: bool):
"""Check if the input is too long and omit features if necessary.
@@ -48,7 +43,7 @@ def check_prompt_length_and_omit_features_if_necessary(prompt: ChatPromptTemplat
debug (bool): Debug flag
Returns:
(dict, bool): Tuple of (prompt_input, should_run) where prompt_input is the input with omitted features and
(dict, bool): Tuple of (prompt_input, should_run) where prompt_input is the input with omitted features and
should_run is True if the model should run, False otherwise
"""
if num_tokens_from_prompt(prompt, prompt_input) <= max_input_tokens:
@@ -84,11 +79,11 @@ def supports_function_calling(model: BaseLanguageModel):


def get_chat_prompt_with_formatting_instructions(
model: BaseLanguageModel,
system_message: str,
human_message: str,
pydantic_object: Type[T]
) -> ChatPromptTemplate:
model: BaseLanguageModel,
system_message: str,
human_message: str,
pydantic_object: Type[T]
) -> ChatPromptTemplate:
"""Returns a ChatPromptTemplate with formatting instructions (if necessary)
Note: Does nothing if the model supports function calling
@@ -106,57 +101,10 @@ def get_chat_prompt_with_formatting_instructions(
system_message_prompt = SystemMessagePromptTemplate.from_template(system_message)
human_message_prompt = HumanMessagePromptTemplate.from_template(human_message)
return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

output_parser = PydanticOutputParser(pydantic_object=pydantic_object)
system_message_prompt = SystemMessagePromptTemplate.from_template(system_message + "\n{format_instructions}")
system_message_prompt.prompt.partial_variables = {"format_instructions": output_parser.get_format_instructions()}
system_message_prompt.prompt.input_variables.remove("format_instructions")
human_message_prompt = HumanMessagePromptTemplate.from_template(human_message + "\n\nJSON response following the provided schema:")
return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])


async def predict_and_parse(
model: BaseLanguageModel,
chat_prompt: ChatPromptTemplate,
prompt_input: dict,
pydantic_object: Type[T],
tags: Optional[List[str]]
) -> Optional[T]:
"""Predicts an LLM completion using the model and parses the output using the provided Pydantic model
Args:
model (BaseLanguageModel): The model to predict with
chat_prompt (ChatPromptTemplate): Prompt to use
prompt_input (dict): Input parameters to use for the prompt
pydantic_object (Type[T]): Pydantic model to parse the output
tags (Optional[List[str]]: List of tags to tag the prediction with
Returns:
Optional[T]: Parsed output, or None if it could not be parsed
"""
experiment = get_experiment_environment()

tags = tags or []
if experiment.experiment_id is not None:
tags.append(f"experiment-{experiment.experiment_id}")
if experiment.module_configuration_id is not None:
tags.append(f"module-configuration-{experiment.module_configuration_id}")
if experiment.run_id is not None:
tags.append(f"run-{experiment.run_id}")

if supports_function_calling(model):
chain = create_structured_output_chain(pydantic_object, llm=model, prompt=chat_prompt, tags=tags)

try:
return await chain.arun(**prompt_input)
except (OutputParserException, ValidationError):
# In the future, we should probably have some recovery mechanism here (i.e. fix the output with another prompt)
return None

output_parser = PydanticOutputParser(pydantic_object=pydantic_object)
chain = LLMChain(llm=model, prompt=chat_prompt, output_parser=output_parser, tags=tags)
try:
return await chain.arun(**prompt_input)
except (OutputParserException, ValidationError):
# In the future, we should probably have some recovery mechanism here (i.e. fix the output with another prompt)
return None
return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
@@ -8,12 +8,24 @@
T = TypeVar("T", bound=BaseModel)

async def predict_and_parse(
model: BaseLanguageModel,
chat_prompt: ChatPromptTemplate,
prompt_input: dict,
pydantic_object: Type[T],
model: BaseLanguageModel,
chat_prompt: ChatPromptTemplate,
prompt_input: dict,
pydantic_object: Type[T],
tags: Optional[List[str]]
) -> Optional[T]:
) -> Optional[T]:
"""Predicts an LLM completion using the model and parses the output using the provided Pydantic model
Args:
model (BaseLanguageModel): The model to predict with
chat_prompt (ChatPromptTemplate): Prompt to use
prompt_input (dict): Input parameters to use for the prompt
pydantic_object (Type[T]): Pydantic model to parse the output
tags (Optional[List[str]]): List of tags to tag the prediction with
Returns:
Optional[T]: Parsed output, or None if it could not be parsed
"""
experiment = get_experiment_environment()

tags = tags or []
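
To round out the docstring above, a hedged example of calling predict_and_parse from a module; the import path, the model, and the Pydantic schema are assumptions for illustration only:

from typing import List, Optional

from pydantic import BaseModel
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI  # placeholder model

# Assumed module path; the diff does not show the file name of this helper
from llm_core.utils.predict_and_parse import predict_and_parse


class FeedbackItem(BaseModel):
    title: str
    description: str
    credits: float


class Assessment(BaseModel):
    feedbacks: List[FeedbackItem]


async def assess(submission_text: str) -> Optional[Assessment]:
    chat_prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a strict but helpful grading assistant."),
        ("human", "Give feedback on this submission:\n{submission}"),
    ])
    # Returns a parsed Assessment, or None if the completion could not be parsed
    return await predict_and_parse(
        model=ChatOpenAI(model="gpt-4o"),
        chat_prompt=chat_prompt,
        prompt_input={"submission": submission_text},
        pydantic_object=Assessment,
        tags=["example"],
    )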