diff --git a/app/domain/codehint.py b/app/domain/codehint.py index 45a16d8a..c27694a7 100644 --- a/app/domain/codehint.py +++ b/app/domain/codehint.py @@ -19,7 +19,7 @@ class CodeHint(BaseModel): title: str description: str content: str - solution_entries: [ProgrammingExerciseSolutionEntry] + solution_entries: list[ProgrammingExerciseSolutionEntry] def __str__(self): return ( diff --git a/app/domain/dtos.py b/app/domain/dtos.py index eb723f8f..35576dbd 100644 --- a/app/domain/dtos.py +++ b/app/domain/dtos.py @@ -13,7 +13,7 @@ class ProgrammingExerciseTutorChatDTO(BaseModel): course: Course exercise: ProgrammingExercise submission: ProgrammingSubmission - chat_history: [IrisMessage] + chat_history: list[IrisMessage] def __str__(self): return ( @@ -27,7 +27,7 @@ class CodeEditorChatDTO(BaseModel): solution_repository: dict[str, str] template_repository: dict[str, str] test_repository: dict[str, str] - chat_history: [IrisMessage] + chat_history: list[IrisMessage] def __str__(self): return ( diff --git a/app/domain/submission.py b/app/domain/submission.py index e64b8a4b..82674928 100644 --- a/app/domain/submission.py +++ b/app/domain/submission.py @@ -12,7 +12,7 @@ def __str__(self): class ProgrammingSubmission(BaseModel): commit_hash: str build_failed: bool - build_log_entries: [BuildLogEntry] + build_log_entries: list[BuildLogEntry] def __str__(self): return ( diff --git a/app/llm/__init__.py b/app/llm/__init__.py index ed54099b..dcb62980 100644 --- a/app/llm/__init__.py +++ b/app/llm/__init__.py @@ -1,3 +1,5 @@ from llm.completion_arguments import * from llm.external import * +from llm.capability import * from llm.request_handler import * +from llm.capability import RequirementList diff --git a/app/llm/capability/__init__.py b/app/llm/capability/__init__.py new file mode 100644 index 00000000..a588dc8a --- /dev/null +++ b/app/llm/capability/__init__.py @@ -0,0 +1,3 @@ +from llm.capability.capability_list import CapabilityList +from llm.capability.requirement_list import RequirementList +from llm.capability.capability_checker import capabilities_fulfill_requirements diff --git a/app/llm/capability/capability_checker.py b/app/llm/capability/capability_checker.py new file mode 100644 index 00000000..44874092 --- /dev/null +++ b/app/llm/capability/capability_checker.py @@ -0,0 +1,80 @@ +from .capability_list import ( + CapabilityList, + capability_weights, + always_considered_capabilities_with_default, +) +from .requirement_list import RequirementList + + +def capabilities_fulfill_requirements( + capability: CapabilityList, requirements: RequirementList +) -> bool: + """Check if the capability fulfills the requirements""" + return all( + getattr(capability, field).matches(getattr(requirements, field)) + for field in requirements.__dict__.keys() + if getattr(requirements, field) is not None + ) + + +def calculate_capability_scores( + capabilities: list[CapabilityList], + requirements: RequirementList, + invert_cost: bool = False, +) -> list[int]: + """Calculate the scores of the capabilities against the requirements""" + all_scores = [] + + for requirement in requirements.__dict__.keys(): + requirement_value = getattr(requirements, requirement) + if ( + requirement_value is None + and requirement not in always_considered_capabilities_with_default + ): + continue + + # Calculate the scores for each capability + scores = [] + for capability in capabilities: + if ( + requirement_value is None + and requirement in always_considered_capabilities_with_default + ): + # If the requirement is not set, 
use the default value if necessary + score = getattr(capability, requirement).matches( + always_considered_capabilities_with_default[requirement] + ) + else: + score = getattr(capability, requirement).matches(requirement_value) + # Invert the cost if required + # The cost is a special case, as depending on how you want to use the scores + # the cost needs to be considered differently + if ( + requirement in ["input_cost", "output_cost"] + and invert_cost + and score != 0 + ): + score = 1 / score + scores.append(score) + + # Normalize the scores between 0 and 1 and multiply by the weight modifier + # The normalization here is based on the position of the score in the sorted list to balance out + # the different ranges of the capabilities + sorted_scores = sorted(set(scores)) + weight_modifier = capability_weights[requirement] + normalized_scores = [ + ((sorted_scores.index(score) + 1) / len(sorted_scores)) * weight_modifier + for score in scores + ] + all_scores.append(normalized_scores) + + final_scores = [] + + # Sum up the scores for each capability to get the final score for each list of capabilities + for i in range(len(all_scores[0])): + score = 0 + for j in range(len(all_scores)): + score += all_scores[j][i] + final_scores.append(score) + + return final_scores diff --git a/app/llm/capability/capability_list.py b/app/llm/capability/capability_list.py new file mode 100644 index 00000000..a865483f --- /dev/null +++ b/app/llm/capability/capability_list.py @@ -0,0 +1,131 @@ +from abc import ABCMeta +from pydantic import BaseModel, Field, model_validator + + +class Capability(metaclass=ABCMeta): + """A capability to match a generic value""" + + @classmethod + def __subclasshook__(cls, subclass) -> bool: + return hasattr(subclass, "matches") and callable(subclass.matches) + + def matches(self, other: any) -> int: + """Return a score for how well the capability matches the input""" + raise NotImplementedError + + +class TextCapability(BaseModel): + """A capability to match a fixed text value""" + + value: str + + def matches(self, text: str) -> int: + return int(self.value == text) + + def __str__(self): + return f"TextCapability({super().__str__()})" + + +class OrderedNumberCapability(BaseModel): + """A capability that is better the higher the value""" + + value: int | float + + def matches(self, number: int | float) -> int | float: + if self.value < number: + return 0 + return self.value - number + 1 + + def __str__(self): + return f"OrderedNumberCapability({super().__str__()})" + + +class InverseOrderedNumberCapability(BaseModel): + """A capability that is better the lower the value""" + + value: int | float + + def matches(self, number: int | float) -> int | float: + if self.value > number: + return 0 + return number - self.value + 1 + + def __str__(self): + return f"InverseOrderedNumberCapability({super().__str__()})" + + +class BooleanCapability(BaseModel): + """A simple boolean capability""" + + value: bool + + def matches(self, boolean: bool) -> int: + return int(self.value == boolean) + + def __str__(self): + return f"BooleanCapability({str(self.value)})" + + +class CapabilityList(BaseModel): + """A list of capabilities for a model""" + + # Cost in $ per 1k input tokens + input_cost: InverseOrderedNumberCapability = Field( + default=InverseOrderedNumberCapability(value=0) + ) + # Output cost in $ per 1k tokens + output_cost: InverseOrderedNumberCapability = Field( + default=InverseOrderedNumberCapability(value=0) + ) + # The GPT version that is roughly equivalent to the model + 
gpt_version_equivalent: OrderedNumberCapability = Field( + default=OrderedNumberCapability(value=2) + ) + # The speed of the model in tokens per second + speed: OrderedNumberCapability = Field(default=OrderedNumberCapability(value=0)) + # The context length of the model in tokens + context_length: OrderedNumberCapability = Field( + default=OrderedNumberCapability(value=0) + ) + # The vendor of the model e.g. "OpenAI" or "Anthropic" + vendor: TextCapability = Field(default=TextCapability(value="")) + # Whether the model is privacy compliant and can be used for sensitive data + privacy_compliance: BooleanCapability = Field( + default=BooleanCapability(value=False) + ) + # Whether the model is self-hosted + self_hosted: BooleanCapability = Field(default=BooleanCapability(value=False)) + # Whether the model supports image recognition + image_recognition: BooleanCapability = Field(default=BooleanCapability(value=False)) + # Whether the model supports a JSON mode + json_mode: BooleanCapability = Field(default=BooleanCapability(value=False)) + + @model_validator(mode="before") + @classmethod + def from_dict(cls, data: dict[str, any]): + """Prepare the data for handling by Pydantic""" + for key, value in data.items(): + if type(value) is not dict: + data[key] = {"value": value} + return data + + +# The weights for the capabilities used in the scoring +capability_weights = { + "input_cost": 0.5, + "output_cost": 0.5, + "gpt_version_equivalent": 4, + "speed": 2, + "context_length": 0.1, + "vendor": 1, + "privacy_compliance": 0, + "self_hosted": 0, + "image_recognition": 0, + "json_mode": 0, +} + +# The default values for the capabilities that are always considered +always_considered_capabilities_with_default = { + "input_cost": 100000000000000, + "output_cost": 100000000000000, +} diff --git a/app/llm/capability/requirement_list.py b/app/llm/capability/requirement_list.py new file mode 100644 index 00000000..57d28d09 --- /dev/null +++ b/app/llm/capability/requirement_list.py @@ -0,0 +1,47 @@ +class RequirementList: + """A class to represent the requirements you want to match against""" + + # Maximum cost in $ per 1k input tokens + input_cost: float | None + # Maximum cost in $ per 1k output tokens + output_cost: float | None + # The minimum GPT version that the model should be roughly equivalent to + gpt_version_equivalent: float | None + # The minimum speed of the model in tokens per second + speed: float | None + # The minimum context length of the model in tokens + context_length: int | None + # The vendor of the model e.g. 
"OpenAI" or "Anthropic" + vendor: str | None + # Whether the model should be privacy compliant to be used for sensitive data + privacy_compliance: bool | None + # Whether the model should be self-hosted + self_hosted: bool | None + # Whether the model should support image recognition + image_recognition: bool | None + # Whether the model should support a JSON mode + json_mode: bool | None + + def __init__( + self, + input_cost: float | None = None, + output_cost: float | None = None, + gpt_version_equivalent: float | None = None, + speed: float | None = None, + context_length: int | None = None, + vendor: str | None = None, + privacy_compliance: bool | None = None, + self_hosted: bool | None = None, + image_recognition: bool | None = None, + json_mode: bool | None = None, + ) -> None: + self.input_cost = input_cost + self.output_cost = output_cost + self.gpt_version_equivalent = gpt_version_equivalent + self.speed = speed + self.context_length = context_length + self.vendor = vendor + self.privacy_compliance = privacy_compliance + self.self_hosted = self_hosted + self.image_recognition = image_recognition + self.json_mode = json_mode diff --git a/app/llm/external/model.py b/app/llm/external/model.py index c831009f..d16e206a 100644 --- a/app/llm/external/model.py +++ b/app/llm/external/model.py @@ -3,6 +3,7 @@ from domain import IrisMessage from llm import CompletionArguments +from llm.capability import CapabilityList class LanguageModel(BaseModel, metaclass=ABCMeta): @@ -11,6 +12,7 @@ class LanguageModel(BaseModel, metaclass=ABCMeta): id: str name: str description: str + capabilities: CapabilityList class CompletionModel(LanguageModel, metaclass=ABCMeta): diff --git a/app/llm/llm_manager.py b/app/llm/llm_manager.py index 1abc02bd..84ab9186 100644 --- a/app/llm/llm_manager.py +++ b/app/llm/llm_manager.py @@ -5,6 +5,8 @@ import yaml from common import Singleton +from llm.capability import RequirementList +from llm.capability.capability_checker import calculate_capability_scores from llm.external import LanguageModel, AnyLLM @@ -26,6 +28,7 @@ def get_llm_by_id(self, llm_id): return llm def load_llms(self): + """Load the llms from the config file""" path = os.environ.get("LLM_CONFIG_PATH") if not path: raise Exception("LLM_CONFIG_PATH not set") @@ -34,3 +37,13 @@ def load_llms(self): loaded_llms = yaml.safe_load(file) self.entries = LlmList.parse_obj({"llms": loaded_llms}).llms + + def get_llms_sorted_by_capabilities_score( + self, requirements: RequirementList, invert_cost: bool = False + ): + """Get the llms sorted by their capability to requirement scores""" + scores = calculate_capability_scores( + [llm.capabilities for llm in self.entries], requirements, invert_cost + ) + sorted_llms = sorted(zip(scores, self.entries), key=lambda pair: -pair[0]) + return [llm for _, llm in sorted_llms] diff --git a/app/llm/request_handler/__init__.py b/app/llm/request_handler/__init__.py index 8ad46295..ef20a36a 100644 --- a/app/llm/request_handler/__init__.py +++ b/app/llm/request_handler/__init__.py @@ -1,2 +1,6 @@ -from basic_request_handler import BasicRequestHandler -from request_handler_interface import RequestHandler +from llm.request_handler.request_handler_interface import RequestHandler +from llm.request_handler.basic_request_handler import BasicRequestHandler +from llm.request_handler.capability_request_handler import ( + CapabilityRequestHandler, + CapabilityRequestHandlerSelectionMode, +) diff --git a/app/llm/request_handler/basic_request_handler.py 
b/app/llm/request_handler/basic_request_handler.py index 14227997..1c9a4dfe 100644 --- a/app/llm/request_handler/basic_request_handler.py +++ b/app/llm/request_handler/basic_request_handler.py @@ -1,5 +1,6 @@ from domain import IrisMessage -from llm import RequestHandler, CompletionArguments +from llm.request_handler import RequestHandler +from llm.completion_arguments import CompletionArguments from llm.llm_manager import LlmManager diff --git a/app/llm/request_handler/capability_request_handler.py b/app/llm/request_handler/capability_request_handler.py new file mode 100644 index 00000000..3ea1d0ce --- /dev/null +++ b/app/llm/request_handler/capability_request_handler.py @@ -0,0 +1,63 @@ +from enum import Enum + +from domain import IrisMessage +from llm.capability import RequirementList +from llm.external.model import ChatModel, CompletionModel, EmbeddingModel, LanguageModel +from llm.request_handler import RequestHandler +from llm.completion_arguments import CompletionArguments +from llm.llm_manager import LlmManager + + +class CapabilityRequestHandlerSelectionMode(Enum): + """Enum for the selection mode of the capability request handler""" + + BEST = "best" + WORST = "worst" + + +class CapabilityRequestHandler(RequestHandler): + """Request handler that selects the best/worst model based on the requirements""" + + requirements: RequirementList + selection_mode: CapabilityRequestHandlerSelectionMode + llm_manager: LlmManager + + def __init__( + self, + requirements: RequirementList, + selection_mode: CapabilityRequestHandlerSelectionMode = CapabilityRequestHandlerSelectionMode.WORST, + ) -> None: + self.requirements = requirements + self.selection_mode = selection_mode + self.llm_manager = LlmManager() + + def complete(self, prompt: str, arguments: CompletionArguments) -> str: + llm = self._select_model(CompletionModel) + return llm.complete(prompt, arguments) + + def chat( + self, messages: list[IrisMessage], arguments: CompletionArguments + ) -> IrisMessage: + llm = self._select_model(ChatModel) + return llm.chat(messages, arguments) + + def embed(self, text: str) -> list[float]: + llm = self._select_model(EmbeddingModel) + return llm.embed(text) + + def _select_model(self, type_filter: type) -> LanguageModel: + """Select the best/worst model based on the requirements and the selection mode""" + llms = self.llm_manager.get_llms_sorted_by_capabilities_score( + self.requirements, + self.selection_mode == CapabilityRequestHandlerSelectionMode.WORST, + ) + llms = [llm for llm in llms if isinstance(llm, type_filter)] + + if self.selection_mode == CapabilityRequestHandlerSelectionMode.BEST: + llm = llms[0] + else: + llm = llms[-1] + + # Print the selected model for the logs + print(f"Selected {llm.description}") + return llm
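
Usage sketch (illustrative, not part of this diff): one way the new capability-based
selection could be exercised end to end. It assumes LLM_CONFIG_PATH points to a config
file in which each model declares its capabilities, that at least one configured model
is an EmbeddingModel, and that the requirement values below are placeholders rather
than recommended settings.

    from llm.capability import RequirementList
    from llm.request_handler import (
        CapabilityRequestHandler,
        CapabilityRequestHandlerSelectionMode,
    )

    # Only the requirement fields that are set are scored; unset fields are ignored
    # (except input_cost/output_cost, which always fall back to their defaults).
    requirements = RequirementList(
        gpt_version_equivalent=4,   # roughly GPT-4-level or better
        context_length=16000,       # at least 16k tokens of context
        privacy_compliance=True,    # must be usable for sensitive data
    )

    # After scoring, BEST takes the top-ranked model of the requested type and
    # WORST (the default) takes the bottom-ranked one, with cost scores inverted.
    handler = CapabilityRequestHandler(
        requirements=requirements,
        selection_mode=CapabilityRequestHandlerSelectionMode.BEST,
    )

    # embed() only needs a string, so it works without knowing the exact shape of
    # CompletionArguments or IrisMessage; chat() and complete() follow the same pattern.
    vector = handler.embed("How do I fix the failing test in BubbleSort.java?")
    print(len(vector))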