Add capability system

ls1intum · Feb 28, 2024 · f74d39f · f74d39f
1 parent 2c30752
commit f74d39f
Show file tree

Hide file tree

Showing 12 changed files with 311 additions and 5 deletions.
diff --git a/app/domain/codehint.py b/app/domain/codehint.py
@@ -19,7 +19,7 @@ class CodeHint(BaseModel):
     title: str
     description: str
     content: str
-    solution_entries: [ProgrammingExerciseSolutionEntry]
+    # solution_entries: [ProgrammingExerciseSolutionEntry]
 
     def __str__(self):
         return (

diff --git a/app/domain/submission.py b/app/domain/submission.py
@@ -12,7 +12,7 @@ def __str__(self):
 class ProgrammingSubmission(BaseModel):
     commit_hash: str
     build_failed: bool
-    build_log_entries: [BuildLogEntry]
+    # build_log_entries: [BuildLogEntry]
 
     def __str__(self):
         return (

diff --git a/app/llm/__init__.py b/app/llm/__init__.py
@@ -1,3 +1,4 @@
 from llm.completion_arguments import *
 from llm.external import *
 from llm.request_handler import *
+from llm.capability import RequirementList
diff --git a/app/llm/capability/__init__.py b/app/llm/capability/__init__.py
@@ -0,0 +1,3 @@
+from llm.capability.capability_list import CapabilityList
+from llm.capability.requirement_list import RequirementList
+from llm.capability.capability_checker import capabilities_fulfill_requirements
diff --git a/app/llm/capability/capability_checker.py b/app/llm/capability/capability_checker.py
@@ -0,0 +1,76 @@
+from .capability_list import (
+    CapabilityList,
+    capability_weights,
+    always_considered_capabilities_with_default,
+)
+from .requirement_list import RequirementList
+
+
+def capabilities_fulfill_requirements(
+    capability: CapabilityList, requirements: RequirementList
+) -> bool:
+    """Return whether every explicitly set requirement is matched.
+
+    Requirement attributes that are ``None`` are ignored. For each remaining
+    attribute, the capability of the same name is asked for its match score;
+    a falsy score (e.g. ``0``) means the requirement is not fulfilled.
+    """
+    for name, wanted in vars(requirements).items():
+        if wanted is None:
+            continue
+        if not getattr(capability, name).matches(wanted):
+            return False
+    return True
+
+
+def calculate_capability_scores(
+    capabilities: list[CapabilityList],
+    requirements: RequirementList,
+    invert_cost: bool = False,
+) -> list[float]:
+    """Calculate the scores of the capabilities against the requirements.
+
+    For every considered requirement each capability list is asked for its raw
+    match score. The raw scores are then replaced by their rank among the
+    distinct values (``rank / number_of_distinct_values``), multiplied by the
+    weight of the requirement, and finally summed per capability list.
+
+    Args:
+        capabilities: The capability lists to score.
+        requirements: The requirements to score against. Attributes that are
+            ``None`` are skipped unless they have an entry in
+            ``always_considered_capabilities_with_default``.
+        invert_cost: If true, a non-zero raw ``cost`` score is replaced by its
+            reciprocal before ranking.
+
+    Returns:
+        One summed score per entry of ``capabilities``, in the same order.
+        If no requirement is considered at all, every score is ``0.0``.
+    """
+    weighted_columns = []
+
+    for requirement, requirement_value in vars(requirements).items():
+        if requirement_value is None:
+            if requirement not in always_considered_capabilities_with_default:
+                continue
+            # The requirement is not set, but always considered: use its default
+            requirement_value = always_considered_capabilities_with_default[
+                requirement
+            ]
+
+        # Calculate the raw score of each capability list for this requirement
+        scores = []
+        for capability in capabilities:
+            score = getattr(capability, requirement).matches(requirement_value)
+            # The cost is a special case, as depending on how you want to use
+            # the scores the cost needs to be considered differently
+            if requirement == "cost" and invert_cost and score != 0:
+                score = 1 / score
+            scores.append(score)
+
+        # Normalize by the position of the score among the distinct sorted
+        # values to balance out the different ranges of the capabilities.
+        # The rank table is built once so the lookup per score is O(1).
+        rank_of = {
+            score: rank for rank, score in enumerate(sorted(set(scores)), start=1)
+        }
+        distinct_count = len(rank_of)
+        weight_modifier = capability_weights[requirement]
+        weighted_columns.append(
+            [(rank_of[score] / distinct_count) * weight_modifier for score in scores]
+        )
+
+    if not weighted_columns:
+        # Nothing was scored, so all capability lists are rated equally
+        return [0.0] * len(capabilities)
+
+    # Sum up the weighted scores of all requirements per capability list
+    return [sum(per_capability) for per_capability in zip(*weighted_columns)]
diff --git a/app/llm/capability/capability_list.py b/app/llm/capability/capability_list.py
@@ -0,0 +1,112 @@
+from abc import ABCMeta
+from pydantic import BaseModel, Field, model_validator
+
+
+class Capability(metaclass=ABCMeta):
+    """A capability to match a generic value.
+
+    Any class that exposes a callable ``matches`` attribute is treated as a
+    (virtual) subclass, so concrete capabilities do not have to inherit from
+    this class.
+    """
+
+    @classmethod
+    def __subclasshook__(cls, subclass) -> bool:
+        # Structural check used by issubclass() / isinstance()
+        return callable(getattr(subclass, "matches", None))
+
+    def matches(self, other: object) -> int | float:
+        """Return a score for how well the capability matches the input.
+
+        Raises:
+            NotImplementedError: Always; concrete capabilities provide the
+                actual scoring.
+        """
+        raise NotImplementedError
+
+
+class TextCapability(BaseModel):
+    """Capability holding one fixed text that is matched by equality"""
+
+    value: str
+
+    def matches(self, text: str) -> int:
+        """Return 1 if ``text`` equals the stored value, otherwise 0"""
+        is_same = self.value == text
+        return int(is_same)
+
+    def __str__(self):
+        inner = super().__str__()
+        return f"TextCapability({inner})"
+
+
+class OrderedNumberCapability(BaseModel):
+    """Numeric capability where a higher stored value is better"""
+
+    value: int | float
+
+    def matches(self, number: int | float) -> int | float:
+        """Return 0 if the value is below ``number``, else the surplus plus 1"""
+        return 0 if self.value < number else self.value - number + 1
+
+    def __str__(self):
+        inner = super().__str__()
+        return f"OrderedNumberCapability({inner})"
+
+
+class InverseOrderedNumberCapability(BaseModel):
+    """Numeric capability where a lower stored value is better"""
+
+    value: int | float
+
+    def matches(self, number: int | float) -> int | float:
+        """Return 0 if the value is above ``number``, else the margin plus 1"""
+        return 0 if self.value > number else number - self.value + 1
+
+    def __str__(self):
+        inner = super().__str__()
+        return f"InverseOrderedNumberCapability({inner})"
+
+
+class BooleanCapability(BaseModel):
+    """Capability holding a single flag that is matched by equality"""
+
+    value: bool
+
+    def matches(self, boolean: bool) -> int:
+        """Return 1 if ``boolean`` equals the stored flag, otherwise 0"""
+        is_same = self.value == boolean
+        return int(is_same)
+
+    def __str__(self):
+        return f"BooleanCapability({self.value})"
+
+
+class CapabilityList(BaseModel):
+    """A list of capabilities for a model.
+
+    Every field has a default, so only the capabilities that differ from the
+    defaults have to be given. Plain values (e.g. ``cost=5``) are accepted and
+    wrapped into the matching capability model during validation.
+    """
+
+    cost: InverseOrderedNumberCapability = Field(
+        default=InverseOrderedNumberCapability(value=0)
+    )
+    gpt_version_equivalent: OrderedNumberCapability = Field(
+        default=OrderedNumberCapability(value=2)
+    )
+    context_length: OrderedNumberCapability = Field(
+        default=OrderedNumberCapability(value=0)
+    )
+    vendor: TextCapability = Field(default=TextCapability(value=""))
+    privacy_compliance: BooleanCapability = Field(
+        default=BooleanCapability(value=False)
+    )
+    self_hosted: BooleanCapability = Field(default=BooleanCapability(value=False))
+    image_recognition: BooleanCapability = Field(default=BooleanCapability(value=False))
+    json_mode: BooleanCapability = Field(default=BooleanCapability(value=False))
+
+    @model_validator(mode="before")
+    @classmethod
+    def from_dict(cls, data: object):
+        """Prepare the data for handling by Pydantic.
+
+        Plain values are wrapped as ``{"value": <plain value>}`` so that they
+        can be parsed into the capability models. Values that already are a
+        dict or a model instance are passed through unchanged. The input is
+        not modified; a new dict is returned.
+        """
+        if not isinstance(data, dict):
+            # A "before" validator can receive arbitrary input; leave anything
+            # that is not a mapping of field values to Pydantic's own checks
+            return data
+        return {
+            key: value if isinstance(value, (dict, BaseModel)) else {"value": value}
+            for key, value in data.items()
+        }
+
+
+# Weight modifier applied to the normalized score of each capability when the
+# scores are summed up. A weight of 0 means the capability does not contribute
+# to the summed score.
+capability_weights = {
+    "cost": 1,
+    "gpt_version_equivalent": 4,
+    "context_length": 0.1,
+    "vendor": 1,
+    "privacy_compliance": 0,
+    "self_hosted": 0,
+    "image_recognition": 0,
+    "json_mode": 0,
+}
+
+# Capabilities that are scored even if the requirement is not set, mapped to
+# the value that is used in place of the missing requirement
+always_considered_capabilities_with_default = {"cost": 100_000_000_000_000}
diff --git a/app/llm/capability/requirement_list.py b/app/llm/capability/requirement_list.py
@@ -0,0 +1,31 @@
+from dataclasses import dataclass
+
+
+@dataclass(eq=False)
+class RequirementList:
+    """A class to represent the requirements you want to match against.
+
+    Every requirement is optional; ``None`` means the requirement is not set.
+    The fields are stored as instance attributes in declaration order, so
+    iterating over the instance ``__dict__`` yields them in this order.
+
+    ``eq=False`` keeps the identity based comparison and hashing of a plain
+    class; the dataclass only generates ``__init__`` and ``__repr__``.
+    """
+
+    cost: int | None = None
+    gpt_version_equivalent: float | None = None
+    context_length: int | None = None
+    vendor: str | None = None
+    privacy_compliance: bool | None = None
+    self_hosted: bool | None = None
+    image_recognition: bool | None = None
+    json_mode: bool | None = None
diff --git a/app/llm/external/model.py b/app/llm/external/model.py
@@ -3,6 +3,7 @@
 
 from domain import IrisMessage
 from llm import CompletionArguments
+from llm.capability import CapabilityList
 
 
 class LanguageModel(BaseModel, metaclass=ABCMeta):
@@ -11,6 +12,7 @@ class LanguageModel(BaseModel, metaclass=ABCMeta):
     id: str
     name: str
     description: str
+    capabilities: CapabilityList
 
 
 class CompletionModel(LanguageModel, metaclass=ABCMeta):

diff --git a/app/llm/llm_manager.py b/app/llm/llm_manager.py
@@ -5,6 +5,8 @@
 import yaml
 
 from common import Singleton
+from llm.capability import RequirementList
+from llm.capability.capability_checker import calculate_capability_scores
 from llm.external import LanguageModel, AnyLLM
 
 
@@ -26,6 +28,7 @@ def get_llm_by_id(self, llm_id):
                 return llm
 
     def load_llms(self):
+        """Load the llms from the config file"""
         path = os.environ.get("LLM_CONFIG_PATH")
         if not path:
             raise Exception("LLM_CONFIG_PATH not set")
@@ -34,3 +37,13 @@ def load_llms(self):
             loaded_llms = yaml.safe_load(file)
 
         self.entries = LlmList.parse_obj({"llms": loaded_llms}).llms
+
+    def get_llms_sorted_by_capabilities_score(
+        self, requirements: RequirementList, invert_cost: bool = False
+    ):
+        """Return the loaded llms ordered from highest to lowest score.
+
+        The score of each llm is calculated from its capabilities and the given
+        requirements; llms with equal scores keep their relative order.
+        """
+        capability_lists = [llm.capabilities for llm in self.entries]
+        scores = calculate_capability_scores(
+            capability_lists, requirements, invert_cost
+        )
+        ranked = sorted(
+            zip(scores, self.entries), key=lambda pair: pair[0], reverse=True
+        )
+        return [llm for _, llm in ranked]
diff --git a/app/llm/request_handler/__init__.py b/app/llm/request_handler/__init__.py
@@ -1,2 +1,6 @@
-from basic_request_handler import BasicRequestHandler
-from request_handler_interface import RequestHandler
+from llm.request_handler.request_handler_interface import RequestHandler
+from llm.request_handler.basic_request_handler import BasicRequestHandler
+from llm.request_handler.capability_request_handler import (
+    CapabilityRequestHandler,
+    CapabilityRequestHandlerSelectionMode,
+)
diff --git a/app/llm/request_handler/basic_request_handler.py b/app/llm/request_handler/basic_request_handler.py
@@ -1,5 +1,6 @@
 from domain import IrisMessage
-from llm import RequestHandler, CompletionArguments
+from llm.request_handler import RequestHandler
+from llm.completion_arguments import CompletionArguments
 from llm.llm_manager import LlmManager
 
 

diff --git a/app/llm/request_handler/capability_request_handler.py b/app/llm/request_handler/capability_request_handler.py
@@ -0,0 +1,63 @@
+from enum import Enum
+
+from domain import IrisMessage
+from llm.capability import RequirementList
+from llm.external.model import ChatModel, CompletionModel, EmbeddingModel, LanguageModel
+from llm.request_handler import RequestHandler
+from llm.completion_arguments import CompletionArguments
+from llm.llm_manager import LlmManager
+
+
+class CapabilityRequestHandlerSelectionMode(Enum):
+    """Enum for the selection mode of the capability request handler"""
+
+    # Select the first entry of the score-sorted model list
+    BEST = "best"
+    # Select the last entry of the score-sorted model list
+    WORST = "worst"
+
+
+class CapabilityRequestHandler(RequestHandler):
+    """Request handler that selects the best/worst model based on the requirements.
+
+    The model is selected anew for every request from the llms known to the
+    ``LlmManager``, restricted to the model type needed for the request.
+    """
+
+    requirements: RequirementList
+    selection_mode: CapabilityRequestHandlerSelectionMode
+    llm_manager: LlmManager
+
+    def __init__(
+        self,
+        requirements: RequirementList,
+        selection_mode: CapabilityRequestHandlerSelectionMode = CapabilityRequestHandlerSelectionMode.WORST,
+    ) -> None:
+        """Store the requirements and the selection mode used for every request"""
+        self.requirements = requirements
+        self.selection_mode = selection_mode
+        self.llm_manager = LlmManager()
+
+    def complete(self, prompt: str, arguments: CompletionArguments) -> str:
+        """Complete the prompt with the selected completion model"""
+        llm = self._select_model(CompletionModel)
+        return llm.complete(prompt, arguments)
+
+    def chat(
+        self, messages: list[IrisMessage], arguments: CompletionArguments
+    ) -> IrisMessage:
+        """Answer the messages with the selected chat model"""
+        llm = self._select_model(ChatModel)
+        return llm.chat(messages, arguments)
+
+    def embed(self, text: str) -> list[float]:
+        """Embed the text with the selected embedding model"""
+        llm = self._select_model(EmbeddingModel)
+        return llm.embed(text)
+
+    def _select_model(self, type_filter: type) -> LanguageModel:
+        """Select the best/worst model based on the requirements and the selection mode.
+
+        Args:
+            type_filter: Only llms that are an instance of this type are
+                considered.
+
+        Returns:
+            The first llm of the score-sorted candidates in ``BEST`` mode,
+            otherwise the last one.
+
+        Raises:
+            IndexError: If no llm of the requested type is available.
+        """
+        # The cost score is inverted when looking for the worst model
+        invert_cost = self.selection_mode == CapabilityRequestHandlerSelectionMode.WORST
+        ranked_llms = self.llm_manager.get_llms_sorted_by_capabilities_score(
+            self.requirements, invert_cost
+        )
+        candidates = [llm for llm in ranked_llms if isinstance(llm, type_filter)]
+
+        if not candidates:
+            # Fail with a clear message instead of an opaque index error
+            type_name = getattr(type_filter, "__name__", type_filter)
+            raise IndexError(f"No llm of type {type_name} available for selection")
+
+        if self.selection_mode == CapabilityRequestHandlerSelectionMode.BEST:
+            llm = candidates[0]
+        else:
+            llm = candidates[-1]
+
+        # Print the selected model for the logs
+        print(f"Selected {llm.description}")
+        return llm