LLM: Add simple capability system (#69)

ls1intum · Mar 5, 2024 · f6832e0 · f6832e0
1 parent aa9daf8
commit f6832e0
Show file tree

Hide file tree

Showing 13 changed files with 353 additions and 7 deletions.
diff --git a/app/domain/codehint.py b/app/domain/codehint.py
@@ -19,7 +19,7 @@ class CodeHint(BaseModel):
     title: str
     description: str
     content: str
-    solution_entries: [ProgrammingExerciseSolutionEntry]
+    solution_entries: list[ProgrammingExerciseSolutionEntry]
 
     def __str__(self):
         return (

diff --git a/app/domain/dtos.py b/app/domain/dtos.py
@@ -13,7 +13,7 @@ class ProgrammingExerciseTutorChatDTO(BaseModel):
     course: Course
     exercise: ProgrammingExercise
     submission: ProgrammingSubmission
-    chat_history: [IrisMessage]
+    chat_history: list[IrisMessage]
 
     def __str__(self):
         return (
@@ -27,7 +27,7 @@ class CodeEditorChatDTO(BaseModel):
     solution_repository: dict[str, str]
     template_repository: dict[str, str]
     test_repository: dict[str, str]
-    chat_history: [IrisMessage]
+    chat_history: list[IrisMessage]
 
     def __str__(self):
         return (

diff --git a/app/domain/submission.py b/app/domain/submission.py
@@ -12,7 +12,7 @@ def __str__(self):
 class ProgrammingSubmission(BaseModel):
     commit_hash: str
     build_failed: bool
-    build_log_entries: [BuildLogEntry]
+    build_log_entries: list[BuildLogEntry]
 
     def __str__(self):
         return (

diff --git a/app/llm/__init__.py b/app/llm/__init__.py
@@ -1,3 +1,5 @@
 from llm.completion_arguments import *
 from llm.external import *
+from llm.capability import *
 from llm.request_handler import *
+from llm.capability import RequirementList
diff --git a/app/llm/capability/__init__.py b/app/llm/capability/__init__.py
@@ -0,0 +1,3 @@
+from llm.capability.capability_list import CapabilityList
+from llm.capability.requirement_list import RequirementList
+from llm.capability.capability_checker import capabilities_fulfill_requirements
diff --git a/app/llm/capability/capability_checker.py b/app/llm/capability/capability_checker.py
@@ -0,0 +1,80 @@
+from .capability_list import (
+    CapabilityList,
+    capability_weights,
+    always_considered_capabilities_with_default,
+)
+from .requirement_list import RequirementList
+
+
+def capabilities_fulfill_requirements(
+    capability: CapabilityList, requirements: RequirementList
+) -> bool:
+    """Tell whether a capability list satisfies every requirement that is set.
+
+    Requirement attributes that are ``None`` are ignored. For every other
+    attribute, the ``matches`` result of the same-named capability must be
+    truthy (a non-zero score); the first failing attribute ends the check.
+    """
+    for field, required in vars(requirements).items():
+        if required is None:
+            # Unset requirement: nothing to check for this field
+            continue
+        if not getattr(capability, field).matches(required):
+            return False
+    return True
+
+
+def calculate_capability_scores(
+    capabilities: list[CapabilityList],
+    requirements: RequirementList,
+    invert_cost: bool = False,
+) -> list[float]:
+    """Calculate the scores of the capabilities against the requirements
+
+    For each considered requirement, the raw ``matches`` scores are replaced by
+    their rank among the distinct scores (normalized to the range ``(0, 1]``)
+    and multiplied by the requirement's entry in ``capability_weights``. The
+    weighted values are then summed up per capability list.
+
+    Args:
+        capabilities: The capability lists to score.
+        requirements: The requirements to score against. Attributes that are
+            ``None`` are skipped, unless they have an entry in
+            ``always_considered_capabilities_with_default``, whose value is
+            then used in place of the missing requirement.
+        invert_cost: If true, every non-zero ``input_cost`` / ``output_cost``
+            score is replaced by its reciprocal before ranking.
+
+    Returns:
+        One score per entry of ``capabilities``, in the same order. All scores
+        are ``0.0`` if no requirement was considered.
+
+    Raises:
+        KeyError: If a considered requirement has no entry in
+            ``capability_weights``.
+    """
+    cost_requirements = ("input_cost", "output_cost")
+    # Accumulating into a zero-initialized list (instead of indexing the first
+    # score column afterwards) keeps the function well-defined even when no
+    # requirement ends up being considered.
+    final_scores = [0.0] * len(capabilities)
+
+    for requirement, requirement_value in vars(requirements).items():
+        if requirement_value is None:
+            if requirement not in always_considered_capabilities_with_default:
+                continue
+            # If the requirement is not set, use the default value
+            requirement_value = always_considered_capabilities_with_default[
+                requirement
+            ]
+
+        # The cost is a special case, as depending on how you want to use the scores
+        # the cost needs to be considered differently
+        invert = invert_cost and requirement in cost_requirements
+
+        # Calculate the scores for each capability
+        scores = []
+        for capability in capabilities:
+            score = getattr(capability, requirement).matches(requirement_value)
+            if invert and score != 0:
+                score = 1 / score
+            scores.append(score)
+
+        # Normalize the scores between 0 and 1 and multiply by the weight modifier
+        # The normalization here is based on the position of the score in the sorted list to balance out
+        # the different ranges of the capabilities
+        distinct_scores = sorted(set(scores))
+        # Precomputed 1-based ranks avoid a linear list search per score
+        rank_by_score = {
+            score: rank for rank, score in enumerate(distinct_scores, start=1)
+        }
+        weight_modifier = capability_weights[requirement]
+        for index, score in enumerate(scores):
+            final_scores[index] += (
+                rank_by_score[score] / len(distinct_scores)
+            ) * weight_modifier
+
+    return final_scores
diff --git a/app/llm/capability/capability_list.py b/app/llm/capability/capability_list.py
@@ -0,0 +1,131 @@
+from abc import ABCMeta
+from pydantic import BaseModel, Field, model_validator
+
+
+class Capability(metaclass=ABCMeta):
+    """A capability to match a generic value
+
+    Any class that exposes a callable ``matches`` attribute is recognized as a
+    subclass through ``__subclasshook__``, so concrete capabilities do not have
+    to inherit from this class.
+    """
+
+    @classmethod
+    def __subclasshook__(cls, subclass) -> bool:
+        # Structural check: a callable ``matches`` is all that is required
+        return callable(getattr(subclass, "matches", None))
+
+    # ``object`` is used for "any value"; the builtin ``any`` is a function,
+    # not a type, and must not be used as an annotation.
+    def matches(self, other: object) -> int | float:
+        """Return a score for how well the capability matches the input"""
+        raise NotImplementedError
+
+
+class TextCapability(BaseModel):
+    """Capability holding one fixed text that has to be matched exactly"""
+
+    value: str
+
+    def matches(self, text: str) -> int:
+        """Return 1 if ``text`` equals the stored value, otherwise 0"""
+        return 1 if self.value == text else 0
+
+    def __str__(self):
+        inner = super().__str__()
+        return f"TextCapability({inner})"
+
+
+class OrderedNumberCapability(BaseModel):
+    """Numeric capability where a higher value is better"""
+
+    value: int | float
+
+    def matches(self, number: int | float) -> int | float:
+        """Return 0 if the value is below ``number``, else the surplus plus one"""
+        return 0 if self.value < number else self.value - number + 1
+
+    def __str__(self):
+        inner = super().__str__()
+        return f"OrderedNumberCapability({inner})"
+
+
+class InverseOrderedNumberCapability(BaseModel):
+    """Numeric capability where a lower value is better"""
+
+    value: int | float
+
+    def matches(self, number: int | float) -> int | float:
+        """Return 0 if the value is above ``number``, else the margin plus one"""
+        return 0 if self.value > number else number - self.value + 1
+
+    def __str__(self):
+        inner = super().__str__()
+        return f"InverseOrderedNumberCapability({inner})"
+
+
+class BooleanCapability(BaseModel):
+    """Capability that is a plain boolean flag"""
+
+    value: bool
+
+    def matches(self, boolean: bool) -> int:
+        """Return 1 if ``boolean`` equals the stored flag, otherwise 0"""
+        return 1 if self.value == boolean else 0
+
+    def __str__(self):
+        flag_text = str(self.value)
+        return f"BooleanCapability({flag_text})"
+
+
+class CapabilityList(BaseModel):
+    """A list of capabilities for a model
+
+    Every field has a default, so only the known capabilities have to be given.
+    Plain values are accepted as a shorthand for the capability objects, e.g.
+    ``{"speed": 30}`` is treated like ``{"speed": {"value": 30}}``.
+    """
+
+    # Cost in $ per 1k input tokens
+    input_cost: InverseOrderedNumberCapability = Field(
+        default=InverseOrderedNumberCapability(value=0)
+    )
+    # Output cost in $ per 1k tokens
+    output_cost: InverseOrderedNumberCapability = Field(
+        default=InverseOrderedNumberCapability(value=0)
+    )
+    # The GPT version that is roughly equivalent to the model
+    gpt_version_equivalent: OrderedNumberCapability = Field(
+        default=OrderedNumberCapability(value=2)
+    )
+    # The speed of the model in tokens per second
+    speed: OrderedNumberCapability = Field(default=OrderedNumberCapability(value=0))
+    # The context length of the model in tokens
+    context_length: OrderedNumberCapability = Field(
+        default=OrderedNumberCapability(value=0)
+    )
+    # The vendor of the model e.g. "OpenAI" or "Anthropic"
+    vendor: TextCapability = Field(default=TextCapability(value=""))
+    # Whether the model is privacy compliant and can be used for sensitive data
+    privacy_compliance: BooleanCapability = Field(
+        default=BooleanCapability(value=False)
+    )
+    # Whether the model is self-hosted
+    self_hosted: BooleanCapability = Field(default=BooleanCapability(value=False))
+    # Whether the model supports image recognition
+    image_recognition: BooleanCapability = Field(default=BooleanCapability(value=False))
+    # Whether the model supports a JSON mode
+    json_mode: BooleanCapability = Field(default=BooleanCapability(value=False))
+
+    @model_validator(mode="before")
+    @classmethod
+    def from_dict(cls, data: object):
+        """Prepare the data for handling by Pydantic
+
+        Wraps every plain value of a dict input into ``{"value": value}`` so it
+        can be validated as a capability. Values that already are dicts or
+        model instances are passed on unchanged, and so is any input that is
+        not a dict (a "before" validator can receive arbitrary input, e.g. an
+        existing ``CapabilityList`` instance).
+
+        A new dict is returned; the caller's data is not modified.
+        """
+        if not isinstance(data, dict):
+            return data
+        return {
+            key: value if isinstance(value, (dict, BaseModel)) else {"value": value}
+            for key, value in data.items()
+        }
+
+
+# Weight applied to the normalized score of each capability during scoring
+# (a weight of 0 means the capability does not contribute to the score)
+capability_weights = dict(
+    input_cost=0.5,
+    output_cost=0.5,
+    gpt_version_equivalent=4,
+    speed=2,
+    context_length=0.1,
+    vendor=1,
+    privacy_compliance=0,
+    self_hosted=0,
+    image_recognition=0,
+    json_mode=0,
+)
+
+# Capabilities that are scored even when no requirement is set for them,
+# together with the value that is used in place of the missing requirement
+always_considered_capabilities_with_default = dict(
+    input_cost=100_000_000_000_000,
+    output_cost=100_000_000_000_000,
+)
diff --git a/app/llm/capability/requirement_list.py b/app/llm/capability/requirement_list.py
@@ -0,0 +1,47 @@
+class RequirementList:
+    """A class to represent the requirements you want to match against
+
+    Every requirement is optional and defaults to ``None``.
+    """
+
+    # Maximum cost in $ per 1k input tokens
+    input_cost: float | None
+    # Maximum cost in $ per 1k output tokens
+    output_cost: float | None
+    # The minimum GPT version that the model should be roughly equivalent to
+    gpt_version_equivalent: float | None
+    # The minimum speed of the model in tokens per second
+    speed: float | None
+    # The minimum context length of the model in tokens
+    context_length: int | None
+    # The vendor of the model e.g. "OpenAI" or "Anthropic"
+    vendor: str | None
+    # Whether the model should be privacy compliant to be used for sensitive data
+    privacy_compliance: bool | None
+    # Whether the model should be self-hosted
+    self_hosted: bool | None
+    # Whether the model should support image recognition
+    image_recognition: bool | None
+    # Whether the model should support a JSON mode
+    json_mode: bool | None
+
+    def __init__(
+        self,
+        input_cost: float | None = None,
+        output_cost: float | None = None,
+        gpt_version_equivalent: float | None = None,
+        speed: float | None = None,
+        context_length: int | None = None,
+        vendor: str | None = None,
+        privacy_compliance: bool | None = None,
+        self_hosted: bool | None = None,
+        image_recognition: bool | None = None,
+        json_mode: bool | None = None,
+    ) -> None:
+        """Store each given requirement on the instance under the same name"""
+        # All ten attributes are always assigned, so the instance ``__dict__``
+        # holds every requirement name in this fixed order.
+        self.input_cost = input_cost
+        self.output_cost = output_cost
+        self.gpt_version_equivalent = gpt_version_equivalent
+        self.speed = speed
+        self.context_length = context_length
+        self.vendor = vendor
+        self.privacy_compliance = privacy_compliance
+        self.self_hosted = self_hosted
+        self.image_recognition = image_recognition
+        self.json_mode = json_mode
diff --git a/app/llm/external/model.py b/app/llm/external/model.py
@@ -3,6 +3,7 @@
 
 from domain import IrisMessage
 from llm import CompletionArguments
+from llm.capability import CapabilityList
 
 
 class LanguageModel(BaseModel, metaclass=ABCMeta):
@@ -11,6 +12,7 @@ class LanguageModel(BaseModel, metaclass=ABCMeta):
     id: str
     name: str
     description: str
+    capabilities: CapabilityList
 
 
 class CompletionModel(LanguageModel, metaclass=ABCMeta):

diff --git a/app/llm/llm_manager.py b/app/llm/llm_manager.py
@@ -5,6 +5,8 @@
 import yaml
 
 from common import Singleton
+from llm.capability import RequirementList
+from llm.capability.capability_checker import calculate_capability_scores
 from llm.external import LanguageModel, AnyLLM
 
 
@@ -26,6 +28,7 @@ def get_llm_by_id(self, llm_id):
                 return llm
 
     def load_llms(self):
+        """Load the llms from the config file"""
         path = os.environ.get("LLM_CONFIG_PATH")
         if not path:
             raise Exception("LLM_CONFIG_PATH not set")
@@ -34,3 +37,13 @@ def load_llms(self):
             loaded_llms = yaml.safe_load(file)
 
         self.entries = LlmList.parse_obj({"llms": loaded_llms}).llms
+
+    def get_llms_sorted_by_capabilities_score(
+        self, requirements: RequirementList, invert_cost: bool = False
+    ):
+        """Return the llms ordered from the highest to the lowest capability score
+
+        The scores are calculated from the capabilities of each entry and the
+        given requirements. Llms with an equal score keep their relative order
+        from ``self.entries``.
+        """
+        llm_capabilities = [llm.capabilities for llm in self.entries]
+        scores = calculate_capability_scores(
+            llm_capabilities, requirements, invert_cost
+        )
+        # A stable descending sort on the score alone, the llms are never compared
+        ranked = sorted(
+            zip(scores, self.entries), key=lambda pair: pair[0], reverse=True
+        )
+        return [llm for _, llm in ranked]
diff --git a/app/llm/request_handler/__init__.py b/app/llm/request_handler/__init__.py
@@ -1,2 +1,6 @@
-from basic_request_handler import BasicRequestHandler
-from request_handler_interface import RequestHandler
+from llm.request_handler.request_handler_interface import RequestHandler
+from llm.request_handler.basic_request_handler import BasicRequestHandler
+from llm.request_handler.capability_request_handler import (
+    CapabilityRequestHandler,
+    CapabilityRequestHandlerSelectionMode,
+)
diff --git a/app/llm/request_handler/basic_request_handler.py b/app/llm/request_handler/basic_request_handler.py
@@ -1,5 +1,6 @@
 from domain import IrisMessage
-from llm import RequestHandler, CompletionArguments
+from llm.request_handler import RequestHandler
+from llm.completion_arguments import CompletionArguments
 from llm.llm_manager import LlmManager