Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LLM: Add simple capability system #69

Merged
merged 3 commits into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion app/domain/codehint.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class CodeHint(BaseModel):
title: str
description: str
content: str
solution_entries: [ProgrammingExerciseSolutionEntry]
solution_entries: list[ProgrammingExerciseSolutionEntry]

def __str__(self):
return (
Expand Down
4 changes: 2 additions & 2 deletions app/domain/dtos.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class ProgrammingExerciseTutorChatDTO(BaseModel):
course: Course
exercise: ProgrammingExercise
submission: ProgrammingSubmission
chat_history: [IrisMessage]
chat_history: list[IrisMessage]

def __str__(self):
return (
Expand All @@ -27,7 +27,7 @@ class CodeEditorChatDTO(BaseModel):
solution_repository: dict[str, str]
template_repository: dict[str, str]
test_repository: dict[str, str]
chat_history: [IrisMessage]
chat_history: list[IrisMessage]

def __str__(self):
return (
Expand Down
2 changes: 1 addition & 1 deletion app/domain/submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def __str__(self):
class ProgrammingSubmission(BaseModel):
commit_hash: str
build_failed: bool
build_log_entries: [BuildLogEntry]
build_log_entries: list[BuildLogEntry]

def __str__(self):
return (
Expand Down
2 changes: 2 additions & 0 deletions app/llm/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from llm.completion_arguments import *
from llm.external import *
from llm.capability import *
from llm.request_handler import *
from llm.capability import RequirementList
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The explicit import of RequirementList is redundant due to the preceding wildcard import from the same module (from llm.capability import *). It's a good practice to avoid such redundancies to keep the code clean and readable.

- from llm.capability import RequirementList

Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation.

Suggested change
from llm.capability import RequirementList

3 changes: 3 additions & 0 deletions app/llm/capability/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from llm.capability.capability_list import CapabilityList
from llm.capability.requirement_list import RequirementList
from llm.capability.capability_checker import capabilities_fulfill_requirements
80 changes: 80 additions & 0 deletions app/llm/capability/capability_checker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from .capability_list import (
CapabilityList,
capability_weights,
always_considered_capabilities_with_default,
)
from .requirement_list import RequirementList


def capabilities_fulfill_requirements(
    capability: CapabilityList, requirements: RequirementList
) -> bool:
    """Return True if the capability satisfies every requirement that is set.

    Only fields of *requirements* that are not None are checked; each check
    asks the matching capability for a score via ``matches`` and treats a
    zero score as a failure.
    """
    for field, required_value in requirements.__dict__.items():
        if required_value is None:
            # Unset requirement: nothing to check for this field.
            continue
        if not getattr(capability, field).matches(required_value):
            return False
    return True


def calculate_capability_scores(
    capabilities: list[CapabilityList],
    requirements: RequirementList,
    invert_cost: bool = False,
) -> list[float]:
    """Calculate one aggregate score per capability list against the requirements.

    For every requirement that is set — plus the cost capabilities, which are
    always considered with a permissive default — each capability list is
    scored via its ``matches`` method. The raw scores are rank-normalized to
    (0, 1] per requirement (rank position among the distinct scores, which
    balances the very different value ranges of the capabilities), scaled by
    the requirement's weight, and finally summed per capability list.

    :param capabilities: the capability lists to score
    :param requirements: the requirements to score against
    :param invert_cost: if True, non-zero cost scores are inverted (1/score)
        so that a *higher* score means a *cheaper* model; use this when the
        caller treats higher scores as better
    :return: one aggregated score per entry in ``capabilities``
    """
    per_requirement_scores = []

    for requirement in requirements.__dict__.keys():
        requirement_value = getattr(requirements, requirement)
        always_considered = requirement in always_considered_capabilities_with_default
        if requirement_value is None and not always_considered:
            # Unset and not always-considered: this requirement is ignored.
            continue

        # Raw score of each capability list for this requirement.
        scores = []
        for capability in capabilities:
            if requirement_value is None:
                # Always-considered requirement without an explicit value:
                # fall back to the permissive default.
                score = getattr(capability, requirement).matches(
                    always_considered_capabilities_with_default[requirement]
                )
            else:
                score = getattr(capability, requirement).matches(requirement_value)
            # Cost is a special case: a low cost is good, so when the caller
            # wants "higher is better" semantics the score is inverted.
            if (
                invert_cost
                and requirement in ("input_cost", "output_cost")
                and score != 0
            ):
                score = 1 / score
            scores.append(score)

        # Rank-normalize between 0 and 1 and apply the weight modifier.
        # A precomputed rank map avoids the O(n^2) repeated list.index lookups.
        rank = {score: i + 1 for i, score in enumerate(sorted(set(scores)))}
        distinct_count = len(rank)
        weight_modifier = capability_weights[requirement]
        per_requirement_scores.append(
            [(rank[score] / distinct_count) * weight_modifier for score in scores]
        )

    # Sum column-wise: the i-th final score aggregates the i-th entry of every
    # per-requirement score list. zip(*...) also degrades gracefully to [] when
    # there is nothing to score.
    return [sum(column) for column in zip(*per_requirement_scores)]
Comment on lines +9 to +80
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The functions capabilities_fulfill_requirements and calculate_capability_scores are well-implemented, providing a robust mechanism for evaluating and scoring models based on their capabilities and specified requirements. This functionality is central to the new capability system.

However, consider adding documentation to clarify the scoring logic, especially the role of invert_cost in the calculation process, to enhance understanding and maintainability.

131 changes: 131 additions & 0 deletions app/llm/capability/capability_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
from abc import ABCMeta
from pydantic import BaseModel, Field, model_validator


class Capability(metaclass=ABCMeta):
    """Abstract marker for capability types.

    Any class that exposes a callable ``matches`` attribute is treated as a
    (virtual) subclass through ``__subclasshook__`` — no explicit inheritance
    is required.
    """

    @classmethod
    def __subclasshook__(cls, subclass) -> bool:
        # Duck-typed membership: a capability is anything with a callable `matches`.
        return callable(getattr(subclass, "matches", None))

    def matches(self, other: any) -> int:
        """Return a score describing how well this capability matches *other*."""
        raise NotImplementedError


class TextCapability(BaseModel):
    """Capability backed by a fixed text value; only exact equality matches."""

    value: str

    def matches(self, text: str) -> int:
        # Binary score: 1 on an exact match, 0 otherwise.
        return 1 if self.value == text else 0

    def __str__(self):
        return f"TextCapability({super().__str__()})"


class OrderedNumberCapability(BaseModel):
    """Capability whose score grows with its value (higher is better)."""

    value: int | float

    def matches(self, number: int | float) -> int | float:
        # Below the requested minimum scores 0; otherwise the score is the
        # headroom above the minimum, offset by one so a bare match is 1.
        return 0 if self.value < number else self.value - number + 1

    def __str__(self):
        return f"OrderedNumberCapability({super().__str__()})"


class InverseOrderedNumberCapability(BaseModel):
    """Capability whose score grows as its value shrinks (lower is better)."""

    value: int | float

    def matches(self, number: int | float) -> int | float:
        # Above the requested maximum scores 0; otherwise the score is the
        # margin below the maximum, offset by one so a bare match is 1.
        return 0 if self.value > number else number - self.value + 1

    def __str__(self):
        return f"InverseOrderedNumberCapability({super().__str__()})"


class BooleanCapability(BaseModel):
    """Capability wrapping a single boolean flag."""

    value: bool

    def matches(self, boolean: bool) -> int:
        # Binary score: 1 when the flags agree, 0 otherwise.
        return 1 if self.value == boolean else 0

    def __str__(self):
        return f"BooleanCapability({str(self.value)})"


class CapabilityList(BaseModel):
    """A list of capabilities for a model.

    Every field is a capability wrapper whose ``matches`` method scores the
    model against a requirement; defaults describe a model with no notable
    capabilities. Field names must stay aligned with ``RequirementList`` and
    with the keys of ``capability_weights``, since the capability checker
    pairs them up via ``getattr``.
    """

    # Cost in $ per 1k input tokens (lower is better; default 0 = free/unknown)
    input_cost: InverseOrderedNumberCapability = Field(
        default=InverseOrderedNumberCapability(value=0)
    )
    # Output cost in $ per 1k tokens (lower is better; default 0 = free/unknown)
    output_cost: InverseOrderedNumberCapability = Field(
        default=InverseOrderedNumberCapability(value=0)
    )
    # The GPT version that is roughly equivalent to the model (higher is better)
    gpt_version_equivalent: OrderedNumberCapability = Field(
        default=OrderedNumberCapability(value=2)
    )
    # The speed of the model in tokens per second (higher is better)
    speed: OrderedNumberCapability = Field(default=OrderedNumberCapability(value=0))
    # The context length of the model in tokens (higher is better)
    context_length: OrderedNumberCapability = Field(
        default=OrderedNumberCapability(value=0)
    )
    # The vendor of the model e.g. "OpenAI" or "Anthropic" (exact-match text)
    vendor: TextCapability = Field(default=TextCapability(value=""))
    # Whether the model is privacy compliant and can be used for sensitive data
    privacy_compliance: BooleanCapability = Field(
        default=BooleanCapability(value=False)
    )
    # Whether the model is self-hosted
    self_hosted: BooleanCapability = Field(default=BooleanCapability(value=False))
    # Whether the model supports image recognition
    image_recognition: BooleanCapability = Field(default=BooleanCapability(value=False))
    # Whether the model supports a JSON mode
    json_mode: BooleanCapability = Field(default=BooleanCapability(value=False))

    @model_validator(mode="before")
    @classmethod
    def from_dict(cls, data: dict[str, any]):
        """Prepare the data for handling by Pydantic.

        Wraps bare values (e.g. ``0.5`` or ``"OpenAI"``) into the
        ``{"value": ...}`` shape the capability sub-models expect, so config
        files can specify capabilities as plain scalars.
        """
        for key, value in data.items():
            # strict `type(...) is dict` check: only exact dicts are assumed
            # to already be in the wrapped shape
            if type(value) is not dict:
                data[key] = {"value": value}
        return data


# The weight of each capability when aggregating scores in the capability
# checker; larger weights dominate the ranking, and a weight of 0 removes a
# capability from the score entirely (it can still act as a hard filter).
capability_weights = {
    "input_cost": 0.5,
    "output_cost": 0.5,
    "gpt_version_equivalent": 4,
    "speed": 2,
    "context_length": 0.1,
    "vendor": 1,
    "privacy_compliance": 0,
    "self_hosted": 0,
    "image_recognition": 0,
    "json_mode": 0,
}

# Capabilities that are scored even when the corresponding requirement is
# unset, together with the permissive default used in that case (effectively
# an unlimited cost budget, so every model "matches").
always_considered_capabilities_with_default = {
    "input_cost": 100000000000000,
    "output_cost": 100000000000000,
}
47 changes: 47 additions & 0 deletions app/llm/capability/requirement_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
class RequirementList:
"""A class to represent the requirements you want to match against"""

# Maximum cost in $ per 1k input tokens
input_cost: float | None
# Maximum cost in $ per 1k output tokens
output_cost: float | None
# The minimum GPT version that the model should be roughly equivalent to
gpt_version_equivalent: float | None
# The minimum speed of the model in tokens per second
speed: float | None
# The minimum context length of the model in tokens
context_length: int | None
# The vendor of the model e.g. "OpenAI" or "Anthropic"
vendor: str | None
# Whether the model should be privacy compliant to be used for sensitive data
privacy_compliance: bool | None
# Whether the model should be self-hosted
self_hosted: bool | None
# Whether the model should support image recognition
image_recognition: bool | None
# Whether the model should support a JSON mode
json_mode: bool | None

def __init__(
self,
input_cost: float | None = None,
output_cost: float | None = None,
gpt_version_equivalent: float | None = None,
speed: float | None = None,
context_length: int | None = None,
vendor: str | None = None,
privacy_compliance: bool | None = None,
self_hosted: bool | None = None,
image_recognition: bool | None = None,
json_mode: bool | None = None,
) -> None:
self.input_cost = input_cost
self.output_cost = output_cost
self.gpt_version_equivalent = gpt_version_equivalent
self.speed = speed
self.context_length = context_length
self.vendor = vendor
self.privacy_compliance = privacy_compliance
self.self_hosted = self_hosted
self.image_recognition = image_recognition
self.json_mode = json_mode
2 changes: 2 additions & 0 deletions app/llm/external/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from domain import IrisMessage
from llm import CompletionArguments
from llm.capability import CapabilityList


class LanguageModel(BaseModel, metaclass=ABCMeta):
Expand All @@ -11,6 +12,7 @@ class LanguageModel(BaseModel, metaclass=ABCMeta):
id: str
name: str
description: str
capabilities: CapabilityList


class CompletionModel(LanguageModel, metaclass=ABCMeta):
Expand Down
13 changes: 13 additions & 0 deletions app/llm/llm_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import yaml

from common import Singleton
from llm.capability import RequirementList
from llm.capability.capability_checker import calculate_capability_scores
from llm.external import LanguageModel, AnyLLM


Expand All @@ -26,6 +28,7 @@ def get_llm_by_id(self, llm_id):
return llm

def load_llms(self):
"""Load the llms from the config file"""
path = os.environ.get("LLM_CONFIG_PATH")
if not path:
raise Exception("LLM_CONFIG_PATH not set")
Expand All @@ -34,3 +37,13 @@ def load_llms(self):
loaded_llms = yaml.safe_load(file)

self.entries = LlmList.parse_obj({"llms": loaded_llms}).llms

def get_llms_sorted_by_capabilities_score(
    self, requirements: RequirementList, invert_cost: bool = False
):
    """Return the known llms ordered by capability score, best first.

    Scores every entry against *requirements* and sorts descending;
    ``invert_cost`` is forwarded to the score calculation.
    """
    capability_scores = calculate_capability_scores(
        [model.capabilities for model in self.entries], requirements, invert_cost
    )
    ranked = sorted(
        zip(capability_scores, self.entries),
        key=lambda pair: pair[0],
        reverse=True,
    )
    return [model for _, model in ranked]
Comment on lines +41 to +49
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The get_llms_sorted_by_capabilities_score method is a key addition that aligns with the PR's objectives, efficiently sorting LLMs based on capability scores. This method enhances the model selection process by considering the specified requirements.

However, consider adding error handling or a fallback mechanism for cases where no models match the specified requirements, ensuring robustness in all scenarios.

8 changes: 6 additions & 2 deletions app/llm/request_handler/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
from basic_request_handler import BasicRequestHandler
from request_handler_interface import RequestHandler
from llm.request_handler.request_handler_interface import RequestHandler
from llm.request_handler.basic_request_handler import BasicRequestHandler
from llm.request_handler.capability_request_handler import (
CapabilityRequestHandler,
CapabilityRequestHandlerSelectionMode,
)
3 changes: 2 additions & 1 deletion app/llm/request_handler/basic_request_handler.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from domain import IrisMessage
from llm import RequestHandler, CompletionArguments
from llm.request_handler import RequestHandler
from llm.completion_arguments import CompletionArguments
from llm.llm_manager import LlmManager


Expand Down
Loading
Loading