feat: remove pympler dependency and add better way to calculate size of tokenizer cache (#3580)
jacopo-chevallard authored Jan 30, 2025
1 parent e2a3bcb commit 2fbd5d4
Showing 4 changed files with 26 additions and 13 deletions.
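
For context, the removed get_size helper measured each cached tokenizer with pympler's asizeof, which recursively walks the whole object graph in memory; this commit swaps that for an estimate based on the tokenizer's on-disk vocabulary/model files. A minimal sketch of the two strategies, for comparison only (not code from this commit; it assumes a Hugging Face-style tokenizer object that exposes vocab_files_names and init_kwargs):

import os


def old_estimate(tokenizer) -> int:
    # Deep in-memory size: accurate but slow, and requires the pympler
    # dependency that this commit drops.
    from pympler import asizeof

    return asizeof.asizeof(tokenizer)


def new_estimate(tokenizer, default: int = 5 * 1024 * 1024) -> int:
    # Sum the sizes of the files the tokenizer was loaded from, falling
    # back to a fixed 5 MB default when they cannot be resolved.
    total = 0
    for key in getattr(tokenizer, "vocab_files_names", {}):
        path = getattr(tokenizer, "init_kwargs", {}).get(key)
        if path and os.path.exists(path):
            total += os.path.getsize(path)
    return total or default
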
1 change: 0 additions & 1 deletion core/pyproject.toml
@@ -25,7 +25,6 @@ dependencies = [
    "langchain-mistralai>=0.2.3",
    "fasttext-langdetect>=1.0.5",
    "langfuse>=2.57.0",
-    "pympler>=1.1",
]
readme = "README.md"
requires-python = ">= 3.11"
34 changes: 26 additions & 8 deletions core/quivr_core/llm/llm_endpoint.py
@@ -1,6 +1,6 @@
import logging
import os
-from typing import Union, Any
+from typing import Union
from urllib.parse import parse_qs, urlparse

import tiktoken
@@ -10,7 +10,6 @@
from langchain_openai import AzureChatOpenAI, ChatOpenAI
from pydantic import SecretStr
import time
-from pympler import asizeof

from quivr_core.brain.info import LLMInfo
from quivr_core.rag.entities.config import DefaultModelSuppliers, LLMEndpointConfig
@@ -19,17 +18,14 @@
logger = logging.getLogger("quivr_core")


-def get_size(obj: Any, seen: set | None = None) -> int:
-    return asizeof.asizeof(obj)


class LLMTokenizer:
    _cache: dict[
        int, tuple["LLMTokenizer", int, float]
    ] = {}  # {hash: (tokenizer, size_bytes, last_access_time)}
    _max_cache_size_mb: int = 50
-    _max_cache_count: int = 3  # Default maximum number of cached tokenizers
+    _max_cache_count: int = 5  # Default maximum number of cached tokenizers
    _current_cache_size: int = 0
+    _default_size: int = 5 * 1024 * 1024

    def __init__(self, tokenizer_hub: str | None, fallback_tokenizer: str):
        self.tokenizer_hub = tokenizer_hub
@@ -63,7 +59,29 @@ def __init__(self, tokenizer_hub: str | None, fallback_tokenizer: str):
self.tokenizer = tiktoken.get_encoding(self.fallback_tokenizer)

        # More accurate size estimation
-        self._size_bytes = get_size(self.tokenizer)
+        self._size_bytes = self._calculate_tokenizer_size()
+
+    def _calculate_tokenizer_size(self) -> int:
+        """Calculate size of tokenizer by summing the sizes of its vocabulary and model files"""
+        # By default, return a size of 5 MB
+        if not hasattr(self.tokenizer, "vocab_files_names") or not hasattr(
+            self.tokenizer, "init_kwargs"
+        ):
+            return self._default_size
+
+        total_size = 0
+
+        # Get the file keys from vocab_files_names
+        file_keys = self.tokenizer.vocab_files_names.keys()
+        # Look up these files in init_kwargs
+        for key in file_keys:
+            if file_path := self.tokenizer.init_kwargs.get(key):
+                try:
+                    total_size += os.path.getsize(file_path)
+                except (OSError, FileNotFoundError):
+                    logger.debug(f"Could not access tokenizer file: {file_path}")
+
+        return total_size if total_size > 0 else self._default_size

    @classmethod
    def load(cls, tokenizer_hub: str, fallback_tokenizer: str):
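
A short usage sketch of the new estimation path (illustrative only; it assumes the transformers library and a tokenizer loaded with from_pretrained, which records resolved vocabulary-file paths in init_kwargs; "gpt2" is just an example model id):

import os

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # example model id

# Mirror LLMTokenizer._calculate_tokenizer_size: sum the sizes of the
# tokenizer's backing files, falling back to the 5 MB default.
size_bytes = 0
for key in tokenizer.vocab_files_names:
    file_path = tokenizer.init_kwargs.get(key)
    if file_path:
        try:
            size_bytes += os.path.getsize(file_path)
        except OSError:
            pass
size_bytes = size_bytes or 5 * 1024 * 1024

print(f"estimated tokenizer footprint: {size_bytes / (1024 * 1024):.1f} MB")

# The estimate feeds the class-level cache, which is bounded by
# _max_cache_size_mb (50 MB) and _max_cache_count (now 5 entries);
# the eviction logic itself is not part of this diff.
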
2 changes: 0 additions & 2 deletions core/requirements-dev.lock
@@ -296,8 +296,6 @@ pyflakes==3.2.0
pygments==2.18.0
    # via ipython
    # via rich
-pympler==1.1
-    # via quivr-core
pytest==8.3.3
    # via pytest-asyncio
    # via pytest-benchmark
2 changes: 0 additions & 2 deletions core/requirements.lock
@@ -214,8 +214,6 @@ pydantic-settings==2.6.1
    # via langchain-community
pygments==2.18.0
    # via rich
-pympler==1.1
-    # via quivr-core
python-dateutil==2.8.2
    # via pandas
python-dotenv==1.0.1
