chore: use ruff in the project #87

Merged: 33 commits (Oct 24, 2024)

Commits:
f9b562e
Applying ruff automatic changes to files.
PatrykWyzgowski Oct 8, 2024
4209360
Adding config options to support Ruff. Disabling no new line at the b…
PatrykWyzgowski Oct 8, 2024
5bedf86
One line docs are still pretty.
PatrykWyzgowski Oct 8, 2024
5fcf55b
Applying ruff with isort (with isort).
PatrykWyzgowski Oct 10, 2024
de78074
Switching to just ruff and mypy.
PatrykWyzgowski Oct 11, 2024
c833d28
Merge branch 'pw/alternate-unilint' into 68-feat-use-ruff-in-the-project
PatrykWyzgowski Oct 11, 2024
a76ab51
Switching off automatic fixes. Ruff ignores D200.
PatrykWyzgowski Oct 11, 2024
9359caa
Merge branch 'main' into 68-feat-use-ruff-in-the-project
PatrykWyzgowski Oct 11, 2024
71029fe
Manually fixed one-line docstrings to comply with our standards.
PatrykWyzgowski Oct 11, 2024
a962e5c
And one more file.
PatrykWyzgowski Oct 11, 2024
17b6b09
Fixing fixable ruff lint errors.
PatrykWyzgowski Oct 11, 2024
802188f
Adding reasonable fixes from --unsafe-fixes.
PatrykWyzgowski Oct 11, 2024
dda1287
Mypy issues resolved.
PatrykWyzgowski Oct 11, 2024
4d2d9a0
Fixed vast majority of ruff's alerts.
PatrykWyzgowski Oct 11, 2024
c98bda1
Fixed the rest of ruff's claims.
PatrykWyzgowski Oct 11, 2024
57ef338
Aligning to micpst's comments.
PatrykWyzgowski Oct 14, 2024
1246038
Unifying default dir argument assignment.
PatrykWyzgowski Oct 14, 2024
3d0e547
Unnecessary modification of first entries of prompt chat.
PatrykWyzgowski Oct 15, 2024
72e6989
Missing corrections.
PatrykWyzgowski Oct 15, 2024
26b8f0b
Back to ABC for LLMOptions.
PatrykWyzgowski Oct 15, 2024
7f40eb4
Excluding tests from formatting.
PatrykWyzgowski Oct 16, 2024
c4140f5
Merge branch 'refs/heads/main' into 68-feat-use-ruff-in-the-project
PatrykWyzgowski Oct 16, 2024
e5fc913
After-update pre-commit run.
PatrykWyzgowski Oct 16, 2024
38dd2c5
Last error deruffed.
PatrykWyzgowski Oct 16, 2024
a903dd6
This works as intended. Let's have tests that are formatted by `ruff-…
PatrykWyzgowski Oct 18, 2024
d5eca9a
Merge branch 'main' into 68-feat-use-ruff-in-the-project
PatrykWyzgowski Oct 21, 2024
f5e081f
Merge branch 'main' into 68-feat-use-ruff-in-the-project
akonarski-ds Oct 24, 2024
6a43980
Fix mypy and ruff issues
akonarski-ds Oct 24, 2024
5fe6eb6
Update contributing guide
akonarski-ds Oct 24, 2024
7d23532
Fix ci
akonarski-ds Oct 24, 2024
f809b4d
Fix ci
akonarski-ds Oct 24, 2024
8cf70aa
Add more repositories for trivy
akonarski-ds Oct 24, 2024
f29285d
Remove unnecessary empty line
akonarski-ds Oct 24, 2024
80 changes: 14 additions & 66 deletions .pre-commit-config.yaml
@@ -1,3 +1,5 @@
default_language_version:
python: python3.10
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
@@ -12,30 +14,20 @@ repos:
- id: check-json
- id: check-yaml

# PEP 8 compliant opinionated formatter.
- repo: https://github.com/psf/black
rev: 23.10.1
hooks:
- id: black
exclude: (docs/|notebooks/)
args: [--config, pyproject.toml]
- id: black-jupyter
files: \.ipynb$

# Cleaning unused imports.
- repo: https://github.com/hadialqattan/pycln
rev: v2.3.0
hooks:
- id: pycln
args: ["-a"]
exclude: (docs/|notebooks/)

# Modernizes python code and upgrade syntax for newer versions of the language
- repo: https://github.com/asottile/pyupgrade
rev: v3.15.0
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.6.9
hooks:
- id: pyupgrade
args: [--py38-plus]
# E1131: unsupported operand type(s) for | (unsupported-binary-operation)
- id: ruff
types_or: [ python, pyi, jupyter ]
exclude: (/test_|tests/|docs/|notebooks/)
args: [ --fix ]
# Formats Python, Pyi, and Jupyter files, excluding specified directories
- id: ruff-format
types_or: [ python, pyi, jupyter ]
exclude: (docs/|/tests/)
args: [ --quiet ]

# Used to have proper type annotations for library code.
- repo: https://github.com/pre-commit/mirrors-mypy
@@ -47,47 +39,3 @@ repos:
additional_dependencies: [pydantic>=2.8.2, types-pyyaml>=6.0.12]
exclude: (/test_|setup.py|/tests/|docs/)

# Sort imports alphabetically, and automatically separated into sections and by type.
- repo: https://github.com/timothycrosley/isort
rev: 5.13.2
hooks:
- id: isort
args: ["--profile", "black"]
exclude: (docs/|notebooks/)

# Checks Python source files for errors.
- repo: https://github.com/PyCQA/flake8
rev: 7.1.1
hooks:
- id: flake8
name: flake8
entry: flake8
language: python
types: [python]
args: [--config, .flake8]
exclude: (docs/)

# Enforces a coding standard, looks for code smells, and can make suggestions about how the code could be refactored.
- repo: https://github.com/pycqa/pylint
rev: v3.2.6
hooks:
- id: pylint
exclude: (/test_|tests/|docs/)
# # You can add additional plugins for pylint here,
# here is an example for pydantic, remember to enable it in pyproject.toml
# additional_dependencies:
# - 'pylint_pydantic'
# args:
# # pylint can have issue with python libraries based on C
# # if it fails to find some objects likely you need to add them
# # here:
# ["--extension-pkg-whitelist=pydantic"]

# Finds common security issues in Python code.
- repo: https://github.com/PyCQA/bandit
rev: 1.7.5
hooks:
- id: bandit
args: [-c, pyproject.toml, --recursive, packages/]
additional_dependencies: [".[toml]"] # required for pyproject.toml support
exclude: (notebooks/)
1 change: 0 additions & 1 deletion packages/ragbits-cli/src/ragbits/cli/__init__.py
@@ -18,7 +18,6 @@ def main() -> None:
- if found it imports the `register` function from the `cli` module and calls it with the `app` object
- register function should add the CLI commands to the `app` object
"""

cli_enabled_modules = [
module
for module in pkgutil.iter_modules(ragbits.__path__)
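The docstring above describes a plugin-discovery mechanism: each installed ragbits package may ship a `cli` module whose `register` function adds commands to the shared `app` object. A minimal sketch of that pattern, assuming illustrative wiring (the `discover_and_register` helper below is hypothetical, not code from this PR):

```python
import importlib
import pkgutil

import ragbits


def discover_and_register(app) -> None:
    """Import each ragbits submodule's `cli` module and call its `register(app)`.

    `app` is assumed to be the shared CLI application object the docstring refers to.
    """
    for module_info in pkgutil.iter_modules(ragbits.__path__):
        try:
            cli_module = importlib.import_module(f"ragbits.{module_info.name}.cli")
        except ModuleNotFoundError:
            continue  # this package ships no CLI integration
        register = getattr(cli_module, "register", None)
        if callable(register):
            register(app)  # the register function adds the module's commands to `app`
```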
7 changes: 4 additions & 3 deletions packages/ragbits-core/examples/chromadb_example.py
@@ -22,9 +22,10 @@
]


async def main():
"""Run the example."""

async def main() -> None:
"""
Run the example.
"""
chroma_client = chromadb.PersistentClient(path="chroma")
embedding_client = LiteLLMEmbeddings()

7 changes: 4 additions & 3 deletions packages/ragbits-core/examples/llm_example.py
@@ -36,8 +36,9 @@ class JokePrompt(Prompt[LoremPromptInput, LoremPromptOutput]):
"""

system_prompt = """
You are a joke generator. The jokes you generate should be funny and not offensive. {% if not pun_allowed %}Also, make sure
that the jokes do not contain any puns.{% else %}You can use any type of joke, even if it contains puns.{% endif %}
You are a joke generator. The jokes you generate should be funny and not offensive.
{% if not pun_allowed %}Also, make sure that the jokes do not contain any puns.
{% else %}You can use any type of joke, even if it contains puns.{% endif %}

Respond as json with two fields: joke and joke_category.
"""
@@ -47,7 +48,7 @@ class JokePrompt(Prompt[LoremPromptInput, LoremPromptOutput]):
"""


async def main():
async def main() -> None:
"""
Example of using the LiteLLM client with a Prompt class. Requires the OPENAI_API_KEY environment variable to be set.
"""
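The system prompt above relies on Jinja-style conditionals (`{% if not pun_allowed %}…{% endif %}`), which the `Prompt` class renders against the input model before the LLM call. A standalone sketch of that rendering step using `jinja2` directly (direct `Template` usage is illustrative; ragbits wires this up internally):

```python
from jinja2 import Template

# The same conditional structure as the JokePrompt system prompt above.
system_prompt = Template(
    "You are a joke generator. The jokes you generate should be funny and not offensive. "
    "{% if not pun_allowed %}Also, make sure that the jokes do not contain any puns."
    "{% else %}You can use any type of joke, even if it contains puns.{% endif %}"
)

# Rendering with the input model's fields substitutes the conditional branch.
print(system_prompt.render(pun_allowed=False))
print(system_prompt.render(pun_allowed=True))
```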
17 changes: 9 additions & 8 deletions packages/ragbits-core/src/ragbits/core/embeddings/litellm.py
@@ -1,5 +1,3 @@
from typing import Optional

try:
import litellm

@@ -8,7 +6,11 @@
HAS_LITELLM = False

from ragbits.core.embeddings import Embeddings
from ragbits.core.embeddings.exceptions import EmbeddingConnectionError, EmbeddingResponseError, EmbeddingStatusError
from ragbits.core.embeddings.exceptions import (
EmbeddingConnectionError,
EmbeddingResponseError,
EmbeddingStatusError,
)


class LiteLLMEmbeddings(Embeddings):
@@ -19,10 +21,10 @@ class LiteLLMEmbeddings(Embeddings):
def __init__(
self,
model: str = "text-embedding-3-small",
options: Optional[dict] = None,
api_base: Optional[str] = None,
api_key: Optional[str] = None,
api_version: Optional[str] = None,
options: dict | None = None,
api_base: str | None = None,
api_key: str | None = None,
api_version: str | None = None,
) -> None:
"""
Constructs the LiteLLMEmbeddingClient.
@@ -65,7 +67,6 @@ async def embed_text(self, data: list[str]) -> list[list[float]]:
EmbeddingStatusError: If the embedding API returns an error status code.
EmbeddingResponseError: If the embedding API response is invalid.
"""

try:
response = await litellm.aembedding(
input=data,
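The `Raises` section above maps connection, status, and response failures onto dedicated embedding exceptions. A self-contained sketch of that translation pattern (the placeholder exception classes, the specific litellm error types, and the response parsing are assumptions, not taken from this PR):

```python
import litellm


class EmbeddingConnectionError(Exception): ...
class EmbeddingStatusError(Exception): ...


async def embed_text(data: list[str], model: str) -> list[list[float]]:
    """Translate provider failures into the library's embedding exceptions."""
    try:
        # aembedding is the call visible in the diff above.
        response = await litellm.aembedding(input=data, model=model)
    except litellm.APIConnectionError as exc:
        raise EmbeddingConnectionError(str(exc)) from exc
    except litellm.APIError as exc:  # non-2xx status from the provider
        raise EmbeddingStatusError(str(exc)) from exc
    # Response shape assumed: a list of dicts carrying an "embedding" vector each.
    return [entry["embedding"] for entry in response.data]
```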
16 changes: 9 additions & 7 deletions packages/ragbits-core/src/ragbits/core/embeddings/local.py
@@ -1,4 +1,4 @@
from typing import Iterator, Optional
from collections.abc import Iterator

try:
import torch
@@ -20,10 +20,9 @@ class LocalEmbeddings(Embeddings):
def __init__(
self,
model_name: str,
api_key: Optional[str] = None,
api_key: str | None = None,
) -> None:
"""
Constructs a new local LLM instance.
"""Constructs a new local LLM instance.

Args:
model_name: Name of the model to use.
@@ -45,8 +44,7 @@ def __init__(
self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, token=self.hf_api_key)

async def embed_text(self, data: list[str], batch_size: int = 1) -> list[list[float]]:
"""
Calls the appropriate encoder endpoint with the given data and options.
"""Calls the appropriate encoder endpoint with the given data and options.

Args:
data: List of strings to get embeddings for.
@@ -58,7 +56,11 @@ async def embed_text(self, data: list[str], batch_size: int = 1) -> list[list[float]]:
embeddings = []
for batch in self._batch(data, batch_size):
batch_dict = self.tokenizer(
batch, max_length=self.tokenizer.model_max_length, padding=True, truncation=True, return_tensors="pt"
batch,
max_length=self.tokenizer.model_max_length,
padding=True,
truncation=True,
return_tensors="pt",
).to(self.device)
with torch.no_grad():
outputs = self.model(**batch_dict)
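The loop in `embed_text` depends on a `_batch` helper that this hunk does not show. One plausible implementation, consistent with the `collections.abc.Iterator` import introduced above (a sketch, not the PR's actual helper):

```python
from collections.abc import Iterator


def _batch(data: list[str], batch_size: int) -> Iterator[list[str]]:
    """Yield consecutive slices of `data`, each at most `batch_size` items long."""
    for start in range(0, len(data), batch_size):
        yield data[start : start + batch_size]
```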
22 changes: 10 additions & 12 deletions packages/ragbits-core/src/ragbits/core/llms/base.py
@@ -1,6 +1,6 @@
from abc import ABC, abstractmethod
from functools import cached_property
from typing import Generic, Optional, Type, cast, overload
from typing import Generic, cast, overload

from ragbits.core.prompt.base import BasePrompt, BasePromptWithParser, OutputT

@@ -12,9 +12,9 @@ class LLM(Generic[LLMClientOptions], ABC):
Abstract class for interaction with Large Language Model.
"""

_options_cls: Type[LLMClientOptions]
_options_cls: type[LLMClientOptions]

def __init__(self, model_name: str, default_options: Optional[LLMOptions] = None) -> None:
def __init__(self, model_name: str, default_options: LLMOptions | None = None) -> None:
"""
Constructs a new LLM instance.

@@ -39,7 +39,7 @@ def client(self) -> LLMClient:
Client for the LLM.
"""

def count_tokens(self, prompt: BasePrompt) -> int:
def count_tokens(self, prompt: BasePrompt) -> int: # noqa: PLR6301
"""
Counts tokens in the prompt.

@@ -55,7 +55,7 @@ async def generate_raw(
self,
prompt: BasePrompt,
*,
options: Optional[LLMOptions] = None,
options: LLMOptions | None = None,
) -> str:
"""
Prepares and sends a prompt to the LLM and returns the raw response (without parsing).
@@ -83,24 +83,22 @@ async def generate(
self,
prompt: BasePromptWithParser[OutputT],
*,
options: Optional[LLMOptions] = None,
) -> OutputT:
...
options: LLMOptions | None = None,
) -> OutputT: ...

@overload
async def generate(
self,
prompt: BasePrompt,
*,
options: Optional[LLMOptions] = None,
) -> OutputT:
...
options: LLMOptions | None = None,
) -> OutputT: ...

async def generate(
self,
prompt: BasePrompt,
*,
options: Optional[LLMOptions] = None,
options: LLMOptions | None = None,
) -> OutputT:
"""
Prepares and sends a prompt to the LLM and returns response parsed to the
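The overload stubs that ruff-format collapsed onto single lines (`) -> OutputT: ...`) exist so a type checker can infer the parsed output type when a prompt carries a parser. A minimal standalone illustration of the same `@overload` technique, with hypothetical names:

```python
from typing import Literal, overload


@overload
def generate(prompt: str, *, parse: Literal[True]) -> dict: ...
@overload
def generate(prompt: str, *, parse: Literal[False] = False) -> str: ...
def generate(prompt: str, *, parse: bool = False) -> str | dict:
    """Return raw text by default, or a parsed payload when parse=True."""
    raw = f"response to: {prompt}"
    return {"text": raw} if parse else raw


reveal_type_demo: str = generate("hello")          # checker infers str
parsed: dict = generate("hello", parse=True)       # checker infers dict
```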
8 changes: 4 additions & 4 deletions packages/ragbits-core/src/ragbits/core/llms/clients/base.py
@@ -1,6 +1,6 @@
from abc import ABC, abstractmethod
from dataclasses import asdict, dataclass
from typing import Any, ClassVar, Dict, Generic, Optional, Type, TypeVar
from typing import Any, ClassVar, Generic, TypeVar

from pydantic import BaseModel

@@ -14,7 +14,7 @@
@dataclass
class LLMOptions(ABC):
"""
Abstract dataclass that represents all available LLM call options.
A dataclass that represents all available LLM call options.
"""

_not_given: ClassVar[Any] = None
Expand All @@ -35,7 +35,7 @@ def __or__(self, other: "LLMOptions") -> "LLMOptions":

return self.__class__(**updated_dict)

def dict(self) -> Dict[str, Any]:
def dict(self) -> dict[str, Any]:
"""
Creates a dictionary representation of the LLMOptions instance.
If a value is None, it will be replaced with a provider-specific not-given sentinel.
@@ -70,7 +70,7 @@ async def call(
conversation: ChatFormat,
options: LLMClientOptions,
json_mode: bool = False,
output_schema: Optional[Type[BaseModel] | Dict] = None,
output_schema: type[BaseModel] | dict | None = None,
) -> str:
"""
Calls LLM inference API.
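The `__or__` method partially visible above merges two option sets so that per-call options override defaults. A self-contained sketch of the assumed merge semantics, treating `None` as "unset" the way the surrounding dataclass does:

```python
from dataclasses import asdict, dataclass


@dataclass
class Options:
    temperature: float | None = None
    max_tokens: int | None = None

    def __or__(self, other: "Options") -> "Options":
        # Fields explicitly set on `other` win; None-valued fields fall through.
        merged = asdict(self)
        merged.update({k: v for k, v in asdict(other).items() if v is not None})
        return self.__class__(**merged)


defaults = Options(temperature=0.7)
per_call = Options(max_tokens=256)
print(defaults | per_call)  # Options(temperature=0.7, max_tokens=256)
```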
27 changes: 13 additions & 14 deletions packages/ragbits-core/src/ragbits/core/llms/clients/litellm.py
@@ -1,5 +1,4 @@
from dataclasses import dataclass
from typing import Dict, List, Optional, Type, Union

from pydantic import BaseModel

@@ -25,15 +24,15 @@ class LiteLLMOptions(LLMOptions):
Each of them is described in the [LiteLLM documentation](https://docs.litellm.ai/docs/completion/input).
"""

frequency_penalty: Union[Optional[float], NotGiven] = NOT_GIVEN
max_tokens: Union[Optional[int], NotGiven] = NOT_GIVEN
n: Union[Optional[int], NotGiven] = NOT_GIVEN
presence_penalty: Union[Optional[float], NotGiven] = NOT_GIVEN
seed: Union[Optional[int], NotGiven] = NOT_GIVEN
stop: Union[Optional[Union[str, List[str]]], NotGiven] = NOT_GIVEN
temperature: Union[Optional[float], NotGiven] = NOT_GIVEN
top_p: Union[Optional[float], NotGiven] = NOT_GIVEN
mock_response: Union[Optional[str], NotGiven] = NOT_GIVEN
frequency_penalty: float | None | NotGiven = NOT_GIVEN
max_tokens: int | None | NotGiven = NOT_GIVEN
n: int | None | NotGiven = NOT_GIVEN
presence_penalty: float | None | NotGiven = NOT_GIVEN
seed: int | None | NotGiven = NOT_GIVEN
stop: str | list[str] | None | NotGiven = NOT_GIVEN
temperature: float | None | NotGiven = NOT_GIVEN
top_p: float | None | NotGiven = NOT_GIVEN
mock_response: str | None | NotGiven = NOT_GIVEN


class LiteLLMClient(LLMClient[LiteLLMOptions]):
@@ -48,9 +47,9 @@ def __init__(
self,
model_name: str,
*,
base_url: Optional[str] = None,
api_key: Optional[str] = None,
api_version: Optional[str] = None,
base_url: str | None = None,
api_key: str | None = None,
api_version: str | None = None,
use_structured_output: bool = False,
) -> None:
"""
@@ -80,7 +79,7 @@ async def call(
conversation: ChatFormat,
options: LiteLLMOptions,
json_mode: bool = False,
output_schema: Optional[Type[BaseModel] | Dict] = None,
output_schema: type[BaseModel] | dict | None = None,
) -> str:
"""
Calls the appropriate LLM endpoint with the given prompt and options.
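`LiteLLMOptions` types every field as `<T> | None | NotGiven` so the client can tell an omitted option apart from an explicit `None` (which the provider may treat as a meaningful value). A minimal sketch of such a sentinel; the real `NotGiven`/`NOT_GIVEN` in ragbits-core may be implemented differently:

```python
class NotGiven:
    """Sentinel type: 'this option was never supplied', as opposed to an explicit None."""

    def __bool__(self) -> bool:
        return False

    def __repr__(self) -> str:
        return "NOT_GIVEN"


NOT_GIVEN = NotGiven()


def effective(value: float | None | NotGiven, default: float | None) -> float | None:
    # Hypothetical helper: only an omitted value falls back to the default.
    return default if isinstance(value, NotGiven) else value


print(effective(NOT_GIVEN, 1.0))  # 1.0 (omitted -> default applies)
print(effective(None, 1.0))       # None (explicit None is preserved)
```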