From f9b562e6db1cc0e60389089c131c001f2b158dfe Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Tue, 8 Oct 2024 09:48:40 +0200 Subject: [PATCH 01/28] Applying ruff automatic changes to files. --- .pre-commit-config.yaml | 111 +- .../ragbits-cli/src/ragbits/cli/__init__.py | 7 +- .../ragbits-core/examples/chromadb_example.py | 2 - packages/ragbits-core/examples/llm_example.py | 13 +- .../ragbits-core/examples/prompt_example.py | 10 +- packages/ragbits-core/src/ragbits/core/cli.py | 3 +- .../src/ragbits/core/embeddings/base.py | 6 +- .../src/ragbits/core/embeddings/exceptions.py | 12 +- .../src/ragbits/core/embeddings/litellm.py | 19 +- .../src/ragbits/core/embeddings/local.py | 13 +- .../src/ragbits/core/llms/base.py | 32 +- .../src/ragbits/core/llms/clients/base.py | 25 +- .../ragbits/core/llms/clients/exceptions.py | 12 +- .../src/ragbits/core/llms/clients/litellm.py | 39 +- .../src/ragbits/core/llms/clients/local.py | 36 +- .../src/ragbits/core/llms/litellm.py | 21 +- .../src/ragbits/core/llms/local.py | 18 +- .../src/ragbits/core/llms/types.py | 7 +- .../src/ragbits/core/prompt/__init__.py | 2 +- .../src/ragbits/core/prompt/base.py | 24 +- .../core/prompt/discovery/prompt_discovery.py | 10 +- .../src/ragbits/core/prompt/lab/app.py | 23 +- .../src/ragbits/core/prompt/parsers.py | 28 +- .../src/ragbits/core/prompt/prompt.py | 40 +- .../src/ragbits/core/prompt/promptfoo.py | 6 +- .../src/ragbits/core/vector_store/base.py | 15 +- .../core/vector_store/chromadb_store.py | 37 +- .../ragbits/core/vector_store/in_memory.py | 10 +- .../examples/simple_text.py | 1 - .../src/ragbits/document_search/_main.py | 22 +- .../document_search/documents/document.py | 31 +- .../document_search/documents/element.py | 19 +- .../document_search/documents/sources.py | 28 +- .../ingestion/document_processor.py | 12 +- .../ingestion/providers/unstructured.py | 13 +- .../retrieval/rephrasers/base.py | 6 +- .../retrieval/rephrasers/noop.py | 6 +- .../retrieval/rerankers/base.py | 6 +- .../retrieval/rerankers/noop.py | 9 +- pyproject.toml | 68 ++ ruffs_output.txt | 1060 +++++++++++++++++ scripts/create_ragbits_package.py | 3 +- scripts/update_ragbits_package.py | 20 +- 43 files changed, 1427 insertions(+), 458 deletions(-) create mode 100644 ruffs_output.txt diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7428c1b03..ffd5221cd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,23 +12,34 @@ repos: - id: check-json - id: check-yaml - # PEP 8 compliant opinionated formatter. - - repo: https://github.com/psf/black - rev: 23.10.1 + - repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.6.9 hooks: - - id: black - exclude: (docs/|notebooks/) - args: [--config, pyproject.toml] - - id: black-jupyter - files: \.ipynb$ + - id: ruff + types_or: [ python, pyi, jupyter ] + exclude: (/test_|tests/|docs/|notebooks/) + args: [ --fix ] + - id: ruff-format + types_or: [ python, pyi, jupyter ] + exclude: (docs/) - # Cleaning unused imports. - - repo: https://github.com/hadialqattan/pycln - rev: v2.3.0 - hooks: - - id: pycln - args: ["-a"] - exclude: (docs/|notebooks/) +# # PEP 8 compliant opinionated formatter. +# - repo: https://github.com/psf/black +# rev: 23.10.1 +# hooks: +# - id: black +# exclude: (docs/|notebooks/) +# args: [--config, pyproject.toml] +# - id: black-jupyter +# files: \.ipynb$ + +# # Cleaning unused imports. +# - repo: https://github.com/hadialqattan/pycln +# rev: v2.3.0 +# hooks: +# - id: pycln +# args: ["-a"] +# exclude: (docs/|notebooks/) # Modernizes python code and upgrade syntax for newer versions of the language - repo: https://github.com/asottile/pyupgrade @@ -47,41 +58,41 @@ repos: additional_dependencies: [pydantic>=2.8.2, types-pyyaml>=6.0.12] exclude: (/test_|setup.py|/tests/|docs/) - # Sort imports alphabetically, and automatically separated into sections and by type. - - repo: https://github.com/timothycrosley/isort - rev: 5.13.2 - hooks: - - id: isort - args: ["--profile", "black"] - exclude: (docs/|notebooks/) - - # Checks Python source files for errors. - - repo: https://github.com/PyCQA/flake8 - rev: 7.1.1 - hooks: - - id: flake8 - name: flake8 - entry: flake8 - language: python - types: [python] - args: [--config, .flake8] - exclude: (docs/) - - # Enforces a coding standard, looks for code smells, and can make suggestions about how the code could be refactored. - - repo: https://github.com/pycqa/pylint - rev: v3.2.6 - hooks: - - id: pylint - exclude: (/test_|tests/|docs/) - # # You can add additional plugins for pylint here, - # here is an example for pydantic, remember to enable it in pyproject.toml - # additional_dependencies: - # - 'pylint_pydantic' - # args: - # # pylint can have issue with python libraries based on C - # # if it fails to find some objects likely you need to add them - # # here: - # ["--extension-pkg-whitelist=pydantic"] +# # Sort imports alphabetically, and automatically separated into sections and by type. +# - repo: https://github.com/timothycrosley/isort +# rev: 5.13.2 +# hooks: +# - id: isort +# args: ["--profile", "black"] +# exclude: (docs/|notebooks/) +# +# # Checks Python source files for errors. +# - repo: https://github.com/PyCQA/flake8 +# rev: 7.1.1 +# hooks: +# - id: flake8 +# name: flake8 +# entry: flake8 +# language: python +# types: [python] +# args: [--config, .flake8] +# exclude: (docs/) +# +# # Enforces a coding standard, looks for code smells, and can make suggestions about how the code could be refactored. +# - repo: https://github.com/pycqa/pylint +# rev: v3.2.6 +# hooks: +# - id: pylint +# exclude: (/test_|tests/|docs/) +# # # You can add additional plugins for pylint here, +# # here is an example for pydantic, remember to enable it in pyproject.toml +# # additional_dependencies: +# # - 'pylint_pydantic' +# # args: +# # # pylint can have issue with python libraries based on C +# # # if it fails to find some objects likely you need to add them +# # # here: +# # ["--extension-pkg-whitelist=pydantic"] # Finds common security issues in Python code. - repo: https://github.com/PyCQA/bandit diff --git a/packages/ragbits-cli/src/ragbits/cli/__init__.py b/packages/ragbits-cli/src/ragbits/cli/__init__.py index 45d30fbee..5ba385a5b 100644 --- a/packages/ragbits-cli/src/ragbits/cli/__init__.py +++ b/packages/ragbits-cli/src/ragbits/cli/__init__.py @@ -1,16 +1,14 @@ import importlib.util import pkgutil -from typer import Typer - import ragbits +from typer import Typer app = Typer(no_args_is_help=True) def main() -> None: - """ - Main entry point for the CLI. + """Main entry point for the CLI. This function registers all the CLI modules in the ragbits packages: - iterates over every package in the ragbits.* namespace @@ -18,7 +16,6 @@ def main() -> None: - if found it imports the `register` function from the `cli` module and calls it with the `app` object - register function should add the CLI commands to the `app` object """ - cli_enabled_modules = [ module for module in pkgutil.iter_modules(ragbits.__path__) diff --git a/packages/ragbits-core/examples/chromadb_example.py b/packages/ragbits-core/examples/chromadb_example.py index 0a2a80157..7404f8c77 100644 --- a/packages/ragbits-core/examples/chromadb_example.py +++ b/packages/ragbits-core/examples/chromadb_example.py @@ -8,7 +8,6 @@ import asyncio import chromadb - from ragbits.core.embeddings.litellm import LiteLLMEmbeddings from ragbits.core.vector_store.chromadb_store import ChromaDBStore from ragbits.document_search import DocumentSearch @@ -24,7 +23,6 @@ async def main(): """Run the example.""" - chroma_client = chromadb.PersistentClient(path="chroma") embedding_client = LiteLLMEmbeddings() diff --git a/packages/ragbits-core/examples/llm_example.py b/packages/ragbits-core/examples/llm_example.py index adcd61e06..96feffc7a 100644 --- a/packages/ragbits-core/examples/llm_example.py +++ b/packages/ragbits-core/examples/llm_example.py @@ -7,14 +7,12 @@ import asyncio from pydantic import BaseModel - from ragbits.core.llms.litellm import LiteLLM from ragbits.core.prompt import Prompt class LoremPromptInput(BaseModel): - """ - Input format for the LoremPrompt. + """Input format for the LoremPrompt. """ theme: str @@ -22,8 +20,7 @@ class LoremPromptInput(BaseModel): class LoremPromptOutput(BaseModel): - """ - Output format for the LoremPrompt. + """Output format for the LoremPrompt. """ joke: str @@ -31,8 +28,7 @@ class LoremPromptOutput(BaseModel): class JokePrompt(Prompt[LoremPromptInput, LoremPromptOutput]): - """ - A prompt that generates jokes. + """A prompt that generates jokes. """ system_prompt = """ @@ -48,8 +44,7 @@ class JokePrompt(Prompt[LoremPromptInput, LoremPromptOutput]): async def main(): - """ - Example of using the LiteLLM client with a Prompt class. Requires the OPENAI_API_KEY environment variable to be set. + """Example of using the LiteLLM client with a Prompt class. Requires the OPENAI_API_KEY environment variable to be set. """ llm = LiteLLM("gpt-4o-2024-08-06", use_structured_output=True) prompt = JokePrompt(LoremPromptInput(theme="software developers", pun_allowed=True)) diff --git a/packages/ragbits-core/examples/prompt_example.py b/packages/ragbits-core/examples/prompt_example.py index 64c37c360..156cd9d29 100644 --- a/packages/ragbits-core/examples/prompt_example.py +++ b/packages/ragbits-core/examples/prompt_example.py @@ -5,13 +5,11 @@ # ] # /// from pydantic import BaseModel - from ragbits.core.prompt import Prompt class LoremPromptInput(BaseModel): - """ - Input format for the LoremPrompt. + """Input format for the LoremPrompt. """ theme: str @@ -19,16 +17,14 @@ class LoremPromptInput(BaseModel): class LoremPromptOutput(BaseModel): - """ - Output format for the LoremPrompt. + """Output format for the LoremPrompt. """ text: str class LoremPrompt(Prompt[LoremPromptInput, LoremPromptOutput]): - """ - A prompt that generates Lorem Ipsum text. + """A prompt that generates Lorem Ipsum text. """ system_prompt = """ diff --git a/packages/ragbits-core/src/ragbits/core/cli.py b/packages/ragbits-core/src/ragbits/core/cli.py index 912a85a0b..4c60b53d0 100644 --- a/packages/ragbits-core/src/ragbits/core/cli.py +++ b/packages/ragbits-core/src/ragbits/core/cli.py @@ -7,8 +7,7 @@ def register(app: typer.Typer) -> None: - """ - Register the CLI commands for the package. + """Register the CLI commands for the package. Args: app: The Typer object to register the commands with. diff --git a/packages/ragbits-core/src/ragbits/core/embeddings/base.py b/packages/ragbits-core/src/ragbits/core/embeddings/base.py index ede4fcadf..b47731575 100644 --- a/packages/ragbits-core/src/ragbits/core/embeddings/base.py +++ b/packages/ragbits-core/src/ragbits/core/embeddings/base.py @@ -2,14 +2,12 @@ class Embeddings(ABC): - """ - Abstract client for communication with embedding models. + """Abstract client for communication with embedding models. """ @abstractmethod async def embed_text(self, data: list[str]) -> list[list[float]]: - """ - Creates embeddings for the given strings. + """Creates embeddings for the given strings. Args: data: List of strings to get embeddings for. diff --git a/packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py b/packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py index 4dd99ad1e..c48ddb93d 100644 --- a/packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py +++ b/packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py @@ -1,6 +1,5 @@ class EmbeddingError(Exception): - """ - Base class for all exceptions raised by the EmbeddingClient. + """Base class for all exceptions raised by the EmbeddingClient. """ def __init__(self, message: str) -> None: @@ -9,8 +8,7 @@ def __init__(self, message: str) -> None: class EmbeddingConnectionError(EmbeddingError): - """ - Raised when there is an error connecting to the embedding API. + """Raised when there is an error connecting to the embedding API. """ def __init__(self, message: str = "Connection error.") -> None: @@ -18,8 +16,7 @@ def __init__(self, message: str = "Connection error.") -> None: class EmbeddingStatusError(EmbeddingError): - """ - Raised when an API response has a status code of 4xx or 5xx. + """Raised when an API response has a status code of 4xx or 5xx. """ def __init__(self, message: str, status_code: int) -> None: @@ -28,8 +25,7 @@ def __init__(self, message: str, status_code: int) -> None: class EmbeddingResponseError(EmbeddingError): - """ - Raised when an API response has an invalid schema. + """Raised when an API response has an invalid schema. """ def __init__(self, message: str = "Data returned by API invalid for expected schema.") -> None: diff --git a/packages/ragbits-core/src/ragbits/core/embeddings/litellm.py b/packages/ragbits-core/src/ragbits/core/embeddings/litellm.py index ca9dcb6ca..165c70518 100644 --- a/packages/ragbits-core/src/ragbits/core/embeddings/litellm.py +++ b/packages/ragbits-core/src/ragbits/core/embeddings/litellm.py @@ -1,4 +1,3 @@ -from typing import Optional try: import litellm @@ -12,20 +11,18 @@ class LiteLLMEmbeddings(Embeddings): - """ - Client for creating text embeddings using LiteLLM API. + """Client for creating text embeddings using LiteLLM API. """ def __init__( self, model: str = "text-embedding-3-small", - options: Optional[dict] = None, - api_base: Optional[str] = None, - api_key: Optional[str] = None, - api_version: Optional[str] = None, + options: dict | None = None, + api_base: str | None = None, + api_key: str | None = None, + api_version: str | None = None, ) -> None: - """ - Constructs the LiteLLMEmbeddingClient. + """Constructs the LiteLLMEmbeddingClient. Args: model: Name of the [LiteLLM supported model](https://docs.litellm.ai/docs/embedding/supported_embedding)\ @@ -51,8 +48,7 @@ def __init__( self.api_version = api_version async def embed_text(self, data: list[str]) -> list[list[float]]: - """ - Creates embeddings for the given strings. + """Creates embeddings for the given strings. Args: data: List of strings to get embeddings for. @@ -65,7 +61,6 @@ async def embed_text(self, data: list[str]) -> list[list[float]]: EmbeddingStatusError: If the embedding API returns an error status code. EmbeddingResponseError: If the embedding API response is invalid. """ - try: response = await litellm.aembedding( input=data, diff --git a/packages/ragbits-core/src/ragbits/core/embeddings/local.py b/packages/ragbits-core/src/ragbits/core/embeddings/local.py index 8a4f52bab..b38c26bea 100644 --- a/packages/ragbits-core/src/ragbits/core/embeddings/local.py +++ b/packages/ragbits-core/src/ragbits/core/embeddings/local.py @@ -1,4 +1,4 @@ -from typing import Iterator, Optional +from collections.abc import Iterator try: import torch @@ -13,17 +13,15 @@ class LocalEmbeddings(Embeddings): - """ - Class for interaction with any encoder available in HuggingFace. + """Class for interaction with any encoder available in HuggingFace. """ def __init__( self, model_name: str, - api_key: Optional[str] = None, + api_key: str | None = None, ) -> None: - """ - Constructs a new local LLM instance. + """Constructs a new local LLM instance. Args: model_name: Name of the model to use. @@ -45,8 +43,7 @@ def __init__( self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, token=self.hf_api_key) async def embed_text(self, data: list[str], batch_size: int = 1) -> list[list[float]]: - """ - Calls the appropriate encoder endpoint with the given data and options. + """Calls the appropriate encoder endpoint with the given data and options. Args: data: List of strings to get embeddings for. diff --git a/packages/ragbits-core/src/ragbits/core/llms/base.py b/packages/ragbits-core/src/ragbits/core/llms/base.py index 178b054c3..bfbb6ba27 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/base.py +++ b/packages/ragbits-core/src/ragbits/core/llms/base.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from functools import cached_property -from typing import Generic, Optional, Type, cast, overload +from typing import Generic, cast, overload from ragbits.core.prompt.base import BasePrompt, BasePromptWithParser, OutputT @@ -8,15 +8,13 @@ class LLM(Generic[LLMClientOptions], ABC): - """ - Abstract class for interaction with Large Language Model. + """Abstract class for interaction with Large Language Model. """ - _options_cls: Type[LLMClientOptions] + _options_cls: type[LLMClientOptions] - def __init__(self, model_name: str, default_options: Optional[LLMOptions] = None) -> None: - """ - Constructs a new LLM instance. + def __init__(self, model_name: str, default_options: LLMOptions | None = None) -> None: + """Constructs a new LLM instance. Args: model_name: Name of the model to be used. @@ -35,13 +33,11 @@ def __init_subclass__(cls) -> None: @cached_property @abstractmethod def client(self) -> LLMClient: - """ - Client for the LLM. + """Client for the LLM. """ def count_tokens(self, prompt: BasePrompt) -> int: - """ - Counts tokens in the prompt. + """Counts tokens in the prompt. Args: prompt: Formatted prompt template with conversation and response parsing configuration. @@ -55,10 +51,9 @@ async def generate_raw( self, prompt: BasePrompt, *, - options: Optional[LLMOptions] = None, + options: LLMOptions | None = None, ) -> str: - """ - Prepares and sends a prompt to the LLM and returns the raw response (without parsing). + """Prepares and sends a prompt to the LLM and returns the raw response (without parsing). Args: prompt: Formatted prompt template with conversation. @@ -83,7 +78,7 @@ async def generate( self, prompt: BasePromptWithParser[OutputT], *, - options: Optional[LLMOptions] = None, + options: LLMOptions | None = None, ) -> OutputT: ... @@ -92,7 +87,7 @@ async def generate( self, prompt: BasePrompt, *, - options: Optional[LLMOptions] = None, + options: LLMOptions | None = None, ) -> OutputT: ... @@ -100,10 +95,9 @@ async def generate( self, prompt: BasePrompt, *, - options: Optional[LLMOptions] = None, + options: LLMOptions | None = None, ) -> OutputT: - """ - Prepares and sends a prompt to the LLM and returns response parsed to the + """Prepares and sends a prompt to the LLM and returns response parsed to the output type of the prompt (if available). Args: diff --git a/packages/ragbits-core/src/ragbits/core/llms/clients/base.py b/packages/ragbits-core/src/ragbits/core/llms/clients/base.py index eaca8095c..e5b6571ad 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/clients/base.py +++ b/packages/ragbits-core/src/ragbits/core/llms/clients/base.py @@ -1,9 +1,8 @@ from abc import ABC, abstractmethod from dataclasses import asdict, dataclass -from typing import Any, ClassVar, Dict, Generic, Optional, Type, TypeVar +from typing import Any, ClassVar, Generic, TypeVar from pydantic import BaseModel - from ragbits.core.prompt import ChatFormat from ..types import NotGiven @@ -13,15 +12,13 @@ @dataclass class LLMOptions(ABC): - """ - Abstract dataclass that represents all available LLM call options. + """Abstract dataclass that represents all available LLM call options. """ _not_given: ClassVar[Any] = None def __or__(self, other: "LLMOptions") -> "LLMOptions": - """ - Merges two LLMOptions, prioritizing non-NOT_GIVEN values from the 'other' object. + """Merges two LLMOptions, prioritizing non-NOT_GIVEN values from the 'other' object. """ self_dict = asdict(self) other_dict = asdict(other) @@ -35,9 +32,8 @@ def __or__(self, other: "LLMOptions") -> "LLMOptions": return self.__class__(**updated_dict) - def dict(self) -> Dict[str, Any]: - """ - Creates a dictionary representation of the LLMOptions instance. + def dict(self) -> dict[str, Any]: + """Creates a dictionary representation of the LLMOptions instance. If a value is None, it will be replaced with a provider-specific not-given sentinel. Returns: @@ -51,13 +47,11 @@ def dict(self) -> Dict[str, Any]: class LLMClient(Generic[LLMClientOptions], ABC): - """ - Abstract client for a direct communication with LLM. + """Abstract client for a direct communication with LLM. """ def __init__(self, model_name: str) -> None: - """ - Constructs a new LLMClient instance. + """Constructs a new LLMClient instance. Args: model_name: Name of the model to be used. @@ -70,10 +64,9 @@ async def call( conversation: ChatFormat, options: LLMClientOptions, json_mode: bool = False, - output_schema: Optional[Type[BaseModel] | Dict] = None, + output_schema: type[BaseModel] | dict | None = None, ) -> str: - """ - Calls LLM inference API. + """Calls LLM inference API. Args: conversation: List of dicts with "role" and "content" keys, representing the chat history so far. diff --git a/packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py b/packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py index 0f1106bab..6550d883b 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py +++ b/packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py @@ -1,6 +1,5 @@ class LLMError(Exception): - """ - Base class for all exceptions raised by the LLMClient. + """Base class for all exceptions raised by the LLMClient. """ def __init__(self, message: str) -> None: @@ -9,8 +8,7 @@ def __init__(self, message: str) -> None: class LLMConnectionError(LLMError): - """ - Raised when there is an error connecting to the LLM API. + """Raised when there is an error connecting to the LLM API. """ def __init__(self, message: str = "Connection error.") -> None: @@ -18,8 +16,7 @@ def __init__(self, message: str = "Connection error.") -> None: class LLMStatusError(LLMError): - """ - Raised when an API response has a status code of 4xx or 5xx. + """Raised when an API response has a status code of 4xx or 5xx. """ def __init__(self, message: str, status_code: int) -> None: @@ -28,8 +25,7 @@ def __init__(self, message: str, status_code: int) -> None: class LLMResponseError(LLMError): - """ - Raised when an API response has an invalid schema. + """Raised when an API response has an invalid schema. """ def __init__(self, message: str = "Data returned by API invalid for expected schema.") -> None: diff --git a/packages/ragbits-core/src/ragbits/core/llms/clients/litellm.py b/packages/ragbits-core/src/ragbits/core/llms/clients/litellm.py index f1620d8c8..4d994cbd2 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/clients/litellm.py +++ b/packages/ragbits-core/src/ragbits/core/llms/clients/litellm.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -from typing import Dict, List, Optional, Type, Union from pydantic import BaseModel @@ -20,25 +19,23 @@ @dataclass class LiteLLMOptions(LLMOptions): - """ - Dataclass that represents all available LLM call options for the LiteLLM client. + """Dataclass that represents all available LLM call options for the LiteLLM client. Each of them is described in the [LiteLLM documentation](https://docs.litellm.ai/docs/completion/input). """ - frequency_penalty: Union[Optional[float], NotGiven] = NOT_GIVEN - max_tokens: Union[Optional[int], NotGiven] = NOT_GIVEN - n: Union[Optional[int], NotGiven] = NOT_GIVEN - presence_penalty: Union[Optional[float], NotGiven] = NOT_GIVEN - seed: Union[Optional[int], NotGiven] = NOT_GIVEN - stop: Union[Optional[Union[str, List[str]]], NotGiven] = NOT_GIVEN - temperature: Union[Optional[float], NotGiven] = NOT_GIVEN - top_p: Union[Optional[float], NotGiven] = NOT_GIVEN - mock_response: Union[Optional[str], NotGiven] = NOT_GIVEN + frequency_penalty: float | None | NotGiven = NOT_GIVEN + max_tokens: int | None | NotGiven = NOT_GIVEN + n: int | None | NotGiven = NOT_GIVEN + presence_penalty: float | None | NotGiven = NOT_GIVEN + seed: int | None | NotGiven = NOT_GIVEN + stop: str | list[str] | None | NotGiven = NOT_GIVEN + temperature: float | None | NotGiven = NOT_GIVEN + top_p: float | None | NotGiven = NOT_GIVEN + mock_response: str | None | NotGiven = NOT_GIVEN class LiteLLMClient(LLMClient[LiteLLMOptions]): - """ - Client for the LiteLLM that supports calls to 100+ LLMs APIs, including OpenAI, Anthropic, VertexAI, + """Client for the LiteLLM that supports calls to 100+ LLMs APIs, including OpenAI, Anthropic, VertexAI, Hugging Face and others. """ @@ -48,13 +45,12 @@ def __init__( self, model_name: str, *, - base_url: Optional[str] = None, - api_key: Optional[str] = None, - api_version: Optional[str] = None, + base_url: str | None = None, + api_key: str | None = None, + api_version: str | None = None, use_structured_output: bool = False, ) -> None: - """ - Constructs a new LiteLLMClient instance. + """Constructs a new LiteLLMClient instance. Args: model_name: Name of the model to use. @@ -80,10 +76,9 @@ async def call( conversation: ChatFormat, options: LiteLLMOptions, json_mode: bool = False, - output_schema: Optional[Type[BaseModel] | Dict] = None, + output_schema: type[BaseModel] | dict | None = None, ) -> str: - """ - Calls the appropriate LLM endpoint with the given prompt and options. + """Calls the appropriate LLM endpoint with the given prompt and options. Args: conversation: List of dicts with "role" and "content" keys, representing the chat history so far. diff --git a/packages/ragbits-core/src/ragbits/core/llms/clients/local.py b/packages/ragbits-core/src/ragbits/core/llms/clients/local.py index d3a1d0f62..ee77d715e 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/clients/local.py +++ b/packages/ragbits-core/src/ragbits/core/llms/clients/local.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -from typing import Dict, List, Optional, Type, Union from pydantic import BaseModel @@ -19,26 +18,24 @@ @dataclass class LocalLLMOptions(LLMOptions): - """ - Dataclass that represents all available LLM call options for the local LLM client. + """Dataclass that represents all available LLM call options for the local LLM client. Each of them is described in the [HuggingFace documentation] (https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation). # pylint: disable=line-too-long """ - repetition_penalty: Union[Optional[float], NotGiven] = NOT_GIVEN - do_sample: Union[Optional[bool], NotGiven] = NOT_GIVEN - best_of: Union[Optional[int], NotGiven] = NOT_GIVEN - max_new_tokens: Union[Optional[int], NotGiven] = NOT_GIVEN - top_k: Union[Optional[int], NotGiven] = NOT_GIVEN - top_p: Union[Optional[float], NotGiven] = NOT_GIVEN - seed: Union[Optional[int], NotGiven] = NOT_GIVEN - stop_sequences: Union[Optional[List[str]], NotGiven] = NOT_GIVEN - temperature: Union[Optional[float], NotGiven] = NOT_GIVEN + repetition_penalty: float | None | NotGiven = NOT_GIVEN + do_sample: bool | None | NotGiven = NOT_GIVEN + best_of: int | None | NotGiven = NOT_GIVEN + max_new_tokens: int | None | NotGiven = NOT_GIVEN + top_k: int | None | NotGiven = NOT_GIVEN + top_p: float | None | NotGiven = NOT_GIVEN + seed: int | None | NotGiven = NOT_GIVEN + stop_sequences: list[str] | None | NotGiven = NOT_GIVEN + temperature: float | None | NotGiven = NOT_GIVEN class LocalLLMClient(LLMClient[LocalLLMOptions]): - """ - Client for the local LLM that supports Hugging Face models. + """Client for the local LLM that supports Hugging Face models. """ _options_cls = LocalLLMOptions @@ -47,10 +44,9 @@ def __init__( self, model_name: str, *, - hf_api_key: Optional[str] = None, + hf_api_key: str | None = None, ) -> None: - """ - Constructs a new local LLMClient instance. + """Constructs a new local LLMClient instance. Args: model_name: Name of the model to use. @@ -74,10 +70,9 @@ async def call( conversation: ChatFormat, options: LocalLLMOptions, json_mode: bool = False, - output_schema: Optional[Type[BaseModel] | Dict] = None, + output_schema: type[BaseModel] | dict | None = None, ) -> str: - """ - Makes a call to the local LLM with the provided prompt and options. + """Makes a call to the local LLM with the provided prompt and options. Args: conversation: List of dicts with "role" and "content" keys, representing the chat history so far. @@ -88,7 +83,6 @@ async def call( Returns: Response string from LLM. """ - input_ids = self.tokenizer.apply_chat_template( conversation, add_generation_prompt=True, return_tensors="pt" ).to(self.model.device) diff --git a/packages/ragbits-core/src/ragbits/core/llms/litellm.py b/packages/ragbits-core/src/ragbits/core/llms/litellm.py index 00524113e..0cbde8f6d 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/litellm.py +++ b/packages/ragbits-core/src/ragbits/core/llms/litellm.py @@ -1,5 +1,4 @@ from functools import cached_property -from typing import Optional try: import litellm @@ -15,8 +14,7 @@ class LiteLLM(LLM[LiteLLMOptions]): - """ - Class for interaction with any LLM supported by LiteLLM API. + """Class for interaction with any LLM supported by LiteLLM API. """ _options_cls = LiteLLMOptions @@ -24,15 +22,14 @@ class LiteLLM(LLM[LiteLLMOptions]): def __init__( self, model_name: str = "gpt-3.5-turbo", - default_options: Optional[LiteLLMOptions] = None, + default_options: LiteLLMOptions | None = None, *, - base_url: Optional[str] = None, - api_key: Optional[str] = None, - api_version: Optional[str] = None, + base_url: str | None = None, + api_key: str | None = None, + api_version: str | None = None, use_structured_output: bool = False, ) -> None: - """ - Constructs a new LiteLLM instance. + """Constructs a new LiteLLM instance. Args: model_name: Name of the [LiteLLM supported model](https://docs.litellm.ai/docs/providers) to be used.\ @@ -61,8 +58,7 @@ def __init__( @cached_property def client(self) -> LiteLLMClient: - """ - Client for the LLM. + """Client for the LLM. """ return LiteLLMClient( model_name=self.model_name, @@ -73,8 +69,7 @@ def client(self) -> LiteLLMClient: ) def count_tokens(self, prompt: BasePrompt) -> int: - """ - Counts tokens in the prompt. + """Counts tokens in the prompt. Args: prompt: Formatted prompt template with conversation and response parsing configuration. diff --git a/packages/ragbits-core/src/ragbits/core/llms/local.py b/packages/ragbits-core/src/ragbits/core/llms/local.py index cf3cacbe1..fee357248 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/local.py +++ b/packages/ragbits-core/src/ragbits/core/llms/local.py @@ -1,5 +1,4 @@ from functools import cached_property -from typing import Optional try: from transformers import AutoTokenizer @@ -15,8 +14,7 @@ class LocalLLM(LLM[LocalLLMOptions]): - """ - Class for interaction with any LLM available in HuggingFace. + """Class for interaction with any LLM available in HuggingFace. """ _options_cls = LocalLLMOptions @@ -24,12 +22,11 @@ class LocalLLM(LLM[LocalLLMOptions]): def __init__( self, model_name: str, - default_options: Optional[LocalLLMOptions] = None, + default_options: LocalLLMOptions | None = None, *, - api_key: Optional[str] = None, + api_key: str | None = None, ) -> None: - """ - Constructs a new local LLM instance. + """Constructs a new local LLM instance. Args: model_name: Name of the model to use. This should be a model from the CausalLM class. @@ -48,8 +45,7 @@ def __init__( @cached_property def client(self) -> LocalLLMClient: - """ - Client for the LLM. + """Client for the LLM. Returns: The client used to interact with the LLM. @@ -57,8 +53,7 @@ def client(self) -> LocalLLMClient: return LocalLLMClient(model_name=self.model_name, hf_api_key=self.api_key) def count_tokens(self, prompt: BasePrompt) -> int: - """ - Counts tokens in the messages. + """Counts tokens in the messages. Args: prompt: Messages to count tokens for. @@ -66,6 +61,5 @@ def count_tokens(self, prompt: BasePrompt) -> int: Returns: Number of tokens in the messages. """ - input_ids = self.tokenizer.apply_chat_template(prompt.chat) return len(input_ids) diff --git a/packages/ragbits-core/src/ragbits/core/llms/types.py b/packages/ragbits-core/src/ragbits/core/llms/types.py index 19cded7bd..9d6e329be 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/types.py +++ b/packages/ragbits-core/src/ragbits/core/llms/types.py @@ -1,10 +1,11 @@ -from typing_extensions import Literal, override +from typing import Literal + +from typing_extensions import override # Sentinel class used until PEP 0661 is accepted class NotGiven: - """ - A sentinel singleton class used to distinguish omitted keyword arguments + """A sentinel singleton class used to distinguish omitted keyword arguments from those passed in with the value None (which may have different behavior). For example: diff --git a/packages/ragbits-core/src/ragbits/core/prompt/__init__.py b/packages/ragbits-core/src/ragbits/core/prompt/__init__.py index 007312ef3..9215c3e74 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/__init__.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/__init__.py @@ -1,3 +1,3 @@ from ragbits.core.prompt.prompt import ChatFormat, Prompt -__all__ = ["Prompt", "ChatFormat"] +__all__ = ["ChatFormat", "Prompt"] diff --git a/packages/ragbits-core/src/ragbits/core/prompt/base.py b/packages/ragbits-core/src/ragbits/core/prompt/base.py index 47bf427f1..3e1944f48 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/base.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/base.py @@ -1,23 +1,21 @@ from abc import ABCMeta, abstractmethod -from typing import Dict, Generic, List, Optional, Type +from typing import Generic from pydantic import BaseModel from typing_extensions import TypeVar -ChatFormat = List[Dict[str, str]] +ChatFormat = list[dict[str, str]] OutputT = TypeVar("OutputT", default=str) class BasePrompt(metaclass=ABCMeta): - """ - Base class for prompts + """Base class for prompts """ @property @abstractmethod def chat(self) -> ChatFormat: - """ - Returns the conversation in the standard OpenAI chat format. + """Returns the conversation in the standard OpenAI chat format. Returns: ChatFormat: A list of dictionaries, each containing the role and content of a message. @@ -25,29 +23,25 @@ def chat(self) -> ChatFormat: @property def json_mode(self) -> bool: - """ - Returns whether the prompt should be sent in JSON mode. + """Returns whether the prompt should be sent in JSON mode. """ return self.output_schema() is not None - def output_schema(self) -> Optional[Dict | Type[BaseModel]]: - """ - Returns the schema of the desired output. Can be used to request structured output from the LLM API + def output_schema(self) -> dict | type[BaseModel] | None: + """Returns the schema of the desired output. Can be used to request structured output from the LLM API or to validate the output. Can return either a Pydantic model or a JSON schema. """ return None class BasePromptWithParser(Generic[OutputT], BasePrompt, metaclass=ABCMeta): - """ - Base class for prompts that know how to parse the output from the LLM to their specific + """Base class for prompts that know how to parse the output from the LLM to their specific output type. """ @abstractmethod def parse_response(self, response: str) -> OutputT: - """ - Parse the response from the LLM to the desired output type. + """Parse the response from the LLM to the desired output type. Args: response (str): The response from the LLM. diff --git a/packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py b/packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py index 0b5aad9e1..6bb577283 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py @@ -10,8 +10,7 @@ class PromptDiscovery: - """ - Discovers Prompt objects within Python modules. + """Discovers Prompt objects within Python modules. Args: file_pattern (str): The file pattern to search for Prompt objects. Defaults to "**/prompt_*.py" @@ -24,8 +23,7 @@ def __init__(self, file_pattern: str = DEFAULT_FILE_PATTERN, root_path: Path = P @staticmethod def is_prompt_subclass(obj: Any) -> bool: - """ - Checks if an object is a class that is a subclass of Prompt (but not Prompt itself). + """Checks if an object is a class that is a subclass of Prompt (but not Prompt itself). Args: obj (any): The object to check. @@ -38,13 +36,11 @@ def is_prompt_subclass(obj: Any) -> bool: return inspect.isclass(obj) and not get_origin(obj) and issubclass(obj, Prompt) and obj != Prompt def discover(self) -> set[type[Prompt]]: - """ - Discovers Prompt objects within the specified file paths. + """Discovers Prompt objects within the specified file paths. Returns: set[Prompt]: The discovered Prompt objects. """ - result_set: set[type[Prompt]] = set() for file_path in self.root_path.glob(self.file_pattern): diff --git a/packages/ragbits-core/src/ragbits/core/prompt/lab/app.py b/packages/ragbits-core/src/ragbits/core/prompt/lab/app.py index 2f9498f07..1e758352f 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/lab/app.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/lab/app.py @@ -11,18 +11,16 @@ import jinja2 from pydantic import BaseModel -from rich.console import Console - from ragbits.core.llms import LiteLLM from ragbits.core.llms.clients import LiteLLMOptions from ragbits.core.prompt import Prompt from ragbits.core.prompt.discovery.prompt_discovery import DEFAULT_FILE_PATTERN, PromptDiscovery +from rich.console import Console @dataclass(frozen=True) class PromptState: - """ - Class to store the current state of the application. + """Class to store the current state of the application. This class holds various data structures used throughout the application's lifecycle. @@ -40,8 +38,7 @@ class PromptState: def render_prompt(index: int, system_prompt: str, user_prompt: str, state: PromptState, *args: Any) -> PromptState: - """ - Renders a prompt based on the provided key, system prompt, user prompt, and input variables. + """Renders a prompt based on the provided key, system prompt, user prompt, and input variables. This function constructs a Prompt object using the prompt constructor and input constructor associated with the given key. It then updates the current prompt in the application state. @@ -62,7 +59,7 @@ def render_prompt(index: int, system_prompt: str, user_prompt: str, state: Promp input_type = prompt_class.input_type input_fields = get_input_type_fields(input_type) - variables = {field["field_name"]: value for field, value in zip(input_fields, args)} + variables = {field["field_name"]: value for field, value in zip(input_fields, args, strict=False)} input_data = input_type(**variables) if input_type is not None else None prompt_object = prompt_class(input_data=input_data) state = replace(state, rendered_prompt=prompt_object) @@ -71,8 +68,7 @@ def render_prompt(index: int, system_prompt: str, user_prompt: str, state: Promp def list_prompt_choices(state: PromptState) -> list[tuple[str, int]]: - """ - Returns a list of prompt choices based on the discovered prompts. + """Returns a list of prompt choices based on the discovered prompts. This function generates a list of tuples containing the names of discovered prompts and their corresponding indices. @@ -87,8 +83,7 @@ def list_prompt_choices(state: PromptState) -> list[tuple[str, int]]: def send_prompt_to_llm(state: PromptState) -> str: - """ - Sends the current prompt to the LLM and returns the response. + """Sends the current prompt to the LLM and returns the response. This function creates a LiteLLM client using the LLM model name and API key stored in the application state. It then calls the LLM client to generate a response based on the current prompt. @@ -114,8 +109,7 @@ def send_prompt_to_llm(state: PromptState) -> str: def get_input_type_fields(obj: BaseModel | None) -> list[dict]: - """ - Retrieves the field names and default values from the input type of a prompt. + """Retrieves the field names and default values from the input type of a prompt. This function inspects the input type object associated with a prompt and extracts information about its fields, including their names and default values. @@ -137,8 +131,7 @@ def get_input_type_fields(obj: BaseModel | None) -> list[dict]: def lab_app( # pylint: disable=missing-param-doc file_pattern: str = DEFAULT_FILE_PATTERN, llm_model: str | None = None, llm_api_key: str | None = None ) -> None: - """ - Launches the interactive application for listing, rendering, and testing prompts + """Launches the interactive application for listing, rendering, and testing prompts defined within the current project. """ if not HAS_GRADIO: diff --git a/packages/ragbits-core/src/ragbits/core/prompt/parsers.py b/packages/ragbits-core/src/ragbits/core/prompt/parsers.py index b52788b0d..414f6f1a5 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/parsers.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/parsers.py @@ -1,4 +1,5 @@ -from typing import Any, Callable, Type, TypeVar +from collections.abc import Callable +from typing import Any, TypeVar from pydantic import BaseModel, ValidationError @@ -6,8 +7,7 @@ class ResponseParsingError(Exception): - """ - Raised when there is an error parsing an API response. + """Raised when there is an error parsing an API response. """ def __init__(self, message: str) -> None: @@ -16,8 +16,7 @@ def __init__(self, message: str) -> None: def int_parser(value: str) -> int: - """ - Parses a string to an integer. + """Parses a string to an integer. Args: value: String to parse. @@ -35,8 +34,7 @@ def int_parser(value: str) -> int: def str_parser(value: str) -> str: - """ - Parses a string. + """Parses a string. Args: value: String to parse. @@ -48,8 +46,7 @@ def str_parser(value: str) -> str: def float_parser(value: str) -> float: - """ - Parses a string to a float. + """Parses a string to a float. Args: value: String to parse. @@ -67,8 +64,7 @@ def float_parser(value: str) -> float: def bool_parser(value: str) -> bool: - """ - Parses a string to a boolean. + """Parses a string to a boolean. Args: value: String to parse. @@ -87,9 +83,8 @@ def bool_parser(value: str) -> bool: raise ResponseParsingError(f"Could not parse '{value}' as a boolean") -def build_pydantic_parser(model: Type[PydanticModelT]) -> Callable[[str], PydanticModelT]: - """ - Builds a parser for a specific Pydantic model. +def build_pydantic_parser(model: type[PydanticModelT]) -> Callable[[str], PydanticModelT]: + """Builds a parser for a specific Pydantic model. Args: model: Pydantic model to build the parser for. @@ -102,8 +97,7 @@ def build_pydantic_parser(model: Type[PydanticModelT]) -> Callable[[str], Pydant """ def parser(value: str) -> PydanticModelT: - """ - Parses a string to a Pydantic model. + """Parses a string to a Pydantic model. Args: value: String to parse. @@ -122,7 +116,7 @@ def parser(value: str) -> PydanticModelT: return parser -DEFAULT_PARSERS: dict[Type, Callable[[str], Any]] = { +DEFAULT_PARSERS: dict[type, Callable[[str], Any]] = { int: int_parser, str: str_parser, float: float_parser, diff --git a/packages/ragbits-core/src/ragbits/core/prompt/prompt.py b/packages/ragbits-core/src/ragbits/core/prompt/prompt.py index 876a68a4d..b7f75875c 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/prompt.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/prompt.py @@ -1,6 +1,7 @@ import textwrap from abc import ABCMeta -from typing import Any, Callable, Dict, Generic, Optional, Tuple, Type, cast, get_args, get_origin, overload +from collections.abc import Callable +from typing import Any, Generic, cast, get_args, get_origin, overload from jinja2 import Environment, Template, meta from pydantic import BaseModel @@ -9,18 +10,17 @@ from .base import BasePromptWithParser, ChatFormat, OutputT from .parsers import DEFAULT_PARSERS, build_pydantic_parser -InputT = TypeVar("InputT", bound=Optional[BaseModel]) +InputT = TypeVar("InputT", bound=BaseModel | None) class Prompt(Generic[InputT, OutputT], BasePromptWithParser[OutputT], metaclass=ABCMeta): - """ - Generic class for prompts. It contains the system and user prompts, and additional messages. + """Generic class for prompts. It contains the system and user prompts, and additional messages. To create a new prompt, subclass this class and provide the system and user prompts, and optionally the input and output types. The system prompt is optional. """ - system_prompt: Optional[str] = None + system_prompt: str | None = None user_prompt: str # Additional messages to be added to the conversation after the system prompt @@ -31,13 +31,13 @@ class Prompt(Generic[InputT, OutputT], BasePromptWithParser[OutputT], metaclass= response_parser: Callable[[str], OutputT] # Automatically set in __init_subclass__ - input_type: Optional[Type[InputT]] - output_type: Type[OutputT] - system_prompt_template: Optional[Template] + input_type: type[InputT] | None + output_type: type[OutputT] + system_prompt_template: Template | None user_prompt_template: Template @classmethod - def _get_io_types(cls) -> Tuple: + def _get_io_types(cls) -> tuple: bases = get_original_bases(cls) for base in bases: if get_origin(base) is Prompt: @@ -64,7 +64,7 @@ def _parse_template(cls, template: str) -> Template: return Template(template) @classmethod - def _render_template(cls, template: Template, input_data: Optional[InputT]) -> str: + def _render_template(cls, template: Template, input_data: InputT | None) -> str: # Workaround for not being able to use `input is not None` # because of mypy issue: https://github.com/python/mypy/issues/12622 context = {} @@ -125,8 +125,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: @property def chat(self) -> ChatFormat: - """ - Returns the conversation in the standard OpenAI chat format. + """Returns the conversation in the standard OpenAI chat format. Returns: ChatFormat: A list of dictionaries, each containing the role and content of a message. @@ -144,8 +143,7 @@ def chat(self) -> ChatFormat: return chat def add_few_shot(self, user_message: str, assistant_message: str) -> "Prompt[InputT, OutputT]": - """ - Add a few-shot example to the conversation. + """Add a few-shot example to the conversation. Args: user_message (str): The message from the user. @@ -158,9 +156,8 @@ def add_few_shot(self, user_message: str, assistant_message: str) -> "Prompt[Inp self._instace_few_shots.append({"role": "assistant", "content": assistant_message}) return self - def output_schema(self) -> Optional[Dict | Type[BaseModel]]: - """ - Returns the schema of the desired output. Can be used to request structured output from the LLM API + def output_schema(self) -> dict | type[BaseModel] | None: + """Returns the schema of the desired output. Can be used to request structured output from the LLM API or to validate the output. Can return either a Pydantic model or a JSON schema. Returns: @@ -170,8 +167,7 @@ def output_schema(self) -> Optional[Dict | Type[BaseModel]]: @property def json_mode(self) -> bool: - """ - Returns whether the prompt should be sent in JSON mode. + """Returns whether the prompt should be sent in JSON mode. Returns: bool: Whether the prompt should be sent in JSON mode. @@ -179,8 +175,7 @@ def json_mode(self) -> bool: return issubclass(self.output_type, BaseModel) def parse_response(self, response: str) -> OutputT: - """ - Parse the response from the LLM to the desired output type. + """Parse the response from the LLM to the desired output type. Args: response (str): The response from the LLM. @@ -195,8 +190,7 @@ def parse_response(self, response: str) -> OutputT: @classmethod def to_promptfoo(cls, config: dict[str, Any]) -> ChatFormat: - """ - Generate a prompt in the promptfoo format from a promptfoo test configuration. + """Generate a prompt in the promptfoo format from a promptfoo test configuration. Args: config: The promptfoo test configuration. diff --git a/packages/ragbits-core/src/ragbits/core/prompt/promptfoo.py b/packages/ragbits-core/src/ragbits/core/prompt/promptfoo.py index 450a2ef3d..4f904d875 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/promptfoo.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/promptfoo.py @@ -2,17 +2,15 @@ from pathlib import Path import yaml -from rich.console import Console - from ragbits.core.prompt.discovery import PromptDiscovery from ragbits.core.prompt.discovery.prompt_discovery import DEFAULT_FILE_PATTERN +from rich.console import Console def generate_configs( file_pattern: str = DEFAULT_FILE_PATTERN, root_path: Path = Path.cwd(), target_path: Path = Path("promptfooconfigs") ) -> None: - """ - Generates promptfoo configuration files for all discovered prompts. + """Generates promptfoo configuration files for all discovered prompts. Args: file_pattern: The file pattern to search for Prompt objects. Defaults to "**/prompt_*.py" diff --git a/packages/ragbits-core/src/ragbits/core/vector_store/base.py b/packages/ragbits-core/src/ragbits/core/vector_store/base.py index 4d494c561..c01b56e25 100644 --- a/packages/ragbits-core/src/ragbits/core/vector_store/base.py +++ b/packages/ragbits-core/src/ragbits/core/vector_store/base.py @@ -1,12 +1,10 @@ import abc -from typing import List from pydantic import BaseModel class VectorDBEntry(BaseModel): - """ - An object representing a vector database entry. + """An object representing a vector database entry. """ key: str @@ -15,14 +13,12 @@ class VectorDBEntry(BaseModel): class VectorStore(abc.ABC): - """ - A class with an implementation of Vector Store, allowing to store and retrieve vectors by similarity function. + """A class with an implementation of Vector Store, allowing to store and retrieve vectors by similarity function. """ @abc.abstractmethod - async def store(self, entries: List[VectorDBEntry]) -> None: - """ - Store entries in the vector store. + async def store(self, entries: list[VectorDBEntry]) -> None: + """Store entries in the vector store. Args: entries: The entries to store. @@ -30,8 +26,7 @@ async def store(self, entries: List[VectorDBEntry]) -> None: @abc.abstractmethod async def retrieve(self, vector: list[float], k: int = 5) -> list[VectorDBEntry]: - """ - Retrieve entries from the vector store. + """Retrieve entries from the vector store. Args: vector: The vector to search for. diff --git a/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py b/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py index 259fdc7f6..6c9798257 100644 --- a/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py +++ b/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py @@ -1,6 +1,6 @@ import json from hashlib import sha256 -from typing import List, Literal, Optional, Union +from typing import Literal try: import chromadb @@ -21,12 +21,11 @@ def __init__( self, index_name: str, chroma_client: chromadb.ClientAPI, - embedding_function: Union[Embeddings, chromadb.EmbeddingFunction], - max_distance: Optional[float] = None, + embedding_function: Embeddings | chromadb.EmbeddingFunction, + max_distance: float | None = None, distance_method: Literal["l2", "ip", "cosine"] = "l2", ): - """ - Initializes the ChromaDBStore with the given parameters. + """Initializes the ChromaDBStore with the given parameters. Args: index_name: The name of the index. @@ -47,8 +46,7 @@ def __init__( self._collection = self._get_chroma_collection() def _get_chroma_collection(self) -> chromadb.Collection: - """ - Based on the selected embedding_function, chooses how to retrieve the ChromaDB collection. + """Based on the selected embedding_function, chooses how to retrieve the ChromaDB collection. If the collection doesn't exist, it creates one. Returns: @@ -63,9 +61,8 @@ def _get_chroma_collection(self) -> chromadb.Collection: embedding_function=self._embedding_function, ) - def _return_best_match(self, retrieved: dict) -> Optional[str]: - """ - Based on the retrieved data, returns the best match or None if no match is found. + def _return_best_match(self, retrieved: dict) -> str | None: + """Based on the retrieved data, returns the best match or None if no match is found. Args: Retrieved data, with a column-first format @@ -90,30 +87,27 @@ def _process_db_entry(self, entry: VectorDBEntry) -> tuple[str, list[float], dic return doc_id, embedding, metadata @property - def embedding_function(self) -> Union[Embeddings, chromadb.EmbeddingFunction]: - """ - Returns the embedding function. + def embedding_function(self) -> Embeddings | chromadb.EmbeddingFunction: + """Returns the embedding function. Returns: The embedding function. """ return self._embedding_function - async def store(self, entries: List[VectorDBEntry]) -> None: - """ - Stores entries in the ChromaDB collection. + async def store(self, entries: list[VectorDBEntry]) -> None: + """Stores entries in the ChromaDB collection. Args: entries: The entries to store. """ entries_processed = list(map(self._process_db_entry, entries)) - ids, embeddings, metadatas = map(list, zip(*entries_processed)) + ids, embeddings, metadatas = map(list, zip(*entries_processed, strict=False)) self._collection.add(ids=ids, embeddings=embeddings, metadatas=metadatas) - async def retrieve(self, vector: List[float], k: int = 5) -> List[VectorDBEntry]: - """ - Retrieves entries from the ChromaDB collection. + async def retrieve(self, vector: list[float], k: int = 5) -> list[VectorDBEntry]: + """Retrieves entries from the ChromaDB collection. Args: vector: The vector to query. @@ -137,8 +131,7 @@ async def retrieve(self, vector: List[float], k: int = 5) -> List[VectorDBEntry] return db_entries def __repr__(self) -> str: - """ - Returns the string representation of the object. + """Returns the string representation of the object. Returns: The string representation of the object. diff --git a/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py b/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py index ce0576fa6..0af5c08f4 100644 --- a/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py +++ b/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py @@ -1,19 +1,16 @@ import numpy as np - from ragbits.core.vector_store.base import VectorDBEntry, VectorStore class InMemoryVectorStore(VectorStore): - """ - A simple in-memory implementation of Vector Store, storing vectors in memory. + """A simple in-memory implementation of Vector Store, storing vectors in memory. """ def __init__(self) -> None: self._storage: dict[str, VectorDBEntry] = {} async def store(self, entries: list[VectorDBEntry]) -> None: - """ - Store entries in the vector store. + """Store entries in the vector store. Args: entries: The entries to store. @@ -22,8 +19,7 @@ async def store(self, entries: list[VectorDBEntry]) -> None: self._storage[entry.key] = entry async def retrieve(self, vector: list[float], k: int = 5) -> list[VectorDBEntry]: - """ - Retrieve entries from the vector store. + """Retrieve entries from the vector store. Args: vector: The vector to search for. diff --git a/packages/ragbits-document-search/examples/simple_text.py b/packages/ragbits-document-search/examples/simple_text.py index c0a3fa440..2fb97de0b 100644 --- a/packages/ragbits-document-search/examples/simple_text.py +++ b/packages/ragbits-document-search/examples/simple_text.py @@ -25,7 +25,6 @@ async def main(): """Run the example.""" - document_search = DocumentSearch(embedder=LiteLLMEmbeddings(), vector_store=InMemoryVectorStore()) for document in documents: diff --git a/packages/ragbits-document-search/src/ragbits/document_search/_main.py b/packages/ragbits-document-search/src/ragbits/document_search/_main.py index 042898721..b80197ac2 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/_main.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/_main.py @@ -1,7 +1,6 @@ -from typing import Any, Optional, Union +from typing import Any from pydantic import BaseModel, Field - from ragbits.core.embeddings.base import Embeddings from ragbits.core.vector_store.base import VectorStore from ragbits.document_search.documents.document import Document, DocumentMeta @@ -15,8 +14,7 @@ class SearchConfig(BaseModel): - """ - Configuration for the search process. + """Configuration for the search process. """ reranker_kwargs: dict[str, Any] = Field(default_factory=dict) @@ -25,8 +23,7 @@ class SearchConfig(BaseModel): class DocumentSearch: - """ - A main entrypoint to the DocumentSearch functionality. + """A main entrypoint to the DocumentSearch functionality. It provides methods for both ingestion and retrieval. @@ -59,8 +56,7 @@ def __init__( self.document_processor_router = document_processor_router or DocumentProcessorRouter.from_config() async def search(self, query: str, search_config: SearchConfig = SearchConfig()) -> list[Element]: - """ - Search for the most relevant chunks for a query. + """Search for the most relevant chunks for a query. Args: query: The query to search for. @@ -79,10 +75,9 @@ async def search(self, query: str, search_config: SearchConfig = SearchConfig()) return self.reranker.rerank(elements) async def ingest_document( - self, document: Union[DocumentMeta, Document], document_processor: Optional[BaseProvider] = None + self, document: DocumentMeta | Document, document_processor: BaseProvider | None = None ) -> None: - """ - Ingest a document. + """Ingest a document. Args: document: The document or metadata of the document to ingest. @@ -97,12 +92,11 @@ async def ingest_document( await self.insert_elements(elements) async def insert_elements(self, elements: list[Element]) -> None: - """ - Insert Elements into the vector store. + """Insert Elements into the vector store. Args: elements: The list of Elements to insert. """ vectors = await self.embedder.embed_text([element.get_key() for element in elements]) - entries = [element.to_vector_db_entry(vector) for element, vector in zip(elements, vectors)] + entries = [element.to_vector_db_entry(vector) for element, vector in zip(elements, vectors, strict=False)] await self.vector_store.store(entries) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py index 2ca2ec9a6..cfc495487 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py @@ -1,10 +1,8 @@ import tempfile from enum import Enum from pathlib import Path -from typing import Union from pydantic import BaseModel, Field - from ragbits.document_search.documents.sources import GCSSource, LocalFileSource @@ -34,17 +32,15 @@ class DocumentType(str, Enum): class DocumentMeta(BaseModel): - """ - An object representing a document metadata. + """An object representing a document metadata. """ document_type: DocumentType - source: Union[LocalFileSource, GCSSource] = Field(..., discriminator="source_type") + source: LocalFileSource | GCSSource = Field(..., discriminator="source_type") @property def id(self) -> str: - """ - Get the document ID. + """Get the document ID. Returns: The document ID. @@ -52,8 +48,7 @@ def id(self) -> str: return self.source.get_id() async def fetch(self) -> "Document": - """ - This method fetches the document from source (potentially remote) and creates an object to interface with it. + """This method fetches the document from source (potentially remote) and creates an object to interface with it. Based on the document type, it will return a different object. Returns: @@ -64,8 +59,7 @@ async def fetch(self) -> "Document": @classmethod def create_text_document_from_literal(cls, content: str) -> "DocumentMeta": - """ - Create a text document from a literal content. + """Create a text document from a literal content. Args: content: The content of the document. @@ -83,8 +77,7 @@ def create_text_document_from_literal(cls, content: str) -> "DocumentMeta": @classmethod def from_local_path(cls, local_path: Path) -> "DocumentMeta": - """ - Create a document metadata from a local path. + """Create a document metadata from a local path. Args: local_path: The local path to the document. @@ -99,8 +92,7 @@ def from_local_path(cls, local_path: Path) -> "DocumentMeta": class Document(BaseModel): - """ - An object representing a document which is downloaded and stored locally. + """An object representing a document which is downloaded and stored locally. """ local_path: Path @@ -108,8 +100,7 @@ class Document(BaseModel): @classmethod def from_document_meta(cls, document_meta: DocumentMeta, local_path: Path) -> "Document": - """ - Create a document from a document metadata. + """Create a document from a document metadata. Based on the document type, it will return a different object. Args: @@ -125,14 +116,12 @@ def from_document_meta(cls, document_meta: DocumentMeta, local_path: Path) -> "D class TextDocument(Document): - """ - An object representing a text document. + """An object representing a text document. """ @property def content(self) -> str: - """ - Get the content of the document. + """Get the content of the document. Returns: The content of the document. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py index 744aed729..13a3ee668 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py @@ -2,14 +2,12 @@ from typing import ClassVar from pydantic import BaseModel - from ragbits.core.vector_store.base import VectorDBEntry from ragbits.document_search.documents.document import DocumentMeta class Element(BaseModel, ABC): - """ - An object representing an element in a document. + """An object representing an element in a document. """ element_type: str @@ -19,8 +17,7 @@ class Element(BaseModel, ABC): @abstractmethod def get_key(self) -> str: - """ - Get the key of the element which will be used to generate the vector. + """Get the key of the element which will be used to generate the vector. Returns: The key. @@ -37,8 +34,7 @@ def __pydantic_init_subclass__(cls, **kwargs: dict) -> None: # pylint: disable= @classmethod def from_vector_db_entry(cls, db_entry: VectorDBEntry) -> "Element": - """ - Create an element from a vector database entry. + """Create an element from a vector database entry. Args: db_entry: The vector database entry. @@ -53,8 +49,7 @@ def from_vector_db_entry(cls, db_entry: VectorDBEntry) -> "Element": return element_cls(**meta) def to_vector_db_entry(self, vector: list[float]) -> VectorDBEntry: - """ - Create a vector database entry from the element. + """Create a vector database entry from the element. Args: vector: The vector. @@ -70,16 +65,14 @@ def to_vector_db_entry(self, vector: list[float]) -> VectorDBEntry: class TextElement(Element): - """ - An object representing a text element in a document. + """An object representing a text element in a document. """ element_type: str = "text" content: str def get_key(self) -> str: - """ - Get the key of the element which will be used to generate the vector. + """Get the key of the element which will be used to generate the vector. Returns: The key. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py index fc5a93a83..83a9e39d8 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py @@ -17,14 +17,12 @@ class Source(BaseModel, ABC): - """ - An object representing a source. + """An object representing a source. """ @abstractmethod def get_id(self) -> str: - """ - Get the source ID. + """Get the source ID. Returns: The source ID. @@ -32,8 +30,7 @@ def get_id(self) -> str: @abstractmethod async def fetch(self) -> Path: - """ - Load the source. + """Load the source. Returns: The path to the source. @@ -41,16 +38,14 @@ async def fetch(self) -> Path: class LocalFileSource(Source): - """ - An object representing a local file source. + """An object representing a local file source. """ source_type: Literal["local_file"] = "local_file" path: Path def get_id(self) -> str: - """ - Get unique identifier of the object in the source. + """Get unique identifier of the object in the source. Returns: Unique identifier. @@ -58,8 +53,7 @@ def get_id(self) -> str: return f"local_file:{self.path.absolute()}" async def fetch(self) -> Path: - """ - Fetch the source. + """Fetch the source. Returns: The local path to the object fetched from the source. @@ -68,8 +62,7 @@ async def fetch(self) -> Path: class GCSSource(Source): - """ - An object representing a GCS file source. + """An object representing a GCS file source. """ source_type: Literal["gcs"] = "gcs" @@ -78,8 +71,7 @@ class GCSSource(Source): object_name: str def get_id(self) -> str: - """ - Get unique identifier of the object in the source. + """Get unique identifier of the object in the source. Returns: Unique identifier. @@ -87,8 +79,7 @@ def get_id(self) -> str: return f"gcs:gs://{self.bucket}/{self.object_name}" async def fetch(self) -> Path: - """ - Fetch the file from Google Cloud Storage and store it locally. + """Fetch the file from Google Cloud Storage and store it locally. The file is downloaded to a local directory specified by `local_dir`. If the file already exists locally, it will not be downloaded again. If the file doesn't exist locally, it will be fetched from GCS. @@ -101,7 +92,6 @@ async def fetch(self) -> Path: Raises: ImportError: If the required 'gcloud' package is not installed for Google Cloud Storage source. """ - if not HAS_GCLOUD_AIO: raise ImportError("You need to install the 'gcloud-aio-storage' package to use Google Cloud Storage") diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py index 79da68d72..fc5414c97 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py @@ -1,5 +1,4 @@ import copy -from typing import Optional from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.ingestion.providers.base import BaseProvider @@ -31,8 +30,7 @@ class DocumentProcessorRouter: - """ - The DocumentProcessorRouter is responsible for routing the document to the correct provider based on the document + """The DocumentProcessorRouter is responsible for routing the document to the correct provider based on the document metadata such as the document type. """ @@ -40,9 +38,8 @@ def __init__(self, providers: dict[DocumentType, BaseProvider]): self._providers = providers @classmethod - def from_config(cls, providers_config: Optional[ProvidersConfig] = None) -> "DocumentProcessorRouter": - """ - Create a DocumentProcessorRouter from a configuration. If the configuration is not provided, the default + def from_config(cls, providers_config: ProvidersConfig | None = None) -> "DocumentProcessorRouter": + """Create a DocumentProcessorRouter from a configuration. If the configuration is not provided, the default configuration will be used. If the configuration is provided, it will be merged with the default configuration, overriding the default values for the document types that are defined in the configuration. Example of the configuration: @@ -64,8 +61,7 @@ def from_config(cls, providers_config: Optional[ProvidersConfig] = None) -> "Doc return cls(providers=config) def get_provider(self, document_meta: DocumentMeta) -> BaseProvider: - """ - Get the provider for the document. + """Get the provider for the document. Args: document_meta: The document metadata. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py index 2e81b8abd..7c2d0775c 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py @@ -1,14 +1,12 @@ import os from io import BytesIO -from typing import Optional - -from unstructured.chunking.basic import chunk_elements -from unstructured.documents.elements import Element as UnstructuredElement -from unstructured.partition.api import partition_via_api from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.element import Element, TextElement from ragbits.document_search.ingestion.providers.base import BaseProvider +from unstructured.chunking.basic import chunk_elements +from unstructured.documents.elements import Element as UnstructuredElement +from unstructured.partition.api import partition_via_api DEFAULT_PARTITION_KWARGS: dict = { "strategy": "hi_res", @@ -25,8 +23,7 @@ class UnstructuredProvider(BaseProvider): - """ - A provider that uses the Unstructured API to process the documents. + """A provider that uses the Unstructured API to process the documents. """ SUPPORTED_DOCUMENT_TYPES = { @@ -50,7 +47,7 @@ class UnstructuredProvider(BaseProvider): DocumentType.XML, } - def __init__(self, partition_kwargs: Optional[dict] = None, chunking_kwargs: Optional[dict] = None): + def __init__(self, partition_kwargs: dict | None = None, chunking_kwargs: dict | None = None): """Initialize the UnstructuredProvider. Args: diff --git a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/base.py b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/base.py index a40b9f9be..cf9d47b24 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/base.py @@ -2,15 +2,13 @@ class QueryRephraser(abc.ABC): - """ - Rephrases a query. Can provide multiple rephrased queries from one sentence / question. + """Rephrases a query. Can provide multiple rephrased queries from one sentence / question. """ @staticmethod @abc.abstractmethod def rephrase(query: str) -> list[str]: - """ - Rephrase a query. + """Rephrase a query. Args: query: The query to rephrase. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/noop.py b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/noop.py index 8e6b92fd2..1760fbddb 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/noop.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/noop.py @@ -2,14 +2,12 @@ class NoopQueryRephraser(QueryRephraser): - """ - A no-op query paraphraser that does not change the query. + """A no-op query paraphraser that does not change the query. """ @staticmethod def rephrase(query: str) -> list[str]: - """ - Mock implementation which outputs the same query as in input. + """Mock implementation which outputs the same query as in input. Args: query: The query to rephrase. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/base.py b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/base.py index dec886475..78e0a1576 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/base.py @@ -4,15 +4,13 @@ class Reranker(abc.ABC): - """ - Reranks chunks retrieved from vector store. + """Reranks chunks retrieved from vector store. """ @staticmethod @abc.abstractmethod def rerank(chunks: list[Element]) -> list[Element]: - """ - Rerank chunks. + """Rerank chunks. Args: chunks: The chunks to rerank. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/noop.py b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/noop.py index 5f1ba744a..343beda8b 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/noop.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/noop.py @@ -1,18 +1,15 @@ -from typing import List from ragbits.document_search.documents.element import Element from ragbits.document_search.retrieval.rerankers.base import Reranker class NoopReranker(Reranker): - """ - A no-op reranker that does not change the order of the chunks. + """A no-op reranker that does not change the order of the chunks. """ @staticmethod - def rerank(chunks: List[Element]) -> List[Element]: - """ - No reranking, returning the same chunks as in input. + def rerank(chunks: list[Element]) -> list[Element]: + """No reranking, returning the same chunks as in input. Args: chunks: The chunks to rerank. diff --git a/pyproject.toml b/pyproject.toml index 4130adb5b..919435ca5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -175,3 +175,71 @@ exclude_dirs = ["venv"] # B101 disables errors for asserts in the code # remember to not use asserts for security and control flows skips = ["B101"] + +[tool.ruff] +exclude = [".venv"] +extend-include = ["*.ipynb"] +line-length = 120 + +#[tool.ruff.lint] +preview = true +explicit-preview-rules = true +select = [ + # Default rules + "E", # pycodestyle errors + "F", # Pyflakes + + # Extra rules, by Michał Kustosz + "C4", # flake8-comprehensions + "C90", # mccabe complex structure + "D", # pydocstyle + "I", # isort + "PT", # flake8-pytest-style + "PL", # Pylint + "SIM", # flake8-simplify + "UP", # pyupgrade + "W", # pycodestyle warnings + + # Extra rules, by Jakub Cierocki + "S", # flake8-bandit + "ANN", # flake8-annotations + "B", # flake8-bugbear + "NPY", # NumPy-specific rules +] +extend-select = [ + # Extra, preview rules, by Jakub Cierocki + "RUF022", # unsorted-dunder-all + "PLR6301", # no-self-use +] +ignore = [ + "B028", # no-explicit-stacklevel, TODO confirm this + "C901", # complex-structure, TODO confirm this + "D100", + "D104", + "D105", + "D205", + "ANN002", + "ANN003", + "ANN101", + "ANN102", + "ANN204", + "PLR0913", +] + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.lint.per-file-ignores] +"src/main.py" = ["D103"] +"*.ipynb" = [ + "PLR2004", + "W293", + "D101", # D*** - we should not require docs in every Jupyter notebook + "D102", + "D103", + "D107" +] + +[tool.ruff.format] +docstring-code-format = true +docstring-code-line-length = 120 diff --git a/ruffs_output.txt b/ruffs_output.txt new file mode 100644 index 000000000..465fe5550 --- /dev/null +++ b/ruffs_output.txt @@ -0,0 +1,1060 @@ +ruff.....................................................................Failed +- hook id: ruff +- exit code: 1 + +warning: The top-level linter settings are deprecated in favour of their counterparts in the `lint` section. Please update the following options in `pyproject.toml`: + - 'extend-select' -> 'lint.extend-select' + - 'ignore' -> 'lint.ignore' + - 'select' -> 'lint.select' + - 'explicit-preview-rules' -> 'lint.explicit-preview-rules' +packages/ragbits-core/examples/chromadb_example.py:24:11: ANN201 Missing return type annotation for public function `main` + | +24 | async def main(): + | ^^^^ ANN201 +25 | """Run the example.""" +26 | chroma_client = chromadb.PersistentClient(path="chroma") + | + = help: Add return type annotation: `None` + +packages/ragbits-core/examples/llm_example.py:15:5: D200 One-line docstring should fit on one line + | +14 | class LoremPromptInput(BaseModel): +15 | """Input format for the LoremPrompt. + | _____^ +16 | | """ + | |_______^ D200 +17 | +18 | theme: str + | + = help: Reformat to one line + +packages/ragbits-core/examples/llm_example.py:23:5: D200 One-line docstring should fit on one line + | +22 | class LoremPromptOutput(BaseModel): +23 | """Output format for the LoremPrompt. + | _____^ +24 | | """ + | |_______^ D200 +25 | +26 | joke: str + | + = help: Reformat to one line + +packages/ragbits-core/examples/llm_example.py:31:5: D200 One-line docstring should fit on one line + | +30 | class JokePrompt(Prompt[LoremPromptInput, LoremPromptOutput]): +31 | """A prompt that generates jokes. + | _____^ +32 | | """ + | |_______^ D200 +33 | +34 | system_prompt = """ + | + = help: Reformat to one line + +packages/ragbits-core/examples/llm_example.py:35:121: E501 Line too long (127 > 120) + | +34 | system_prompt = """ +35 | You are a joke generator. The jokes you generate should be funny and not offensive. {% if not pun_allowed %}Also, make sure + | ^^^^^^^ E501 +36 | that the jokes do not contain any puns.{% else %}You can use any type of joke, even if it contains puns.{% endif %} + | + +packages/ragbits-core/examples/llm_example.py:46:11: ANN201 Missing return type annotation for public function `main` + | +46 | async def main(): + | ^^^^ ANN201 +47 | """Example of using the LiteLLM client with a Prompt class. Requires the OPENAI_API_KEY environment variable to be set. +48 | """ + | + = help: Add return type annotation: `None` + +packages/ragbits-core/examples/llm_example.py:47:5: D200 One-line docstring should fit on one line + | +46 | async def main(): +47 | """Example of using the LiteLLM client with a Prompt class. Requires the OPENAI_API_KEY environment variable to be set. + | _____^ +48 | | """ + | |_______^ D200 +49 | llm = LiteLLM("gpt-4o-2024-08-06", use_structured_output=True) +50 | prompt = JokePrompt(LoremPromptInput(theme="software developers", pun_allowed=True)) + | + = help: Reformat to one line + +packages/ragbits-core/examples/llm_example.py:47:121: E501 Line too long (123 > 120) + | +46 | async def main(): +47 | """Example of using the LiteLLM client with a Prompt class. Requires the OPENAI_API_KEY environment variable to be set. + | ^^^ E501 +48 | """ +49 | llm = LiteLLM("gpt-4o-2024-08-06", use_structured_output=True) + | + +packages/ragbits-core/examples/prompt_example.py:12:5: D200 One-line docstring should fit on one line + | +11 | class LoremPromptInput(BaseModel): +12 | """Input format for the LoremPrompt. + | _____^ +13 | | """ + | |_______^ D200 +14 | +15 | theme: str + | + = help: Reformat to one line + +packages/ragbits-core/examples/prompt_example.py:20:5: D200 One-line docstring should fit on one line + | +19 | class LoremPromptOutput(BaseModel): +20 | """Output format for the LoremPrompt. + | _____^ +21 | | """ + | |_______^ D200 +22 | +23 | text: str + | + = help: Reformat to one line + +packages/ragbits-core/examples/prompt_example.py:27:5: D200 One-line docstring should fit on one line + | +26 | class LoremPrompt(Prompt[LoremPromptInput, LoremPromptOutput]): +27 | """A prompt that generates Lorem Ipsum text. + | _____^ +28 | | """ + | |_______^ D200 +29 | +30 | system_prompt = """ + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/embeddings/base.py:5:5: D200 One-line docstring should fit on one line + | +4 | class Embeddings(ABC): +5 | """Abstract client for communication with embedding models. + | _____^ +6 | | """ + | |_______^ D200 +7 | +8 | @abstractmethod + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py:2:5: D200 One-line docstring should fit on one line + | +1 | class EmbeddingError(Exception): +2 | """Base class for all exceptions raised by the EmbeddingClient. + | _____^ +3 | | """ + | |_______^ D200 +4 | +5 | def __init__(self, message: str) -> None: + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py:5:9: D107 Missing docstring in `__init__` + | +3 | """ +4 | +5 | def __init__(self, message: str) -> None: + | ^^^^^^^^ D107 +6 | super().__init__(message) +7 | self.message = message + | + +packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py:11:5: D200 One-line docstring should fit on one line + | +10 | class EmbeddingConnectionError(EmbeddingError): +11 | """Raised when there is an error connecting to the embedding API. + | _____^ +12 | | """ + | |_______^ D200 +13 | +14 | def __init__(self, message: str = "Connection error.") -> None: + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py:14:9: D107 Missing docstring in `__init__` + | +12 | """ +13 | +14 | def __init__(self, message: str = "Connection error.") -> None: + | ^^^^^^^^ D107 +15 | super().__init__(message) + | + +packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py:19:5: D200 One-line docstring should fit on one line + | +18 | class EmbeddingStatusError(EmbeddingError): +19 | """Raised when an API response has a status code of 4xx or 5xx. + | _____^ +20 | | """ + | |_______^ D200 +21 | +22 | def __init__(self, message: str, status_code: int) -> None: + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py:22:9: D107 Missing docstring in `__init__` + | +20 | """ +21 | +22 | def __init__(self, message: str, status_code: int) -> None: + | ^^^^^^^^ D107 +23 | super().__init__(message) +24 | self.status_code = status_code + | + +packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py:28:5: D200 One-line docstring should fit on one line + | +27 | class EmbeddingResponseError(EmbeddingError): +28 | """Raised when an API response has an invalid schema. + | _____^ +29 | | """ + | |_______^ D200 +30 | +31 | def __init__(self, message: str = "Data returned by API invalid for expected schema.") -> None: + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py:31:9: D107 Missing docstring in `__init__` + | +29 | """ +30 | +31 | def __init__(self, message: str = "Data returned by API invalid for expected schema.") -> None: + | ^^^^^^^^ D107 +32 | super().__init__(message) + | + +packages/ragbits-core/src/ragbits/core/embeddings/litellm.py:14:5: D200 One-line docstring should fit on one line + | +13 | class LiteLLMEmbeddings(Embeddings): +14 | """Client for creating text embeddings using LiteLLM API. + | _____^ +15 | | """ + | |_______^ D200 +16 | +17 | def __init__( + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/embeddings/local.py:16:5: D200 One-line docstring should fit on one line + | +15 | class LocalEmbeddings(Embeddings): +16 | """Class for interaction with any encoder available in HuggingFace. + | _____^ +17 | | """ + | |_______^ D200 +18 | +19 | def __init__( + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/llms/base.py:11:5: D200 One-line docstring should fit on one line + | +10 | class LLM(Generic[LLMClientOptions], ABC): +11 | """Abstract class for interaction with Large Language Model. + | _____^ +12 | | """ + | |_______^ D200 +13 | +14 | _options_cls: type[LLMClientOptions] + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/llms/base.py:36:9: D200 One-line docstring should fit on one line + | +34 | @abstractmethod +35 | def client(self) -> LLMClient: +36 | """Client for the LLM. + | _________^ +37 | | """ + | |___________^ D200 +38 | +39 | def count_tokens(self, prompt: BasePrompt) -> int: + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/llms/base.py:39:9: PLR6301 Method `count_tokens` could be a function, class method, or static method + | +37 | """ +38 | +39 | def count_tokens(self, prompt: BasePrompt) -> int: + | ^^^^^^^^^^^^ PLR6301 +40 | """Counts tokens in the prompt. + | + +packages/ragbits-core/src/ragbits/core/llms/clients/base.py:14:7: B024 `LLMOptions` is an abstract base class, but it has no abstract methods + | +13 | @dataclass +14 | class LLMOptions(ABC): + | ^^^^^^^^^^ B024 +15 | """Abstract dataclass that represents all available LLM call options. +16 | """ + | + +packages/ragbits-core/src/ragbits/core/llms/clients/base.py:15:5: D200 One-line docstring should fit on one line + | +13 | @dataclass +14 | class LLMOptions(ABC): +15 | """Abstract dataclass that represents all available LLM call options. + | _____^ +16 | | """ + | |_______^ D200 +17 | +18 | _not_given: ClassVar[Any] = None + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/llms/clients/base.py:21:9: D200 One-line docstring should fit on one line + | +20 | def __or__(self, other: "LLMOptions") -> "LLMOptions": +21 | """Merges two LLMOptions, prioritizing non-NOT_GIVEN values from the 'other' object. + | _________^ +22 | | """ + | |___________^ D200 +23 | self_dict = asdict(self) +24 | other_dict = asdict(other) + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/llms/clients/base.py:50:5: D200 One-line docstring should fit on one line + | +49 | class LLMClient(Generic[LLMClientOptions], ABC): +50 | """Abstract client for a direct communication with LLM. + | _____^ +51 | | """ + | |_______^ D200 +52 | +53 | def __init__(self, model_name: str) -> None: + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py:2:5: D200 One-line docstring should fit on one line + | +1 | class LLMError(Exception): +2 | """Base class for all exceptions raised by the LLMClient. + | _____^ +3 | | """ + | |_______^ D200 +4 | +5 | def __init__(self, message: str) -> None: + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py:5:9: D107 Missing docstring in `__init__` + | +3 | """ +4 | +5 | def __init__(self, message: str) -> None: + | ^^^^^^^^ D107 +6 | super().__init__(message) +7 | self.message = message + | + +packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py:11:5: D200 One-line docstring should fit on one line + | +10 | class LLMConnectionError(LLMError): +11 | """Raised when there is an error connecting to the LLM API. + | _____^ +12 | | """ + | |_______^ D200 +13 | +14 | def __init__(self, message: str = "Connection error.") -> None: + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py:14:9: D107 Missing docstring in `__init__` + | +12 | """ +13 | +14 | def __init__(self, message: str = "Connection error.") -> None: + | ^^^^^^^^ D107 +15 | super().__init__(message) + | + +packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py:19:5: D200 One-line docstring should fit on one line + | +18 | class LLMStatusError(LLMError): +19 | """Raised when an API response has a status code of 4xx or 5xx. + | _____^ +20 | | """ + | |_______^ D200 +21 | +22 | def __init__(self, message: str, status_code: int) -> None: + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py:22:9: D107 Missing docstring in `__init__` + | +20 | """ +21 | +22 | def __init__(self, message: str, status_code: int) -> None: + | ^^^^^^^^ D107 +23 | super().__init__(message) +24 | self.status_code = status_code + | + +packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py:28:5: D200 One-line docstring should fit on one line + | +27 | class LLMResponseError(LLMError): +28 | """Raised when an API response has an invalid schema. + | _____^ +29 | | """ + | |_______^ D200 +30 | +31 | def __init__(self, message: str = "Data returned by API invalid for expected schema.") -> None: + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py:31:9: D107 Missing docstring in `__init__` + | +29 | """ +30 | +31 | def __init__(self, message: str = "Data returned by API invalid for expected schema.") -> None: + | ^^^^^^^^ D107 +32 | super().__init__(message) + | + +packages/ragbits-core/src/ragbits/core/llms/clients/local.py:21:5: D415 First line should end with a period, question mark, or exclamation point + | +19 | @dataclass +20 | class LocalLLMOptions(LLMOptions): +21 | """Dataclass that represents all available LLM call options for the local LLM client. + | _____^ +22 | | Each of them is described in the [HuggingFace documentation] +23 | | (https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation). # pylint: disable=line-too-long +24 | | """ + | |_______^ D415 +25 | +26 | repetition_penalty: float | None | NotGiven = NOT_GIVEN + | + = help: Add closing punctuation + +packages/ragbits-core/src/ragbits/core/llms/clients/local.py:23:121: E501 Line too long (168 > 120) + | +21 | """Dataclass that represents all available LLM call options for the local LLM client. +22 | Each of them is described in the [HuggingFace documentation] +23 | (https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation). # pylint: disable=line-too-long + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ E501 +24 | """ + | + +packages/ragbits-core/src/ragbits/core/llms/clients/local.py:38:5: D200 One-line docstring should fit on one line + | +37 | class LocalLLMClient(LLMClient[LocalLLMOptions]): +38 | """Client for the local LLM that supports Hugging Face models. + | _____^ +39 | | """ + | |_______^ D200 +40 | +41 | _options_cls = LocalLLMOptions + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/llms/litellm.py:17:5: D200 One-line docstring should fit on one line + | +16 | class LiteLLM(LLM[LiteLLMOptions]): +17 | """Class for interaction with any LLM supported by LiteLLM API. + | _____^ +18 | | """ + | |_______^ D200 +19 | +20 | _options_cls = LiteLLMOptions + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/llms/litellm.py:61:9: D200 One-line docstring should fit on one line + | +59 | @cached_property +60 | def client(self) -> LiteLLMClient: +61 | """Client for the LLM. + | _________^ +62 | | """ + | |___________^ D200 +63 | return LiteLLMClient( +64 | model_name=self.model_name, + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/llms/local.py:17:5: D200 One-line docstring should fit on one line + | +16 | class LocalLLM(LLM[LocalLLMOptions]): +17 | """Class for interaction with any LLM available in HuggingFace. + | _____^ +18 | | """ + | |_______^ D200 +19 | +20 | _options_cls = LocalLLMOptions + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/prompt/base.py:12:5: D200 One-line docstring should fit on one line + | +11 | class BasePrompt(metaclass=ABCMeta): +12 | """Base class for prompts + | _____^ +13 | | """ + | |_______^ D200 +14 | +15 | @property + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/prompt/base.py:12:5: D415 First line should end with a period, question mark, or exclamation point + | +11 | class BasePrompt(metaclass=ABCMeta): +12 | """Base class for prompts + | _____^ +13 | | """ + | |_______^ D415 +14 | +15 | @property + | + = help: Add closing punctuation + +packages/ragbits-core/src/ragbits/core/prompt/base.py:26:9: D200 One-line docstring should fit on one line + | +24 | @property +25 | def json_mode(self) -> bool: +26 | """Returns whether the prompt should be sent in JSON mode. + | _________^ +27 | | """ + | |___________^ D200 +28 | return self.output_schema() is not None + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/prompt/base.py:30:9: PLR6301 Method `output_schema` could be a function, class method, or static method + | +28 | return self.output_schema() is not None +29 | +30 | def output_schema(self) -> dict | type[BaseModel] | None: + | ^^^^^^^^^^^^^ PLR6301 +31 | """Returns the schema of the desired output. Can be used to request structured output from the LLM API +32 | or to validate the output. Can return either a Pydantic model or a JSON schema. + | + +packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py:20:9: D107 Missing docstring in `__init__` + | +18 | """ +19 | +20 | def __init__(self, file_pattern: str = DEFAULT_FILE_PATTERN, root_path: Path = Path.cwd()): + | ^^^^^^^^ D107 +21 | self.file_pattern = file_pattern +22 | self.root_path = root_path + | + +packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py:20:84: B008 Do not perform function call `Path.cwd` in argument defaults; instead, perform the call within the function, or read the default from a module-level singleton variable + | +18 | """ +19 | +20 | def __init__(self, file_pattern: str = DEFAULT_FILE_PATTERN, root_path: Path = Path.cwd()): + | ^^^^^^^^^^ B008 +21 | self.file_pattern = file_pattern +22 | self.root_path = root_path + | + +packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py:25:33: ANN401 Dynamically typed expressions (typing.Any) are disallowed in `obj` + | +24 | @staticmethod +25 | def is_prompt_subclass(obj: Any) -> bool: + | ^^^ ANN401 +26 | """Checks if an object is a class that is a subclass of Prompt (but not Prompt itself). + | + +packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py:57:13: S101 Use of `assert` detected + | +55 | module = importlib.util.module_from_spec(spec) +56 | +57 | assert spec.loader is not None + | ^^^^^^ S101 +58 | +59 | try: + | + +packages/ragbits-core/src/ragbits/core/prompt/lab/app.py:40:96: ANN401 Dynamically typed expressions (typing.Any) are disallowed in `*args` + | +40 | def render_prompt(index: int, system_prompt: str, user_prompt: str, state: PromptState, *args: Any) -> PromptState: + | ^^^ ANN401 +41 | """Renders a prompt based on the provided key, system prompt, user prompt, and input variables. + | + +packages/ragbits-core/src/ragbits/core/prompt/lab/app.py:97:5: S101 Use of `assert` detected + | +95 | str: The response generated by the LLM. +96 | """ +97 | assert state.llm_model_name is not None, "LLM model name is not set." + | ^^^^^^ S101 +98 | llm_client = LiteLLM(model_name=state.llm_model_name, api_key=state.llm_api_key) + | + +packages/ragbits-core/src/ragbits/core/prompt/lab/app.py:100:5: S101 Use of `assert` detected + | + 98 | llm_client = LiteLLM(model_name=state.llm_model_name, api_key=state.llm_api_key) + 99 | +100 | assert state.rendered_prompt is not None, "Prompt has not been rendered yet." + | ^^^^^^ S101 +101 | try: +102 | response = asyncio.run( + | + +packages/ragbits-core/src/ragbits/core/prompt/lab/app.py:173:17: SIM117 Use a single `with` statement with multiple contexts instead of nested `with` statements + | +171 | list_of_vars = [] +172 | with gr.Row(): +173 | with gr.Column(scale=1): + | _________________^ +174 | | with gr.Tab("Inputs"): + | |__________________________________________^ SIM117 +175 | input_fields: list = get_input_type_fields(prompt.input_type) +176 | for entry in input_fields: + | + = help: Combine `with` statements + +packages/ragbits-core/src/ragbits/core/prompt/lab/app.py:187:17: SIM117 Use a single `with` statement with multiple contexts instead of nested `with` statements + | +185 | render_prompt_button = gr.Button(value="Render prompts") +186 | +187 | with gr.Column(scale=4): + | _________________^ +188 | | with gr.Tab("Prompt"): + | |__________________________________________^ SIM117 +189 | with gr.Row(): +190 | with gr.Column(): + | + = help: Combine `with` statements + +packages/ragbits-core/src/ragbits/core/prompt/parsers.py:10:5: D200 One-line docstring should fit on one line + | + 9 | class ResponseParsingError(Exception): +10 | """Raised when there is an error parsing an API response. + | _____^ +11 | | """ + | |_______^ D200 +12 | +13 | def __init__(self, message: str) -> None: + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/prompt/parsers.py:13:9: D107 Missing docstring in `__init__` + | +11 | """ +12 | +13 | def __init__(self, message: str) -> None: + | ^^^^^^^^ D107 +14 | super().__init__(message) +15 | self.message = message + | + +packages/ragbits-core/src/ragbits/core/prompt/prompt.py:49:17: S101 Use of `assert` detected + | +47 | output_type = args[1] if len(args) > 1 else str +48 | +49 | assert input_type is None or issubclass( + | ^^^^^^ S101 +50 | input_type, BaseModel +51 | ), "Input type must be a subclass of BaseModel" + | + +packages/ragbits-core/src/ragbits/core/prompt/prompt.py:57:15: S701 By default, jinja2 sets `autoescape` to `False`. Consider using `autoescape=True` or the `select_autoescape` function to mitigate XSS vulnerabilities. + | +55 | @classmethod +56 | def _parse_template(cls, template: str) -> Template: +57 | env = Environment() # nosec B701 - HTML autoescaping not needed for plain text + | ^^^^^^^^^^^ S701 +58 | ast = env.parse(template) +59 | template_variables = meta.find_undeclared_variables(ast) + | + +packages/ragbits-core/src/ragbits/core/prompt/prompt.py:90:42: ANN401 Dynamically typed expressions (typing.Any) are disallowed in `**kwargs` + | +89 | @classmethod +90 | def __init_subclass__(cls, **kwargs: Any) -> None: + | ^^^ ANN401 +91 | if not hasattr(cls, "user_prompt") or cls.user_prompt is None: +92 | raise ValueError("User prompt must be provided") + | + +packages/ragbits-core/src/ragbits/core/prompt/prompt.py:111:9: D107 Missing docstring in `__init__` + | +109 | ... +110 | +111 | def __init__(self, *args: Any, **kwargs: Any) -> None: + | ^^^^^^^^ D107 +112 | input_data = args[0] if args else kwargs.get("input_data") +113 | if self.input_type and input_data is None: + | + +packages/ragbits-core/src/ragbits/core/prompt/promptfoo.py:11:65: B008 Do not perform function call `Path.cwd` in argument defaults; instead, perform the call within the function, or read the default from a module-level singleton variable + | +10 | def generate_configs( +11 | file_pattern: str = DEFAULT_FILE_PATTERN, root_path: Path = Path.cwd(), target_path: Path = Path("promptfooconfigs") + | ^^^^^^^^^^ B008 +12 | ) -> None: +13 | """Generates promptfoo configuration files for all discovered prompts. + | + +packages/ragbits-core/src/ragbits/core/vector_store/base.py:7:5: D200 One-line docstring should fit on one line + | + 6 | class VectorDBEntry(BaseModel): + 7 | """An object representing a vector database entry. + | _____^ + 8 | | """ + | |_______^ D200 + 9 | +10 | key: str + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/vector_store/base.py:16:5: D200 One-line docstring should fit on one line + | +15 | class VectorStore(abc.ABC): +16 | """A class with an implementation of Vector Store, allowing to store and retrieve vectors by similarity function. + | _____^ +17 | | """ + | |_______^ D200 +18 | +19 | @abc.abstractmethod + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py:18:5: D415 First line should end with a period, question mark, or exclamation point + | +17 | class ChromaDBStore(VectorStore): +18 | """Class that stores text embeddings using [Chroma](https://docs.trychroma.com/)""" + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ D415 +19 | +20 | def __init__( + | + = help: Add closing punctuation + +packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py:64:9: D417 Missing argument description in the docstring for `_return_best_match`: `retrieved` + | +62 | ) +63 | +64 | def _return_best_match(self, retrieved: dict) -> str | None: + | ^^^^^^^^^^^^^^^^^^ D417 +65 | """Based on the retrieved data, returns the best match or None if no match is found. + | + +packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py:78:9: PLR6301 Method `_process_db_entry` could be a function, class method, or static method + | +76 | return None +77 | +78 | def _process_db_entry(self, entry: VectorDBEntry) -> tuple[str, list[float], dict]: + | ^^^^^^^^^^^^^^^^^ PLR6301 +79 | doc_id = sha256(entry.key.encode("utf-8")).hexdigest() +80 | embedding = entry.vector + | + +packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py:6:5: D200 One-line docstring should fit on one line + | +5 | class InMemoryVectorStore(VectorStore): +6 | """A simple in-memory implementation of Vector Store, storing vectors in memory. + | _____^ +7 | | """ + | |_______^ D200 +8 | +9 | def __init__(self) -> None: + | + = help: Reformat to one line + +packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py:9:9: D107 Missing docstring in `__init__` + | + 7 | """ + 8 | + 9 | def __init__(self) -> None: + | ^^^^^^^^ D107 +10 | self._storage: dict[str, VectorDBEntry] = {} + | + +packages/ragbits-document-search/examples/simple_text.py:26:11: ANN201 Missing return type annotation for public function `main` + | +26 | async def main(): + | ^^^^ ANN201 +27 | """Run the example.""" +28 | document_search = DocumentSearch(embedder=LiteLLMEmbeddings(), vector_store=InMemoryVectorStore()) + | + = help: Add return type annotation: `None` + +packages/ragbits-document-search/src/ragbits/document_search/_main.py:17:5: D200 One-line docstring should fit on one line + | +16 | class SearchConfig(BaseModel): +17 | """Configuration for the search process. + | _____^ +18 | | """ + | |_______^ D200 +19 | +20 | reranker_kwargs: dict[str, Any] = Field(default_factory=dict) + | + = help: Reformat to one line + +packages/ragbits-document-search/src/ragbits/document_search/_main.py:58:70: B008 Do not perform function call `SearchConfig` in argument defaults; instead, perform the call within the function, or read the default from a module-level singleton variable + | +56 | self.document_processor_router = document_processor_router or DocumentProcessorRouter.from_config() +57 | +58 | async def search(self, query: str, search_config: SearchConfig = SearchConfig()) -> list[Element]: + | ^^^^^^^^^^^^^^ B008 +59 | """Search for the most relevant chunks for a query. + | + +packages/ragbits-document-search/src/ragbits/document_search/documents/document.py:35:5: D200 One-line docstring should fit on one line + | +34 | class DocumentMeta(BaseModel): +35 | """An object representing a document metadata. + | _____^ +36 | | """ + | |_______^ D200 +37 | +38 | document_type: DocumentType + | + = help: Reformat to one line + +packages/ragbits-document-search/src/ragbits/document_search/documents/document.py:95:5: D200 One-line docstring should fit on one line + | +94 | class Document(BaseModel): +95 | """An object representing a document which is downloaded and stored locally. + | _____^ +96 | | """ + | |_______^ D200 +97 | +98 | local_path: Path + | + = help: Reformat to one line + +packages/ragbits-document-search/src/ragbits/document_search/documents/document.py:119:5: D200 One-line docstring should fit on one line + | +118 | class TextDocument(Document): +119 | """An object representing a text document. + | _____^ +120 | | """ + | |_______^ D200 +121 | +122 | @property + | + = help: Reformat to one line + +packages/ragbits-document-search/src/ragbits/document_search/documents/element.py:10:5: D200 One-line docstring should fit on one line + | + 9 | class Element(BaseModel, ABC): +10 | """An object representing an element in a document. + | _____^ +11 | | """ + | |_______^ D200 +12 | +13 | element_type: str + | + = help: Reformat to one line + +packages/ragbits-document-search/src/ragbits/document_search/documents/element.py:68:5: D200 One-line docstring should fit on one line + | +67 | class TextElement(Element): +68 | """An object representing a text element in a document. + | _____^ +69 | | """ + | |_______^ D200 +70 | +71 | element_type: str = "text" + | + = help: Reformat to one line + +packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py:20:5: D200 One-line docstring should fit on one line + | +19 | class Source(BaseModel, ABC): +20 | """An object representing a source. + | _____^ +21 | | """ + | |_______^ D200 +22 | +23 | @abstractmethod + | + = help: Reformat to one line + +packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py:41:5: D200 One-line docstring should fit on one line + | +40 | class LocalFileSource(Source): +41 | """An object representing a local file source. + | _____^ +42 | | """ + | |_______^ D200 +43 | +44 | source_type: Literal["local_file"] = "local_file" + | + = help: Reformat to one line + +packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py:65:5: D200 One-line docstring should fit on one line + | +64 | class GCSSource(Source): +65 | """An object representing a GCS file source. + | _____^ +66 | | """ + | |_______^ D200 +67 | +68 | source_type: Literal["gcs"] = "gcs" + | + = help: Reformat to one line + +packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py:37:9: D107 Missing docstring in `__init__` + | +35 | """ +36 | +37 | def __init__(self, providers: dict[DocumentType, BaseProvider]): + | ^^^^^^^^ D107 +38 | self._providers = providers + | + +packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py:42:9: D415 First line should end with a period, question mark, or exclamation point + | +40 | @classmethod +41 | def from_config(cls, providers_config: ProvidersConfig | None = None) -> "DocumentProcessorRouter": +42 | """Create a DocumentProcessorRouter from a configuration. If the configuration is not provided, the default + | _________^ +43 | | configuration will be used. If the configuration is provided, it will be merged with the default configuration, +44 | | overriding the default values for the document types that are defined in the configuration. +45 | | Example of the configuration: +46 | | { +47 | | DocumentType.TXT: YourCustomProviderClass(), +48 | | DocumentType.PDF: UnstructuredProvider(), +49 | | } +50 | | +51 | | Args: +52 | | providers_config: The dictionary with the providers configuration, mapping the document types to the +53 | | provider class. +54 | | +55 | | Returns: +56 | | The DocumentProcessorRouter. +57 | | """ + | |___________^ D415 +58 | config = copy.deepcopy(DEFAULT_PROVIDERS_CONFIG) +59 | config.update(providers_config if providers_config is not None else {}) + | + = help: Add closing punctuation + +packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/base.py:10:9: D107 Missing docstring in `__init__` + | + 8 | """Raised when the document type is not supported by the provider.""" + 9 | +10 | def __init__(self, provider_name: str, document_type: DocumentType) -> None: + | ^^^^^^^^ D107 +11 | message = f"Document type {document_type} is not supported by the {provider_name}" +12 | super().__init__(message) + | + +packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py:26:5: D200 One-line docstring should fit on one line + | +25 | class UnstructuredProvider(BaseProvider): +26 | """A provider that uses the Unstructured API to process the documents. + | _____^ +27 | | """ + | |_______^ D200 +28 | +29 | SUPPORTED_DOCUMENT_TYPES = { + | + = help: Reformat to one line + +packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py:50:9: D417 Missing argument description in the docstring for `__init__`: `chunking_kwargs` + | +48 | } +49 | +50 | def __init__(self, partition_kwargs: dict | None = None, chunking_kwargs: dict | None = None): + | ^^^^^^^^ D417 +51 | """Initialize the UnstructuredProvider. + | + +packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/base.py:5:5: D200 One-line docstring should fit on one line + | +4 | class QueryRephraser(abc.ABC): +5 | """Rephrases a query. Can provide multiple rephrased queries from one sentence / question. + | _____^ +6 | | """ + | |_______^ D200 +7 | +8 | @staticmethod + | + = help: Reformat to one line + +packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/noop.py:5:5: D200 One-line docstring should fit on one line + | +4 | class NoopQueryRephraser(QueryRephraser): +5 | """A no-op query paraphraser that does not change the query. + | _____^ +6 | | """ + | |_______^ D200 +7 | +8 | @staticmethod + | + = help: Reformat to one line + +packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/base.py:7:5: D200 One-line docstring should fit on one line + | + 6 | class Reranker(abc.ABC): + 7 | """Reranks chunks retrieved from vector store. + | _____^ + 8 | | """ + | |_______^ D200 + 9 | +10 | @staticmethod + | + = help: Reformat to one line + +packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/noop.py:7:5: D200 One-line docstring should fit on one line + | + 6 | class NoopReranker(Reranker): + 7 | """A no-op reranker that does not change the order of the chunks. + | _____^ + 8 | | """ + | |_______^ D200 + 9 | +10 | @staticmethod + | + = help: Reformat to one line + +scripts/create_ragbits_package.py:22:5: D200 One-line docstring should fit on one line + | +21 | def run() -> None: +22 | """Create a new Ragbits package. + | _____^ +23 | | """ + | |_______^ D200 +24 | package_name: str = text("Enter the package name", default="ragbits-") + | + = help: Reformat to one line + +scripts/update_ragbits_package.py:29:5: D200 One-line docstring should fit on one line + | +28 | class UpdateType(Enum): +29 | """Enum representing the type of version update: major, minor, or patch. + | _____^ +30 | | """ + | |_______^ D200 +31 | +32 | MAJOR = "major" + | + = help: Reformat to one line + +scripts/update_ragbits_package.py:43:5: ANN202 Missing return type annotation for private function `_version_to_list` + | +43 | def _version_to_list(version_string): + | ^^^^^^^^^^^^^^^^ ANN202 +44 | return [int(part) for part in version_string.split(".")] + | + = help: Add return type annotation + +scripts/update_ragbits_package.py:43:22: ANN001 Missing type annotation for function argument `version_string` + | +43 | def _version_to_list(version_string): + | ^^^^^^^^^^^^^^ ANN001 +44 | return [int(part) for part in version_string.split(".")] + | + +scripts/update_ragbits_package.py:95:5: S101 Use of `assert` detected + | +93 | (PACKAGES_DIR / pkg_name / "pyproject.toml").write_text(tomlkit.dumps(pkg_pyproject)) +94 | +95 | assert isinstance(new_version, str) + | ^^^^^^ S101 +96 | pprint(f"[green]The {pkg_name} package was successfully updated from {version} to {new_version}.[/green]") + | + +Found 95 errors. +No fixes available (58 hidden fixes can be enabled with the `--unsafe-fixes` option). + diff --git a/scripts/create_ragbits_package.py b/scripts/create_ragbits_package.py index 56b9e0031..cf94b7b4b 100644 --- a/scripts/create_ragbits_package.py +++ b/scripts/create_ragbits_package.py @@ -19,8 +19,7 @@ def run() -> None: - """ - Create a new Ragbits package. + """Create a new Ragbits package. """ package_name: str = text("Enter the package name", default="ragbits-") diff --git a/scripts/update_ragbits_package.py b/scripts/update_ragbits_package.py index 85c9758fb..c41686787 100644 --- a/scripts/update_ragbits_package.py +++ b/scripts/update_ragbits_package.py @@ -16,7 +16,6 @@ from copy import deepcopy from enum import Enum from pathlib import Path -from typing import Optional import tomlkit import typer @@ -27,8 +26,7 @@ class UpdateType(Enum): - """ - Enum representing the type of version update: major, minor, or patch. + """Enum representing the type of version update: major, minor, or patch. """ MAJOR = "major" @@ -36,7 +34,7 @@ class UpdateType(Enum): PATCH = "patch" -def _update_type_to_enum(update_type: Optional[str] = None) -> Optional[UpdateType]: +def _update_type_to_enum(update_type: str | None = None) -> UpdateType | None: if update_type is not None: return UpdateType(update_type) return None @@ -46,7 +44,7 @@ def _version_to_list(version_string): return [int(part) for part in version_string.split(".")] -def _check_update_type(version: str, new_version: str) -> Optional[UpdateType]: +def _check_update_type(version: str, new_version: str) -> UpdateType | None: version_list = _version_to_list(version) new_version_list = _version_to_list(new_version) @@ -75,9 +73,9 @@ def _get_updated_version(version: str, update_type: UpdateType) -> str: def _update_pkg_version( pkg_name: str, - pkg_pyproject: Optional[tomlkit.TOMLDocument] = None, - new_version: Optional[str] = None, - update_type: Optional[UpdateType] = None, + pkg_pyproject: tomlkit.TOMLDocument | None = None, + new_version: str | None = None, + update_type: UpdateType | None = None, ) -> tuple[str, str]: if not pkg_pyproject: pkg_pyproject = tomlkit.parse((PACKAGES_DIR / pkg_name / "pyproject.toml").read_text()) @@ -100,9 +98,8 @@ def _update_pkg_version( return version, new_version -def run(pkg_name: Optional[str] = typer.Argument(None), update_type: Optional[str] = typer.Argument(None)) -> None: - """ - Main entry point for the package version updater. Updates package versions based on user input. +def run(pkg_name: str | None = typer.Argument(None), update_type: str | None = typer.Argument(None)) -> None: + """Main entry point for the package version updater. Updates package versions based on user input. Based on the provided package name and update type, this function updates the version of a specific package. If the package is "ragbits-core", all other packages that depend on it @@ -120,7 +117,6 @@ def run(pkg_name: Optional[str] = typer.Argument(None), update_type: Optional[st Raises: ValueError: If the provided `pkg_name` is not found in the available packages. """ - packages: list[str] = [obj.name for obj in PACKAGES_DIR.iterdir() if obj.is_dir()] if pkg_name is not None: From 4209360ea8a9674606a672d6d8b7f8dc98e44980 Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Tue, 8 Oct 2024 10:47:42 +0200 Subject: [PATCH 02/28] Adding config options to support Ruff. Disabling no new line at the beginning of docstrigns. --- .pre-commit-config.yaml | 11 +++++++ pyproject.toml | 68 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7428c1b03..d35d26dba 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,6 +12,17 @@ repos: - id: check-json - id: check-yaml + - repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.6.9 + hooks: + - id: ruff + types_or: [ python, pyi, jupyter ] + exclude: (/test_|tests/|docs/|notebooks/) +# args: [ --fix ] + - id: ruff-format + types_or: [ python, pyi, jupyter ] + exclude: (docs/) + # PEP 8 compliant opinionated formatter. - repo: https://github.com/psf/black rev: 23.10.1 diff --git a/pyproject.toml b/pyproject.toml index 4130adb5b..b957bd82f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -175,3 +175,71 @@ exclude_dirs = ["venv"] # B101 disables errors for asserts in the code # remember to not use asserts for security and control flows skips = ["B101"] + +[tool.ruff] +exclude = [".venv"] +extend-include = ["*.ipynb"] +line-length = 120 + +[tool.ruff.lint] +preview = true +explicit-preview-rules = true +select = [ + # Default rules + "E", # pycodestyle errors + "F", # Pyflakes + + # Extra rules, by Michał Kustosz + "C4", # flake8-comprehensions + "C90", # mccabe complex structure + "D", # pydocstyle + "I", # isort + "PT", # flake8-pytest-style + "PL", # Pylint + "SIM", # flake8-simplify + "UP", # pyupgrade + "W", # pycodestyle warnings + + # Extra rules, by Jakub Cierocki + "S", # flake8-bandit + "ANN", # flake8-annotations + "B", # flake8-bugbear + "NPY", # NumPy-specific rules +] +extend-select = [ + # Extra, preview rules, by Jakub Cierocki + "RUF022", # unsorted-dunder-all + "PLR6301", # no-self-use +] +ignore = [ + "B028", # no-explicit-stacklevel, TODO confirm this + "C901", # complex-structure, TODO confirm this + "D100", + "D104", + "D105", + "D205", + "D212", + "ANN002", + "ANN003", + "ANN101", + "ANN102", + "ANN204", + "PLR0913", +] + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.lint.per-file-ignores] +"*.ipynb" = [ + "PLR2004", + "W293", + "D101", # D*** - we should not require docs in every Jupyter notebook + "D102", + "D103", + "D107" +] + +[tool.ruff.format] +docstring-code-format = true +docstring-code-line-length = 120 From 5bedf86d40beece682933ffd7a0e7bf16b406059 Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Tue, 8 Oct 2024 13:32:41 +0200 Subject: [PATCH 03/28] One line docs are still pretty. --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index b957bd82f..48595c6e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -217,6 +217,7 @@ ignore = [ "D100", "D104", "D105", + "D200", "D205", "D212", "ANN002", From 5fcf55bcd8c7f650b6f1254e7b60ec1dadd4a92b Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Thu, 10 Oct 2024 10:58:51 +0200 Subject: [PATCH 04/28] Applying ruff with isort (with isort). --- .pre-commit-config.yaml | 10 +--- .../ragbits-cli/src/ragbits/cli/__init__.py | 1 - .../ragbits-core/examples/chromadb_example.py | 1 - .../src/ragbits/core/embeddings/litellm.py | 10 ++-- .../src/ragbits/core/embeddings/local.py | 4 +- .../src/ragbits/core/llms/base.py | 14 ++--- .../src/ragbits/core/llms/clients/base.py | 6 +- .../src/ragbits/core/llms/clients/litellm.py | 27 +++++---- .../src/ragbits/core/llms/clients/local.py | 24 ++++---- .../src/ragbits/core/llms/litellm.py | 9 ++- .../src/ragbits/core/llms/local.py | 6 +- .../src/ragbits/core/prompt/__init__.py | 2 +- .../src/ragbits/core/prompt/base.py | 6 +- .../core/prompt/discovery/prompt_discovery.py | 1 - .../src/ragbits/core/prompt/lab/app.py | 2 +- .../src/ragbits/core/prompt/parsers.py | 7 ++- .../src/ragbits/core/prompt/prompt.py | 19 ++++--- .../src/ragbits/core/vector_store/base.py | 3 +- .../core/vector_store/chromadb_store.py | 16 +++--- .../examples/simple_text.py | 1 - .../src/ragbits/document_search/_main.py | 6 +- .../document_search/documents/document.py | 3 +- .../document_search/documents/sources.py | 1 - .../ingestion/document_processor.py | 3 +- .../ingestion/providers/unstructured.py | 3 +- .../retrieval/rerankers/noop.py | 3 +- pyproject.toml | 55 ++++++------------- scripts/update_ragbits_package.py | 14 ++--- 28 files changed, 104 insertions(+), 153 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d35d26dba..6c93efa31 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: - id: ruff types_or: [ python, pyi, jupyter ] exclude: (/test_|tests/|docs/|notebooks/) -# args: [ --fix ] + args: [ --fix ] - id: ruff-format types_or: [ python, pyi, jupyter ] exclude: (docs/) @@ -58,14 +58,6 @@ repos: additional_dependencies: [pydantic>=2.8.2, types-pyyaml>=6.0.12] exclude: (/test_|setup.py|/tests/|docs/) - # Sort imports alphabetically, and automatically separated into sections and by type. - - repo: https://github.com/timothycrosley/isort - rev: 5.13.2 - hooks: - - id: isort - args: ["--profile", "black"] - exclude: (docs/|notebooks/) - # Checks Python source files for errors. - repo: https://github.com/PyCQA/flake8 rev: 7.1.1 diff --git a/packages/ragbits-cli/src/ragbits/cli/__init__.py b/packages/ragbits-cli/src/ragbits/cli/__init__.py index 45d30fbee..dbef2e983 100644 --- a/packages/ragbits-cli/src/ragbits/cli/__init__.py +++ b/packages/ragbits-cli/src/ragbits/cli/__init__.py @@ -18,7 +18,6 @@ def main() -> None: - if found it imports the `register` function from the `cli` module and calls it with the `app` object - register function should add the CLI commands to the `app` object """ - cli_enabled_modules = [ module for module in pkgutil.iter_modules(ragbits.__path__) diff --git a/packages/ragbits-core/examples/chromadb_example.py b/packages/ragbits-core/examples/chromadb_example.py index 0a2a80157..4c30d1918 100644 --- a/packages/ragbits-core/examples/chromadb_example.py +++ b/packages/ragbits-core/examples/chromadb_example.py @@ -24,7 +24,6 @@ async def main(): """Run the example.""" - chroma_client = chromadb.PersistentClient(path="chroma") embedding_client = LiteLLMEmbeddings() diff --git a/packages/ragbits-core/src/ragbits/core/embeddings/litellm.py b/packages/ragbits-core/src/ragbits/core/embeddings/litellm.py index ca9dcb6ca..649099c3b 100644 --- a/packages/ragbits-core/src/ragbits/core/embeddings/litellm.py +++ b/packages/ragbits-core/src/ragbits/core/embeddings/litellm.py @@ -1,4 +1,3 @@ -from typing import Optional try: import litellm @@ -19,10 +18,10 @@ class LiteLLMEmbeddings(Embeddings): def __init__( self, model: str = "text-embedding-3-small", - options: Optional[dict] = None, - api_base: Optional[str] = None, - api_key: Optional[str] = None, - api_version: Optional[str] = None, + options: dict | None = None, + api_base: str | None = None, + api_key: str | None = None, + api_version: str | None = None, ) -> None: """ Constructs the LiteLLMEmbeddingClient. @@ -65,7 +64,6 @@ async def embed_text(self, data: list[str]) -> list[list[float]]: EmbeddingStatusError: If the embedding API returns an error status code. EmbeddingResponseError: If the embedding API response is invalid. """ - try: response = await litellm.aembedding( input=data, diff --git a/packages/ragbits-core/src/ragbits/core/embeddings/local.py b/packages/ragbits-core/src/ragbits/core/embeddings/local.py index 8a4f52bab..20dfe2c61 100644 --- a/packages/ragbits-core/src/ragbits/core/embeddings/local.py +++ b/packages/ragbits-core/src/ragbits/core/embeddings/local.py @@ -1,4 +1,4 @@ -from typing import Iterator, Optional +from collections.abc import Iterator try: import torch @@ -20,7 +20,7 @@ class LocalEmbeddings(Embeddings): def __init__( self, model_name: str, - api_key: Optional[str] = None, + api_key: str | None = None, ) -> None: """ Constructs a new local LLM instance. diff --git a/packages/ragbits-core/src/ragbits/core/llms/base.py b/packages/ragbits-core/src/ragbits/core/llms/base.py index 178b054c3..4f4b6fedb 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/base.py +++ b/packages/ragbits-core/src/ragbits/core/llms/base.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from functools import cached_property -from typing import Generic, Optional, Type, cast, overload +from typing import Generic, cast, overload from ragbits.core.prompt.base import BasePrompt, BasePromptWithParser, OutputT @@ -12,9 +12,9 @@ class LLM(Generic[LLMClientOptions], ABC): Abstract class for interaction with Large Language Model. """ - _options_cls: Type[LLMClientOptions] + _options_cls: type[LLMClientOptions] - def __init__(self, model_name: str, default_options: Optional[LLMOptions] = None) -> None: + def __init__(self, model_name: str, default_options: LLMOptions | None = None) -> None: """ Constructs a new LLM instance. @@ -55,7 +55,7 @@ async def generate_raw( self, prompt: BasePrompt, *, - options: Optional[LLMOptions] = None, + options: LLMOptions | None = None, ) -> str: """ Prepares and sends a prompt to the LLM and returns the raw response (without parsing). @@ -83,7 +83,7 @@ async def generate( self, prompt: BasePromptWithParser[OutputT], *, - options: Optional[LLMOptions] = None, + options: LLMOptions | None = None, ) -> OutputT: ... @@ -92,7 +92,7 @@ async def generate( self, prompt: BasePrompt, *, - options: Optional[LLMOptions] = None, + options: LLMOptions | None = None, ) -> OutputT: ... @@ -100,7 +100,7 @@ async def generate( self, prompt: BasePrompt, *, - options: Optional[LLMOptions] = None, + options: LLMOptions | None = None, ) -> OutputT: """ Prepares and sends a prompt to the LLM and returns response parsed to the diff --git a/packages/ragbits-core/src/ragbits/core/llms/clients/base.py b/packages/ragbits-core/src/ragbits/core/llms/clients/base.py index eaca8095c..47f38aa18 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/clients/base.py +++ b/packages/ragbits-core/src/ragbits/core/llms/clients/base.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from dataclasses import asdict, dataclass -from typing import Any, ClassVar, Dict, Generic, Optional, Type, TypeVar +from typing import Any, ClassVar, Generic, TypeVar from pydantic import BaseModel @@ -35,7 +35,7 @@ def __or__(self, other: "LLMOptions") -> "LLMOptions": return self.__class__(**updated_dict) - def dict(self) -> Dict[str, Any]: + def dict(self) -> dict[str, Any]: """ Creates a dictionary representation of the LLMOptions instance. If a value is None, it will be replaced with a provider-specific not-given sentinel. @@ -70,7 +70,7 @@ async def call( conversation: ChatFormat, options: LLMClientOptions, json_mode: bool = False, - output_schema: Optional[Type[BaseModel] | Dict] = None, + output_schema: type[BaseModel] | dict | None = None, ) -> str: """ Calls LLM inference API. diff --git a/packages/ragbits-core/src/ragbits/core/llms/clients/litellm.py b/packages/ragbits-core/src/ragbits/core/llms/clients/litellm.py index f1620d8c8..11b9c3f88 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/clients/litellm.py +++ b/packages/ragbits-core/src/ragbits/core/llms/clients/litellm.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -from typing import Dict, List, Optional, Type, Union from pydantic import BaseModel @@ -25,15 +24,15 @@ class LiteLLMOptions(LLMOptions): Each of them is described in the [LiteLLM documentation](https://docs.litellm.ai/docs/completion/input). """ - frequency_penalty: Union[Optional[float], NotGiven] = NOT_GIVEN - max_tokens: Union[Optional[int], NotGiven] = NOT_GIVEN - n: Union[Optional[int], NotGiven] = NOT_GIVEN - presence_penalty: Union[Optional[float], NotGiven] = NOT_GIVEN - seed: Union[Optional[int], NotGiven] = NOT_GIVEN - stop: Union[Optional[Union[str, List[str]]], NotGiven] = NOT_GIVEN - temperature: Union[Optional[float], NotGiven] = NOT_GIVEN - top_p: Union[Optional[float], NotGiven] = NOT_GIVEN - mock_response: Union[Optional[str], NotGiven] = NOT_GIVEN + frequency_penalty: float | None | NotGiven = NOT_GIVEN + max_tokens: int | None | NotGiven = NOT_GIVEN + n: int | None | NotGiven = NOT_GIVEN + presence_penalty: float | None | NotGiven = NOT_GIVEN + seed: int | None | NotGiven = NOT_GIVEN + stop: str | list[str] | None | NotGiven = NOT_GIVEN + temperature: float | None | NotGiven = NOT_GIVEN + top_p: float | None | NotGiven = NOT_GIVEN + mock_response: str | None | NotGiven = NOT_GIVEN class LiteLLMClient(LLMClient[LiteLLMOptions]): @@ -48,9 +47,9 @@ def __init__( self, model_name: str, *, - base_url: Optional[str] = None, - api_key: Optional[str] = None, - api_version: Optional[str] = None, + base_url: str | None = None, + api_key: str | None = None, + api_version: str | None = None, use_structured_output: bool = False, ) -> None: """ @@ -80,7 +79,7 @@ async def call( conversation: ChatFormat, options: LiteLLMOptions, json_mode: bool = False, - output_schema: Optional[Type[BaseModel] | Dict] = None, + output_schema: type[BaseModel] | dict | None = None, ) -> str: """ Calls the appropriate LLM endpoint with the given prompt and options. diff --git a/packages/ragbits-core/src/ragbits/core/llms/clients/local.py b/packages/ragbits-core/src/ragbits/core/llms/clients/local.py index d3a1d0f62..26f3671da 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/clients/local.py +++ b/packages/ragbits-core/src/ragbits/core/llms/clients/local.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -from typing import Dict, List, Optional, Type, Union from pydantic import BaseModel @@ -25,15 +24,15 @@ class LocalLLMOptions(LLMOptions): (https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation). # pylint: disable=line-too-long """ - repetition_penalty: Union[Optional[float], NotGiven] = NOT_GIVEN - do_sample: Union[Optional[bool], NotGiven] = NOT_GIVEN - best_of: Union[Optional[int], NotGiven] = NOT_GIVEN - max_new_tokens: Union[Optional[int], NotGiven] = NOT_GIVEN - top_k: Union[Optional[int], NotGiven] = NOT_GIVEN - top_p: Union[Optional[float], NotGiven] = NOT_GIVEN - seed: Union[Optional[int], NotGiven] = NOT_GIVEN - stop_sequences: Union[Optional[List[str]], NotGiven] = NOT_GIVEN - temperature: Union[Optional[float], NotGiven] = NOT_GIVEN + repetition_penalty: float | None | NotGiven = NOT_GIVEN + do_sample: bool | None | NotGiven = NOT_GIVEN + best_of: int | None | NotGiven = NOT_GIVEN + max_new_tokens: int | None | NotGiven = NOT_GIVEN + top_k: int | None | NotGiven = NOT_GIVEN + top_p: float | None | NotGiven = NOT_GIVEN + seed: int | None | NotGiven = NOT_GIVEN + stop_sequences: list[str] | None | NotGiven = NOT_GIVEN + temperature: float | None | NotGiven = NOT_GIVEN class LocalLLMClient(LLMClient[LocalLLMOptions]): @@ -47,7 +46,7 @@ def __init__( self, model_name: str, *, - hf_api_key: Optional[str] = None, + hf_api_key: str | None = None, ) -> None: """ Constructs a new local LLMClient instance. @@ -74,7 +73,7 @@ async def call( conversation: ChatFormat, options: LocalLLMOptions, json_mode: bool = False, - output_schema: Optional[Type[BaseModel] | Dict] = None, + output_schema: type[BaseModel] | dict | None = None, ) -> str: """ Makes a call to the local LLM with the provided prompt and options. @@ -88,7 +87,6 @@ async def call( Returns: Response string from LLM. """ - input_ids = self.tokenizer.apply_chat_template( conversation, add_generation_prompt=True, return_tensors="pt" ).to(self.model.device) diff --git a/packages/ragbits-core/src/ragbits/core/llms/litellm.py b/packages/ragbits-core/src/ragbits/core/llms/litellm.py index 00524113e..c4f8c4c7e 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/litellm.py +++ b/packages/ragbits-core/src/ragbits/core/llms/litellm.py @@ -1,5 +1,4 @@ from functools import cached_property -from typing import Optional try: import litellm @@ -24,11 +23,11 @@ class LiteLLM(LLM[LiteLLMOptions]): def __init__( self, model_name: str = "gpt-3.5-turbo", - default_options: Optional[LiteLLMOptions] = None, + default_options: LiteLLMOptions | None = None, *, - base_url: Optional[str] = None, - api_key: Optional[str] = None, - api_version: Optional[str] = None, + base_url: str | None = None, + api_key: str | None = None, + api_version: str | None = None, use_structured_output: bool = False, ) -> None: """ diff --git a/packages/ragbits-core/src/ragbits/core/llms/local.py b/packages/ragbits-core/src/ragbits/core/llms/local.py index cf3cacbe1..0d4699063 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/local.py +++ b/packages/ragbits-core/src/ragbits/core/llms/local.py @@ -1,5 +1,4 @@ from functools import cached_property -from typing import Optional try: from transformers import AutoTokenizer @@ -24,9 +23,9 @@ class LocalLLM(LLM[LocalLLMOptions]): def __init__( self, model_name: str, - default_options: Optional[LocalLLMOptions] = None, + default_options: LocalLLMOptions | None = None, *, - api_key: Optional[str] = None, + api_key: str | None = None, ) -> None: """ Constructs a new local LLM instance. @@ -66,6 +65,5 @@ def count_tokens(self, prompt: BasePrompt) -> int: Returns: Number of tokens in the messages. """ - input_ids = self.tokenizer.apply_chat_template(prompt.chat) return len(input_ids) diff --git a/packages/ragbits-core/src/ragbits/core/prompt/__init__.py b/packages/ragbits-core/src/ragbits/core/prompt/__init__.py index 007312ef3..9215c3e74 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/__init__.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/__init__.py @@ -1,3 +1,3 @@ from ragbits.core.prompt.prompt import ChatFormat, Prompt -__all__ = ["Prompt", "ChatFormat"] +__all__ = ["ChatFormat", "Prompt"] diff --git a/packages/ragbits-core/src/ragbits/core/prompt/base.py b/packages/ragbits-core/src/ragbits/core/prompt/base.py index 47bf427f1..17c30e9ed 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/base.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/base.py @@ -1,10 +1,10 @@ from abc import ABCMeta, abstractmethod -from typing import Dict, Generic, List, Optional, Type +from typing import Generic from pydantic import BaseModel from typing_extensions import TypeVar -ChatFormat = List[Dict[str, str]] +ChatFormat = list[dict[str, str]] OutputT = TypeVar("OutputT", default=str) @@ -30,7 +30,7 @@ def json_mode(self) -> bool: """ return self.output_schema() is not None - def output_schema(self) -> Optional[Dict | Type[BaseModel]]: + def output_schema(self) -> dict | type[BaseModel] | None: """ Returns the schema of the desired output. Can be used to request structured output from the LLM API or to validate the output. Can return either a Pydantic model or a JSON schema. diff --git a/packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py b/packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py index 0b5aad9e1..df2eb33a7 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py @@ -44,7 +44,6 @@ def discover(self) -> set[type[Prompt]]: Returns: set[Prompt]: The discovered Prompt objects. """ - result_set: set[type[Prompt]] = set() for file_path in self.root_path.glob(self.file_pattern): diff --git a/packages/ragbits-core/src/ragbits/core/prompt/lab/app.py b/packages/ragbits-core/src/ragbits/core/prompt/lab/app.py index 2f9498f07..c4e627480 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/lab/app.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/lab/app.py @@ -62,7 +62,7 @@ def render_prompt(index: int, system_prompt: str, user_prompt: str, state: Promp input_type = prompt_class.input_type input_fields = get_input_type_fields(input_type) - variables = {field["field_name"]: value for field, value in zip(input_fields, args)} + variables = {field["field_name"]: value for field, value in zip(input_fields, args, strict=False)} input_data = input_type(**variables) if input_type is not None else None prompt_object = prompt_class(input_data=input_data) state = replace(state, rendered_prompt=prompt_object) diff --git a/packages/ragbits-core/src/ragbits/core/prompt/parsers.py b/packages/ragbits-core/src/ragbits/core/prompt/parsers.py index b52788b0d..baf6462ef 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/parsers.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/parsers.py @@ -1,4 +1,5 @@ -from typing import Any, Callable, Type, TypeVar +from collections.abc import Callable +from typing import Any, TypeVar from pydantic import BaseModel, ValidationError @@ -87,7 +88,7 @@ def bool_parser(value: str) -> bool: raise ResponseParsingError(f"Could not parse '{value}' as a boolean") -def build_pydantic_parser(model: Type[PydanticModelT]) -> Callable[[str], PydanticModelT]: +def build_pydantic_parser(model: type[PydanticModelT]) -> Callable[[str], PydanticModelT]: """ Builds a parser for a specific Pydantic model. @@ -122,7 +123,7 @@ def parser(value: str) -> PydanticModelT: return parser -DEFAULT_PARSERS: dict[Type, Callable[[str], Any]] = { +DEFAULT_PARSERS: dict[type, Callable[[str], Any]] = { int: int_parser, str: str_parser, float: float_parser, diff --git a/packages/ragbits-core/src/ragbits/core/prompt/prompt.py b/packages/ragbits-core/src/ragbits/core/prompt/prompt.py index 876a68a4d..23b4e38a2 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/prompt.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/prompt.py @@ -1,6 +1,7 @@ import textwrap from abc import ABCMeta -from typing import Any, Callable, Dict, Generic, Optional, Tuple, Type, cast, get_args, get_origin, overload +from collections.abc import Callable +from typing import Any, Generic, cast, get_args, get_origin, overload from jinja2 import Environment, Template, meta from pydantic import BaseModel @@ -9,7 +10,7 @@ from .base import BasePromptWithParser, ChatFormat, OutputT from .parsers import DEFAULT_PARSERS, build_pydantic_parser -InputT = TypeVar("InputT", bound=Optional[BaseModel]) +InputT = TypeVar("InputT", bound=BaseModel | None) class Prompt(Generic[InputT, OutputT], BasePromptWithParser[OutputT], metaclass=ABCMeta): @@ -20,7 +21,7 @@ class Prompt(Generic[InputT, OutputT], BasePromptWithParser[OutputT], metaclass= and optionally the input and output types. The system prompt is optional. """ - system_prompt: Optional[str] = None + system_prompt: str | None = None user_prompt: str # Additional messages to be added to the conversation after the system prompt @@ -31,13 +32,13 @@ class Prompt(Generic[InputT, OutputT], BasePromptWithParser[OutputT], metaclass= response_parser: Callable[[str], OutputT] # Automatically set in __init_subclass__ - input_type: Optional[Type[InputT]] - output_type: Type[OutputT] - system_prompt_template: Optional[Template] + input_type: type[InputT] | None + output_type: type[OutputT] + system_prompt_template: Template | None user_prompt_template: Template @classmethod - def _get_io_types(cls) -> Tuple: + def _get_io_types(cls) -> tuple: bases = get_original_bases(cls) for base in bases: if get_origin(base) is Prompt: @@ -64,7 +65,7 @@ def _parse_template(cls, template: str) -> Template: return Template(template) @classmethod - def _render_template(cls, template: Template, input_data: Optional[InputT]) -> str: + def _render_template(cls, template: Template, input_data: InputT | None) -> str: # Workaround for not being able to use `input is not None` # because of mypy issue: https://github.com/python/mypy/issues/12622 context = {} @@ -158,7 +159,7 @@ def add_few_shot(self, user_message: str, assistant_message: str) -> "Prompt[Inp self._instace_few_shots.append({"role": "assistant", "content": assistant_message}) return self - def output_schema(self) -> Optional[Dict | Type[BaseModel]]: + def output_schema(self) -> dict | type[BaseModel] | None: """ Returns the schema of the desired output. Can be used to request structured output from the LLM API or to validate the output. Can return either a Pydantic model or a JSON schema. diff --git a/packages/ragbits-core/src/ragbits/core/vector_store/base.py b/packages/ragbits-core/src/ragbits/core/vector_store/base.py index 4d494c561..65269f5d2 100644 --- a/packages/ragbits-core/src/ragbits/core/vector_store/base.py +++ b/packages/ragbits-core/src/ragbits/core/vector_store/base.py @@ -1,5 +1,4 @@ import abc -from typing import List from pydantic import BaseModel @@ -20,7 +19,7 @@ class VectorStore(abc.ABC): """ @abc.abstractmethod - async def store(self, entries: List[VectorDBEntry]) -> None: + async def store(self, entries: list[VectorDBEntry]) -> None: """ Store entries in the vector store. diff --git a/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py b/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py index 259fdc7f6..cf3c6f566 100644 --- a/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py +++ b/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py @@ -1,6 +1,6 @@ import json from hashlib import sha256 -from typing import List, Literal, Optional, Union +from typing import Literal try: import chromadb @@ -21,8 +21,8 @@ def __init__( self, index_name: str, chroma_client: chromadb.ClientAPI, - embedding_function: Union[Embeddings, chromadb.EmbeddingFunction], - max_distance: Optional[float] = None, + embedding_function: Embeddings | chromadb.EmbeddingFunction, + max_distance: float | None = None, distance_method: Literal["l2", "ip", "cosine"] = "l2", ): """ @@ -63,7 +63,7 @@ def _get_chroma_collection(self) -> chromadb.Collection: embedding_function=self._embedding_function, ) - def _return_best_match(self, retrieved: dict) -> Optional[str]: + def _return_best_match(self, retrieved: dict) -> str | None: """ Based on the retrieved data, returns the best match or None if no match is found. @@ -90,7 +90,7 @@ def _process_db_entry(self, entry: VectorDBEntry) -> tuple[str, list[float], dic return doc_id, embedding, metadata @property - def embedding_function(self) -> Union[Embeddings, chromadb.EmbeddingFunction]: + def embedding_function(self) -> Embeddings | chromadb.EmbeddingFunction: """ Returns the embedding function. @@ -99,7 +99,7 @@ def embedding_function(self) -> Union[Embeddings, chromadb.EmbeddingFunction]: """ return self._embedding_function - async def store(self, entries: List[VectorDBEntry]) -> None: + async def store(self, entries: list[VectorDBEntry]) -> None: """ Stores entries in the ChromaDB collection. @@ -107,11 +107,11 @@ async def store(self, entries: List[VectorDBEntry]) -> None: entries: The entries to store. """ entries_processed = list(map(self._process_db_entry, entries)) - ids, embeddings, metadatas = map(list, zip(*entries_processed)) + ids, embeddings, metadatas = map(list, zip(*entries_processed, strict=False)) self._collection.add(ids=ids, embeddings=embeddings, metadatas=metadatas) - async def retrieve(self, vector: List[float], k: int = 5) -> List[VectorDBEntry]: + async def retrieve(self, vector: list[float], k: int = 5) -> list[VectorDBEntry]: """ Retrieves entries from the ChromaDB collection. diff --git a/packages/ragbits-document-search/examples/simple_text.py b/packages/ragbits-document-search/examples/simple_text.py index c0a3fa440..2fb97de0b 100644 --- a/packages/ragbits-document-search/examples/simple_text.py +++ b/packages/ragbits-document-search/examples/simple_text.py @@ -25,7 +25,6 @@ async def main(): """Run the example.""" - document_search = DocumentSearch(embedder=LiteLLMEmbeddings(), vector_store=InMemoryVectorStore()) for document in documents: diff --git a/packages/ragbits-document-search/src/ragbits/document_search/_main.py b/packages/ragbits-document-search/src/ragbits/document_search/_main.py index 042898721..e81a7ba13 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/_main.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/_main.py @@ -1,4 +1,4 @@ -from typing import Any, Optional, Union +from typing import Any from pydantic import BaseModel, Field @@ -79,7 +79,7 @@ async def search(self, query: str, search_config: SearchConfig = SearchConfig()) return self.reranker.rerank(elements) async def ingest_document( - self, document: Union[DocumentMeta, Document], document_processor: Optional[BaseProvider] = None + self, document: DocumentMeta | Document, document_processor: BaseProvider | None = None ) -> None: """ Ingest a document. @@ -104,5 +104,5 @@ async def insert_elements(self, elements: list[Element]) -> None: elements: The list of Elements to insert. """ vectors = await self.embedder.embed_text([element.get_key() for element in elements]) - entries = [element.to_vector_db_entry(vector) for element, vector in zip(elements, vectors)] + entries = [element.to_vector_db_entry(vector) for element, vector in zip(elements, vectors, strict=False)] await self.vector_store.store(entries) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py index 2ca2ec9a6..db5a7beeb 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py @@ -1,7 +1,6 @@ import tempfile from enum import Enum from pathlib import Path -from typing import Union from pydantic import BaseModel, Field @@ -39,7 +38,7 @@ class DocumentMeta(BaseModel): """ document_type: DocumentType - source: Union[LocalFileSource, GCSSource] = Field(..., discriminator="source_type") + source: LocalFileSource | GCSSource = Field(..., discriminator="source_type") @property def id(self) -> str: diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py index fc5a93a83..94e50b2a9 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py @@ -101,7 +101,6 @@ async def fetch(self) -> Path: Raises: ImportError: If the required 'gcloud' package is not installed for Google Cloud Storage source. """ - if not HAS_GCLOUD_AIO: raise ImportError("You need to install the 'gcloud-aio-storage' package to use Google Cloud Storage") diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py index 79da68d72..1687a7e3a 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py @@ -1,5 +1,4 @@ import copy -from typing import Optional from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.ingestion.providers.base import BaseProvider @@ -40,7 +39,7 @@ def __init__(self, providers: dict[DocumentType, BaseProvider]): self._providers = providers @classmethod - def from_config(cls, providers_config: Optional[ProvidersConfig] = None) -> "DocumentProcessorRouter": + def from_config(cls, providers_config: ProvidersConfig | None = None) -> "DocumentProcessorRouter": """ Create a DocumentProcessorRouter from a configuration. If the configuration is not provided, the default configuration will be used. If the configuration is provided, it will be merged with the default configuration, diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py index 2e81b8abd..e5929a01f 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py @@ -1,6 +1,5 @@ import os from io import BytesIO -from typing import Optional from unstructured.chunking.basic import chunk_elements from unstructured.documents.elements import Element as UnstructuredElement @@ -50,7 +49,7 @@ class UnstructuredProvider(BaseProvider): DocumentType.XML, } - def __init__(self, partition_kwargs: Optional[dict] = None, chunking_kwargs: Optional[dict] = None): + def __init__(self, partition_kwargs: dict | None = None, chunking_kwargs: dict | None = None): """Initialize the UnstructuredProvider. Args: diff --git a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/noop.py b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/noop.py index 5f1ba744a..85714aa8c 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/noop.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/noop.py @@ -1,4 +1,3 @@ -from typing import List from ragbits.document_search.documents.element import Element from ragbits.document_search.retrieval.rerankers.base import Reranker @@ -10,7 +9,7 @@ class NoopReranker(Reranker): """ @staticmethod - def rerank(chunks: List[Element]) -> List[Element]: + def rerank(chunks: list[Element]) -> list[Element]: """ No reranking, returning the same chunks as in input. diff --git a/pyproject.toml b/pyproject.toml index 48595c6e1..13620469d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,46 +36,8 @@ members = [ multi_line_output = 3 line_length = 120 include_trailing_comma = true -known_first_party = ['ragbits'] -known_third_party = [ # Most popular libraries. Extend if necessary. - 'IPython', - 'PIL', - 'cv2', - 'dotenv', - 'editdistance', - 'fastapi', - 'fire', - 'hydra', - 'joblib', - 'loguru', - 'luigi', - 'matplotlib', - 'neptune', - 'neptune_config', - 'nltk', - 'numpy', - 'omegaconf', - 'pandas', - 'pqdm', - 'pydantic', - 'pytest', - 'pytorch_lightning', - 'requests', - 'scipy', - 'setuptools', - 'shapely', - 'skimage', - 'sklearn', - 'streamlit', - 'torch', - 'torchvision', - 'tqdm', - 'typer', -] -skip_gitignore = true -[tool.black] -line_length = 120 +skip_gitignore = true [tool.pytest] norecursedirs = [ @@ -128,6 +90,7 @@ ignore_missing_imports = false disallow_untyped_defs = true [tool.pylint.basic] +py-version=3.10 good-names = "i,j,x,y,z,x1,y1,z1,x2,y2,z2,cv,df,dx,dy,dz,w,h,c,b,g,qa,q,a" max-args = 8 @@ -228,6 +191,7 @@ ignore = [ "PLR0913", ] + [tool.ruff.lint.pydocstyle] convention = "google" @@ -244,3 +208,16 @@ convention = "google" [tool.ruff.format] docstring-code-format = true docstring-code-line-length = 120 + + +[tool.ruff.lint.isort] +#multi-line-output = "3" # Matches multi_line_output = 3 in isort +#include_trailing_comma = true +known-first-party = ["ragbits"] +known-third-party = [ + "IPython", "PIL", "cv2", "dotenv", "editdistance", "fastapi", "fire", "hydra", + "joblib", "loguru", "luigi", "matplotlib", "neptune", "neptune_config", "nltk", + "numpy", "omegaconf", "pandas", "pqdm", "pydantic", "pytest", "pytorch_lightning", + "requests", "scipy", "setuptools", "shapely", "skimage", "sklearn", "streamlit", + "torch", "torchvision", "tqdm", "typer" +] diff --git a/scripts/update_ragbits_package.py b/scripts/update_ragbits_package.py index 85c9758fb..7d10fc3a6 100644 --- a/scripts/update_ragbits_package.py +++ b/scripts/update_ragbits_package.py @@ -16,7 +16,6 @@ from copy import deepcopy from enum import Enum from pathlib import Path -from typing import Optional import tomlkit import typer @@ -36,7 +35,7 @@ class UpdateType(Enum): PATCH = "patch" -def _update_type_to_enum(update_type: Optional[str] = None) -> Optional[UpdateType]: +def _update_type_to_enum(update_type: str | None = None) -> UpdateType | None: if update_type is not None: return UpdateType(update_type) return None @@ -46,7 +45,7 @@ def _version_to_list(version_string): return [int(part) for part in version_string.split(".")] -def _check_update_type(version: str, new_version: str) -> Optional[UpdateType]: +def _check_update_type(version: str, new_version: str) -> UpdateType | None: version_list = _version_to_list(version) new_version_list = _version_to_list(new_version) @@ -75,9 +74,9 @@ def _get_updated_version(version: str, update_type: UpdateType) -> str: def _update_pkg_version( pkg_name: str, - pkg_pyproject: Optional[tomlkit.TOMLDocument] = None, - new_version: Optional[str] = None, - update_type: Optional[UpdateType] = None, + pkg_pyproject: tomlkit.TOMLDocument | None = None, + new_version: str | None = None, + update_type: UpdateType | None = None, ) -> tuple[str, str]: if not pkg_pyproject: pkg_pyproject = tomlkit.parse((PACKAGES_DIR / pkg_name / "pyproject.toml").read_text()) @@ -100,7 +99,7 @@ def _update_pkg_version( return version, new_version -def run(pkg_name: Optional[str] = typer.Argument(None), update_type: Optional[str] = typer.Argument(None)) -> None: +def run(pkg_name: str | None = typer.Argument(None), update_type: str | None = typer.Argument(None)) -> None: """ Main entry point for the package version updater. Updates package versions based on user input. @@ -120,7 +119,6 @@ def run(pkg_name: Optional[str] = typer.Argument(None), update_type: Optional[st Raises: ValueError: If the provided `pkg_name` is not found in the available packages. """ - packages: list[str] = [obj.name for obj in PACKAGES_DIR.iterdir() if obj.is_dir()] if pkg_name is not None: From de78074d116926a9a5f2b1883bb236d50ec5ab77 Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Fri, 11 Oct 2024 08:20:59 +0200 Subject: [PATCH 05/28] Switching to just ruff and mypy. --- .pre-commit-config.yaml | 65 +------------------ .../src/ragbits/core/embeddings/litellm.py | 7 +- .../src/ragbits/core/embeddings/local.py | 6 +- .../src/ragbits/core/llms/base.py | 6 +- .../src/ragbits/core/llms/types.py | 7 +- .../src/ragbits/core/prompt/lab/app.py | 39 ++++++++--- .../src/ragbits/core/prompt/parsers.py | 4 +- .../src/ragbits/core/prompt/prompt.py | 6 +- .../src/ragbits/core/prompt/promptfoo.py | 4 +- .../tests/unit/prompts/test_prompt.py | 45 ++++++++++--- .../unit/vector_stores/test_chromadb_store.py | 18 ++++- .../src/ragbits/document_search/_main.py | 4 +- .../document_search/documents/element.py | 2 +- .../ingestion/document_processor.py | 4 +- .../ingestion/providers/dummy.py | 6 +- .../retrieval/rerankers/noop.py | 1 - .../tests/unit/test_document_search.py | 3 +- .../tests/unit/test_documents.py | 6 +- .../tests/unit/test_providers.py | 12 +++- pyproject.toml | 7 -- scripts/update_ragbits_package.py | 10 ++- 21 files changed, 144 insertions(+), 118 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6c93efa31..57066e422 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,42 +12,20 @@ repos: - id: check-json - id: check-yaml + - repo: https://github.com/charliermarsh/ruff-pre-commit rev: v0.6.9 hooks: + # E1131: unsupported operand type(s) for | (unsupported-binary-operation) - id: ruff types_or: [ python, pyi, jupyter ] exclude: (/test_|tests/|docs/|notebooks/) args: [ --fix ] + # Formats Python, Pyi, and Jupyter files, excluding specified directories - id: ruff-format types_or: [ python, pyi, jupyter ] exclude: (docs/) - # PEP 8 compliant opinionated formatter. - - repo: https://github.com/psf/black - rev: 23.10.1 - hooks: - - id: black - exclude: (docs/|notebooks/) - args: [--config, pyproject.toml] - - id: black-jupyter - files: \.ipynb$ - - # Cleaning unused imports. - - repo: https://github.com/hadialqattan/pycln - rev: v2.3.0 - hooks: - - id: pycln - args: ["-a"] - exclude: (docs/|notebooks/) - - # Modernizes python code and upgrade syntax for newer versions of the language - - repo: https://github.com/asottile/pyupgrade - rev: v3.15.0 - hooks: - - id: pyupgrade - args: [--py38-plus] - # Used to have proper type annotations for library code. - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.11.2 @@ -57,40 +35,3 @@ repos: # such as types-python-dateutil additional_dependencies: [pydantic>=2.8.2, types-pyyaml>=6.0.12] exclude: (/test_|setup.py|/tests/|docs/) - - # Checks Python source files for errors. - - repo: https://github.com/PyCQA/flake8 - rev: 7.1.1 - hooks: - - id: flake8 - name: flake8 - entry: flake8 - language: python - types: [python] - args: [--config, .flake8] - exclude: (docs/) - - # Enforces a coding standard, looks for code smells, and can make suggestions about how the code could be refactored. - - repo: https://github.com/pycqa/pylint - rev: v3.2.6 - hooks: - - id: pylint - exclude: (/test_|tests/|docs/) - # # You can add additional plugins for pylint here, - # here is an example for pydantic, remember to enable it in pyproject.toml - # additional_dependencies: - # - 'pylint_pydantic' - # args: - # # pylint can have issue with python libraries based on C - # # if it fails to find some objects likely you need to add them - # # here: - # ["--extension-pkg-whitelist=pydantic"] - - # Finds common security issues in Python code. - - repo: https://github.com/PyCQA/bandit - rev: 1.7.5 - hooks: - - id: bandit - args: [-c, pyproject.toml, --recursive, packages/] - additional_dependencies: [".[toml]"] # required for pyproject.toml support - exclude: (notebooks/) diff --git a/packages/ragbits-core/src/ragbits/core/embeddings/litellm.py b/packages/ragbits-core/src/ragbits/core/embeddings/litellm.py index 649099c3b..64e47143c 100644 --- a/packages/ragbits-core/src/ragbits/core/embeddings/litellm.py +++ b/packages/ragbits-core/src/ragbits/core/embeddings/litellm.py @@ -1,4 +1,3 @@ - try: import litellm @@ -7,7 +6,11 @@ HAS_LITELLM = False from ragbits.core.embeddings.base import Embeddings -from ragbits.core.embeddings.exceptions import EmbeddingConnectionError, EmbeddingResponseError, EmbeddingStatusError +from ragbits.core.embeddings.exceptions import ( + EmbeddingConnectionError, + EmbeddingResponseError, + EmbeddingStatusError, +) class LiteLLMEmbeddings(Embeddings): diff --git a/packages/ragbits-core/src/ragbits/core/embeddings/local.py b/packages/ragbits-core/src/ragbits/core/embeddings/local.py index 20dfe2c61..9b3ed6e73 100644 --- a/packages/ragbits-core/src/ragbits/core/embeddings/local.py +++ b/packages/ragbits-core/src/ragbits/core/embeddings/local.py @@ -58,7 +58,11 @@ async def embed_text(self, data: list[str], batch_size: int = 1) -> list[list[fl embeddings = [] for batch in self._batch(data, batch_size): batch_dict = self.tokenizer( - batch, max_length=self.tokenizer.model_max_length, padding=True, truncation=True, return_tensors="pt" + batch, + max_length=self.tokenizer.model_max_length, + padding=True, + truncation=True, + return_tensors="pt", ).to(self.device) with torch.no_grad(): outputs = self.model(**batch_dict) diff --git a/packages/ragbits-core/src/ragbits/core/llms/base.py b/packages/ragbits-core/src/ragbits/core/llms/base.py index 4f4b6fedb..5eedb9fb9 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/base.py +++ b/packages/ragbits-core/src/ragbits/core/llms/base.py @@ -84,8 +84,7 @@ async def generate( prompt: BasePromptWithParser[OutputT], *, options: LLMOptions | None = None, - ) -> OutputT: - ... + ) -> OutputT: ... @overload async def generate( @@ -93,8 +92,7 @@ async def generate( prompt: BasePrompt, *, options: LLMOptions | None = None, - ) -> OutputT: - ... + ) -> OutputT: ... async def generate( self, diff --git a/packages/ragbits-core/src/ragbits/core/llms/types.py b/packages/ragbits-core/src/ragbits/core/llms/types.py index 19cded7bd..8bb5949be 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/types.py +++ b/packages/ragbits-core/src/ragbits/core/llms/types.py @@ -1,4 +1,6 @@ -from typing_extensions import Literal, override +from typing import Literal + +from typing_extensions import override # Sentinel class used until PEP 0661 is accepted @@ -10,8 +12,7 @@ class NotGiven: For example: ```py - def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: - ... + def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: ... get(timeout=1) # 1s timeout diff --git a/packages/ragbits-core/src/ragbits/core/prompt/lab/app.py b/packages/ragbits-core/src/ragbits/core/prompt/lab/app.py index c4e627480..9758308b7 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/lab/app.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/lab/app.py @@ -16,7 +16,10 @@ from ragbits.core.llms import LiteLLM from ragbits.core.llms.clients import LiteLLMOptions from ragbits.core.prompt import Prompt -from ragbits.core.prompt.discovery.prompt_discovery import DEFAULT_FILE_PATTERN, PromptDiscovery +from ragbits.core.prompt.discovery.prompt_discovery import ( + DEFAULT_FILE_PATTERN, + PromptDiscovery, +) @dataclass(frozen=True) @@ -135,7 +138,9 @@ def get_input_type_fields(obj: BaseModel | None) -> list[dict]: def lab_app( # pylint: disable=missing-param-doc - file_pattern: str = DEFAULT_FILE_PATTERN, llm_model: str | None = None, llm_api_key: str | None = None + file_pattern: str = DEFAULT_FILE_PATTERN, + llm_model: str | None = None, + llm_api_key: str | None = None, ) -> None: """ Launches the interactive application for listing, rendering, and testing prompts @@ -169,7 +174,9 @@ def lab_app( # pylint: disable=missing-param-doc ) prompt_selection_dropdown = gr.Dropdown( - choices=list_prompt_choices(prompts_state.value), value=0, label="Select Prompt" + choices=list_prompt_choices(prompts_state.value), + value=0, + label="Select Prompt", ) @gr.render(inputs=[prompt_selection_dropdown, prompts_state]) @@ -196,7 +203,9 @@ def show_split(index: int, state: gr.State) -> None: with gr.Row(): with gr.Column(): prompt_details_system_prompt = gr.Textbox( - label="System Prompt", value=prompt.system_prompt, interactive=True + label="System Prompt", + value=prompt.system_prompt, + interactive=True, ) with gr.Column(): @@ -204,20 +213,28 @@ def show_split(index: int, state: gr.State) -> None: state.rendered_prompt.rendered_system_prompt if state.rendered_prompt else "" ) gr.Textbox( - label="Rendered System Prompt", value=rendered_system_prompt, interactive=False + label="Rendered System Prompt", + value=rendered_system_prompt, + interactive=False, ) with gr.Row(): with gr.Column(): prompt_details_user_prompt = gr.Textbox( - label="User Prompt", value=prompt.user_prompt, interactive=True + label="User Prompt", + value=prompt.user_prompt, + interactive=True, ) with gr.Column(): rendered_user_prompt = ( state.rendered_prompt.rendered_user_prompt if state.rendered_prompt else "" ) - gr.Textbox(label="Rendered User Prompt", value=rendered_user_prompt, interactive=False) + gr.Textbox( + label="Rendered User Prompt", + value=rendered_user_prompt, + interactive=False, + ) llm_enabled = state.llm_model_name is not None prompt_ready = state.rendered_prompt is not None @@ -226,9 +243,13 @@ def show_split(index: int, state: gr.State) -> None: interactive=llm_enabled and prompt_ready, ) gr.Markdown( - "To enable this button, select an LLM model when starting the app in CLI.", visible=not llm_enabled + "To enable this button, select an LLM model when starting the app in CLI.", + visible=not llm_enabled, + ) + gr.Markdown( + "To enable this button, render a prompt first.", + visible=llm_enabled and not prompt_ready, ) - gr.Markdown("To enable this button, render a prompt first.", visible=llm_enabled and not prompt_ready) llm_prompt_response = gr.Textbox(lines=10, label="LLM response") render_prompt_button.click( diff --git a/packages/ragbits-core/src/ragbits/core/prompt/parsers.py b/packages/ragbits-core/src/ragbits/core/prompt/parsers.py index baf6462ef..cf3001f0d 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/parsers.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/parsers.py @@ -88,7 +88,9 @@ def bool_parser(value: str) -> bool: raise ResponseParsingError(f"Could not parse '{value}' as a boolean") -def build_pydantic_parser(model: type[PydanticModelT]) -> Callable[[str], PydanticModelT]: +def build_pydantic_parser( + model: type[PydanticModelT], +) -> Callable[[str], PydanticModelT]: """ Builds a parser for a specific Pydantic model. diff --git a/packages/ragbits-core/src/ragbits/core/prompt/prompt.py b/packages/ragbits-core/src/ragbits/core/prompt/prompt.py index 23b4e38a2..a3cdabf99 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/prompt.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/prompt.py @@ -102,12 +102,10 @@ def __init_subclass__(cls, **kwargs: Any) -> None: return super().__init_subclass__(**kwargs) @overload - def __init__(self: "Prompt[None, OutputT]") -> None: - ... + def __init__(self: "Prompt[None, OutputT]") -> None: ... @overload - def __init__(self: "Prompt[InputT, OutputT]", input_data: InputT) -> None: - ... + def __init__(self: "Prompt[InputT, OutputT]", input_data: InputT) -> None: ... def __init__(self, *args: Any, **kwargs: Any) -> None: input_data = args[0] if args else kwargs.get("input_data") diff --git a/packages/ragbits-core/src/ragbits/core/prompt/promptfoo.py b/packages/ragbits-core/src/ragbits/core/prompt/promptfoo.py index 450a2ef3d..cfdf3cf2f 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/promptfoo.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/promptfoo.py @@ -9,7 +9,9 @@ def generate_configs( - file_pattern: str = DEFAULT_FILE_PATTERN, root_path: Path = Path.cwd(), target_path: Path = Path("promptfooconfigs") + file_pattern: str = DEFAULT_FILE_PATTERN, + root_path: Path = Path.cwd(), + target_path: Path = Path("promptfooconfigs"), ) -> None: """ Generates promptfoo configuration files for all discovered prompts. diff --git a/packages/ragbits-core/tests/unit/prompts/test_prompt.py b/packages/ragbits-core/tests/unit/prompts/test_prompt.py index 3af149bcf..ada32e3f9 100644 --- a/packages/ragbits-core/tests/unit/prompts/test_prompt.py +++ b/packages/ragbits-core/tests/unit/prompts/test_prompt.py @@ -124,7 +124,10 @@ class TestPrompt(Prompt[_PromptInput, str]): # pylint: disable=unused-variable assert prompt.rendered_system_prompt == "You are a song generator for a adult named Alice." assert prompt.rendered_user_prompt == "Theme for the song is rock." assert prompt.chat == [ - {"role": "system", "content": "You are a song generator for a adult named Alice."}, + { + "role": "system", + "content": "You are a song generator for a adult named Alice.", + }, {"role": "user", "content": "Theme for the song is rock."}, ] @@ -157,7 +160,10 @@ class TestPrompt(Prompt[_PromptInput, str]): # pylint: disable=unused-variable prompt = TestPrompt(_PromptInput(name="John", age=15, theme="rock")) assert prompt.chat == [ - {"role": "system", "content": "You are a song generator for a child named John."}, + { + "role": "system", + "content": "You are a song generator for a child named John.", + }, {"role": "user", "content": "Theme for the song is pop."}, {"role": "assistant", "content": "It's a really catchy tune."}, {"role": "user", "content": "Theme for the song is rock."}, @@ -179,7 +185,10 @@ class TestPrompt(Prompt[_PromptInput, str]): # pylint: disable=unused-variable prompt.add_few_shot("Theme for the song is pop.", "It's a really catchy tune.") assert prompt.chat == [ - {"role": "system", "content": "You are a song generator for a child named John."}, + { + "role": "system", + "content": "You are a song generator for a child named John.", + }, {"role": "user", "content": "Theme for the song is pop."}, {"role": "assistant", "content": "It's a really catchy tune."}, {"role": "user", "content": "Theme for the song is rock."}, @@ -202,13 +211,22 @@ class TestPrompt(Prompt[_PromptInput, str]): # pylint: disable=unused-variable ] prompt = TestPrompt(_PromptInput(name="John", age=15, theme="rock")) - prompt.add_few_shot("Theme for the song is experimental underground jazz.", "It's quite hard to dance to.") + prompt.add_few_shot( + "Theme for the song is experimental underground jazz.", + "It's quite hard to dance to.", + ) assert prompt.chat == [ - {"role": "system", "content": "You are a song generator for a child named John."}, + { + "role": "system", + "content": "You are a song generator for a child named John.", + }, {"role": "user", "content": "Theme for the song is pop."}, {"role": "assistant", "content": "It's a really catchy tune."}, - {"role": "user", "content": "Theme for the song is experimental underground jazz."}, + { + "role": "user", + "content": "Theme for the song is experimental underground jazz.", + }, {"role": "assistant", "content": "It's quite hard to dance to."}, {"role": "user", "content": "Theme for the song is rock."}, ] @@ -268,7 +286,10 @@ class TestPrompt(Prompt[_PromptInput, str]): # pylint: disable=unused-variable user_prompt = "Theme for the song is {{ theme }}." assert TestPrompt.to_promptfoo(promptfoo_test_config) == [ - {"role": "system", "content": "You are a song generator for a adult named John."}, + { + "role": "system", + "content": "You are a song generator for a adult named John.", + }, {"role": "user", "content": "Theme for the song is pop."}, ] @@ -293,14 +314,20 @@ class TestPrompt(Prompt[_PromptInput, str]): # pylint: disable=unused-variable prompt2.add_few_shot("Theme for the song is 90s pop.", "Why do I know all the words?") assert prompt1.chat == [ - {"role": "system", "content": "You are a song generator for a child named John."}, + { + "role": "system", + "content": "You are a song generator for a child named John.", + }, {"role": "user", "content": "Theme for the song is 80s disco."}, {"role": "assistant", "content": "I can't stop dancing."}, {"role": "user", "content": "Theme for the song is pop."}, ] assert prompt2.chat == [ - {"role": "system", "content": "You are a song generator for a adult named Alice."}, + { + "role": "system", + "content": "You are a song generator for a adult named Alice.", + }, {"role": "user", "content": "Theme for the song is 90s pop."}, {"role": "assistant", "content": "Why do I know all the words?"}, {"role": "user", "content": "Theme for the song is rock."}, diff --git a/packages/ragbits-core/tests/unit/vector_stores/test_chromadb_store.py b/packages/ragbits-core/tests/unit/vector_stores/test_chromadb_store.py index 1d08f90e9..6822f7799 100644 --- a/packages/ragbits-core/tests/unit/vector_stores/test_chromadb_store.py +++ b/packages/ragbits-core/tests/unit/vector_stores/test_chromadb_store.py @@ -55,7 +55,11 @@ def mock_vector_db_entry(): vector=[0.1, 0.2, 0.3], metadata={ "content": "test content", - "document": {"title": "test title", "source": {"path": "/test/path"}, "document_type": "test_type"}, + "document": { + "title": "test title", + "source": {"path": "/test/path"}, + "document_type": "test_type", + }, }, ) @@ -63,7 +67,11 @@ def mock_vector_db_entry(): def test_chromadbstore_init_import_error(): with patch("ragbits.core.vector_store.chromadb_store.HAS_CHROMADB", False): with pytest.raises(ImportError): - ChromaDBStore(index_name="test_index", chroma_client=MagicMock(), embedding_function=MagicMock()) + ChromaDBStore( + index_name="test_index", + chroma_client=MagicMock(), + embedding_function=MagicMock(), + ) def test_get_chroma_collection(mock_chromadb_store): @@ -79,7 +87,11 @@ async def test_stores_entries_correctly(mock_chromadb_store): vector=[0.1, 0.2, 0.3], metadata={ "content": "test content", - "document": {"title": "test title", "source": {"path": "/test/path"}, "document_type": "test_type"}, + "document": { + "title": "test title", + "source": {"path": "/test/path"}, + "document_type": "test_type", + }, }, ) ] diff --git a/packages/ragbits-document-search/src/ragbits/document_search/_main.py b/packages/ragbits-document-search/src/ragbits/document_search/_main.py index e81a7ba13..6076ded1e 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/_main.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/_main.py @@ -79,7 +79,9 @@ async def search(self, query: str, search_config: SearchConfig = SearchConfig()) return self.reranker.rerank(elements) async def ingest_document( - self, document: DocumentMeta | Document, document_processor: BaseProvider | None = None + self, + document: DocumentMeta | Document, + document_processor: BaseProvider | None = None, ) -> None: """ Ingest a document. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py index 744aed729..20c4b4b29 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py @@ -27,7 +27,7 @@ def get_key(self) -> str: """ @classmethod - def __pydantic_init_subclass__(cls, **kwargs: dict) -> None: # pylint: disable=unused-argument + def __pydantic_init_subclass__(cls) -> None: # pylint: disable=unused-argument element_type_default = cls.model_fields["element_type"].default if element_type_default is None: diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py index 1687a7e3a..85508c51f 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py @@ -2,7 +2,9 @@ from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.ingestion.providers.base import BaseProvider -from ragbits.document_search.ingestion.providers.unstructured import UnstructuredProvider +from ragbits.document_search.ingestion.providers.unstructured import ( + UnstructuredProvider, +) ProvidersConfig = dict[DocumentType, BaseProvider] diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/dummy.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/dummy.py index 712f31a09..11d063925 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/dummy.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/dummy.py @@ -1,4 +1,8 @@ -from ragbits.document_search.documents.document import DocumentMeta, DocumentType, TextDocument +from ragbits.document_search.documents.document import ( + DocumentMeta, + DocumentType, + TextDocument, +) from ragbits.document_search.documents.element import Element, TextElement from ragbits.document_search.ingestion.providers.base import BaseProvider diff --git a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/noop.py b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/noop.py index 85714aa8c..e417a5614 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/noop.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/noop.py @@ -1,4 +1,3 @@ - from ragbits.document_search.documents.element import Element from ragbits.document_search.retrieval.rerankers.base import Reranker diff --git a/packages/ragbits-document-search/tests/unit/test_document_search.py b/packages/ragbits-document-search/tests/unit/test_document_search.py index 8f6ee9e1c..21d838c35 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_search.py +++ b/packages/ragbits-document-search/tests/unit/test_document_search.py @@ -17,7 +17,8 @@ [ DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George"), Document.from_document_meta( - DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George"), Path("test.txt") + DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George"), + Path("test.txt"), ), ], ) diff --git a/packages/ragbits-document-search/tests/unit/test_documents.py b/packages/ragbits-document-search/tests/unit/test_documents.py index 5fd490b90..e5a1dd018 100644 --- a/packages/ragbits-document-search/tests/unit/test_documents.py +++ b/packages/ragbits-document-search/tests/unit/test_documents.py @@ -1,7 +1,11 @@ import tempfile from pathlib import Path -from ragbits.document_search.documents.document import DocumentMeta, DocumentType, TextDocument +from ragbits.document_search.documents.document import ( + DocumentMeta, + DocumentType, + TextDocument, +) from ragbits.document_search.documents.sources import LocalFileSource diff --git a/packages/ragbits-document-search/tests/unit/test_providers.py b/packages/ragbits-document-search/tests/unit/test_providers.py index 8b6eb9d9c..bc3c3c75b 100644 --- a/packages/ragbits-document-search/tests/unit/test_providers.py +++ b/packages/ragbits-document-search/tests/unit/test_providers.py @@ -1,7 +1,9 @@ import pytest from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.ingestion.providers.base import DocumentTypeNotSupportedError +from ragbits.document_search.ingestion.providers.base import ( + DocumentTypeNotSupportedError, +) from ragbits.document_search.ingestion.providers.unstructured import ( UNSTRUCTURED_API_KEY_ENV, UNSTRUCTURED_API_URL_ENV, @@ -10,7 +12,9 @@ @pytest.mark.parametrize("document_type", UnstructuredProvider.SUPPORTED_DOCUMENT_TYPES) -def test_unsupported_provider_validates_supported_document_types_passes(document_type: DocumentType): +def test_unsupported_provider_validates_supported_document_types_passes( + document_type: DocumentType, +): UnstructuredProvider().validate_document_type(document_type) @@ -30,7 +34,9 @@ async def test_unstructured_provider_raises_value_error_when_api_key_not_set(): assert f"{UNSTRUCTURED_API_KEY_ENV} environment variable is not set" in str(err.value) -async def test_unstructured_provider_raises_value_error_when_api_url_not_set(monkeypatch: pytest.MonkeyPatch): +async def test_unstructured_provider_raises_value_error_when_api_url_not_set( + monkeypatch: pytest.MonkeyPatch, +): monkeypatch.setenv(UNSTRUCTURED_API_KEY_ENV, "dummy_key") with pytest.raises(ValueError) as err: await UnstructuredProvider().process( diff --git a/pyproject.toml b/pyproject.toml index 13620469d..8328ae1d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -151,8 +151,6 @@ select = [ # Default rules "E", # pycodestyle errors "F", # Pyflakes - - # Extra rules, by Michał Kustosz "C4", # flake8-comprehensions "C90", # mccabe complex structure "D", # pydocstyle @@ -162,15 +160,12 @@ select = [ "SIM", # flake8-simplify "UP", # pyupgrade "W", # pycodestyle warnings - - # Extra rules, by Jakub Cierocki "S", # flake8-bandit "ANN", # flake8-annotations "B", # flake8-bugbear "NPY", # NumPy-specific rules ] extend-select = [ - # Extra, preview rules, by Jakub Cierocki "RUF022", # unsorted-dunder-all "PLR6301", # no-self-use ] @@ -211,8 +206,6 @@ docstring-code-line-length = 120 [tool.ruff.lint.isort] -#multi-line-output = "3" # Matches multi_line_output = 3 in isort -#include_trailing_comma = true known-first-party = ["ragbits"] known-third-party = [ "IPython", "PIL", "cv2", "dotenv", "editdistance", "fastapi", "fire", "hydra", diff --git a/scripts/update_ragbits_package.py b/scripts/update_ragbits_package.py index 7d10fc3a6..9c4816d0d 100644 --- a/scripts/update_ragbits_package.py +++ b/scripts/update_ragbits_package.py @@ -88,7 +88,10 @@ def _update_pkg_version( new_version = _get_updated_version(version, update_type=update_type) else: pprint(f"Current version of the [bold]{pkg_name}[/bold] package is: [bold]{version}[/bold]") - new_version = text("Enter the new version", default=_get_updated_version(version, UpdateType.PATCH)) + new_version = text( + "Enter the new version", + default=_get_updated_version(version, UpdateType.PATCH), + ) pkg_pyproject["project"]["version"] = new_version (PACKAGES_DIR / pkg_name / "pyproject.toml").write_text(tomlkit.dumps(pkg_pyproject)) @@ -99,7 +102,10 @@ def _update_pkg_version( return version, new_version -def run(pkg_name: str | None = typer.Argument(None), update_type: str | None = typer.Argument(None)) -> None: +def run( + pkg_name: str | None = typer.Argument(None), + update_type: str | None = typer.Argument(None), +) -> None: """ Main entry point for the package version updater. Updates package versions based on user input. From a76ab51822cc130c83893ee22df8b5dcaa899b5b Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Fri, 11 Oct 2024 09:21:26 +0200 Subject: [PATCH 06/28] Switching off automatic fixes. Ruff ignores D200. --- .pre-commit-config.yaml | 2 +- ruffs_output.txt | 1060 --------------------------------------- 2 files changed, 1 insertion(+), 1061 deletions(-) delete mode 100644 ruffs_output.txt diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 57066e422..90dfbbfb1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,7 +20,7 @@ repos: - id: ruff types_or: [ python, pyi, jupyter ] exclude: (/test_|tests/|docs/|notebooks/) - args: [ --fix ] +# args: [ --fix ] # Formats Python, Pyi, and Jupyter files, excluding specified directories - id: ruff-format types_or: [ python, pyi, jupyter ] diff --git a/ruffs_output.txt b/ruffs_output.txt deleted file mode 100644 index 465fe5550..000000000 --- a/ruffs_output.txt +++ /dev/null @@ -1,1060 +0,0 @@ -ruff.....................................................................Failed -- hook id: ruff -- exit code: 1 - -warning: The top-level linter settings are deprecated in favour of their counterparts in the `lint` section. Please update the following options in `pyproject.toml`: - - 'extend-select' -> 'lint.extend-select' - - 'ignore' -> 'lint.ignore' - - 'select' -> 'lint.select' - - 'explicit-preview-rules' -> 'lint.explicit-preview-rules' -packages/ragbits-core/examples/chromadb_example.py:24:11: ANN201 Missing return type annotation for public function `main` - | -24 | async def main(): - | ^^^^ ANN201 -25 | """Run the example.""" -26 | chroma_client = chromadb.PersistentClient(path="chroma") - | - = help: Add return type annotation: `None` - -packages/ragbits-core/examples/llm_example.py:15:5: D200 One-line docstring should fit on one line - | -14 | class LoremPromptInput(BaseModel): -15 | """Input format for the LoremPrompt. - | _____^ -16 | | """ - | |_______^ D200 -17 | -18 | theme: str - | - = help: Reformat to one line - -packages/ragbits-core/examples/llm_example.py:23:5: D200 One-line docstring should fit on one line - | -22 | class LoremPromptOutput(BaseModel): -23 | """Output format for the LoremPrompt. - | _____^ -24 | | """ - | |_______^ D200 -25 | -26 | joke: str - | - = help: Reformat to one line - -packages/ragbits-core/examples/llm_example.py:31:5: D200 One-line docstring should fit on one line - | -30 | class JokePrompt(Prompt[LoremPromptInput, LoremPromptOutput]): -31 | """A prompt that generates jokes. - | _____^ -32 | | """ - | |_______^ D200 -33 | -34 | system_prompt = """ - | - = help: Reformat to one line - -packages/ragbits-core/examples/llm_example.py:35:121: E501 Line too long (127 > 120) - | -34 | system_prompt = """ -35 | You are a joke generator. The jokes you generate should be funny and not offensive. {% if not pun_allowed %}Also, make sure - | ^^^^^^^ E501 -36 | that the jokes do not contain any puns.{% else %}You can use any type of joke, even if it contains puns.{% endif %} - | - -packages/ragbits-core/examples/llm_example.py:46:11: ANN201 Missing return type annotation for public function `main` - | -46 | async def main(): - | ^^^^ ANN201 -47 | """Example of using the LiteLLM client with a Prompt class. Requires the OPENAI_API_KEY environment variable to be set. -48 | """ - | - = help: Add return type annotation: `None` - -packages/ragbits-core/examples/llm_example.py:47:5: D200 One-line docstring should fit on one line - | -46 | async def main(): -47 | """Example of using the LiteLLM client with a Prompt class. Requires the OPENAI_API_KEY environment variable to be set. - | _____^ -48 | | """ - | |_______^ D200 -49 | llm = LiteLLM("gpt-4o-2024-08-06", use_structured_output=True) -50 | prompt = JokePrompt(LoremPromptInput(theme="software developers", pun_allowed=True)) - | - = help: Reformat to one line - -packages/ragbits-core/examples/llm_example.py:47:121: E501 Line too long (123 > 120) - | -46 | async def main(): -47 | """Example of using the LiteLLM client with a Prompt class. Requires the OPENAI_API_KEY environment variable to be set. - | ^^^ E501 -48 | """ -49 | llm = LiteLLM("gpt-4o-2024-08-06", use_structured_output=True) - | - -packages/ragbits-core/examples/prompt_example.py:12:5: D200 One-line docstring should fit on one line - | -11 | class LoremPromptInput(BaseModel): -12 | """Input format for the LoremPrompt. - | _____^ -13 | | """ - | |_______^ D200 -14 | -15 | theme: str - | - = help: Reformat to one line - -packages/ragbits-core/examples/prompt_example.py:20:5: D200 One-line docstring should fit on one line - | -19 | class LoremPromptOutput(BaseModel): -20 | """Output format for the LoremPrompt. - | _____^ -21 | | """ - | |_______^ D200 -22 | -23 | text: str - | - = help: Reformat to one line - -packages/ragbits-core/examples/prompt_example.py:27:5: D200 One-line docstring should fit on one line - | -26 | class LoremPrompt(Prompt[LoremPromptInput, LoremPromptOutput]): -27 | """A prompt that generates Lorem Ipsum text. - | _____^ -28 | | """ - | |_______^ D200 -29 | -30 | system_prompt = """ - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/embeddings/base.py:5:5: D200 One-line docstring should fit on one line - | -4 | class Embeddings(ABC): -5 | """Abstract client for communication with embedding models. - | _____^ -6 | | """ - | |_______^ D200 -7 | -8 | @abstractmethod - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py:2:5: D200 One-line docstring should fit on one line - | -1 | class EmbeddingError(Exception): -2 | """Base class for all exceptions raised by the EmbeddingClient. - | _____^ -3 | | """ - | |_______^ D200 -4 | -5 | def __init__(self, message: str) -> None: - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py:5:9: D107 Missing docstring in `__init__` - | -3 | """ -4 | -5 | def __init__(self, message: str) -> None: - | ^^^^^^^^ D107 -6 | super().__init__(message) -7 | self.message = message - | - -packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py:11:5: D200 One-line docstring should fit on one line - | -10 | class EmbeddingConnectionError(EmbeddingError): -11 | """Raised when there is an error connecting to the embedding API. - | _____^ -12 | | """ - | |_______^ D200 -13 | -14 | def __init__(self, message: str = "Connection error.") -> None: - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py:14:9: D107 Missing docstring in `__init__` - | -12 | """ -13 | -14 | def __init__(self, message: str = "Connection error.") -> None: - | ^^^^^^^^ D107 -15 | super().__init__(message) - | - -packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py:19:5: D200 One-line docstring should fit on one line - | -18 | class EmbeddingStatusError(EmbeddingError): -19 | """Raised when an API response has a status code of 4xx or 5xx. - | _____^ -20 | | """ - | |_______^ D200 -21 | -22 | def __init__(self, message: str, status_code: int) -> None: - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py:22:9: D107 Missing docstring in `__init__` - | -20 | """ -21 | -22 | def __init__(self, message: str, status_code: int) -> None: - | ^^^^^^^^ D107 -23 | super().__init__(message) -24 | self.status_code = status_code - | - -packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py:28:5: D200 One-line docstring should fit on one line - | -27 | class EmbeddingResponseError(EmbeddingError): -28 | """Raised when an API response has an invalid schema. - | _____^ -29 | | """ - | |_______^ D200 -30 | -31 | def __init__(self, message: str = "Data returned by API invalid for expected schema.") -> None: - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py:31:9: D107 Missing docstring in `__init__` - | -29 | """ -30 | -31 | def __init__(self, message: str = "Data returned by API invalid for expected schema.") -> None: - | ^^^^^^^^ D107 -32 | super().__init__(message) - | - -packages/ragbits-core/src/ragbits/core/embeddings/litellm.py:14:5: D200 One-line docstring should fit on one line - | -13 | class LiteLLMEmbeddings(Embeddings): -14 | """Client for creating text embeddings using LiteLLM API. - | _____^ -15 | | """ - | |_______^ D200 -16 | -17 | def __init__( - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/embeddings/local.py:16:5: D200 One-line docstring should fit on one line - | -15 | class LocalEmbeddings(Embeddings): -16 | """Class for interaction with any encoder available in HuggingFace. - | _____^ -17 | | """ - | |_______^ D200 -18 | -19 | def __init__( - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/llms/base.py:11:5: D200 One-line docstring should fit on one line - | -10 | class LLM(Generic[LLMClientOptions], ABC): -11 | """Abstract class for interaction with Large Language Model. - | _____^ -12 | | """ - | |_______^ D200 -13 | -14 | _options_cls: type[LLMClientOptions] - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/llms/base.py:36:9: D200 One-line docstring should fit on one line - | -34 | @abstractmethod -35 | def client(self) -> LLMClient: -36 | """Client for the LLM. - | _________^ -37 | | """ - | |___________^ D200 -38 | -39 | def count_tokens(self, prompt: BasePrompt) -> int: - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/llms/base.py:39:9: PLR6301 Method `count_tokens` could be a function, class method, or static method - | -37 | """ -38 | -39 | def count_tokens(self, prompt: BasePrompt) -> int: - | ^^^^^^^^^^^^ PLR6301 -40 | """Counts tokens in the prompt. - | - -packages/ragbits-core/src/ragbits/core/llms/clients/base.py:14:7: B024 `LLMOptions` is an abstract base class, but it has no abstract methods - | -13 | @dataclass -14 | class LLMOptions(ABC): - | ^^^^^^^^^^ B024 -15 | """Abstract dataclass that represents all available LLM call options. -16 | """ - | - -packages/ragbits-core/src/ragbits/core/llms/clients/base.py:15:5: D200 One-line docstring should fit on one line - | -13 | @dataclass -14 | class LLMOptions(ABC): -15 | """Abstract dataclass that represents all available LLM call options. - | _____^ -16 | | """ - | |_______^ D200 -17 | -18 | _not_given: ClassVar[Any] = None - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/llms/clients/base.py:21:9: D200 One-line docstring should fit on one line - | -20 | def __or__(self, other: "LLMOptions") -> "LLMOptions": -21 | """Merges two LLMOptions, prioritizing non-NOT_GIVEN values from the 'other' object. - | _________^ -22 | | """ - | |___________^ D200 -23 | self_dict = asdict(self) -24 | other_dict = asdict(other) - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/llms/clients/base.py:50:5: D200 One-line docstring should fit on one line - | -49 | class LLMClient(Generic[LLMClientOptions], ABC): -50 | """Abstract client for a direct communication with LLM. - | _____^ -51 | | """ - | |_______^ D200 -52 | -53 | def __init__(self, model_name: str) -> None: - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py:2:5: D200 One-line docstring should fit on one line - | -1 | class LLMError(Exception): -2 | """Base class for all exceptions raised by the LLMClient. - | _____^ -3 | | """ - | |_______^ D200 -4 | -5 | def __init__(self, message: str) -> None: - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py:5:9: D107 Missing docstring in `__init__` - | -3 | """ -4 | -5 | def __init__(self, message: str) -> None: - | ^^^^^^^^ D107 -6 | super().__init__(message) -7 | self.message = message - | - -packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py:11:5: D200 One-line docstring should fit on one line - | -10 | class LLMConnectionError(LLMError): -11 | """Raised when there is an error connecting to the LLM API. - | _____^ -12 | | """ - | |_______^ D200 -13 | -14 | def __init__(self, message: str = "Connection error.") -> None: - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py:14:9: D107 Missing docstring in `__init__` - | -12 | """ -13 | -14 | def __init__(self, message: str = "Connection error.") -> None: - | ^^^^^^^^ D107 -15 | super().__init__(message) - | - -packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py:19:5: D200 One-line docstring should fit on one line - | -18 | class LLMStatusError(LLMError): -19 | """Raised when an API response has a status code of 4xx or 5xx. - | _____^ -20 | | """ - | |_______^ D200 -21 | -22 | def __init__(self, message: str, status_code: int) -> None: - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py:22:9: D107 Missing docstring in `__init__` - | -20 | """ -21 | -22 | def __init__(self, message: str, status_code: int) -> None: - | ^^^^^^^^ D107 -23 | super().__init__(message) -24 | self.status_code = status_code - | - -packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py:28:5: D200 One-line docstring should fit on one line - | -27 | class LLMResponseError(LLMError): -28 | """Raised when an API response has an invalid schema. - | _____^ -29 | | """ - | |_______^ D200 -30 | -31 | def __init__(self, message: str = "Data returned by API invalid for expected schema.") -> None: - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py:31:9: D107 Missing docstring in `__init__` - | -29 | """ -30 | -31 | def __init__(self, message: str = "Data returned by API invalid for expected schema.") -> None: - | ^^^^^^^^ D107 -32 | super().__init__(message) - | - -packages/ragbits-core/src/ragbits/core/llms/clients/local.py:21:5: D415 First line should end with a period, question mark, or exclamation point - | -19 | @dataclass -20 | class LocalLLMOptions(LLMOptions): -21 | """Dataclass that represents all available LLM call options for the local LLM client. - | _____^ -22 | | Each of them is described in the [HuggingFace documentation] -23 | | (https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation). # pylint: disable=line-too-long -24 | | """ - | |_______^ D415 -25 | -26 | repetition_penalty: float | None | NotGiven = NOT_GIVEN - | - = help: Add closing punctuation - -packages/ragbits-core/src/ragbits/core/llms/clients/local.py:23:121: E501 Line too long (168 > 120) - | -21 | """Dataclass that represents all available LLM call options for the local LLM client. -22 | Each of them is described in the [HuggingFace documentation] -23 | (https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation). # pylint: disable=line-too-long - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ E501 -24 | """ - | - -packages/ragbits-core/src/ragbits/core/llms/clients/local.py:38:5: D200 One-line docstring should fit on one line - | -37 | class LocalLLMClient(LLMClient[LocalLLMOptions]): -38 | """Client for the local LLM that supports Hugging Face models. - | _____^ -39 | | """ - | |_______^ D200 -40 | -41 | _options_cls = LocalLLMOptions - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/llms/litellm.py:17:5: D200 One-line docstring should fit on one line - | -16 | class LiteLLM(LLM[LiteLLMOptions]): -17 | """Class for interaction with any LLM supported by LiteLLM API. - | _____^ -18 | | """ - | |_______^ D200 -19 | -20 | _options_cls = LiteLLMOptions - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/llms/litellm.py:61:9: D200 One-line docstring should fit on one line - | -59 | @cached_property -60 | def client(self) -> LiteLLMClient: -61 | """Client for the LLM. - | _________^ -62 | | """ - | |___________^ D200 -63 | return LiteLLMClient( -64 | model_name=self.model_name, - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/llms/local.py:17:5: D200 One-line docstring should fit on one line - | -16 | class LocalLLM(LLM[LocalLLMOptions]): -17 | """Class for interaction with any LLM available in HuggingFace. - | _____^ -18 | | """ - | |_______^ D200 -19 | -20 | _options_cls = LocalLLMOptions - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/prompt/base.py:12:5: D200 One-line docstring should fit on one line - | -11 | class BasePrompt(metaclass=ABCMeta): -12 | """Base class for prompts - | _____^ -13 | | """ - | |_______^ D200 -14 | -15 | @property - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/prompt/base.py:12:5: D415 First line should end with a period, question mark, or exclamation point - | -11 | class BasePrompt(metaclass=ABCMeta): -12 | """Base class for prompts - | _____^ -13 | | """ - | |_______^ D415 -14 | -15 | @property - | - = help: Add closing punctuation - -packages/ragbits-core/src/ragbits/core/prompt/base.py:26:9: D200 One-line docstring should fit on one line - | -24 | @property -25 | def json_mode(self) -> bool: -26 | """Returns whether the prompt should be sent in JSON mode. - | _________^ -27 | | """ - | |___________^ D200 -28 | return self.output_schema() is not None - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/prompt/base.py:30:9: PLR6301 Method `output_schema` could be a function, class method, or static method - | -28 | return self.output_schema() is not None -29 | -30 | def output_schema(self) -> dict | type[BaseModel] | None: - | ^^^^^^^^^^^^^ PLR6301 -31 | """Returns the schema of the desired output. Can be used to request structured output from the LLM API -32 | or to validate the output. Can return either a Pydantic model or a JSON schema. - | - -packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py:20:9: D107 Missing docstring in `__init__` - | -18 | """ -19 | -20 | def __init__(self, file_pattern: str = DEFAULT_FILE_PATTERN, root_path: Path = Path.cwd()): - | ^^^^^^^^ D107 -21 | self.file_pattern = file_pattern -22 | self.root_path = root_path - | - -packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py:20:84: B008 Do not perform function call `Path.cwd` in argument defaults; instead, perform the call within the function, or read the default from a module-level singleton variable - | -18 | """ -19 | -20 | def __init__(self, file_pattern: str = DEFAULT_FILE_PATTERN, root_path: Path = Path.cwd()): - | ^^^^^^^^^^ B008 -21 | self.file_pattern = file_pattern -22 | self.root_path = root_path - | - -packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py:25:33: ANN401 Dynamically typed expressions (typing.Any) are disallowed in `obj` - | -24 | @staticmethod -25 | def is_prompt_subclass(obj: Any) -> bool: - | ^^^ ANN401 -26 | """Checks if an object is a class that is a subclass of Prompt (but not Prompt itself). - | - -packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py:57:13: S101 Use of `assert` detected - | -55 | module = importlib.util.module_from_spec(spec) -56 | -57 | assert spec.loader is not None - | ^^^^^^ S101 -58 | -59 | try: - | - -packages/ragbits-core/src/ragbits/core/prompt/lab/app.py:40:96: ANN401 Dynamically typed expressions (typing.Any) are disallowed in `*args` - | -40 | def render_prompt(index: int, system_prompt: str, user_prompt: str, state: PromptState, *args: Any) -> PromptState: - | ^^^ ANN401 -41 | """Renders a prompt based on the provided key, system prompt, user prompt, and input variables. - | - -packages/ragbits-core/src/ragbits/core/prompt/lab/app.py:97:5: S101 Use of `assert` detected - | -95 | str: The response generated by the LLM. -96 | """ -97 | assert state.llm_model_name is not None, "LLM model name is not set." - | ^^^^^^ S101 -98 | llm_client = LiteLLM(model_name=state.llm_model_name, api_key=state.llm_api_key) - | - -packages/ragbits-core/src/ragbits/core/prompt/lab/app.py:100:5: S101 Use of `assert` detected - | - 98 | llm_client = LiteLLM(model_name=state.llm_model_name, api_key=state.llm_api_key) - 99 | -100 | assert state.rendered_prompt is not None, "Prompt has not been rendered yet." - | ^^^^^^ S101 -101 | try: -102 | response = asyncio.run( - | - -packages/ragbits-core/src/ragbits/core/prompt/lab/app.py:173:17: SIM117 Use a single `with` statement with multiple contexts instead of nested `with` statements - | -171 | list_of_vars = [] -172 | with gr.Row(): -173 | with gr.Column(scale=1): - | _________________^ -174 | | with gr.Tab("Inputs"): - | |__________________________________________^ SIM117 -175 | input_fields: list = get_input_type_fields(prompt.input_type) -176 | for entry in input_fields: - | - = help: Combine `with` statements - -packages/ragbits-core/src/ragbits/core/prompt/lab/app.py:187:17: SIM117 Use a single `with` statement with multiple contexts instead of nested `with` statements - | -185 | render_prompt_button = gr.Button(value="Render prompts") -186 | -187 | with gr.Column(scale=4): - | _________________^ -188 | | with gr.Tab("Prompt"): - | |__________________________________________^ SIM117 -189 | with gr.Row(): -190 | with gr.Column(): - | - = help: Combine `with` statements - -packages/ragbits-core/src/ragbits/core/prompt/parsers.py:10:5: D200 One-line docstring should fit on one line - | - 9 | class ResponseParsingError(Exception): -10 | """Raised when there is an error parsing an API response. - | _____^ -11 | | """ - | |_______^ D200 -12 | -13 | def __init__(self, message: str) -> None: - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/prompt/parsers.py:13:9: D107 Missing docstring in `__init__` - | -11 | """ -12 | -13 | def __init__(self, message: str) -> None: - | ^^^^^^^^ D107 -14 | super().__init__(message) -15 | self.message = message - | - -packages/ragbits-core/src/ragbits/core/prompt/prompt.py:49:17: S101 Use of `assert` detected - | -47 | output_type = args[1] if len(args) > 1 else str -48 | -49 | assert input_type is None or issubclass( - | ^^^^^^ S101 -50 | input_type, BaseModel -51 | ), "Input type must be a subclass of BaseModel" - | - -packages/ragbits-core/src/ragbits/core/prompt/prompt.py:57:15: S701 By default, jinja2 sets `autoescape` to `False`. Consider using `autoescape=True` or the `select_autoescape` function to mitigate XSS vulnerabilities. - | -55 | @classmethod -56 | def _parse_template(cls, template: str) -> Template: -57 | env = Environment() # nosec B701 - HTML autoescaping not needed for plain text - | ^^^^^^^^^^^ S701 -58 | ast = env.parse(template) -59 | template_variables = meta.find_undeclared_variables(ast) - | - -packages/ragbits-core/src/ragbits/core/prompt/prompt.py:90:42: ANN401 Dynamically typed expressions (typing.Any) are disallowed in `**kwargs` - | -89 | @classmethod -90 | def __init_subclass__(cls, **kwargs: Any) -> None: - | ^^^ ANN401 -91 | if not hasattr(cls, "user_prompt") or cls.user_prompt is None: -92 | raise ValueError("User prompt must be provided") - | - -packages/ragbits-core/src/ragbits/core/prompt/prompt.py:111:9: D107 Missing docstring in `__init__` - | -109 | ... -110 | -111 | def __init__(self, *args: Any, **kwargs: Any) -> None: - | ^^^^^^^^ D107 -112 | input_data = args[0] if args else kwargs.get("input_data") -113 | if self.input_type and input_data is None: - | - -packages/ragbits-core/src/ragbits/core/prompt/promptfoo.py:11:65: B008 Do not perform function call `Path.cwd` in argument defaults; instead, perform the call within the function, or read the default from a module-level singleton variable - | -10 | def generate_configs( -11 | file_pattern: str = DEFAULT_FILE_PATTERN, root_path: Path = Path.cwd(), target_path: Path = Path("promptfooconfigs") - | ^^^^^^^^^^ B008 -12 | ) -> None: -13 | """Generates promptfoo configuration files for all discovered prompts. - | - -packages/ragbits-core/src/ragbits/core/vector_store/base.py:7:5: D200 One-line docstring should fit on one line - | - 6 | class VectorDBEntry(BaseModel): - 7 | """An object representing a vector database entry. - | _____^ - 8 | | """ - | |_______^ D200 - 9 | -10 | key: str - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/vector_store/base.py:16:5: D200 One-line docstring should fit on one line - | -15 | class VectorStore(abc.ABC): -16 | """A class with an implementation of Vector Store, allowing to store and retrieve vectors by similarity function. - | _____^ -17 | | """ - | |_______^ D200 -18 | -19 | @abc.abstractmethod - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py:18:5: D415 First line should end with a period, question mark, or exclamation point - | -17 | class ChromaDBStore(VectorStore): -18 | """Class that stores text embeddings using [Chroma](https://docs.trychroma.com/)""" - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ D415 -19 | -20 | def __init__( - | - = help: Add closing punctuation - -packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py:64:9: D417 Missing argument description in the docstring for `_return_best_match`: `retrieved` - | -62 | ) -63 | -64 | def _return_best_match(self, retrieved: dict) -> str | None: - | ^^^^^^^^^^^^^^^^^^ D417 -65 | """Based on the retrieved data, returns the best match or None if no match is found. - | - -packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py:78:9: PLR6301 Method `_process_db_entry` could be a function, class method, or static method - | -76 | return None -77 | -78 | def _process_db_entry(self, entry: VectorDBEntry) -> tuple[str, list[float], dict]: - | ^^^^^^^^^^^^^^^^^ PLR6301 -79 | doc_id = sha256(entry.key.encode("utf-8")).hexdigest() -80 | embedding = entry.vector - | - -packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py:6:5: D200 One-line docstring should fit on one line - | -5 | class InMemoryVectorStore(VectorStore): -6 | """A simple in-memory implementation of Vector Store, storing vectors in memory. - | _____^ -7 | | """ - | |_______^ D200 -8 | -9 | def __init__(self) -> None: - | - = help: Reformat to one line - -packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py:9:9: D107 Missing docstring in `__init__` - | - 7 | """ - 8 | - 9 | def __init__(self) -> None: - | ^^^^^^^^ D107 -10 | self._storage: dict[str, VectorDBEntry] = {} - | - -packages/ragbits-document-search/examples/simple_text.py:26:11: ANN201 Missing return type annotation for public function `main` - | -26 | async def main(): - | ^^^^ ANN201 -27 | """Run the example.""" -28 | document_search = DocumentSearch(embedder=LiteLLMEmbeddings(), vector_store=InMemoryVectorStore()) - | - = help: Add return type annotation: `None` - -packages/ragbits-document-search/src/ragbits/document_search/_main.py:17:5: D200 One-line docstring should fit on one line - | -16 | class SearchConfig(BaseModel): -17 | """Configuration for the search process. - | _____^ -18 | | """ - | |_______^ D200 -19 | -20 | reranker_kwargs: dict[str, Any] = Field(default_factory=dict) - | - = help: Reformat to one line - -packages/ragbits-document-search/src/ragbits/document_search/_main.py:58:70: B008 Do not perform function call `SearchConfig` in argument defaults; instead, perform the call within the function, or read the default from a module-level singleton variable - | -56 | self.document_processor_router = document_processor_router or DocumentProcessorRouter.from_config() -57 | -58 | async def search(self, query: str, search_config: SearchConfig = SearchConfig()) -> list[Element]: - | ^^^^^^^^^^^^^^ B008 -59 | """Search for the most relevant chunks for a query. - | - -packages/ragbits-document-search/src/ragbits/document_search/documents/document.py:35:5: D200 One-line docstring should fit on one line - | -34 | class DocumentMeta(BaseModel): -35 | """An object representing a document metadata. - | _____^ -36 | | """ - | |_______^ D200 -37 | -38 | document_type: DocumentType - | - = help: Reformat to one line - -packages/ragbits-document-search/src/ragbits/document_search/documents/document.py:95:5: D200 One-line docstring should fit on one line - | -94 | class Document(BaseModel): -95 | """An object representing a document which is downloaded and stored locally. - | _____^ -96 | | """ - | |_______^ D200 -97 | -98 | local_path: Path - | - = help: Reformat to one line - -packages/ragbits-document-search/src/ragbits/document_search/documents/document.py:119:5: D200 One-line docstring should fit on one line - | -118 | class TextDocument(Document): -119 | """An object representing a text document. - | _____^ -120 | | """ - | |_______^ D200 -121 | -122 | @property - | - = help: Reformat to one line - -packages/ragbits-document-search/src/ragbits/document_search/documents/element.py:10:5: D200 One-line docstring should fit on one line - | - 9 | class Element(BaseModel, ABC): -10 | """An object representing an element in a document. - | _____^ -11 | | """ - | |_______^ D200 -12 | -13 | element_type: str - | - = help: Reformat to one line - -packages/ragbits-document-search/src/ragbits/document_search/documents/element.py:68:5: D200 One-line docstring should fit on one line - | -67 | class TextElement(Element): -68 | """An object representing a text element in a document. - | _____^ -69 | | """ - | |_______^ D200 -70 | -71 | element_type: str = "text" - | - = help: Reformat to one line - -packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py:20:5: D200 One-line docstring should fit on one line - | -19 | class Source(BaseModel, ABC): -20 | """An object representing a source. - | _____^ -21 | | """ - | |_______^ D200 -22 | -23 | @abstractmethod - | - = help: Reformat to one line - -packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py:41:5: D200 One-line docstring should fit on one line - | -40 | class LocalFileSource(Source): -41 | """An object representing a local file source. - | _____^ -42 | | """ - | |_______^ D200 -43 | -44 | source_type: Literal["local_file"] = "local_file" - | - = help: Reformat to one line - -packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py:65:5: D200 One-line docstring should fit on one line - | -64 | class GCSSource(Source): -65 | """An object representing a GCS file source. - | _____^ -66 | | """ - | |_______^ D200 -67 | -68 | source_type: Literal["gcs"] = "gcs" - | - = help: Reformat to one line - -packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py:37:9: D107 Missing docstring in `__init__` - | -35 | """ -36 | -37 | def __init__(self, providers: dict[DocumentType, BaseProvider]): - | ^^^^^^^^ D107 -38 | self._providers = providers - | - -packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py:42:9: D415 First line should end with a period, question mark, or exclamation point - | -40 | @classmethod -41 | def from_config(cls, providers_config: ProvidersConfig | None = None) -> "DocumentProcessorRouter": -42 | """Create a DocumentProcessorRouter from a configuration. If the configuration is not provided, the default - | _________^ -43 | | configuration will be used. If the configuration is provided, it will be merged with the default configuration, -44 | | overriding the default values for the document types that are defined in the configuration. -45 | | Example of the configuration: -46 | | { -47 | | DocumentType.TXT: YourCustomProviderClass(), -48 | | DocumentType.PDF: UnstructuredProvider(), -49 | | } -50 | | -51 | | Args: -52 | | providers_config: The dictionary with the providers configuration, mapping the document types to the -53 | | provider class. -54 | | -55 | | Returns: -56 | | The DocumentProcessorRouter. -57 | | """ - | |___________^ D415 -58 | config = copy.deepcopy(DEFAULT_PROVIDERS_CONFIG) -59 | config.update(providers_config if providers_config is not None else {}) - | - = help: Add closing punctuation - -packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/base.py:10:9: D107 Missing docstring in `__init__` - | - 8 | """Raised when the document type is not supported by the provider.""" - 9 | -10 | def __init__(self, provider_name: str, document_type: DocumentType) -> None: - | ^^^^^^^^ D107 -11 | message = f"Document type {document_type} is not supported by the {provider_name}" -12 | super().__init__(message) - | - -packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py:26:5: D200 One-line docstring should fit on one line - | -25 | class UnstructuredProvider(BaseProvider): -26 | """A provider that uses the Unstructured API to process the documents. - | _____^ -27 | | """ - | |_______^ D200 -28 | -29 | SUPPORTED_DOCUMENT_TYPES = { - | - = help: Reformat to one line - -packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py:50:9: D417 Missing argument description in the docstring for `__init__`: `chunking_kwargs` - | -48 | } -49 | -50 | def __init__(self, partition_kwargs: dict | None = None, chunking_kwargs: dict | None = None): - | ^^^^^^^^ D417 -51 | """Initialize the UnstructuredProvider. - | - -packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/base.py:5:5: D200 One-line docstring should fit on one line - | -4 | class QueryRephraser(abc.ABC): -5 | """Rephrases a query. Can provide multiple rephrased queries from one sentence / question. - | _____^ -6 | | """ - | |_______^ D200 -7 | -8 | @staticmethod - | - = help: Reformat to one line - -packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/noop.py:5:5: D200 One-line docstring should fit on one line - | -4 | class NoopQueryRephraser(QueryRephraser): -5 | """A no-op query paraphraser that does not change the query. - | _____^ -6 | | """ - | |_______^ D200 -7 | -8 | @staticmethod - | - = help: Reformat to one line - -packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/base.py:7:5: D200 One-line docstring should fit on one line - | - 6 | class Reranker(abc.ABC): - 7 | """Reranks chunks retrieved from vector store. - | _____^ - 8 | | """ - | |_______^ D200 - 9 | -10 | @staticmethod - | - = help: Reformat to one line - -packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/noop.py:7:5: D200 One-line docstring should fit on one line - | - 6 | class NoopReranker(Reranker): - 7 | """A no-op reranker that does not change the order of the chunks. - | _____^ - 8 | | """ - | |_______^ D200 - 9 | -10 | @staticmethod - | - = help: Reformat to one line - -scripts/create_ragbits_package.py:22:5: D200 One-line docstring should fit on one line - | -21 | def run() -> None: -22 | """Create a new Ragbits package. - | _____^ -23 | | """ - | |_______^ D200 -24 | package_name: str = text("Enter the package name", default="ragbits-") - | - = help: Reformat to one line - -scripts/update_ragbits_package.py:29:5: D200 One-line docstring should fit on one line - | -28 | class UpdateType(Enum): -29 | """Enum representing the type of version update: major, minor, or patch. - | _____^ -30 | | """ - | |_______^ D200 -31 | -32 | MAJOR = "major" - | - = help: Reformat to one line - -scripts/update_ragbits_package.py:43:5: ANN202 Missing return type annotation for private function `_version_to_list` - | -43 | def _version_to_list(version_string): - | ^^^^^^^^^^^^^^^^ ANN202 -44 | return [int(part) for part in version_string.split(".")] - | - = help: Add return type annotation - -scripts/update_ragbits_package.py:43:22: ANN001 Missing type annotation for function argument `version_string` - | -43 | def _version_to_list(version_string): - | ^^^^^^^^^^^^^^ ANN001 -44 | return [int(part) for part in version_string.split(".")] - | - -scripts/update_ragbits_package.py:95:5: S101 Use of `assert` detected - | -93 | (PACKAGES_DIR / pkg_name / "pyproject.toml").write_text(tomlkit.dumps(pkg_pyproject)) -94 | -95 | assert isinstance(new_version, str) - | ^^^^^^ S101 -96 | pprint(f"[green]The {pkg_name} package was successfully updated from {version} to {new_version}.[/green]") - | - -Found 95 errors. -No fixes available (58 hidden fixes can be enabled with the `--unsafe-fixes` option). - From 71029feddfbd64bb4410ad8ca11c9ce78cf00630 Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Fri, 11 Oct 2024 10:02:19 +0200 Subject: [PATCH 07/28] Manually fixed one-line docstrings to comply with our standards. --- packages/ragbits-core/examples/llm_example.py | 12 +++++------- packages/ragbits-core/examples/prompt_example.py | 9 ++++++--- .../ragbits-core/src/ragbits/core/embeddings/base.py | 3 ++- .../src/ragbits/core/embeddings/exceptions.py | 12 ++++++++---- .../src/ragbits/core/embeddings/local.py | 3 ++- .../src/ragbits/core/llms/clients/exceptions.py | 12 ++++++++---- .../src/ragbits/core/llms/clients/local.py | 3 ++- .../ragbits-core/src/ragbits/core/llms/litellm.py | 6 ++++-- packages/ragbits-core/src/ragbits/core/llms/local.py | 3 ++- .../ragbits-core/src/ragbits/core/prompt/prompt.py | 6 ++---- .../src/ragbits/core/vector_store/in_memory.py | 3 ++- .../ragbits/document_search/documents/document.py | 9 ++++----- .../src/ragbits/document_search/documents/element.py | 6 +++--- .../src/ragbits/document_search/documents/sources.py | 9 ++++++--- .../document_search/retrieval/rephrasers/base.py | 3 ++- .../document_search/retrieval/rephrasers/noop.py | 6 ++++-- scripts/create_ragbits_package.py | 3 ++- 17 files changed, 64 insertions(+), 44 deletions(-) diff --git a/packages/ragbits-core/examples/llm_example.py b/packages/ragbits-core/examples/llm_example.py index 0c4b91bfe..efc17f662 100644 --- a/packages/ragbits-core/examples/llm_example.py +++ b/packages/ragbits-core/examples/llm_example.py @@ -12,24 +12,21 @@ class LoremPromptInput(BaseModel): - """Input format for the LoremPrompt. - """ + """Input format for the LoremPrompt.""" theme: str pun_allowed: bool = False class LoremPromptOutput(BaseModel): - """Output format for the LoremPrompt. - """ + """Output format for the LoremPrompt.""" joke: str joke_category: str class JokePrompt(Prompt[LoremPromptInput, LoremPromptOutput]): - """A prompt that generates jokes. - """ + """A prompt that generates jokes.""" system_prompt = """ You are a joke generator. The jokes you generate should be funny and not offensive. {% if not pun_allowed %}Also, make sure @@ -44,7 +41,8 @@ class JokePrompt(Prompt[LoremPromptInput, LoremPromptOutput]): async def main(): - """Example of using the LiteLLM client with a Prompt class. Requires the OPENAI_API_KEY environment variable to be set. + """ + Example of using the LiteLLM client with a Prompt class. Requires the OPENAI_API_KEY environment variable to be set. """ llm = LiteLLM("gpt-4o-2024-08-06", use_structured_output=True) prompt = JokePrompt(LoremPromptInput(theme="software developers", pun_allowed=True)) diff --git a/packages/ragbits-core/examples/prompt_example.py b/packages/ragbits-core/examples/prompt_example.py index 7c1cf676c..3bd627020 100644 --- a/packages/ragbits-core/examples/prompt_example.py +++ b/packages/ragbits-core/examples/prompt_example.py @@ -9,7 +9,8 @@ class LoremPromptInput(BaseModel): - """Input format for the LoremPrompt. + """ + Input format for the LoremPrompt. """ theme: str @@ -17,14 +18,16 @@ class LoremPromptInput(BaseModel): class LoremPromptOutput(BaseModel): - """Output format for the LoremPrompt. + """ + Output format for the LoremPrompt. """ text: str class LoremPrompt(Prompt[LoremPromptInput, LoremPromptOutput]): - """A prompt that generates Lorem Ipsum text. + """ + A prompt that generates Lorem Ipsum text. """ system_prompt = """ diff --git a/packages/ragbits-core/src/ragbits/core/embeddings/base.py b/packages/ragbits-core/src/ragbits/core/embeddings/base.py index b47731575..2ae860d29 100644 --- a/packages/ragbits-core/src/ragbits/core/embeddings/base.py +++ b/packages/ragbits-core/src/ragbits/core/embeddings/base.py @@ -2,7 +2,8 @@ class Embeddings(ABC): - """Abstract client for communication with embedding models. + """ + Abstract client for communication with embedding models. """ @abstractmethod diff --git a/packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py b/packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py index c48ddb93d..4dd99ad1e 100644 --- a/packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py +++ b/packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py @@ -1,5 +1,6 @@ class EmbeddingError(Exception): - """Base class for all exceptions raised by the EmbeddingClient. + """ + Base class for all exceptions raised by the EmbeddingClient. """ def __init__(self, message: str) -> None: @@ -8,7 +9,8 @@ def __init__(self, message: str) -> None: class EmbeddingConnectionError(EmbeddingError): - """Raised when there is an error connecting to the embedding API. + """ + Raised when there is an error connecting to the embedding API. """ def __init__(self, message: str = "Connection error.") -> None: @@ -16,7 +18,8 @@ def __init__(self, message: str = "Connection error.") -> None: class EmbeddingStatusError(EmbeddingError): - """Raised when an API response has a status code of 4xx or 5xx. + """ + Raised when an API response has a status code of 4xx or 5xx. """ def __init__(self, message: str, status_code: int) -> None: @@ -25,7 +28,8 @@ def __init__(self, message: str, status_code: int) -> None: class EmbeddingResponseError(EmbeddingError): - """Raised when an API response has an invalid schema. + """ + Raised when an API response has an invalid schema. """ def __init__(self, message: str = "Data returned by API invalid for expected schema.") -> None: diff --git a/packages/ragbits-core/src/ragbits/core/embeddings/local.py b/packages/ragbits-core/src/ragbits/core/embeddings/local.py index 1b0644a30..a13f7f1da 100644 --- a/packages/ragbits-core/src/ragbits/core/embeddings/local.py +++ b/packages/ragbits-core/src/ragbits/core/embeddings/local.py @@ -13,7 +13,8 @@ class LocalEmbeddings(Embeddings): - """Class for interaction with any encoder available in HuggingFace. + """ + Class for interaction with any encoder available in HuggingFace. """ def __init__( diff --git a/packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py b/packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py index 6550d883b..0f1106bab 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py +++ b/packages/ragbits-core/src/ragbits/core/llms/clients/exceptions.py @@ -1,5 +1,6 @@ class LLMError(Exception): - """Base class for all exceptions raised by the LLMClient. + """ + Base class for all exceptions raised by the LLMClient. """ def __init__(self, message: str) -> None: @@ -8,7 +9,8 @@ def __init__(self, message: str) -> None: class LLMConnectionError(LLMError): - """Raised when there is an error connecting to the LLM API. + """ + Raised when there is an error connecting to the LLM API. """ def __init__(self, message: str = "Connection error.") -> None: @@ -16,7 +18,8 @@ def __init__(self, message: str = "Connection error.") -> None: class LLMStatusError(LLMError): - """Raised when an API response has a status code of 4xx or 5xx. + """ + Raised when an API response has a status code of 4xx or 5xx. """ def __init__(self, message: str, status_code: int) -> None: @@ -25,7 +28,8 @@ def __init__(self, message: str, status_code: int) -> None: class LLMResponseError(LLMError): - """Raised when an API response has an invalid schema. + """ + Raised when an API response has an invalid schema. """ def __init__(self, message: str = "Data returned by API invalid for expected schema.") -> None: diff --git a/packages/ragbits-core/src/ragbits/core/llms/clients/local.py b/packages/ragbits-core/src/ragbits/core/llms/clients/local.py index ee77d715e..443e84e1d 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/clients/local.py +++ b/packages/ragbits-core/src/ragbits/core/llms/clients/local.py @@ -35,7 +35,8 @@ class LocalLLMOptions(LLMOptions): class LocalLLMClient(LLMClient[LocalLLMOptions]): - """Client for the local LLM that supports Hugging Face models. + """ + Client for the local LLM that supports Hugging Face models. """ _options_cls = LocalLLMOptions diff --git a/packages/ragbits-core/src/ragbits/core/llms/litellm.py b/packages/ragbits-core/src/ragbits/core/llms/litellm.py index 0cbde8f6d..3b3603e59 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/litellm.py +++ b/packages/ragbits-core/src/ragbits/core/llms/litellm.py @@ -14,7 +14,8 @@ class LiteLLM(LLM[LiteLLMOptions]): - """Class for interaction with any LLM supported by LiteLLM API. + """ + Class for interaction with any LLM supported by LiteLLM API. """ _options_cls = LiteLLMOptions @@ -58,7 +59,8 @@ def __init__( @cached_property def client(self) -> LiteLLMClient: - """Client for the LLM. + """ + Client for the LLM. """ return LiteLLMClient( model_name=self.model_name, diff --git a/packages/ragbits-core/src/ragbits/core/llms/local.py b/packages/ragbits-core/src/ragbits/core/llms/local.py index fee357248..bfd6d81e7 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/local.py +++ b/packages/ragbits-core/src/ragbits/core/llms/local.py @@ -14,7 +14,8 @@ class LocalLLM(LLM[LocalLLMOptions]): - """Class for interaction with any LLM available in HuggingFace. + """ + Class for interaction with any LLM available in HuggingFace. """ _options_cls = LocalLLMOptions diff --git a/packages/ragbits-core/src/ragbits/core/prompt/prompt.py b/packages/ragbits-core/src/ragbits/core/prompt/prompt.py index 165a18bea..659893242 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/prompt.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/prompt.py @@ -104,12 +104,10 @@ def __init_subclass__(cls, **kwargs: Any) -> None: return super().__init_subclass__(**kwargs) @overload - def __init__(self: "Prompt[None, OutputT]") -> None: - ... + def __init__(self: "Prompt[None, OutputT]") -> None: ... @overload - def __init__(self: "Prompt[InputT, OutputT]", input_data: InputT) -> None: - ... + def __init__(self: "Prompt[InputT, OutputT]", input_data: InputT) -> None: ... def __init__(self, *args: Any, **kwargs: Any) -> None: input_data = args[0] if args else kwargs.get("input_data") diff --git a/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py b/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py index 0af5c08f4..8fc5ee455 100644 --- a/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py +++ b/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py @@ -3,7 +3,8 @@ class InMemoryVectorStore(VectorStore): - """A simple in-memory implementation of Vector Store, storing vectors in memory. + """ + A simple in-memory implementation of Vector Store, storing vectors in memory. """ def __init__(self) -> None: diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py index cfc495487..6143bc4ff 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py @@ -32,7 +32,8 @@ class DocumentType(str, Enum): class DocumentMeta(BaseModel): - """An object representing a document metadata. + """ + An object representing a document metadata. """ document_type: DocumentType @@ -92,8 +93,7 @@ def from_local_path(cls, local_path: Path) -> "DocumentMeta": class Document(BaseModel): - """An object representing a document which is downloaded and stored locally. - """ + """An object representing a document which is downloaded and stored locally.""" local_path: Path metadata: DocumentMeta @@ -116,8 +116,7 @@ def from_document_meta(cls, document_meta: DocumentMeta, local_path: Path) -> "D class TextDocument(Document): - """An object representing a text document. - """ + """An object representing a text document.""" @property def content(self) -> str: diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py index 659586644..02f86433d 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py @@ -7,7 +7,8 @@ class Element(BaseModel, ABC): - """An object representing an element in a document. + """ + An object representing an element in a document. """ element_type: str @@ -65,8 +66,7 @@ def to_vector_db_entry(self, vector: list[float]) -> VectorDBEntry: class TextElement(Element): - """An object representing a text element in a document. - """ + """An object representing a text element in a document.""" element_type: str = "text" content: str diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py index 83a9e39d8..496fd6dcc 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py @@ -17,7 +17,8 @@ class Source(BaseModel, ABC): - """An object representing a source. + """ + An object representing a source. """ @abstractmethod @@ -38,7 +39,8 @@ async def fetch(self) -> Path: class LocalFileSource(Source): - """An object representing a local file source. + """ + An object representing a local file source. """ source_type: Literal["local_file"] = "local_file" @@ -62,7 +64,8 @@ async def fetch(self) -> Path: class GCSSource(Source): - """An object representing a GCS file source. + """ + An object representing a GCS file source. """ source_type: Literal["gcs"] = "gcs" diff --git a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/base.py b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/base.py index cf9d47b24..49a9b2ebd 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/base.py @@ -2,7 +2,8 @@ class QueryRephraser(abc.ABC): - """Rephrases a query. Can provide multiple rephrased queries from one sentence / question. + """ + Rephrases a query. Can provide multiple rephrased queries from one sentence / question. """ @staticmethod diff --git a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/noop.py b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/noop.py index 1760fbddb..8e6b92fd2 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/noop.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/noop.py @@ -2,12 +2,14 @@ class NoopQueryRephraser(QueryRephraser): - """A no-op query paraphraser that does not change the query. + """ + A no-op query paraphraser that does not change the query. """ @staticmethod def rephrase(query: str) -> list[str]: - """Mock implementation which outputs the same query as in input. + """ + Mock implementation which outputs the same query as in input. Args: query: The query to rephrase. diff --git a/scripts/create_ragbits_package.py b/scripts/create_ragbits_package.py index cf94b7b4b..56b9e0031 100644 --- a/scripts/create_ragbits_package.py +++ b/scripts/create_ragbits_package.py @@ -19,7 +19,8 @@ def run() -> None: - """Create a new Ragbits package. + """ + Create a new Ragbits package. """ package_name: str = text("Enter the package name", default="ragbits-") From a962e5c11ff0550a79074f37dd7ba7e41917da1f Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Fri, 11 Oct 2024 10:03:06 +0200 Subject: [PATCH 08/28] And one more file. --- .../src/ragbits/document_search/retrieval/rerankers/base.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/base.py b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/base.py index 78e0a1576..dec886475 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/base.py @@ -4,13 +4,15 @@ class Reranker(abc.ABC): - """Reranks chunks retrieved from vector store. + """ + Reranks chunks retrieved from vector store. """ @staticmethod @abc.abstractmethod def rerank(chunks: list[Element]) -> list[Element]: - """Rerank chunks. + """ + Rerank chunks. Args: chunks: The chunks to rerank. From 17b6b0989696a0e8fd03654ed0eefe0e00062f5f Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Fri, 11 Oct 2024 10:35:47 +0200 Subject: [PATCH 09/28] Fixing fixable ruff lint errors. --- .pre-commit-config.yaml | 2 +- packages/ragbits-cli/src/ragbits/cli/__init__.py | 3 ++- packages/ragbits-core/examples/chromadb_example.py | 1 - packages/ragbits-core/examples/llm_example.py | 1 + packages/ragbits-core/examples/prompt_example.py | 1 + packages/ragbits-core/src/ragbits/core/embeddings/litellm.py | 3 --- .../ragbits-core/src/ragbits/core/vector_store/__init__.py | 2 +- .../ragbits-core/src/ragbits/core/vector_store/in_memory.py | 1 + .../src/ragbits/document_search/documents/document.py | 1 + .../src/ragbits/document_search/documents/element.py | 1 + scripts/update_ragbits_package.py | 1 - 11 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 90dfbbfb1..57066e422 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,7 +20,7 @@ repos: - id: ruff types_or: [ python, pyi, jupyter ] exclude: (/test_|tests/|docs/|notebooks/) -# args: [ --fix ] + args: [ --fix ] # Formats Python, Pyi, and Jupyter files, excluding specified directories - id: ruff-format types_or: [ python, pyi, jupyter ] diff --git a/packages/ragbits-cli/src/ragbits/cli/__init__.py b/packages/ragbits-cli/src/ragbits/cli/__init__.py index 5ba385a5b..6e27a7f11 100644 --- a/packages/ragbits-cli/src/ragbits/cli/__init__.py +++ b/packages/ragbits-cli/src/ragbits/cli/__init__.py @@ -1,9 +1,10 @@ import importlib.util import pkgutil -import ragbits from typer import Typer +import ragbits + app = Typer(no_args_is_help=True) diff --git a/packages/ragbits-core/examples/chromadb_example.py b/packages/ragbits-core/examples/chromadb_example.py index 4abe81823..b7cdc682e 100644 --- a/packages/ragbits-core/examples/chromadb_example.py +++ b/packages/ragbits-core/examples/chromadb_example.py @@ -24,7 +24,6 @@ async def main(): """Run the example.""" - chroma_client = chromadb.PersistentClient(path="chroma") embedding_client = LiteLLMEmbeddings() diff --git a/packages/ragbits-core/examples/llm_example.py b/packages/ragbits-core/examples/llm_example.py index efc17f662..62f2bd89c 100644 --- a/packages/ragbits-core/examples/llm_example.py +++ b/packages/ragbits-core/examples/llm_example.py @@ -7,6 +7,7 @@ import asyncio from pydantic import BaseModel + from ragbits.core.llms.litellm import LiteLLM from ragbits.core.prompt import Prompt diff --git a/packages/ragbits-core/examples/prompt_example.py b/packages/ragbits-core/examples/prompt_example.py index 3bd627020..437e833cd 100644 --- a/packages/ragbits-core/examples/prompt_example.py +++ b/packages/ragbits-core/examples/prompt_example.py @@ -5,6 +5,7 @@ # ] # /// from pydantic import BaseModel + from ragbits.core.prompt import Prompt diff --git a/packages/ragbits-core/src/ragbits/core/embeddings/litellm.py b/packages/ragbits-core/src/ragbits/core/embeddings/litellm.py index 2860c2634..1b3fbe5b7 100644 --- a/packages/ragbits-core/src/ragbits/core/embeddings/litellm.py +++ b/packages/ragbits-core/src/ragbits/core/embeddings/litellm.py @@ -1,5 +1,3 @@ -from typing import Optional - try: import litellm @@ -69,7 +67,6 @@ async def embed_text(self, data: list[str]) -> list[list[float]]: EmbeddingStatusError: If the embedding API returns an error status code. EmbeddingResponseError: If the embedding API response is invalid. """ - try: response = await litellm.aembedding( input=data, diff --git a/packages/ragbits-core/src/ragbits/core/vector_store/__init__.py b/packages/ragbits-core/src/ragbits/core/vector_store/__init__.py index 86c29f71b..2048d09ce 100644 --- a/packages/ragbits-core/src/ragbits/core/vector_store/__init__.py +++ b/packages/ragbits-core/src/ragbits/core/vector_store/__init__.py @@ -2,4 +2,4 @@ from .chromadb_store import ChromaDBStore from .in_memory import InMemoryVectorStore -__all__ = ["VectorStore", "VectorDBEntry", "InMemoryVectorStore", "ChromaDBStore"] +__all__ = ["ChromaDBStore", "InMemoryVectorStore", "VectorDBEntry", "VectorStore"] diff --git a/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py b/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py index 8fc5ee455..d0931c7cd 100644 --- a/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py +++ b/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py @@ -1,4 +1,5 @@ import numpy as np + from ragbits.core.vector_store.base import VectorDBEntry, VectorStore diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py index 6143bc4ff..51b2375f7 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py @@ -3,6 +3,7 @@ from pathlib import Path from pydantic import BaseModel, Field + from ragbits.document_search.documents.sources import GCSSource, LocalFileSource diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py index 02f86433d..102320961 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py @@ -2,6 +2,7 @@ from typing import ClassVar from pydantic import BaseModel + from ragbits.core.vector_store.base import VectorDBEntry from ragbits.document_search.documents.document import DocumentMeta diff --git a/scripts/update_ragbits_package.py b/scripts/update_ragbits_package.py index 134187175..65a91996a 100644 --- a/scripts/update_ragbits_package.py +++ b/scripts/update_ragbits_package.py @@ -16,7 +16,6 @@ from copy import deepcopy from enum import Enum from pathlib import Path -from typing import Optional import tomlkit import typer From 802188f5b506c4ac050f1fd3b9179d80df5f1861 Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Fri, 11 Oct 2024 10:38:14 +0200 Subject: [PATCH 10/28] Adding reasonable fixes from --unsafe-fixes. --- .../ragbits-core/examples/chromadb_example.py | 6 +- packages/ragbits-core/examples/llm_example.py | 2 +- .../src/ragbits/core/prompt/base.py | 2 +- .../src/ragbits/core/prompt/lab/app.py | 98 +++++++++---------- .../core/vector_store/chromadb_store.py | 4 +- .../examples/simple_text.py | 6 +- .../ingestion/document_processor.py | 2 +- 7 files changed, 62 insertions(+), 58 deletions(-) diff --git a/packages/ragbits-core/examples/chromadb_example.py b/packages/ragbits-core/examples/chromadb_example.py index b7cdc682e..77238258f 100644 --- a/packages/ragbits-core/examples/chromadb_example.py +++ b/packages/ragbits-core/examples/chromadb_example.py @@ -22,8 +22,10 @@ ] -async def main(): - """Run the example.""" +async def main() -> None: + """ + Run the example. + """ chroma_client = chromadb.PersistentClient(path="chroma") embedding_client = LiteLLMEmbeddings() diff --git a/packages/ragbits-core/examples/llm_example.py b/packages/ragbits-core/examples/llm_example.py index 62f2bd89c..08d226410 100644 --- a/packages/ragbits-core/examples/llm_example.py +++ b/packages/ragbits-core/examples/llm_example.py @@ -41,7 +41,7 @@ class JokePrompt(Prompt[LoremPromptInput, LoremPromptOutput]): """ -async def main(): +async def main() -> None: """ Example of using the LiteLLM client with a Prompt class. Requires the OPENAI_API_KEY environment variable to be set. """ diff --git a/packages/ragbits-core/src/ragbits/core/prompt/base.py b/packages/ragbits-core/src/ragbits/core/prompt/base.py index 17c30e9ed..9f2f086be 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/base.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/base.py @@ -10,7 +10,7 @@ class BasePrompt(metaclass=ABCMeta): """ - Base class for prompts + Base class for prompts. """ @property diff --git a/packages/ragbits-core/src/ragbits/core/prompt/lab/app.py b/packages/ragbits-core/src/ragbits/core/prompt/lab/app.py index 7c45dec1f..cd3798456 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/lab/app.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/lab/app.py @@ -180,57 +180,55 @@ def show_split(index: int, state: gr.State) -> None: prompt = state.prompts[index] list_of_vars = [] with gr.Row(): - with gr.Column(scale=1): - with gr.Tab("Inputs"): - input_fields: list = get_input_type_fields(prompt.input_type) - for entry in input_fields: - with gr.Row(): - var = gr.Textbox( - label=entry["field_name"], - value=entry["field_default_value"], - interactive=True, - ) - list_of_vars.append(var) - - render_prompt_button = gr.Button(value="Render prompts") - - with gr.Column(scale=4): - with gr.Tab("Prompt"): + with gr.Column(scale=1), gr.Tab("Inputs"): + input_fields: list = get_input_type_fields(prompt.input_type) + for entry in input_fields: with gr.Row(): - with gr.Column(): - prompt_details_system_prompt = gr.Textbox( - label="System Prompt", - value=prompt.system_prompt, - interactive=True, - ) - - with gr.Column(): - rendered_system_prompt = ( - state.rendered_prompt.rendered_system_prompt if state.rendered_prompt else "" - ) - gr.Textbox( - label="Rendered System Prompt", - value=rendered_system_prompt, - interactive=False, - ) - - with gr.Row(): - with gr.Column(): - prompt_details_user_prompt = gr.Textbox( - label="User Prompt", - value=prompt.user_prompt, - interactive=True, - ) - - with gr.Column(): - rendered_user_prompt = ( - state.rendered_prompt.rendered_user_prompt if state.rendered_prompt else "" - ) - gr.Textbox( - label="Rendered User Prompt", - value=rendered_user_prompt, - interactive=False, - ) + var = gr.Textbox( + label=entry["field_name"], + value=entry["field_default_value"], + interactive=True, + ) + list_of_vars.append(var) + + render_prompt_button = gr.Button(value="Render prompts") + + with gr.Column(scale=4), gr.Tab("Prompt"): + with gr.Row(): + with gr.Column(): + prompt_details_system_prompt = gr.Textbox( + label="System Prompt", + value=prompt.system_prompt, + interactive=True, + ) + + with gr.Column(): + rendered_system_prompt = ( + state.rendered_prompt.rendered_system_prompt if state.rendered_prompt else "" + ) + gr.Textbox( + label="Rendered System Prompt", + value=rendered_system_prompt, + interactive=False, + ) + + with gr.Row(): + with gr.Column(): + prompt_details_user_prompt = gr.Textbox( + label="User Prompt", + value=prompt.user_prompt, + interactive=True, + ) + + with gr.Column(): + rendered_user_prompt = ( + state.rendered_prompt.rendered_user_prompt if state.rendered_prompt else "" + ) + gr.Textbox( + label="Rendered User Prompt", + value=rendered_user_prompt, + interactive=False, + ) llm_enabled = state.llm_model_name is not None prompt_ready = state.rendered_prompt is not None diff --git a/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py b/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py index c74322879..e7550898e 100644 --- a/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py +++ b/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py @@ -14,7 +14,9 @@ class ChromaDBStore(VectorStore): - """Class that stores text embeddings using [Chroma](https://docs.trychroma.com/)""" + """ + Class that stores text embeddings using [Chroma](https://docs.trychroma.com/). + """ def __init__( self, diff --git a/packages/ragbits-document-search/examples/simple_text.py b/packages/ragbits-document-search/examples/simple_text.py index 0c4a4ccf2..36e4ed6a9 100644 --- a/packages/ragbits-document-search/examples/simple_text.py +++ b/packages/ragbits-document-search/examples/simple_text.py @@ -23,8 +23,10 @@ ] -async def main(): - """Run the example.""" +async def main() -> None: + """ + Run the example. + """ document_search = DocumentSearch(embedder=LiteLLMEmbeddings(), vector_store=InMemoryVectorStore()) for document in documents: diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py index 85508c51f..b9158cc14 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py @@ -50,7 +50,7 @@ def from_config(cls, providers_config: ProvidersConfig | None = None) -> "Docume { DocumentType.TXT: YourCustomProviderClass(), DocumentType.PDF: UnstructuredProvider(), - } + }. Args: providers_config: The dictionary with the providers configuration, mapping the document types to the From dda128793f9793ac7cd28f151ba5204271922de7 Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Fri, 11 Oct 2024 10:57:30 +0200 Subject: [PATCH 11/28] Mypy issues resolved. --- .pre-commit-config.yaml | 3 +++ .../ragbits-core/src/ragbits/core/prompt/prompt.py | 2 +- .../src/ragbits/document_search/documents/element.py | 4 ++-- .../ingestion/providers/unstructured.py | 10 +++++----- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 57066e422..e94dc598d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,3 +1,5 @@ +default_language_version: + python: python3.10 repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.5.0 @@ -35,3 +37,4 @@ repos: # such as types-python-dateutil additional_dependencies: [pydantic>=2.8.2, types-pyyaml>=6.0.12] exclude: (/test_|setup.py|/tests/|docs/) + diff --git a/packages/ragbits-core/src/ragbits/core/prompt/prompt.py b/packages/ragbits-core/src/ragbits/core/prompt/prompt.py index 659893242..8e5413a4b 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/prompt.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/prompt.py @@ -11,7 +11,7 @@ from .parsers import DEFAULT_PARSERS, build_pydantic_parser InputT = TypeVar("InputT", bound=BaseModel | None) -FewShotExample = Tuple[str | InputT, str | OutputT] +FewShotExample = tuple[str | InputT, str | OutputT] class Prompt(Generic[InputT, OutputT], BasePromptWithParser[OutputT], metaclass=ABCMeta): diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py index 102320961..c5f8c9ff0 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import ClassVar +from typing import Any, ClassVar from pydantic import BaseModel @@ -26,7 +26,7 @@ def get_key(self) -> str: """ @classmethod - def __pydantic_init_subclass__(cls) -> None: # pylint: disable=unused-argument + def __pydantic_init_subclass__(cls, **kwargs: Any) -> None: # pylint: disable=unused-argument element_type_default = cls.model_fields["element_type"].default if element_type_default is None: diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py index 085c7fa1f..e476a1ae7 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py @@ -53,10 +53,10 @@ class UnstructuredProvider(BaseProvider): def __init__( self, - partition_kwargs: Optional[dict] = None, - chunking_kwargs: Optional[dict] = None, - api_key: Optional[str] = None, - api_server: Optional[str] = None, + partition_kwargs: dict | None = None, + chunking_kwargs: dict | None = None, + api_key: str | None = None, + api_server: str | None = None, use_api: bool = False, ) -> None: """Initialize the UnstructuredProvider. @@ -142,7 +142,7 @@ def _to_text_element(element: UnstructuredElement, document_meta: DocumentMeta) ) -def _set_or_raise(name: str, value: Optional[str], env_var: str) -> str: +def _set_or_raise(name: str, value: str | None, env_var: str) -> str: if value is not None: return value if (env_value := os.getenv(env_var)) is None: From 4d2d9a0a25dee1c52033a6ab416bafdd80821f91 Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Fri, 11 Oct 2024 12:29:59 +0200 Subject: [PATCH 12/28] Fixed vast majority of ruff's alerts. --- packages/ragbits-core/examples/llm_example.py | 5 ++-- .../src/ragbits/core/llms/clients/base.py | 4 +-- .../src/ragbits/core/llms/clients/local.py | 5 ++-- .../core/prompt/discovery/prompt_discovery.py | 8 +++--- .../src/ragbits/core/prompt/lab/app.py | 6 ++--- .../src/ragbits/core/prompt/prompt.py | 6 ++--- .../src/ragbits/core/prompt/promptfoo.py | 5 +++- .../src/ragbits/core/utils/_pyproject.py | 25 ++++++++++++++++--- .../core/vector_store/chromadb_store.py | 7 +++--- .../src/ragbits/document_search/_main.py | 4 ++- .../document_search/documents/element.py | 2 +- .../ingestion/providers/unstructured.py | 1 + pyproject.toml | 1 + scripts/update_ragbits_package.py | 5 ++-- 14 files changed, 57 insertions(+), 27 deletions(-) diff --git a/packages/ragbits-core/examples/llm_example.py b/packages/ragbits-core/examples/llm_example.py index 08d226410..53be7febb 100644 --- a/packages/ragbits-core/examples/llm_example.py +++ b/packages/ragbits-core/examples/llm_example.py @@ -30,8 +30,9 @@ class JokePrompt(Prompt[LoremPromptInput, LoremPromptOutput]): """A prompt that generates jokes.""" system_prompt = """ - You are a joke generator. The jokes you generate should be funny and not offensive. {% if not pun_allowed %}Also, make sure - that the jokes do not contain any puns.{% else %}You can use any type of joke, even if it contains puns.{% endif %} + You are a joke generator. The jokes you generate should be funny and not offensive. + {% if not pun_allowed %}Also, make sure that the jokes do not contain any puns. + {% else %}You can use any type of joke, even if it contains puns.{% endif %} Respond as json with two fields: joke and joke_category. """ diff --git a/packages/ragbits-core/src/ragbits/core/llms/clients/base.py b/packages/ragbits-core/src/ragbits/core/llms/clients/base.py index 47f38aa18..62a553037 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/clients/base.py +++ b/packages/ragbits-core/src/ragbits/core/llms/clients/base.py @@ -12,9 +12,9 @@ @dataclass -class LLMOptions(ABC): +class LLMOptions: """ - Abstract dataclass that represents all available LLM call options. + A dataclass that represents all available LLM call options. """ _not_given: ClassVar[Any] = None diff --git a/packages/ragbits-core/src/ragbits/core/llms/clients/local.py b/packages/ragbits-core/src/ragbits/core/llms/clients/local.py index 443e84e1d..24aba0ceb 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/clients/local.py +++ b/packages/ragbits-core/src/ragbits/core/llms/clients/local.py @@ -18,9 +18,10 @@ @dataclass class LocalLLMOptions(LLMOptions): - """Dataclass that represents all available LLM call options for the local LLM client. + """ + Dataclass that represents all available LLM call options for the local LLM client. Each of them is described in the [HuggingFace documentation] - (https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation). # pylint: disable=line-too-long + (https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation). # noqa: E501 """ repetition_penalty: float | None | NotGiven = NOT_GIVEN diff --git a/packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py b/packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py index e2e72077a..4382fe60f 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py @@ -16,12 +16,12 @@ class PromptDiscovery: root_path (Path): The root path to search for Prompt objects. Defaults to the directory where the script is run. """ - def __init__(self, file_pattern: str = core_config.prompt_path_pattern, root_path: Path = Path.cwd()): + def __init__(self, file_pattern: str = core_config.prompt_path_pattern, root_path: Path | None = None): self.file_pattern = file_pattern - self.root_path = root_path + self.root_path = root_path or Path.cwd() @staticmethod - def is_prompt_subclass(obj: Any) -> bool: + def is_prompt_subclass(obj: Any) -> bool: # noqa: ANN401 """Checks if an object is a class that is a subclass of Prompt (but not Prompt itself). Args: @@ -53,7 +53,7 @@ def discover(self) -> set[type[Prompt]]: module = importlib.util.module_from_spec(spec) - assert spec.loader is not None + assert spec.loader is not None # noqa: S101 try: spec.loader.exec_module(module) diff --git a/packages/ragbits-core/src/ragbits/core/prompt/lab/app.py b/packages/ragbits-core/src/ragbits/core/prompt/lab/app.py index cd3798456..04a1e5b39 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/lab/app.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/lab/app.py @@ -40,7 +40,7 @@ class PromptState: llm_api_key: str | None = None -def render_prompt(index: int, system_prompt: str, user_prompt: str, state: PromptState, *args: Any) -> PromptState: +def render_prompt(index: int, system_prompt: str, user_prompt: str, state: PromptState, *args: Any) -> PromptState: # noqa: ANN401 """ Renders a prompt based on the provided key, system prompt, user prompt, and input variables. @@ -100,10 +100,10 @@ def send_prompt_to_llm(state: PromptState) -> str: Returns: str: The response generated by the LLM. """ - assert state.llm_model_name is not None, "LLM model name is not set." + assert state.llm_model_name is not None, "LLM model name is not set." # noqa: S101 llm_client = LiteLLM(model_name=state.llm_model_name, api_key=state.llm_api_key) - assert state.rendered_prompt is not None, "Prompt has not been rendered yet." + assert state.rendered_prompt is not None, "Prompt has not been rendered yet." # noqa: S101 try: response = asyncio.run( llm_client.client.call(conversation=state.rendered_prompt.chat, options=LiteLLMOptions()) diff --git a/packages/ragbits-core/src/ragbits/core/prompt/prompt.py b/packages/ragbits-core/src/ragbits/core/prompt/prompt.py index 8e5413a4b..ca0870d5e 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/prompt.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/prompt.py @@ -49,7 +49,7 @@ def _get_io_types(cls) -> tuple: input_type = None if input_type is type(None) else input_type output_type = args[1] if len(args) > 1 else str - assert input_type is None or issubclass( + assert input_type is None or issubclass( # noqa: S101 input_type, BaseModel ), "Input type must be a subclass of BaseModel" return (input_type, output_type) @@ -57,7 +57,7 @@ def _get_io_types(cls) -> tuple: @classmethod def _parse_template(cls, template: str) -> Template: - env = Environment() # nosec B701 - HTML autoescaping not needed for plain text + env = Environment() # nosec B701 - HTML autoescaping not needed for plain text #noqa: S701 ast = env.parse(template) template_variables = meta.find_undeclared_variables(ast) input_fields = cls.input_type.model_fields.keys() if cls.input_type else set() @@ -90,7 +90,7 @@ def _detect_response_parser(cls) -> Callable[[str], OutputT]: raise ValueError(f"Response parser not provided for output type {cls.output_type}") @classmethod - def __init_subclass__(cls, **kwargs: Any) -> None: + def __init_subclass__(cls, **kwargs: Any) -> None: # noqa: ANN401 if not hasattr(cls, "user_prompt") or cls.user_prompt is None: raise ValueError("User prompt must be provided") diff --git a/packages/ragbits-core/src/ragbits/core/prompt/promptfoo.py b/packages/ragbits-core/src/ragbits/core/prompt/promptfoo.py index c650edd6d..530a13af0 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/promptfoo.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/promptfoo.py @@ -10,7 +10,7 @@ def generate_configs( file_pattern: str = core_config.prompt_path_pattern, - root_path: Path = Path.cwd(), + root_path: Path | None = None, target_path: Path = Path("promptfooconfigs"), ) -> None: """ @@ -21,6 +21,9 @@ def generate_configs( root_path: The root path to search for Prompt objects. Defaults to the directory where the script is run. target_path: The path to save the promptfoo configuration files. Defaults to "promptfooconfigs". """ + if not root_path: + root_path = Path.cwd() + prompts = PromptDiscovery(file_pattern=file_pattern, root_path=root_path).discover() Console().print( f"Discovered {len(prompts)} prompts." diff --git a/packages/ragbits-core/src/ragbits/core/utils/_pyproject.py b/packages/ragbits-core/src/ragbits/core/utils/_pyproject.py index f29a55f7f..d51e76778 100644 --- a/packages/ragbits-core/src/ragbits/core/utils/_pyproject.py +++ b/packages/ragbits-core/src/ragbits/core/utils/_pyproject.py @@ -5,7 +5,20 @@ from pydantic import BaseModel -def find_pyproject(current_dir: Path = Path.cwd()) -> Path: +def _get_current_dir(current_dir: Path | None = None) -> Path: + """ + Returns the current directory if `current_dir` is None. + + Args: + current_dir (Path, optional): The directory to check. Defaults to None. + + Returns: + Path: The current directory. + """ + return current_dir or Path.cwd() + + +def find_pyproject(current_dir: Path | None = None) -> Path: """ Find the pyproject.toml file in the current directory or any of its parents. @@ -19,6 +32,8 @@ def find_pyproject(current_dir: Path = Path.cwd()) -> Path: Raises: FileNotFoundError: If the pyproject.toml file is not found. """ + current_dir = _get_current_dir(current_dir) + possible_dirs = [current_dir, *current_dir.parents] for possible_dir in possible_dirs: pyproject = possible_dir / "pyproject.toml" @@ -27,7 +42,7 @@ def find_pyproject(current_dir: Path = Path.cwd()) -> Path: raise FileNotFoundError("pyproject.toml not found") -def get_ragbits_config(current_dir: Path = Path.cwd()) -> dict[str, Any]: +def get_ragbits_config(current_dir: Path | None = None) -> dict[str, Any]: """ Get the ragbits configuration from the project's pyproject.toml file. @@ -41,6 +56,8 @@ def get_ragbits_config(current_dir: Path = Path.cwd()) -> dict[str, Any]: Returns: dict: The ragbits configuration. """ + current_dir = _get_current_dir(current_dir) + try: pyproject = find_pyproject(current_dir) except FileNotFoundError: @@ -57,7 +74,7 @@ def get_ragbits_config(current_dir: Path = Path.cwd()) -> dict[str, Any]: def get_config_instance( - model: type[ConfigModelT], subproject: str | None = None, current_dir: Path = Path.cwd() + model: type[ConfigModelT], subproject: str | None = None, current_dir: Path | None = None ) -> ConfigModelT: """ Creates an instace of pydantic model loaded with the configuration from pyproject.toml. @@ -72,6 +89,8 @@ def get_config_instance( Returns: ConfigModelT: The model instance loaded with the configuration """ + current_dir = _get_current_dir(current_dir) + config = get_ragbits_config(current_dir) print(config) if subproject: diff --git a/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py b/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py index e7550898e..b7f77be9f 100644 --- a/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py +++ b/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py @@ -69,17 +69,18 @@ def _return_best_match(self, retrieved: dict) -> str | None: Based on the retrieved data, returns the best match or None if no match is found. Args: - Retrieved data, with a column-first format + retrieved: Retrieved data, with a column-first format. Returns: - The best match or None if no match is found + The best match or None if no match is found. """ if self._max_distance is None or retrieved["distances"][0][0] <= self._max_distance: return retrieved["documents"][0][0] return None - def _process_db_entry(self, entry: VectorDBEntry) -> tuple[str, list[float], dict]: + @staticmethod + def _process_db_entry(entry: VectorDBEntry) -> tuple[str, list[float], dict]: doc_id = sha256(entry.key.encode("utf-8")).hexdigest() embedding = entry.vector diff --git a/packages/ragbits-document-search/src/ragbits/document_search/_main.py b/packages/ragbits-document-search/src/ragbits/document_search/_main.py index 34cef5982..74e905d67 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/_main.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/_main.py @@ -58,7 +58,7 @@ def __init__( self.reranker = reranker or NoopReranker() self.document_processor_router = document_processor_router or DocumentProcessorRouter.from_config() - async def search(self, query: str, search_config: SearchConfig = SearchConfig()) -> list[Element]: + async def search(self, query: str, search_config: SearchConfig | None = None) -> list[Element]: """ Search for the most relevant chunks for a query. @@ -69,6 +69,8 @@ async def search(self, query: str, search_config: SearchConfig = SearchConfig()) Returns: A list of chunks. """ + if not search_config: + search_config = SearchConfig() queries = self.query_rephraser.rephrase(query) elements = [] for rephrased_query in queries: diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py index c5f8c9ff0..1038ad20a 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py @@ -26,7 +26,7 @@ def get_key(self) -> str: """ @classmethod - def __pydantic_init_subclass__(cls, **kwargs: Any) -> None: # pylint: disable=unused-argument + def __pydantic_init_subclass__(cls, **kwargs: Any) -> None: # pylint: disable=unused-argument #noqa: ANN401 element_type_default = cls.model_fields["element_type"].default if element_type_default is None: diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py index e476a1ae7..5bf9ec36a 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py @@ -69,6 +69,7 @@ def __init__( variable will be used. api_server: The API server URL to use for the Unstructured API. If not specified, the UNSTRUCTURED_SERVER_URL environment variable will be used. + use_api: Flag to determine whether to use the API or not. Defaults to False. """ self.partition_kwargs = partition_kwargs or DEFAULT_PARTITION_KWARGS self.chunking_kwargs = chunking_kwargs or DEFAULT_CHUNKING_KWARGS diff --git a/pyproject.toml b/pyproject.toml index 0344ebdbe..f9ae10f6b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -183,6 +183,7 @@ ignore = [ "D100", "D104", "D105", + "D107", "D200", "D205", "D212", diff --git a/scripts/update_ragbits_package.py b/scripts/update_ragbits_package.py index 65a91996a..05e5d9e6a 100644 --- a/scripts/update_ragbits_package.py +++ b/scripts/update_ragbits_package.py @@ -41,7 +41,7 @@ def _update_type_to_enum(update_type: str | None = None) -> UpdateType | None: return None -def _version_to_list(version_string): +def _version_to_list(version_string: str) -> list[int]: return [int(part) for part in version_string.split(".")] @@ -96,7 +96,8 @@ def _update_pkg_version( pkg_pyproject["project"]["version"] = new_version (PACKAGES_DIR / pkg_name / "pyproject.toml").write_text(tomlkit.dumps(pkg_pyproject)) - assert isinstance(new_version, str) + if not isinstance(new_version, str): + raise TypeError("new_version must be a string") pprint(f"[green]The {pkg_name} package was successfully updated from {version} to {new_version}.[/green]") return version, new_version From c98bda18737776fddea4e479acfac1cc78c0f62d Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Fri, 11 Oct 2024 14:19:36 +0200 Subject: [PATCH 13/28] Fixed the rest of ruff's claims. --- .../ragbits-core/src/ragbits/core/llms/base.py | 2 +- .../src/ragbits/core/llms/clients/local.py | 4 ++-- .../ragbits-core/src/ragbits/core/prompt/base.py | 2 +- .../ragbits-core/src/ragbits/core/prompt/prompt.py | 14 ++++++++------ 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/packages/ragbits-core/src/ragbits/core/llms/base.py b/packages/ragbits-core/src/ragbits/core/llms/base.py index 5eedb9fb9..a4aed07ae 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/base.py +++ b/packages/ragbits-core/src/ragbits/core/llms/base.py @@ -39,7 +39,7 @@ def client(self) -> LLMClient: Client for the LLM. """ - def count_tokens(self, prompt: BasePrompt) -> int: + def count_tokens(self, prompt: BasePrompt) -> int: # noqa: PLR6301 """ Counts tokens in the prompt. diff --git a/packages/ragbits-core/src/ragbits/core/llms/clients/local.py b/packages/ragbits-core/src/ragbits/core/llms/clients/local.py index 24aba0ceb..ac371ed00 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/clients/local.py +++ b/packages/ragbits-core/src/ragbits/core/llms/clients/local.py @@ -21,8 +21,8 @@ class LocalLLMOptions(LLMOptions): """ Dataclass that represents all available LLM call options for the local LLM client. Each of them is described in the [HuggingFace documentation] - (https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation). # noqa: E501 - """ + (https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation). + """ # noqa: E501 repetition_penalty: float | None | NotGiven = NOT_GIVEN do_sample: bool | None | NotGiven = NOT_GIVEN diff --git a/packages/ragbits-core/src/ragbits/core/prompt/base.py b/packages/ragbits-core/src/ragbits/core/prompt/base.py index 9f2f086be..fe28d71f5 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/base.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/base.py @@ -30,7 +30,7 @@ def json_mode(self) -> bool: """ return self.output_schema() is not None - def output_schema(self) -> dict | type[BaseModel] | None: + def output_schema(self) -> dict | type[BaseModel] | None: # noqa: PLR6301 """ Returns the schema of the desired output. Can be used to request structured output from the LLM API or to validate the output. Can return either a Pydantic model or a JSON schema. diff --git a/packages/ragbits-core/src/ragbits/core/prompt/prompt.py b/packages/ragbits-core/src/ragbits/core/prompt/prompt.py index ca0870d5e..be140ae9f 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/prompt.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/prompt.py @@ -57,7 +57,7 @@ def _get_io_types(cls) -> tuple: @classmethod def _parse_template(cls, template: str) -> Template: - env = Environment() # nosec B701 - HTML autoescaping not needed for plain text #noqa: S701 + env = Environment(autoescape=True) ast = env.parse(template) template_variables = meta.find_undeclared_variables(ast) input_fields = cls.input_type.model_fields.keys() if cls.input_type else set() @@ -169,15 +169,17 @@ def list_few_shots(self) -> ChatFormat: result: ChatFormat = [] for user_message, assistant_message in self.few_shots + self._instace_few_shots: if not isinstance(user_message, str): - user_message = self._render_template(self.user_prompt_template, user_message) + user_content = self._render_template(self.user_prompt_template, user_message) + else: + user_content = user_message if isinstance(assistant_message, BaseModel): - assistant_message = assistant_message.model_dump_json() + assistant_content = assistant_message.model_dump_json() else: - assistant_message = str(assistant_message) + assistant_content = str(assistant_message) - result.append({"role": "user", "content": user_message}) - result.append({"role": "assistant", "content": assistant_message}) + result.append({"role": "user", "content": user_content}) + result.append({"role": "assistant", "content": assistant_content}) return result def output_schema(self) -> dict | type[BaseModel] | None: From 57ef338a5dee6bc16b68ce888630b5376e26e4f4 Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Mon, 14 Oct 2024 10:51:33 +0200 Subject: [PATCH 14/28] Aligning to micpst's comments. --- .../ragbits-cli/src/ragbits/cli/__init__.py | 3 +- packages/ragbits-core/examples/llm_example.py | 12 +++-- packages/ragbits-core/src/ragbits/core/cli.py | 3 +- .../src/ragbits/core/embeddings/base.py | 3 +- .../src/ragbits/core/llms/clients/litellm.py | 12 +++-- .../src/ragbits/core/llms/clients/local.py | 6 ++- .../src/ragbits/core/llms/litellm.py | 6 ++- .../src/ragbits/core/llms/local.py | 9 ++-- .../src/ragbits/core/llms/types.py | 3 +- .../core/prompt/discovery/prompt_discovery.py | 9 ++-- .../src/ragbits/core/prompt/parsers.py | 4 +- .../ragbits/core/vector_store/in_memory.py | 6 ++- .../document_search/documents/document.py | 27 +++++++--- .../document_search/documents/element.py | 16 ++++-- .../document_search/documents/sources.py | 18 ++++--- .../ingestion/providers/base.py | 14 ++++-- .../ingestion/providers/dummy.py | 6 ++- .../ingestion/providers/unstructured.py | 9 ++-- .../retrieval/rephrasers/base.py | 3 +- pyproject.toml | 50 ------------------- 20 files changed, 114 insertions(+), 105 deletions(-) diff --git a/packages/ragbits-cli/src/ragbits/cli/__init__.py b/packages/ragbits-cli/src/ragbits/cli/__init__.py index 6e27a7f11..dbef2e983 100644 --- a/packages/ragbits-cli/src/ragbits/cli/__init__.py +++ b/packages/ragbits-cli/src/ragbits/cli/__init__.py @@ -9,7 +9,8 @@ def main() -> None: - """Main entry point for the CLI. + """ + Main entry point for the CLI. This function registers all the CLI modules in the ragbits packages: - iterates over every package in the ragbits.* namespace diff --git a/packages/ragbits-core/examples/llm_example.py b/packages/ragbits-core/examples/llm_example.py index 53be7febb..91dfe2bf0 100644 --- a/packages/ragbits-core/examples/llm_example.py +++ b/packages/ragbits-core/examples/llm_example.py @@ -13,21 +13,27 @@ class LoremPromptInput(BaseModel): - """Input format for the LoremPrompt.""" + """ + Input format for the LoremPrompt. + """ theme: str pun_allowed: bool = False class LoremPromptOutput(BaseModel): - """Output format for the LoremPrompt.""" + """ + Output format for the LoremPrompt. + """ joke: str joke_category: str class JokePrompt(Prompt[LoremPromptInput, LoremPromptOutput]): - """A prompt that generates jokes.""" + """ + A prompt that generates jokes. + """ system_prompt = """ You are a joke generator. The jokes you generate should be funny and not offensive. diff --git a/packages/ragbits-core/src/ragbits/core/cli.py b/packages/ragbits-core/src/ragbits/core/cli.py index 4c60b53d0..912a85a0b 100644 --- a/packages/ragbits-core/src/ragbits/core/cli.py +++ b/packages/ragbits-core/src/ragbits/core/cli.py @@ -7,7 +7,8 @@ def register(app: typer.Typer) -> None: - """Register the CLI commands for the package. + """ + Register the CLI commands for the package. Args: app: The Typer object to register the commands with. diff --git a/packages/ragbits-core/src/ragbits/core/embeddings/base.py b/packages/ragbits-core/src/ragbits/core/embeddings/base.py index 2ae860d29..ede4fcadf 100644 --- a/packages/ragbits-core/src/ragbits/core/embeddings/base.py +++ b/packages/ragbits-core/src/ragbits/core/embeddings/base.py @@ -8,7 +8,8 @@ class Embeddings(ABC): @abstractmethod async def embed_text(self, data: list[str]) -> list[list[float]]: - """Creates embeddings for the given strings. + """ + Creates embeddings for the given strings. Args: data: List of strings to get embeddings for. diff --git a/packages/ragbits-core/src/ragbits/core/llms/clients/litellm.py b/packages/ragbits-core/src/ragbits/core/llms/clients/litellm.py index 4d994cbd2..11b9c3f88 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/clients/litellm.py +++ b/packages/ragbits-core/src/ragbits/core/llms/clients/litellm.py @@ -19,7 +19,8 @@ @dataclass class LiteLLMOptions(LLMOptions): - """Dataclass that represents all available LLM call options for the LiteLLM client. + """ + Dataclass that represents all available LLM call options for the LiteLLM client. Each of them is described in the [LiteLLM documentation](https://docs.litellm.ai/docs/completion/input). """ @@ -35,7 +36,8 @@ class LiteLLMOptions(LLMOptions): class LiteLLMClient(LLMClient[LiteLLMOptions]): - """Client for the LiteLLM that supports calls to 100+ LLMs APIs, including OpenAI, Anthropic, VertexAI, + """ + Client for the LiteLLM that supports calls to 100+ LLMs APIs, including OpenAI, Anthropic, VertexAI, Hugging Face and others. """ @@ -50,7 +52,8 @@ def __init__( api_version: str | None = None, use_structured_output: bool = False, ) -> None: - """Constructs a new LiteLLMClient instance. + """ + Constructs a new LiteLLMClient instance. Args: model_name: Name of the model to use. @@ -78,7 +81,8 @@ async def call( json_mode: bool = False, output_schema: type[BaseModel] | dict | None = None, ) -> str: - """Calls the appropriate LLM endpoint with the given prompt and options. + """ + Calls the appropriate LLM endpoint with the given prompt and options. Args: conversation: List of dicts with "role" and "content" keys, representing the chat history so far. diff --git a/packages/ragbits-core/src/ragbits/core/llms/clients/local.py b/packages/ragbits-core/src/ragbits/core/llms/clients/local.py index ac371ed00..28f0987e6 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/clients/local.py +++ b/packages/ragbits-core/src/ragbits/core/llms/clients/local.py @@ -48,7 +48,8 @@ def __init__( *, hf_api_key: str | None = None, ) -> None: - """Constructs a new local LLMClient instance. + """ + Constructs a new local LLMClient instance. Args: model_name: Name of the model to use. @@ -74,7 +75,8 @@ async def call( json_mode: bool = False, output_schema: type[BaseModel] | dict | None = None, ) -> str: - """Makes a call to the local LLM with the provided prompt and options. + """ + Makes a call to the local LLM with the provided prompt and options. Args: conversation: List of dicts with "role" and "content" keys, representing the chat history so far. diff --git a/packages/ragbits-core/src/ragbits/core/llms/litellm.py b/packages/ragbits-core/src/ragbits/core/llms/litellm.py index 3b3603e59..c4f8c4c7e 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/litellm.py +++ b/packages/ragbits-core/src/ragbits/core/llms/litellm.py @@ -30,7 +30,8 @@ def __init__( api_version: str | None = None, use_structured_output: bool = False, ) -> None: - """Constructs a new LiteLLM instance. + """ + Constructs a new LiteLLM instance. Args: model_name: Name of the [LiteLLM supported model](https://docs.litellm.ai/docs/providers) to be used.\ @@ -71,7 +72,8 @@ def client(self) -> LiteLLMClient: ) def count_tokens(self, prompt: BasePrompt) -> int: - """Counts tokens in the prompt. + """ + Counts tokens in the prompt. Args: prompt: Formatted prompt template with conversation and response parsing configuration. diff --git a/packages/ragbits-core/src/ragbits/core/llms/local.py b/packages/ragbits-core/src/ragbits/core/llms/local.py index bfd6d81e7..0d4699063 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/local.py +++ b/packages/ragbits-core/src/ragbits/core/llms/local.py @@ -27,7 +27,8 @@ def __init__( *, api_key: str | None = None, ) -> None: - """Constructs a new local LLM instance. + """ + Constructs a new local LLM instance. Args: model_name: Name of the model to use. This should be a model from the CausalLM class. @@ -46,7 +47,8 @@ def __init__( @cached_property def client(self) -> LocalLLMClient: - """Client for the LLM. + """ + Client for the LLM. Returns: The client used to interact with the LLM. @@ -54,7 +56,8 @@ def client(self) -> LocalLLMClient: return LocalLLMClient(model_name=self.model_name, hf_api_key=self.api_key) def count_tokens(self, prompt: BasePrompt) -> int: - """Counts tokens in the messages. + """ + Counts tokens in the messages. Args: prompt: Messages to count tokens for. diff --git a/packages/ragbits-core/src/ragbits/core/llms/types.py b/packages/ragbits-core/src/ragbits/core/llms/types.py index f0447098a..8bb5949be 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/types.py +++ b/packages/ragbits-core/src/ragbits/core/llms/types.py @@ -5,7 +5,8 @@ # Sentinel class used until PEP 0661 is accepted class NotGiven: - """A sentinel singleton class used to distinguish omitted keyword arguments + """ + A sentinel singleton class used to distinguish omitted keyword arguments from those passed in with the value None (which may have different behavior). For example: diff --git a/packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py b/packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py index 4382fe60f..15ecf8f01 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/discovery/prompt_discovery.py @@ -9,7 +9,8 @@ class PromptDiscovery: - """Discovers Prompt objects within Python modules. + """ + Discovers Prompt objects within Python modules. Args: file_pattern (str): The file pattern to search for Prompt objects. Defaults to "**/prompt_*.py" @@ -22,7 +23,8 @@ def __init__(self, file_pattern: str = core_config.prompt_path_pattern, root_pat @staticmethod def is_prompt_subclass(obj: Any) -> bool: # noqa: ANN401 - """Checks if an object is a class that is a subclass of Prompt (but not Prompt itself). + """ + Checks if an object is a class that is a subclass of Prompt (but not Prompt itself). Args: obj (any): The object to check. @@ -35,7 +37,8 @@ def is_prompt_subclass(obj: Any) -> bool: # noqa: ANN401 return inspect.isclass(obj) and not get_origin(obj) and issubclass(obj, Prompt) and obj != Prompt def discover(self) -> set[type[Prompt]]: - """Discovers Prompt objects within the specified file paths. + """ + Discovers Prompt objects within the specified file paths. Returns: set[Prompt]: The discovered Prompt objects. diff --git a/packages/ragbits-core/src/ragbits/core/prompt/parsers.py b/packages/ragbits-core/src/ragbits/core/prompt/parsers.py index cf3001f0d..baf6462ef 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/parsers.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/parsers.py @@ -88,9 +88,7 @@ def bool_parser(value: str) -> bool: raise ResponseParsingError(f"Could not parse '{value}' as a boolean") -def build_pydantic_parser( - model: type[PydanticModelT], -) -> Callable[[str], PydanticModelT]: +def build_pydantic_parser(model: type[PydanticModelT]) -> Callable[[str], PydanticModelT]: """ Builds a parser for a specific Pydantic model. diff --git a/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py b/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py index d0931c7cd..ce0576fa6 100644 --- a/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py +++ b/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py @@ -12,7 +12,8 @@ def __init__(self) -> None: self._storage: dict[str, VectorDBEntry] = {} async def store(self, entries: list[VectorDBEntry]) -> None: - """Store entries in the vector store. + """ + Store entries in the vector store. Args: entries: The entries to store. @@ -21,7 +22,8 @@ async def store(self, entries: list[VectorDBEntry]) -> None: self._storage[entry.key] = entry async def retrieve(self, vector: list[float], k: int = 5) -> list[VectorDBEntry]: - """Retrieve entries from the vector store. + """ + Retrieve entries from the vector store. Args: vector: The vector to search for. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py index 51b2375f7..6be36dbd2 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/document.py @@ -8,7 +8,9 @@ class DocumentType(str, Enum): - """Types of documents that can be stored.""" + """ + Types of documents that can be stored. + """ MD = "md" TXT = "txt" @@ -50,7 +52,8 @@ def id(self) -> str: return self.source.get_id() async def fetch(self) -> "Document": - """This method fetches the document from source (potentially remote) and creates an object to interface with it. + """ + This method fetches the document from source (potentially remote) and creates an object to interface with it. Based on the document type, it will return a different object. Returns: @@ -61,7 +64,8 @@ async def fetch(self) -> "Document": @classmethod def create_text_document_from_literal(cls, content: str) -> "DocumentMeta": - """Create a text document from a literal content. + """ + Create a text document from a literal content. Args: content: The content of the document. @@ -79,7 +83,8 @@ def create_text_document_from_literal(cls, content: str) -> "DocumentMeta": @classmethod def from_local_path(cls, local_path: Path) -> "DocumentMeta": - """Create a document metadata from a local path. + """ + Create a document metadata from a local path. Args: local_path: The local path to the document. @@ -94,14 +99,17 @@ def from_local_path(cls, local_path: Path) -> "DocumentMeta": class Document(BaseModel): - """An object representing a document which is downloaded and stored locally.""" + """ + An object representing a document which is downloaded and stored locally. + """ local_path: Path metadata: DocumentMeta @classmethod def from_document_meta(cls, document_meta: DocumentMeta, local_path: Path) -> "Document": - """Create a document from a document metadata. + """ + Create a document from a document metadata. Based on the document type, it will return a different object. Args: @@ -117,11 +125,14 @@ def from_document_meta(cls, document_meta: DocumentMeta, local_path: Path) -> "D class TextDocument(Document): - """An object representing a text document.""" + """ + An object representing a text document. + """ @property def content(self) -> str: - """Get the content of the document. + """ + Get the content of the document. Returns: The content of the document. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py index 1038ad20a..e6f8ff91c 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py @@ -19,7 +19,8 @@ class Element(BaseModel, ABC): @abstractmethod def get_key(self) -> str: - """Get the key of the element which will be used to generate the vector. + """ + Get the key of the element which will be used to generate the vector. Returns: The key. @@ -36,7 +37,8 @@ def __pydantic_init_subclass__(cls, **kwargs: Any) -> None: # pylint: disable=u @classmethod def from_vector_db_entry(cls, db_entry: VectorDBEntry) -> "Element": - """Create an element from a vector database entry. + """ + Create an element from a vector database entry. Args: db_entry: The vector database entry. @@ -51,7 +53,8 @@ def from_vector_db_entry(cls, db_entry: VectorDBEntry) -> "Element": return element_cls(**meta) def to_vector_db_entry(self, vector: list[float]) -> VectorDBEntry: - """Create a vector database entry from the element. + """ + Create a vector database entry from the element. Args: vector: The vector. @@ -67,13 +70,16 @@ def to_vector_db_entry(self, vector: list[float]) -> VectorDBEntry: class TextElement(Element): - """An object representing a text element in a document.""" + """ + An object representing a text element in a document. + """ element_type: str = "text" content: str def get_key(self) -> str: - """Get the key of the element which will be used to generate the vector. + """ + Get the key of the element which will be used to generate the vector. Returns: The key. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py index 496fd6dcc..94e50b2a9 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py @@ -23,7 +23,8 @@ class Source(BaseModel, ABC): @abstractmethod def get_id(self) -> str: - """Get the source ID. + """ + Get the source ID. Returns: The source ID. @@ -31,7 +32,8 @@ def get_id(self) -> str: @abstractmethod async def fetch(self) -> Path: - """Load the source. + """ + Load the source. Returns: The path to the source. @@ -47,7 +49,8 @@ class LocalFileSource(Source): path: Path def get_id(self) -> str: - """Get unique identifier of the object in the source. + """ + Get unique identifier of the object in the source. Returns: Unique identifier. @@ -55,7 +58,8 @@ def get_id(self) -> str: return f"local_file:{self.path.absolute()}" async def fetch(self) -> Path: - """Fetch the source. + """ + Fetch the source. Returns: The local path to the object fetched from the source. @@ -74,7 +78,8 @@ class GCSSource(Source): object_name: str def get_id(self) -> str: - """Get unique identifier of the object in the source. + """ + Get unique identifier of the object in the source. Returns: Unique identifier. @@ -82,7 +87,8 @@ def get_id(self) -> str: return f"gcs:gs://{self.bucket}/{self.object_name}" async def fetch(self) -> Path: - """Fetch the file from Google Cloud Storage and store it locally. + """ + Fetch the file from Google Cloud Storage and store it locally. The file is downloaded to a local directory specified by `local_dir`. If the file already exists locally, it will not be downloaded again. If the file doesn't exist locally, it will be fetched from GCS. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/base.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/base.py index d046e7c70..2b99a9bc8 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/base.py @@ -5,7 +5,9 @@ class DocumentTypeNotSupportedError(Exception): - """Raised when the document type is not supported by the provider.""" + """ + Raised when the document type is not supported by the provider. + """ def __init__(self, provider_name: str, document_type: DocumentType) -> None: message = f"Document type {document_type} is not supported by the {provider_name}" @@ -13,13 +15,16 @@ def __init__(self, provider_name: str, document_type: DocumentType) -> None: class BaseProvider(ABC): - """A base class for the document processing providers.""" + """ + A base class for the document processing providers. + """ SUPPORTED_DOCUMENT_TYPES: set[DocumentType] @abstractmethod async def process(self, document_meta: DocumentMeta) -> list[Element]: - """Process the document. + """ + Process the document. Args: document_meta: The document to process. @@ -29,7 +34,8 @@ async def process(self, document_meta: DocumentMeta) -> list[Element]: """ def validate_document_type(self, document_type: DocumentType) -> None: - """Check if the provider supports the document type. + """ + Check if the provider supports the document type. Args: document_type: The document type. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/dummy.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/dummy.py index 11d063925..97de0c2c0 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/dummy.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/dummy.py @@ -8,7 +8,8 @@ class DummyProvider(BaseProvider): - """This is a mock provider that returns a TextElement with the content of the document. + """ + This is a mock provider that returns a TextElement with the content of the document. It should be used for testing purposes only. TODO: Remove this provider after the implementation of the real providers. @@ -17,7 +18,8 @@ class DummyProvider(BaseProvider): SUPPORTED_DOCUMENT_TYPES = {DocumentType.TXT} async def process(self, document_meta: DocumentMeta) -> list[Element]: - """Process the text document. + """ + Process the text document. Args: document_meta: The document to process. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py index 5bf9ec36a..d82ffe682 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured.py @@ -59,7 +59,8 @@ def __init__( api_server: str | None = None, use_api: bool = False, ) -> None: - """Initialize the UnstructuredProvider. + """ + Initialize the UnstructuredProvider. Args: partition_kwargs: The additional arguments for the partitioning. Refer to the Unstructured API documentation @@ -80,7 +81,8 @@ def __init__( @property def client(self) -> UnstructuredClient: - """Get the UnstructuredClient instance. If the client is not initialized, it will be created. + """ + Get the UnstructuredClient instance. If the client is not initialized, it will be created. Returns: The UnstructuredClient instance. @@ -97,7 +99,8 @@ def client(self) -> UnstructuredClient: return self._client async def process(self, document_meta: DocumentMeta) -> list[Element]: - """Process the document using the Unstructured API. + """ + Process the document using the Unstructured API. Args: document_meta: The document to process. diff --git a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/base.py b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/base.py index 49a9b2ebd..a40b9f9be 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/base.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/base.py @@ -9,7 +9,8 @@ class QueryRephraser(abc.ABC): @staticmethod @abc.abstractmethod def rephrase(query: str) -> list[str]: - """Rephrase a query. + """ + Rephrase a query. Args: query: The query to rephrase. diff --git a/pyproject.toml b/pyproject.toml index f9ae10f6b..08e5c6de6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -97,56 +97,6 @@ module = "ragbits.*" ignore_missing_imports = false disallow_untyped_defs = true -[tool.pylint.basic] -py-version=3.10 -good-names = "i,j,x,y,z,x1,y1,z1,x2,y2,z2,cv,df,dx,dy,dz,w,h,c,b,g,qa,q,a" -max-args = 8 - -[tool.pylint.main] -load-plugins = ["pylint.extensions.docparams"] - -[tool.pylint.messages_control] -disable = [ - "suppressed-message", - # therefore we wouldn't have to install full dependency set in order to lint - "import-error", - # sometimes we create a dataclass or Pydantic module and just don't need public methods - "too-few-public-methods", - # below is handled by pycln - "unused-import", - # below is handled by isort - "wrong-import-order", - # too restrictive - "too-many-instance-attributes", - # not necessary nor useful in our projects - "missing-module-docstring", -] - -[tool.pylint.format] -max-line-length = 120 - -[tool.pylint.miscellaneous] -notes = ["XXX"] - -[tool.pylint.parameter_documentation] -accept-no-param-doc = false -accept-no-raise-doc = false -accept-no-return-doc = false -accept-no-yields-doc = false -default-docstring-type = "google" - -[tool.pylint.design] -max-locals = 20 - -[tool.pylint.similarities] -min-similarity-lines = 10 - -[tool.bandit] -exclude_dirs = ["venv"] -# B101 disables errors for asserts in the code -# remember to not use asserts for security and control flows -skips = ["B101"] - [tool.ruff] exclude = [".venv"] extend-include = ["*.ipynb"] From 12460382275d6e9f18e7be904a4e0d8179df27f7 Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Mon, 14 Oct 2024 14:17:47 +0200 Subject: [PATCH 15/28] Unifying default dir argument assignment. --- .../src/ragbits/core/prompt/promptfoo.py | 3 +-- .../src/ragbits/core/utils/_pyproject.py | 19 +++---------------- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/packages/ragbits-core/src/ragbits/core/prompt/promptfoo.py b/packages/ragbits-core/src/ragbits/core/prompt/promptfoo.py index 530a13af0..a9525bc35 100644 --- a/packages/ragbits-core/src/ragbits/core/prompt/promptfoo.py +++ b/packages/ragbits-core/src/ragbits/core/prompt/promptfoo.py @@ -21,8 +21,7 @@ def generate_configs( root_path: The root path to search for Prompt objects. Defaults to the directory where the script is run. target_path: The path to save the promptfoo configuration files. Defaults to "promptfooconfigs". """ - if not root_path: - root_path = Path.cwd() + root_path = root_path or Path.cwd() prompts = PromptDiscovery(file_pattern=file_pattern, root_path=root_path).discover() Console().print( diff --git a/packages/ragbits-core/src/ragbits/core/utils/_pyproject.py b/packages/ragbits-core/src/ragbits/core/utils/_pyproject.py index d51e76778..0fcebfa69 100644 --- a/packages/ragbits-core/src/ragbits/core/utils/_pyproject.py +++ b/packages/ragbits-core/src/ragbits/core/utils/_pyproject.py @@ -5,19 +5,6 @@ from pydantic import BaseModel -def _get_current_dir(current_dir: Path | None = None) -> Path: - """ - Returns the current directory if `current_dir` is None. - - Args: - current_dir (Path, optional): The directory to check. Defaults to None. - - Returns: - Path: The current directory. - """ - return current_dir or Path.cwd() - - def find_pyproject(current_dir: Path | None = None) -> Path: """ Find the pyproject.toml file in the current directory or any of its parents. @@ -32,7 +19,7 @@ def find_pyproject(current_dir: Path | None = None) -> Path: Raises: FileNotFoundError: If the pyproject.toml file is not found. """ - current_dir = _get_current_dir(current_dir) + current_dir = current_dir or Path.cwd() possible_dirs = [current_dir, *current_dir.parents] for possible_dir in possible_dirs: @@ -56,7 +43,7 @@ def get_ragbits_config(current_dir: Path | None = None) -> dict[str, Any]: Returns: dict: The ragbits configuration. """ - current_dir = _get_current_dir(current_dir) + current_dir = current_dir or Path.cwd() try: pyproject = find_pyproject(current_dir) @@ -89,7 +76,7 @@ def get_config_instance( Returns: ConfigModelT: The model instance loaded with the configuration """ - current_dir = _get_current_dir(current_dir) + current_dir = current_dir or Path.cwd() config = get_ragbits_config(current_dir) print(config) From 3d0e547d08779111fd60c7f43987afbf8fac49f9 Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Tue, 15 Oct 2024 10:34:14 +0200 Subject: [PATCH 16/28] Unnecessary modification of first entries of prompt chat. --- .../tests/unit/prompts/test_prompt.py | 37 +++++-------------- 1 file changed, 9 insertions(+), 28 deletions(-) diff --git a/packages/ragbits-core/tests/unit/prompts/test_prompt.py b/packages/ragbits-core/tests/unit/prompts/test_prompt.py index 50d74fb14..b6d93a500 100644 --- a/packages/ragbits-core/tests/unit/prompts/test_prompt.py +++ b/packages/ragbits-core/tests/unit/prompts/test_prompt.py @@ -124,10 +124,7 @@ class TestPrompt(Prompt[_PromptInput, str]): # pylint: disable=unused-variable assert prompt.rendered_system_prompt == "You are a song generator for a adult named Alice." assert prompt.rendered_user_prompt == "Theme for the song is rock." assert prompt.chat == [ - { - "role": "system", - "content": "You are a song generator for a adult named Alice.", - }, + {"role": "system", "content": "You are a song generator for a adult named Alice."}, {"role": "user", "content": "Theme for the song is rock."}, ] @@ -183,10 +180,7 @@ class TestPrompt(Prompt[_PromptInput, str]): # pylint: disable=unused-variable prompt = TestPrompt(_PromptInput(name="John", age=15, theme="rock")) assert prompt.chat == [ - { - "role": "system", - "content": "You are a song generator for a child named John.", - }, + {"role": "system", "content": "You are a song generator for a child named John."}, {"role": "user", "content": "Theme for the song is pop."}, {"role": "assistant", "content": "It's a really catchy tune."}, {"role": "user", "content": "Theme for the song is rock."}, @@ -230,10 +224,8 @@ class TestPrompt(Prompt[_PromptInput, str]): # pylint: disable=unused-variable prompt.add_few_shot(_PromptInput(name="Alice", age=30, theme="pop"), "It's a really catchy tune.") assert prompt.chat == [ - { - "role": "system", - "content": "You are a song generator for a child named John.", - }, + {"role": "system", + "content": "You are a song generator for a child named John."}, {"role": "user", "content": "Theme for the song is pop."}, {"role": "assistant", "content": "It's a really catchy tune."}, {"role": "user", "content": "Theme for the song is rock."}, @@ -289,10 +281,8 @@ class TestPrompt(Prompt[_PromptInput, _PromptOutput]): # pylint: disable=unused prompt.add_few_shot("Theme for the song is disco.", _PromptOutput(song_title="Disco song", song_lyrics="Boogie!")) assert prompt.chat == [ - { - "role": "system", - "content": "You are a song generator for a child named John.", - }, + {"role": "system", + "content": "You are a song generator for a child named John."}, {"role": "user", "content": "Theme for the song is pop."}, {"role": "assistant", "content": '{"song_title":"Pop song","song_lyrics":"La la la"}'}, {"role": "user", "content": "Theme for the song is disco."}, @@ -319,10 +309,7 @@ class GoodNameDetectorPrompt(Prompt[_PromptInput, bool]): # pylint: disable=unu prompt.add_few_shot(_PromptInput(theme="pop", name="The blood of a demon", age=75), False) assert prompt.chat == [ - { - "role": "system", - "content": "You detect whether the name name is a good name for a song with the given theme, given the age limit.", - }, + {"role": "system", "content": "You detect whether the name name is a good name for a song with the given theme, given the age limit."}, {"role": "user", "content": "The name is I love you more than my cat, the theme is pop and the age is 15."}, {"role": "assistant", "content": "True"}, {"role": "user", "content": "The name is The blood of a demon, the theme is pop and the age is 75."}, @@ -413,20 +400,14 @@ class TestPrompt(Prompt[_PromptInput, str]): # pylint: disable=unused-variable prompt2.add_few_shot("Theme for the song is 90s pop.", "Why do I know all the words?") assert prompt1.chat == [ - { - "role": "system", - "content": "You are a song generator for a child named John.", - }, + {"role": "system", "content": "You are a song generator for a child named John."}, {"role": "user", "content": "Theme for the song is 80s disco."}, {"role": "assistant", "content": "I can't stop dancing."}, {"role": "user", "content": "Theme for the song is pop."}, ] assert prompt2.chat == [ - { - "role": "system", - "content": "You are a song generator for a adult named Alice.", - }, + {"role": "system", "content": "You are a song generator for a adult named Alice."}, {"role": "user", "content": "Theme for the song is 90s pop."}, {"role": "assistant", "content": "Why do I know all the words?"}, {"role": "user", "content": "Theme for the song is rock."}, From 72e69896a6205ee807ff9e6b1224dd1883f54b26 Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Tue, 15 Oct 2024 10:39:20 +0200 Subject: [PATCH 17/28] Missing corrections. --- .../ragbits-core/tests/unit/prompts/test_prompt.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/packages/ragbits-core/tests/unit/prompts/test_prompt.py b/packages/ragbits-core/tests/unit/prompts/test_prompt.py index b6d93a500..81478b954 100644 --- a/packages/ragbits-core/tests/unit/prompts/test_prompt.py +++ b/packages/ragbits-core/tests/unit/prompts/test_prompt.py @@ -224,8 +224,7 @@ class TestPrompt(Prompt[_PromptInput, str]): # pylint: disable=unused-variable prompt.add_few_shot(_PromptInput(name="Alice", age=30, theme="pop"), "It's a really catchy tune.") assert prompt.chat == [ - {"role": "system", - "content": "You are a song generator for a child named John."}, + {"role": "system", "content": "You are a song generator for a child named John."}, {"role": "user", "content": "Theme for the song is pop."}, {"role": "assistant", "content": "It's a really catchy tune."}, {"role": "user", "content": "Theme for the song is rock."}, @@ -281,8 +280,7 @@ class TestPrompt(Prompt[_PromptInput, _PromptOutput]): # pylint: disable=unused prompt.add_few_shot("Theme for the song is disco.", _PromptOutput(song_title="Disco song", song_lyrics="Boogie!")) assert prompt.chat == [ - {"role": "system", - "content": "You are a song generator for a child named John."}, + {"role": "system", "content": "You are a song generator for a child named John."}, {"role": "user", "content": "Theme for the song is pop."}, {"role": "assistant", "content": '{"song_title":"Pop song","song_lyrics":"La la la"}'}, {"role": "user", "content": "Theme for the song is disco."}, @@ -372,10 +370,7 @@ class TestPrompt(Prompt[_PromptInput, str]): # pylint: disable=unused-variable user_prompt = "Theme for the song is {{ theme }}." assert TestPrompt.to_promptfoo(promptfoo_test_config) == [ - { - "role": "system", - "content": "You are a song generator for a adult named John.", - }, + {"role": "system", "content": "You are a song generator for a adult named John."}, {"role": "user", "content": "Theme for the song is pop."}, ] From 26b8f0bc39879170b4629d8c738d203788db0c4f Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Tue, 15 Oct 2024 16:02:19 +0200 Subject: [PATCH 18/28] Back to ABC for LLMOptions. --- packages/ragbits-core/src/ragbits/core/llms/clients/base.py | 2 +- .../src/ragbits/document_search/ingestion/document_processor.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/ragbits-core/src/ragbits/core/llms/clients/base.py b/packages/ragbits-core/src/ragbits/core/llms/clients/base.py index 62a553037..b22dec5a7 100644 --- a/packages/ragbits-core/src/ragbits/core/llms/clients/base.py +++ b/packages/ragbits-core/src/ragbits/core/llms/clients/base.py @@ -12,7 +12,7 @@ @dataclass -class LLMOptions: +class LLMOptions(ABC): """ A dataclass that represents all available LLM call options. """ diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py index b9158cc14..85508c51f 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py @@ -50,7 +50,7 @@ def from_config(cls, providers_config: ProvidersConfig | None = None) -> "Docume { DocumentType.TXT: YourCustomProviderClass(), DocumentType.PDF: UnstructuredProvider(), - }. + } Args: providers_config: The dictionary with the providers configuration, mapping the document types to the From 7f40eb4a4a9dee5d9d02463b003e3e2b88be044a Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Wed, 16 Oct 2024 10:49:30 +0200 Subject: [PATCH 19/28] Excluding tests from formatting. --- .pre-commit-config.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e94dc598d..a9e009835 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -26,7 +26,8 @@ repos: # Formats Python, Pyi, and Jupyter files, excluding specified directories - id: ruff-format types_or: [ python, pyi, jupyter ] - exclude: (docs/) + exclude: (docs/|/tests/) + args: [ --quiet ] # Used to have proper type annotations for library code. - repo: https://github.com/pre-commit/mirrors-mypy From e5fc91399216b343109f58419e6bca1305973010 Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Wed, 16 Oct 2024 11:20:03 +0200 Subject: [PATCH 20/28] After-update pre-commit run. --- .../ragbits-core/src/ragbits/core/embeddings/noop.py | 3 +-- .../src/ragbits/core/utils/config_handling.py | 2 +- .../src/ragbits/core/vector_store/__init__.py | 1 - .../ragbits-document-search/examples/documents_chat.py | 6 ++++-- .../examples/from_config_example.py | 3 +-- .../src/ragbits/document_search/_main.py | 1 - .../document_search/ingestion/document_processor.py | 2 -- .../document_search/ingestion/providers/__init__.py | 1 - .../document_search/retrieval/rephrasers/__init__.py | 4 +--- .../document_search/retrieval/rerankers/__init__.py | 4 +--- pyproject.toml | 10 +++------- 11 files changed, 12 insertions(+), 25 deletions(-) diff --git a/packages/ragbits-core/src/ragbits/core/embeddings/noop.py b/packages/ragbits-core/src/ragbits/core/embeddings/noop.py index e2d91c86b..386b8e025 100644 --- a/packages/ragbits-core/src/ragbits/core/embeddings/noop.py +++ b/packages/ragbits-core/src/ragbits/core/embeddings/noop.py @@ -10,7 +10,7 @@ class NoopEmbeddings(Embeddings): or as a placeholder when an actual embedding model is not required. """ - async def embed_text(self, data: list[str]) -> list[list[float]]: + async def embed_text(self, data: list[str]) -> list[list[float]]: # noqa: PLR6301 """ Embeds a list of strings into a list of vectors. @@ -21,5 +21,4 @@ async def embed_text(self, data: list[str]) -> list[list[float]]: A list of embedding vectors, where each vector is a fixed value of [0.1, 0.1] for each input string. """ - return [[0.1, 0.1]] * len(data) diff --git a/packages/ragbits-core/src/ragbits/core/utils/config_handling.py b/packages/ragbits-core/src/ragbits/core/utils/config_handling.py index 4ec9bf2db..8523779cb 100644 --- a/packages/ragbits-core/src/ragbits/core/utils/config_handling.py +++ b/packages/ragbits-core/src/ragbits/core/utils/config_handling.py @@ -3,7 +3,7 @@ from typing import Any -def get_cls_from_config(cls_path: str, default_module: ModuleType) -> Any: +def get_cls_from_config(cls_path: str, default_module: ModuleType) -> Any: # noqa: ANN401 """ Retrieves and returns a class based on the given type string. The class can be either in the default module or a specified module if provided in the type string. diff --git a/packages/ragbits-core/src/ragbits/core/vector_store/__init__.py b/packages/ragbits-core/src/ragbits/core/vector_store/__init__.py index 3b05232e4..46f082eda 100644 --- a/packages/ragbits-core/src/ragbits/core/vector_store/__init__.py +++ b/packages/ragbits-core/src/ragbits/core/vector_store/__init__.py @@ -21,7 +21,6 @@ def get_vector_store(vector_store_config: dict) -> VectorStore: An instance of the specified VectorStore class, initialized with the provided config (if any) or default arguments. """ - vector_store_cls = get_cls_from_config(vector_store_config["type"], module) config = vector_store_config.get("config", {}) diff --git a/packages/ragbits-document-search/examples/documents_chat.py b/packages/ragbits-document-search/examples/documents_chat.py index b5d4121dc..4ed895e41 100644 --- a/packages/ragbits-document-search/examples/documents_chat.py +++ b/packages/ragbits-document-search/examples/documents_chat.py @@ -6,8 +6,8 @@ # "ragbits-core[chromadb, litellm]", # ] # /// +from collections.abc import AsyncIterator from pathlib import Path -from typing import AsyncIterator import chromadb import gradio as gr @@ -119,7 +119,9 @@ def _load_database(self, database_path: str) -> str: return self.DATABASE_LOADED_MESSAGE async def _handle_message( - self, message: str, history: list[dict] # pylint: disable=unused-argument + self, + message: str, + history: list[dict], # pylint: disable=unused-argument ) -> AsyncIterator[str]: if not self._documents_ingested: yield self.NO_DOCUMENTS_INGESTED_MESSAGE diff --git a/packages/ragbits-document-search/examples/from_config_example.py b/packages/ragbits-document-search/examples/from_config_example.py index 1599cf84a..17f01cc7f 100644 --- a/packages/ragbits-document-search/examples/from_config_example.py +++ b/packages/ragbits-document-search/examples/from_config_example.py @@ -35,9 +35,8 @@ } -async def main(): +async def main() -> None: """Run the example.""" - document_search = DocumentSearch.from_config(config) for document in documents: diff --git a/packages/ragbits-document-search/src/ragbits/document_search/_main.py b/packages/ragbits-document-search/src/ragbits/document_search/_main.py index fda7a9eb4..536aa6310 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/_main.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/_main.py @@ -71,7 +71,6 @@ def from_config(cls, config: dict) -> "DocumentSearch": Returns: DocumentSearch: An initialized instance of the DocumentSearch class. """ - embedder = get_embeddings(config["embedder"]) query_rephraser = get_rephraser(config.get("rephraser")) reranker = get_reranker(config.get("reranker")) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py index 5983885c5..5782d8d1c 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/document_processor.py @@ -60,7 +60,6 @@ def from_dict_to_providers_config(dict_config: dict) -> ProvidersConfig: Returns: ProvidersConfig object. """ - providers_config = {} for document_type, config in dict_config.items(): @@ -87,7 +86,6 @@ def from_config(cls, providers_config: ProvidersConfig | None = None) -> "Docume Returns: The DocumentProcessorRouter. """ - config = copy.deepcopy(DEFAULT_PROVIDERS_CONFIG) config.update(providers_config if providers_config is not None else {}) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/__init__.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/__init__.py index fc2c5d2f5..bef0ee40a 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/__init__.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/__init__.py @@ -22,7 +22,6 @@ def get_provider(provider_config: dict) -> BaseProvider: An instance of the specified Provider class, initialized with the provided config (if any) or default arguments. """ - provider_cls = get_cls_from_config(provider_config["type"], module) config = provider_config.get("config", {}) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/__init__.py b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/__init__.py index b136d4f38..8abb0d1d4 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/__init__.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/__init__.py @@ -1,5 +1,4 @@ import sys -from typing import Optional from ragbits.core.utils.config_handling import get_cls_from_config @@ -11,7 +10,7 @@ module = sys.modules[__name__] -def get_rephraser(rephraser_config: Optional[dict]) -> QueryRephraser: +def get_rephraser(rephraser_config: dict | None) -> QueryRephraser: """ Initializes and returns a QueryRephraser object based on the provided configuration. @@ -22,7 +21,6 @@ def get_rephraser(rephraser_config: Optional[dict]) -> QueryRephraser: An instance of the specified QueryRephraser class, initialized with the provided config (if any) or default arguments. """ - if rephraser_config is None: return NoopQueryRephraser() diff --git a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/__init__.py b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/__init__.py index 5dab75f9a..95a4cfab5 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/__init__.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/__init__.py @@ -1,5 +1,4 @@ import sys -from typing import Optional from ragbits.core.utils.config_handling import get_cls_from_config @@ -11,7 +10,7 @@ module = sys.modules[__name__] -def get_reranker(reranker_config: Optional[dict]) -> Reranker: +def get_reranker(reranker_config: dict | None) -> Reranker: """ Initializes and returns a Reranker object based on the provided configuration. @@ -22,7 +21,6 @@ def get_reranker(reranker_config: Optional[dict]) -> Reranker: An instance of the specified Reranker class, initialized with the provided config (if any) or default arguments. """ - if reranker_config is None: return NoopReranker() diff --git a/pyproject.toml b/pyproject.toml index 08e5c6de6..40facd452 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,13 +40,6 @@ members = [ "packages/ragbits-cli" ] -[tool.isort] -multi_line_output = 3 -line_length = 120 -include_trailing_comma = true - -skip_gitignore = true - [tool.pytest] norecursedirs = [ '.git', @@ -128,12 +121,14 @@ extend-select = [ "PLR6301", # no-self-use ] ignore = [ + "B024", "B028", # no-explicit-stacklevel, TODO confirm this "C901", # complex-structure, TODO confirm this "D100", "D104", "D105", "D107", + "D415", "D200", "D205", "D212", @@ -158,6 +153,7 @@ convention = "google" "D103", "D107" ] +# "/home/patryk/repositories/internal/ragnarok_1.0/packages/ragbits-core/tests/unit/prompts/test_prompt.py" = ["S101"] [tool.ruff.format] docstring-code-format = true From 38dd2c5257394b27ad34a3320200704c63433d91 Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Wed, 16 Oct 2024 11:22:16 +0200 Subject: [PATCH 21/28] Last error deruffed. --- .../examples/documents_chat.py | 51 +++++++++---------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/packages/ragbits-document-search/examples/documents_chat.py b/packages/ragbits-document-search/examples/documents_chat.py index 4ed895e41..86b7f00bd 100644 --- a/packages/ragbits-document-search/examples/documents_chat.py +++ b/packages/ragbits-document-search/examples/documents_chat.py @@ -139,32 +139,31 @@ def prepare_layout(self) -> gr.Blocks: Returns: gradio layout """ - with gr.Blocks(fill_height=True, fill_width=True) as app: - with gr.Row(): - with gr.Column(scale=self._columns_ratios[0]): - with gr.Group(): - documents_picker = gr.File(file_count="multiple", label=self.DOCUMENT_PICKER_LABEL) - create_btn = gr.Button(self.DATABASE_CREATE_BUTTON_LABEL) - creating_status_display = gr.Textbox( - label=self.DATABASE_CREATION_STATUS_LABEL, - interactive=False, - placeholder=self.DATABASE_CREATION_STATUS_PLACEHOLDER, - ) - - with gr.Group(): - database_path = gr.Textbox(label=self.DATABASE_TEXT_BOX_LABEL) - load_btn = gr.Button(self.DATABASE_LOAD_BUTTON_LABEL) - loading_status_display = gr.Textbox( - label=self.DATABASE_LOADING_STATUS_LABEL, - interactive=False, - placeholder=self.DATABASE_LOADING_STATUS_PLACEHOLDER, - ) - load_btn.click(fn=self._load_database, inputs=database_path, outputs=loading_status_display) - create_btn.click(fn=self._create_database, inputs=documents_picker, outputs=creating_status_display) - - with gr.Column(scale=self._columns_ratios[1]): - chat_interface = gr.ChatInterface(self._handle_message, type="messages") - chat_interface.chatbot.height = f"{self._chatbot_height_vh}vh" + with gr.Blocks(fill_height=True, fill_width=True) as app, gr.Row(): + with gr.Column(scale=self._columns_ratios[0]): + with gr.Group(): + documents_picker = gr.File(file_count="multiple", label=self.DOCUMENT_PICKER_LABEL) + create_btn = gr.Button(self.DATABASE_CREATE_BUTTON_LABEL) + creating_status_display = gr.Textbox( + label=self.DATABASE_CREATION_STATUS_LABEL, + interactive=False, + placeholder=self.DATABASE_CREATION_STATUS_PLACEHOLDER, + ) + + with gr.Group(): + database_path = gr.Textbox(label=self.DATABASE_TEXT_BOX_LABEL) + load_btn = gr.Button(self.DATABASE_LOAD_BUTTON_LABEL) + loading_status_display = gr.Textbox( + label=self.DATABASE_LOADING_STATUS_LABEL, + interactive=False, + placeholder=self.DATABASE_LOADING_STATUS_PLACEHOLDER, + ) + load_btn.click(fn=self._load_database, inputs=database_path, outputs=loading_status_display) + create_btn.click(fn=self._create_database, inputs=documents_picker, outputs=creating_status_display) + + with gr.Column(scale=self._columns_ratios[1]): + chat_interface = gr.ChatInterface(self._handle_message, type="messages") + chat_interface.chatbot.height = f"{self._chatbot_height_vh}vh" return app From a903dd6a9718c8be716fa8a7514a791b755875b6 Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski Date: Fri, 18 Oct 2024 16:28:25 +0200 Subject: [PATCH 22/28] This works as intended. Let's have tests that are formatted by `ruff-format` --- .pre-commit-config.yaml | 2 +- packages/ragbits-core/tests/unit/prompts/test_prompt.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a9e009835..982b85361 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -26,7 +26,7 @@ repos: # Formats Python, Pyi, and Jupyter files, excluding specified directories - id: ruff-format types_or: [ python, pyi, jupyter ] - exclude: (docs/|/tests/) + exclude: (docs/) args: [ --quiet ] # Used to have proper type annotations for library code. diff --git a/packages/ragbits-core/tests/unit/prompts/test_prompt.py b/packages/ragbits-core/tests/unit/prompts/test_prompt.py index 81478b954..e96bcaa4f 100644 --- a/packages/ragbits-core/tests/unit/prompts/test_prompt.py +++ b/packages/ragbits-core/tests/unit/prompts/test_prompt.py @@ -307,7 +307,10 @@ class GoodNameDetectorPrompt(Prompt[_PromptInput, bool]): # pylint: disable=unu prompt.add_few_shot(_PromptInput(theme="pop", name="The blood of a demon", age=75), False) assert prompt.chat == [ - {"role": "system", "content": "You detect whether the name name is a good name for a song with the given theme, given the age limit."}, + { + "role": "system", + "content": "You detect whether the name name is a good name for a song with the given theme, given the age limit.", + }, {"role": "user", "content": "The name is I love you more than my cat, the theme is pop and the age is 15."}, {"role": "assistant", "content": "True"}, {"role": "user", "content": "The name is The blood of a demon, the theme is pop and the age is 75."}, From 6a43980910a8449e8c75886e0625043b5bb81f62 Mon Sep 17 00:00:00 2001 From: Alan Konarski Date: Thu, 24 Oct 2024 13:24:48 +0200 Subject: [PATCH 23/28] Fix mypy and ruff issues --- .github/workflows/ci.yml | 10 ++- .pre-commit-config.yaml | 26 ------- CONTRIBUTING.md | 22 +++++- examples/apps/documents_chat.py | 2 +- examples/document-search/chroma.py | 2 +- packages/ragbits-cli/py.typed | 0 .../ragbits-cli/src/ragbits/cli/__init__.py | 5 +- packages/ragbits-core/py.typed | 0 packages/ragbits-core/src/ragbits/core/cli.py | 2 +- .../src/ragbits/core/utils/decorators.py | 3 +- .../src/ragbits/core/vector_store/__init__.py | 2 +- .../core/vector_store/chromadb_store.py | 6 +- .../ragbits/core/vector_store/in_memory.py | 2 +- .../unit/llms/factory/test_get_default_llm.py | 5 +- .../unit/llms/factory/test_has_default_llm.py | 6 +- .../tests/unit/llms/test_litellm.py | 3 +- .../tests/unit/prompts/test_prompt.py | 3 +- .../tests/unit/utils/test_decorators.py | 8 +-- .../unit/vector_stores/test_chromadb_store.py | 71 ++++++++++--------- .../vector_stores/test_simple_vector_store.py | 22 +++--- packages/ragbits-document-search/py.typed | 0 .../src/ragbits/document_search/_main.py | 13 ++-- .../ingestion/providers/__init__.py | 2 +- .../providers/unstructured/default.py | 4 +- .../providers/unstructured/images.py | 17 ++--- .../ingestion/providers/unstructured/pdf.py | 9 ++- .../ingestion/providers/unstructured/utils.py | 9 ++- .../retrieval/rephrasers/__init__.py | 8 +-- .../retrieval/rephrasers/noop.py | 2 +- .../ragbits-document-search/tests/helpers.py | 2 +- .../tests/integration/test_sources.py | 4 +- .../tests/integration/test_unstructured.py | 20 +++--- .../tests/unit/test_document_search.py | 23 +++--- .../tests/unit/test_providers.py | 4 +- packages/ragbits-evaluate/py.typed | 0 .../src/ragbits/evaluate/evaluator.py | 14 ++-- .../src/ragbits/evaluate/metrics/base.py | 6 +- .../evaluate/metrics/document_search.py | 4 +- .../src/ragbits/evaluate/pipelines/base.py | 4 +- .../src/ragbits/evaluate/utils.py | 2 +- pyproject.toml | 15 +++- scripts/update_ragbits_package.py | 4 +- uv.lock | 64 +++++++++++++++-- 43 files changed, 255 insertions(+), 175 deletions(-) create mode 100644 packages/ragbits-cli/py.typed create mode 100644 packages/ragbits-core/py.typed create mode 100644 packages/ragbits-document-search/py.typed create mode 100644 packages/ragbits-evaluate/py.typed diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a3d5c5687..e13e546d9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,9 +38,17 @@ jobs: - name: Run pre-commit checks run: | - source .venv/bin/activate pre-commit run --all-files --show-diff-on-failure --color always + - name: Run ruff formatter + run: uvx ruff format --check + + - name: Run ruff linter + run: uvx ruff check + + - name: Run mypy + run: uv run mypy . + - name: Run Trivy vulnerability scanner uses: aquasecurity/trivy-action@master with: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 982b85361..c5c768846 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,29 +13,3 @@ repos: - id: check-toml - id: check-json - id: check-yaml - - - - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.6.9 - hooks: - # E1131: unsupported operand type(s) for | (unsupported-binary-operation) - - id: ruff - types_or: [ python, pyi, jupyter ] - exclude: (/test_|tests/|docs/|notebooks/) - args: [ --fix ] - # Formats Python, Pyi, and Jupyter files, excluding specified directories - - id: ruff-format - types_or: [ python, pyi, jupyter ] - exclude: (docs/) - args: [ --quiet ] - - # Used to have proper type annotations for library code. - - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.11.2 - hooks: - - id: mypy - # You can add additional plugins for mypy below - # such as types-python-dateutil - additional_dependencies: [pydantic>=2.8.2, types-pyyaml>=6.0.12] - exclude: (/test_|setup.py|/tests/|docs/) - diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5a4ce4358..30209efc4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -11,9 +11,29 @@ To build and run Ragbits from the source code: $ source ./setup_dev_env.sh ``` +## Linting and formatting +We use `ruff` for linting and formatting our code. To format your code, run: + +```bash +$ uvx ruff format +``` + +To lint the code, run: +```bash +$ uvx ruff check --fix +``` + +## Type checking +We use `mypy` for type checking. To perform type checking, simply run: + +```bash +$ uv run mypy . +``` + + ## Install pre-commit -To ensure code quality we use pre-commit hook with several checks. Setup it by: +We also have some check run via pre-commit hook. Setup it by: ``` pre-commit install diff --git a/examples/apps/documents_chat.py b/examples/apps/documents_chat.py index fd13fbdec..7f2ec879f 100644 --- a/examples/apps/documents_chat.py +++ b/examples/apps/documents_chat.py @@ -104,7 +104,7 @@ def _prepare_document_search(self, database_path: str, index_name: str) -> None: chroma_client=chroma_client, embedding_function=embedding_client, ) - self.document_search = DocumentSearch(embedder=vector_store.embedding_function, vector_store=vector_store) + self.document_search = DocumentSearch(embedder=embedding_client, vector_store=vector_store) async def _create_database(self, document_paths: list[str]) -> str: for path in document_paths: diff --git a/examples/document-search/chroma.py b/examples/document-search/chroma.py index a8c96157f..0c5421a2d 100644 --- a/examples/document-search/chroma.py +++ b/examples/document-search/chroma.py @@ -35,7 +35,7 @@ async def main() -> None: chroma_client=chroma_client, embedding_function=embedding_client, ) - document_search = DocumentSearch(embedder=vector_store.embedding_function, vector_store=vector_store) + document_search = DocumentSearch(embedder=embedding_client, vector_store=vector_store) await document_search.ingest(documents) diff --git a/packages/ragbits-cli/py.typed b/packages/ragbits-cli/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/packages/ragbits-cli/src/ragbits/cli/__init__.py b/packages/ragbits-cli/src/ragbits/cli/__init__.py index dbb147235..526963607 100644 --- a/packages/ragbits-cli/src/ragbits/cli/__init__.py +++ b/packages/ragbits-cli/src/ragbits/cli/__init__.py @@ -19,13 +19,10 @@ def main() -> None: - if found it imports the `register` function from the `cli` module and calls it with the `app` object - register function should add the CLI commands to the `app` object """ - cli_enabled_modules = [ module for module in pkgutil.iter_modules(ragbits.__path__) - if module.ispkg - and module.name != "cli" - and (Path(module.module_finder.path) / module.name / "cli.py").exists() # type: ignore + if module.ispkg and module.name != "cli" and (Path(module.module_finder.path) / module.name / "cli.py").exists() # type: ignore ] for module in cli_enabled_modules: diff --git a/packages/ragbits-core/py.typed b/packages/ragbits-core/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/packages/ragbits-core/src/ragbits/core/cli.py b/packages/ragbits-core/src/ragbits/core/cli.py index bb1d9bae2..772fd03ec 100644 --- a/packages/ragbits-core/src/ragbits/core/cli.py +++ b/packages/ragbits-core/src/ragbits/core/cli.py @@ -33,7 +33,7 @@ def lab( @prompts_app.command() def generate_promptfoo_configs( file_pattern: str = core_config.prompt_path_pattern, - root_path: Path = Path.cwd(), + root_path: Path = Path.cwd(), # noqa: B008 target_path: Path = Path("promptfooconfigs"), ) -> None: """ diff --git a/packages/ragbits-core/src/ragbits/core/utils/decorators.py b/packages/ragbits-core/src/ragbits/core/utils/decorators.py index a585fe5ef..531b54b8e 100644 --- a/packages/ragbits-core/src/ragbits/core/utils/decorators.py +++ b/packages/ragbits-core/src/ragbits/core/utils/decorators.py @@ -1,9 +1,10 @@ # pylint: disable=missing-function-docstring,missing-return-doc import asyncio +from collections.abc import Callable from functools import wraps from importlib.util import find_spec -from typing import Callable, ParamSpec, TypeVar +from typing import ParamSpec, TypeVar _P = ParamSpec("_P") _T = TypeVar("_T") diff --git a/packages/ragbits-core/src/ragbits/core/vector_store/__init__.py b/packages/ragbits-core/src/ragbits/core/vector_store/__init__.py index 0a9812634..e3233251c 100644 --- a/packages/ragbits-core/src/ragbits/core/vector_store/__init__.py +++ b/packages/ragbits-core/src/ragbits/core/vector_store/__init__.py @@ -5,7 +5,7 @@ from .chromadb_store import ChromaDBStore from .in_memory import InMemoryVectorStore -__all__ = ["InMemoryVectorStore", "VectorDBEntry", "VectorStore", "ChromaDBStore", "WhereQuery"] +__all__ = ["ChromaDBStore", "InMemoryVectorStore", "VectorDBEntry", "VectorStore", "WhereQuery"] module = sys.modules[__name__] diff --git a/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py b/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py index da245ae30..32788c863 100644 --- a/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py +++ b/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py @@ -156,8 +156,8 @@ async def retrieve(self, vector: list[float], k: int = 5) -> list[VectorDBEntry] embeddings = query_result.get("embeddings") or [] db_entries = [] - for meta_list, embeddings_list in zip(metadatas, embeddings): - for meta, embedding in zip(meta_list, embeddings_list): + for meta_list, embeddings_list in zip(metadatas, embeddings, strict=False): + for meta, embedding in zip(meta_list, embeddings_list, strict=False): db_entry = VectorDBEntry( key=str(meta["__key"]), vector=list(embedding), @@ -193,7 +193,7 @@ async def list( embeddings = get_results.get("embeddings") or [] db_entries = [] - for meta, embedding in zip(metadatas, embeddings): + for meta, embedding in zip(metadatas, embeddings, strict=False): db_entry = VectorDBEntry( key=str(meta["__key"]), vector=list(embedding), diff --git a/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py b/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py index 121f1c432..7691ffbd6 100644 --- a/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py +++ b/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py @@ -46,7 +46,7 @@ async def retrieve(self, vector: list[float], k: int = 5) -> list[VectorDBEntry] @staticmethod def _calculate_squared_euclidean(vector_x: list[float], vector_b: list[float]) -> float: - return np.linalg.norm(np.array(vector_x) - np.array(vector_b)) + return float(np.linalg.norm(np.array(vector_x) - np.array(vector_b))) async def list( self, where: WhereQuery | None = None, limit: int | None = None, offset: int = 0 diff --git a/packages/ragbits-core/tests/unit/llms/factory/test_get_default_llm.py b/packages/ragbits-core/tests/unit/llms/factory/test_get_default_llm.py index c07272fb3..617efcdaf 100644 --- a/packages/ragbits-core/tests/unit/llms/factory/test_get_default_llm.py +++ b/packages/ragbits-core/tests/unit/llms/factory/test_get_default_llm.py @@ -1,13 +1,14 @@ +import pytest + from ragbits.core.config import core_config from ragbits.core.llms.factory import get_default_llm from ragbits.core.llms.litellm import LiteLLM -def test_get_default_llm(monkeypatch): +def test_get_default_llm(monkeypatch: pytest.MonkeyPatch) -> None: """ Test the get_llm_from_factory function. """ - monkeypatch.setattr(core_config, "default_llm_factory", "factory.test_get_llm_from_factory.mock_llm_factory") llm = get_default_llm() diff --git a/packages/ragbits-core/tests/unit/llms/factory/test_has_default_llm.py b/packages/ragbits-core/tests/unit/llms/factory/test_has_default_llm.py index 59c864837..e3dd87da9 100644 --- a/packages/ragbits-core/tests/unit/llms/factory/test_has_default_llm.py +++ b/packages/ragbits-core/tests/unit/llms/factory/test_has_default_llm.py @@ -1,8 +1,10 @@ +import pytest + from ragbits.core.config import core_config from ragbits.core.llms.factory import has_default_llm -def test_has_default_llm(monkeypatch): +def test_has_default_llm(monkeypatch: pytest.MonkeyPatch) -> None: """ Test the has_default_llm function when the default LLM factory is not set. """ @@ -11,7 +13,7 @@ def test_has_default_llm(monkeypatch): assert has_default_llm() is False -def test_has_default_llm_false(monkeypatch): +def test_has_default_llm_false(monkeypatch: pytest.MonkeyPatch) -> None: """ Test the has_default_llm function when the default LLM factory is set. """ diff --git a/packages/ragbits-core/tests/unit/llms/test_litellm.py b/packages/ragbits-core/tests/unit/llms/test_litellm.py index 3a7fcefa7..39016e913 100644 --- a/packages/ragbits-core/tests/unit/llms/test_litellm.py +++ b/packages/ragbits-core/tests/unit/llms/test_litellm.py @@ -55,7 +55,8 @@ def chat(self) -> ChatFormat: """ return [{"content": self.message, "role": "user"}] - def parse_response(self, response: str) -> int: + @staticmethod + def parse_response(response: str) -> int: """ Parser for the prompt. diff --git a/packages/ragbits-core/tests/unit/prompts/test_prompt.py b/packages/ragbits-core/tests/unit/prompts/test_prompt.py index e96bcaa4f..1fad74996 100644 --- a/packages/ragbits-core/tests/unit/prompts/test_prompt.py +++ b/packages/ragbits-core/tests/unit/prompts/test_prompt.py @@ -309,7 +309,8 @@ class GoodNameDetectorPrompt(Prompt[_PromptInput, bool]): # pylint: disable=unu assert prompt.chat == [ { "role": "system", - "content": "You detect whether the name name is a good name for a song with the given theme, given the age limit.", + "content": "You detect whether the name name is a good name for a song with the given theme, given the age" + " limit.", }, {"role": "user", "content": "The name is I love you more than my cat, the theme is pop and the age is 15."}, {"role": "assistant", "content": "True"}, diff --git a/packages/ragbits-core/tests/unit/utils/test_decorators.py b/packages/ragbits-core/tests/unit/utils/test_decorators.py index 49752e3d4..780a2df5c 100644 --- a/packages/ragbits-core/tests/unit/utils/test_decorators.py +++ b/packages/ragbits-core/tests/unit/utils/test_decorators.py @@ -20,8 +20,8 @@ def some_function() -> str: some_function() assert ( - str(exc.value) - == "Following dependencies are missing: nonexistent_dependency. Please install them using `pip install nonexistent_dependency`." + str(exc.value) == "Following dependencies are missing: nonexistent_dependency." + " Please install them using `pip install nonexistent_dependency`." ) @@ -42,6 +42,6 @@ def some_function() -> str: some_function() assert ( - str(exc.value) - == "Following dependencies are missing: nonexistent_dependency. Please install them using `pip install nonexistent_dependency`." + str(exc.value) == "Following dependencies are missing: nonexistent_dependency." + " Please install them using `pip install nonexistent_dependency`." ) diff --git a/packages/ragbits-core/tests/unit/vector_stores/test_chromadb_store.py b/packages/ragbits-core/tests/unit/vector_stores/test_chromadb_store.py index 7f5d4334b..5a44f8777 100644 --- a/packages/ragbits-core/tests/unit/vector_stores/test_chromadb_store.py +++ b/packages/ragbits-core/tests/unit/vector_stores/test_chromadb_store.py @@ -1,4 +1,5 @@ from hashlib import sha256 +from typing import Any from unittest.mock import AsyncMock, MagicMock, patch import pytest @@ -18,7 +19,7 @@ def mock_embedding_function(): @pytest.fixture -def mock_chromadb_store(mock_chroma_client, mock_embedding_function): +def mock_chromadb_store(mock_chroma_client: MagicMock, mock_embedding_function: MagicMock): return ChromaDBStore( index_name="test_index", chroma_client=mock_chroma_client, @@ -27,10 +28,10 @@ def mock_chromadb_store(mock_chroma_client, mock_embedding_function): class MockEmbeddings(Embeddings): - async def embed_text(self, text): + async def embed_text(self, text: list[str]): # noqa: PLR6301 return [[0.4, 0.5, 0.6]] - def __call__(self, input): + def __call__(self, input: list[str]): return self.embed_text(input) @@ -40,7 +41,9 @@ def custom_embedding_function(): @pytest.fixture -def mock_chromadb_store_with_custom_embedding_function(mock_chroma_client, custom_embedding_function): +def mock_chromadb_store_with_custom_embedding_function( + mock_chroma_client: MagicMock, custom_embedding_function: MagicMock +): return ChromaDBStore( index_name="test_index", chroma_client=mock_chroma_client, @@ -65,22 +68,21 @@ def mock_vector_db_entry(): def test_chromadbstore_init_import_error(): - with patch("ragbits.core.vector_store.chromadb_store.HAS_CHROMADB", False): - with pytest.raises(ImportError): - ChromaDBStore( - index_name="test_index", - chroma_client=MagicMock(), - embedding_function=MagicMock(), - ) + with patch("ragbits.core.vector_store.chromadb_store.HAS_CHROMADB", False), pytest.raises(ImportError): + ChromaDBStore( + index_name="test_index", + chroma_client=MagicMock(), + embedding_function=MagicMock(), + ) -def test_get_chroma_collection(mock_chromadb_store): +def test_get_chroma_collection(mock_chromadb_store: ChromaDBStore): _ = mock_chromadb_store._get_chroma_collection() - assert mock_chromadb_store._chroma_client.get_or_create_collection.called + assert mock_chromadb_store._chroma_client.get_or_create_collection.called # type: ignore -async def test_stores_entries_correctly(mock_chromadb_store): +async def test_stores_entries_correctly(mock_chromadb_store: ChromaDBStore): data = [ VectorDBEntry( key="test_key", @@ -98,37 +100,38 @@ async def test_stores_entries_correctly(mock_chromadb_store): await mock_chromadb_store.store(data) - mock_chromadb_store._chroma_client.get_or_create_collection().add.assert_called_once() + mock_chromadb_store._chroma_client.get_or_create_collection().add.assert_called_once() # type: ignore -def test_process_db_entry(mock_chromadb_store, mock_vector_db_entry): +def test_process_db_entry(mock_chromadb_store: ChromaDBStore, mock_vector_db_entry: VectorDBEntry): id, embedding, metadata = mock_chromadb_store._process_db_entry(mock_vector_db_entry) assert id == sha256(b"test_key").hexdigest() assert embedding == [0.1, 0.2, 0.3] assert ( - metadata["__metadata"] - == '{"content": "test content", "document": {"title": "test title", "source": {"path": "/test/path"}, "document_type": "test_type"}}' + metadata["__metadata"] == '{"content": "test content", "document": {"title": "test title", "source":' + ' {"path": "/test/path"}, "document_type": "test_type"}}' ) assert metadata["__key"] == "test_key" -async def test_store(mock_chromadb_store, mock_vector_db_entry): +async def test_store(mock_chromadb_store: ChromaDBStore, mock_vector_db_entry: VectorDBEntry): await mock_chromadb_store.store([mock_vector_db_entry]) - assert mock_chromadb_store._chroma_client.get_or_create_collection().add.called + assert mock_chromadb_store._chroma_client.get_or_create_collection().add.called # type: ignore -async def test_retrieves_entries_correctly(mock_chromadb_store): +async def test_retrieves_entries_correctly(mock_chromadb_store: ChromaDBStore): vector = [0.1, 0.2, 0.3] mock_collection = mock_chromadb_store._get_chroma_collection() - mock_collection.query.return_value = { + mock_collection.query.return_value = { # type: ignore "documents": [["test content"]], "metadatas": [ [ { "__key": "test_key", - "__metadata": '{"content": "test content", "document": {"title": "test title", "source": {"path": "/test/path"}, "document_type": "test_type"}}', + "__metadata": '{"content": "test content", "document": {"title": "test title", "source":' + ' {"path": "/test/path"}, "document_type": "test_type"}}', } ] ], @@ -143,18 +146,20 @@ async def test_retrieves_entries_correctly(mock_chromadb_store): assert entries[0].vector == [0.12, 0.25, 0.29] -async def test_lists_entries_correctly(mock_chromadb_store): +async def test_lists_entries_correctly(mock_chromadb_store: ChromaDBStore): mock_collection = mock_chromadb_store._get_chroma_collection() - mock_collection.get.return_value = { + mock_collection.get.return_value = { # type: ignore "documents": ["test content", "test content 2"], "metadatas": [ { "__key": "test_key", - "__metadata": '{"content": "test content", "document": {"title": "test title", "source": {"path": "/test/path"}, "document_type": "test_type"}}', + "__metadata": '{"content": "test content", "document": {"title": "test title", "source":' + ' {"path": "/test/path"}, "document_type": "test_type"}}', }, { "__key": "test_key_2", - "__metadata": '{"content": "test content 2", "document": {"title": "test title 2", "source": {"path": "/test/path"}, "document_type": "test_type"}}', + "__metadata": '{"content": "test content 2", "document": {"title": "test title 2", "source":' + ' {"path": "/test/path"}, "document_type": "test_type"}}', }, ], "embeddings": [[0.12, 0.25, 0.29], [0.13, 0.26, 0.30]], @@ -171,29 +176,31 @@ async def test_lists_entries_correctly(mock_chromadb_store): assert entries[1].vector == [0.13, 0.26, 0.30] -async def test_handles_empty_retrieve(mock_chromadb_store): +async def test_handles_empty_retrieve(mock_chromadb_store: ChromaDBStore): vector = [0.1, 0.2, 0.3] mock_collection = mock_chromadb_store._get_chroma_collection() - mock_collection.query.return_value = {"documents": [], "metadatas": []} + mock_collection.query.return_value = {"documents": [], "metadatas": []} # type: ignore entries = await mock_chromadb_store.retrieve(vector) assert len(entries) == 0 -def test_repr(mock_chromadb_store): +def test_repr(mock_chromadb_store: ChromaDBStore): assert repr(mock_chromadb_store) == "ChromaDBStore(index_name=test_index)" @pytest.mark.parametrize( - "retrieved, max_distance, expected", + ("retrieved", "max_distance", "expected"), [ ({"distances": [[0.1]], "documents": [["test content"]]}, None, "test content"), ({"distances": [[0.1]], "documents": [["test content"]]}, 0.2, "test content"), ({"distances": [[0.3]], "documents": [["test content"]]}, 0.2, None), ], ) -def test_return_best_match(mock_chromadb_store, retrieved, max_distance, expected): +def test_return_best_match( + mock_chromadb_store: ChromaDBStore, retrieved: dict[str, Any], max_distance: float | None, expected: str | None +): mock_chromadb_store._max_distance = max_distance result = mock_chromadb_store._return_best_match(retrieved) diff --git a/packages/ragbits-core/tests/unit/vector_stores/test_simple_vector_store.py b/packages/ragbits-core/tests/unit/vector_stores/test_simple_vector_store.py index 6356bef5a..1bb899e8e 100644 --- a/packages/ragbits-core/tests/unit/vector_stores/test_simple_vector_store.py +++ b/packages/ragbits-core/tests/unit/vector_stores/test_simple_vector_store.py @@ -51,7 +51,7 @@ async def store_fixture(): return store -async def test_simple_vector_store(store): +async def test_simple_vector_store(store: InMemoryVectorStore): search_vector = [0.4, 0.4] results = await store.retrieve(search_vector, 2) @@ -61,7 +61,7 @@ async def test_simple_vector_store(store): assert results[1].metadata["name"] == "fluffy" -async def test_list_all(store): +async def test_list_all(store: InMemoryVectorStore): results = await store.list() assert len(results) == 6 @@ -69,7 +69,7 @@ async def test_list_all(store): assert names == ["spikey", "fluffy", "slimy", "scaly", "hairy", "spotty"] -async def test_list_limit(store): +async def test_list_limit(store: InMemoryVectorStore): results = await store.list(limit=3) assert len(results) == 3 @@ -77,7 +77,7 @@ async def test_list_limit(store): assert names == {"spikey", "fluffy", "slimy"} -async def test_list_offset(store): +async def test_list_offset(store: InMemoryVectorStore): results = await store.list(offset=3) assert len(results) == 3 @@ -85,7 +85,7 @@ async def test_list_offset(store): assert names == {"scaly", "hairy", "spotty"} -async def test_limit_with_offset(store): +async def test_limit_with_offset(store: InMemoryVectorStore): results = await store.list(limit=2, offset=3) assert len(results) == 2 @@ -93,7 +93,7 @@ async def test_limit_with_offset(store): assert names == {"scaly", "hairy"} -async def test_where(store): +async def test_where(store: InMemoryVectorStore): results = await store.list(where={"type": "insect"}) assert len(results) == 2 @@ -101,14 +101,14 @@ async def test_where(store): assert names == {"hairy", "spotty"} -async def test_multiple_where(store): +async def test_multiple_where(store: InMemoryVectorStore): results = await store.list(where={"type": "insect", "age": 1}) assert len(results) == 1 assert results[0].metadata["name"] == "spotty" -async def test_empty_where(store): +async def test_empty_where(store: InMemoryVectorStore): results = await store.list(where={}) assert len(results) == 6 @@ -116,19 +116,19 @@ async def test_empty_where(store): assert names == {"spikey", "fluffy", "slimy", "scaly", "hairy", "spotty"} -async def test_empty_results(store): +async def test_empty_results(store: InMemoryVectorStore): results = await store.list(where={"type": "bird"}) assert len(results) == 0 -async def test_empty_results_with_limit(store): +async def test_empty_results_with_limit(store: InMemoryVectorStore): results = await store.list(where={"type": "bird"}, limit=2) assert len(results) == 0 -async def test_where_limit(store): +async def test_where_limit(store: InMemoryVectorStore): results = await store.list(where={"type": "insect"}, limit=1) assert len(results) == 1 diff --git a/packages/ragbits-document-search/py.typed b/packages/ragbits-document-search/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/packages/ragbits-document-search/src/ragbits/document_search/_main.py b/packages/ragbits-document-search/src/ragbits/document_search/_main.py index 9d31e7448..a537ae99d 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/_main.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/_main.py @@ -1,3 +1,4 @@ +from collections.abc import Sequence from typing import Any from pydantic import BaseModel, Field @@ -83,7 +84,7 @@ def from_config(cls, config: dict) -> "DocumentSearch": return cls(embedder, vector_store, query_rephraser, reranker, document_processor_router) - async def search(self, query: str, search_config: SearchConfig = SearchConfig()) -> list[Element]: + async def search(self, query: str, search_config: SearchConfig | None = None) -> list[Element]: """ Search for the most relevant chunks for a query. @@ -94,6 +95,7 @@ async def search(self, query: str, search_config: SearchConfig = SearchConfig()) Returns: A list of chunks. """ + search_config = search_config or SearchConfig() queries = await self.query_rephraser.rephrase(query) elements = [] for rephrased_query in queries: @@ -105,7 +107,7 @@ async def search(self, query: str, search_config: SearchConfig = SearchConfig()) async def _process_document( self, - document: DocumentMeta | Document | (LocalFileSource, GCSSource), + document: DocumentMeta | Document | LocalFileSource | GCSSource, document_processor: BaseProvider | None = None, ) -> list[Element]: """ @@ -113,6 +115,8 @@ async def _process_document( Args: document: The document to process. + document_processor: The document processor to use. If not provided, the document processor will be + determined based on the document metadata. Returns: The elements. @@ -132,8 +136,8 @@ async def _process_document( async def ingest( self, - documents: Sequence[DocumentMeta | Document | Union[LocalFileSource, GCSSource]], - document_processor: Optional[BaseProvider] = None, + documents: Sequence[DocumentMeta | Document | LocalFileSource | GCSSource], + document_processor: BaseProvider | None = None, ) -> None: """ Ingest multiple documents. @@ -143,7 +147,6 @@ async def ingest( document_processor: The document processor to use. If not provided, the document processor will be determined based on the document metadata. """ - elements = [] # TODO: Parallelize for document in documents: diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/__init__.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/__init__.py index 11009c911..7a927f36c 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/__init__.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/__init__.py @@ -12,8 +12,8 @@ "BaseProvider", "DummyProvider", "UnstructuredDefaultProvider", - "UnstructuredPdfProvider", "UnstructuredImageProvider", + "UnstructuredPdfProvider", ] module = sys.modules[__name__] diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/default.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/default.py index 6c3741b99..c786df944 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/default.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/default.py @@ -78,7 +78,7 @@ def __init__( self.api_key = api_key self.api_server = api_server self.use_api = use_api - self._client = None + self._client: UnstructuredClient | None = None self.ignore_images = ignore_images @property @@ -132,7 +132,7 @@ async def process(self, document_meta: DocumentMeta) -> list[Element]: } } ) - elements = elements_from_dicts(res.elements) + elements = elements_from_dicts(res.elements) # type: ignore else: elements = partition( file=BytesIO(document.local_path.read_bytes()), diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/images.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/images.py index 466ace1dc..bfbfc58ef 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/images.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/images.py @@ -1,5 +1,4 @@ from pathlib import Path -from typing import Optional from PIL import Image from unstructured.chunking.basic import chunk_elements @@ -33,12 +32,12 @@ class UnstructuredImageProvider(UnstructuredDefaultProvider): def __init__( self, - partition_kwargs: Optional[dict] = None, - chunking_kwargs: Optional[dict] = None, - api_key: Optional[str] = None, - api_server: Optional[str] = None, + partition_kwargs: dict | None = None, + chunking_kwargs: dict | None = None, + api_key: str | None = None, + api_server: str | None = None, use_api: bool = False, - llm: Optional[LLM] = None, + llm: LLM | None = None, ) -> None: """Initialize the UnstructuredPdfProvider. @@ -50,6 +49,7 @@ def __init__( variable will be used. api_server: The API server URL to use for the Unstructured API. If not specified, the UNSTRUCTURED_SERVER_URL environment variable will be used. + use_api: Whether to use the Unstructured API. If False, the provider will only use the local processing. llm: llm to use """ super().__init__(partition_kwargs, chunking_kwargs, api_key, api_server, use_api) @@ -89,8 +89,9 @@ async def _to_image_element( @staticmethod def _load_document_as_image( - document_path: Path, page: Optional[int] = None # pylint: disable=unused-argument - ) -> Image: + document_path: Path, + page: int | None = None, # pylint: disable=unused-argument + ) -> Image.Image: return Image.open(document_path).convert("RGB") @staticmethod diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/pdf.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/pdf.py index 585282196..345417499 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/pdf.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/pdf.py @@ -1,5 +1,4 @@ from pathlib import Path -from typing import Optional from pdf2image import convert_from_path from PIL import Image @@ -20,8 +19,8 @@ class UnstructuredPdfProvider(UnstructuredImageProvider): } @staticmethod - def _load_document_as_image(document_path: Path, page: Optional[int] = None) -> Image: - return convert_from_path(document_path, first_page=page, last_page=page)[0] + def _load_document_as_image(document_path: Path, page: int | None = None) -> Image.Image: + return convert_from_path(document_path, first_page=page, last_page=page)[0] # type: ignore @staticmethod def _convert_coordinates( @@ -35,10 +34,10 @@ def _convert_coordinates( ) -> tuple[float, float, float, float]: new_system = CoordinateSystem(image_width, image_height) new_system.orientation = Orientation.SCREEN - new_top_x, new_top_y = element.metadata.coordinates.system.convert_coordinates_to_new_system( + new_top_x, new_top_y = element.metadata.coordinates.system.convert_coordinates_to_new_system( # type: ignore new_system, top_x, top_y ) - new_bottom_x, new_bottom_y = element.metadata.coordinates.system.convert_coordinates_to_new_system( + new_bottom_x, new_bottom_y = element.metadata.coordinates.system.convert_coordinates_to_new_system( # type: ignore new_system, bottom_x, bottom_y ) return new_top_x, new_top_y, new_bottom_x, new_bottom_y diff --git a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/utils.py b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/utils.py index 5347168d4..6a8d548ec 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/utils.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/ingestion/providers/unstructured/utils.py @@ -1,7 +1,6 @@ import base64 import io import os -from typing import Optional from PIL import Image from unstructured.documents.elements import Element as UnstructuredElement @@ -28,7 +27,7 @@ def to_text_element(element: UnstructuredElement, document_meta: DocumentMeta) - ) -def check_required_argument(value: Optional[str], arg_name: str, fallback_env: str) -> str: +def check_required_argument(value: str | None, arg_name: str, fallback_env: str) -> str: """ Checks if given environment variable is set and returns it or raises an error @@ -58,11 +57,11 @@ def extract_image_coordinates(element: UnstructuredElement) -> tuple[float, floa Returns: x of top left corner, y of top left corner, x of bottom right corner, y of bottom right corner """ - p1, p2, p3, p4 = element.metadata.coordinates.points + p1, p2, p3, p4 = element.metadata.coordinates.points # type: ignore return min(p1[0], p2[0]), min(p1[1], p4[1]), max(p3[0], p4[0]), max(p2[1], p3[1]) -def crop_and_convert_to_bytes(image: Image, x0: float, y0: float, x1: float, y1: float) -> bytes: +def crop_and_convert_to_bytes(image: Image.Image, x0: float, y0: float, x1: float, y1: float) -> bytes: """ Crops the image and converts to bytes Args: @@ -90,7 +89,7 @@ class ImageDescriber: def __init__(self, llm: LLM): self.llm = llm - async def get_image_description(self, image_bytes: bytes, prompt: Optional[str] = DEFAULT_PROMPT) -> str: + async def get_image_description(self, image_bytes: bytes, prompt: str | None = DEFAULT_PROMPT) -> str: """ Provides summary of the image (passed as bytes) diff --git a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/__init__.py b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/__init__.py index d26afd20d..3b7b109fa 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/__init__.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/__init__.py @@ -7,12 +7,12 @@ from ragbits.document_search.retrieval.rephrasers.prompts import QueryRephraserInput, QueryRephraserPrompt __all__ = [ - "get_rephraser", - "QueryRephraser", - "NoopQueryRephraser", "LLMQueryRephraser", - "QueryRephraserPrompt", + "NoopQueryRephraser", + "QueryRephraser", "QueryRephraserInput", + "QueryRephraserPrompt", + "get_rephraser", ] module = sys.modules[__name__] diff --git a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/noop.py b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/noop.py index b48bd7dee..2201e6da0 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/noop.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/noop.py @@ -6,7 +6,7 @@ class NoopQueryRephraser(QueryRephraser): A no-op query paraphraser that does not change the query. """ - async def rephrase(self, query: str) -> list[str]: + async def rephrase(self, query: str) -> list[str]: # noqa: PLR6301 """ Mock implementation which outputs the same query as in input. diff --git a/packages/ragbits-document-search/tests/helpers.py b/packages/ragbits-document-search/tests/helpers.py index 4ed552e5f..25b752852 100644 --- a/packages/ragbits-document-search/tests/helpers.py +++ b/packages/ragbits-document-search/tests/helpers.py @@ -2,4 +2,4 @@ def env_vars_not_set(env_vars: list[str]) -> bool: - return all([os.environ.get(env_var) is None for env_var in env_vars]) + return all(os.environ.get(env_var) is None for env_var in env_vars) diff --git a/packages/ragbits-document-search/tests/integration/test_sources.py b/packages/ragbits-document-search/tests/integration/test_sources.py index f42ab8a56..8ad8576eb 100644 --- a/packages/ragbits-document-search/tests/integration/test_sources.py +++ b/packages/ragbits-document-search/tests/integration/test_sources.py @@ -10,12 +10,12 @@ os.environ[LOCAL_STORAGE_DIR_ENV] = Path(__file__).parent.as_posix() -HF_TOKEN_ENV = "HF_TOKEN" # nosec +HF_TOKEN_ENV = "HF_TOKEN" # noqa: S105 HF_DATASET_PATH = "micpst/hf-docs" @pytest.mark.skipif( - env_vars_not_set([HF_TOKEN_ENV]), + env_vars_not_set([HF_TOKEN_ENV]), # noqa: S105 reason="Hugging Face environment variables not set", ) async def test_huggingface_source_fetch() -> None: diff --git a/packages/ragbits-document-search/tests/integration/test_unstructured.py b/packages/ragbits-document-search/tests/integration/test_unstructured.py index e37cc1209..05c2e45c9 100644 --- a/packages/ragbits-document-search/tests/integration/test_unstructured.py +++ b/packages/ragbits-document-search/tests/integration/test_unstructured.py @@ -3,7 +3,7 @@ import pytest from ragbits.document_search.documents.document import DocumentMeta, DocumentType -from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter +from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter, ProvidersConfig from ragbits.document_search.ingestion.providers.unstructured.default import ( DEFAULT_PARTITION_KWARGS, UNSTRUCTURED_API_KEY_ENV, @@ -27,7 +27,7 @@ ), ], ) -async def test_document_processor_processes_text_document_with_unstructured_provider(config): +async def test_document_processor_processes_text_document_with_unstructured_provider(config: ProvidersConfig): document_processor = DocumentProcessorRouter.from_config(config) document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.") @@ -35,7 +35,7 @@ async def test_document_processor_processes_text_document_with_unstructured_prov assert isinstance(document_processor._providers[DocumentType.TXT], UnstructuredDefaultProvider) assert len(elements) == 1 - assert elements[0].content == "Name of Peppa's brother is George." + assert elements[0].content == "Name of Peppa's brother is George." # type: ignore @pytest.mark.skipif( @@ -49,7 +49,7 @@ async def test_document_processor_processes_md_document_with_unstructured_provid elements = await document_processor.get_provider(document_meta).process(document_meta) assert len(elements) == 1 - assert elements[0].content == "Ragbits\n\nRepository for internal experiment with our upcoming LLM framework." + assert elements[0].content == "Ragbits\n\nRepository for internal experiment with our upcoming LLM framework." # type: ignore @pytest.mark.skipif( @@ -61,14 +61,14 @@ async def test_document_processor_processes_md_document_with_unstructured_provid reason="OpenAI API environment variables not set", ) @pytest.mark.parametrize("file_name", ["transformers_paper_page.pdf", "transformers_paper_page.png"]) -async def test_document_processor_processes_image_document_with_unstructured_provider(file_name): +async def test_document_processor_processes_image_document_with_unstructured_provider(file_name: str): document_processor = DocumentProcessorRouter.from_config() document_meta = DocumentMeta.from_local_path(Path(__file__).parent / file_name) elements = await document_processor.get_provider(document_meta).process(document_meta) assert len(elements) == 7 - assert elements[-1].description != "" + assert elements[-1].description != "" # type: ignore @pytest.mark.parametrize( @@ -84,14 +84,14 @@ async def test_document_processor_processes_image_document_with_unstructured_pro ), ], ) -async def test_unstructured_provider_document_with_default_partition_kwargs(use_api): +async def test_unstructured_provider_document_with_default_partition_kwargs(use_api: bool): document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.") unstructured_provider = UnstructuredDefaultProvider(use_api=use_api) elements = await unstructured_provider.process(document_meta) assert unstructured_provider.partition_kwargs == DEFAULT_PARTITION_KWARGS assert len(elements) == 1 - assert elements[0].content == "Name of Peppa's brother is George." + assert elements[0].content == "Name of Peppa's brother is George." # type: ignore @pytest.mark.parametrize( @@ -107,7 +107,7 @@ async def test_unstructured_provider_document_with_default_partition_kwargs(use_ ), ], ) -async def test_unstructured_provider_document_with_custom_partition_kwargs(use_api): +async def test_unstructured_provider_document_with_custom_partition_kwargs(use_api: bool): document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.") partition_kwargs = {"languages": ["pl"], "strategy": "fast"} unstructured_provider = UnstructuredDefaultProvider(use_api=use_api, partition_kwargs=partition_kwargs) @@ -115,4 +115,4 @@ async def test_unstructured_provider_document_with_custom_partition_kwargs(use_a assert unstructured_provider.partition_kwargs == partition_kwargs assert len(elements) == 1 - assert elements[0].content == "Name of Peppa's brother is George." + assert elements[0].content == "Name of Peppa's brother is George." # type: ignore diff --git a/packages/ragbits-document-search/tests/unit/test_document_search.py b/packages/ragbits-document-search/tests/unit/test_document_search.py index 016f9828a..8edb635ee 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_search.py +++ b/packages/ragbits-document-search/tests/unit/test_document_search.py @@ -1,6 +1,5 @@ import tempfile from pathlib import Path -from typing import Union from unittest.mock import AsyncMock import pytest @@ -12,6 +11,7 @@ from ragbits.document_search.documents.element import TextElement from ragbits.document_search.documents.sources import LocalFileSource from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter +from ragbits.document_search.ingestion.providers import BaseProvider from ragbits.document_search.ingestion.providers.dummy import DummyProvider CONFIG = { @@ -23,7 +23,7 @@ @pytest.mark.parametrize( - "document, expected", + ("document", "expected"), [ ( DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George"), @@ -37,7 +37,7 @@ ), ], ) -async def test_document_search_from_config(document, expected): +async def test_document_search_from_config(document: DocumentMeta, expected: str): document_search = DocumentSearch.from_config(CONFIG) await document_search.ingest([document]) @@ -46,14 +46,14 @@ async def test_document_search_from_config(document, expected): first_result = results[0] assert isinstance(first_result, TextElement) - assert first_result.content == expected + assert first_result.content == expected # type: ignore async def test_document_search_ingest_from_source(): embeddings_mock = AsyncMock() embeddings_mock.embed_text.return_value = [[0.1, 0.1]] - providers = {DocumentType.TXT: DummyProvider()} + providers: dict[DocumentType, BaseProvider] = {DocumentType.TXT: DummyProvider()} router = DocumentProcessorRouter.from_config(providers) document_search = DocumentSearch( @@ -73,7 +73,7 @@ async def test_document_search_ingest_from_source(): first_result = results[0] assert isinstance(first_result, TextElement) - assert first_result.content == "Name of Peppa's brother is George" + assert first_result.content == "Name of Peppa's brother is George" # type: ignore @pytest.mark.parametrize( @@ -86,7 +86,7 @@ async def test_document_search_ingest_from_source(): ), ], ) -async def test_document_search_ingest(document: Union[DocumentMeta, Document]): +async def test_document_search_ingest(document: DocumentMeta | Document): embeddings_mock = AsyncMock() embeddings_mock.embed_text.return_value = [[0.1, 0.1]] @@ -99,7 +99,7 @@ async def test_document_search_ingest(document: Union[DocumentMeta, Document]): first_result = results[0] assert isinstance(first_result, TextElement) - assert first_result.content == "Name of Peppa's brother is George" + assert first_result.content == "Name of Peppa's brother is George" # type: ignore async def test_document_search_insert_elements(): @@ -122,7 +122,7 @@ async def test_document_search_insert_elements(): first_result = results[0] assert isinstance(first_result, TextElement) - assert first_result.content == "Name of Peppa's brother is George" + assert first_result.content == "Name of Peppa's brother is George" # type: ignore async def test_document_search_with_no_results(): @@ -147,7 +147,7 @@ async def test_document_search_with_search_config(): results = await document_search.search("Peppa's brother", search_config=SearchConfig(vector_store_kwargs={"k": 1})) assert len(results) == 1 - assert results[0].content == "Name of Peppa's brother is George" + assert results[0].content == "Name of Peppa's brother is George" # type: ignore async def test_document_search_ingest_multiple_from_sources(): @@ -160,5 +160,6 @@ async def test_document_search_ingest_multiple_from_sources(): ) results = await document_search.search("foo") + assert len(results) == 2 - assert {result.content for result in results} == {"foo", "bar"} + assert {result.content for result in results} == {"foo", "bar"} # type: ignore diff --git a/packages/ragbits-document-search/tests/unit/test_providers.py b/packages/ragbits-document-search/tests/unit/test_providers.py index 8444dbf10..5975610ed 100644 --- a/packages/ragbits-document-search/tests/unit/test_providers.py +++ b/packages/ragbits-document-search/tests/unit/test_providers.py @@ -39,7 +39,7 @@ async def test_unstructured_provider_raises_value_error_when_api_key_not_set(): DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.") ) - assert "Either pass api_key argument or set the UNSTRUCTURED_API_KEY environment variable" == str(err.value) + assert str(err.value) == "Either pass api_key argument or set the UNSTRUCTURED_API_KEY environment variable" @patch.dict(os.environ, {}, clear=True) @@ -49,4 +49,4 @@ async def test_unstructured_provider_raises_value_error_when_server_url_not_set( DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.") ) - assert "Either pass api_server argument or set the UNSTRUCTURED_SERVER_URL environment variable" == str(err.value) + assert str(err.value) == "Either pass api_server argument or set the UNSTRUCTURED_SERVER_URL environment variable" diff --git a/packages/ragbits-evaluate/py.typed b/packages/ragbits-evaluate/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py b/packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py index b57d6f7fa..ea1d078ae 100644 --- a/packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py +++ b/packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py @@ -1,6 +1,7 @@ import time +from collections.abc import Iterable from dataclasses import asdict -from typing import Any, Iterable +from typing import Any from tqdm.asyncio import tqdm @@ -52,7 +53,7 @@ async def _call_pipeline( Args: pipeline: The pipeline to be called. - data: The evaluation data. + dataset: The dataset to be processed. Returns: The evaluation results and performance metrics. @@ -62,7 +63,8 @@ async def _call_pipeline( end_time = time.perf_counter() return pipe_outputs, self._compute_time_perf(start_time, end_time, len(pipe_outputs)) - def _results_processor(self, results: list[EvaluationResult]) -> dict[str, Any]: + @staticmethod + def _results_processor(results: list[EvaluationResult]) -> dict[str, Any]: """ Process the results. @@ -74,7 +76,8 @@ def _results_processor(self, results: list[EvaluationResult]) -> dict[str, Any]: """ return {"results": [asdict(result) for result in results]} - def _compute_metrics(self, metrics: MetricSet, results: list[EvaluationResult]) -> dict[str, Any]: + @staticmethod + def _compute_metrics(metrics: MetricSet, results: list[EvaluationResult]) -> dict[str, Any]: """ Compute a metric using the given inputs. @@ -87,7 +90,8 @@ def _compute_metrics(self, metrics: MetricSet, results: list[EvaluationResult]) """ return {"metrics": metrics.compute(results)} - def _compute_time_perf(self, start_time: float, end_time: float, num_samples: int) -> dict[str, Any]: + @staticmethod + def _compute_time_perf(start_time: float, end_time: float, num_samples: int) -> dict[str, Any]: """ Compute the performance metrics. diff --git a/packages/ragbits-evaluate/src/ragbits/evaluate/metrics/base.py b/packages/ragbits-evaluate/src/ragbits/evaluate/metrics/base.py index c168c708e..0f93465c1 100644 --- a/packages/ragbits-evaluate/src/ragbits/evaluate/metrics/base.py +++ b/packages/ragbits-evaluate/src/ragbits/evaluate/metrics/base.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Any, Generic, Optional, TypeVar +from typing import Any, Generic, TypeVar from omegaconf import DictConfig from typing_extensions import Self @@ -14,7 +14,7 @@ class Metric(Generic[ResultT], ABC): Base class for metrics. """ - def __init__(self, config: Optional[DictConfig] = None) -> None: + def __init__(self, config: DictConfig | None = None) -> None: """ Initializes the metric. @@ -52,7 +52,7 @@ def __init__(self, *metrics: type[Metric[ResultT]]) -> None: self._metrics = metrics self.metrics: list[Metric[ResultT]] = [] - def __call__(self, config: Optional[DictConfig] = None) -> Self: + def __call__(self, config: DictConfig | None = None) -> Self: """ Initializes the metrics. diff --git a/packages/ragbits-evaluate/src/ragbits/evaluate/metrics/document_search.py b/packages/ragbits-evaluate/src/ragbits/evaluate/metrics/document_search.py index 132e9b6a8..d4906ba9f 100644 --- a/packages/ragbits-evaluate/src/ragbits/evaluate/metrics/document_search.py +++ b/packages/ragbits-evaluate/src/ragbits/evaluate/metrics/document_search.py @@ -1,6 +1,6 @@ import importlib from abc import ABC -from typing import Any, Optional +from typing import Any from continuous_eval.metrics.retrieval import PrecisionRecallF1, RankedRetrievalMetrics from omegaconf import DictConfig @@ -17,7 +17,7 @@ class DocumentSearchMetric(Metric[DocumentSearchResult], ABC): metric_cls: type[PrecisionRecallF1 | RankedRetrievalMetrics] - def __init__(self, config: Optional[DictConfig] = None) -> None: + def __init__(self, config: DictConfig | None = None) -> None: """ Initializes the metric. diff --git a/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/base.py b/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/base.py index d435b05d8..9f08a862d 100644 --- a/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/base.py +++ b/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/base.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import Any, Optional +from typing import Any from omegaconf import DictConfig @@ -17,7 +17,7 @@ class EvaluationPipeline(ABC): Collection evaluation pipeline. """ - def __init__(self, config: Optional[DictConfig] = None) -> None: + def __init__(self, config: DictConfig | None = None) -> None: """ Initializes the evaluation pipeline. diff --git a/packages/ragbits-evaluate/src/ragbits/evaluate/utils.py b/packages/ragbits-evaluate/src/ragbits/evaluate/utils.py index 917418ce2..d71f172b6 100644 --- a/packages/ragbits-evaluate/src/ragbits/evaluate/utils.py +++ b/packages/ragbits-evaluate/src/ragbits/evaluate/utils.py @@ -10,7 +10,7 @@ from omegaconf import DictConfig -def _save(file_path: Path, **data: Any) -> None: +def _save(file_path: Path, **data: Any) -> None: # noqa: ANN401 """ Save the data to a file. Add the current timestamp and Python version to the data. diff --git a/pyproject.toml b/pyproject.toml index 534f116e1..eff2111ef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,8 @@ dev-dependencies = [ "mkdocstrings-python>=1.11.1", "griffe>=1.3.2", "griffe-typingdoc>=0.2.7", + "types-PyYAML>=6.0.2", + "mypy>=1.13.0", ] [tool.uv.sources] @@ -87,6 +89,7 @@ mypy_path = [ "packages/ragbits-document-search/src", "packages/ragbits-evaluate/src", ] +exclude = ["scripts"] [[tool.mypy.overrides]] module = "ragbits.*" @@ -156,8 +159,16 @@ convention = "google" "D103", "D107" ] -# "/home/patryk/repositories/internal/ragnarok_1.0/packages/ragbits-core/tests/unit/prompts/test_prompt.py" = ["S101"] - +"**/tests/**/*.py" = [ + "S101", # asserts allowed in tests... + "ARG", # Unused function args + "D101", # Missing docstring in public class + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "PT011", # we prefer assert at the end of the test instead of using "match" parameter in pytest.raises + "ANN201", # Missing return type annotation + "PLR2004", # Magic numbers are ok in tests +] [tool.ruff.format] docstring-code-format = true docstring-code-line-length = 120 diff --git a/scripts/update_ragbits_package.py b/scripts/update_ragbits_package.py index 79aba21dc..3dd3ecf23 100644 --- a/scripts/update_ragbits_package.py +++ b/scripts/update_ragbits_package.py @@ -18,7 +18,7 @@ from enum import Enum from pathlib import Path -import tomlkit +import tomlkit # type: ignore import typer from inquirer.shortcuts import confirm, list_input, text from rich import print as pprint @@ -108,7 +108,7 @@ def _update_pkg_version( return version, new_version -def _sync_ragbits_deps(pkg_name: str, pkg_version: str, pkg_new_version: str, update_version: bool = True): +def _sync_ragbits_deps(pkg_name: str, pkg_version: str, pkg_new_version: str, update_version: bool = True) -> None: ragbits_pkg_project = tomlkit.parse((PACKAGES_DIR / "ragbits" / "pyproject.toml").read_text()) ragbits_deps: list[str] = [dep.split("==")[0] for dep in ragbits_pkg_project["project"]["dependencies"]] diff --git a/uv.lock b/uv.lock index c9d6b69fc..39c170f0a 100644 --- a/uv.lock +++ b/uv.lock @@ -2338,6 +2338,40 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/90/ab/0301c945a704218bc9435f0e3c88884f6b19ef234d8899fb47ce1ccfd0c9/munkres-1.1.4-py2.py3-none-any.whl", hash = "sha256:6b01867d4a8480d865aea2326e4b8f7c46431e9e55b4a2e32d989307d7bced2a", size = 7015 }, ] +[[package]] +name = "mypy" +version = "1.13.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mypy-extensions" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e8/21/7e9e523537991d145ab8a0a2fd98548d67646dc2aaaf6091c31ad883e7c1/mypy-1.13.0.tar.gz", hash = "sha256:0291a61b6fbf3e6673e3405cfcc0e7650bebc7939659fdca2702958038bd835e", size = 3152532 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5e/8c/206de95a27722b5b5a8c85ba3100467bd86299d92a4f71c6b9aa448bfa2f/mypy-1.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6607e0f1dd1fb7f0aca14d936d13fd19eba5e17e1cd2a14f808fa5f8f6d8f60a", size = 11020731 }, + { url = "https://files.pythonhosted.org/packages/ab/bb/b31695a29eea76b1569fd28b4ab141a1adc9842edde080d1e8e1776862c7/mypy-1.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8a21be69bd26fa81b1f80a61ee7ab05b076c674d9b18fb56239d72e21d9f4c80", size = 10184276 }, + { url = "https://files.pythonhosted.org/packages/a5/2d/4a23849729bb27934a0e079c9c1aad912167d875c7b070382a408d459651/mypy-1.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b2353a44d2179846a096e25691d54d59904559f4232519d420d64da6828a3a7", size = 12587706 }, + { url = "https://files.pythonhosted.org/packages/5c/c3/d318e38ada50255e22e23353a469c791379825240e71b0ad03e76ca07ae6/mypy-1.13.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0730d1c6a2739d4511dc4253f8274cdd140c55c32dfb0a4cf8b7a43f40abfa6f", size = 13105586 }, + { url = "https://files.pythonhosted.org/packages/4a/25/3918bc64952370c3dbdbd8c82c363804678127815febd2925b7273d9482c/mypy-1.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:c5fc54dbb712ff5e5a0fca797e6e0aa25726c7e72c6a5850cfd2adbc1eb0a372", size = 9632318 }, + { url = "https://files.pythonhosted.org/packages/d0/19/de0822609e5b93d02579075248c7aa6ceaddcea92f00bf4ea8e4c22e3598/mypy-1.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:581665e6f3a8a9078f28d5502f4c334c0c8d802ef55ea0e7276a6e409bc0d82d", size = 10939027 }, + { url = "https://files.pythonhosted.org/packages/c8/71/6950fcc6ca84179137e4cbf7cf41e6b68b4a339a1f5d3e954f8c34e02d66/mypy-1.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3ddb5b9bf82e05cc9a627e84707b528e5c7caaa1c55c69e175abb15a761cec2d", size = 10108699 }, + { url = "https://files.pythonhosted.org/packages/26/50/29d3e7dd166e74dc13d46050b23f7d6d7533acf48f5217663a3719db024e/mypy-1.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:20c7ee0bc0d5a9595c46f38beb04201f2620065a93755704e141fcac9f59db2b", size = 12506263 }, + { url = "https://files.pythonhosted.org/packages/3f/1d/676e76f07f7d5ddcd4227af3938a9c9640f293b7d8a44dd4ff41d4db25c1/mypy-1.13.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3790ded76f0b34bc9c8ba4def8f919dd6a46db0f5a6610fb994fe8efdd447f73", size = 12984688 }, + { url = "https://files.pythonhosted.org/packages/9c/03/5a85a30ae5407b1d28fab51bd3e2103e52ad0918d1e68f02a7778669a307/mypy-1.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:51f869f4b6b538229c1d1bcc1dd7d119817206e2bc54e8e374b3dfa202defcca", size = 9626811 }, + { url = "https://files.pythonhosted.org/packages/fb/31/c526a7bd2e5c710ae47717c7a5f53f616db6d9097caf48ad650581e81748/mypy-1.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5c7051a3461ae84dfb5dd15eff5094640c61c5f22257c8b766794e6dd85e72d5", size = 11077900 }, + { url = "https://files.pythonhosted.org/packages/83/67/b7419c6b503679d10bd26fc67529bc6a1f7a5f220bbb9f292dc10d33352f/mypy-1.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:39bb21c69a5d6342f4ce526e4584bc5c197fd20a60d14a8624d8743fffb9472e", size = 10074818 }, + { url = "https://files.pythonhosted.org/packages/ba/07/37d67048786ae84e6612575e173d713c9a05d0ae495dde1e68d972207d98/mypy-1.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:164f28cb9d6367439031f4c81e84d3ccaa1e19232d9d05d37cb0bd880d3f93c2", size = 12589275 }, + { url = "https://files.pythonhosted.org/packages/1f/17/b1018c6bb3e9f1ce3956722b3bf91bff86c1cefccca71cec05eae49d6d41/mypy-1.13.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a4c1bfcdbce96ff5d96fc9b08e3831acb30dc44ab02671eca5953eadad07d6d0", size = 13037783 }, + { url = "https://files.pythonhosted.org/packages/cb/32/cd540755579e54a88099aee0287086d996f5a24281a673f78a0e14dba150/mypy-1.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:a0affb3a79a256b4183ba09811e3577c5163ed06685e4d4b46429a271ba174d2", size = 9726197 }, + { url = "https://files.pythonhosted.org/packages/11/bb/ab4cfdc562cad80418f077d8be9b4491ee4fb257440da951b85cbb0a639e/mypy-1.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a7b44178c9760ce1a43f544e595d35ed61ac2c3de306599fa59b38a6048e1aa7", size = 11069721 }, + { url = "https://files.pythonhosted.org/packages/59/3b/a393b1607cb749ea2c621def5ba8c58308ff05e30d9dbdc7c15028bca111/mypy-1.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5d5092efb8516d08440e36626f0153b5006d4088c1d663d88bf79625af3d1d62", size = 10063996 }, + { url = "https://files.pythonhosted.org/packages/d1/1f/6b76be289a5a521bb1caedc1f08e76ff17ab59061007f201a8a18cc514d1/mypy-1.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de2904956dac40ced10931ac967ae63c5089bd498542194b436eb097a9f77bc8", size = 12584043 }, + { url = "https://files.pythonhosted.org/packages/a6/83/5a85c9a5976c6f96e3a5a7591aa28b4a6ca3a07e9e5ba0cec090c8b596d6/mypy-1.13.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:7bfd8836970d33c2105562650656b6846149374dc8ed77d98424b40b09340ba7", size = 13036996 }, + { url = "https://files.pythonhosted.org/packages/b4/59/c39a6f752f1f893fccbcf1bdd2aca67c79c842402b5283563d006a67cf76/mypy-1.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:9f73dba9ec77acb86457a8fc04b5239822df0c14a082564737833d2963677dbc", size = 9737709 }, + { url = "https://files.pythonhosted.org/packages/3b/86/72ce7f57431d87a7ff17d442f521146a6585019eb8f4f31b7c02801f78ad/mypy-1.13.0-py3-none-any.whl", hash = "sha256:9c250883f9fd81d212e0952c92dbfcc96fc237f4b7c92f56ac81fd48460b3e5a", size = 2647043 }, +] + [[package]] name = "mypy-extensions" version = "1.0.0" @@ -3506,21 +3540,22 @@ wheels = [ [[package]] name = "ragbits" -version = "0.1.0" +version = "0.2.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "ragbits-cli" }, { name = "ragbits-core" }, { name = "ragbits-document-search" }, + { name = "ragbits-evaluate" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bc/72/1459951c9dac4e7b25ea5ea11029653fc49090c49ebc18c73887f8b345bd/ragbits-0.1.0.tar.gz", hash = "sha256:fed59d645121440ad3df2bdb58646c589cbf004601b6ba13c5e99afb0b0d826b", size = 1579 } +sdist = { url = "https://files.pythonhosted.org/packages/d4/9c/c5a704e364e5d02aeb00746a486910392373a1eb511ec0efc8e8360551b1/ragbits-0.2.0.tar.gz", hash = "sha256:12a1ab581a92fccb49b57528a41056966d400af01dc858005ff5ba22aecf6ce1", size = 2320 } wheels = [ - { url = "https://files.pythonhosted.org/packages/9c/bb/a2861dacbe5d8005e9c8827676ae325df40663ab661093b6f1cf2a221f37/ragbits-0.1.0-py3-none-any.whl", hash = "sha256:50d82616d28e2994d9d673778c6e6ab9db6736901fbe1c2580e9e2beb0dd884d", size = 1176 }, + { url = "https://files.pythonhosted.org/packages/19/bf/aa5f9f35b24047466cb2fdc065e095d0c609d22004801309e51b402a8211/ragbits-0.2.0-py3-none-any.whl", hash = "sha256:ce690c17a6fcbff402781bdc568df39dfc7f07566c542cd206fb5a631c9ecbb9", size = 1185 }, ] [[package]] name = "ragbits-cli" -version = "0.1.0" +version = "0.2.0" source = { editable = "packages/ragbits-cli" } dependencies = [ { name = "ragbits-core" }, @@ -3535,7 +3570,7 @@ requires-dist = [ [[package]] name = "ragbits-core" -version = "0.1.0" +version = "0.2.0" source = { editable = "packages/ragbits-core" } dependencies = [ { name = "jinja2" }, @@ -3598,7 +3633,7 @@ dev = [ [[package]] name = "ragbits-document-search" -version = "0.1.0" +version = "0.2.0" source = { editable = "packages/ragbits-document-search" } dependencies = [ { name = "pdf2image" }, @@ -3647,11 +3682,12 @@ dev = [ [[package]] name = "ragbits-evaluate" -version = "0.1.0" +version = "0.2.0" source = { editable = "packages/ragbits-evaluate" } dependencies = [ { name = "hydra-core" }, { name = "neptune" }, + { name = "ragbits-core" }, ] [package.optional-dependencies] @@ -3673,6 +3709,7 @@ requires-dist = [ { name = "continuous-eval", marker = "extra == 'relari'", specifier = "~=0.3.12" }, { name = "hydra-core", specifier = "~=1.3.2" }, { name = "neptune", specifier = "~=1.12.0" }, + { name = "ragbits-core", editable = "packages/ragbits-core" }, ] [package.metadata.requires-dev] @@ -3705,11 +3742,13 @@ dev = [ { name = "mkdocs-material-extensions" }, { name = "mkdocstrings" }, { name = "mkdocstrings-python" }, + { name = "mypy" }, { name = "pip-licenses" }, { name = "pre-commit" }, { name = "pytest" }, { name = "pytest-asyncio" }, { name = "pytest-cov" }, + { name = "types-pyyaml" }, ] [package.metadata] @@ -3730,11 +3769,13 @@ dev = [ { name = "mkdocs-material-extensions", specifier = ">=1.3.1" }, { name = "mkdocstrings", specifier = ">=0.26.1" }, { name = "mkdocstrings-python", specifier = ">=1.11.1" }, + { name = "mypy", specifier = ">=1.13.0" }, { name = "pip-licenses", specifier = ">=4.0.0,<5.0.0" }, { name = "pre-commit", specifier = "~=3.8.0" }, { name = "pytest", specifier = "~=8.3.3" }, { name = "pytest-asyncio", specifier = "~=0.24.0" }, { name = "pytest-cov", specifier = "~=5.0.0" }, + { name = "types-pyyaml", specifier = ">=6.0.2" }, ] [[package]] @@ -4690,6 +4731,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/35/d6/ba5f61958f358028f2e2ba1b8e225b8e263053bd57d3a79e2d2db64c807b/types_python_dateutil-2.9.0.20241003-py3-none-any.whl", hash = "sha256:250e1d8e80e7bbc3a6c99b907762711d1a1cdd00e978ad39cb5940f6f0a87f3d", size = 9693 }, ] +[[package]] +name = "types-pyyaml" +version = "6.0.12.20240917" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/92/7d/a95df0a11f95c8f48d7683f03e4aed1a2c0fc73e9de15cca4d38034bea1a/types-PyYAML-6.0.12.20240917.tar.gz", hash = "sha256:d1405a86f9576682234ef83bcb4e6fff7c9305c8b1fbad5e0bcd4f7dbdc9c587", size = 12381 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/2c/c1d81d680997d24b0542aa336f0a65bd7835e5224b7670f33a7d617da379/types_PyYAML-6.0.12.20240917-py3-none-any.whl", hash = "sha256:392b267f1c0fe6022952462bf5d6523f31e37f6cea49b14cee7ad634b6301570", size = 15264 }, +] + [[package]] name = "typing-extensions" version = "4.12.2" From 5fe6eb61e38524dafe03ece77fbb03975e0cfc29 Mon Sep 17 00:00:00 2001 From: Alan Konarski Date: Thu, 24 Oct 2024 13:26:53 +0200 Subject: [PATCH 24/28] Update contributing guide --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 30209efc4..b650bf51f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -33,7 +33,7 @@ $ uv run mypy . ## Install pre-commit -We also have some check run via pre-commit hook. Setup it by: +We also run some checks through a pre-commit hook. To set it up, follow these steps: ``` pre-commit install From 7d235321938e25a2cecea38eec2bd6438f70edeb Mon Sep 17 00:00:00 2001 From: Alan Konarski Date: Thu, 24 Oct 2024 13:33:52 +0200 Subject: [PATCH 25/28] Fix ci --- .github/workflows/ci.yml | 1 + .../src/ragbits/core/vector_store/chromadb_store.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e13e546d9..2e226a9ad 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,6 +38,7 @@ jobs: - name: Run pre-commit checks run: | + source .venv/bin/activate pre-commit run --all-files --show-diff-on-failure --color always - name: Run ruff formatter diff --git a/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py b/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py index 32788c863..6dceefc19 100644 --- a/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py +++ b/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import json from hashlib import sha256 from typing import Literal From f809b4d5d8929415db7f021355ef56c0de9bdb42 Mon Sep 17 00:00:00 2001 From: Alan Konarski Date: Thu, 24 Oct 2024 13:37:16 +0200 Subject: [PATCH 26/28] Fix ci --- .../src/ragbits/core/vector_store/chromadb_store.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py b/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py index 6dceefc19..0296a9c8c 100644 --- a/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py +++ b/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py @@ -24,8 +24,8 @@ class ChromaDBStore(VectorStore): def __init__( self, index_name: str, - chroma_client: "chromadb.ClientAPI", - embedding_function: Embeddings | "chromadb.EmbeddingFunction", + chroma_client: chromadb.ClientAPI, + embedding_function: Embeddings | chromadb.EmbeddingFunction, max_distance: float | None = None, distance_method: Literal["l2", "ip", "cosine"] = "l2", ): @@ -51,7 +51,7 @@ def __init__( self._collection = self._get_chroma_collection() @classmethod - def from_config(cls, config: dict) -> "ChromaDBStore": + def from_config(cls, config: dict) -> ChromaDBStore: """ Creates and returns an instance of the ChromaDBStore class from the given configuration. @@ -76,7 +76,7 @@ def from_config(cls, config: dict) -> "ChromaDBStore": distance_method=config.get("distance_method", "l2"), ) - def _get_chroma_collection(self) -> "chromadb.Collection": + def _get_chroma_collection(self) -> chromadb.Collection: """ Based on the selected embedding_function, chooses how to retrieve the ChromaDB collection. If the collection doesn't exist, it creates one. @@ -121,7 +121,7 @@ def _process_db_entry(entry: VectorDBEntry) -> tuple[str, list[float], dict]: return doc_id, embedding, metadata @property - def embedding_function(self) -> Embeddings | "chromadb.EmbeddingFunction": + def embedding_function(self) -> Embeddings | chromadb.EmbeddingFunction: """ Returns the embedding function. From 8cf70aa9accabdb475fdf54b92b4a88bcdfc9386 Mon Sep 17 00:00:00 2001 From: Alan Konarski Date: Thu, 24 Oct 2024 13:50:44 +0200 Subject: [PATCH 27/28] Add more repositories for trivy --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2e226a9ad..86cfd1c73 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,6 +52,8 @@ jobs: - name: Run Trivy vulnerability scanner uses: aquasecurity/trivy-action@master + env: + TRIVY_DB_REPOSITORY: ghcr.io/aquasecurity/trivy-db:2,public.ecr.aws/aquasecurity/trivy-db:2 with: scan-type: "fs" ignore-unfixed: true @@ -60,6 +62,7 @@ jobs: format: "table" output: "trivy-scanning-results.txt" + - name: Format trivy message run: | echo "Trivy scanning results." >> trivy.txt From f29285d2dcf98ce4784636385fc965d972869510 Mon Sep 17 00:00:00 2001 From: Alan Konarski Date: Thu, 24 Oct 2024 13:58:50 +0200 Subject: [PATCH 28/28] Remove unnecessary empty line --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 86cfd1c73..33d0baaca 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -62,7 +62,6 @@ jobs: format: "table" output: "trivy-scanning-results.txt" - - name: Format trivy message run: | echo "Trivy scanning results." >> trivy.txt