deepsense-ai · kzamlynska · Dec 16, 2024 · Dec 17, 2024 · Dec 17, 2024 · Dec 17, 2024
diff --git a/examples/document-search/chroma_cli.py b/examples/document-search/chroma_cli.py
@@ -0,0 +1,109 @@
+"""
+Ragbits Document Search Example: Chroma x CLI trace handler
+
+This example demonstrates how to use the `DocumentSearch` class to search for documents with a more advanced setup.
+We will use the `LiteLLMEmbeddings` class to embed the documents and the query, the `ChromaVectorStore` class to store
+the embeddings, and the CLI Trace handler to trace the operations.
+
+The script performs the following steps:
+
+    1. Create a list of documents.
+    2. Initialize the `LiteLLMEmbeddings` class with the OpenAI `text-embedding-3-small` embedding model.
+    3. Initialize the `ChromaVectorStore` class with an `EphemeralClient` instance and an index name.
+    4. Initialize the `DocumentSearch` class with the embedder and the vector store.
+    5. Ingest the documents into the `DocumentSearch` instance.
+    6. List all documents in the vector store.
+    7. Search for documents using a query.
+    8. Print the list of all documents and the search results.
+
+To run the script, execute the following command:
+
+    ```bash
+    uv run examples/document-search/chroma_cli.py
+    ```
+# /// script
+# requires-python = ">=3.10"
+# dependencies = [
+#     "ragbits-document-search",
+#     "ragbits-core[chroma,cli]",
+# ]
+# ///
+"""
+
+import asyncio
+
+from chromadb import EphemeralClient
+
+from ragbits.core import audit
+from ragbits.core.embeddings.litellm import LiteLLMEmbeddings
+from ragbits.core.vector_stores.chroma import ChromaVectorStore
+from ragbits.document_search import DocumentSearch, SearchConfig
+from ragbits.document_search.documents.document import DocumentMeta
+
+# Register the "cli" trace handler so traced operations are reported in the terminal.
+audit.set_trace_handlers("cli")
+
+# Sample documents to ingest, each built from a literal text.
+documents = [
+    DocumentMeta.create_text_document_from_literal(text)
+    for text in (
+        """
+        RIP boiled water. You will be mist.
+        """,
+        """
+        Why doesn't James Bond fart in bed? Because it would blow his cover.
+        """,
+        """
+        Why programmers don't like to swim? Because they're scared of the floating points.
+        """,
+        """
+        This one is completely unrelated.
+        """,
+    )
+]
+
+
+async def main() -> None:
+    """
+    Run the example.
+
+    Ingests the sample documents into a Chroma vector store backed by an ephemeral client,
+    prints the content of every stored entry, then prints the text representation of the
+    elements returned for a single query.
+    """
+    embeddings = LiteLLMEmbeddings(model="text-embedding-3-small")
+    store = ChromaVectorStore(client=EphemeralClient(), index_name="jokes")
+    search_engine = DocumentSearch(embedder=embeddings, vector_store=store)
+
+    await search_engine.ingest(documents)
+
+    stored_entries = await store.list()
+
+    print()
+    print("All documents:")
+    print([entry.metadata["content"] for entry in stored_entries])
+
+    query = "I'm boiling my water and I need a joke"
+    # Ask the vector store for the 2 nearest entries, without a distance cut-off.
+    search_config = SearchConfig(vector_store_kwargs={"k": 2, "max_distance": None})
+    hits = await search_engine.search(query, config=search_config)
+
+    print()
+    print(f"Documents similar to: {query}")
+    print([hit.text_representation for hit in hits])
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/packages/ragbits-cli/src/ragbits/cli/__init__.py b/packages/ragbits-cli/src/ragbits/cli/__init__.py
@@ -1,4 +1,5 @@
 import importlib.util
+import os
 import pkgutil
 from pathlib import Path
 from typing import Annotated
@@ -8,6 +9,7 @@
 from typer.main import get_command
 
 import ragbits
+from ragbits.core import audit
 
 from .state import OutputType, cli_state, print_output
 
@@ -28,9 +30,17 @@ def ragbits_cli(
     output: Annotated[
         OutputType, typer.Option("--output", "-o", help="Set the output type (text or json)")
     ] = OutputType.text.value,  # type: ignore
+    verbose: bool = typer.Option(
+        bool(int(os.getenv("RAGBITS_VERBOSE", "0"))), "--verbose", "-v", help="Enable verbose mode"
+    ),
 ) -> None:
     """Common CLI arguments for all ragbits commands."""
     cli_state.output_type = output
+    cli_state.verbose = verbose
+
+    if verbose:
+        typer.echo("Verbose mode is enabled.")
+        audit.set_trace_handlers("cli")
 
 
 def autoregister() -> None:

diff --git a/packages/ragbits-cli/src/ragbits/cli/state.py b/packages/ragbits-cli/src/ragbits/cli/state.py
@@ -19,6 +19,7 @@ class OutputType(Enum):
 class CliState:
     """A dataclass describing CLI state"""
 
+    verbose: bool = False
     output_type: OutputType = OutputType.text
 
 

diff --git a/packages/ragbits-core/src/ragbits/core/audit/__init__.py b/packages/ragbits-core/src/ragbits/core/audit/__init__.py
@@ -42,6 +42,12 @@ def set_trace_handlers(handlers: Handler | list[Handler]) -> None:
                 from ragbits.core.audit.otel import OtelTraceHandler
 
                 _trace_handlers.append(OtelTraceHandler())
+
+            elif handler == "cli":
+                from ragbits.core.audit.cli import CLITraceHandler
+
+                _trace_handlers.append(CLITraceHandler())
+
             else:
                 raise ValueError(f"Handler {handler} not found.")
         else:
@@ -61,6 +67,7 @@ def trace(name: str | None = None, **inputs: Any) -> Iterator[SimpleNamespace]:
         The output data.
     """
     # We need to go up 2 frames (trace() and __enter__()) to get the parent function.
+
     parent_frame = inspect.stack()[2].frame
     name = (
         (
@@ -74,6 +81,7 @@ def trace(name: str | None = None, **inputs: Any) -> Iterator[SimpleNamespace]:
 
     with ExitStack() as stack:
         outputs = [stack.enter_context(handler.trace(name, **inputs)) for handler in _trace_handlers]
+
         yield (out := SimpleNamespace())
         for output in outputs:
             output.__dict__.update(vars(out))

diff --git a/packages/ragbits-core/src/ragbits/core/audit/cli.py b/packages/ragbits-core/src/ragbits/core/audit/cli.py
@@ -0,0 +1,132 @@
+import time
+from typing import Optional
+
+from rich import print as rich_print
+from rich.tree import Tree
+
+from ragbits.core.audit import TraceHandler
+
+
+class CLISpan:
+    """
+    CLI Span represents a single operation within a trace.
+
+    Spans are linked through `parent` and `children`; `to_tree` renders a span
+    together with its descendants as a Rich Tree.
+    """
+
+    def __init__(self, name: str, inputs: dict, parent: Optional["CLISpan"] = None):
+        """
+        Constructs a new CLI Span.
+        Records the start time of the span and sets the span status to 'started'.
+
+        Args:
+            name: The name of the span.
+            inputs: The inputs of the span.
+            parent: The parent of the span, or None for a root span.
+        """
+        self.name = name
+        self.parent = parent
+        # perf_counter has an undefined reference point: only the difference between two readings is meaningful.
+        self.start_time: float = time.perf_counter()
+        self.end_time: float | None = None
+        self.children: list[CLISpan] = []
+        self.status: str = "started"
+        self.inputs: dict = inputs or {}
+        self.outputs: dict = {}
+
+    def end(self) -> None:
+        """
+        Records the current time as the span's end time.
+        Only the first call to `end` modifies the span, further calls are ignored.
+        """
+        if self.end_time is None:
+            self.end_time = time.perf_counter()
+
+    def _label(self, color: str) -> str:
+        """
+        Builds the Rich markup label describing this span.
+
+        Args:
+            color: The Rich style applied to the span name.
+
+        Returns:
+            The label containing the span name, duration, inputs and outputs.
+        """
+        # Imported locally so this class does not require changes to the module-level imports.
+        from rich.markup import escape
+
+        secondary_color = "grey50"
+        # A span that has not ended yet is reported with a zero duration.
+        duration = self.end_time - self.start_time if self.end_time is not None else 0.0
+        # Escape dynamic text: square brackets in names or payloads would otherwise be parsed as Rich markup.
+        name = escape(self.name)
+        inputs = escape(str(self.inputs))
+        outputs = escape(str(self.outputs))
+        return (
+            f"[{color}]{name}[/{color}] Duration: {duration:.3f}s\n"
+            f"[{secondary_color}]Inputs: {inputs}\nOutputs: {outputs}[/{secondary_color}]"
+        )
+
+    def to_tree(self, tree: Tree | None = None, color: str = "bold blue") -> Tree:
+        """
+        Convert the CLISpan object and its children into a Rich Tree structure for console rendering.
+
+        Args:
+            tree (Tree, optional): An existing Rich Tree object to which the span will be added.
+                               If None, a new tree is created for the root span.
+            color (str, optional): The color of the span name rendered to console.
+
+        Returns:
+            Tree: The Rich Tree node created for this span - a new root tree when `tree` is None,
+                otherwise the branch added to `tree` - with all child spans attached.
+        """
+        error_color = "bold red"
+        child_color = "bold green"
+
+        label = self._label(color)
+        node = Tree(label) if tree is None else tree.add(label)
+
+        for child in self.children:
+            # Children that ended with an error are highlighted in a different color.
+            child.to_tree(node, error_color if child.status == "error" else child_color)
+        return node
+
+
+class CLITraceHandler(TraceHandler[CLISpan]):
+    """
+    Trace handler that collects spans into a tree and prints the tree to the console
+    once a span without a parent is stopped or fails.
+    """
+
+    def start(self, name: str, inputs: dict, current_span: CLISpan | None = None) -> CLISpan:  # noqa: PLR6301
+        """
+        Open a new span and attach it to the span that is currently active.
+
+        Args:
+            name: The name of the trace.
+            inputs: The input data.
+            current_span: The span active when the trace starts, if any; it becomes the parent.
+
+        Returns:
+            The newly created span.
+        """
+        child = CLISpan(name=name, inputs=inputs, parent=current_span)
+        if current_span is not None:
+            current_span.children.append(child)
+        return child
+
+    def stop(self, outputs: dict, current_span: CLISpan) -> None:  # noqa: PLR6301
+        """
+        Close the span, store its output data and print the tree when the span is a root.
+
+        Args:
+            outputs: The output data.
+            current_span: The current trace span.
+        """
+        current_span.end()
+        current_span.outputs = outputs
+        current_span.status = "done"
+
+        # Only a root span prints: by then its whole subtree has been collected.
+        is_root = current_span.parent is None
+        if is_root:
+            rich_print(current_span.to_tree())
+
+    def error(self, error: Exception, current_span: CLISpan) -> None:  # noqa: PLR6301
+        """
+        Close the span with the 'error' status and print the tree when the span is a root.
+
+        Args:
+            error: The error that occurred. It is not stored or rendered by this handler.
+            current_span: The current trace span.
+        """
+        current_span.end()
+        current_span.status = "error"
+
+        is_root = current_span.parent is None
+        if is_root:
+            rich_print(current_span.to_tree())
diff --git a/packages/ragbits-core/src/ragbits/core/embeddings/litellm.py b/packages/ragbits-core/src/ragbits/core/embeddings/litellm.py
@@ -103,10 +103,9 @@ async def embed_text(self, data: list[str], options: LiteLLMEmbeddingsOptions |
             if not response.data:
                 raise EmbeddingEmptyResponseError()
 
-            outputs.embeddings = [embedding["embedding"] for embedding in response.data]
             if response.usage:
                 outputs.completion_tokens = response.usage.completion_tokens
                 outputs.prompt_tokens = response.usage.prompt_tokens
                 outputs.total_tokens = response.usage.total_tokens
 
-        return outputs.embeddings
+        return [embedding["embedding"] for embedding in response.data]