-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: add cli event handler #256
base: main
Are you sure you want to change the base?
Changes from 35 commits
345e337
691dc71
a9167bf
11f8f1b
d04d8e9
4b7931e
56d9e17
773edf0
99c8045
766c036
aa73c6a
d8f6dab
0a64ac9
4f85b6a
a2b3821
eea3984
5324f8f
3da2dac
c4a653e
7bb3a62
ca5ac89
60978ac
8b9bdf1
6d4dff8
ef0dff2
b0484f8
0e353a0
de571ff
8b62a6a
39b8fc5
4947470
76ef33c
723e86e
dc5164a
4ca2cef
358bf6c
fffb878
66fdaf6
a3df4eb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
""" | ||
Ragbits Document Search Example: Chroma x CLI trace handler | ||
|
||
This example demonstrates how to use the `DocumentSearch` class to search for documents with a more advanced setup. | ||
We will use the `LiteLLMEmbeddings` class to embed the documents and the query, the `ChromaVectorStore` class to store | ||
the embeddings, and the CLI Trace handler to trace the operations. | ||
|
||
The script performs the following steps: | ||
|
||
1. Create a list of documents. | ||
2. Initialize the `LiteLLMEmbeddings` class with the OpenAI `text-embedding-3-small` embedding model. | ||
3. Initialize the `ChromaVectorStore` class with an `EphemeralClient` instance and an index name. | ||
4. Initialize the `DocumentSearch` class with the embedder and the vector store. | ||
5. Ingest the documents into the `DocumentSearch` instance. | ||
6. List all documents in the vector store. | ||
7. Search for documents using a query. | ||
8. Print the list of all documents and the search results. | ||
|
||
To run the script, execute the following command: | ||
|
||
```bash | ||
uv run examples/document-search/chroma_cli.py | ||
``` | ||
# /// script | ||
# requires-python = ">=3.10" | ||
# dependencies = [ | ||
# "ragbits-document-search", | ||
# "ragbits-core[chroma,cli]", | ||
# ] | ||
# /// | ||
""" | ||
|
||
import asyncio | ||
|
||
from chromadb import EphemeralClient | ||
|
||
from ragbits.core import audit | ||
from ragbits.core.embeddings.litellm import LiteLLMEmbeddings | ||
from ragbits.core.vector_stores.chroma import ChromaVectorStore | ||
from ragbits.document_search import DocumentSearch, SearchConfig | ||
from ragbits.document_search.documents.document import DocumentMeta | ||
|
||
# Select the "cli" trace handler (the CLI trace handler mentioned in the module docstring). | ||
audit.set_trace_handlers("cli") | ||
|
||
# Sample documents built from string literals; main() ingests this list. | ||
documents = [ | ||
DocumentMeta.create_text_document_from_literal( | ||
""" | ||
RIP boiled water. You will be mist. | ||
""" | ||
), | ||
DocumentMeta.create_text_document_from_literal( | ||
""" | ||
Why doesn't James Bond fart in bed? Because it would blow his cover. | ||
""" | ||
), | ||
DocumentMeta.create_text_document_from_literal( | ||
""" | ||
Why programmers don't like to swim? Because they're scared of the floating points. | ||
""" | ||
), | ||
DocumentMeta.create_text_document_from_literal( | ||
""" | ||
This one is completely unrelated. | ||
""" | ||
), | ||
] | ||
|
||
|
||
async def main() -> None:
    """
    Run the example.

    Ingests the sample documents, prints the content of every entry kept in the
    vector store, then prints the elements returned for a single search query.
    """
    embedder = LiteLLMEmbeddings(model="text-embedding-3-small")
    vector_store = ChromaVectorStore(client=EphemeralClient(), index_name="jokes")
    document_search = DocumentSearch(embedder=embedder, vector_store=vector_store)

    await document_search.ingest(documents)

    stored_entries = await vector_store.list()

    print()
    print("All documents:")
    stored_contents = [entry.metadata["content"] for entry in stored_entries]
    print(stored_contents)

    query = "I'm boiling my water and I need a joke"
    # Ask the vector store for the two closest entries, with no distance cut-off.
    search_config = SearchConfig(vector_store_kwargs={"k": 2, "max_distance": None})
    results = await document_search.search(query, config=search_config)

    print()
    print(f"Documents similar to: {query}")
    matched_texts = [element.text_representation for element in results]
    print(matched_texts)


if __name__ == "__main__":
    asyncio.run(main())
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
import time | ||
from typing import Optional | ||
|
||
from rich import print as rich_print | ||
from rich.tree import Tree | ||
|
||
from ragbits.core.audit import TraceHandler | ||
|
||
|
||
class CLISpan:
    """
    CLI Span represents a single operation within a trace.

    Spans form a tree: every span keeps a reference to its parent and a list of its
    children, and can render itself together with its descendants as a Rich tree.
    """

    def __init__(self, name: str, inputs: dict, parent: Optional["CLISpan"] = None):
        """
        Constructs a new CLI Span.
        Sets the start time of the span to the current `time.perf_counter()` reading
        (a monotonic counter, only meaningful as a difference - not a wall-clock time).
        Sets the span status to 'started'.

        Args:
            name: The name of the span.
            inputs: The inputs of the span. A falsy value is stored as an empty dict.
            parent: The parent of the initiated span, or None for a root span.
        """
        self.name = name
        self.parent = parent
        self.start_time: float = time.perf_counter()
        self.end_time: float | None = None
        self.children: list[CLISpan] = []
        # NOTE(review): a reviewer suggested turning the status into an enum. It is kept
        # as a plain string here because it is assigned and compared as a raw string.
        self.status: str = "started"
        self.inputs: dict = inputs or {}
        self.outputs: dict = {}

    @property
    def duration(self) -> float:
        """
        Elapsed time of the span in seconds, or 0.0 while the span has not ended.
        """
        # Compare against None explicitly: 0.0 is a legitimate counter reading.
        if self.end_time is None:
            return 0.0
        return self.end_time - self.start_time

    def end(self) -> None:
        """
        Sets the current `time.perf_counter()` reading as the span's end time.
        Only the first call to `end` modifies the span, further calls are ignored.
        """
        if self.end_time is None:
            self.end_time = time.perf_counter()

    def _label(self, color: str) -> str:
        """
        Build the Rich markup label describing this span.

        Args:
            color: The Rich style applied to the span name.

        Returns:
            Markup with the styled name, the duration, and the inputs and outputs.
        """
        # Imported locally so the block brings its own dependency into scope.
        from rich.markup import escape

        secondary_color = "grey50"
        # Names, inputs and outputs are arbitrary text; escaping keeps any "[...]" they
        # contain from being parsed as Rich markup tags.
        # NOTE(review): reviewers suggested rendering each input/output key separately,
        # with different colors for keys and values, to make this more readable.
        return (
            f"[{color}]{escape(self.name)}[/{color}] Duration: {self.duration:.3f}s\n"
            f"[{secondary_color}]Inputs: {escape(str(self.inputs))}\n"
            f"Outputs: {escape(str(self.outputs))}[/{secondary_color}]"
        )

    def to_tree(self, tree: "Tree | None" = None, color: str = "bold blue") -> "Tree":
        """
        Convert the CLISpan object and its children into a Rich Tree structure for console rendering.

        Args:
            tree: An existing Rich Tree node to which the span will be added.
                If None, a new tree is created for the root span.
            color: The color of the span name rendered to console.

        Returns:
            The Rich Tree node representing this span, with its children attached.
        """
        error_color = "bold red"
        child_color = "bold green"

        label = self._label(color)
        node = Tree(label) if tree is None else tree.add(label)

        for child in self.children:
            # Failed children are highlighted in the error color.
            child.to_tree(node, error_color if child.status == "error" else child_color)
        return node
|
||
|
||
class CLITraceHandler(TraceHandler[CLISpan]):
    """
    Trace handler that builds a tree of CLI spans and prints it to the console
    once the root span finishes or fails.
    """

    def start(self, name: str, inputs: dict, current_span: CLISpan | None = None) -> CLISpan:  # noqa: PLR6301
        """
        Log input data at the beginning of the trace.

        Args:
            name: The name of the trace.
            inputs: The input data.
            current_span: The current trace span, which becomes the parent of the new span.

        Returns:
            The newly created span.
        """
        span = CLISpan(name, inputs, current_span)

        if current_span is not None:
            current_span.children.append(span)

        return span

    def stop(self, outputs: dict, current_span: CLISpan) -> None:  # noqa: PLR6301
        """
        Log output data at the end of the trace.

        Args:
            outputs: The output data.
            current_span: The current trace span.
        """
        current_span.end()
        current_span.status = "done"
        current_span.outputs = outputs

        # Only the root span prints: it renders the whole tree in one go.
        if current_span.parent is None:
            rich_print(current_span.to_tree())

    def error(self, error: Exception, current_span: CLISpan) -> None:  # noqa: PLR6301
        """
        Log error during the trace.

        Args:
            error: The error that occurred.
            current_span: The current trace span.
        """
        current_span.end()
        current_span.status = "error"
        # Keep the exception visible in the rendered trace instead of dropping it.
        current_span.outputs = {"error": repr(error)}

        if current_span.parent is None:
            # A failed root span gets the same color that failed child spans are rendered with.
            rich_print(current_span.to_tree(color="bold red"))
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are we sure that we need a script example for a CLI-related feature? Even if we do, setting a trace handler is just one line, so perhaps we should eventually add trace handler setup to the already existing scripts instead.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We probably don't need it in the final version. I just wanted to see how the CLI event handler works. I can remove it before merge.