From 7f82f9535c99bb53456f7cd87899eca9756e6c23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Hordy=C5=84ski?= <26008518+mhordynski@users.noreply.github.com> Date: Fri, 29 Nov 2024 14:54:17 +0100 Subject: [PATCH] fix: chroma examples error due to None parsing in flattens (#215) --- examples/document-search/chroma_otel.py | 4 ++-- .../src/ragbits/core/utils/dict_transformations.py | 12 ++++++------ .../src/ragbits/core/vector_stores/chroma.py | 7 ++++++- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/examples/document-search/chroma_otel.py b/examples/document-search/chroma_otel.py index 7efd5195..e5c28cb5 100644 --- a/examples/document-search/chroma_otel.py +++ b/examples/document-search/chroma_otel.py @@ -51,7 +51,7 @@ import asyncio -from chromadb import PersistentClient +from chromadb import EphemeralClient from opentelemetry import trace from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter from opentelemetry.sdk.resources import SERVICE_NAME, Resource @@ -102,7 +102,7 @@ async def main() -> None: model="text-embedding-3-small", ) vector_store = ChromaVectorStore( - client=PersistentClient("./chroma"), + client=EphemeralClient(), index_name="jokes", ) document_search = DocumentSearch( diff --git a/packages/ragbits-core/src/ragbits/core/utils/dict_transformations.py b/packages/ragbits-core/src/ragbits/core/utils/dict_transformations.py index c05c84a4..ae58fc62 100644 --- a/packages/ragbits-core/src/ragbits/core/utils/dict_transformations.py +++ b/packages/ragbits-core/src/ragbits/core/utils/dict_transformations.py @@ -1,9 +1,9 @@ from typing import Any +SimpleTypes = str | int | float | bool | None -def flatten_dict( - input_dict: dict[str, Any], parent_key: str = "", sep: str = "." -) -> dict[str, str | int | float | bool]: + +def flatten_dict(input_dict: dict[str, Any], parent_key: str = "", sep: str = ".") -> dict[str, SimpleTypes]: """ Recursively flatten a nested dictionary and lists, converting non-primitive types to strings. @@ -18,7 +18,7 @@ def flatten_dict( Raises: ValueError: If the dictionary cannot be safely flattened due to the presence of the separator in the dict key. """ - items: dict[str, str | int | float | bool] = {} + items: dict[str, SimpleTypes] = {} for k, v in input_dict.items(): if sep in k: raise ValueError(f"Separator '{sep}' found in key '{parent_key}' Cannot flatten dictionary safely.") @@ -36,9 +36,9 @@ def flatten_dict( if isinstance(item, dict): items = {**items, **flatten_dict(item, list_key, sep=sep)} else: - items[list_key] = item if isinstance(item, str | int | float | bool) else str(item) + items[list_key] = item if isinstance(item, SimpleTypes) else str(item) else: - items[new_key] = v if isinstance(v, str | int | float | bool) else str(v) + items[new_key] = v if isinstance(v, SimpleTypes) else str(v) return items diff --git a/packages/ragbits-core/src/ragbits/core/vector_stores/chroma.py b/packages/ragbits-core/src/ragbits/core/vector_stores/chroma.py index 3f650d60..d769cb30 100644 --- a/packages/ragbits-core/src/ragbits/core/vector_stores/chroma.py +++ b/packages/ragbits-core/src/ragbits/core/vector_stores/chroma.py @@ -80,7 +80,7 @@ async def store(self, entries: list[VectorStoreEntry]) -> None: metadatas = [entry.metadata for entry in entries] # Flatten metadata - flattened_metadatas = [flatten_dict(metadata) for metadata in metadatas] + flattened_metadatas = [self._flatten_metadata(metadata) for metadata in metadatas] metadatas = ( flattened_metadatas @@ -180,3 +180,8 @@ async def list( ) for id, metadata, embedding, document in zip(ids, metadatas, embeddings, documents, strict=True) ] + + @staticmethod + def _flatten_metadata(metadata: dict) -> dict: + """Flattens the metadata dictionary. Removes any None values as they are not supported by ChromaDB.""" + return {k: v for k, v in flatten_dict(metadata).items() if v is not None}