From c17ddda274d5b6ac318a78af3fe0af63b1ecef15 Mon Sep 17 00:00:00 2001 From: Massimiliano Pippi Date: Wed, 20 Dec 2023 12:28:54 +0100 Subject: [PATCH] update to the latest Document format (#127) --- integrations/chroma/example/example.py | 8 ++++---- integrations/chroma/src/chroma_haystack/document_store.py | 7 +------ 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/integrations/chroma/example/example.py b/integrations/chroma/example/example.py index a6053db1c..a1155b216 100644 --- a/integrations/chroma/example/example.py +++ b/integrations/chroma/example/example.py @@ -3,7 +3,7 @@ from pathlib import Path from haystack import Pipeline -from haystack.components.file_converters import TextFileToDocument +from haystack.components.converters import TextFileToDocument from haystack.components.writers import DocumentWriter from chroma_haystack import ChromaDocumentStore @@ -19,11 +19,11 @@ indexing.add_component("converter", TextFileToDocument()) indexing.add_component("writer", DocumentWriter(document_store)) indexing.connect("converter", "writer") -indexing.run({"converter": {"paths": file_paths}}) +indexing.run({"converter": {"sources": file_paths}}) querying = Pipeline() querying.add_component("retriever", ChromaQueryRetriever(document_store)) results = querying.run({"retriever": {"queries": ["Variable declarations"], "top_k": 3}}) -for d in results["retriever"][0]: - print(d.metadata, d.score) +for d in results["retriever"]["documents"][0]: + print(d.meta, d.score) diff --git a/integrations/chroma/src/chroma_haystack/document_store.py b/integrations/chroma/src/chroma_haystack/document_store.py index 8d6a8437e..8af78f05f 100644 --- a/integrations/chroma/src/chroma_haystack/document_store.py +++ b/integrations/chroma/src/chroma_haystack/document_store.py @@ -247,10 +247,6 @@ def _normalize_filters(self, filters: Dict[str, Any]) -> Tuple[List[str], Dict[s # if the list contains multiple items, we need an $or chain for v in value: where["$or"].append({field: v}) - elif field == "mime_type": - # Schedule for removal the original key, we're going to change it - keys_to_remove.append(field) - where["_mime_type"] = value for k in keys_to_remove: del filters[k] @@ -310,8 +306,7 @@ def _query_result_to_documents(self, result: QueryResult) -> List[List[Document] # prepare metadata if metadatas := result.get("metadatas"): - document_dict["metadata"] = dict(metadatas[i][j]) - document_dict["mime_type"] = document_dict["metadata"].pop("_mime_type") + document_dict["meta"] = dict(metadatas[i][j]) if embeddings := result.get("embeddings"): document_dict["embedding"] = np.array(embeddings[i][j])