From 51629a3d2d376c7e4832b29fe9beccdc543543fc Mon Sep 17 00:00:00 2001 From: Alan Konarski Date: Thu, 19 Sep 2024 14:17:12 +0200 Subject: [PATCH] Fix tests --- .libraries-whitelist.txt | 3 ++- .../src/ragbits/document_search/_main.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.libraries-whitelist.txt b/.libraries-whitelist.txt index 5bd0d5a48..06516cf0c 100644 --- a/.libraries-whitelist.txt +++ b/.libraries-whitelist.txt @@ -1,2 +1,3 @@ pkg_resources -tiktoken \ No newline at end of file +tiktoken +chardet diff --git a/packages/ragbits-document-search/src/ragbits/document_search/_main.py b/packages/ragbits-document-search/src/ragbits/document_search/_main.py index 812b599ea..9034fb3ab 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/_main.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/_main.py @@ -1,7 +1,8 @@ from ragbits.core.embeddings.base import Embeddings -from ragbits.document_search.documents.document import DocumentMeta +from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.element import Element from ragbits.document_search.ingestion.document_processor import DocumentProcessor +from ragbits.document_search.ingestion.providers.dummy import DummyProvider from ragbits.document_search.retrieval.rephrasers.base import QueryRephraser from ragbits.document_search.retrieval.rephrasers.noop import NoopQueryRephraser from ragbits.document_search.retrieval.rerankers.base import Reranker @@ -70,7 +71,7 @@ async def ingest_document(self, document: DocumentMeta) -> None: """ # TODO: This is a placeholder implementation. It should be replaced with a real implementation. - document_processor = DocumentProcessor.from_config() + document_processor = DocumentProcessor.from_config({DocumentType.TXT: DummyProvider()}) elements = await document_processor.process(document) vectors = await self.embedder.embed_text([element.get_key() for element in elements]) entries = [element.to_vector_db_entry(vector) for element, vector in zip(elements, vectors)]