From a1867514b4975aebc8b8c1f3e4cf55eb0c643792 Mon Sep 17 00:00:00 2001 From: PatrykWyzgowski <pwyzgow.github@gmail.com> Date: Mon, 7 Oct 2024 15:51:02 +0200 Subject: [PATCH] refactor: move vector search capabilities to core package (#39) --- .../examples/chromadb_example.py | 2 +- packages/ragbits-core/pyproject.toml | 3 +++ .../src/ragbits/core}/vector_store/__init__.py | 0 .../src/ragbits/core}/vector_store/base.py | 0 .../ragbits/core}/vector_store/chromadb_store.py | 4 ++-- .../src/ragbits/core}/vector_store/in_memory.py | 2 +- .../unit/vector_stores}/test_chromadb_store.py | 14 ++++++++++++-- .../vector_stores}/test_simple_vector_store.py | 2 +- .../examples/simple_text.py | 2 +- packages/ragbits-document-search/pyproject.toml | 3 --- .../src/ragbits/document_search/_main.py | 2 +- .../ragbits/document_search/documents/element.py | 2 +- .../tests/unit/test_document_search.py | 2 +- .../tests/unit/test_elements.py | 2 +- pyproject.toml | 4 ++-- uv.lock | 16 ++++++++-------- 16 files changed, 35 insertions(+), 25 deletions(-) rename packages/{ragbits-document-search => ragbits-core}/examples/chromadb_example.py (94%) rename packages/{ragbits-document-search/src/ragbits/document_search => ragbits-core/src/ragbits/core}/vector_store/__init__.py (100%) rename packages/{ragbits-document-search/src/ragbits/document_search => ragbits-core/src/ragbits/core}/vector_store/base.py (100%) rename packages/{ragbits-document-search/src/ragbits/document_search => ragbits-core/src/ragbits/core}/vector_store/chromadb_store.py (97%) rename packages/{ragbits-document-search/src/ragbits/document_search => ragbits-core/src/ragbits/core}/vector_store/in_memory.py (94%) rename packages/{ragbits-document-search/tests/unit => ragbits-core/tests/unit/vector_stores}/test_chromadb_store.py (96%) rename packages/{ragbits-document-search/tests/unit => ragbits-core/tests/unit/vector_stores}/test_simple_vector_store.py (92%) diff --git a/packages/ragbits-document-search/examples/chromadb_example.py b/packages/ragbits-core/examples/chromadb_example.py similarity index 94% rename from packages/ragbits-document-search/examples/chromadb_example.py rename to packages/ragbits-core/examples/chromadb_example.py index d2a78097..0a2a8015 100644 --- a/packages/ragbits-document-search/examples/chromadb_example.py +++ b/packages/ragbits-core/examples/chromadb_example.py @@ -10,9 +10,9 @@ import chromadb from ragbits.core.embeddings.litellm import LiteLLMEmbeddings +from ragbits.core.vector_store.chromadb_store import ChromaDBStore from ragbits.document_search import DocumentSearch from ragbits.document_search.documents.document import DocumentMeta -from ragbits.document_search.vector_store.chromadb_store import ChromaDBStore documents = [ DocumentMeta.create_text_document_from_literal("RIP boiled water. You will be mist."), diff --git a/packages/ragbits-core/pyproject.toml b/packages/ragbits-core/pyproject.toml index f2d2986b..8272a684 100644 --- a/packages/ragbits-core/pyproject.toml +++ b/packages/ragbits-core/pyproject.toml @@ -37,6 +37,9 @@ dependencies = [ ] [project.optional-dependencies] +chromadb = [ + "chromadb~=0.4.24", +] litellm = [ "litellm~=1.46.0", ] diff --git a/packages/ragbits-document-search/src/ragbits/document_search/vector_store/__init__.py b/packages/ragbits-core/src/ragbits/core/vector_store/__init__.py similarity index 100% rename from packages/ragbits-document-search/src/ragbits/document_search/vector_store/__init__.py rename to packages/ragbits-core/src/ragbits/core/vector_store/__init__.py diff --git a/packages/ragbits-document-search/src/ragbits/document_search/vector_store/base.py b/packages/ragbits-core/src/ragbits/core/vector_store/base.py similarity index 100% rename from packages/ragbits-document-search/src/ragbits/document_search/vector_store/base.py rename to packages/ragbits-core/src/ragbits/core/vector_store/base.py diff --git a/packages/ragbits-document-search/src/ragbits/document_search/vector_store/chromadb_store.py b/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py similarity index 97% rename from packages/ragbits-document-search/src/ragbits/document_search/vector_store/chromadb_store.py rename to packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py index 6d4d4bc4..259fdc7f 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/vector_store/chromadb_store.py +++ b/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py @@ -10,8 +10,8 @@ HAS_CHROMADB = False from ragbits.core.embeddings.base import Embeddings -from ragbits.document_search.vector_store.base import VectorStore -from ragbits.document_search.vector_store.in_memory import VectorDBEntry +from ragbits.core.vector_store.base import VectorStore +from ragbits.core.vector_store.in_memory import VectorDBEntry class ChromaDBStore(VectorStore): diff --git a/packages/ragbits-document-search/src/ragbits/document_search/vector_store/in_memory.py b/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py similarity index 94% rename from packages/ragbits-document-search/src/ragbits/document_search/vector_store/in_memory.py rename to packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py index 4d9e6fd0..ce0576fa 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/vector_store/in_memory.py +++ b/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py @@ -1,6 +1,6 @@ import numpy as np -from ragbits.document_search.vector_store.base import VectorDBEntry, VectorStore +from ragbits.core.vector_store.base import VectorDBEntry, VectorStore class InMemoryVectorStore(VectorStore): diff --git a/packages/ragbits-document-search/tests/unit/test_chromadb_store.py b/packages/ragbits-core/tests/unit/vector_stores/test_chromadb_store.py similarity index 96% rename from packages/ragbits-document-search/tests/unit/test_chromadb_store.py rename to packages/ragbits-core/tests/unit/vector_stores/test_chromadb_store.py index 9d45bdc1..1d08f90e 100644 --- a/packages/ragbits-document-search/tests/unit/test_chromadb_store.py +++ b/packages/ragbits-core/tests/unit/vector_stores/test_chromadb_store.py @@ -4,7 +4,7 @@ import pytest from ragbits.core.embeddings.base import Embeddings -from ragbits.document_search.vector_store.chromadb_store import ChromaDBStore, VectorDBEntry +from ragbits.core.vector_store.chromadb_store import ChromaDBStore, VectorDBEntry @pytest.fixture @@ -61,13 +61,14 @@ def mock_vector_db_entry(): def test_chromadbstore_init_import_error(): - with patch("ragbits.document_search.vector_store.chromadb_store.HAS_CHROMADB", False): + with patch("ragbits.core.vector_store.chromadb_store.HAS_CHROMADB", False): with pytest.raises(ImportError): ChromaDBStore(index_name="test_index", chroma_client=MagicMock(), embedding_function=MagicMock()) def test_get_chroma_collection(mock_chromadb_store): _ = mock_chromadb_store._get_chroma_collection() + assert mock_chromadb_store._chroma_client.get_or_create_collection.called @@ -82,7 +83,9 @@ async def test_stores_entries_correctly(mock_chromadb_store): }, ) ] + await mock_chromadb_store.store(data) + mock_chromadb_store._chroma_client.get_or_create_collection().add.assert_called_once() @@ -100,6 +103,7 @@ def test_process_db_entry(mock_chromadb_store, mock_vector_db_entry): async def test_store(mock_chromadb_store, mock_vector_db_entry): await mock_chromadb_store.store([mock_vector_db_entry]) + assert mock_chromadb_store._chroma_client.get_or_create_collection().add.called @@ -117,7 +121,9 @@ async def test_retrieves_entries_correctly(mock_chromadb_store): ] ], } + entries = await mock_chromadb_store.retrieve(vector) + assert len(entries) == 1 assert entries[0].metadata["content"] == "test content" assert entries[0].metadata["document"]["title"] == "test title" @@ -127,7 +133,9 @@ async def test_handles_empty_retrieve(mock_chromadb_store): vector = [0.1, 0.2, 0.3] mock_collection = mock_chromadb_store._get_chroma_collection() mock_collection.query.return_value = {"documents": [], "metadatas": []} + entries = await mock_chromadb_store.retrieve(vector) + assert len(entries) == 0 @@ -145,5 +153,7 @@ def test_repr(mock_chromadb_store): ) def test_return_best_match(mock_chromadb_store, retrieved, max_distance, expected): mock_chromadb_store._max_distance = max_distance + result = mock_chromadb_store._return_best_match(retrieved) + assert result == expected diff --git a/packages/ragbits-document-search/tests/unit/test_simple_vector_store.py b/packages/ragbits-core/tests/unit/vector_stores/test_simple_vector_store.py similarity index 92% rename from packages/ragbits-document-search/tests/unit/test_simple_vector_store.py rename to packages/ragbits-core/tests/unit/vector_stores/test_simple_vector_store.py index 4c47bc96..8461d93b 100644 --- a/packages/ragbits-document-search/tests/unit/test_simple_vector_store.py +++ b/packages/ragbits-core/tests/unit/vector_stores/test_simple_vector_store.py @@ -1,9 +1,9 @@ from pathlib import Path +from ragbits.core.vector_store.in_memory import InMemoryVectorStore from ragbits.document_search.documents.document import DocumentMeta, DocumentType from ragbits.document_search.documents.element import TextElement from ragbits.document_search.documents.sources import LocalFileSource -from ragbits.document_search.vector_store.in_memory import InMemoryVectorStore async def test_simple_vector_store(): diff --git a/packages/ragbits-document-search/examples/simple_text.py b/packages/ragbits-document-search/examples/simple_text.py index 186e06bf..c0a3fa44 100644 --- a/packages/ragbits-document-search/examples/simple_text.py +++ b/packages/ragbits-document-search/examples/simple_text.py @@ -8,9 +8,9 @@ import asyncio from ragbits.core.embeddings.litellm import LiteLLMEmbeddings +from ragbits.core.vector_store.in_memory import InMemoryVectorStore from ragbits.document_search import DocumentSearch from ragbits.document_search.documents.document import DocumentMeta -from ragbits.document_search.vector_store.in_memory import InMemoryVectorStore documents = [ DocumentMeta.create_text_document_from_literal("RIP boiled water. You will be mist."), diff --git a/packages/ragbits-document-search/pyproject.toml b/packages/ragbits-document-search/pyproject.toml index 199f958b..6820f0bf 100644 --- a/packages/ragbits-document-search/pyproject.toml +++ b/packages/ragbits-document-search/pyproject.toml @@ -38,9 +38,6 @@ dependencies = [ ] [project.optional-dependencies] -chromadb = [ - "chromadb~=0.4.24", -] gcs = [ "gcloud-aio-storage~=9.3.0" ] diff --git a/packages/ragbits-document-search/src/ragbits/document_search/_main.py b/packages/ragbits-document-search/src/ragbits/document_search/_main.py index ae593a1e..04289872 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/_main.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/_main.py @@ -3,6 +3,7 @@ from pydantic import BaseModel, Field from ragbits.core.embeddings.base import Embeddings +from ragbits.core.vector_store.base import VectorStore from ragbits.document_search.documents.document import Document, DocumentMeta from ragbits.document_search.documents.element import Element from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter @@ -11,7 +12,6 @@ from ragbits.document_search.retrieval.rephrasers.noop import NoopQueryRephraser from ragbits.document_search.retrieval.rerankers.base import Reranker from ragbits.document_search.retrieval.rerankers.noop import NoopReranker -from ragbits.document_search.vector_store.base import VectorStore class SearchConfig(BaseModel): diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py index d521b7f7..744aed72 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py @@ -3,8 +3,8 @@ from pydantic import BaseModel +from ragbits.core.vector_store.base import VectorDBEntry from ragbits.document_search.documents.document import DocumentMeta -from ragbits.document_search.vector_store.base import VectorDBEntry class Element(BaseModel, ABC): diff --git a/packages/ragbits-document-search/tests/unit/test_document_search.py b/packages/ragbits-document-search/tests/unit/test_document_search.py index 2e34ba72..8f6ee9e1 100644 --- a/packages/ragbits-document-search/tests/unit/test_document_search.py +++ b/packages/ragbits-document-search/tests/unit/test_document_search.py @@ -4,12 +4,12 @@ import pytest +from ragbits.core.vector_store.in_memory import InMemoryVectorStore from ragbits.document_search import DocumentSearch from ragbits.document_search._main import SearchConfig from ragbits.document_search.documents.document import Document, DocumentMeta from ragbits.document_search.documents.element import TextElement from ragbits.document_search.ingestion.providers.dummy import DummyProvider -from ragbits.document_search.vector_store.in_memory import InMemoryVectorStore @pytest.mark.parametrize( diff --git a/packages/ragbits-document-search/tests/unit/test_elements.py b/packages/ragbits-document-search/tests/unit/test_elements.py index bb213ca7..38eb456a 100644 --- a/packages/ragbits-document-search/tests/unit/test_elements.py +++ b/packages/ragbits-document-search/tests/unit/test_elements.py @@ -1,6 +1,6 @@ +from ragbits.core.vector_store.base import VectorDBEntry from ragbits.document_search.documents.document import DocumentType from ragbits.document_search.documents.element import Element -from ragbits.document_search.vector_store.base import VectorDBEntry def test_resolving_element_type(): diff --git a/pyproject.toml b/pyproject.toml index e2680984..ab39635e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,9 +5,9 @@ description = "Ragbits development workspace" readme = "README.md" requires-python = ">=3.10" dependencies = [ - "ragbits[litellm,local]", + "ragbits[litellm,local,chromadb]", "ragbits-dev-kit", - "ragbits-document-search[chromadb,gcs]", + "ragbits-document-search[gcs]", "ragbits-cli" ] diff --git a/uv.lock b/uv.lock index 3a335bc4..a000bb8b 100644 --- a/uv.lock +++ b/uv.lock @@ -2874,6 +2874,9 @@ dependencies = [ ] [package.optional-dependencies] +chromadb = [ + { name = "chromadb" }, +] litellm = [ { name = "litellm" }, ] @@ -2894,6 +2897,7 @@ dev = [ [package.metadata] requires-dist = [ + { name = "chromadb", marker = "extra == 'chromadb'", specifier = "~=0.4.24" }, { name = "jinja2", specifier = ">=3.1.4" }, { name = "litellm", marker = "extra == 'litellm'", specifier = "~=1.46.0" }, { name = "numpy", marker = "extra == 'local'", specifier = "~=1.24.0" }, @@ -2968,9 +2972,6 @@ dependencies = [ ] [package.optional-dependencies] -chromadb = [ - { name = "chromadb" }, -] gcs = [ { name = "gcloud-aio-storage" }, ] @@ -2987,7 +2988,6 @@ dev = [ [package.metadata] requires-dist = [ - { name = "chromadb", marker = "extra == 'chromadb'", specifier = "~=0.4.24" }, { name = "gcloud-aio-storage", marker = "extra == 'gcs'", specifier = "~=9.3.0" }, { name = "numpy", specifier = "~=1.24.0" }, { name = "ragbits", editable = "packages/ragbits-core" }, @@ -3009,10 +3009,10 @@ name = "ragbits-workspace" version = "0.1.0" source = { editable = "." } dependencies = [ - { name = "ragbits", extra = ["litellm", "local"] }, + { name = "ragbits", extra = ["chromadb", "litellm", "local"] }, { name = "ragbits-cli" }, { name = "ragbits-dev-kit" }, - { name = "ragbits-document-search", extra = ["chromadb", "gcs"] }, + { name = "ragbits-document-search", extra = ["gcs"] }, ] [package.dev-dependencies] @@ -3026,10 +3026,10 @@ dev = [ [package.metadata] requires-dist = [ - { name = "ragbits", extras = ["litellm", "local"], editable = "packages/ragbits-core" }, + { name = "ragbits", extras = ["litellm", "local", "chromadb"], editable = "packages/ragbits-core" }, { name = "ragbits-cli", editable = "packages/ragbits-cli" }, { name = "ragbits-dev-kit", editable = "packages/ragbits-dev-kit" }, - { name = "ragbits-document-search", extras = ["chromadb", "gcs"], editable = "packages/ragbits-document-search" }, + { name = "ragbits-document-search", extras = ["gcs"], editable = "packages/ragbits-document-search" }, ] [package.metadata.requires-dev]