From a1867514b4975aebc8b8c1f3e4cf55eb0c643792 Mon Sep 17 00:00:00 2001
From: PatrykWyzgowski <pwyzgow.github@gmail.com>
Date: Mon, 7 Oct 2024 15:51:02 +0200
Subject: [PATCH] refactor: move vector search capabilities to core package
 (#39)

---
 .../examples/chromadb_example.py                 |  2 +-
 packages/ragbits-core/pyproject.toml             |  3 +++
 .../src/ragbits/core}/vector_store/__init__.py   |  0
 .../src/ragbits/core}/vector_store/base.py       |  0
 .../ragbits/core}/vector_store/chromadb_store.py |  4 ++--
 .../src/ragbits/core}/vector_store/in_memory.py  |  2 +-
 .../unit/vector_stores}/test_chromadb_store.py   | 14 ++++++++++++--
 .../vector_stores}/test_simple_vector_store.py   |  2 +-
 .../examples/simple_text.py                      |  2 +-
 packages/ragbits-document-search/pyproject.toml  |  3 ---
 .../src/ragbits/document_search/_main.py         |  2 +-
 .../ragbits/document_search/documents/element.py |  2 +-
 .../tests/unit/test_document_search.py           |  2 +-
 .../tests/unit/test_elements.py                  |  2 +-
 pyproject.toml                                   |  4 ++--
 uv.lock                                          | 16 ++++++++--------
 16 files changed, 35 insertions(+), 25 deletions(-)
 rename packages/{ragbits-document-search => ragbits-core}/examples/chromadb_example.py (94%)
 rename packages/{ragbits-document-search/src/ragbits/document_search => ragbits-core/src/ragbits/core}/vector_store/__init__.py (100%)
 rename packages/{ragbits-document-search/src/ragbits/document_search => ragbits-core/src/ragbits/core}/vector_store/base.py (100%)
 rename packages/{ragbits-document-search/src/ragbits/document_search => ragbits-core/src/ragbits/core}/vector_store/chromadb_store.py (97%)
 rename packages/{ragbits-document-search/src/ragbits/document_search => ragbits-core/src/ragbits/core}/vector_store/in_memory.py (94%)
 rename packages/{ragbits-document-search/tests/unit => ragbits-core/tests/unit/vector_stores}/test_chromadb_store.py (96%)
 rename packages/{ragbits-document-search/tests/unit => ragbits-core/tests/unit/vector_stores}/test_simple_vector_store.py (92%)

diff --git a/packages/ragbits-document-search/examples/chromadb_example.py b/packages/ragbits-core/examples/chromadb_example.py
similarity index 94%
rename from packages/ragbits-document-search/examples/chromadb_example.py
rename to packages/ragbits-core/examples/chromadb_example.py
index d2a78097..0a2a8015 100644
--- a/packages/ragbits-document-search/examples/chromadb_example.py
+++ b/packages/ragbits-core/examples/chromadb_example.py
@@ -10,9 +10,9 @@
 import chromadb
 
 from ragbits.core.embeddings.litellm import LiteLLMEmbeddings
+from ragbits.core.vector_store.chromadb_store import ChromaDBStore
 from ragbits.document_search import DocumentSearch
 from ragbits.document_search.documents.document import DocumentMeta
-from ragbits.document_search.vector_store.chromadb_store import ChromaDBStore
 
 documents = [
     DocumentMeta.create_text_document_from_literal("RIP boiled water. You will be mist."),
diff --git a/packages/ragbits-core/pyproject.toml b/packages/ragbits-core/pyproject.toml
index f2d2986b..8272a684 100644
--- a/packages/ragbits-core/pyproject.toml
+++ b/packages/ragbits-core/pyproject.toml
@@ -37,6 +37,9 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
+chromadb = [
+    "chromadb~=0.4.24",
+]
 litellm = [
     "litellm~=1.46.0",
 ]
diff --git a/packages/ragbits-document-search/src/ragbits/document_search/vector_store/__init__.py b/packages/ragbits-core/src/ragbits/core/vector_store/__init__.py
similarity index 100%
rename from packages/ragbits-document-search/src/ragbits/document_search/vector_store/__init__.py
rename to packages/ragbits-core/src/ragbits/core/vector_store/__init__.py
diff --git a/packages/ragbits-document-search/src/ragbits/document_search/vector_store/base.py b/packages/ragbits-core/src/ragbits/core/vector_store/base.py
similarity index 100%
rename from packages/ragbits-document-search/src/ragbits/document_search/vector_store/base.py
rename to packages/ragbits-core/src/ragbits/core/vector_store/base.py
diff --git a/packages/ragbits-document-search/src/ragbits/document_search/vector_store/chromadb_store.py b/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py
similarity index 97%
rename from packages/ragbits-document-search/src/ragbits/document_search/vector_store/chromadb_store.py
rename to packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py
index 6d4d4bc4..259fdc7f 100644
--- a/packages/ragbits-document-search/src/ragbits/document_search/vector_store/chromadb_store.py
+++ b/packages/ragbits-core/src/ragbits/core/vector_store/chromadb_store.py
@@ -10,8 +10,8 @@
     HAS_CHROMADB = False
 
 from ragbits.core.embeddings.base import Embeddings
-from ragbits.document_search.vector_store.base import VectorStore
-from ragbits.document_search.vector_store.in_memory import VectorDBEntry
+from ragbits.core.vector_store.base import VectorStore
+from ragbits.core.vector_store.in_memory import VectorDBEntry
 
 
 class ChromaDBStore(VectorStore):
diff --git a/packages/ragbits-document-search/src/ragbits/document_search/vector_store/in_memory.py b/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py
similarity index 94%
rename from packages/ragbits-document-search/src/ragbits/document_search/vector_store/in_memory.py
rename to packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py
index 4d9e6fd0..ce0576fa 100644
--- a/packages/ragbits-document-search/src/ragbits/document_search/vector_store/in_memory.py
+++ b/packages/ragbits-core/src/ragbits/core/vector_store/in_memory.py
@@ -1,6 +1,6 @@
 import numpy as np
 
-from ragbits.document_search.vector_store.base import VectorDBEntry, VectorStore
+from ragbits.core.vector_store.base import VectorDBEntry, VectorStore
 
 
 class InMemoryVectorStore(VectorStore):
diff --git a/packages/ragbits-document-search/tests/unit/test_chromadb_store.py b/packages/ragbits-core/tests/unit/vector_stores/test_chromadb_store.py
similarity index 96%
rename from packages/ragbits-document-search/tests/unit/test_chromadb_store.py
rename to packages/ragbits-core/tests/unit/vector_stores/test_chromadb_store.py
index 9d45bdc1..1d08f90e 100644
--- a/packages/ragbits-document-search/tests/unit/test_chromadb_store.py
+++ b/packages/ragbits-core/tests/unit/vector_stores/test_chromadb_store.py
@@ -4,7 +4,7 @@
 import pytest
 
 from ragbits.core.embeddings.base import Embeddings
-from ragbits.document_search.vector_store.chromadb_store import ChromaDBStore, VectorDBEntry
+from ragbits.core.vector_store.chromadb_store import ChromaDBStore, VectorDBEntry
 
 
 @pytest.fixture
@@ -61,13 +61,14 @@ def mock_vector_db_entry():
 
 
 def test_chromadbstore_init_import_error():
-    with patch("ragbits.document_search.vector_store.chromadb_store.HAS_CHROMADB", False):
+    with patch("ragbits.core.vector_store.chromadb_store.HAS_CHROMADB", False):
         with pytest.raises(ImportError):
             ChromaDBStore(index_name="test_index", chroma_client=MagicMock(), embedding_function=MagicMock())
 
 
 def test_get_chroma_collection(mock_chromadb_store):
     _ = mock_chromadb_store._get_chroma_collection()
+
     assert mock_chromadb_store._chroma_client.get_or_create_collection.called
 
 
@@ -82,7 +83,9 @@ async def test_stores_entries_correctly(mock_chromadb_store):
             },
         )
     ]
+
     await mock_chromadb_store.store(data)
+
     mock_chromadb_store._chroma_client.get_or_create_collection().add.assert_called_once()
 
 
@@ -100,6 +103,7 @@ def test_process_db_entry(mock_chromadb_store, mock_vector_db_entry):
 
 async def test_store(mock_chromadb_store, mock_vector_db_entry):
     await mock_chromadb_store.store([mock_vector_db_entry])
+
     assert mock_chromadb_store._chroma_client.get_or_create_collection().add.called
 
 
@@ -117,7 +121,9 @@ async def test_retrieves_entries_correctly(mock_chromadb_store):
             ]
         ],
     }
+
     entries = await mock_chromadb_store.retrieve(vector)
+
     assert len(entries) == 1
     assert entries[0].metadata["content"] == "test content"
     assert entries[0].metadata["document"]["title"] == "test title"
@@ -127,7 +133,9 @@ async def test_handles_empty_retrieve(mock_chromadb_store):
     vector = [0.1, 0.2, 0.3]
     mock_collection = mock_chromadb_store._get_chroma_collection()
     mock_collection.query.return_value = {"documents": [], "metadatas": []}
+
     entries = await mock_chromadb_store.retrieve(vector)
+
     assert len(entries) == 0
 
 
@@ -145,5 +153,7 @@ def test_repr(mock_chromadb_store):
 )
 def test_return_best_match(mock_chromadb_store, retrieved, max_distance, expected):
     mock_chromadb_store._max_distance = max_distance
+
     result = mock_chromadb_store._return_best_match(retrieved)
+
     assert result == expected
diff --git a/packages/ragbits-document-search/tests/unit/test_simple_vector_store.py b/packages/ragbits-core/tests/unit/vector_stores/test_simple_vector_store.py
similarity index 92%
rename from packages/ragbits-document-search/tests/unit/test_simple_vector_store.py
rename to packages/ragbits-core/tests/unit/vector_stores/test_simple_vector_store.py
index 4c47bc96..8461d93b 100644
--- a/packages/ragbits-document-search/tests/unit/test_simple_vector_store.py
+++ b/packages/ragbits-core/tests/unit/vector_stores/test_simple_vector_store.py
@@ -1,9 +1,9 @@
 from pathlib import Path
 
+from ragbits.core.vector_store.in_memory import InMemoryVectorStore
 from ragbits.document_search.documents.document import DocumentMeta, DocumentType
 from ragbits.document_search.documents.element import TextElement
 from ragbits.document_search.documents.sources import LocalFileSource
-from ragbits.document_search.vector_store.in_memory import InMemoryVectorStore
 
 
 async def test_simple_vector_store():
diff --git a/packages/ragbits-document-search/examples/simple_text.py b/packages/ragbits-document-search/examples/simple_text.py
index 186e06bf..c0a3fa44 100644
--- a/packages/ragbits-document-search/examples/simple_text.py
+++ b/packages/ragbits-document-search/examples/simple_text.py
@@ -8,9 +8,9 @@
 import asyncio
 
 from ragbits.core.embeddings.litellm import LiteLLMEmbeddings
+from ragbits.core.vector_store.in_memory import InMemoryVectorStore
 from ragbits.document_search import DocumentSearch
 from ragbits.document_search.documents.document import DocumentMeta
-from ragbits.document_search.vector_store.in_memory import InMemoryVectorStore
 
 documents = [
     DocumentMeta.create_text_document_from_literal("RIP boiled water. You will be mist."),
diff --git a/packages/ragbits-document-search/pyproject.toml b/packages/ragbits-document-search/pyproject.toml
index 199f958b..6820f0bf 100644
--- a/packages/ragbits-document-search/pyproject.toml
+++ b/packages/ragbits-document-search/pyproject.toml
@@ -38,9 +38,6 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
-chromadb = [
-    "chromadb~=0.4.24",
-]
 gcs = [
     "gcloud-aio-storage~=9.3.0"
 ]
diff --git a/packages/ragbits-document-search/src/ragbits/document_search/_main.py b/packages/ragbits-document-search/src/ragbits/document_search/_main.py
index ae593a1e..04289872 100644
--- a/packages/ragbits-document-search/src/ragbits/document_search/_main.py
+++ b/packages/ragbits-document-search/src/ragbits/document_search/_main.py
@@ -3,6 +3,7 @@
 from pydantic import BaseModel, Field
 
 from ragbits.core.embeddings.base import Embeddings
+from ragbits.core.vector_store.base import VectorStore
 from ragbits.document_search.documents.document import Document, DocumentMeta
 from ragbits.document_search.documents.element import Element
 from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter
@@ -11,7 +12,6 @@
 from ragbits.document_search.retrieval.rephrasers.noop import NoopQueryRephraser
 from ragbits.document_search.retrieval.rerankers.base import Reranker
 from ragbits.document_search.retrieval.rerankers.noop import NoopReranker
-from ragbits.document_search.vector_store.base import VectorStore
 
 
 class SearchConfig(BaseModel):
diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py
index d521b7f7..744aed72 100644
--- a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py
+++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py
@@ -3,8 +3,8 @@
 
 from pydantic import BaseModel
 
+from ragbits.core.vector_store.base import VectorDBEntry
 from ragbits.document_search.documents.document import DocumentMeta
-from ragbits.document_search.vector_store.base import VectorDBEntry
 
 
 class Element(BaseModel, ABC):
diff --git a/packages/ragbits-document-search/tests/unit/test_document_search.py b/packages/ragbits-document-search/tests/unit/test_document_search.py
index 2e34ba72..8f6ee9e1 100644
--- a/packages/ragbits-document-search/tests/unit/test_document_search.py
+++ b/packages/ragbits-document-search/tests/unit/test_document_search.py
@@ -4,12 +4,12 @@
 
 import pytest
 
+from ragbits.core.vector_store.in_memory import InMemoryVectorStore
 from ragbits.document_search import DocumentSearch
 from ragbits.document_search._main import SearchConfig
 from ragbits.document_search.documents.document import Document, DocumentMeta
 from ragbits.document_search.documents.element import TextElement
 from ragbits.document_search.ingestion.providers.dummy import DummyProvider
-from ragbits.document_search.vector_store.in_memory import InMemoryVectorStore
 
 
 @pytest.mark.parametrize(
diff --git a/packages/ragbits-document-search/tests/unit/test_elements.py b/packages/ragbits-document-search/tests/unit/test_elements.py
index bb213ca7..38eb456a 100644
--- a/packages/ragbits-document-search/tests/unit/test_elements.py
+++ b/packages/ragbits-document-search/tests/unit/test_elements.py
@@ -1,6 +1,6 @@
+from ragbits.core.vector_store.base import VectorDBEntry
 from ragbits.document_search.documents.document import DocumentType
 from ragbits.document_search.documents.element import Element
-from ragbits.document_search.vector_store.base import VectorDBEntry
 
 
 def test_resolving_element_type():
diff --git a/pyproject.toml b/pyproject.toml
index e2680984..ab39635e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,9 +5,9 @@ description = "Ragbits development workspace"
 readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
-    "ragbits[litellm,local]",
+    "ragbits[litellm,local,chromadb]",
     "ragbits-dev-kit",
-    "ragbits-document-search[chromadb,gcs]",
+    "ragbits-document-search[gcs]",
     "ragbits-cli"
 ]
 
diff --git a/uv.lock b/uv.lock
index 3a335bc4..a000bb8b 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2874,6 +2874,9 @@ dependencies = [
 ]
 
 [package.optional-dependencies]
+chromadb = [
+    { name = "chromadb" },
+]
 litellm = [
     { name = "litellm" },
 ]
@@ -2894,6 +2897,7 @@ dev = [
 
 [package.metadata]
 requires-dist = [
+    { name = "chromadb", marker = "extra == 'chromadb'", specifier = "~=0.4.24" },
     { name = "jinja2", specifier = ">=3.1.4" },
     { name = "litellm", marker = "extra == 'litellm'", specifier = "~=1.46.0" },
     { name = "numpy", marker = "extra == 'local'", specifier = "~=1.24.0" },
@@ -2968,9 +2972,6 @@ dependencies = [
 ]
 
 [package.optional-dependencies]
-chromadb = [
-    { name = "chromadb" },
-]
 gcs = [
     { name = "gcloud-aio-storage" },
 ]
@@ -2987,7 +2988,6 @@ dev = [
 
 [package.metadata]
 requires-dist = [
-    { name = "chromadb", marker = "extra == 'chromadb'", specifier = "~=0.4.24" },
     { name = "gcloud-aio-storage", marker = "extra == 'gcs'", specifier = "~=9.3.0" },
     { name = "numpy", specifier = "~=1.24.0" },
     { name = "ragbits", editable = "packages/ragbits-core" },
@@ -3009,10 +3009,10 @@ name = "ragbits-workspace"
 version = "0.1.0"
 source = { editable = "." }
 dependencies = [
-    { name = "ragbits", extra = ["litellm", "local"] },
+    { name = "ragbits", extra = ["chromadb", "litellm", "local"] },
     { name = "ragbits-cli" },
     { name = "ragbits-dev-kit" },
-    { name = "ragbits-document-search", extra = ["chromadb", "gcs"] },
+    { name = "ragbits-document-search", extra = ["gcs"] },
 ]
 
 [package.dev-dependencies]
@@ -3026,10 +3026,10 @@ dev = [
 
 [package.metadata]
 requires-dist = [
-    { name = "ragbits", extras = ["litellm", "local"], editable = "packages/ragbits-core" },
+    { name = "ragbits", extras = ["litellm", "local", "chromadb"], editable = "packages/ragbits-core" },
     { name = "ragbits-cli", editable = "packages/ragbits-cli" },
     { name = "ragbits-dev-kit", editable = "packages/ragbits-dev-kit" },
-    { name = "ragbits-document-search", extras = ["chromadb", "gcs"], editable = "packages/ragbits-document-search" },
+    { name = "ragbits-document-search", extras = ["gcs"], editable = "packages/ragbits-document-search" },
 ]
 
 [package.metadata.requires-dev]