From 41433e65394e4189b80a238bdede02d1c14d9421 Mon Sep 17 00:00:00 2001
From: anna-charlotte <charlotte.gerhaher@jina.ai>
Date: Mon, 24 Apr 2023 13:40:22 +0200
Subject: [PATCH 1/6] feat: add in-memory and hnswlib vectorstore

Signed-off-by: anna-charlotte <charlotte.gerhaher@jina.ai>
---
 langchain/vectorstores/hnsw_lib.py            | 235 +++++++++++++++++
 langchain/vectorstores/in_memory.py           | 210 +++++++++++++++
 poetry.lock                                   | 245 ++++++++++++------
 pyproject.toml                                |   7 +-
 .../vectorstores/test_hnsw_lib.py             |  54 ++++
 .../vectorstores/test_in_memory.py            |  48 ++++
 6 files changed, 712 insertions(+), 87 deletions(-)
 create mode 100644 langchain/vectorstores/hnsw_lib.py
 create mode 100644 langchain/vectorstores/in_memory.py
 create mode 100644 tests/integration_tests/vectorstores/test_hnsw_lib.py
 create mode 100644 tests/integration_tests/vectorstores/test_in_memory.py

diff --git a/langchain/vectorstores/hnsw_lib.py b/langchain/vectorstores/hnsw_lib.py
new file mode 100644
index 0000000000000..6974133c9891b
--- /dev/null
+++ b/langchain/vectorstores/hnsw_lib.py
@@ -0,0 +1,235 @@
+"""Wrapper around DocArray's hnswlib-backed document index."""
+from __future__ import annotations
+
+from operator import itemgetter
+from typing import List, Optional, Any, Tuple, Iterable, Type, Callable, Sequence, TYPE_CHECKING
+
+from langchain.embeddings.base import Embeddings
+from langchain.schema import Document
+from langchain.vectorstores import VectorStore
+from langchain.vectorstores.base import VST
+from langchain.vectorstores.utils import maximal_marginal_relevance
+
+from docarray import BaseDoc
+from docarray.typing import NdArray
+
+
+class HnswLib(VectorStore):
+    """Wrapper around HnswLib storage.
+
+    To use it, you should have the ``docarray`` package with version >=0.30.0 installed.
+    """
+    def __init__(
+        self,
+        work_dir: str,
+        n_dim: int,
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]],
+        sim_metric: str = 'cosine',
+        kwargs: dict = None
+    ) -> None:
+        """Embed ``texts`` and index them in an ``HnswDocumentIndex`` created with ``work_dir``.
+
+        ``n_dim`` and ``sim_metric`` configure the embedding field of the schema;
+        ``metadatas`` defaults to one empty dict per text; ``kwargs`` is unused.
+        """
+        try:
+            import docarray
+            da_version = docarray.__version__.split('.')
+            # The docstring/error promise >=0.30.0, so reject every earlier 0.x release.
+            if int(da_version[0]) == 0 and int(da_version[1]) < 30:
+                raise ValueError(
+                    f'To use the HnswLib VectorStore the docarray version >=0.30.0 is expected, '
+                    f'received: {docarray.__version__}. '
+                    f'To upgrade, please run: `pip install -U docarray`.'
+                )
+            from docarray import DocList
+            from docarray.index import HnswDocumentIndex
+        except ImportError:
+            raise ImportError(
+                "Could not import docarray python package. "
+                "Please install it with `pip install -U docarray`."
+            )
+        try:
+            import google.protobuf
+        except ImportError:
+            raise ImportError(
+                "Could not import protobuf python package. "
+                "Please install it with `pip install -U protobuf`."
+            )
+
+        if metadatas is None:
+            metadatas = [{} for _ in texts]
+
+        self.embedding = embedding
+        self.doc_cls = self._get_doc_cls(n_dim, sim_metric)
+        self.doc_index = HnswDocumentIndex[self.doc_cls](work_dir=work_dir)
+        embeddings = self.embedding.embed_documents(texts)
+        docs = DocList[self.doc_cls](
+            [
+                self.doc_cls(text=t, embedding=e, metadata=m)
+                for t, m, e in zip(texts, metadatas, embeddings)
+            ]
+        )
+        self.doc_index.index(docs)
+
+    @staticmethod
+    def _get_doc_cls(n_dim: int, sim_metric: str):
+        """Return a ``BaseDoc`` subclass whose ``embedding`` field carries ``dim``/``space``."""
+        from pydantic import Field
+
+        class DocArrayDoc(BaseDoc):
+            text: Optional[str]
+            embedding: Optional[NdArray] = Field(dim=n_dim, space=sim_metric)
+            metadata: Optional[dict]
+        return DocArrayDoc
+
+    @classmethod
+    def from_texts(
+        cls: Type[VST],
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        work_dir: str = None,
+        n_dim: int = None,
+        **kwargs: Any
+    ) -> HnswLib:
+        """Build the store from texts; ``work_dir`` and ``n_dim`` are required (ValueError if None)."""
+        if work_dir is None:
+            raise ValueError('`work_dir` parameter has not been set.')
+        if n_dim is None:
+            raise ValueError('`n_dim` parameter has not been set.')
+
+        return cls(
+            work_dir=work_dir,
+            n_dim=n_dim,
+            texts=texts,
+            embedding=embedding,
+            metadatas=metadatas,
+            kwargs=kwargs
+        )
+
+    def add_texts(
+        self,
+        texts: Iterable[str],
+        metadatas: Optional[List[dict]] = None,
+        **kwargs: Any
+    ) -> List[str]:
+        """Run more texts through the embeddings and add to the vectorstore.
+
+        Args:
+            texts: Iterable of strings to add to the vectorstore.
+            metadatas: Optional list of metadatas associated with the texts.
+            **kwargs: Not used.
+
+        Returns:
+            List of ids from adding the texts into the vectorstore.
+        """
+        # Materialize once: ``texts`` may be a one-shot iterator, and it is
+        # needed for the default metadatas, the embedding call and the zip.
+        texts = list(texts)
+        if metadatas is None:
+            metadatas = [{} for _ in texts]
+
+        ids = []
+        embeddings = self.embedding.embed_documents(texts)
+        for t, m, e in zip(texts, metadatas, embeddings):
+            doc = self.doc_cls(text=t, embedding=e, metadata=m)
+            self.doc_index.index(doc)
+            ids.append(doc.id)  # TODO decide whether doc.id is the right id to return
+
+        return ids
+
+    def similarity_search_with_score(
+        self, query: str, k: int = 4, **kwargs: Any
+    ) -> List[Tuple[Document, float]]:
+        """Return docs most similar to query.
+
+        Args:
+            query: Text to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+
+        Returns:
+            List of Documents most similar to the query and score for each.
+        """
+        # NOTE(review): stored metadata is not copied onto the returned Documents.
+        query_embedding = self.embedding.embed_query(query)
+        query_doc = self.doc_cls(embedding=query_embedding)
+        docs, scores = self.doc_index.find(query_doc, search_field='embedding', limit=k)
+
+        return [
+            (Document(page_content=doc.text), score) for doc, score in zip(docs, scores)
+        ]
+
+    def similarity_search(
+        self, query: str, k: int = 4, **kwargs: Any
+    ) -> List[Document]:
+        """Return the ``k`` documents most similar to ``query``, without scores.
+
+        Args:
+            query: Text to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+
+        Returns:
+            Documents from ``similarity_search_with_score``, in the same order.
+        """
+        scored = self.similarity_search_with_score(query, k)
+        return [doc for doc, _ in scored]
+
+    def _similarity_search_with_relevance_scores(
+        self,
+        query: str,
+        k: int = 4,
+        **kwargs: Any,
+    ) -> List[Tuple[Document, float]]:
+        """Return docs and relevance scores, normalized on a scale from 0 to 1.
+
+        Not implemented for this store: always raises ``NotImplementedError``.
+        """
+        raise NotImplementedError
+
+    def similarity_search_by_vector(self, embedding: List[float], k: int = 4, **kwargs: Any) -> List[Document]:
+        """Return docs most similar to embedding vector.
+
+        Args:
+            embedding: Embedding to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+
+        Returns:
+            List of Documents most similar to the query vector.
+        """
+        # Wrap the raw vector in a schema document so the index can search on it.
+        probe = self.doc_cls(embedding=embedding)
+        matches = self.doc_index.find(probe, search_field='embedding', limit=k)
+
+        # Only the text is carried over into the returned Documents.
+        return [Document(page_content=hit.text) for hit in matches.documents]
+
+    def max_marginal_relevance_search(
+        self, query: str, k: int = 4, fetch_k: int = 20, **kwargs: Any
+    ) -> List[Document]:
+        """Return docs selected using the maximal marginal relevance.
+
+        Maximal marginal relevance optimizes for similarity to query AND diversity
+        among selected documents.
+
+        Args:
+            query: Text to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
+
+        Returns:
+            List of Documents selected by maximal marginal relevance.
+        """
+        query_embedding = self.embedding.embed_query(query)
+        query_doc = self.doc_cls(embedding=query_embedding)
+
+        docs, scores = self.doc_index.find(query_doc, search_field='embedding', limit=fetch_k)
+
+        # The schema field is called ``embedding``; there is no ``emb`` column.
+        embeddings = list(docs.embedding)
+        mmr_selected = maximal_marginal_relevance(query_embedding, embeddings, k=k)
+        # MMR returns positions within the fetched candidates, not index keys.
+        return [Document(page_content=docs[idx].text) for idx in mmr_selected]
+
diff --git a/langchain/vectorstores/in_memory.py b/langchain/vectorstores/in_memory.py
new file mode 100644
index 0000000000000..a079b10da7887
--- /dev/null
+++ b/langchain/vectorstores/in_memory.py
@@ -0,0 +1,210 @@
+"""Wrapper around in-memory DocArray store."""
+from __future__ import annotations
+
+from operator import itemgetter
+from typing import List, Optional, Any, Tuple, Iterable, Type
+
+from langchain.embeddings.base import Embeddings
+from langchain.schema import Document
+from langchain.vectorstores import VectorStore
+from langchain.vectorstores.base import VST
+from langchain.vectorstores.utils import maximal_marginal_relevance
+
+from docarray import BaseDoc
+from docarray.typing import NdArray
+
+
+class InMemory(VectorStore):
+    """Wrapper around in-memory storage.
+
+    To use it, you should have the ``docarray`` package with version >=0.30.0 installed.
+    """
+    def __init__(
+        self,
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]]
+    ) -> None:
+        """Embed ``texts`` and keep the resulting documents in an in-memory ``DocList``.
+
+        ``metadatas`` defaults to one empty dict per text when ``None``.
+        """
+        try:
+            import docarray
+            da_version = docarray.__version__.split('.')
+            # The docstring/error promise >=0.30.0, so reject every earlier 0.x release.
+            if int(da_version[0]) == 0 and int(da_version[1]) < 30:
+                raise ValueError(
+                    f'To use the InMemory VectorStore the docarray version >=0.30.0 is expected, '
+                    f'received: {docarray.__version__}. '
+                    f'To upgrade, please run: `pip install -U docarray`.'
+                )
+            from docarray import DocList
+        except ImportError:
+            raise ImportError(
+                "Could not import docarray python package. "
+                "Please install it with `pip install -U docarray`."
+            )
+        if metadatas is None:
+            metadatas = [{} for _ in texts]
+
+        self.embedding = embedding
+        self.doc_cls = self._get_doc_cls()
+        embeddings = self.embedding.embed_documents(texts)
+        self.docs = DocList[self.doc_cls](
+            [
+                self.doc_cls(text=t, embedding=e, metadata=m)
+                for t, m, e in zip(texts, metadatas, embeddings)
+            ]
+        )
+
+    @staticmethod
+    def _get_doc_cls():
+        """Return the ``BaseDoc`` subclass (text, embedding, metadata) used for stored docs."""
+        class DocArrayDoc(BaseDoc):
+            text: Optional[str]
+            embedding: Optional[NdArray]
+            metadata: Optional[dict]
+
+        return DocArrayDoc
+
+    @classmethod
+    def from_texts(
+        cls: Type[VST],
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        **kwargs: Any
+    ) -> InMemory:
+        """Create an in-memory store holding the embedded ``texts``.
+
+        Extra ``kwargs`` are accepted but ignored.
+        """
+        return cls(texts=texts, embedding=embedding, metadatas=metadatas)
+
+    def add_texts(
+        self,
+        texts: Iterable[str],
+        metadatas: Optional[List[dict]] = None,
+        **kwargs: Any
+    ) -> List[str]:
+        """Run more texts through the embeddings and add to the vectorstore.
+
+        Args:
+            texts: Iterable of strings to add to the vectorstore.
+            metadatas: Optional list of metadatas associated with the texts.
+            **kwargs: Not used.
+
+        Returns:
+            List of ids from adding the texts into the vectorstore.
+        """
+        # Materialize once: ``texts`` may be a one-shot iterator, and it is
+        # needed for the default metadatas, the embedding call and the zip.
+        texts = list(texts)
+        if metadatas is None:
+            metadatas = [{} for _ in texts]
+
+        ids = []
+        embeddings = self.embedding.embed_documents(texts)
+        for t, m, e in zip(texts, metadatas, embeddings):
+            doc = self.doc_cls(text=t, embedding=e, metadata=m)
+            self.docs.append(doc)
+            ids.append(doc.id)  # TODO return index of self.docs ?
+
+        return ids
+
+    def similarity_search_with_score(
+        self, query: str, k: int = 4, **kwargs: Any
+    ) -> List[Tuple[Document, float]]:
+        """Embed ``query`` and return the ``k`` most similar stored docs with their scores.
+
+        Args:
+            query: Text to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+
+        Returns:
+            ``(Document, score)`` pairs in the order produced by docarray's ``find``.
+        """
+        from docarray.utils.find import find  # TODO move import
+
+        probe = self.doc_cls(embedding=self.embedding.embed_query(query))
+        matches, match_scores = find(index=self.docs, query=probe, limit=k, search_field='embedding')
+
+        return [
+            (Document(page_content=hit.text), score) for hit, score in zip(matches, match_scores)
+        ]
+
+    def similarity_search(
+        self, query: str, k: int = 4, **kwargs: Any
+    ) -> List[Document]:
+        """Return the ``k`` documents most similar to ``query``, without scores.
+
+        Args:
+            query: Text to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+
+        Returns:
+            Documents from ``similarity_search_with_score``, in the same order.
+        """
+        scored = self.similarity_search_with_score(query, k)
+        return [doc for doc, _ in scored]
+
+    def _similarity_search_with_relevance_scores(
+        self,
+        query: str,
+        k: int = 4,
+        **kwargs: Any,
+    ) -> List[Tuple[Document, float]]:
+        """Return docs and relevance scores, normalized on a scale from 0 to 1.
+
+        Not implemented for this store: always raises ``NotImplementedError``.
+        """
+        raise NotImplementedError
+
+    def similarity_search_by_vector(self, embedding: List[float], k: int = 4, **kwargs: Any) -> List[Document]:
+        """Return docs most similar to embedding vector.
+
+        Args:
+            embedding: Embedding to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+
+        Returns:
+            List of Documents most similar to the query vector.
+        """
+        from docarray.utils.find import find
+
+        # Wrap the raw vector in a schema document so ``find`` can compare it.
+        probe = self.doc_cls(embedding=embedding)
+        matches = find(index=self.docs, query=probe, limit=k, search_field='embedding')
+
+        return [Document(page_content=hit.text) for hit in matches.documents]
+
+    def max_marginal_relevance_search(
+        self, query: str, k: int = 4, fetch_k: int = 20, **kwargs: Any
+    ) -> List[Document]:
+        """Return docs selected using the maximal marginal relevance.
+
+        Maximal marginal relevance optimizes for similarity to query AND diversity
+        among selected documents.
+
+        Args:
+            query: Text to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
+
+        Returns:
+            List of Documents selected by maximal marginal relevance.
+        """
+        from docarray.utils.find import find
+
+        query_embedding = self.embedding.embed_query(query)
+        query_doc = self.doc_cls(embedding=query_embedding)
+        # Fetch ``fetch_k`` candidates on the embedding field, as the other searches do.
+        candidates = find(
+            index=self.docs, query=query_doc, limit=fetch_k, search_field='embedding'
+        ).documents
+
+        mmr_selected = maximal_marginal_relevance(query_embedding, list(candidates.embedding), k=k)
+        # MMR returns positions within ``candidates``, not within ``self.docs``.
+        return [Document(page_content=candidates[idx].text) for idx in mmr_selected]
+
diff --git a/poetry.lock b/poetry.lock
index 1138b9196a649..fc785b03aebfb 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry and should not be changed by hand.
 
 [[package]]
 name = "absl-py"
@@ -1515,32 +1515,40 @@ wmi = ["wmi (>=1.5.1,<2.0.0)"]
 
 [[package]]
 name = "docarray"
-version = "0.21.0"
-description = "The data structure for unstructured data"
+version = "0.30.0"
+description = "The data structure for multimodal data"
 category = "main"
 optional = true
-python-versions = "*"
+python-versions = ">=3.7,<4.0"
 files = [
-    {file = "docarray-0.21.0.tar.gz", hash = "sha256:3c9f605123800c1b0cdf8c458be3fb19c05e9a81f723e51200ef531b02e689ee"},
+    {file = "docarray-0.30.0-py3-none-any.whl", hash = "sha256:739dbe06bfee6f1cbc030156036764ca1c75832dcc01a07c724640c6d464651b"},
+    {file = "docarray-0.30.0.tar.gz", hash = "sha256:dd73e9ff20485a1d819ac906a59ee0cbc4382e78a5061286e77eb7d7f8b28a8e"},
 ]
 
 [package.dependencies]
-jina-hubble-sdk = ">=0.24.0"
-numpy = "*"
-rich = ">=12.0.0"
-
-[package.extras]
-annlite = ["annlite"]
-benchmark = ["h5py", "matplotlib", "pandas", "seaborn"]
-common = ["Pillow", "fastapi", "lz4", "matplotlib", "protobuf (>=3.13.0)", "pydantic (>=1.9.0)", "requests", "uvicorn"]
-elasticsearch = ["elasticsearch (>=8.2.0)"]
-full = ["Pillow", "av", "fastapi", "grpcio (>=1.46.0,<1.48.1)", "grpcio-health-checking (>=1.46.0,<1.48.1)", "grpcio-reflection (>=1.46.0,<1.48.1)", "ipython", "lz4", "matplotlib", "protobuf (>=3.13.0)", "pydantic (>=1.9.0)", "requests", "scipy", "strawberry-graphql", "trimesh[easy]", "uvicorn"]
-milvus = ["pymilvus (>=2.1.0,<2.2.0)"]
-opensearch = ["opensearch-py (==2.0.1)"]
-qdrant = ["qdrant-client (>=0.10.3,<0.11.0)"]
-redis = ["redis (>=4.3.0)"]
-test = ["annlite", "black (==22.3.0)", "datasets", "elasticsearch (>=8.2.0)", "jina", "jupyterlab", "mock", "onnx", "onnxruntime", "opensearch-py (==2.0.1)", "paddlepaddle", "protobuf (>=3.13.0,<=3.20.0)", "pymilvus (==2.1.3)", "pytest", "pytest-cov (==3.0.0)", "pytest-custom_exit_code", "pytest-mock", "pytest-mock", "pytest-repeat", "pytest-reraise", "pytest-timeout", "redis (>=4.3.0)", "tensorflow (==2.7.0)", "torch (==1.9.0)", "torchvision (==0.10.0)", "transformers (>=4.16.2)", "weaviate-client (>=3.9.0,<3.10.0)"]
-weaviate = ["weaviate-client (>=3.9.0,<3.10.0)"]
+numpy = ">=1.17.3"
+orjson = ">=3.8.2"
+pydantic = ">=1.10.2"
+rich = ">=13.1.0"
+types-requests = ">=2.28.11.6"
+typing-inspect = ">=0.8.0"
+
+[package.extras]
+audio = ["pydub (>=0.25.1,<0.26.0)"]
+aws = ["smart-open[s3] (>=6.3.0)"]
+elasticsearch = ["elastic-transport (>=8.4.0,<9.0.0)", "elasticsearch (>=7.10.1)"]
+full = ["av (>=10.0.0)", "lz4 (>=1.0.0)", "pandas (>=1.1.0)", "pillow (>=9.3.0)", "protobuf (>=3.19.0)", "pydub (>=0.25.1,<0.26.0)", "trimesh[easy] (>=3.17.1)", "types-pillow (>=9.3.0.1)"]
+hnswlib = ["hnswlib (>=0.6.2)"]
+image = ["pillow (>=9.3.0)", "types-pillow (>=9.3.0.1)"]
+jac = ["jina-hubble-sdk (>=0.34.0)"]
+mesh = ["trimesh[easy] (>=3.17.1)"]
+pandas = ["pandas (>=1.1.0)"]
+proto = ["lz4 (>=1.0.0)", "protobuf (>=3.19.0)"]
+qdrant = ["qdrant-client (>=1.1.4)"]
+torch = ["torch (>=1.0.0)"]
+video = ["av (>=10.0.0)"]
+weaviate = ["weaviate-client (>=3.15)"]
+web = ["fastapi (>=0.87.0)"]
 
 [[package]]
 name = "docker"
@@ -1740,7 +1748,7 @@ files = [
 name = "exceptiongroup"
 version = "1.1.1"
 description = "Backport of PEP 654 (exception groups)"
-category = "dev"
+category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -2018,26 +2026,24 @@ files = [
 
 [[package]]
 name = "google-api-core"
-version = "2.11.0"
+version = "2.8.2"
 description = "Google API client core library"
 category = "main"
 optional = true
-python-versions = ">=3.7"
+python-versions = ">=3.6"
 files = [
-    {file = "google-api-core-2.11.0.tar.gz", hash = "sha256:4b9bb5d5a380a0befa0573b302651b8a9a89262c1730e37bf423cec511804c22"},
-    {file = "google_api_core-2.11.0-py3-none-any.whl", hash = "sha256:ce222e27b0de0d7bc63eb043b956996d6dccab14cc3b690aaea91c9cc99dc16e"},
+    {file = "google-api-core-2.8.2.tar.gz", hash = "sha256:06f7244c640322b508b125903bb5701bebabce8832f85aba9335ec00b3d02edc"},
+    {file = "google_api_core-2.8.2-py3-none-any.whl", hash = "sha256:93c6a91ccac79079ac6bbf8b74ee75db970cc899278b97d53bc012f35908cf50"},
 ]
 
 [package.dependencies]
-google-auth = ">=2.14.1,<3.0dev"
+google-auth = ">=1.25.0,<3.0dev"
 googleapis-common-protos = ">=1.56.2,<2.0dev"
-protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev"
+protobuf = ">=3.15.0,<5.0.0dev"
 requests = ">=2.18.0,<3.0.0dev"
 
 [package.extras]
-grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "grpcio-status (>=1.33.2,<2.0dev)", "grpcio-status (>=1.49.1,<2.0dev)"]
-grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0dev)"]
-grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0dev)"]
+grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio-status (>=1.33.2,<2.0dev)"]
 
 [[package]]
 name = "google-api-python-client"
@@ -2151,21 +2157,21 @@ requests = "*"
 
 [[package]]
 name = "googleapis-common-protos"
-version = "1.59.0"
+version = "1.56.4"
 description = "Common protobufs used in Google APIs"
 category = "main"
 optional = true
 python-versions = ">=3.7"
 files = [
-    {file = "googleapis-common-protos-1.59.0.tar.gz", hash = "sha256:4168fcb568a826a52f23510412da405abd93f4d23ba544bb68d943b14ba3cb44"},
-    {file = "googleapis_common_protos-1.59.0-py2.py3-none-any.whl", hash = "sha256:b287dc48449d1d41af0c69f4ea26242b5ae4c3d7249a38b0984c86a4caffff1f"},
+    {file = "googleapis-common-protos-1.56.4.tar.gz", hash = "sha256:c25873c47279387cfdcbdafa36149887901d36202cb645a0e4f29686bf6e4417"},
+    {file = "googleapis_common_protos-1.56.4-py2.py3-none-any.whl", hash = "sha256:8eb2cbc91b69feaf23e32452a7ae60e791e09967d81d4fcc7fc388182d1bd394"},
 ]
 
 [package.dependencies]
-protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev"
+protobuf = ">=3.15.0,<5.0.0dev"
 
 [package.extras]
-grpc = ["grpcio (>=1.44.0,<2.0.0dev)"]
+grpc = ["grpcio (>=1.0.0,<2.0.0dev)"]
 
 [[package]]
 name = "gptcache"
@@ -2483,7 +2489,7 @@ numpy = ">=1.14.5"
 name = "hnswlib"
 version = "0.7.0"
 description = "hnswlib"
-category = "dev"
+category = "main"
 optional = false
 python-versions = "*"
 files = [
@@ -2763,7 +2769,7 @@ testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-chec
 name = "iniconfig"
 version = "2.0.0"
 description = "brain-dead simple config-ini parsing"
-category = "dev"
+category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -2955,20 +2961,20 @@ testing = ["Django (<3.1)", "attrs", "colorama", "docopt", "pytest (<7.0.0)"]
 
 [[package]]
 name = "jina"
-version = "3.15.0"
+version = "3.14.1"
 description = "Build multimodal AI services via cloud native technologies · Neural Search · Generative AI · MLOps"
 category = "main"
 optional = true
 python-versions = "*"
 files = [
-    {file = "jina-3.15.0.tar.gz", hash = "sha256:18a3be8ddca14ed66a554d8480a277bcb7620ebc6ae11352a9835c91865f9d1e"},
+    {file = "jina-3.14.1.tar.gz", hash = "sha256:00b1f5995b13c9a49a2287bd534bd32eb8c05706064752035d569e616a15b411"},
 ]
 
 [package.dependencies]
 aiofiles = "*"
 aiohttp = "*"
 aiostream = "*"
-docarray = ">=0.16.4,<0.30.0"
+docarray = ">=0.16.4"
 docker = "*"
 fastapi = ">=0.76.0"
 filelock = "*"
@@ -3002,14 +3008,14 @@ websockets = "*"
 aiofiles = ["aiofiles"]
 aiohttp = ["aiohttp"]
 aiostream = ["aiostream"]
-all = ["Pillow", "aiofiles", "aiohttp", "aiostream", "black (==22.3.0)", "bs4", "coverage (==6.2)", "docarray (>=0.16.4,<0.30.0)", "docker", "fastapi (>=0.76.0)", "filelock", "flaky", "grpcio (>=1.46.0,<1.48.1)", "grpcio-health-checking (>=1.46.0,<1.48.1)", "grpcio-reflection (>=1.46.0,<1.48.1)", "jcloud (>=0.0.35)", "jina-hubble-sdk (>=0.30.4)", "jsonschema", "kubernetes (>=18.20.0)", "mock", "numpy", "opentelemetry-api (>=1.12.0)", "opentelemetry-exporter-otlp (>=1.12.0)", "opentelemetry-exporter-otlp-proto-grpc (>=1.13.0)", "opentelemetry-exporter-prometheus (>=1.12.0rc1)", "opentelemetry-instrumentation-aiohttp-client (>=0.33b0)", "opentelemetry-instrumentation-fastapi (>=0.33b0)", "opentelemetry-instrumentation-grpc (>=0.35b0)", "opentelemetry-sdk (>=1.14.0)", "opentelemetry-test-utils (>=0.33b0)", "packaging (>=20.0)", "pathspec", "portforward (>=0.2.4,<0.4.3)", "prometheus-api-client (>=0.5.1)", "prometheus_client (>=0.12.0)", "protobuf (>=3.19.0)", "psutil", "pydantic", "pytest", "pytest-asyncio", "pytest-cov (==3.0.0)", "pytest-custom_exit_code", "pytest-kind (==22.11.1)", "pytest-lazy-fixture", "pytest-mock", "pytest-repeat", "pytest-reraise", "pytest-timeout", "python-multipart", "pyyaml (>=5.3.1)", "requests", "requests-mock", "scipy (>=1.6.1)", "sgqlc", "strawberry-graphql (>=0.96.0)", "tensorflow (>=2.0)", "torch", "uvicorn[standard]", "uvloop", "watchfiles (>=0.18.0)", "websockets"]
+all = ["Pillow", "aiofiles", "aiohttp", "aiostream", "black (==22.3.0)", "bs4", "coverage (==6.2)", "docarray (>=0.16.4)", "docker", "fastapi (>=0.76.0)", "filelock", "flaky", "grpcio (>=1.46.0,<1.48.1)", "grpcio-health-checking (>=1.46.0,<1.48.1)", "grpcio-reflection (>=1.46.0,<1.48.1)", "jcloud (>=0.0.35)", "jina-hubble-sdk (>=0.30.4)", "jsonschema", "kubernetes (>=18.20.0)", "mock", "numpy", "opentelemetry-api (>=1.12.0)", "opentelemetry-exporter-otlp (>=1.12.0)", "opentelemetry-exporter-otlp-proto-grpc (>=1.13.0)", "opentelemetry-exporter-prometheus (>=1.12.0rc1)", "opentelemetry-instrumentation-aiohttp-client (>=0.33b0)", "opentelemetry-instrumentation-fastapi (>=0.33b0)", "opentelemetry-instrumentation-grpc (>=0.35b0)", "opentelemetry-sdk (>=1.14.0)", "opentelemetry-test-utils (>=0.33b0)", "packaging (>=20.0)", "pathspec", "portforward (>=0.2.4)", "prometheus-api-client (>=0.5.1)", "prometheus_client (>=0.12.0)", "protobuf (>=3.19.0)", "psutil", "pydantic", "pytest", "pytest-asyncio", "pytest-cov (==3.0.0)", "pytest-custom_exit_code", "pytest-kind (==22.11.1)", "pytest-lazy-fixture", "pytest-mock", "pytest-repeat", "pytest-reraise", "pytest-timeout", "python-multipart", "pyyaml (>=5.3.1)", "requests", "requests-mock", "scipy (>=1.6.1)", "sgqlc", "strawberry-graphql (>=0.96.0)", "tensorflow (>=2.0)", "torch", "uvicorn[standard]", "uvloop", "watchfiles (>=0.18.0)", "websockets"]
 black = ["black (==22.3.0)"]
 bs4 = ["bs4"]
-cicd = ["bs4", "jsonschema", "portforward (>=0.2.4,<0.4.3)", "sgqlc", "strawberry-graphql (>=0.96.0)", "tensorflow (>=2.0)", "torch"]
-core = ["aiostream", "docarray (>=0.16.4,<0.30.0)", "grpcio (>=1.46.0,<1.48.1)", "grpcio-health-checking (>=1.46.0,<1.48.1)", "grpcio-reflection (>=1.46.0,<1.48.1)", "jcloud (>=0.0.35)", "jina-hubble-sdk (>=0.30.4)", "numpy", "opentelemetry-api (>=1.12.0)", "opentelemetry-instrumentation-grpc (>=0.35b0)", "packaging (>=20.0)", "protobuf (>=3.19.0)", "pyyaml (>=5.3.1)"]
+cicd = ["bs4", "jsonschema", "portforward (>=0.2.4)", "sgqlc", "strawberry-graphql (>=0.96.0)", "tensorflow (>=2.0)", "torch"]
+core = ["docarray (>=0.16.4)", "grpcio (>=1.46.0,<1.48.1)", "grpcio-health-checking (>=1.46.0,<1.48.1)", "grpcio-reflection (>=1.46.0,<1.48.1)", "jcloud (>=0.0.35)", "jina-hubble-sdk (>=0.30.4)", "numpy", "opentelemetry-api (>=1.12.0)", "opentelemetry-instrumentation-grpc (>=0.35b0)", "packaging (>=20.0)", "protobuf (>=3.19.0)", "pyyaml (>=5.3.1)"]
 coverage = ["coverage (==6.2)"]
-devel = ["aiofiles", "aiohttp", "docker", "fastapi (>=0.76.0)", "filelock", "opentelemetry-exporter-otlp (>=1.12.0)", "opentelemetry-exporter-otlp-proto-grpc (>=1.13.0)", "opentelemetry-exporter-prometheus (>=1.12.0rc1)", "opentelemetry-instrumentation-aiohttp-client (>=0.33b0)", "opentelemetry-instrumentation-fastapi (>=0.33b0)", "opentelemetry-sdk (>=1.14.0)", "pathspec", "prometheus_client (>=0.12.0)", "pydantic", "python-multipart", "requests", "sgqlc", "strawberry-graphql (>=0.96.0)", "uvicorn[standard]", "uvloop", "watchfiles (>=0.18.0)", "websockets"]
-docarray = ["docarray (>=0.16.4,<0.30.0)"]
+devel = ["aiofiles", "aiohttp", "aiostream", "docker", "fastapi (>=0.76.0)", "filelock", "opentelemetry-exporter-otlp (>=1.12.0)", "opentelemetry-exporter-otlp-proto-grpc (>=1.13.0)", "opentelemetry-exporter-prometheus (>=1.12.0rc1)", "opentelemetry-instrumentation-aiohttp-client (>=0.33b0)", "opentelemetry-instrumentation-fastapi (>=0.33b0)", "opentelemetry-sdk (>=1.14.0)", "pathspec", "prometheus_client (>=0.12.0)", "pydantic", "python-multipart", "requests", "sgqlc", "strawberry-graphql (>=0.96.0)", "uvicorn[standard]", "uvloop", "watchfiles (>=0.18.0)", "websockets"]
+docarray = ["docarray (>=0.16.4)"]
 docker = ["docker"]
 fastapi = ["fastapi (>=0.76.0)"]
 filelock = ["filelock"]
@@ -3036,7 +3042,7 @@ packaging = ["packaging (>=20.0)"]
 pathspec = ["pathspec"]
 perf = ["opentelemetry-exporter-otlp (>=1.12.0)", "opentelemetry-exporter-otlp-proto-grpc (>=1.13.0)", "opentelemetry-exporter-prometheus (>=1.12.0rc1)", "opentelemetry-instrumentation-aiohttp-client (>=0.33b0)", "opentelemetry-instrumentation-fastapi (>=0.33b0)", "opentelemetry-sdk (>=1.14.0)", "prometheus_client (>=0.12.0)", "uvloop"]
 pillow = ["Pillow"]
-portforward = ["portforward (>=0.2.4,<0.4.3)"]
+portforward = ["portforward (>=0.2.4)"]
 prometheus-api-client = ["prometheus-api-client (>=0.5.1)"]
 prometheus-client = ["prometheus_client (>=0.12.0)"]
 protobuf = ["protobuf (>=3.19.0)"]
@@ -3058,7 +3064,7 @@ requests = ["requests"]
 requests-mock = ["requests-mock"]
 scipy = ["scipy (>=1.6.1)"]
 sgqlc = ["sgqlc"]
-standard = ["aiofiles", "aiohttp", "docker", "fastapi (>=0.76.0)", "filelock", "opentelemetry-exporter-otlp (>=1.12.0)", "opentelemetry-exporter-prometheus (>=1.12.0rc1)", "opentelemetry-instrumentation-aiohttp-client (>=0.33b0)", "opentelemetry-instrumentation-fastapi (>=0.33b0)", "opentelemetry-sdk (>=1.14.0)", "pathspec", "prometheus_client (>=0.12.0)", "pydantic", "python-multipart", "requests", "uvicorn[standard]", "uvloop", "websockets"]
+standard = ["aiofiles", "aiohttp", "aiostream", "docker", "fastapi (>=0.76.0)", "filelock", "opentelemetry-exporter-otlp (>=1.12.0)", "opentelemetry-exporter-prometheus (>=1.12.0rc1)", "opentelemetry-instrumentation-aiohttp-client (>=0.33b0)", "opentelemetry-instrumentation-fastapi (>=0.33b0)", "opentelemetry-sdk (>=1.14.0)", "pathspec", "prometheus_client (>=0.12.0)", "pydantic", "python-multipart", "requests", "uvicorn[standard]", "uvloop", "websockets"]
 standrad = ["opentelemetry-exporter-otlp-proto-grpc (>=1.13.0)"]
 strawberry-graphql = ["strawberry-graphql (>=0.96.0)"]
 tensorflow = ["tensorflow (>=2.0)"]
@@ -4991,6 +4997,72 @@ numpy = ">=1.7"
 docs = ["numpydoc", "sphinx (==1.2.3)", "sphinx-rtd-theme", "sphinxcontrib-napoleon"]
 tests = ["pytest", "pytest-cov", "pytest-pep8"]
 
+[[package]]
+name = "orjson"
+version = "3.8.10"
+description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy"
+category = "main"
+optional = true
+python-versions = ">= 3.7"
+files = [
+    {file = "orjson-3.8.10-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:4dfe0651e26492d5d929bbf4322de9afbd1c51ac2e3947a7f78492b20359711d"},
+    {file = "orjson-3.8.10-cp310-cp310-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:bc30de5c7b3a402eb59cc0656b8ee53ca36322fc52ab67739c92635174f88336"},
+    {file = "orjson-3.8.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c08b426fae7b9577b528f99af0f7e0ff3ce46858dd9a7d1bf86d30f18df89a4c"},
+    {file = "orjson-3.8.10-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bce970f293825e008dbf739268dfa41dfe583aa2a1b5ef4efe53a0e92e9671ea"},
+    {file = "orjson-3.8.10-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9b23fb0264bbdd7218aa685cb6fc71f0dcecf34182f0a8596a3a0dff010c06f9"},
+    {file = "orjson-3.8.10-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0826ad2dc1cea1547edff14ce580374f0061d853cbac088c71162dbfe2e52205"},
+    {file = "orjson-3.8.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7bce6e61cea6426309259b04c6ee2295b3f823ea51a033749459fe2dd0423b2"},
+    {file = "orjson-3.8.10-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:0b470d31244a6f647e5402aac7d2abaf7bb4f52379acf67722a09d35a45c9417"},
+    {file = "orjson-3.8.10-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:48824649019a25d3e52f6454435cf19fe1eb3d05ee697e65d257f58ae3aa94d9"},
+    {file = "orjson-3.8.10-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:faee89e885796a9cc493c930013fa5cfcec9bfaee431ddf00f0fbfb57166a8b3"},
+    {file = "orjson-3.8.10-cp310-none-win_amd64.whl", hash = "sha256:3cfe32b1227fe029a5ad989fbec0b453a34e5e6d9a977723f7c3046d062d3537"},
+    {file = "orjson-3.8.10-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:2073b62822738d6740bd2492f6035af5c2fd34aa198322b803dc0e70559a17b7"},
+    {file = "orjson-3.8.10-cp311-cp311-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:b2c4faf20b6bb5a2d7ac0c16f58eb1a3800abcef188c011296d1dc2bb2224d48"},
+    {file = "orjson-3.8.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c1825997232a324911d11c75d91e1e0338c7b723c149cf53a5fc24496c048a4"},
+    {file = "orjson-3.8.10-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f7e85d4682f3ed7321d36846cad0503e944ea9579ef435d4c162e1b73ead8ac9"},
+    {file = "orjson-3.8.10-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b8cdaacecb92997916603ab232bb096d0fa9e56b418ca956b9754187d65ca06"},
+    {file = "orjson-3.8.10-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ddabc5e44702d13137949adee3c60b7091e73a664f6e07c7b428eebb2dea7bbf"},
+    {file = "orjson-3.8.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27bb26e171e9cfdbec39c7ca4739b6bef8bd06c293d56d92d5e3a3fc017df17d"},
+    {file = "orjson-3.8.10-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:1810e5446fe68d61732e9743592da0ec807e63972eef076d09e02878c2f5958e"},
+    {file = "orjson-3.8.10-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:61e2e51cefe7ef90c4fbbc9fd38ecc091575a3ea7751d56fad95cbebeae2a054"},
+    {file = "orjson-3.8.10-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f3e9ac9483c2b4cd794e760316966b7bd1e6afb52b0218f068a4e80c9b2db4f6"},
+    {file = "orjson-3.8.10-cp311-none-win_amd64.whl", hash = "sha256:26aee557cf8c93b2a971b5a4a8e3cca19780573531493ce6573aa1002f5c4378"},
+    {file = "orjson-3.8.10-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:11ae68f995a50724032af297c92f20bcde31005e0bf3653b12bff9356394615b"},
+    {file = "orjson-3.8.10-cp37-cp37m-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:35d879b46b8029e1e01e9f6067928b470a4efa1ca749b6d053232b873c2dcf66"},
+    {file = "orjson-3.8.10-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:345e41abd1d9e3ecfb554e1e75ff818cf42e268bd06ad25a96c34e00f73a327e"},
+    {file = "orjson-3.8.10-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:45a5afc9cda6b8aac066dd50d8194432fbc33e71f7164f95402999b725232d78"},
+    {file = "orjson-3.8.10-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad632dc330a7b39da42530c8d146f76f727d476c01b719dc6743c2b5701aaf6b"},
+    {file = "orjson-3.8.10-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4bf2556ba99292c4dc550560384dd22e88b5cdbe6d98fb4e202e902b5775cf9f"},
+    {file = "orjson-3.8.10-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b88afd662190f19c3bb5036a903589f88b1d2c2608fbb97281ce000db6b08897"},
+    {file = "orjson-3.8.10-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:abce8d319aae800fd2d774db1106f926dee0e8a5ca85998fd76391fcb58ef94f"},
+    {file = "orjson-3.8.10-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:e999abca892accada083f7079612307d94dd14cc105a699588a324f843216509"},
+    {file = "orjson-3.8.10-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a3fdee68c4bb3c5d6f89ed4560f1384b5d6260e48fbf868bae1a245a3c693d4d"},
+    {file = "orjson-3.8.10-cp37-none-win_amd64.whl", hash = "sha256:e5d7f82506212e047b184c06e4bcd48c1483e101969013623cebcf51cf12cad9"},
+    {file = "orjson-3.8.10-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:d953e6c2087dcd990e794f8405011369ee11cf13e9aaae3172ee762ee63947f2"},
+    {file = "orjson-3.8.10-cp38-cp38-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:81aa3f321d201bff0bd0f4014ea44e51d58a9a02d8f2b0eeab2cee22611be8e1"},
+    {file = "orjson-3.8.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d27b6182f75896dd8c10ea0f78b9265a3454be72d00632b97f84d7031900dd4"},
+    {file = "orjson-3.8.10-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1486600bc1dd1db26c588dd482689edba3d72d301accbe4301db4b2b28bd7aa4"},
+    {file = "orjson-3.8.10-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:344ea91c556a2ce6423dc13401b83ab0392aa697a97fa4142c2c63a6fd0bbfef"},
+    {file = "orjson-3.8.10-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:979f231e3bad1c835627eef1a30db12a8af58bfb475a6758868ea7e81897211f"},
+    {file = "orjson-3.8.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fa3a26dcf0f5f2912a8ce8e87273e68b2a9526854d19fd09ea671b154418e88"},
+    {file = "orjson-3.8.10-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:b6e79d8864794635974b18821b49a7f27859d17b93413d4603efadf2e92da7a5"},
+    {file = "orjson-3.8.10-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ce49999bcbbc14791c61844bc8a69af44f5205d219be540e074660038adae6bf"},
+    {file = "orjson-3.8.10-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c2ef690335b24f9272dbf6639353c1ffc3f196623a92b851063e28e9515cf7dd"},
+    {file = "orjson-3.8.10-cp38-none-win_amd64.whl", hash = "sha256:5a0b1f4e4fa75e26f814161196e365fc0e1a16e3c07428154505b680a17df02f"},
+    {file = "orjson-3.8.10-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:af7601a78b99f0515af2f8ab12c955c0072ffcc1e437fb2556f4465783a4d813"},
+    {file = "orjson-3.8.10-cp39-cp39-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:6bbd7b3a3e2030b03c68c4d4b19a2ef5b89081cbb43c05fe2010767ef5e408db"},
+    {file = "orjson-3.8.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4355c9aedfefe60904e8bd7901315ebbc8bb828f665e4c9bc94b1432e67cb6f7"},
+    {file = "orjson-3.8.10-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b7b0ba074375e25c1594e770e2215941e2017c3cd121889150737fa1123e8bfe"},
+    {file = "orjson-3.8.10-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:34b6901c110c06ab9e8d7d0496db4bc9a0c162ca8d77f67539d22cb39e0a1ef4"},
+    {file = "orjson-3.8.10-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cb62ec16a1c26ad9487727b529103cb6a94a1d4969d5b32dd0eab5c3f4f5a6f2"},
+    {file = "orjson-3.8.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:595e1e7d04aaaa3d41113e4eb9f765ab642173c4001182684ae9ddc621bb11c8"},
+    {file = "orjson-3.8.10-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:64ffd92328473a2f9af059410bd10c703206a4bbc7b70abb1bedcd8761e39eb8"},
+    {file = "orjson-3.8.10-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b1f648ec89c6a426098868460c0ef8c86b457ce1378d7569ff4acb6c0c454048"},
+    {file = "orjson-3.8.10-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6a286ad379972e4f46579e772f0477e6b505f1823aabcd64ef097dbb4549e1a4"},
+    {file = "orjson-3.8.10-cp39-none-win_amd64.whl", hash = "sha256:d2874cee6856d7c386b596e50bc517d1973d73dc40b2bd6abec057b5e7c76b2f"},
+    {file = "orjson-3.8.10.tar.gz", hash = "sha256:dcf6adb4471b69875034afab51a14b64f1026bc968175a2bb02c5f6b358bd413"},
+]
+
 [[package]]
 name = "packaging"
 version = "23.1"
@@ -5373,7 +5445,7 @@ typing-extensions = {version = "*", markers = "python_version <= \"3.8\""}
 name = "pluggy"
 version = "1.0.0"
 description = "plugin and hook calling mechanisms for python"
-category = "dev"
+category = "main"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -5546,37 +5618,36 @@ requests = "*"
 
 [[package]]
 name = "protobuf"
-version = "3.19.6"
+version = "3.19.0"
 description = "Protocol Buffers"
 category = "main"
 optional = true
 python-versions = ">=3.5"
 files = [
-    {file = "protobuf-3.19.6-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:010be24d5a44be7b0613750ab40bc8b8cedc796db468eae6c779b395f50d1fa1"},
-    {file = "protobuf-3.19.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11478547958c2dfea921920617eb457bc26867b0d1aa065ab05f35080c5d9eb6"},
-    {file = "protobuf-3.19.6-cp310-cp310-win32.whl", hash = "sha256:559670e006e3173308c9254d63facb2c03865818f22204037ab76f7a0ff70b5f"},
-    {file = "protobuf-3.19.6-cp310-cp310-win_amd64.whl", hash = "sha256:347b393d4dd06fb93a77620781e11c058b3b0a5289262f094379ada2920a3730"},
-    {file = "protobuf-3.19.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a8ce5ae0de28b51dff886fb922012dad885e66176663950cb2344c0439ecb473"},
-    {file = "protobuf-3.19.6-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90b0d02163c4e67279ddb6dc25e063db0130fc299aefabb5d481053509fae5c8"},
-    {file = "protobuf-3.19.6-cp36-cp36m-win32.whl", hash = "sha256:30f5370d50295b246eaa0296533403961f7e64b03ea12265d6dfce3a391d8992"},
-    {file = "protobuf-3.19.6-cp36-cp36m-win_amd64.whl", hash = "sha256:0c0714b025ec057b5a7600cb66ce7c693815f897cfda6d6efb58201c472e3437"},
-    {file = "protobuf-3.19.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5057c64052a1f1dd7d4450e9aac25af6bf36cfbfb3a1cd89d16393a036c49157"},
-    {file = "protobuf-3.19.6-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:bb6776bd18f01ffe9920e78e03a8676530a5d6c5911934c6a1ac6eb78973ecb6"},
-    {file = "protobuf-3.19.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84a04134866861b11556a82dd91ea6daf1f4925746b992f277b84013a7cc1229"},
-    {file = "protobuf-3.19.6-cp37-cp37m-win32.whl", hash = "sha256:4bc98de3cdccfb5cd769620d5785b92c662b6bfad03a202b83799b6ed3fa1fa7"},
-    {file = "protobuf-3.19.6-cp37-cp37m-win_amd64.whl", hash = "sha256:aa3b82ca1f24ab5326dcf4ea00fcbda703e986b22f3d27541654f749564d778b"},
-    {file = "protobuf-3.19.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2b2d2913bcda0e0ec9a784d194bc490f5dc3d9d71d322d070b11a0ade32ff6ba"},
-    {file = "protobuf-3.19.6-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:d0b635cefebd7a8a0f92020562dead912f81f401af7e71f16bf9506ff3bdbb38"},
-    {file = "protobuf-3.19.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a552af4dc34793803f4e735aabe97ffc45962dfd3a237bdde242bff5a3de684"},
-    {file = "protobuf-3.19.6-cp38-cp38-win32.whl", hash = "sha256:0469bc66160180165e4e29de7f445e57a34ab68f49357392c5b2f54c656ab25e"},
-    {file = "protobuf-3.19.6-cp38-cp38-win_amd64.whl", hash = "sha256:91d5f1e139ff92c37e0ff07f391101df77e55ebb97f46bbc1535298d72019462"},
-    {file = "protobuf-3.19.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c0ccd3f940fe7f3b35a261b1dd1b4fc850c8fde9f74207015431f174be5976b3"},
-    {file = "protobuf-3.19.6-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:30a15015d86b9c3b8d6bf78d5b8c7749f2512c29f168ca259c9d7727604d0e39"},
-    {file = "protobuf-3.19.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:878b4cd080a21ddda6ac6d1e163403ec6eea2e206cf225982ae04567d39be7b0"},
-    {file = "protobuf-3.19.6-cp39-cp39-win32.whl", hash = "sha256:5a0d7539a1b1fb7e76bf5faa0b44b30f812758e989e59c40f77a7dab320e79b9"},
-    {file = "protobuf-3.19.6-cp39-cp39-win_amd64.whl", hash = "sha256:bbf5cea5048272e1c60d235c7bd12ce1b14b8a16e76917f371c718bd3005f045"},
-    {file = "protobuf-3.19.6-py2.py3-none-any.whl", hash = "sha256:14082457dc02be946f60b15aad35e9f5c69e738f80ebbc0900a19bc83734a5a4"},
-    {file = "protobuf-3.19.6.tar.gz", hash = "sha256:5f5540d57a43042389e87661c6eaa50f47c19c6176e8cf1c4f287aeefeccb5c4"},
+    {file = "protobuf-3.19.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:01a0645ef3acddfbc90237e1cdfae1086130fc7cb480b5874656193afd657083"},
+    {file = "protobuf-3.19.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:d3861c9721a90ba83ee0936a9cfcc4fa1c4b4144ac9658fb6f6343b38558e9b4"},
+    {file = "protobuf-3.19.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b64be5d7270cf5e76375bac049846e8a9543a2d4368b69afe78ab725380a7487"},
+    {file = "protobuf-3.19.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:2f6046b9e2feee0dce994493186e8715b4392ed5f50f356280ad9c2f9f93080a"},
+    {file = "protobuf-3.19.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac2f8ec942d414609aba0331952ae12bb823e8f424bbb6b8c422f1cef32dc842"},
+    {file = "protobuf-3.19.0-cp36-cp36m-win32.whl", hash = "sha256:3fea09aa04ef2f8b01fcc9bb87f19509934f8a35d177c865b8f9ee5c32b60c1b"},
+    {file = "protobuf-3.19.0-cp36-cp36m-win_amd64.whl", hash = "sha256:d1f4277d321f60456845ca9b882c4845736f1f5c1c69eb778eba22a97977d8af"},
+    {file = "protobuf-3.19.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8488c2276f14f294e890cc1260ab342a13e90cd20dcc03319d2eea258f1fd321"},
+    {file = "protobuf-3.19.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:36bf292f44966c67080e535321501717f4f1eba30faef8f2cd4b0c745a027211"},
+    {file = "protobuf-3.19.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c99af73ae34c93e0e2ace57ea2e70243f34fc015c8c23fd39ee93652e726f7e7"},
+    {file = "protobuf-3.19.0-cp37-cp37m-win32.whl", hash = "sha256:f7a031cf8e2fc14acc0ba694f6dff0a01e06b70d817eba6edc72ee6cc20517ac"},
+    {file = "protobuf-3.19.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d4ca5f0c7bc8d2e6966ca3bbd85e9ebe7191b6e21f067896d4af6b28ecff29fe"},
+    {file = "protobuf-3.19.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9a8a880593015ef2c83f7af797fa4fbf583b2c98b4bd94e46c5b61fee319d84b"},
+    {file = "protobuf-3.19.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:6f16925f5c977dd7787973a50c242e60c22b1d1182aba6bec7bd02862579c10f"},
+    {file = "protobuf-3.19.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9097327d277b0aa4a3224e61cd6850aef3269172397715299bcffc9f90293c9"},
+    {file = "protobuf-3.19.0-cp38-cp38-win32.whl", hash = "sha256:708d04394a63ee9bdc797938b6e15ed5bf24a1cb37743eb3886fd74a5a67a234"},
+    {file = "protobuf-3.19.0-cp38-cp38-win_amd64.whl", hash = "sha256:ee4d07d596357f51316b6ecf1cc1927660e9d5e418385bb1c51fd2496cd9bee7"},
+    {file = "protobuf-3.19.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:34a77b8fafdeb8f89fee2b7108ae60d8958d72e33478680cc1e05517892ecc46"},
+    {file = "protobuf-3.19.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:4f93e0f6af796ddd1502225ff8ea25340ced186ca05b601c44d5c88b45ba80a0"},
+    {file = "protobuf-3.19.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:942dd6bc8bd2a3c6a156d8ab0f80bd45313f22b78e1176283270054dcc8ca4c2"},
+    {file = "protobuf-3.19.0-cp39-cp39-win32.whl", hash = "sha256:7b3867795708ac88fde8d6f34f0d9a50af56087e41f624bdb2e9ff808ea5dda7"},
+    {file = "protobuf-3.19.0-cp39-cp39-win_amd64.whl", hash = "sha256:a74432e9d28a6072a2359a0f49f81eb14dd718e7dbbfb6c0789b456c49e1f130"},
+    {file = "protobuf-3.19.0-py2.py3-none-any.whl", hash = "sha256:c96e94d3e523a82caa3e5f74b35dd1c4884199358d01c950d95c341255ff48bc"},
+    {file = "protobuf-3.19.0.tar.gz", hash = "sha256:6a1dc6584d24ef86f5b104bcad64fa0fe06ed36e5687f426e0445d363a041d18"},
 ]
 
 [[package]]
@@ -6105,7 +6176,7 @@ Pillow = ">=8.0.0"
 name = "pytest"
 version = "7.3.1"
 description = "pytest: simple powerful testing with Python"
-category = "dev"
+category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -7507,7 +7578,7 @@ files = [
 ]
 
 [package.dependencies]
-greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and platform_machine == \"aarch64\" or python_version >= \"3\" and platform_machine == \"ppc64le\" or python_version >= \"3\" and platform_machine == \"x86_64\" or python_version >= \"3\" and platform_machine == \"amd64\" or python_version >= \"3\" and platform_machine == \"AMD64\" or python_version >= \"3\" and platform_machine == \"win32\" or python_version >= \"3\" and platform_machine == \"WIN32\""}
+greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"}
 
 [package.extras]
 aiomysql = ["aiomysql", "greenlet (!=0.4.17)"]
@@ -7759,18 +7830,18 @@ files = [
 
 [[package]]
 name = "tensorflow-hub"
-version = "0.13.0"
+version = "0.12.0"
 description = "TensorFlow Hub is a library to foster the publication, discovery, and consumption of reusable parts of machine learning models."
 category = "main"
 optional = true
 python-versions = "*"
 files = [
-    {file = "tensorflow_hub-0.13.0-py2.py3-none-any.whl", hash = "sha256:3544f4fd9fd99e4eeb6da1b5b5320e4a2dbdef7f9bb778f66f76d6790f32dd65"},
+    {file = "tensorflow_hub-0.12.0-py2.py3-none-any.whl", hash = "sha256:822fe5f7338c95efcc3a534011c6689e4309ba2459def87194179c4de8a6e1fc"},
 ]
 
 [package.dependencies]
 numpy = ">=1.12.0"
-protobuf = ">=3.19.6"
+protobuf = ">=3.8.0"
 
 [package.extras]
 make-image-classifier = ["keras-preprocessing[image]"]
@@ -8132,7 +8203,7 @@ files = [
 name = "tomli"
 version = "2.0.1"
 description = "A lil' TOML parser"
-category = "dev"
+category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -8414,7 +8485,7 @@ types-pyOpenSSL = "*"
 name = "types-requests"
 version = "2.28.11.17"
 description = "Typing stubs for requests"
-category = "dev"
+category = "main"
 optional = false
 python-versions = "*"
 files = [
@@ -8441,7 +8512,7 @@ files = [
 name = "types-urllib3"
 version = "1.26.25.10"
 description = "Typing stubs for urllib3"
-category = "dev"
+category = "main"
 optional = false
 python-versions = "*"
 files = [
@@ -9267,13 +9338,15 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\
 cffi = ["cffi (>=1.11)"]
 
 [extras]
-all = ["aleph-alpha-client", "anthropic", "arxiv", "atlassian-python-api", "azure-identity", "beautifulsoup4", "clickhouse-connect", "cohere", "deeplake", "duckduckgo-search", "elasticsearch", "faiss-cpu", "google-api-python-client", "google-search-results", "gptcache", "html2text", "huggingface_hub", "jina", "jinja2", "manifest-ml", "networkx", "nlpcloud", "nltk", "nomic", "openai", "opensearch-py", "pgvector", "pinecone-client", "pinecone-text", "psycopg2-binary", "pyowm", "pypdf", "pytesseract", "qdrant-client", "redis", "sentence-transformers", "spacy", "tensorflow-text", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"]
+all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect", "docarray", "protobuf"]
 cohere = ["cohere"]
-llms = ["anthropic", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "torch", "transformers"]
+docarray = ["docarray", "protobuf"]
+embeddings = ["sentence-transformers"]
+llms = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers"]
 openai = ["openai"]
 qdrant = ["qdrant-client"]
 
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<4.0"
-content-hash = "ab6ea1c53c7a6e792d5bdcf8865b87e5dcfe4c89080c18b356dc4ed8a17cc3a3"
+content-hash = "81e7b09595d12739f056c5f5d34021ad7e3f855a8da711d3ccc23aab72cfbd83"
diff --git a/pyproject.toml b/pyproject.toml
index 0eec46451897e..61406f1db2e0c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -69,6 +69,10 @@ pytesseract = {version = "^0.3.10", optional=true}
 html2text = {version="^2020.1.16", optional=true}
 numexpr = "^2.8.4"
 duckduckgo-search = {version="^2.8.6", optional=true}
+docarray = {version="^0.30.0", optional=true}
+protobuf = {version="3.19", optional=true}
+hnswlib = {version="^0.7.0", optional=true}
+pytest = "^7.3.1"
 
 
 [tool.poetry.group.docs.dependencies]
@@ -145,8 +149,9 @@ llms = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "manifes
 qdrant = ["qdrant-client"]
 openai = ["openai"]
 cohere = ["cohere"]
+docarray = ["docarray", "protobuf"]
 embeddings = ["sentence-transformers"]
-all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "boto3", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect"]
+all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "boto3", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect", "docarray", "protobuf"]
 
 [tool.ruff]
 select = [
diff --git a/tests/integration_tests/vectorstores/test_hnsw_lib.py b/tests/integration_tests/vectorstores/test_hnsw_lib.py
new file mode 100644
index 0000000000000..7aa3481cf19e7
--- /dev/null
+++ b/tests/integration_tests/vectorstores/test_hnsw_lib.py
@@ -0,0 +1,54 @@
+import pytest
+
+from langchain.schema import Document
+from langchain.vectorstores.hnsw_lib import HnswLib
+from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
+
+
+def test_docarray_hnswlib_vec_store_init(tmp_path) -> None:
+    """Test end to end construction and simple similarity search."""
+    texts = ["foo", "bar", "baz"]
+    docsearch = HnswLib.from_texts(
+        texts,
+        FakeEmbeddings(),
+        work_dir=str(tmp_path),
+        n_dim=10,
+        sim_metric='cosine',
+    )
+    assert isinstance(docsearch, HnswLib)
+
+
+@pytest.fixture
+def docarray_vec_store(tmp_path):
+    texts = ["foo", "bar", "baz"]
+    docsearch = HnswLib.from_texts(
+        texts,
+        FakeEmbeddings(),
+        work_dir=str(tmp_path),
+        n_dim=10,
+    )
+    return docsearch
+
+
+def test_sim_search(docarray_vec_store) -> None:
+    """Test end to end construction and simple similarity search."""
+
+    output = docarray_vec_store.similarity_search("foo", k=1)
+    assert output == [Document(page_content="foo")]
+
+
+def test_sim_search_with_score(docarray_vec_store) -> None:
+    """Test end to end construction and similarity search with score."""
+
+    output = docarray_vec_store.similarity_search_with_score("foo", k=1)
+    assert output == [(Document(page_content="foo"), 1.0)]
+
+
+def test_sim_search_by_vector(docarray_vec_store):
+    """Test end to end construction and similarity search by vector."""
+    embedding = [1.0] * 10
+    output = docarray_vec_store.similarity_search_by_vector(embedding, k=1)
+
+    assert output == [Document(page_content="bar")]
+
+
diff --git a/tests/integration_tests/vectorstores/test_in_memory.py b/tests/integration_tests/vectorstores/test_in_memory.py
new file mode 100644
index 0000000000000..79458727310a8
--- /dev/null
+++ b/tests/integration_tests/vectorstores/test_in_memory.py
@@ -0,0 +1,48 @@
+import pytest
+
+from langchain.schema import Document
+from langchain.vectorstores.in_memory import InMemory
+from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
+
+
+def test_docarray_vec_store_init() -> None:
+    """Test end to end construction and simple similarity search."""
+    texts = ["foo", "bar", "baz"]
+    docsearch = InMemory.from_texts(
+        texts,
+        FakeEmbeddings(),
+    )
+    assert isinstance(docsearch, InMemory)
+
+
+@pytest.fixture
+def docarray_vec_store():
+    texts = ["foo", "bar", "baz"]
+    docsearch = InMemory.from_texts(
+        texts,
+        FakeEmbeddings(),
+    )
+    return docsearch
+
+
+def test_sim_search(docarray_vec_store) -> None:
+    """Test end to end construction and simple similarity search."""
+
+    output = docarray_vec_store.similarity_search("foo", k=1)
+    assert output == [Document(page_content="foo")]
+
+
+def test_sim_search_with_score(docarray_vec_store) -> None:
+    """Test end to end construction and similarity search with score."""
+
+    output = docarray_vec_store.similarity_search_with_score("foo", k=1)
+    assert output == [(Document(page_content="foo"), 1.0)]
+
+
+def test_sim_search_by_vector(docarray_vec_store):
+    """Test end to end construction and similarity search by vector."""
+    embedding = [1.0] * 10
+    output = docarray_vec_store.similarity_search_by_vector(embedding, k=1)
+
+    assert output == [Document(page_content="bar")]
+

From b687fd487f596818f0fe9e7230712a0ca0da7ad5 Mon Sep 17 00:00:00 2001
From: anna-charlotte <charlotte.gerhaher@jina.ai>
Date: Thu, 27 Apr 2023 12:22:02 +0200
Subject: [PATCH 2/6] refactor: use abstract VecStoreFromDocIndex for in memory
 and hnswlib implementation

Signed-off-by: anna-charlotte <charlotte.gerhaher@jina.ai>
---
 langchain/vectorstores/hnsw_lib.py            | 204 ++-----------
 langchain/vectorstores/in_memory.py           | 273 +++++++++---------
 .../vector_store_from_doc_index.py            | 186 ++++++++++++
 .../vectorstores/test_hnsw_lib.py             |  73 +++--
 .../vectorstores/test_in_memory.py            |  63 +++-
 5 files changed, 448 insertions(+), 351 deletions(-)
 create mode 100644 langchain/vectorstores/vector_store_from_doc_index.py

diff --git a/langchain/vectorstores/hnsw_lib.py b/langchain/vectorstores/hnsw_lib.py
index 6974133c9891b..51c85423ac2b7 100644
--- a/langchain/vectorstores/hnsw_lib.py
+++ b/langchain/vectorstores/hnsw_lib.py
@@ -1,52 +1,44 @@
 """Wrapper around in-memory DocArray store."""
 from __future__ import annotations
 
-from operator import itemgetter
 from typing import List, Optional, Any, Tuple, Iterable, Type, Callable, Sequence, TYPE_CHECKING
+from docarray.typing import NdArray
 
 from langchain.embeddings.base import Embeddings
-from langchain.schema import Document
-from langchain.vectorstores import VectorStore
 from langchain.vectorstores.base import VST
-from langchain.vectorstores.utils import maximal_marginal_relevance
-
-from docarray import BaseDoc
-from docarray.typing import NdArray
+from langchain.vectorstores.vector_store_from_doc_index import VecStoreFromDocIndex, _check_docarray_import
 
 
-class HnswLib(VectorStore):
+class HnswLib(VecStoreFromDocIndex):
     """Wrapper around HnswLib storage.
 
-    To use it, you should have the ``docarray`` package with version >=0.30.0 installed.
+    To use it, you should have the ``docarray`` package with version >=0.31.0 installed.
     """
     def __init__(
         self,
-        work_dir: str,
-        n_dim: int,
         texts: List[str],
         embedding: Embeddings,
+        work_dir: str,
+        n_dim: int,
         metadatas: Optional[List[dict]],
-        sim_metric: str = 'cosine',
-        kwargs: dict = None
+        dist_metric: str = 'cosine',
+        **kwargs,
     ) -> None:
-        """Initialize HnswLib store."""
-        try:
-            import docarray
-            da_version = docarray.__version__.split('.')
-            if int(da_version[0]) == 0 and int(da_version[1]) <= 21:
-                raise ValueError(
-                    f'To use the HnswLib VectorStore the docarray version >=0.30.0 is expected, '
-                    f'received: {docarray.__version__}.'
-                    f'To upgrade, please run: `pip install -U docarray`.'
-                )
-            else:
-                from docarray import DocList
-                from docarray.index import HnswDocumentIndex
-        except ImportError:
-            raise ImportError(
-                "Could not import docarray python package. "
-                "Please install it with `pip install -U docarray`."
-            )
+        """Initialize HnswLib store.
+
+        Args:
+            texts (List[str]): Text data.
+            embedding (Embeddings): Embedding function.
+            metadatas (Optional[List[dict]]): Metadata for each text if it exists.
+                Defaults to None.
+            work_dir (str): path to the location where all the data will be stored.
+            n_dim (int): dimension of an embedding.
+            dist_metric (str): Distance metric for HnswLib can be one of: 'cosine',
+                'ip', and 'l2'. Defaults to 'cosine'.
+        """
+        _check_docarray_import()
+        from docarray.index import HnswDocumentIndex
+
         try:
             import google.protobuf
         except ImportError:
@@ -55,27 +47,13 @@ def __init__(
                 "Please install it with `pip install -U protobuf`."
             )
 
-        if metadatas is None:
-            metadatas = [{} for _ in range(len(texts))]
-
-        self.embedding = embedding
-
-        self.doc_cls = self._get_doc_cls(n_dim, sim_metric)
-        self.doc_index = HnswDocumentIndex[self.doc_cls](work_dir=work_dir)
-        embeddings = self.embedding.embed_documents(texts)
-        docs = DocList[self.doc_cls](
-            [
-                self.doc_cls(
-                    text=t,
-                    embedding=e,
-                    metadata=m,
-                ) for t, m, e in zip(texts, metadatas, embeddings)
-            ]
-        )
-        self.doc_index.index(docs)
+        doc_cls = self._get_doc_cls(n_dim, dist_metric)
+        doc_index = HnswDocumentIndex[doc_cls](work_dir=work_dir)
+        super().__init__(doc_index, texts, embedding, metadatas)
 
     @staticmethod
     def _get_doc_cls(n_dim: int, sim_metric: str):
+        from docarray import BaseDoc
         from pydantic import Field
 
         class DocArrayDoc(BaseDoc):
@@ -93,6 +71,7 @@ def from_texts(
         metadatas: Optional[List[dict]] = None,
         work_dir: str = None,
         n_dim: int = None,
+        dist_metric: str = 'cosine',
         **kwargs: Any
     ) -> HnswLib:
 
@@ -107,129 +86,6 @@ def from_texts(
             texts=texts,
             embedding=embedding,
             metadatas=metadatas,
-            kwargs=kwargs
+            dist_metric=dist_metric,
+            kwargs=kwargs,
         )
-
-    def add_texts(
-        self,
-        texts: Iterable[str],
-        metadatas: Optional[List[dict]] = None,
-        **kwargs: Any
-    ) -> List[str]:
-        """Run more texts through the embeddings and add to the vectorstore.
-
-        Args:
-            texts: Iterable of strings to add to the vectorstore.
-            metadatas: Optional list of metadatas associated with the texts.
-
-        Returns:
-            List of ids from adding the texts into the vectorstore.
-        """
-        if metadatas is None:
-            metadatas = [{} for _ in range(len(list(texts)))]
-
-        ids = []
-        embeddings = self.embedding.embed_documents(texts)
-        for t, m, e in zip(texts, metadatas, embeddings):
-            doc = self.doc_cls(
-                text=t,
-                embedding=e,
-                metadata=m
-            )
-            self.doc_index.index(doc)
-            ids.append(doc.id)  # TODO return index of self.docs ?
-
-        return ids
-
-    def similarity_search_with_score(
-        self, query: str, k: int = 4, **kwargs: Any
-    ) -> List[Tuple[Document, float]]:
-        """Return docs most similar to query.
-
-        Args:
-            query: Text to look up documents similar to.
-            k: Number of Documents to return. Defaults to 4.
-
-        Returns:
-            List of Documents most similar to the query and score for each.
-        """
-        query_embedding = self.embedding.embed_query(query)
-        query_embedding = [1., 1., 1., 1., 1., 1., 1., 1., 1., 0.]
-        print(f"query_embedding = {query_embedding}")
-        query_doc = self.doc_cls(embedding=query_embedding)
-        docs, scores = self.doc_index.find(query_doc, search_field='embedding', limit=k)
-
-        result = [(Document(page_content=doc.text), score) for doc, score in zip(docs, scores)]
-        return result
-
-    def similarity_search(
-        self, query: str, k: int = 4, **kwargs: Any
-    ) -> List[Document]:
-        """Return docs most similar to query.
-
-        Args:
-            query: Text to look up documents similar to.
-            k: Number of Documents to return. Defaults to 4.
-
-        Returns:
-            List of Documents most similar to the query.
-        """
-        results = self.similarity_search_with_score(query, k)
-        return list(map(itemgetter(0), results))
-
-    def _similarity_search_with_relevance_scores(
-        self,
-        query: str,
-        k: int = 4,
-        **kwargs: Any,
-    ) -> List[Tuple[Document, float]]:
-        """Return docs and relevance scores, normalized on a scale from 0 to 1.
-
-        0 is dissimilar, 1 is most similar.
-        """
-        raise NotImplementedError
-
-    def similarity_search_by_vector(self, embedding: List[float], k: int = 4, **kwargs: Any) -> List[Document]:
-        """Return docs most similar to embedding vector.
-
-        Args:
-            embedding: Embedding to look up documents similar to.
-            k: Number of Documents to return. Defaults to 4.
-
-        Returns:
-            List of Documents most similar to the query vector.
-        """
-
-        query_doc = self.doc_cls(embedding=embedding)
-        docs = self.doc_index.find(query_doc, search_field='embedding', limit=k).documents
-
-        result = [Document(page_content=doc.text) for doc in docs]
-        return result
-
-    def max_marginal_relevance_search(
-        self, query: str, k: int = 4, fetch_k: int = 20, **kwargs: Any
-    ) -> List[Document]:
-        """Return docs selected using the maximal marginal relevance.
-
-        Maximal marginal relevance optimizes for similarity to query AND diversity
-        among selected documents.
-
-        Args:
-            query: Text to look up documents similar to.
-            k: Number of Documents to return. Defaults to 4.
-            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
-
-        Returns:
-            List of Documents selected by maximal marginal relevance.
-        """
-        query_embedding = self.embedding.embed_query(query)
-        query_doc = self.doc_cls(embedding=query_embedding)
-
-        docs, scores = self.doc_index.find(query_doc, search_field='embedding', limit=fetch_k)
-
-        embeddings = [emb for emb in docs.emb]
-
-        mmr_selected = maximal_marginal_relevance(query_embedding, embeddings, k=k)
-        results = [Document(page_content=self.doc_index[idx].text) for idx in mmr_selected]
-        return results
-
diff --git a/langchain/vectorstores/in_memory.py b/langchain/vectorstores/in_memory.py
index a079b10da7887..7a5139d898401 100644
--- a/langchain/vectorstores/in_memory.py
+++ b/langchain/vectorstores/in_memory.py
@@ -1,71 +1,58 @@
 """Wrapper around in-memory DocArray store."""
 from __future__ import annotations
 
-from operator import itemgetter
-from typing import List, Optional, Any, Tuple, Iterable, Type
+from typing import List, Optional, Any, Type
+
+from docarray.typing import NdArray
 
 from langchain.embeddings.base import Embeddings
 from langchain.schema import Document
-from langchain.vectorstores import VectorStore
 from langchain.vectorstores.base import VST
 from langchain.vectorstores.utils import maximal_marginal_relevance
-
-from docarray import BaseDoc
-from docarray.typing import NdArray
+from langchain.vectorstores.vector_store_from_doc_index import _check_docarray_import, VecStoreFromDocIndex
 
 
-class InMemory(VectorStore):
+class InMemory(VecStoreFromDocIndex):
     """Wrapper around in-memory storage.
 
-    To use it, you should have the ``docarray`` package with version >=0.30.0 installed.
+    To use it, you should have the ``docarray`` package with version >=0.31.0 installed.
     """
     def __init__(
         self,
         texts: List[str],
         embedding: Embeddings,
-        metadatas: Optional[List[dict]]
+        metadatas: Optional[List[dict]] = None,
+        metric: str = 'cosine_sim',
     ) -> None:
-        """Initialize in-memory store."""
-        try:
-            import docarray
-            da_version = docarray.__version__.split('.')
-            if int(da_version[0]) == 0 and int(da_version[1]) <= 21:
-                raise ValueError(
-                    f'To use the InMemory VectorStore the docarray version >=0.30.0 is expected, '
-                    f'received: {docarray.__version__}.'
-                    f'To upgrade, please run: `pip install -U docarray`.'
-                )
-            else:
-                from docarray import DocList
-
-        except ImportError:
-            raise ImportError(
-                "Could not import docarray python package. "
-                "Please install it with `pip install -U docarray`."
-            )
-        if metadatas is None:
-            metadatas = [{} for _ in range(len(texts))]
-
-        self.embedding = embedding
-        self.doc_cls = self._get_doc_cls()
-        self.docs = DocList[self.doc_cls](
-            [
-                self.doc_cls(
-                    text=t,
-                    embedding=e,
-                    metadata=m,
-                ) for t, m, e in zip(texts, metadatas, self.embedding.embed_documents(texts))
-            ]
-        )
+        """Initialize in-memory store.
+
+        Args:
+            texts (List[str]): Text data.
+            embedding (Embeddings): Embedding function.
+            metadatas (Optional[List[dict]]): Metadata for each text if it exists.
+                Defaults to None.
+            metric (str): metric for exact nearest-neighbor search.
+                Can be one of: 'cosine_sim', 'euclidean_dist' and 'sqeuclidean_dist'.
+                Defaults to 'cosine_sim'.
+
+        """
+        _check_docarray_import()
+        from docarray.index import InMemoryDocIndex
+
+        doc_cls = self._get_doc_cls(metric)
+        doc_index = InMemoryDocIndex[doc_cls]()
+        super().__init__(doc_index, texts, embedding, metadatas)
 
     @staticmethod
-    def _get_doc_cls():
+    def _get_doc_cls(sim_metric: str):
+        from docarray import BaseDoc
+        from pydantic import Field
+
         class DocArrayDoc(BaseDoc):
             text: Optional[str]
-            embedding: Optional[NdArray]
+            embedding: Optional[NdArray] = Field(space=sim_metric)
             metadata: Optional[dict]
 
-        # DocArrayDoc.update_forward_refs()
         return DocArrayDoc
 
     @classmethod
@@ -74,110 +61,112 @@ def from_texts(
         texts: List[str],
         embedding: Embeddings,
         metadatas: Optional[List[dict]] = None,
+        metric: str = 'cosine_sim',
         **kwargs: Any
     ) -> InMemory:
         return cls(
             texts=texts,
             embedding=embedding,
-            metadatas=metadatas
+            metadatas=metadatas,
+            metric=metric,
         )
-
-    def add_texts(
-        self,
-        texts: Iterable[str],
-        metadatas: Optional[List[dict]] = None,
-        **kwargs: Any
-    ) -> List[str]:
-        """Run more texts through the embeddings and add to the vectorstore.
-
-        Args:
-            texts: Iterable of strings to add to the vectorstore.
-            metadatas: Optional list of metadatas associated with the texts.
-
-        Returns:
-            List of ids from adding the texts into the vectorstore.
-        """
-        if metadatas is None:
-            metadatas = [{} for _ in range(len(list(texts)))]
-
-        ids = []
-        embeddings = self.embedding.embed_documents(texts)
-        for t, m, e in zip(texts, metadatas, embeddings):
-            doc = self.doc_cls(
-                text=t,
-                embedding=e,
-                metadata=m
-            )
-            self.docs.append(doc)
-            ids.append(doc.id)  # TODO return index of self.docs ?
-
-        return ids
-
-    def similarity_search_with_score(
-        self, query: str, k: int = 4, **kwargs: Any
-    ) -> List[Tuple[Document, float]]:
-        """Return docs most similar to query.
-
-        Args:
-            query: Text to look up documents similar to.
-            k: Number of Documents to return. Defaults to 4.
-
-        Returns:
-            List of Documents most similar to the query and score for each.
-        """
-        from docarray.utils.find import find  # TODO move import
-
-        query_embedding = self.embedding.embed_query(query)
-        query_doc = self.doc_cls(embedding=query_embedding)
-        docs, scores = find(index=self.docs, query=query_doc, limit=k, search_field='embedding')
-
-        result = [(Document(page_content=doc.text), score) for doc, score in zip(docs, scores)]
-        return result
-
-    def similarity_search(
-        self, query: str, k: int = 4, **kwargs: Any
-    ) -> List[Document]:
-        """Return docs most similar to query.
-
-        Args:
-            query: Text to look up documents similar to.
-            k: Number of Documents to return. Defaults to 4.
-
-        Returns:
-            List of Documents most similar to the query.
-        """
-        results = self.similarity_search_with_score(query, k)
-        return list(map(itemgetter(0), results))
-
-    def _similarity_search_with_relevance_scores(
-        self,
-        query: str,
-        k: int = 4,
-        **kwargs: Any,
-    ) -> List[Tuple[Document, float]]:
-        """Return docs and relevance scores, normalized on a scale from 0 to 1.
-
-        0 is dissimilar, 1 is most similar.
-        """
-        raise NotImplementedError
-
-    def similarity_search_by_vector(self, embedding: List[float], k: int = 4, **kwargs: Any) -> List[Document]:
-        """Return docs most similar to embedding vector.
-
-        Args:
-            embedding: Embedding to look up documents similar to.
-            k: Number of Documents to return. Defaults to 4.
-
-        Returns:
-            List of Documents most similar to the query vector.
-        """
-        from docarray.utils.find import find
-
-        query_doc = self.doc_cls(embedding=embedding)
-        result_docs = find(index=self.docs, query=query_doc, limit=k, search_field='embedding').documents
-
-        result = [Document(page_content=doc.text) for doc in result_docs]
-        return result
+    #
+    # def add_texts(
+    #     self,
+    #     texts: Iterable[str],
+    #     metadatas: Optional[List[dict]] = None,
+    #     **kwargs: Any
+    # ) -> List[str]:
+    #     """Run more texts through the embeddings and add to the vectorstore.
+    #
+    #     Args:
+    #         texts: Iterable of strings to add to the vectorstore.
+    #         metadatas: Optional list of metadatas associated with the texts.
+    #
+    #     Returns:
+    #         List of ids from adding the texts into the vectorstore.
+    #     """
+    #     if metadatas is None:
+    #         metadatas = [{} for _ in range(len(list(texts)))]
+    #
+    #     ids = []
+    #     embeddings = self.embedding.embed_documents(texts)
+    #     for t, m, e in zip(texts, metadatas, embeddings):
+    #         doc = self.doc_cls(
+    #             text=t,
+    #             embedding=e,
+    #             metadata=m
+    #         )
+    #         self.docs.append(doc)
+    #         ids.append(doc.id)  # TODO return index of self.docs ?
+    #
+    #     return ids
+    #
+    # def similarity_search_with_score(
+    #     self, query: str, k: int = 4, **kwargs: Any
+    # ) -> List[Tuple[Document, float]]:
+    #     """Return docs most similar to query.
+    #
+    #     Args:
+    #         query: Text to look up documents similar to.
+    #         k: Number of Documents to return. Defaults to 4.
+    #
+    #     Returns:
+    #         List of Documents most similar to the query and score for each.
+    #     """
+    #     from docarray.utils.find import find  # TODO move import
+    #
+    #     query_embedding = self.embedding.embed_query(query)
+    #     query_doc = self.doc_cls(embedding=query_embedding)
+    #     docs, scores = find(index=self.docs, query=query_doc, limit=k, search_field='embedding')
+    #
+    #     result = [(Document(page_content=doc.text), score) for doc, score in zip(docs, scores)]
+    #     return result
+    #
+    # def similarity_search(
+    #     self, query: str, k: int = 4, **kwargs: Any
+    # ) -> List[Document]:
+    #     """Return docs most similar to query.
+    #
+    #     Args:
+    #         query: Text to look up documents similar to.
+    #         k: Number of Documents to return. Defaults to 4.
+    #
+    #     Returns:
+    #         List of Documents most similar to the query.
+    #     """
+    #     results = self.similarity_search_with_score(query, k)
+    #     return list(map(itemgetter(0), results))
+    #
+    # def _similarity_search_with_relevance_scores(
+    #     self,
+    #     query: str,
+    #     k: int = 4,
+    #     **kwargs: Any,
+    # ) -> List[Tuple[Document, float]]:
+    #     """Return docs and relevance scores, normalized on a scale from 0 to 1.
+    #
+    #     0 is dissimilar, 1 is most similar.
+    #     """
+    #     raise NotImplementedError
+    #
+    # def similarity_search_by_vector(self, embedding: List[float], k: int = 4, **kwargs: Any) -> List[Document]:
+    #     """Return docs most similar to embedding vector.
+    #
+    #     Args:
+    #         embedding: Embedding to look up documents similar to.
+    #         k: Number of Documents to return. Defaults to 4.
+    #
+    #     Returns:
+    #         List of Documents most similar to the query vector.
+    #     """
+    #     from docarray.utils.find import find
+    #
+    #     query_doc = self.doc_cls(embedding=embedding)
+    #     result_docs = find(index=self.docs, query=query_doc, limit=k, search_field='embedding').documents
+    #
+    #     result = [Document(page_content=doc.text) for doc in result_docs]
+    #     return result
 
     def max_marginal_relevance_search(
         self, query: str, k: int = 4, fetch_k: int = 20, **kwargs: Any
diff --git a/langchain/vectorstores/vector_store_from_doc_index.py b/langchain/vectorstores/vector_store_from_doc_index.py
new file mode 100644
index 0000000000000..a72c883b2e201
--- /dev/null
+++ b/langchain/vectorstores/vector_store_from_doc_index.py
@@ -0,0 +1,186 @@
+from typing import TYPE_CHECKING, TypeVar, List, Optional, Type, Iterable, Any, Tuple
+
+from docarray import DocList, BaseDoc
+from operator import itemgetter
+
+from langchain.embeddings.base import Embeddings
+from langchain.schema import Document
+from langchain.vectorstores import VectorStore
+
+from docarray.index.abstract import BaseDocIndex
+
+
+T_Doc = TypeVar('T_Doc', bound=BaseDocIndex)
+
+
+def _check_docarray_import():
+    try:
+        import docarray
+        da_version = docarray.__version__.split('.')
+        if int(da_version[0]) == 0 and int(da_version[1]) <= 21:
+            raise ValueError(
+                f'To use the HnswLib VectorStore the docarray version >=0.31.0 is expected, '
+                f'received: {docarray.__version__}.'
+                f'To upgrade, please run: `pip install -U docarray`.'
+            )
+    except ImportError:
+        raise ImportError(
+            "Could not import docarray python package. "
+            "Please install it with `pip install -U docarray`."
+        )
+
+
+class VecStoreFromDocIndex(VectorStore):
+    doc_index: BaseDocIndex = None
+    doc_cls: Type[BaseDoc] = None
+    embedding: Embeddings = None
+
+    def __init__(
+        self,
+        doc_index: T_Doc,
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]],
+    ):
+        self.doc_index = doc_index
+        self.doc_cls = doc_index._schema
+        self.embedding = embedding
+
+        embeddings = self.embedding.embed_documents(texts)
+        if metadatas is None:
+            metadatas = [{} for _ in range(len(texts))]
+
+        docs = DocList[self.doc_cls](
+            [
+                self.doc_cls(
+                    text=t,
+                    embedding=e,
+                    metadata=m,
+                ) for t, m, e in zip(texts, metadatas, embeddings)
+            ]
+        )
+        if len(docs) > 0:
+            self.doc_index.index(docs)
+
+    def add_texts(
+        self,
+        texts: Iterable[str],
+        metadatas: Optional[List[dict]] = None,
+        **kwargs: Any
+    ) -> List[str]:
+        """Run more texts through the embeddings and add to the vectorstore.
+
+        Args:
+            texts: Iterable of strings to add to the vectorstore.
+            metadatas: Optional list of metadatas associated with the texts.
+
+        Returns:
+            List of ids from adding the texts into the vectorstore.
+        """
+        if metadatas is None:
+            metadatas = [{} for _ in range(len(list(texts)))]
+
+        ids = []
+        embeddings = self.embedding.embed_documents(texts)
+        for t, m, e in zip(texts, metadatas, embeddings):
+            doc = self.doc_cls(
+                text=t,
+                embedding=e,
+                metadata=m
+            )
+            self.doc_index.index([doc])
+            ids.append(doc.id)  # TODO return index of self.docs ?
+
+        return ids
+
+    def similarity_search_with_score(
+        self, query: str, k: int = 4, **kwargs: Any
+    ) -> List[Tuple[Document, float]]:
+        """Return docs most similar to query.
+
+        Args:
+            query: Text to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+
+        Returns:
+            List of Documents most similar to the query and score for each.
+        """
+        query_embedding = self.embedding.embed_query(query)
+        query_doc = self.doc_cls(embedding=query_embedding)
+        docs, scores = self.doc_index.find(query_doc, search_field='embedding', limit=k)
+
+        result = [(Document(page_content=doc.text), score) for doc, score in zip(docs, scores)]
+        return result
+
+    def similarity_search(
+        self, query: str, k: int = 4, **kwargs: Any
+    ) -> List[Document]:
+        """Return docs most similar to query.
+
+        Args:
+            query: Text to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+
+        Returns:
+            List of Documents most similar to the query.
+        """
+        results = self.similarity_search_with_score(query, k)
+        return list(map(itemgetter(0), results))
+
+
+    def _similarity_search_with_relevance_scores(
+        self,
+        query: str,
+        k: int = 4,
+        **kwargs: Any,
+    ) -> List[Tuple[Document, float]]:
+        """Return docs and relevance scores, normalized on a scale from 0 to 1.
+
+        0 is dissimilar, 1 is most similar.
+        """
+        raise NotImplementedError
+
+    def similarity_search_by_vector(self, embedding: List[float], k: int = 4, **kwargs: Any) -> List[Document]:
+        """Return docs most similar to embedding vector.
+
+        Args:
+            embedding: Embedding to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+
+        Returns:
+            List of Documents most similar to the query vector.
+        """
+
+        query_doc = self.doc_cls(embedding=embedding)
+        docs = self.doc_index.find(query_doc, search_field='embedding', limit=k).documents
+
+        result = [Document(page_content=doc.text) for doc in docs]
+        return result
+
+    def max_marginal_relevance_search(
+        self, query: str, k: int = 4, fetch_k: int = 20, **kwargs: Any
+    ) -> List[Document]:
+        """Return docs selected using the maximal marginal relevance.
+
+        Maximal marginal relevance optimizes for similarity to query AND diversity
+        among selected documents.
+
+        Args:
+            query: Text to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
+
+        Returns:
+            List of Documents selected by maximal marginal relevance.
+        """
+        query_embedding = self.embedding.embed_query(query)
+        query_doc = self.doc_cls(embedding=query_embedding)
+
+        docs, scores = self.doc_index.find(query_doc, search_field='embedding', limit=fetch_k)
+
+        embeddings = [emb for emb in docs.emb]
+
+        mmr_selected = maximal_marginal_relevance(query_embedding, embeddings, k=k)
+        results = [Document(page_content=self.doc_index[idx].text) for idx in mmr_selected]
+        return results
+
diff --git a/tests/integration_tests/vectorstores/test_hnsw_lib.py b/tests/integration_tests/vectorstores/test_hnsw_lib.py
index 7aa3481cf19e7..58919d37e7094 100644
--- a/tests/integration_tests/vectorstores/test_hnsw_lib.py
+++ b/tests/integration_tests/vectorstores/test_hnsw_lib.py
@@ -1,3 +1,4 @@
+import numpy as np
 import pytest
 
 from langchain.schema import Document
@@ -5,7 +6,7 @@
 from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
 
 
-def test_docarray_hnswlib_vec_store_init(tmp_path) -> None:
+def test_hnswlib_vec_store_from_texts(tmp_path) -> None:
     """Test end to end construction and simple similarity search."""
     texts = ["foo", "bar", "baz"]
     docsearch = HnswLib.from_texts(
@@ -16,39 +17,71 @@ def test_docarray_hnswlib_vec_store_init(tmp_path) -> None:
         sim_metric='cosine',
     )
     assert isinstance(docsearch, HnswLib)
+    assert docsearch.doc_index.num_docs() == 3
 
 
-@pytest.fixture
-def docarray_vec_store(tmp_path):
-    texts = ["foo", "bar", "baz"]
-    docsearch = HnswLib.from_texts(
-        texts,
-        FakeEmbeddings(),
+def test_hnswlib_vec_store_add_texts(tmp_path) -> None:
+    """Test end to end construction and simple similarity search."""
+    docsearch = HnswLib(
         work_dir=str(tmp_path),
         n_dim=10,
+        texts=[],
+        embedding=FakeEmbeddings(),
+        metadatas=[{}],
+        sim_metric='cosine',
     )
-    return docsearch
+    assert isinstance(docsearch, HnswLib)
+    assert docsearch.doc_index.num_docs() == 0
 
+    texts = ["foo", "bar", "baz"]
+    docsearch.add_texts(texts=texts)
+    assert docsearch.doc_index.num_docs() == 3
 
-def test_sim_search(docarray_vec_store) -> None:
-    """Test end to end construction and simple similarity search."""
 
-    output = docarray_vec_store.similarity_search("foo", k=1)
+@pytest.mark.parametrize('metric', ['cosine', 'ip', 'l2'])
+def test_sim_search(metric, tmp_path) -> None:
+    """Test end to end construction and simple similarity search."""
+    texts = ["foo", "bar", "baz"]
+    hnswlib_vec_store = HnswLib.from_texts(
+        texts,
+        FakeEmbeddings(),
+        work_dir=str(tmp_path),
+        n_dim=10,
+    )
+    output = hnswlib_vec_store.similarity_search("foo", k=1)
     assert output == [Document(page_content="foo")]
 
 
-def test_sim_search_with_score(docarray_vec_store) -> None:
-    """Test end to end construction and similarity search with score."""
-
-    output = docarray_vec_store.similarity_search_with_score("foo", k=1)
-    assert output == [(Document(page_content="foo"), 1.0)]
-
-
-def test_sim_search_by_vector(docarray_vec_store):
+@pytest.mark.parametrize('metric', ['cosine', 'ip', 'l2'])
+def test_sim_search_by_vector(metric, tmp_path):
     """Test end to end construction and similarity search by vector."""
+    texts = ["foo", "bar", "baz"]
+    hnswlib_vec_store = HnswLib.from_texts(
+        texts,
+        FakeEmbeddings(),
+        work_dir=str(tmp_path),
+        n_dim=10,
+    )
     embedding = [1.0] * 10
-    output = docarray_vec_store.similarity_search_by_vector(embedding, k=1)
+    output = hnswlib_vec_store.similarity_search_by_vector(embedding, k=1)
 
     assert output == [Document(page_content="bar")]
 
 
+@pytest.mark.parametrize('metric', ['cosine', 'ip', 'l2'])
+def test_sim_search_with_score(metric, tmp_path) -> None:
+    """Test end to end construction and similarity search with score."""
+    texts = ["foo", "bar", "baz"]
+    hnswlib_vec_store = HnswLib.from_texts(
+        texts,
+        FakeEmbeddings(),
+        work_dir=str(tmp_path),
+        n_dim=10,
+    )
+    output = hnswlib_vec_store.similarity_search_with_score("foo", k=1)
+    assert len(output) == 1
+
+    out_doc, out_score = output[0]
+    assert out_doc == Document(page_content="foo")
+    assert np.isclose(out_score, 0.0, atol=1.e-6)
+
diff --git a/tests/integration_tests/vectorstores/test_in_memory.py b/tests/integration_tests/vectorstores/test_in_memory.py
index 79458727310a8..62834336c7c24 100644
--- a/tests/integration_tests/vectorstores/test_in_memory.py
+++ b/tests/integration_tests/vectorstores/test_in_memory.py
@@ -1,3 +1,4 @@
+import numpy as np
 import pytest
 
 from langchain.schema import Document
@@ -5,7 +6,7 @@
 from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
 
 
-def test_docarray_vec_store_init() -> None:
+def test_in_memory_vec_store_from_texts() -> None:
     """Test end to end construction and simple similarity search."""
     texts = ["foo", "bar", "baz"]
     docsearch = InMemory.from_texts(
@@ -13,36 +14,68 @@ def test_docarray_vec_store_init() -> None:
         FakeEmbeddings(),
     )
     assert isinstance(docsearch, InMemory)
+    assert docsearch.doc_index.num_docs() == 3
 
 
-@pytest.fixture
-def docarray_vec_store():
-    texts = ["foo", "bar", "baz"]
-    docsearch = InMemory.from_texts(
-        texts,
-        FakeEmbeddings(),
+def test_in_memory_vec_store_add_texts(tmp_path) -> None:
+    """Test end to end construction and simple similarity search."""
+    docsearch = InMemory(
+        texts=[],
+        embedding=FakeEmbeddings(),
     )
-    return docsearch
+    assert isinstance(docsearch, InMemory)
+    assert docsearch.doc_index.num_docs() == 0
+
+    texts = ["foo", "bar", "baz"]
+    docsearch.add_texts(texts=texts)
+    assert docsearch.doc_index.num_docs() == 3
 
 
-def test_sim_search(docarray_vec_store) -> None:
+@pytest.mark.parametrize('metric', ['cosine_sim', 'euclidean_dist', 'sqeuclidean_dist'])
+def test_sim_search(metric) -> None:
     """Test end to end construction and simple similarity search."""
+    texts = ["foo", "bar", "baz"]
+    in_memory_vec_store = InMemory.from_texts(
+        texts=texts,
+        embedding=FakeEmbeddings(),
+        metric=metric,
+    )
 
-    output = docarray_vec_store.similarity_search("foo", k=1)
+    output = in_memory_vec_store.similarity_search("foo", k=1)
     assert output == [Document(page_content="foo")]
 
 
-def test_sim_search_with_score(docarray_vec_store) -> None:
+@pytest.mark.parametrize('metric', ['cosine_sim', 'euclidean_dist', 'sqeuclidean_dist'])
+def test_sim_search_with_score(metric) -> None:
     """Test end to end construction and similarity search with score."""
+    texts = ["foo", "bar", "baz"]
+    in_memory_vec_store = InMemory.from_texts(
+        texts=texts,
+        embedding=FakeEmbeddings(),
+        metric=metric,
+    )
+
+    output = in_memory_vec_store.similarity_search_with_score("foo", k=1)
 
-    output = docarray_vec_store.similarity_search_with_score("foo", k=1)
-    assert output == [(Document(page_content="foo"), 1.0)]
+    out_doc, out_score = output[0]
+    assert out_doc == Document(page_content="foo")
 
+    expected_score = 0.0 if 'dist' in metric else 1.0
+    assert np.isclose(out_score, expected_score, atol=1.e-6)
 
-def test_sim_search_by_vector(docarray_vec_store):
+
+@pytest.mark.parametrize('metric', ['cosine_sim', 'euclidean_dist', 'sqeuclidean_dist'])
+def test_sim_search_by_vector(metric):
     """Test end to end construction and similarity search by vector."""
+    texts = ["foo", "bar", "baz"]
+    in_memory_vec_store = InMemory.from_texts(
+        texts=texts,
+        embedding=FakeEmbeddings(),
+        metric=metric,
+    )
+
     embedding = [1.0] * 10
-    output = docarray_vec_store.similarity_search_by_vector(embedding, k=1)
+    output = in_memory_vec_store.similarity_search_by_vector(embedding, k=1)
 
     assert output == [Document(page_content="bar")]
 

From de262f9ae52affcccc3653128dee3d7cded177dc Mon Sep 17 00:00:00 2001
From: anna-charlotte <charlotte.gerhaher@jina.ai>
Date: Thu, 27 Apr 2023 15:17:26 +0200
Subject: [PATCH 3/6] fix: clean up and add dependencies

Signed-off-by: anna-charlotte <charlotte.gerhaher@jina.ai>
---
 langchain/vectorstores/__init__.py            |   4 +
 langchain/vectorstores/hnsw_lib.py            |  74 ++++---
 langchain/vectorstores/in_memory.py           | 195 +++---------------
 .../vector_store_from_doc_index.py            | 113 +++++-----
 poetry.lock                                   |  23 ++-
 pyproject.toml                                |   9 +-
 .../vectorstores/test_hnsw_lib.py             |  27 ++-
 .../vectorstores/test_in_memory.py            |  20 +-
 8 files changed, 186 insertions(+), 279 deletions(-)

diff --git a/langchain/vectorstores/__init__.py b/langchain/vectorstores/__init__.py
index 30d1ca7ecdc3e..5360f4b8f25c2 100644
--- a/langchain/vectorstores/__init__.py
+++ b/langchain/vectorstores/__init__.py
@@ -7,6 +7,8 @@
 from langchain.vectorstores.deeplake import DeepLake
 from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch
 from langchain.vectorstores.faiss import FAISS
+from langchain.vectorstores.hnsw_lib import HnswLib
+from langchain.vectorstores.in_memory import InMemory
 from langchain.vectorstores.milvus import Milvus
 from langchain.vectorstores.myscale import MyScale, MyScaleSettings
 from langchain.vectorstores.opensearch_vector_search import OpenSearchVectorSearch
@@ -34,4 +36,6 @@
     "MyScaleSettings",
     "SupabaseVectorStore",
     "AnalyticDB",
+    "HnswLib",
+    "InMemory",
 ]
diff --git a/langchain/vectorstores/hnsw_lib.py b/langchain/vectorstores/hnsw_lib.py
index 51c85423ac2b7..ddc3ec7b6102c 100644
--- a/langchain/vectorstores/hnsw_lib.py
+++ b/langchain/vectorstores/hnsw_lib.py
@@ -1,40 +1,38 @@
-"""Wrapper around in-memory DocArray store."""
+"""Wrapper around HnswLib store."""
 from __future__ import annotations
 
-from typing import List, Optional, Any, Tuple, Iterable, Type, Callable, Sequence, TYPE_CHECKING
-from docarray.typing import NdArray
+from typing import List, Optional, Type
 
 from langchain.embeddings.base import Embeddings
 from langchain.vectorstores.base import VST
-from langchain.vectorstores.vector_store_from_doc_index import VecStoreFromDocIndex, _check_docarray_import
+from langchain.vectorstores.vector_store_from_doc_index import (
+    VecStoreFromDocIndex,
+    _check_docarray_import,
+)
 
 
 class HnswLib(VecStoreFromDocIndex):
     """Wrapper around HnswLib storage.
 
-    To use it, you should have the ``docarray`` package with version >=0.31.0 installed.
+    To use it, you should have the ``docarray[hnswlib]`` package with version >=0.31.0 installed.
+    You can install it with `pip install "langchain[hnswlib]"`.
     """
+
     def __init__(
         self,
-        texts: List[str],
         embedding: Embeddings,
         work_dir: str,
         n_dim: int,
-        metadatas: Optional[List[dict]],
-        dist_metric: str = 'cosine',
-        **kwargs,
+        dist_metric: str = "cosine",
     ) -> None:
         """Initialize HnswLib store.
 
         Args:
-            texts (List[str]): Text data.
             embedding (Embeddings): Embedding function.
-            metadatas (Optional[List[dict]]): Metadata for each text if it exists.
-                Defaults to None.
             work_dir (str): path to the location where all the data will be stored.
             n_dim (int): dimension of an embedding.
-            dist_metric (str): Distance metric for HnswLib can be one of: 'cosine',
-                'ip', and 'l2'. Defaults to 'cosine'.
+            dist_metric (str): Distance metric for HnswLib can be one of: "cosine",
+                "ip", and "l2". Defaults to "cosine".
         """
         _check_docarray_import()
         from docarray.index import HnswDocumentIndex
@@ -43,25 +41,13 @@ def __init__(
             import google.protobuf
         except ImportError:
             raise ImportError(
-                "Could not import protobuf python package. "
-                "Please install it with `pip install -U protobuf`."
+                "Could not import all required packages. "
+                "Please install it with `pip install \"langchain[hnswlib]\"`."
             )
 
-        doc_cls = self._get_doc_cls(n_dim, dist_metric)
+        doc_cls = self._get_doc_cls({"dim": n_dim, "space": dist_metric})
         doc_index = HnswDocumentIndex[doc_cls](work_dir=work_dir)
-        super().__init__(doc_index, texts, embedding, metadatas)
-
-    @staticmethod
-    def _get_doc_cls(n_dim: int, sim_metric: str):
-        from docarray import BaseDoc
-        from pydantic import Field
-
-        class DocArrayDoc(BaseDoc):
-            text: Optional[str]
-            embedding: Optional[NdArray] = Field(dim=n_dim, space=sim_metric)
-            metadata: Optional[dict]
-
-        return DocArrayDoc
+        super().__init__(doc_index, embedding)
 
     @classmethod
     def from_texts(
@@ -71,21 +57,33 @@ def from_texts(
         metadatas: Optional[List[dict]] = None,
         work_dir: str = None,
         n_dim: int = None,
-        dist_metric: str = 'cosine',
-        **kwargs: Any
+        dist_metric: str = "cosine",
     ) -> HnswLib:
+        """Create an HnswLib store and insert data.
 
+        Args:
+            texts (List[str]): Text data.
+            embedding (Embeddings): Embedding function.
+            metadatas (Optional[List[dict]]): Metadata for each text if it exists.
+                Defaults to None.
+            work_dir (str): path to the location where all the data will be stored.
+            n_dim (int): dimension of an embedding.
+            dist_metric (str): Distance metric for HnswLib can be one of: "cosine",
+                "ip", and "l2". Defaults to "cosine".
+
+        Returns:
+            HnswLib Vector Store
+        """
         if work_dir is None:
-            raise ValueError('`work_dir` parameter hs not been set.')
+            raise ValueError("`work_dir` parameter hs not been set.")
         if n_dim is None:
-            raise ValueError('`n_dim` parameter has not been set.')
+            raise ValueError("`n_dim` parameter has not been set.")
 
-        return cls(
+        store = cls(
             work_dir=work_dir,
             n_dim=n_dim,
-            texts=texts,
             embedding=embedding,
-            metadatas=metadatas,
             dist_metric=dist_metric,
-            kwargs=kwargs,
         )
+        store.add_texts(texts=texts, metadatas=metadatas)
+        return store
diff --git a/langchain/vectorstores/in_memory.py b/langchain/vectorstores/in_memory.py
index 7a5139d898401..07e1f49d82c17 100644
--- a/langchain/vectorstores/in_memory.py
+++ b/langchain/vectorstores/in_memory.py
@@ -1,59 +1,42 @@
-"""Wrapper around in-memory DocArray store."""
+"""Wrapper around in-memory storage."""
 from __future__ import annotations
 
-from typing import List, Optional, Any, Type
-
-from docarray.typing import NdArray
+from typing import List, Optional, Type
 
 from langchain.embeddings.base import Embeddings
-from langchain.schema import Document
 from langchain.vectorstores.base import VST
-from langchain.vectorstores.utils import maximal_marginal_relevance
-from langchain.vectorstores.vector_store_from_doc_index import _check_docarray_import, VecStoreFromDocIndex
+from langchain.vectorstores.vector_store_from_doc_index import (
+    VecStoreFromDocIndex,
+    _check_docarray_import,
+)
 
 
 class InMemory(VecStoreFromDocIndex):
     """Wrapper around in-memory storage.
 
     To use it, you should have the ``docarray`` package with version >=0.31.0 installed.
+    You can install it with `pip install "langchain[in_memory_store]"`.
     """
+
     def __init__(
         self,
-        texts: List[str],
         embedding: Embeddings,
-        metadatas: Optional[List[dict]] = None,
-        metric: str = 'cosine_sim',
+        metric: str = "cosine_sim",
     ) -> None:
         """Initialize in-memory store.
 
         Args:
-            texts (List[str]): Text data.
             embedding (Embeddings): Embedding function.
-            metadatas (Optional[List[dict]]): Metadata for each text if it exists.
-                Defaults to None.
             metric (str): metric for exact nearest-neighbor search.
-                Can be one of: 'cosine_sim', 'euclidean_dist' and 'sqeuclidean_dist'.
-                Defaults to 'cosine_sim'.
-
+                Can be one of: "cosine_sim", "euclidean_dist" and "sqeuclidean_dist".
+                Defaults to "cosine_sim".
         """
         _check_docarray_import()
-        from docarray.index import InMemoryDocIndex
-
-        doc_cls = self._get_doc_cls(metric)
-        doc_index = InMemoryDocIndex[doc_cls]()
-        super().__init__(doc_index, texts, embedding, metadatas)
-
-    @staticmethod
-    def _get_doc_cls(sim_metric: str):
-        from docarray import BaseDoc
-        from pydantic import Field
-
-        class DocArrayDoc(BaseDoc):
-            text: Optional[str]
-            embedding: Optional[NdArray] = Field(space=sim_metric)
-            metadata: Optional[dict]
+        from docarray.index import InMemoryExactNNIndex
 
-        return DocArrayDoc
+        doc_cls = self._get_doc_cls({"space": metric})
+        doc_index = InMemoryExactNNIndex[doc_cls]()
+        super().__init__(doc_index, embedding)
 
     @classmethod
     def from_texts(
@@ -61,139 +44,25 @@ def from_texts(
         texts: List[str],
         embedding: Embeddings,
         metadatas: Optional[List[dict]] = None,
-        metric: str = 'cosine_sim',
-        **kwargs: Any
+        metric: str = "cosine_sim",
     ) -> InMemory:
-        return cls(
-            texts=texts,
-            embedding=embedding,
-            metadatas=metadatas,
-            metric=metric,
-        )
-    #
-    # def add_texts(
-    #     self,
-    #     texts: Iterable[str],
-    #     metadatas: Optional[List[dict]] = None,
-    #     **kwargs: Any
-    # ) -> List[str]:
-    #     """Run more texts through the embeddings and add to the vectorstore.
-    #
-    #     Args:
-    #         texts: Iterable of strings to add to the vectorstore.
-    #         metadatas: Optional list of metadatas associated with the texts.
-    #
-    #     Returns:
-    #         List of ids from adding the texts into the vectorstore.
-    #     """
-    #     if metadatas is None:
-    #         metadatas = [{} for _ in range(len(list(texts)))]
-    #
-    #     ids = []
-    #     embeddings = self.embedding.embed_documents(texts)
-    #     for t, m, e in zip(texts, metadatas, embeddings):
-    #         doc = self.doc_cls(
-    #             text=t,
-    #             embedding=e,
-    #             metadata=m
-    #         )
-    #         self.docs.append(doc)
-    #         ids.append(doc.id)  # TODO return index of self.docs ?
-    #
-    #     return ids
-    #
-    # def similarity_search_with_score(
-    #     self, query: str, k: int = 4, **kwargs: Any
-    # ) -> List[Tuple[Document, float]]:
-    #     """Return docs most similar to query.
-    #
-    #     Args:
-    #         query: Text to look up documents similar to.
-    #         k: Number of Documents to return. Defaults to 4.
-    #
-    #     Returns:
-    #         List of Documents most similar to the query and score for each.
-    #     """
-    #     from docarray.utils.find import find  # TODO move import
-    #
-    #     query_embedding = self.embedding.embed_query(query)
-    #     query_doc = self.doc_cls(embedding=query_embedding)
-    #     docs, scores = find(index=self.docs, query=query_doc, limit=k, search_field='embedding')
-    #
-    #     result = [(Document(page_content=doc.text), score) for doc, score in zip(docs, scores)]
-    #     return result
-    #
-    # def similarity_search(
-    #     self, query: str, k: int = 4, **kwargs: Any
-    # ) -> List[Document]:
-    #     """Return docs most similar to query.
-    #
-    #     Args:
-    #         query: Text to look up documents similar to.
-    #         k: Number of Documents to return. Defaults to 4.
-    #
-    #     Returns:
-    #         List of Documents most similar to the query.
-    #     """
-    #     results = self.similarity_search_with_score(query, k)
-    #     return list(map(itemgetter(0), results))
-    #
-    # def _similarity_search_with_relevance_scores(
-    #     self,
-    #     query: str,
-    #     k: int = 4,
-    #     **kwargs: Any,
-    # ) -> List[Tuple[Document, float]]:
-    #     """Return docs and relevance scores, normalized on a scale from 0 to 1.
-    #
-    #     0 is dissimilar, 1 is most similar.
-    #     """
-    #     raise NotImplementedError
-    #
-    # def similarity_search_by_vector(self, embedding: List[float], k: int = 4, **kwargs: Any) -> List[Document]:
-    #     """Return docs most similar to embedding vector.
-    #
-    #     Args:
-    #         embedding: Embedding to look up documents similar to.
-    #         k: Number of Documents to return. Defaults to 4.
-    #
-    #     Returns:
-    #         List of Documents most similar to the query vector.
-    #     """
-    #     from docarray.utils.find import find
-    #
-    #     query_doc = self.doc_cls(embedding=embedding)
-    #     result_docs = find(index=self.docs, query=query_doc, limit=k, search_field='embedding').documents
-    #
-    #     result = [Document(page_content=doc.text) for doc in result_docs]
-    #     return result
-
-    def max_marginal_relevance_search(
-        self, query: str, k: int = 4, fetch_k: int = 20, **kwargs: Any
-    ) -> List[Document]:
-        """Return docs selected using the maximal marginal relevance.
-
-        Maximal marginal relevance optimizes for similarity to query AND diversity
-        among selected documents.
+        """Create an in-memory store and insert data.
 
         Args:
-            query: Text to look up documents similar to.
-            k: Number of Documents to return. Defaults to 4.
-            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
+            texts (List[str]): Text data.
+            embedding (Embeddings): Embedding function.
+            metadatas (Optional[List[dict]]): Metadata for each text if it exists.
+                Defaults to None.
+            metric (str): metric for exact nearest-neighbor search.
+                Can be one of: "cosine_sim", "euclidean_dist" and "sqeuclidean_dist".
+                Defaults to "cosine_sim".
 
         Returns:
-            List of Documents selected by maximal marginal relevance.
-                """
-        from docarray.utils.find import find
-
-        query_embedding = self.embedding.embed_query(query)
-        query_doc = self.doc_cls(embedding=query_embedding)
-        find_res = find(self.docs, query_doc, limit=k)
-
-        embeddings = [emb for emb in find_res.documents.emb]
-        mmr_selected = maximal_marginal_relevance(query_embedding, embeddings, k=k)
-        results = []
-        for idx in mmr_selected:
-            results.append(Document(page_content=self.docs[idx].text))
-        return results
-
+            InMemory Vector Store
+        """
+        store = cls(
+            embedding=embedding,
+            metric=metric,
+        )
+        store.add_texts(texts=texts, metadatas=metadatas)
+        return store
diff --git a/langchain/vectorstores/vector_store_from_doc_index.py b/langchain/vectorstores/vector_store_from_doc_index.py
index a72c883b2e201..a471bfe1cd703 100644
--- a/langchain/vectorstores/vector_store_from_doc_index.py
+++ b/langchain/vectorstores/vector_store_from_doc_index.py
@@ -1,72 +1,72 @@
-from typing import TYPE_CHECKING, TypeVar, List, Optional, Type, Iterable, Any, Tuple
-
-from docarray import DocList, BaseDoc
 from operator import itemgetter
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
+
+try:
+    from docarray import BaseDoc
+    from docarray.index.abstract import BaseDocIndex
+    from docarray.typing import NdArray
+except ImportError:
+    BaseDoc = None
+    BaseDocIndex = None
+    NdArray = None
 
 from langchain.embeddings.base import Embeddings
 from langchain.schema import Document
 from langchain.vectorstores import VectorStore
-
-from docarray.index.abstract import BaseDocIndex
-
-
-T_Doc = TypeVar('T_Doc', bound=BaseDocIndex)
+from langchain.vectorstores.utils import maximal_marginal_relevance
 
 
-def _check_docarray_import():
+def _check_docarray_import() -> None:
     try:
         import docarray
-        da_version = docarray.__version__.split('.')
-        if int(da_version[0]) == 0 and int(da_version[1]) <= 21:
+
+        da_version = docarray.__version__.split(".")
+        if int(da_version[0]) == 0 and int(da_version[1]) <= 30:
             raise ValueError(
-                f'To use the HnswLib VectorStore the docarray version >=0.31.0 is expected, '
-                f'received: {docarray.__version__}.'
-                f'To upgrade, please run: `pip install -U docarray`.'
+                f"To use the HnswLib VectorStore the docarray version >=0.31.0 is expected, "
+                f"received: {docarray.__version__}."
+                f"To upgrade, please run: `pip install -U docarray`."
             )
     except ImportError:
         raise ImportError(
             "Could not import docarray python package. "
-            "Please install it with `pip install -U docarray`."
+            "Please install it with `pip install \"langchain[docarray]\"`."
         )
 
 
 class VecStoreFromDocIndex(VectorStore):
-    doc_index: BaseDocIndex = None
-    doc_cls: Type[BaseDoc] = None
-    embedding: Embeddings = None
+    doc_index: BaseDocIndex
+    doc_cls: Type[BaseDoc]
+    embedding: Embeddings
 
     def __init__(
         self,
-        doc_index: T_Doc,
-        texts: List[str],
+        doc_index: BaseDocIndex,
         embedding: Embeddings,
-        metadatas: Optional[List[dict]],
     ):
+        """Initialize a vector store from DocArray's DocIndex."""
         self.doc_index = doc_index
         self.doc_cls = doc_index._schema
         self.embedding = embedding
 
-        embeddings = self.embedding.embed_documents(texts)
-        if metadatas is None:
-            metadatas = [{} for _ in range(len(texts))]
-
-        docs = DocList[self.doc_cls](
-            [
-                self.doc_cls(
-                    text=t,
-                    embedding=e,
-                    metadata=m,
-                ) for t, m, e in zip(texts, metadatas, embeddings)
-            ]
-        )
-        if len(docs) > 0:
-            self.doc_index.index(docs)
+    @staticmethod
+    def _get_doc_cls(embeddings_params: Dict[str, Any]) -> Type[BaseDoc]:
+        """Get docarray Document class describing the schema of DocIndex."""
+        from docarray import BaseDoc
+        from pydantic import Field
+
+        class DocArrayDoc(BaseDoc):
+            text: Optional[str]
+            embedding: Optional[NdArray] = Field(**embeddings_params)
+            metadata: Optional[dict]
+
+        return DocArrayDoc
 
     def add_texts(
         self,
         texts: Iterable[str],
         metadatas: Optional[List[dict]] = None,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> List[str]:
         """Run more texts through the embeddings and add to the vectorstore.
 
@@ -80,16 +80,12 @@ def add_texts(
         if metadatas is None:
             metadatas = [{} for _ in range(len(list(texts)))]
 
-        ids = []
+        ids: List[str] = []
         embeddings = self.embedding.embed_documents(texts)
         for t, m, e in zip(texts, metadatas, embeddings):
-            doc = self.doc_cls(
-                text=t,
-                embedding=e,
-                metadata=m
-            )
+            doc = self.doc_cls(text=t, embedding=e, metadata=m)
             self.doc_index.index([doc])
-            ids.append(doc.id)  # TODO return index of self.docs ?
+            ids.append(str(doc.id))
 
         return ids
 
@@ -107,9 +103,11 @@ def similarity_search_with_score(
         """
         query_embedding = self.embedding.embed_query(query)
         query_doc = self.doc_cls(embedding=query_embedding)
-        docs, scores = self.doc_index.find(query_doc, search_field='embedding', limit=k)
+        docs, scores = self.doc_index.find(query_doc, search_field="embedding", limit=k)
 
-        result = [(Document(page_content=doc.text), score) for doc, score in zip(docs, scores)]
+        result = [
+            (Document(page_content=doc.text), score) for doc, score in zip(docs, scores)
+        ]
         return result
 
     def similarity_search(
@@ -127,7 +125,6 @@ def similarity_search(
         results = self.similarity_search_with_score(query, k)
         return list(map(itemgetter(0), results))
 
-
     def _similarity_search_with_relevance_scores(
         self,
         query: str,
@@ -140,7 +137,9 @@ def _similarity_search_with_relevance_scores(
         """
         raise NotImplementedError
 
-    def similarity_search_by_vector(self, embedding: List[float], k: int = 4, **kwargs: Any) -> List[Document]:
+    def similarity_search_by_vector(
+        self, embedding: List[float], k: int = 4, **kwargs: Any
+    ) -> List[Document]:
         """Return docs most similar to embedding vector.
 
         Args:
@@ -152,7 +151,9 @@ def similarity_search_by_vector(self, embedding: List[float], k: int = 4, **kwar
         """
 
         query_doc = self.doc_cls(embedding=embedding)
-        docs = self.doc_index.find(query_doc, search_field='embedding', limit=k).documents
+        docs = self.doc_index.find(
+            query_doc, search_field="embedding", limit=k
+        ).documents
 
         result = [Document(page_content=doc.text) for doc in docs]
         return result
@@ -176,11 +177,13 @@ def max_marginal_relevance_search(
         query_embedding = self.embedding.embed_query(query)
         query_doc = self.doc_cls(embedding=query_embedding)
 
-        docs, scores = self.doc_index.find(query_doc, search_field='embedding', limit=fetch_k)
+        docs = self.doc_index.find(
+            query_doc, search_field="embedding", limit=fetch_k
+        ).documents
 
-        embeddings = [emb for emb in docs.emb]
-
-        mmr_selected = maximal_marginal_relevance(query_embedding, embeddings, k=k)
-        results = [Document(page_content=self.doc_index[idx].text) for idx in mmr_selected]
+        mmr_selected = maximal_marginal_relevance(query_embedding, docs.embedding, k=k)
+        results = [
+            Document(page_content=docs[idx].text, metadata=docs[idx].metadata)
+            for idx in mmr_selected
+        ]
         return results
-
diff --git a/poetry.lock b/poetry.lock
index fc785b03aebfb..4109d28490ed9 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1515,14 +1515,14 @@ wmi = ["wmi (>=1.5.1,<2.0.0)"]
 
 [[package]]
 name = "docarray"
-version = "0.30.0"
+version = "0.31.0.dev35"
 description = "The data structure for multimodal data"
 category = "main"
 optional = true
 python-versions = ">=3.7,<4.0"
 files = [
-    {file = "docarray-0.30.0-py3-none-any.whl", hash = "sha256:739dbe06bfee6f1cbc030156036764ca1c75832dcc01a07c724640c6d464651b"},
-    {file = "docarray-0.30.0.tar.gz", hash = "sha256:dd73e9ff20485a1d819ac906a59ee0cbc4382e78a5061286e77eb7d7f8b28a8e"},
+    {file = "docarray-0.31.0.dev35-py3-none-any.whl", hash = "sha256:a5c578cbf69853dddd17e845cc3fb2250cb1a0800ef48082d2a40a38bc9a7165"},
+    {file = "docarray-0.31.0.dev35.tar.gz", hash = "sha256:f918cc5c35ed2df9b9ad7ef0abcc0bf5f3fe38a8f9e33526a33293d26a956f2e"},
 ]
 
 [package.dependencies]
@@ -1748,7 +1748,7 @@ files = [
 name = "exceptiongroup"
 version = "1.1.1"
 description = "Backport of PEP 654 (exception groups)"
-category = "main"
+category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -2769,7 +2769,7 @@ testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-chec
 name = "iniconfig"
 version = "2.0.0"
 description = "brain-dead simple config-ini parsing"
-category = "main"
+category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -5445,7 +5445,7 @@ typing-extensions = {version = "*", markers = "python_version <= \"3.8\""}
 name = "pluggy"
 version = "1.0.0"
 description = "plugin and hook calling mechanisms for python"
-category = "main"
+category = "dev"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -6176,7 +6176,7 @@ Pillow = ">=8.0.0"
 name = "pytest"
 version = "7.3.1"
 description = "pytest: simple powerful testing with Python"
-category = "main"
+category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -8203,7 +8203,7 @@ files = [
 name = "tomli"
 version = "2.0.1"
 description = "A lil' TOML parser"
-category = "main"
+category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -9338,10 +9338,11 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\
 cffi = ["cffi (>=1.11)"]
 
 [extras]
-all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect", "docarray", "protobuf"]
+all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect", "docarray", "protobuf", "hnswlib"]
 cohere = ["cohere"]
-docarray = ["docarray", "protobuf"]
 embeddings = ["sentence-transformers"]
+hnswlib = ["docarray", "protobuf", "hnswlib"]
+in-memory-store = ["docarray"]
 llms = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers"]
 openai = ["openai"]
 qdrant = ["qdrant-client"]
@@ -9349,4 +9350,4 @@ qdrant = ["qdrant-client"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<4.0"
-content-hash = "81e7b09595d12739f056c5f5d34021ad7e3f855a8da711d3ccc23aab72cfbd83"
+content-hash = "5223e3c6bdf37a28e1ee1cfb26e7f8d84fd6bc94893c96ecaca428fb9e8278eb"
diff --git a/pyproject.toml b/pyproject.toml
index 61406f1db2e0c..869d5f8d0d453 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -69,11 +69,9 @@ pytesseract = {version = "^0.3.10", optional=true}
 html2text = {version="^2020.1.16", optional=true}
 numexpr = "^2.8.4"
 duckduckgo-search = {version="^2.8.6", optional=true}
-docarray = {version="^0.30.0", optional=true}
+docarray = {version="^0.31.0.dev35", optional=true}
 protobuf = {version="3.19", optional=true}
 hnswlib = {version="^0.7.0", optional=true}
-pytest = "^7.3.1"
-
 
 [tool.poetry.group.docs.dependencies]
 autodoc_pydantic = "^1.8.0"
@@ -149,9 +147,10 @@ llms = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "manifes
 qdrant = ["qdrant-client"]
 openai = ["openai"]
 cohere = ["cohere"]
-docarray = ["docarray", "protobuf"]
+in_memory_store = ["docarray"]
+hnswlib = ["docarray", "protobuf", "hnswlib"]
 embeddings = ["sentence-transformers"]
-all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "boto3", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect", "docarray", "protobuf"]
+all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "boto3", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect", "docarray", "protobuf", "hnswlib"]
 
 [tool.ruff]
 select = [
diff --git a/tests/integration_tests/vectorstores/test_hnsw_lib.py b/tests/integration_tests/vectorstores/test_hnsw_lib.py
index 58919d37e7094..fc86321c20dd5 100644
--- a/tests/integration_tests/vectorstores/test_hnsw_lib.py
+++ b/tests/integration_tests/vectorstores/test_hnsw_lib.py
@@ -14,7 +14,7 @@ def test_hnswlib_vec_store_from_texts(tmp_path) -> None:
         FakeEmbeddings(),
         work_dir=str(tmp_path),
         n_dim=10,
-        sim_metric='cosine',
+        dist_metric='cosine',
     )
     assert isinstance(docsearch, HnswLib)
     assert docsearch.doc_index.num_docs() == 3
@@ -25,10 +25,8 @@ def test_hnswlib_vec_store_add_texts(tmp_path) -> None:
     docsearch = HnswLib(
         work_dir=str(tmp_path),
         n_dim=10,
-        texts=[],
         embedding=FakeEmbeddings(),
-        metadatas=[{}],
-        sim_metric='cosine',
+        dist_metric='cosine',
     )
     assert isinstance(docsearch, HnswLib)
     assert docsearch.doc_index.num_docs() == 0
@@ -53,7 +51,7 @@ def test_sim_search(metric, tmp_path) -> None:
 
 
 @pytest.mark.parametrize('metric', ['cosine', 'ip', 'l2'])
-def test_sim_search_by_vector(metric, tmp_path):
+def test_sim_search_by_vector(metric, tmp_path) -> None:
     """Test end to end construction and similarity search by vector."""
     texts = ["foo", "bar", "baz"]
     hnswlib_vec_store = HnswLib.from_texts(
@@ -85,3 +83,22 @@ def test_sim_search_with_score(metric, tmp_path) -> None:
     assert out_doc == Document(page_content="foo")
     assert np.isclose(out_score, 0.0, atol=1.e-6)
 
+
+@pytest.mark.parametrize('metric', ['cosine', 'l2'])
+def test_max_marginal_relevance_search(metric, tmp_path) -> None:
+    """Test MMR search."""
+    texts = ["foo", "bar", "baz"]
+    metadatas = [{"page": i} for i in range(len(texts))]
+    docsearch = HnswLib.from_texts(
+        texts,
+        FakeEmbeddings(),
+        metadatas=metadatas,
+        dist_metric=metric,
+        work_dir=str(tmp_path),
+        n_dim=10,
+    )
+    output = docsearch.max_marginal_relevance_search("foo", k=2, fetch_k=3)
+    assert output == [
+        Document(page_content="foo", metadata={"page": 0}),
+        Document(page_content="bar", metadata={"page": 1}),
+    ]
diff --git a/tests/integration_tests/vectorstores/test_in_memory.py b/tests/integration_tests/vectorstores/test_in_memory.py
index 62834336c7c24..e90c4ed312d21 100644
--- a/tests/integration_tests/vectorstores/test_in_memory.py
+++ b/tests/integration_tests/vectorstores/test_in_memory.py
@@ -20,7 +20,6 @@ def test_in_memory_vec_store_from_texts() -> None:
 def test_in_memory_vec_store_add_texts(tmp_path) -> None:
     """Test end to end construction and simple similarity search."""
     docsearch = InMemory(
-        texts=[],
         embedding=FakeEmbeddings(),
     )
     assert isinstance(docsearch, InMemory)
@@ -65,7 +64,7 @@ def test_sim_search_with_score(metric) -> None:
 
 
 @pytest.mark.parametrize('metric', ['cosine_sim', 'euclidean_dist', 'sqeuclidean_dist'])
-def test_sim_search_by_vector(metric):
+def test_sim_search_by_vector(metric) -> None:
     """Test end to end construction and similarity search by vector."""
     texts = ["foo", "bar", "baz"]
     in_memory_vec_store = InMemory.from_texts(
@@ -79,3 +78,20 @@ def test_sim_search_by_vector(metric):
 
     assert output == [Document(page_content="bar")]
 
+
+@pytest.mark.parametrize('metric', ['cosine_sim', 'euclidean_dist', 'sqeuclidean_dist'])
+def test_max_marginal_relevance_search(metric) -> None:
+    """Test MRR search."""
+    texts = ["foo", "bar", "baz"]
+    metadatas = [{"page": i} for i in range(len(texts))]
+    docsearch = InMemory.from_texts(
+        texts,
+        FakeEmbeddings(),
+        metadatas=metadatas,
+        metric=metric
+    )
+    output = docsearch.max_marginal_relevance_search("foo", k=2, fetch_k=3)
+    assert output == [
+        Document(page_content="foo", metadata={"page": 0}),
+        Document(page_content="bar", metadata={"page": 1}),
+    ]

From 30456bc3c30fca7a09cd115e07205a1db997159a Mon Sep 17 00:00:00 2001
From: anna-charlotte <charlotte.gerhaher@jina.ai>
Date: Thu, 27 Apr 2023 15:39:30 +0200
Subject: [PATCH 4/6] Add more configurations for hnswlib

Signed-off-by: anna-charlotte <charlotte.gerhaher@jina.ai>
---
 langchain/vectorstores/hnsw_lib.py            | 54 ++++++++++++++++++-
 .../vectorstores/test_hnsw_lib.py             | 51 ++++++++++++++++--
 2 files changed, 101 insertions(+), 4 deletions(-)

diff --git a/langchain/vectorstores/hnsw_lib.py b/langchain/vectorstores/hnsw_lib.py
index ddc3ec7b6102c..42f5c902cb5af 100644
--- a/langchain/vectorstores/hnsw_lib.py
+++ b/langchain/vectorstores/hnsw_lib.py
@@ -24,6 +24,13 @@ def __init__(
         work_dir: str,
         n_dim: int,
         dist_metric: str = "cosine",
+        max_elements: int = 1024,
+        index: bool = True,
+        ef_construction: int = 200,
+        ef: int = 10,
+        M: int = 16,
+        allow_replace_deleted: bool = True,
+        num_threads: int = 1,
     ) -> None:
         """Initialize HnswLib store.
 
@@ -33,6 +40,19 @@ def __init__(
             n_dim (int): dimension of an embedding.
             dist_metric (str): Distance metric for HnswLib can be one of: "cosine",
                 "ip", and "l2". Defaults to "cosine".
+            max_elements (int): Maximum number of vectors that can be stored.
+                Defaults to 1024.
+            index (bool): Whether an index should be built for this field.
+                Defaults to True.
+            ef_construction (int): defines a construction time/accuracy trade-off.
+                Defaults to 200.
+            ef (int): parameter controlling query time/accuracy trade-off.
+                Defaults to 10.
+            M (int): parameter that defines the maximum number of outgoing
+                connections in the graph. Defaults to 16.
+            allow_replace_deleted (bool): Enables replacing of deleted elements
+                with newly added ones. Defaults to True.
+            num_threads (int): Sets the number of cpu threads to use. Defaults to 1.
         """
         _check_docarray_import()
         from docarray.index import HnswDocumentIndex
@@ -45,7 +65,19 @@ def __init__(
                 "Please install it with `pip install \"langchain[hnswlib]\"`."
             )
 
-        doc_cls = self._get_doc_cls({"dim": n_dim, "space": dist_metric})
+        doc_cls = self._get_doc_cls(
+            {
+                "dim": n_dim,
+                "space": dist_metric,
+                "max_elements": max_elements,
+                "index": index,
+                "ef_construction": ef_construction,
+                "ef": ef,
+                "M": M,
+                "allow_replace_deleted": allow_replace_deleted,
+                "num_threads": num_threads,
+            }
+        )
         doc_index = HnswDocumentIndex[doc_cls](work_dir=work_dir)
         super().__init__(doc_index, embedding)
 
@@ -58,6 +90,13 @@ def from_texts(
         work_dir: str = None,
         n_dim: int = None,
         dist_metric: str = "cosine",
+        max_elements: int = 1024,
+        index: bool = True,
+        ef_construction: int = 200,
+        ef: int = 10,
+        M: int = 16,
+        allow_replace_deleted: bool = True,
+        num_threads: int = 1,
     ) -> HnswLib:
         """Create an HnswLib store and insert data.
 
@@ -70,6 +109,19 @@ def from_texts(
             n_dim (int): dimension of an embedding.
             dist_metric (str): Distance metric for HnswLib can be one of: "cosine",
                 "ip", and "l2". Defaults to "cosine".
+            max_elements (int): Maximum number of vectors that can be stored.
+                Defaults to 1024.
+            index (bool): Whether an index should be built for this field.
+                Defaults to True.
+            ef_construction (int): defines a construction time/accuracy trade-off.
+                Defaults to 200.
+            ef (int): parameter controlling query time/accuracy trade-off.
+                Defaults to 10.
+            M (int): parameter that defines the maximum number of outgoing
+                connections in the graph. Defaults to 16.
+            allow_replace_deleted (bool): Enables replacing of deleted elements
+                with newly added ones. Defaults to True.
+            num_threads (int): Sets the number of cpu threads to use. Defaults to 1.
 
         Returns:
             HnswLib Vector Store
diff --git a/tests/integration_tests/vectorstores/test_hnsw_lib.py b/tests/integration_tests/vectorstores/test_hnsw_lib.py
index fc86321c20dd5..a4a6441eec779 100644
--- a/tests/integration_tests/vectorstores/test_hnsw_lib.py
+++ b/tests/integration_tests/vectorstores/test_hnsw_lib.py
@@ -36,7 +36,7 @@ def test_hnswlib_vec_store_add_texts(tmp_path) -> None:
     assert docsearch.doc_index.num_docs() == 3
 
 
-@pytest.mark.parametrize('metric', ['cosine', 'ip', 'l2'])
+@pytest.mark.parametrize('metric', ['cosine', 'l2'])
 def test_sim_search(metric, tmp_path) -> None:
     """Test end to end construction and simple similarity search."""
     texts = ["foo", "bar", "baz"]
@@ -45,12 +45,35 @@ def test_sim_search(metric, tmp_path) -> None:
         FakeEmbeddings(),
         work_dir=str(tmp_path),
         n_dim=10,
+        dist_metric=metric,
+    )
+    output = hnswlib_vec_store.similarity_search("foo", k=1)
+    assert output == [Document(page_content="foo")]
+
+
+@pytest.mark.parametrize('metric', ['cosine', 'l2'])
+def test_sim_search_all_configurations(metric, tmp_path) -> None:
+    """Test end to end construction and similarity search with all configurations."""
+    texts = ["foo", "bar", "baz"]
+    hnswlib_vec_store = HnswLib.from_texts(
+        texts,
+        FakeEmbeddings(),
+        work_dir=str(tmp_path),
+        dist_metric=metric,
+        n_dim=10,
+        max_elements=8,
+        index=False,
+        ef_construction=300,
+        ef=20,
+        M=8,
+        allow_replace_deleted=False,
+        num_threads=2,
     )
     output = hnswlib_vec_store.similarity_search("foo", k=1)
     assert output == [Document(page_content="foo")]
 
 
-@pytest.mark.parametrize('metric', ['cosine', 'ip', 'l2'])
+@pytest.mark.parametrize('metric', ['cosine', 'l2'])
 def test_sim_search_by_vector(metric, tmp_path) -> None:
     """Test end to end construction and similarity search by vector."""
     texts = ["foo", "bar", "baz"]
@@ -59,6 +82,7 @@ def test_sim_search_by_vector(metric, tmp_path) -> None:
         FakeEmbeddings(),
         work_dir=str(tmp_path),
         n_dim=10,
+        dist_metric=metric,
     )
     embedding = [1.0] * 10
     output = hnswlib_vec_store.similarity_search_by_vector(embedding, k=1)
@@ -66,7 +90,7 @@ def test_sim_search_by_vector(metric, tmp_path) -> None:
     assert output == [Document(page_content="bar")]
 
 
-@pytest.mark.parametrize('metric', ['cosine', 'ip', 'l2'])
+@pytest.mark.parametrize('metric', ['cosine', 'l2'])
 def test_sim_search_with_score(metric, tmp_path) -> None:
     """Test end to end construction and similarity search with score."""
     texts = ["foo", "bar", "baz"]
@@ -75,6 +99,7 @@ def test_sim_search_with_score(metric, tmp_path) -> None:
         FakeEmbeddings(),
         work_dir=str(tmp_path),
         n_dim=10,
+        dist_metric=metric,
     )
     output = hnswlib_vec_store.similarity_search_with_score("foo", k=1)
     assert len(output) == 1
@@ -84,6 +109,26 @@ def test_sim_search_with_score(metric, tmp_path) -> None:
     assert np.isclose(out_score, 0.0, atol=1.e-6)
 
 
+def test_sim_search_with_score_for_ip_metric(tmp_path) -> None:
+    """
+    Test end to end construction and similarity search with score for ip
+    (inner-product) metric.
+    """
+    texts = ["foo", "bar", "baz"]
+    hnswlib_vec_store = HnswLib.from_texts(
+        texts,
+        FakeEmbeddings(),
+        work_dir=str(tmp_path),
+        n_dim=10,
+        dist_metric='ip',
+    )
+    output = hnswlib_vec_store.similarity_search_with_score("foo", k=3)
+    assert len(output) == 3
+
+    for result in output:
+        assert result[1] == -8.0
+
+
 @pytest.mark.parametrize('metric', ['cosine', 'l2'])
 def test_max_marginal_relevance_search(metric, tmp_path) -> None:
     """Test MRR search."""

From 5d2324a65fb4c8dc78dc7ac5c886d0325ebb3b57 Mon Sep 17 00:00:00 2001
From: anna-charlotte <charlotte.gerhaher@jina.ai>
Date: Thu, 27 Apr 2023 16:08:05 +0200
Subject: [PATCH 5/6] refactor: rename InMemory to InMemoryExactSearch

Signed-off-by: anna-charlotte <charlotte.gerhaher@jina.ai>
---
 langchain/vectorstores/__init__.py             |  4 ++--
 ...{in_memory.py => in_memory_exact_search.py} | 12 ++++++------
 ...emory.py => test_in_memory_exact_search.py} | 18 +++++++++---------
 3 files changed, 17 insertions(+), 17 deletions(-)
 rename langchain/vectorstores/{in_memory.py => in_memory_exact_search.py} (86%)
 rename tests/integration_tests/vectorstores/{test_in_memory.py => test_in_memory_exact_search.py} (85%)

diff --git a/langchain/vectorstores/__init__.py b/langchain/vectorstores/__init__.py
index 5360f4b8f25c2..ed3982ad7e1ab 100644
--- a/langchain/vectorstores/__init__.py
+++ b/langchain/vectorstores/__init__.py
@@ -8,7 +8,7 @@
 from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch
 from langchain.vectorstores.faiss import FAISS
 from langchain.vectorstores.hnsw_lib import HnswLib
-from langchain.vectorstores.in_memory import InMemory
+from langchain.vectorstores.in_memory_exact_search import InMemoryExactSearch
 from langchain.vectorstores.milvus import Milvus
 from langchain.vectorstores.myscale import MyScale, MyScaleSettings
 from langchain.vectorstores.opensearch_vector_search import OpenSearchVectorSearch
@@ -37,5 +37,5 @@
     "SupabaseVectorStore",
     "AnalyticDB",
     "HnswLib",
-    "InMemory",
+    "InMemoryExactSearch",
 ]
diff --git a/langchain/vectorstores/in_memory.py b/langchain/vectorstores/in_memory_exact_search.py
similarity index 86%
rename from langchain/vectorstores/in_memory.py
rename to langchain/vectorstores/in_memory_exact_search.py
index 07e1f49d82c17..bbaabe7e11c6b 100644
--- a/langchain/vectorstores/in_memory.py
+++ b/langchain/vectorstores/in_memory_exact_search.py
@@ -11,8 +11,8 @@
 )
 
 
-class InMemory(VecStoreFromDocIndex):
-    """Wrapper around in-memory storage.
+class InMemoryExactSearch(VecStoreFromDocIndex):
+    """Wrapper around in-memory storage for exact search.
 
     To use it, you should have the ``docarray`` package with version >=0.31.0 installed.
     You can install it with `pip install "langchain[in_memory_store]"`.
@@ -23,7 +23,7 @@ def __init__(
         embedding: Embeddings,
         metric: str = "cosine_sim",
     ) -> None:
-        """Initialize in-memory store.
+        """Initialize InMemoryExactSearch store.
 
         Args:
             embedding (Embeddings): Embedding function.
@@ -45,8 +45,8 @@ def from_texts(
         embedding: Embeddings,
         metadatas: Optional[List[dict]] = None,
         metric: str = "cosine_sim",
-    ) -> InMemory:
-        """Create an in-memory store and insert data.
+    ) -> InMemoryExactSearch:
+        """Create an InMemoryExactSearch store and insert data.
 
         Args:
             texts (List[str]): Text data.
@@ -58,7 +58,7 @@ def from_texts(
                 Defaults to "cosine_sim".
 
         Returns:
-            InMemory Vector Store
+            InMemoryExactSearch Vector Store
         """
         store = cls(
             embedding=embedding,
diff --git a/tests/integration_tests/vectorstores/test_in_memory.py b/tests/integration_tests/vectorstores/test_in_memory_exact_search.py
similarity index 85%
rename from tests/integration_tests/vectorstores/test_in_memory.py
rename to tests/integration_tests/vectorstores/test_in_memory_exact_search.py
index e90c4ed312d21..7e0142ec8212f 100644
--- a/tests/integration_tests/vectorstores/test_in_memory.py
+++ b/tests/integration_tests/vectorstores/test_in_memory_exact_search.py
@@ -2,27 +2,27 @@
 import pytest
 
 from langchain.schema import Document
-from langchain.vectorstores.in_memory import InMemory
+from langchain.vectorstores.in_memory_exact_search import InMemoryExactSearch
 from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
 
 
 def test_in_memory_vec_store_from_texts() -> None:
     """Test end to end construction and simple similarity search."""
     texts = ["foo", "bar", "baz"]
-    docsearch = InMemory.from_texts(
+    docsearch = InMemoryExactSearch.from_texts(
         texts,
         FakeEmbeddings(),
     )
-    assert isinstance(docsearch, InMemory)
+    assert isinstance(docsearch, InMemoryExactSearch)
     assert docsearch.doc_index.num_docs() == 3
 
 
 def test_in_memory_vec_store_add_texts(tmp_path) -> None:
     """Test end to end construction and simple similarity search."""
-    docsearch = InMemory(
+    docsearch = InMemoryExactSearch(
         embedding=FakeEmbeddings(),
     )
-    assert isinstance(docsearch, InMemory)
+    assert isinstance(docsearch, InMemoryExactSearch)
     assert docsearch.doc_index.num_docs() == 0
 
     texts = ["foo", "bar", "baz"]
@@ -34,7 +34,7 @@ def test_in_memory_vec_store_add_texts(tmp_path) -> None:
 def test_sim_search(metric) -> None:
     """Test end to end construction and simple similarity search."""
     texts = ["foo", "bar", "baz"]
-    in_memory_vec_store = InMemory.from_texts(
+    in_memory_vec_store = InMemoryExactSearch.from_texts(
         texts=texts,
         embedding=FakeEmbeddings(),
         metric=metric,
@@ -48,7 +48,7 @@ def test_sim_search(metric) -> None:
 def test_sim_search_with_score(metric) -> None:
     """Test end to end construction and similarity search with score."""
     texts = ["foo", "bar", "baz"]
-    in_memory_vec_store = InMemory.from_texts(
+    in_memory_vec_store = InMemoryExactSearch.from_texts(
         texts=texts,
         embedding=FakeEmbeddings(),
         metric=metric,
@@ -67,7 +67,7 @@ def test_sim_search_with_score(metric) -> None:
 def test_sim_search_by_vector(metric) -> None:
     """Test end to end construction and similarity search by vector."""
     texts = ["foo", "bar", "baz"]
-    in_memory_vec_store = InMemory.from_texts(
+    in_memory_vec_store = InMemoryExactSearch.from_texts(
         texts=texts,
         embedding=FakeEmbeddings(),
         metric=metric,
@@ -84,7 +84,7 @@ def test_max_marginal_relevance_search(metric) -> None:
     """Test MRR search."""
     texts = ["foo", "bar", "baz"]
     metadatas = [{"page": i} for i in range(len(texts))]
-    docsearch = InMemory.from_texts(
+    docsearch = InMemoryExactSearch.from_texts(
         texts,
         FakeEmbeddings(),
         metadatas=metadatas,

From ecc73b4bb948a9852237957944c5653f94d2b08f Mon Sep 17 00:00:00 2001
From: anna-charlotte <charlotte.gerhaher@jina.ai>
Date: Fri, 28 Apr 2023 10:38:25 +0200
Subject: [PATCH 6/6] fix: change space default for hnswlib to l2

---
 langchain/vectorstores/hnsw_lib.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/langchain/vectorstores/hnsw_lib.py b/langchain/vectorstores/hnsw_lib.py
index 42f5c902cb5af..2857248f0f5aa 100644
--- a/langchain/vectorstores/hnsw_lib.py
+++ b/langchain/vectorstores/hnsw_lib.py
@@ -89,7 +89,7 @@ def from_texts(
         metadatas: Optional[List[dict]] = None,
         work_dir: str = None,
         n_dim: int = None,
-        dist_metric: str = "cosine",
+        dist_metric: str = "l2",
         max_elements: int = 1024,
         index: bool = True,
         ef_construction: int = 200,
@@ -108,7 +108,7 @@ def from_texts(
             work_dir (str): path to the location where all the data will be stored.
             n_dim (int): dimension of an embedding.
             dist_metric (str): Distance metric for HnswLib can be one of: "cosine",
-                "ip", and "l2". Defaults to "cosine".
+                "ip", and "l2". Defaults to "l2".
             max_elements (int): Maximum number of vectors that can be stored.
                 Defaults to 1024.
             index (bool): Whether an index should be built for this field.