From b1066d616fd132b80a146c8cb56be36fa5825f51 Mon Sep 17 00:00:00 2001
From: rigazilla <vrigamon@redhat.com>
Date: Tue, 20 Feb 2024 11:06:30 +0100
Subject: [PATCH] Adding support for Infinispan as VectorStore

---
 .../integrations/providers/infinispanvs.mdx   |  17 +
 .../vectorstores/infinispanvs.ipynb           | 408 ++++++++++++++
 .../vectorstores/__init__.py                  |   9 +
 .../vectorstores/infinispanvs.py              | 506 ++++++++++++++++++
 .../vectorstores/test_infinispanvs.py         | 135 +++++
 .../vectorstores/test_public_api.py           |   1 +
 6 files changed, 1076 insertions(+)
 create mode 100644 docs/docs/integrations/providers/infinispanvs.mdx
 create mode 100644 docs/docs/integrations/vectorstores/infinispanvs.ipynb
 create mode 100644 libs/community/langchain_community/vectorstores/infinispanvs.py
 create mode 100644 libs/community/tests/integration_tests/vectorstores/test_infinispanvs.py

diff --git a/docs/docs/integrations/providers/infinispanvs.mdx b/docs/docs/integrations/providers/infinispanvs.mdx
new file mode 100644
index 0000000000000..b42e7504231bf
--- /dev/null
+++ b/docs/docs/integrations/providers/infinispanvs.mdx
@@ -0,0 +1,17 @@
+# Infinispan VS
+
+> [Infinispan](https://infinispan.org) is an open-source in-memory data grid that provides
+> a key/value data store able to hold all types of data, from Java objects to plain text.
+> Since version 15 Infinispan supports vector search over caches.
+
+## Installation and Setup
+See [Get Started](https://infinispan.org/get-started/) to run an Infinispan server. You may want to disable authentication,
+which is not supported by this integration at the moment.
+
+## Vector Store
+
+See a [usage example](/docs/integrations/vectorstores/infinispanvs).
+
+```python
+from langchain_community.vectorstores import InfinispanVS
+```
diff --git a/docs/docs/integrations/vectorstores/infinispanvs.ipynb b/docs/docs/integrations/vectorstores/infinispanvs.ipynb
new file mode 100644
index 0000000000000..f0dff76c49dda
--- /dev/null
+++ b/docs/docs/integrations/vectorstores/infinispanvs.ipynb
@@ -0,0 +1,408 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "cffb482c-bbd8-4829-b185-0d930a5fe0bc",
+   "metadata": {},
+   "source": [
+    "# Infinispan\n",
+    "\n",
+    "Infinispan is an open-source key-value data grid; it can work as a single node as well as distributed.\n",
+    "\n",
+    "Vector search is supported since release 15.x.\n",
+    "For more: [Infinispan Home](https://infinispan.org)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "03ec8f9a-7641-47ea-9fa0-f43ee9fc79a3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Ensure that all we need is installed\n",
+    "# You may want to skip this\n",
+    "%pip install sentence-transformers\n",
+    "%pip install langchain\n",
+    "%pip install langchain_core\n",
+    "%pip install langchain_community"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "180d172e-cca1-481c-87d5-c4f14684604d",
+   "metadata": {},
+   "source": [
+    "# Setup\n",
+    "\n",
+    "To run this demo we need a running Infinispan instance without authentication and a data file.\n",
+    "In the next three cells we're going to:\n",
+    "- create the configuration\n",
+    "- run Infinispan in docker\n",
+    "- download the data file"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b251e66e-f056-4e81-a6b4-5f4d95b6537d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "#create infinispan configuration file\n",
+    "echo 'infinispan:\n",
+    "  cache-container: \n",
+    "    name: default\n",
+    "    transport: \n",
+    "      cluster: cluster \n",
+    "      stack: tcp \n",
+    "  server:\n",
+    "    interfaces:\n",
+    "      interface:\n",
+    "        name: public\n",
+    "        inet-address:\n",
+    "          value: 0.0.0.0 \n",
+    "    socket-bindings:\n",
+    "      default-interface: public\n",
+    "      port-offset: 0        \n",
+    "      socket-binding:\n",
+    "        name: default\n",
+    "        port: 11222\n",
+    "    endpoints:\n",
+    "      endpoint:\n",
+    "        socket-binding: default\n",
+    "        rest-connector:\n",
+    "' > infinispan-noauth.yaml"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9678d5ce-894c-4e28-bf68-20d45507122f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "#get an archive of news\n",
+    "wget https://raw.githubusercontent.com/rigazilla/infinispan-vector/main/bbc_news.csv.gz"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "009da6d1-9d1a-4392-90f1-5c654dd12654",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!docker run -d --name infinispanvs-demo -v $(pwd):/user-config  -p 11222:11222 infinispan/server:15.0.0.Dev09 -c /user-config/infinispan-noauth.yaml "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b575cde9-4c62-47b3-af89-109ed39f56b6",
+   "metadata": {},
+   "source": [
+    "# The Code\n",
+    "\n",
+    "## Pick up an embedding model\n",
+    "\n",
+    "In this demo we're using\n",
+    "a HuggingFace embedding model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d2c9f46f-3c78-4865-810b-52408dff5fb7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.embeddings import HuggingFaceEmbeddings\n",
+    "from langchain_core.embeddings import Embeddings\n",
+    "\n",
+    "model_name = \"sentence-transformers/all-MiniLM-L12-v2\"\n",
+    "hf = HuggingFaceEmbeddings(model_name=model_name)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "61ce7e1f-51ee-4d3d-ad3c-97088b1120f6",
+   "metadata": {},
+   "source": [
+    "## Setup Infinispan cache\n",
+    "\n",
+    "Infinispan is a very flexible key-value store; it can store raw bits as well as complex data types.\n",
+    "We need to configure it to store data containing embedded vectors.\n",
+    "\n",
+    "In the next cells we're going to:\n",
+    "- create an empty Infinispan VectorStore\n",
+    "- deploy a protobuf definition of our data\n",
+    "- create a cache"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "49668bf1-778b-466d-86fb-41747ed52b74",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Creating a langchain_core.VectorStore\n",
+    "from langchain_community.vectorstores import InfinispanVS\n",
+    "\n",
+    "ispnvs = InfinispanVS.from_texts(\n",
+    "    texts={}, embedding=hf, cache_name=\"demo_cache\", entity_name=\"demo_entity\"\n",
+    ")\n",
+    "ispn = ispnvs.ispn"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0cedf066-aaab-4185-b049-93eea9b48329",
+   "metadata": {},
+   "source": [
+    "### Protobuf definition\n",
+    "\n",
+    "Below there's the protobuf definition of our data type that contains:\n",
+    "- embedded vector (field 1)\n",
+    "- text of the news (2)\n",
+    "- title of the news (3)\n",
+    "\n",
+    "As you can see, there are additional annotations in the comments that tell Infinispan that:\n",
+    "- data type must be indexed (`@Indexed`)\n",
+    "- field 1 is an embedded vector (`@Vector`)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1fa0add0-8317-4667-9b8c-5d91c47f752a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "\n",
+    "# Infinispan supports protobuf schemas\n",
+    "schema_vector = \"\"\"\n",
+    "/**\n",
+    " * @Indexed\n",
+    " */\n",
+    "message demo_entity {\n",
+    "/**\n",
+    " * @Vector(dimension=384)\n",
+    " */\n",
+    "repeated float vector = 1;\n",
+    "optional string text = 2;\n",
+    "optional string title = 3;\n",
+    "}\n",
+    "\"\"\"\n",
+    "# Cleanup before deploy a new schema\n",
+    "ispnvs.schema_delete()\n",
+    "output = ispnvs.schema_create(schema_vector)\n",
+    "assert output.status_code == 200\n",
+    "assert json.loads(output.text)[\"error\"] is None\n",
+    "# Create the cache\n",
+    "ispnvs.cache_create()\n",
+    "# Cleanup old data and index\n",
+    "ispnvs.cache_clear()\n",
+    "ispnvs.cache_index_reindex()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "456da9e7-baf4-472a-a9ee-8473aed8cabd",
+   "metadata": {},
+   "source": [
+    "## Prepare the data\n",
+    "\n",
+    "In this demo we choose to store text, vector and metadata in the same cache, but other options\n",
+    "are possible: i.e. content can be stored somewhere else and the vector store could contain only a reference to the actual content."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0f6a42d3-c5ec-44ec-9b57-ebe5ca8c301a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import csv\n",
+    "import gzip\n",
+    "import time\n",
+    "\n",
+    "# Open the news file and process it as a csv\n",
+    "with gzip.open(\"bbc_news.csv.gz\", \"rt\", newline=\"\") as csvfile:\n",
+    "    spamreader = csv.reader(csvfile, delimiter=\",\", quotechar='\"')\n",
+    "    i = 0\n",
+    "    texts = []\n",
+    "    metas = []\n",
+    "    embeds = []\n",
+    "    for row in spamreader:\n",
+    "        # first and fifth value are joined to form the content\n",
+    "        # to be processed\n",
+    "        text = row[0] + \".\" + row[4]\n",
+    "        texts.append(text)\n",
+    "        # Storing meta\n",
+    "        # Store text and title as metadata\n",
+    "        meta = {}\n",
+    "        meta[\"text\"] = row[4]\n",
+    "        meta[\"title\"] = row[0]\n",
+    "        metas.append(meta)\n",
+    "        i = i + 1\n",
+    "        # Change this to change the number of news you want to load\n",
+    "        if i >= 5000:\n",
+    "            break"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a6b00299-94db-43ca-9da3-45d12cdf2db1",
+   "metadata": {},
+   "source": [
+    "# Populate the vector store"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "75e135a6-1b38-48eb-96ca-379b6f4a653f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# add texts and fill vector db\n",
+    "keys = ispnvs.add_texts(texts, metas)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2bb6f053-208d-407e-b8b7-c6c6443522d8",
+   "metadata": {},
+   "source": [
+    "# A helper function that prints the result documents\n",
+    "\n",
+    "By default InfinispanVS returns the protobuf `text` field in the `Document.page_content`\n",
+    "and all the remaining protobuf fields (except the vector) in the `metadata`. This behaviour is\n",
+    "configurable via lambda functions at setup."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "594fad38-37f0-4dd4-9785-a99a2f009ae5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def print_docs(docs):\n",
+    "    for res, i in zip(docs, range(len(docs))):\n",
+    "        print(\"----\" + str(i + 1) + \"----\")\n",
+    "        print(\"TITLE: \" + res.metadata[\"title\"])\n",
+    "        print(res.page_content)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cfa517c7-e741-4f64-9736-6db7a6bd259a",
+   "metadata": {},
+   "source": [
+    "# Try it!!!\n",
+    "\n",
+    "Below are some sample queries."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "86e782b3-5a74-4ca1-a5d1-c0ee935a659e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "docs = ispnvs.similarity_search(\"European nations\", 5)\n",
+    "print_docs(docs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b60847f9-ef34-4c79-b276-ac62170e2d6a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print_docs(ispnvs.similarity_search(\"Milan fashion week begins\", 2))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6cbb5607-da55-4879-92cf-79ac690cc0c5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print_docs(ispnvs.similarity_search(\"Stock market is rising today\", 4))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3bb94ca1-7b1e-41ed-9d8f-b845775d11c1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print_docs(ispnvs.similarity_search(\"Why cats are so viral?\", 2))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a4fca208-b580-483d-9be0-786b6b63a31d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print_docs(ispnvs.similarity_search(\"How to stay young\", 5))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "862e4af2-9f8a-4985-90cb-997477901b1e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Clean up\n",
+    "ispnvs.schema_delete()\n",
+    "ispnvs.cache_delete()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d4a460b8-f0c8-4ae9-a7ff-cf550c3195f1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!docker rm --force infinispanvs-demo"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/libs/community/langchain_community/vectorstores/__init__.py b/libs/community/langchain_community/vectorstores/__init__.py
index 806942bb3fde7..80f7420382366 100644
--- a/libs/community/langchain_community/vectorstores/__init__.py
+++ b/libs/community/langchain_community/vectorstores/__init__.py
@@ -240,6 +240,12 @@ def _import_hologres() -> Any:
     return Hologres
 
 
+def _import_infinispanvs() -> Any:
+    from langchain_community.vectorstores.infinispanvs import InfinispanVS
+
+    return InfinispanVS
+
+
 def _import_kdbai() -> Any:
     from langchain_community.vectorstores.kdbai import KDBAI
 
@@ -569,6 +575,8 @@ def __getattr__(name: str) -> Any:
         return _import_hanavector()
     elif name == "Hologres":
         return _import_hologres()
+    elif name == "InfinispanVS":
+        return _import_infinispanvs()
     elif name == "KDBAI":
         return _import_kdbai()
     elif name == "DistanceStrategy":
@@ -696,6 +704,7 @@ def __getattr__(name: str) -> Any:
     "FAISS",
     "HanaDB",
     "Hologres",
+    "InfinispanVS",
     "KDBAI",
     "DistanceStrategy",
     "Kinetica",
diff --git a/libs/community/langchain_community/vectorstores/infinispanvs.py b/libs/community/langchain_community/vectorstores/infinispanvs.py
new file mode 100644
index 0000000000000..9ad59ebc1395b
--- /dev/null
+++ b/libs/community/langchain_community/vectorstores/infinispanvs.py
@@ -0,0 +1,506 @@
+"""Module providing Infinispan as a VectorStore"""
+
+from __future__ import annotations
+
+import json
+import logging
+import uuid
+from typing import (
+    Any,
+    Iterable,
+    List,
+    Optional,
+    Tuple,
+    Type,
+)
+
+import requests
+from langchain_core.documents import Document
+from langchain_core.embeddings import Embeddings
+from langchain_core.vectorstores import VectorStore
+
+logger = logging.getLogger(__name__)
+
+
+class InfinispanVS(VectorStore):
+    """`Infinispan` VectorStore interface.
+
+    This class exposes the method to present Infinispan as a
+    VectorStore. It relies on the Infinispan class (below) which takes care
+    of the REST interface with the server.
+
+    Example:
+        .. code-block:: python
+
+            from langchain_community.vectorstores import InfinispanVS
+            from mymodels import RGBEmbeddings
+
+            vectorDb = InfinispanVS.from_documents(docs,
+                            embedding=RGBEmbeddings(),
+                            output_fields=["texture", "color"],
+                            lambda_key=lambda text,meta: str(meta["_key"]),
+                            lambda_content=lambda item: item["color"])
+
+    """
+
+    def __init__(
+        self,
+        embedding: Optional[Embeddings] = None,
+        ids: Optional[List[str]] = None,
+        clear_old: Optional[bool] = True,
+        **kwargs: Any,
+    ):
+        """Configure the store from ``kwargs``; clear the cache if ``clear_old``."""
+        conf = kwargs
+        self.ispn = Infinispan(**conf)
+        self._configuration = conf
+        self._embedding = embedding
+        self._ids = ids
+        self._cache_name = str(conf.get("cache_name", "vector"))
+        self._entity_name = str(conf.get("entity_name", "vector"))
+        self._textfield = conf.get("textfield", "text")
+        self._vectorfield = conf.get("vectorfield", "vector")
+        self._output_fields = conf.get("output_fields")
+        # Hooks turning a result entity into Document content and metadata.
+        self._to_content = conf.get("lambda_content", self._default_content)
+        self._to_metadata = conf.get("lambda_metadata", self._default_metadata)
+        # A truthy clear_old empties the target cache right away.
+        if clear_old:
+            self.ispn.cache_clear(self._cache_name)
+
+    def _default_metadata(self, item: dict) -> dict:
+        meta = dict(item)
+        meta.pop(self._vectorfield, None)
+        meta.pop(self._textfield, None)
+        meta.pop("_type", None)
+        return meta
+
+    def _default_content(self, item: dict[str, Any]) -> Any:
+        return item.get(self._textfield)
+
+    def schema_create(self, proto: str) -> requests.Response:
+        """Deploy the schema for the vector db
+        Args:
+            proto(str): protobuf schema
+        Returns:
+            An http Response containing the result of the operation
+        """
+        return self.ispn.schema_post(self._entity_name + ".proto", proto)
+
+    def schema_delete(self) -> requests.Response:
+        """Delete the schema for the vector db
+        Returns:
+            An http Response containing the result of the operation
+        """
+        return self.ispn.schema_delete(self._entity_name + ".proto")
+
+    def cache_create(self, config: str = "") -> requests.Response:
+        """Create the cache for the vector db
+
+        When no configuration is given, a default one is generated: a
+        distributed, synchronous cache with two owners, protostream
+        encoding and indexing enabled for the entity of this store.
+
+        Args:
+            config(str): configuration of the cache, in JSON format.
+        Returns:
+            An http Response containing the result of the operation
+        """
+        if config == "":
+            # Serialize a dict rather than splicing strings, so that the
+            # entity name is always emitted as a properly escaped JSON string.
+            config = json.dumps(
+                {
+                    "distributed-cache": {
+                        "owners": "2",
+                        "mode": "SYNC",
+                        "statistics": True,
+                        "encoding": {"media-type": "application/x-protostream"},
+                        "indexing": {
+                            "enabled": True,
+                            "storage": "filesystem",
+                            "startup-mode": "AUTO",
+                            "indexing-mode": "AUTO",
+                            "indexed-entities": [self._entity_name],
+                        },
+                    }
+                },
+                indent=2,
+            )
+        return self.ispn.cache_post(self._cache_name, config)
+
+    def cache_delete(self) -> requests.Response:
+        """Delete the cache for the vector db
+        Returns:
+            An http Response containing the result of the operation
+        """
+        return self.ispn.cache_delete(self._cache_name)
+
+    def cache_clear(self) -> requests.Response:
+        """Clear the cache for the vector db
+        Returns:
+            An http Response containing the result of the operation
+        """
+        return self.ispn.cache_clear(self._cache_name)
+
+    def cache_index_clear(self) -> requests.Response:
+        """Clear the index for the vector db
+        Returns:
+            An http Response containing the result of the operation
+        """
+        return self.ispn.index_clear(self._cache_name)
+
+    def cache_index_reindex(self) -> requests.Response:
+        """Rebuild the index for the vector db
+        Returns:
+            An http Response containing the result of the operation
+        """
+        return self.ispn.index_reindex(self._cache_name)
+
+    def add_texts(
+        self,
+        texts: Iterable[str],
+        metadatas: Optional[List[dict]] = None,
+        **kwargs: Any,
+    ) -> List[str]:
+        """Store embeddings plus metadatas (texts are only embedded); return keys."""
+        # Materialize once: a generator would be exhausted after embedding.
+        texts = list(texts)
+        embeds = self._embedding.embed_documents(texts)  # type: ignore
+        if not metadatas:
+            metadatas = [{} for _ in texts]
+        ids = self._ids or [str(uuid.uuid4()) for _ in texts]
+        result = []
+        for metadata, embed, key in zip(metadatas, embeds, ids):
+            data = {"_type": self._entity_name, self._vectorfield: embed, **metadata}
+            self.ispn.put(key, json.dumps(data), self._cache_name)
+            result.append(key)
+        return result
+
+    def similarity_search(
+        self, query: str, k: int = 4, **kwargs: Any
+    ) -> List[Document]:
+        """Return docs most similar to query."""
+        documents = self.similarity_search_with_score(query=query, k=k)
+        return [doc for doc, _ in documents]
+
+    def similarity_search_with_score(
+        self, query: str, k: int = 4, **kwargs: Any
+    ) -> List[Tuple[Document, float]]:
+        """Perform a search on a query string and return results with score.
+
+        Args:
+            query (str): The text being searched.
+            k (int, optional): The amount of results to return. Defaults to 4.
+
+        Returns:
+            List[Tuple[Document, float]]
+        """
+        embed = self._embedding.embed_query(query)  # type: ignore
+        documents = self.similarity_search_with_score_by_vector(embedding=embed, k=k)
+        return documents
+
+    def similarity_search_by_vector(
+        self, embedding: List[float], k: int = 4, **kwargs: Any
+    ) -> List[Document]:
+        res = self.similarity_search_with_score_by_vector(embedding, k)
+        return [doc for doc, _ in res]
+
+    def similarity_search_with_score_by_vector(
+        self, embedding: List[float], k: int = 4
+    ) -> List[Tuple[Document, float]]:
+        """Return docs most similar to embedding vector.
+
+        The query selects the whole entity when no ``output_fields`` were
+        configured, otherwise only the listed fields; in both cases the
+        score is projected as well. The kNN predicate has the form
+        ``v.<vectorfield> <-> [<embedding>]~<k>``.
+
+        Args:
+            embedding: Embedding to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+
+        Returns:
+            List of pair (Documents, score) most similar to the query vector.
+
+        Raises:
+            ValueError: if ``output_fields`` was configured as an empty list.
+        """
+        if self._output_fields is None:
+            projection = "v"
+        elif not self._output_fields:
+            # An empty projection used to fail with a bare IndexError.
+            raise ValueError("output_fields must contain at least one field")
+        else:
+            projection = ",".join("v." + field for field in self._output_fields)
+        query_str = (
+            "select "
+            + projection
+            + ", score(v) from "
+            + self._entity_name
+            + " v where v."
+            + self._vectorfield
+            + " <-> "
+            + json.dumps(embedding)
+            + "~"
+            + str(k)
+        )
+        query_res = self.ispn.req_query(query_str, self._cache_name)
+        result = json.loads(query_res.text)
+        return self._query_result_to_docs(result)
+
+    def _query_result_to_docs(
+        self, result: dict[str, Any]
+    ) -> List[Tuple[Document, float]]:
+        documents = []
+        for row in result["hits"]:
+            hit = row["hit"] or {}
+            if self._output_fields is None:
+                entity = hit["*"]
+            else:
+                entity = {key: hit.get(key) for key in self._output_fields}
+            doc = Document(
+                page_content=self._to_content(entity),
+                metadata=self._to_metadata(entity),
+            )
+            documents.append((doc, hit["score()"]))
+        return documents
+
+    @classmethod
+    def from_texts(
+        cls: Type[InfinispanVS],
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        ids: Optional[List[str]] = None,
+        clear_old: Optional[bool] = None,
+        **kwargs: Any,
+    ) -> InfinispanVS:
+        """Create the store and add ``texts`` (if any); no cache clear by default."""
+        infinispanvs = cls(embedding=embedding, ids=ids, clear_old=clear_old, **kwargs)
+        if texts:
+            infinispanvs.add_texts(texts, metadatas)
+        return infinispanvs
+
+
+REST_TIMEOUT = 10
+
+
+class Infinispan:
+    """Helper class for `Infinispan` REST interface.
+
+    This class exposes the Infinispan operations needed to
+    create and set up a vector db.
+
+    You need a running Infinispan (15+) server without authentication.
+    You can easily start one, see: https://github.com/rigazilla/infinispan-vector#run-infinispan
+    """
+
+    def __init__(self, **kwargs: Any):
+        """Read connection settings from kwargs, falling back to defaults."""
+        self._configuration = kwargs
+        self._schema = str(kwargs.get("schema", "http"))
+        self._host = str(kwargs.get("hosts", ["127.0.0.1:11222"])[0])
+        self._default_node = self._schema + "://" + self._host
+        self._cache_url = str(kwargs.get("cache_url", "/rest/v2/caches"))
+        # schema_url has its own key; the flag stays a bool (str(False) is truthy).
+        self._schema_url = str(kwargs.get("schema_url", "/rest/v2/schemas"))
+        self._use_post_for_query = bool(kwargs.get("use_post_for_query", True))
+
+    def req_query(
+        self, query: str, cache_name: str, local: bool = False
+    ) -> requests.Response:
+        """Run a query, via POST or GET depending on ``use_post_for_query``
+        Args:
+            query(str): query requested
+            cache_name(str): name of the target cache
+            local(boolean): sent to the server as the ``local`` request parameter
+        Returns:
+            An http Response containing the result set or errors
+        """
+        if self._use_post_for_query:
+            return self._query_post(query, cache_name, local)
+        return self._query_get(query, cache_name, local)
+
+    def _query_post(
+        self, query_str: str, cache_name: str, local: bool = False
+    ) -> requests.Response:
+        api_url = (
+            self._default_node
+            + self._cache_url
+            + "/"
+            + cache_name
+            + "?action=search&local="
+            + str(local)
+        )
+        data = {"query": query_str}
+        data_json = json.dumps(data)
+        response = requests.post(
+            api_url,
+            data_json,
+            headers={"Content-Type": "application/json"},
+            timeout=REST_TIMEOUT,
+        )
+        return response
+
+    def _query_get(
+        self, query_str: str, cache_name: str, local: bool = False
+    ) -> requests.Response:
+        """Run a search with a GET request.
+
+        The search arguments are handed to requests as ``params`` so that
+        they get percent-encoded. Spliced raw into the URL, a ``+``, ``&``
+        or ``#`` inside the query would not reach the server unchanged.
+
+        Returns:
+            An http Response containing the result set or errors
+        """
+        api_url = self._default_node + self._cache_url + "/" + cache_name
+        params = {"action": "search", "query": query_str, "local": str(local)}
+        return requests.get(api_url, params=params, timeout=REST_TIMEOUT)
+
+    def post(self, key: str, data: str, cache_name: str) -> requests.Response:
+        """Post an entry
+        Args:
+            key(str): key of the entry
+            data(str): content of the entry in json format
+            cache_name(str): target cache
+        Returns:
+            An http Response containing the result of the operation
+        """
+        api_url = self._default_node + self._cache_url + "/" + cache_name + "/" + key
+        response = requests.post(
+            api_url,
+            data,
+            headers={"Content-Type": "application/json"},
+            timeout=REST_TIMEOUT,
+        )
+        return response
+
+    def put(self, key: str, data: str, cache_name: str) -> requests.Response:
+        """Put an entry
+        Args:
+            key(str): key of the entry
+            data(str): content of the entry in json format
+            cache_name(str): target cache
+        Returns:
+            An http Response containing the result of the operation
+        """
+        api_url = self._default_node + self._cache_url + "/" + cache_name + "/" + key
+        response = requests.put(
+            api_url,
+            data,
+            headers={"Content-Type": "application/json"},
+            timeout=REST_TIMEOUT,
+        )
+        return response
+
+    def get(self, key: str, cache_name: str) -> requests.Response:
+        """Get an entry
+        Args:
+            key(str): key of the entry
+            cache_name(str): target cache
+        Returns:
+            An http Response containing the entry or errors
+        """
+        api_url = self._default_node + self._cache_url + "/" + cache_name + "/" + key
+        response = requests.get(
+            api_url, headers={"Content-Type": "application/json"}, timeout=REST_TIMEOUT
+        )
+        return response
+
+    def schema_post(self, name: str, proto: str) -> requests.Response:
+        """Deploy a schema
+        Args:
+            name(str): name of the schema. Will be used as a key
+            proto(str): protobuf schema
+        Returns:
+            An http Response containing the result of the operation
+        """
+        api_url = self._default_node + self._schema_url + "/" + name
+        response = requests.post(api_url, proto, timeout=REST_TIMEOUT)
+        return response
+
+    def cache_post(self, name: str, config: str) -> requests.Response:
+        """Create a cache
+        Args:
+            name(str): name of the cache.
+            config(str): configuration of the cache.
+        Returns:
+            An http Response containing the result of the operation
+        """
+        api_url = self._default_node + self._cache_url + "/" + name
+        response = requests.post(
+            api_url,
+            config,
+            headers={"Content-Type": "application/json"},
+            timeout=REST_TIMEOUT,
+        )
+        return response
+
+    def schema_delete(self, name: str) -> requests.Response:
+        """Delete a schema
+        Args:
+            name(str): name of the schema.
+        Returns:
+            An http Response containing the result of the operation
+        """
+        api_url = self._default_node + self._schema_url + "/" + name
+        response = requests.delete(api_url, timeout=REST_TIMEOUT)
+        return response
+
+    def cache_delete(self, name: str) -> requests.Response:
+        """Delete a cache
+        Args:
+            name(str): name of the cache.
+        Returns:
+            An http Response containing the result of the operation
+        """
+        api_url = self._default_node + self._cache_url + "/" + name
+        response = requests.delete(api_url, timeout=REST_TIMEOUT)
+        return response
+
+    def cache_clear(self, cache_name: str) -> requests.Response:
+        """Clear a cache
+        Args:
+            cache_name(str): name of the cache.
+        Returns:
+            An http Response containing the result of the operation
+        """
+        api_url = (
+            self._default_node + self._cache_url + "/" + cache_name + "?action=clear"
+        )
+        response = requests.post(api_url, timeout=REST_TIMEOUT)
+        return response
+
+    def index_clear(self, cache_name: str) -> requests.Response:
+        """Clear an index on a cache
+        Args:
+            cache_name(str): name of the cache.
+        Returns:
+            An http Response containing the result of the operation
+        """
+        api_url = (
+            self._default_node
+            + self._cache_url
+            + "/"
+            + cache_name
+            + "/search/indexes?action=clear"
+        )
+        return requests.post(api_url, timeout=REST_TIMEOUT)
+
+    def index_reindex(self, cache_name: str) -> requests.Response:
+        """Rebuild index on a cache
+        Args:
+            cache_name(str): name of the cache.
+        Returns:
+            An http Response containing the result of the operation
+        """
+        api_url = (
+            self._default_node
+            + self._cache_url
+            + "/"
+            + cache_name
+            + "/search/indexes?action=reindex"
+        )
+        return requests.post(api_url, timeout=REST_TIMEOUT)
diff --git a/libs/community/tests/integration_tests/vectorstores/test_infinispanvs.py b/libs/community/tests/integration_tests/vectorstores/test_infinispanvs.py
new file mode 100644
index 0000000000000..a5464d75151f2
--- /dev/null
+++ b/libs/community/tests/integration_tests/vectorstores/test_infinispanvs.py
@@ -0,0 +1,135 @@
+"""Test Infinispan functionality."""
+from typing import Any, List, Optional
+
+from langchain_core.documents import Document
+
+from langchain_community.vectorstores import InfinispanVS
+from tests.integration_tests.vectorstores.fake_embeddings import (
+    FakeEmbeddings,
+    fake_texts,
+)
+
+
+def _infinispan_setup() -> None:  # shared pre-test reset used by tests in this module
+    ispnvs = InfinispanVS()  # built with no arguments, i.e. the class defaults
+    ispnvs.cache_delete()  # NOTE(review): presumably drops state left by earlier runs
+    ispnvs.schema_delete()
+    proto = """
+    /**
+     * @Indexed
+     */
+    message vector {
+    /**
+     * @Vector(dimension=10)
+     */
+    repeated float vector = 1;
+    optional string text = 2;
+    optional string label = 3;
+    optional int32 page = 4;
+    }
+    """
+    ispnvs.schema_create(proto)  # passes the `vector` message definition above
+    ispnvs.cache_create()  # NOTE(review): presumably must follow schema_create
+    ispnvs.cache_index_clear()
+
+
+def _infinispanvs_from_texts(
+    metadatas: Optional[List[dict]] = None,
+    ids: Optional[List[str]] = None,
+    clear_old: Optional[bool] = True,
+    **kwargs: Any,
+) -> InfinispanVS:
+    """Build a store from ``fake_texts``; caller metadata dicts gain a "text" key."""
+    text_fields = [{"text": t} for t in fake_texts]
+    if metadatas is not None:
+        for meta, field in zip(metadatas, text_fields):
+            meta.update(field)
+    return InfinispanVS.from_texts(
+        fake_texts,
+        FakeEmbeddings(),
+        metadatas=text_fields if metadatas is None else metadatas,
+        ids=ids,
+        clear_old=clear_old,
+        **kwargs,
+    )
+
+
+def test_infinispan() -> None:
+    """Build a store from the fake texts and search for the closest match."""
+    _infinispan_setup()
+    expected = [Document(page_content="foo")]
+    store = _infinispanvs_from_texts()
+    assert store.similarity_search("foo", k=1) == expected
+
+
+def test_infinispan_with_metadata() -> None:
+    """Check that a search hit carries the metadata stored with its text."""
+    _infinispan_setup()
+    # One separate dict per text, so no metadata dict is shared between entries.
+    labels = [{"label": "test"} for _ in fake_texts]
+    expected = [Document(page_content="foo", metadata={"label": "test"})]
+    store = _infinispanvs_from_texts(metadatas=labels)
+    hits = store.similarity_search("foo", k=1)
+    assert hits == expected
+
+
+def test_infinispan_with_metadata_with_output_fields() -> None:
+    """Test metadata retrieval when ``output_fields`` is configured."""
+    _infinispan_setup()
+    # NOTE(review): output_fields presumably selects the fields returned per hit.
+    config = {"output_fields": ["label", "page", "text"]}
+    meta = [{"page": n, "label": "label" + str(n)} for n in range(len(fake_texts))]
+    store = _infinispanvs_from_texts(metadatas=meta, configuration=config)
+    first_hit = Document(page_content="foo", metadata={"label": "label0", "page": 0})
+    output = store.similarity_search("foo", k=1)
+    assert output == [first_hit]
+
+
+def test_infinispanvs_with_id() -> None:
+    """Test construction with explicit ids, starting from a freshly reset server."""
+    _infinispan_setup()
+    ids = [f"id_{i}" for i in range(len(fake_texts))]
+    docsearch = _infinispanvs_from_texts(ids=ids)
+    assert docsearch.similarity_search("foo", k=1) == [Document(page_content="foo")]
+
+
+def test_infinispan_with_score() -> None:
+    """Test search with scores: returned documents, metadata and score order."""
+    _infinispan_setup()
+    texts = ["foo", "bar", "baz"]
+    pages = [{"page": idx} for idx, _ in enumerate(texts)]
+    store = _infinispanvs_from_texts(metadatas=pages)
+    scored_hits = store.similarity_search_with_score("foo", k=3)
+    expected_docs = [
+        Document(page_content=text, metadata={"page": idx})
+        for idx, text in enumerate(texts)
+    ]
+    docs = [pair[0] for pair in scored_hits]
+    scores = [pair[1] for pair in scored_hits]
+    assert docs == expected_docs
+    assert scores[0] >= scores[1] >= scores[2]
+
+
+def test_infinispan_add_texts() -> None:
+    """Test that add_texts adds entries to an already populated store."""
+    _infinispan_setup()
+    texts = ["foo", "bar", "baz"]
+    metadatas = [{"page": page} for page, _ in enumerate(texts)]
+    store = _infinispanvs_from_texts(metadatas=metadatas)
+
+    # Insert the same texts again; the assertion below expects 6 hits in total.
+    store.add_texts(texts, metadatas)
+    hits = store.similarity_search("foo", k=10)
+    assert len(hits) == 6
+
+
+def test_infinispan_no_clear_old() -> None:
+    """Test that ``clear_old=False`` keeps the entries already in the cache."""
+    _infinispan_setup()
+    texts = ["foo", "bar", "baz"]
+    metadatas = [{"page": page} for page, _ in enumerate(texts)]
+    first_store = _infinispanvs_from_texts(metadatas=metadatas)
+    del first_store
+    store = _infinispanvs_from_texts(metadatas=metadatas, clear_old=False)
+    hits = store.similarity_search("foo", k=10)
+    assert len(hits) == 6
diff --git a/libs/community/tests/unit_tests/vectorstores/test_public_api.py b/libs/community/tests/unit_tests/vectorstores/test_public_api.py
index 808da73b55978..1e963c3368445 100644
--- a/libs/community/tests/unit_tests/vectorstores/test_public_api.py
+++ b/libs/community/tests/unit_tests/vectorstores/test_public_api.py
@@ -31,6 +31,7 @@
     "FAISS",
     "HanaDB",
     "Hologres",
+    "InfinispanVS",
     "KDBAI",
     "Kinetica",
     "KineticaSettings",