diff --git a/docs/docs/integrations/vectorstores/falkordbvector.ipynb b/docs/docs/integrations/vectorstores/falkordbvector.ipynb
new file mode 100644
index 0000000000000..ecc0aa68bf008
--- /dev/null
+++ b/docs/docs/integrations/vectorstores/falkordbvector.ipynb
@@ -0,0 +1,437 @@
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# FalkorDBVectorStore\n",
+ "FalkorDB is an open-source graph database with integrated support for vector similarity search.\n",
+ "\n",
+ "It supports:\n",
+ "- approximate nearest neighbor search\n",
+ "- Euclidean distance and cosine similarity\n",
+ "- Hybrid search combining vector and keyword searches\n",
+ "\n",
+ "This notebook shows how to use the FalkorDB vector index (`FalkorDBVector`).\n",
+ "\n",
+ "See the installation instructions.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Setup"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: falkordb in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (1.0.10)Note: you may need to restart the kernel to use updated packages.\n",
+ "\n",
+ "Requirement already satisfied: redis<6.0.0,>=5.0.1 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from falkordb) (5.2.0)\n",
+ "Requirement already satisfied: async-timeout>=4.0.3 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from redis<6.0.0,>=5.0.1->falkordb) (4.0.3)\n",
+ "Requirement already satisfied: tiktoken in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (0.8.0)\n",
+ "Requirement already satisfied: regex>=2022.1.18 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from tiktoken) (2024.11.6)\n",
+ "Requirement already satisfied: requests>=2.26.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from tiktoken) (2.32.3)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from requests>=2.26.0->tiktoken) (3.4.0)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from requests>=2.26.0->tiktoken) (3.10)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from requests>=2.26.0->tiktoken) (1.26.20)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from requests>=2.26.0->tiktoken) (2024.8.30)\n",
+ "Note: you may need to restart the kernel to use updated packages.\n",
+ "Requirement already satisfied: langchain in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (0.3.9)Note: you may need to restart the kernel to use updated packages.\n",
+ "\n",
+ "Requirement already satisfied: langchain_huggingface in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (0.1.2)\n",
+ "Requirement already satisfied: PyYAML>=5.3 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (6.0.2)\n",
+ "Requirement already satisfied: SQLAlchemy<3,>=1.4 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (2.0.36)\n",
+ "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (3.11.8)\n",
+ "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (4.0.3)\n",
+ "Requirement already satisfied: langchain-core<0.4.0,>=0.3.21 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (0.3.21)\n",
+ "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (0.3.2)\n",
+ "Requirement already satisfied: langsmith<0.2.0,>=0.1.17 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (0.1.147)\n",
+ "Requirement already satisfied: numpy<2,>=1.22.4 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (1.26.4)\n",
+ "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (2.9.2)\n",
+ "Requirement already satisfied: requests<3,>=2 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (2.32.3)\n",
+ "Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (9.0.0)\n",
+ "Requirement already satisfied: huggingface-hub>=0.23.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain_huggingface) (0.26.3)\n",
+ "Requirement already satisfied: sentence-transformers>=2.6.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain_huggingface) (3.3.1)\n",
+ "Requirement already satisfied: tokenizers>=0.19.1 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain_huggingface) (0.20.3)\n",
+ "Requirement already satisfied: transformers>=4.39.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain_huggingface) (4.46.3)\n",
+ "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (2.4.4)\n",
+ "Requirement already satisfied: aiosignal>=1.1.2 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n",
+ "Requirement already satisfied: attrs>=17.3.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (24.2.0)\n",
+ "Requirement already satisfied: frozenlist>=1.1.1 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.5.0)\n",
+ "Requirement already satisfied: multidict<7.0,>=4.5 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.1.0)\n",
+ "Requirement already satisfied: propcache>=0.2.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (0.2.0)\n",
+ "Requirement already satisfied: yarl<2.0,>=1.17.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.18.0)\n",
+ "Requirement already satisfied: filelock in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from huggingface-hub>=0.23.0->langchain_huggingface) (3.16.1)\n",
+ "Requirement already satisfied: fsspec>=2023.5.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from huggingface-hub>=0.23.0->langchain_huggingface) (2024.10.0)\n",
+ "Requirement already satisfied: packaging>=20.9 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from huggingface-hub>=0.23.0->langchain_huggingface) (24.2)\n",
+ "Requirement already satisfied: tqdm>=4.42.1 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from huggingface-hub>=0.23.0->langchain_huggingface) (4.67.1)\n",
+ "Requirement already satisfied: typing-extensions>= in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from huggingface-hub>=0.23.0->langchain_huggingface) (4.12.2)\n",
+ "Requirement already satisfied: jsonpatch<2.0,>=1.33 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain-core<0.4.0,>=0.3.21->langchain) (1.33)\n",
+ "Requirement already satisfied: httpx<1,>=0.23.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langsmith<0.2.0,>=0.1.17->langchain) (0.27.2)\n",
+ "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langsmith<0.2.0,>=0.1.17->langchain) (3.10.12)\n",
+ "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langsmith<0.2.0,>=0.1.17->langchain) (1.0.0)\n",
+ "Requirement already satisfied: annotated-types>=0.6.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (0.7.0)\n",
+ "Requirement already satisfied: pydantic-core==2.23.4 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (2.23.4)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from requests<3,>=2->langchain) (3.4.0)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from requests<3,>=2->langchain) (3.10)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from requests<3,>=2->langchain) (1.26.20)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from requests<3,>=2->langchain) (2024.8.30)\n",
+ "Requirement already satisfied: torch>=1.11.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from sentence-transformers>=2.6.0->langchain_huggingface) (2.5.1)\n",
+ "Requirement already satisfied: scikit-learn in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from sentence-transformers>=2.6.0->langchain_huggingface) (1.5.2)\n",
+ "Requirement already satisfied: scipy in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from sentence-transformers>=2.6.0->langchain_huggingface) (1.13.1)\n",
+ "Requirement already satisfied: Pillow in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from sentence-transformers>=2.6.0->langchain_huggingface) (11.0.0)\n",
+ "Requirement already satisfied: greenlet!=0.4.17 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from SQLAlchemy<3,>=1.4->langchain) (3.1.1)\n",
+ "Requirement already satisfied: regex!=2019.12.17 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from transformers>=4.39.0->langchain_huggingface) (2024.11.6)\n",
+ "Requirement already satisfied: safetensors>=0.4.1 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from transformers>=4.39.0->langchain_huggingface) (0.4.5)\n",
+ "Requirement already satisfied: anyio in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.17->langchain) (4.6.2.post1)\n",
+ "Requirement already satisfied: httpcore==1.* in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.17->langchain) (1.0.7)\n",
+ "Requirement already satisfied: sniffio in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.17->langchain) (1.3.1)\n",
+ "Requirement already satisfied: h11<0.15,>=0.13 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.17->langchain) (0.14.0)\n",
+ "Requirement already satisfied: jsonpointer>=1.9 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.21->langchain) (3.0.0)\n",
+ "Requirement already satisfied: networkx in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (3.2.1)\n",
+ "Requirement already satisfied: jinja2 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (3.1.4)\n",
+ "Requirement already satisfied: sympy==1.13.1 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (1.13.1)\n",
+ "Requirement already satisfied: mpmath<1.4,>=1.1.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from sympy==1.13.1->torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (1.3.0)\n",
+ "Requirement already satisfied: colorama in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from tqdm>=4.42.1->huggingface-hub>=0.23.0->langchain_huggingface) (0.4.6)\n",
+ "Requirement already satisfied: joblib>=1.2.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from scikit-learn->sentence-transformers>=2.6.0->langchain_huggingface) (1.4.2)\n",
+ "Requirement already satisfied: threadpoolctl>=3.1.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from scikit-learn->sentence-transformers>=2.6.0->langchain_huggingface) (3.5.0)\n",
+ "Requirement already satisfied: exceptiongroup>=1.0.2 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from anyio->httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.17->langchain) (1.2.2)\n",
+ "Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from jinja2->torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (3.0.2)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Pip install necessary package\n",
+ "%pip install --upgrade falkordb\n",
+ "%pip install --upgrade tiktoken\n",
+ "%pip install --upgrade langchain langchain_huggingface"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Credentials\n",
+ "This notebook uses HuggingFace embeddings, so you need to provide your HuggingFace API key."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import getpass\n",
+ "import os\n",
+ "\n",
+ "if \"HUGGINGFACE_API_KEY\" not in os.environ:\n",
+ " os.environ[\"HUGGINGFACE_API_KEY\"] = getpass.getpass(\"HuggingFace API Key:\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If you want to get automated tracing of your model calls you can also set your LangSmith API key by uncommenting below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
+ "# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Initialization"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_community.vectorstores.falkordb_vector import FalkorDBVector\n",
+ "from langchain_core.documents import Document\n",
+ "from langchain_huggingface import HuggingFaceEmbeddings"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can use FalkorDBVector locally with Docker. See the installation instructions."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "host = \"localhost\"\n",
+ "port = 6379"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Or you can use FalkorDBVector with FalkorDB Cloud"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# E.g\n",
+ "# host = \"r-6jissuruar.instance-zwb082gpf.hc-v8noonp0c.europe-west1.gcp.f2e0a955bb84.cloud\"\n",
+ "# port = 62471\n",
+ "# username = \"falkordb\" # SET ON FALKORDB CLOUD\n",
+ "# password = \"password\" # SET ON FALKORDB CLOUD"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "vector_store = FalkorDBVector(host=host, port=port, embedding=HuggingFaceEmbeddings())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Manage vector store"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Add items to vector store"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['1', '2', '3']"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from langchain_core.documents import Document\n",
+ "\n",
+ "document_1 = Document(page_content=\"foo\", metadata={\"source\": \"https://example.com\"})\n",
+ "\n",
+ "document_2 = Document(page_content=\"bar\", metadata={\"source\": \"https://example.com\"})\n",
+ "\n",
+ "document_3 = Document(page_content=\"baz\", metadata={\"source\": \"https://example.com\"})\n",
+ "\n",
+ "documents = [document_1, document_2, document_3]\n",
+ "\n",
+ "vector_store.add_documents(documents=documents, ids=[\"1\", \"2\", \"3\"])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Update items in vector store"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "updated_document = Document(\n",
+ " page_content=\"qux\", metadata={\"source\": \"https://another-example.com\"}\n",
+ ")\n",
+ "\n",
+ "vector_store.update_documents(document_id=\"1\", document=updated_document)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Delete items from vector store"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "vector_store.delete(ids=[\"3\"])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Query vector store\n",
+ "\n",
+ "Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Query directly\n",
+ "\n",
+ "Performing a simple similarity search can be done as follows:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "* qux [{'text': 'qux', 'id': '1', 'source': 'https://another-example.com'}]\n"
+ ]
+ }
+ ],
+ "source": [
+ "results = vector_store.similarity_search(\n",
+ " query=\"thud\", k=1, filter={\"source\": \"https://another-example.com\"}\n",
+ ")\n",
+ "for doc in results:\n",
+ " print(f\"* {doc.page_content} [{doc.metadata}]\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If you want to execute a similarity search and receive the corresponding scores you can run:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "* [SIM=0.000001] bar [{'text': 'bar', 'id': '2', 'source': 'https://example.com'}]\n"
+ ]
+ }
+ ],
+ "source": [
+ "results = vector_store.similarity_search_with_score(query=\"bar\")\n",
+ "for doc, score in results:\n",
+ " print(f\"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Query by turning into retriever\n",
+ "You can also transform the vector store into a retriever for easier usage in your chains."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[Document(metadata={'text': 'qux', 'id': '1', 'source': 'https://another-example.com'}, page_content='qux')]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "retriever = vector_store.as_retriever(search_type=\"mmr\", search_kwargs={\"k\": 1})\n",
+ "retriever.invoke(\"thud\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Usage for retrieval-augmented generation\n",
+ "For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n",
+ "- Tutorials: working with external knowledge\n",
+ "- How-to: Question and answer with RAG\n",
+ "- Retrieval conceptual docs\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## API reference\n",
+ "For detailed documentation of all `FalkorDBVector` features and configurations head to the API reference: https://python.langchain.com/api_reference/community/vectorstores/langchain_community.vectorstores.falkordb_vector.FalkorDBVector.html"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
diff --git a/libs/community/langchain_community/vectorstores/falkordb_vector.py b/libs/community/langchain_community/vectorstores/falkordb_vector.py
new file mode 100644
index 0000000000000..d3a74177f93f4
--- /dev/null
+++ b/libs/community/langchain_community/vectorstores/falkordb_vector.py
@@ -0,0 +1,1858 @@
+from __future__ import annotations
+import enum
+import os
+import random
+import string
+from hashlib import md5
+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Type
+import numpy as np
+from langchain_core.documents import Document
+from langchain_core.embeddings import Embeddings
+from langchain_core.vectorstores import VectorStore
+from langchain_community.graphs import FalkorDBGraph
+from langchain_community.vectorstores.utils import (
+ DistanceStrategy,
+ maximal_marginal_relevance,
def generate_random_string(length: int) -> str:
    """Return a random string made of ASCII letters.

    Args:
        length: Number of characters to generate. A value of zero or less
            yields an empty string.

    Returns:
        A string of ``length`` characters drawn from ``string.ascii_letters``
        (uppercase and lowercase only; no digits or punctuation).
    """
    alphabet = string.ascii_letters
    # One independent draw per output character.
    picked = [random.choice(alphabet) for _ in range(length)]
    return "".join(picked)
+ DistanceStrategy.EUCLIDEAN_DISTANCE: "euclidean",
+ DistanceStrategy.COSINE: "cosine",
class SearchType(str, enum.Enum):
    """Search strategies available in the FalkorDB vector store.

    Members:
        VECTOR: Query only the vector indexes, ranking results by the
            similarity between vector embeddings.
        HYBRID: Query both the full-text indexes and the vector indexes,
            combining traditional text search with vector-based search.

    Because the enum also derives from ``str``, each member compares equal
    to its plain string value.
    """

    VECTOR = "vector"
    HYBRID = "hybrid"
+class IndexType(str, enum.Enum):
+ """Enumerator of the index types."""
def dict_to_yaml_str(input_dict: Dict, indent: int = 0) -> str:
    """Render a dictionary as a YAML-like string using only built-ins.

    Args:
        input_dict: The dictionary to render.
        indent: Current nesting depth; each level adds one padding unit
            in front of the emitted lines.

    Returns:
        The YAML-like text. Nested dictionaries are rendered recursively one
        level deeper, lists are rendered as ``- item`` lines at the same
        padding as their key, and any other value is rendered as
        ``key: value``. Every emitted line ends with a newline.
    """
    pad = " " * indent
    chunks: List[str] = []
    for name, val in input_dict.items():
        if isinstance(val, dict):
            # Key line first, then the nested mapping one level deeper.
            chunks.append(f"{pad}{name}:\n")
            chunks.append(dict_to_yaml_str(val, indent + 1))
        elif isinstance(val, list):
            chunks.append(f"{pad}{name}:\n")
            chunks.extend(f"{pad}- {entry}\n" for entry in val)
        else:
            chunks.append(f"{pad}{name}: {val}\n")
    return "".join(chunks)
def construct_metadata_filter(
    filter: Optional[Dict[str, Any]] = None,
) -> Tuple[str, Dict[str, Any]]:
    """Build a metadata filter snippet with the values inlined in the query.

    Each ``key: value`` pair becomes an equality test on the node alias ``n``
    and the tests are combined with ``AND``.

    Args:
        filter: Mapping of property name to the value it must equal.
            ``None`` or an empty mapping means "no filter".

    Returns:
        A tuple of the filter snippet and an empty parameter dictionary
        (values are inlined, so no query parameters are produced). The
        snippet is an empty string when there is nothing to filter on.

    Note:
        String values are emitted as single-quoted literals. Backslashes and
        single quotes inside them are escaped so that a value cannot terminate
        the literal early and alter the surrounding query. Property names are
        inserted verbatim and must come from trusted code.
    """
    if not filter:
        return "", {}

    conditions = []
    for key, value in filter.items():
        if isinstance(value, str):
            # Escape backslashes first so the backslashes introduced for the
            # quotes are not themselves doubled.
            escaped = value.replace("\\", "\\\\").replace("'", "\\'")
            conditions.append(f"n.{key} = '{escaped}'")
        else:
            # Non-string values are rendered with their default formatting.
            conditions.append(f"n.{key} = {value}")

    return " AND ".join(conditions), {}
def _get_search_index_query(
    search_type: SearchType, index_type: IndexType = DEFAULT_INDEX_TYPE
) -> str:
    """Return the leading Cypher fragment that queries the search index.

    The fragment relies on the query parameters ``$entity_label``,
    ``$entity_property``, ``$k`` and ``$embedding`` (plus ``$query`` for
    hybrid search) and is meant to be followed by further clauses.

    Args:
        search_type: ``SearchType.VECTOR`` for a pure vector lookup or
            ``SearchType.HYBRID`` to combine vector and full-text results.
            Only consulted for node indexes.
        index_type: ``IndexType.NODE`` or ``IndexType.RELATIONSHIP``.

    Returns:
        The Cypher fragment. Node queries yield ``node, score``; the
        relationship query yields ``relationship, score``.

    Raises:
        ValueError: If the ``search_type`` / ``index_type`` combination is
            not one of the supported ones. Previously this case fell through
            and returned ``None`` despite the ``str`` return annotation.
    """
    if index_type == IndexType.NODE:
        if search_type == SearchType.VECTOR:
            return (
                "CALL db.idx.vector.queryNodes($entity_label, "
                "$entity_property, $k, vecf32($embedding)) "
                "YIELD node, score "
            )
        if search_type == SearchType.HYBRID:
            # Each sub-query divides its scores by that sub-query's maximum
            # before the UNION; the best score per node is then kept and the
            # top $k nodes are returned.
            return (
                "CALL { "
                "CALL db.idx.vector.queryNodes($entity_label, "
                "$entity_property, $k, vecf32($embedding)) "
                "YIELD node, score "
                "WITH collect({node: node, score: score})"
                " AS nodes, max(score) AS max_score "
                "UNWIND nodes AS n "
                "RETURN n.node AS node, (n.score / max_score) AS score "
                "UNION "
                "CALL db.idx.fulltext.queryNodes($entity_label, $query) "
                "YIELD node, score "
                "WITH collect({node: node, score: score})"
                " AS nodes, max(score) AS max_score "
                "UNWIND nodes AS n "
                "RETURN n.node AS node, (n.score / max_score) AS score "
                "} "
                "WITH node, max(score) AS score "
                "ORDER BY score DESC LIMIT $k "
            )
    elif index_type == IndexType.RELATIONSHIP:
        # Relationship indexes use the vector query regardless of search_type.
        return (
            "CALL db.idx.vector.queryRelationships"
            "($entity_label, $entity_property, $k, vecf32($embedding)) "
            "YIELD relationship, score "
        )

    raise ValueError(
        f"Unsupported search configuration: search_type={search_type!r}, "
        f"index_type={index_type!r}"
    )
def process_index_data(data: List[List[Any]]) -> List[Dict[str, Any]]:
    """Flatten raw index rows into one dictionary per indexed property.

    Args:
        data: A list of rows, each describing the indexes of one entity.
            The positions read from each row are:

            - ``row[0]``: the entity label (e.g. ``'Person'``).
            - ``row[2]``: mapping of property name to the list of index
              types on that property.
            - ``row[3]``: mapping of property name to index options
              (``dimension``, ``similarityFunction``), or a plain string,
              in which case no vector details are extracted.
            - ``row[6]``: the entity type.
            - ``row[7]``: the index status.

    Returns:
        A list with one dictionary per ``(row, property)`` pair containing:

        - ``entity_label``: the label of the entity or relationship.
        - ``entity_property``: the indexed property name.
        - ``entity_type``: the value taken from ``row[6]``.
        - ``index_type``: the first index type listed for the property.
        - ``index_status``: the value taken from ``row[7]``.
        - ``index_dimension``: the vector dimension, or ``None``.
        - ``index_similarityFunction``: the similarity function, or ``None``.

    Notes:
        The dimension and similarity function are only looked up when
        ``'VECTOR'`` is among the property's index types; otherwise (or when
        the options are missing) they stay ``None``.
    """
    rows: List[Dict[str, Any]] = []

    for record in data:
        label = record[0]
        props_to_types = record[2]
        status = record[7]
        kind = record[6]

        for prop_name, type_list in props_to_types.items():
            primary_type = type_list[0]
            dimension = None
            similarity = None

            # Vector details exist only when the options slot is a mapping.
            if "VECTOR" in type_list and not isinstance(record[3], str):
                options = record[3].get(prop_name, {})
                dimension = options.get("dimension")
                similarity = options.get("similarityFunction")

            rows.append(
                {
                    "entity_label": label,
                    "entity_property": prop_name,
                    "entity_type": kind,
                    "index_type": primary_type,
                    "index_status": status,
                    "index_dimension": dimension,
                    "index_similarityFunction": similarity,
                }
            )

    return rows
+class FalkorDBVector(VectorStore):
+ """`FalkorDB` vector index.
+ To use, you should have the ``falkordb`` python package installed
+ Args:
+ host: FalkorDB host
+ port: FalkorDB port
+ username: Optionally provide your username
+ details if you are connecting to a
+ FalkorDB Cloud database instance
+ password: Optionally provide your password
+ details if you are connecting to a
+ FalkorDB Cloud database instance
+ embedding: Any embedding function implementing
+ `langchain.embeddings.base.Embeddings` interface.
+ distance_strategy: The distance strategy to use.
+ (default: "EUCLIDEAN")
+ pre_delete_collection: If True, will delete
+ existing data if it exists.(default:
+ False). Useful for testing.
+ search_type: Similarity search type to use.
+ Could be either SearchType.VECTOR or
+ SearchType.HYBRID (default:
+ SearchType.VECTOR)
+ database: Optionally provide the name of the
+ database to use else FalkorDBVector will
+ generate a random database for you.
+ node_label: Provide the label of the node you
+ want the embeddings of your data to be
+ stored in. (default: "Chunk")
+ relation_type: Provide the relationship type
+ of the relationship you want the
+ embeddings of your data to be stored in.
+ (default: "")
+ embedding_node_property: Provide the name of
+ the property in which you want your
+ embeddings to be stored. (default: "embedding")
+ text_node_property: Provide the name of
+ the property in which you want your texts
+ to be stored. (default: "text")
+ embedding_dimension: Provide the dimension
+ of your embeddings or it will be
+ calculated for you.
+ retrieval_query: Optionally provide a
+ retrieval_query else the default
+ retrieval query will be used.
+ index_type: Provide the index type for the
+ VectorStore else the default index
+ type will be used.
+ graph: Optionally provide the graph you
+ would like to use
+ relevance_score_fn: Optionally provide a
+ function that computes a relevance score
+ based on the similarity score returned by
+ the search.
+ ssl: Specify whether the connection to the
+ database should be secured using SSL/TLS
+ encryption (default: False)
+ Example:
+ .. code-block:: python
+ from langchain_community.vectorstores.falkordb_vector import FalkorDBVector
+ from langchain_community.embeddings.openai import OpenAIEmbeddings
+ from langchain_text_splitters import CharacterTextSplitter
+ host="localhost"
+ port=6379
+ raw_documents = TextLoader('../../../state_of_the_union.txt').load()
+ text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+ documents = text_splitter.split_documents(raw_documents)
+ embeddings=OpenAIEmbeddings()
+ vectorstore = FalkorDBVector.from_documents(
+ embedding=embeddings,
+ documents=documents,
+ host=host,
+ port=port,
+ )
+ """
+ def __init__(
+ self,
+ embedding: Embeddings,
+ *,
+ search_type: SearchType = SearchType.VECTOR,
+ username: Optional[str] = None,
+ password: Optional[str] = None,
+ host: str = "localhost",
+ port: int = 6379,
+ distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
+ database: Optional[str] = generate_random_string(4),
+ node_label: str = "Chunk",
+ relation_type: str = "",
+ embedding_node_property: str = "embedding",
+ text_node_property: str = "text",
+ embedding_dimension: Optional[int] = None,
+ retrieval_query: Optional[str] = "",
+ index_type: IndexType = DEFAULT_INDEX_TYPE,
+ graph: Optional[FalkorDBGraph] = None,
+ relevance_score_fn: Optional[Callable[[float], float]] = None,
+ ssl: bool = False,
+ pre_delete_collection: bool = False,
+ metadata: List[Any] = [],
+ ) -> None:
+ try:
+ import falkordb
+ except ImportError:
+ raise ImportError(
+ "Could not import falkordb python package."
+ "Please install it with `pip install falkordb`"
+ )
+ try:
+ import redis.exceptions
+ except ImportError:
+ raise ImportError(
+ "Could not import redis.exceptions."
+ "Please install it with `pip install redis`"
+ )
+ # Allow only cosine and euclidean distance strategies
+ if distance_strategy not in [
+ DistanceStrategy.EUCLIDEAN_DISTANCE,
+ DistanceStrategy.COSINE,
+ ]:
+ raise ValueError(
+ "`distance_strategy` must be either 'EULIDEAN_DISTANCE` or `COSINE`"
+ )
+ # Graph object takes precedent over env or input params
+ if graph:
+ self._database = graph._graph
+ self._driver = graph._driver
+ else:
+ # Handle credentials via environment variables or input params
+ self._host = host
+ self._port = port
+ self._username = username or os.environ.get("FALKORDB_USERNAME")
+ self._password = password or os.environ.get("FALKORDB_PASSWORD")
+ self._ssl = ssl
+ # Initialize the FalkorDB connection
+ try:
+ self._driver = falkordb.FalkorDB(
+ host=self._host,
+ port=self._port,
+ username=self._username,
+ password=self._password,
+ ssl=self._ssl,
+ )
+ except redis.exceptions.ConnectionError:
+ raise ValueError(
+ "Could not connect to FalkorDB database."
+ "Please ensure that the host and port is correct"
+ )
+ except redis.exceptions.AuthenticationError:
+ raise ValueError(
+ "Could not connect to FalkorDB database. "
+ "Please ensure that the username and password are correct"
+ )
+ # Verify that required values are not null
+ if not embedding_node_property:
+ raise ValueError(
+ "The `embedding_node_property` must not be None or empty string"
+ )
+ if not node_label:
+ raise ValueError("The `node_label` must not be None or empty string")
+ self._database = self._driver.select_graph(database)
+ self.database_name = database
+ self.embedding = embedding
+ self.node_label = node_label
+ self.relation_type = relation_type
+ self.embedding_node_property = embedding_node_property
+ self.text_node_property = text_node_property
+ self._distance_strategy = distance_strategy
+ self.override_relevance_score_fn = relevance_score_fn
+ self.pre_delete_collection = pre_delete_collection
+ self.retrieval_query = retrieval_query
+ self.search_type = search_type
+ self._index_type = index_type
+ self.metadata = metadata
+ # Calculate embedding_dimensions if not given
+ if not embedding_dimension:
+ self.embedding_dimension = len(self.embedding.embed_query("foo"))
+ # Delete existing data if flagged
+ if pre_delete_collection:
+ self._database.query(f"""MATCH (n:`{self.node_label}`) DELETE n""")
+ @property
+ def embeddings(self) -> Embeddings:
+ """Returns the `Embeddings` model being used by the Vectorstore"""
+ return self.embedding
+ def _query(
+ self,
+ query: str,
+ *,
+ params: Optional[dict] = None,
+ retry_on_timeout: bool = True,
+ ) -> List[List]:
+ """
+ This method sends a Cypher query to the connected FalkorDB database
+ and returns the results as a list of lists.
+ Args:
+ query (str): The Cypher query to execute.
+ params (dict, optional): Dictionary of query parameters. Defaults to {}.
+ Returns:
+ List[List]: List of Lists containing the query results
+ """
+ params = params or {}
+ try:
+ data = self._database.query(query, params)
+ return data.result_set
+ except Exception as e:
+ if "Invalid input" in str(e):
+ raise ValueError(f"Cypher Statement is not valid\n{e}")
+ if retry_on_timeout:
+ return self._query(query, params=params, retry_on_timeout=False)
+ else:
+ raise e
+ def retrieve_existing_node_index(
+ self, node_label: Optional[str] = ""
+ ) -> Tuple[Optional[int], Optional[str], Optional[str], Optional[str]]:
+ """
+ Check if the vector index exists in the FalkorDB database
+ and returns its embedding dimension, entity_type,
+ entity_label, entity_property
+ This method;
+ 1. queries the FalkorDB database for existing indexes
+ 2. attempts to retrieve the dimension of
+ the vector index with the specified node label
+ & index type
+ 3. If the index exists, its dimension is returned.
+ 4. Else if the index doesn't exist, `None` is returned.
+ Returns:
+ int or None: The embedding dimension of the
+ existing index if found,
+ str or None: The entity type found.
+ str or None: The label of the entity that the
+ vector index was created with
+ str or None: The property of the entity for
+ which the vector index was created on
+ """
+ if node_label:
+ pass
+ elif self.node_label:
+ node_label = self.node_label
+ else:
+ raise ValueError("`node_label` property must be set to use this function")
+ embedding_dimension = None
+ entity_type = None
+ entity_label = None
+ entity_property = None
+ index_information = self._database.query("CALL db.indexes()")
+ if index_information:
+ processed_index_information = process_index_data(
+ index_information.result_set
+ )
+ for dict in processed_index_information:
+ if (
+ dict.get("entity_label", False) == node_label
+ and dict.get("entity_type", False) == "NODE"
+ ):
+ if dict["index_type"] == "VECTOR":
+ embedding_dimension = int(dict["index_dimension"])
+ entity_type = str(dict["entity_type"])
+ entity_label = str(dict["entity_label"])
+ entity_property = str(dict["entity_property"])
+ break
+ if embedding_dimension and entity_type and entity_label and entity_property:
+ self._index_type = IndexType(entity_type)
+ return embedding_dimension, entity_type, entity_label, entity_property
+ else:
+ return None, None, None, None
+ else:
+ return None, None, None, None
+ def retrieve_existing_relationship_index(
+ self, relation_type: Optional[str] = ""
+ ) -> Tuple[Optional[int], Optional[str], Optional[str], Optional[str]]:
+ """
+ Check if the vector index exists in the FalkorDB database
+ and returns its embedding dimension, entity_type, entity_label, entity_property
+ This method;
+ 1. queries the FalkorDB database for existing indexes
+ 2. attempts to retrieve the dimension of the vector
+ index with the specified label & index type
+ 3. If the index exists, its dimension is returned.
+ 4. Else if the index doesn't exist, `None` is returned.
+ Returns:
+ int or None: The embedding dimension of the existing index if found,
+ str or None: The entity type found.
+ str or None: The label of the entity that
+ the vector index was created with
+ str or None: The property of the entity for
+ which the vector index was created on
+ """
+ if relation_type:
+ pass
+ elif self.relation_type:
+ relation_type = self.relation_type
+ else:
+ raise ValueError(
+ "Couldn't find any specified `relation_type`."
+ " Check if you spelled it correctly"
+ )
+ embedding_dimension = None
+ entity_type = None
+ entity_label = None
+ entity_property = None
+ index_information = self._database.query("CALL db.indexes()")
+ if index_information:
+ processed_index_information = process_index_data(
+ index_information.result_set
+ )
+ for dict in processed_index_information:
+ if (
+ dict.get("entity_label", False) == relation_type
+ and dict.get("entity_type", False) == "RELATIONSHIP"
+ ):
+ if dict["index_type"] == "VECTOR":
+ embedding_dimension = int(dict["index_dimension"])
+ entity_type = str(dict["entity_type"])
+ entity_label = str(dict["entity_label"])
+ entity_property = str(dict["entity_property"])
+ break
+ if embedding_dimension and entity_type and entity_label and entity_property:
+ self._index_type = IndexType(entity_type)
+ return embedding_dimension, entity_type, entity_label, entity_property
+ else:
+ return None, None, None, None
+ else:
+ return None, None, None, None
+ def retrieve_existing_fts_index(self) -> Optional[str]:
+ """
+ Check if the fulltext index exists in the FalkorDB database
+ This method queries the FalkorDB database for existing fts indexes
+ with the specified name.
+ Returns:
+ str: fulltext index entity label
+ """
+ entity_label = None
+ index_information = self._database.query("CALL db.indexes()")
+ if index_information:
+ processed_index_information = process_index_data(
+ index_information.result_set
+ )
+ for dict in processed_index_information:
+ if dict.get("entity_label", False) == self.node_label:
+ if dict["index_type"] == "FULLTEXT":
+ entity_label = str(dict["entity_label"])
+ break
+ if entity_label:
+ return entity_label
+ else:
+ return None
+ else:
+ return None
+ def create_new_node_index(
+ self,
+ node_label: Optional[str] = "",
+ embedding_node_property: Optional[str] = "",
+ embedding_dimension: Optional[int] = None,
+ ) -> None:
+ """
+ This method creates a new vector index
+ on a node in FalkorDB.
+ """
+ if node_label:
+ pass
+ elif self.node_label:
+ node_label = self.node_label
+ else:
+ raise ValueError("`node_label` property must be set to use this function")
+ if embedding_node_property:
+ pass
+ elif self.embedding_node_property:
+ embedding_node_property = self.embedding_node_property
+ else:
+ raise ValueError(
+ "`embedding_node_property` property must be set to use this function"
+ )
+ if embedding_dimension:
+ pass
+ elif self.embedding_dimension:
+ embedding_dimension = self.embedding_dimension
+ else:
+ raise ValueError(
+ "`embedding_dimension` property must be set to use this function"
+ )
+ try:
+ self._database.create_node_vector_index(
+ node_label,
+ embedding_node_property,
+ dim=embedding_dimension,
+ similarity_function=DISTANCE_MAPPING[self._distance_strategy],
+ )
+ except Exception as e:
+ if "already indexed" in str(e):
+ raise ValueError(
+ f"A vector index on (:{node_label}"
+ "{"
+ f"{embedding_node_property}"
+ "}) has already been created"
+ )
+ else:
+ raise ValueError(f"Error occured: {e}")
+ def create_new_index_on_relationship(
+ self,
+ relation_type: str = "",
+ embedding_node_property: str = "",
+ embedding_dimension: int = 0,
+ ) -> None:
+ """
+ This method creates an new vector index
+ on a relationship/edge in FalkorDB.
+ """
+ if relation_type:
+ pass
+ elif self.relation_type:
+ relation_type = self.relation_type
+ else:
+ raise ValueError("`relation_type` must be set to use this function")
+ if embedding_node_property:
+ pass
+ elif self.embedding_node_property:
+ embedding_node_property = self.embedding_node_property
+ else:
+ raise ValueError(
+ "`embedding_node_property` must be set to use this function"
+ )
+ if embedding_dimension and embedding_dimension != 0:
+ pass
+ elif self.embedding_dimension:
+ embedding_dimension = self.embedding_dimension
+ else:
+ raise ValueError("`embedding_dimension` must be set to use this function")
+ try:
+ self._database.create_edge_vector_index(
+ relation_type,
+ embedding_node_property,
+ dim=embedding_dimension,
+ )
+ except Exception as e:
+ if "already indexed" in str(e):
+ raise ValueError(
+ f"A vector index on [:{relation_type}"
+ "{"
+ f"{embedding_node_property}"
+ "}] has already been created"
+ )
+ else:
+ raise ValueError(f"Error occured: {e}")
+ def create_new_keyword_index(self, text_node_properties: List[str] = []) -> None:
+ """
+ This method constructs a Cypher query and executes it
+ to create a new full text index in FalkorDB
+ Args:
+ text_node_properties (List[str]): List of node properties
+ to be indexed.If not provided, defaults to
+ self.text_node_property.
+ """
+ # Use the provided properties or default to self.text_node_property
+ node_props = text_node_properties or [self.text_node_property]
+ # Dynamically pass node label and properties to create the full-text
+ # index
+ self._database.create_node_fulltext_index(self.node_label, *node_props)
+ def add_embeddings(
+ self,
+ texts: Iterable[str],
+ embeddings: List[List[float]],
+ metadatas: Optional[List[dict]] = None,
+ ids: Optional[List[str]] = None,
+ **kwargs: Any,
+ ) -> List[str]:
+ """Add embeddings to the vectorstore.
+ Args:
+ texts: Iterable of strings to add to the vectorstore.
+ embeddings: List of list of embedding vectors.
+ metadatas: List of metadatas associated with the texts.
+ kwargs: vectorstore specific parameters
+ """
+ if ids is None:
+ ids = [md5(text.encode("utf-8")).hexdigest() for text in texts]
+ if not metadatas:
+ metadatas = [{} for _ in texts]
+ self.metadata = []
+ # Check if all dictionaries are empty
+ if all(not metadata for metadata in metadatas):
+ pass
+ else:
+ # Initialize a set to keep track of unique non-empty keys
+ unique_non_empty_keys: set[str] = set()
+ # Iterate over each metadata dictionary
+ for metadata in metadatas:
+ # Add keys with non-empty values to the set
+ unique_non_empty_keys.update(
+ key for key, value in metadata.items() if value
+ )
+ # Print unique non-empty keys
+ if unique_non_empty_keys:
+ self.metadata = list(unique_non_empty_keys)
+ parameters = {
+ "data": [
+ {"text": text, "metadata": metadata, "embedding": embedding, "id": id}
+ for text, metadata, embedding, id in zip(
+ texts, metadatas, embeddings, ids
+ )
+ ]
+ }
+ self._database.query(
+ "UNWIND $data AS row "
+ f"MERGE (c:`{self.node_label}` {{id: row.id}}) "
+ f"SET c.`{self.embedding_node_property}`"
+ f" = vecf32(row.embedding), c.`{self.text_node_property}`"
+ " = row.text, c += row.metadata",
+ params=parameters,
+ )
+ return ids
+ def add_texts(
+ self,
+ texts: Iterable[str],
+ metadatas: Optional[List[dict]] = None,
+ ids: Optional[List[str]] = None,
+ **kwargs: Any,
+ ) -> List[str]:
+ """Run more texts through the embeddings and add to the vectorstore.
+ Args:
+ texts: Iterable of strings to add to the vectorstore.
+ metadatas: Optional list of metadatas associated with the texts.
+ kwargs: vectorstore specific parameters
+ Returns:
+ List of ids from adding the texts into the vectorstore.
+ """
+ embeddings = self.embedding.embed_documents(list(texts))
+ return self.add_embeddings(
+ texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs
+ )
+ def add_documents(
+ self,
+ documents: List[Document],
+ ids: Optional[List[str]] = None,
+ **kwargs: Any,
+ ) -> List[str]:
+ """
+ This function takes List[Document] element(s) and populates
+ the existing store with a default node or default node(s) that
+ represent the element(s) and returns the id(s) of the newly created node(s).
+ Args:
+ documents: the List[Document] element(s).
+ ids: Optional List of custom IDs to assign to the documents.
+ Returns:
+ A list containing the id(s) of the newly created node in the store.
+ """
+ # Ensure the length of the ids matches the length of the documents if
+ # provided
+ if ids and len(ids) != len(documents):
+ raise ValueError("The number of ids must match the number of documents.")
+ result_ids = []
+ # Add the documents to the store with custom or generated IDs
+ self.from_documents(
+ embedding=self.embedding,
+ documents=documents,
+ )
+ for i, doc in enumerate(documents):
+ page_content = doc.page_content
+ if ids:
+ # If custom IDs are provided, use them directly
+ assigned_id = ids[i]
+ self._query(
+ """
+ MATCH (n)
+ WHERE n.text = $page_content
+ SET n.id = $assigned_id
+ """,
+ params={"page_content": page_content, "assigned_id": assigned_id},
+ )
+ result_ids.append(assigned_id)
+ else:
+ # Use the existing logic to query the ID if no custom IDs were
+ # provided
+ result = self._query(
+ """
+ MATCH (n)
+ WHERE n.text = $page_content
+ RETURN n.id
+ """,
+ params={"page_content": page_content},
+ )
+ try:
+ result_ids.append(result[0][0])
+ except Exception:
+ raise ValueError(
+ "Your document wasn't added to the store"
+ " successfully. Check your spellings."
+ )
+ return result_ids
+ @classmethod
+ def from_texts(
+ cls: type[FalkorDBVector],
+ texts: List[str],
+ embedding: Embeddings,
+ metadatas: Optional[List[Dict]] = None, # Optional
+ distance_strategy: Optional[DistanceStrategy] = None, # Optional
+ ids: Optional[List[str]] = None,
+ **kwargs: Any,
+ ) -> FalkorDBVector:
+ """
+ Return FalkorDBVector initialized from texts and embeddings.
+ """
+ embeddings = embedding.embed_documents(list(texts))
+ # Set default values if None
+ if metadatas is None:
+ metadatas = [{} for _ in texts]
+ if distance_strategy is None:
+ distance_strategy = DEFAULT_DISTANCE_STRATEGY
+ return cls.__from(
+ texts,
+ embeddings,
+ embedding,
+ metadatas=metadatas,
+ ids=ids,
+ distance_strategy=distance_strategy,
+ **kwargs,
+ )
+ @classmethod
+ def __from(
+ cls,
+ texts: List[str],
+ embeddings: List[List[float]],
+ embedding: Embeddings,
+ metadatas: Optional[List[dict]] = None,
+ ids: Optional[List[str]] = None,
+ search_type: SearchType = SearchType.VECTOR,
+ **kwargs: Any,
+ ) -> FalkorDBVector:
+ if ids is None:
+ ids = [md5(text.encode("utf-8")).hexdigest() for text in texts]
+ if not metadatas:
+ metadatas = [{} for _ in texts]
+ store = cls(
+ embedding=embedding,
+ search_type=search_type,
+ **kwargs,
+ )
+ # Check if the vector index already exists
+ embedding_dimension, index_type, entity_label, entity_property = (
+ store.retrieve_existing_node_index()
+ )
+ # Raise error if relationship index type
+ if index_type == "RELATIONSHIP":
+ raise ValueError(
+ "Data ingestion is not supported with relationship vector index"
+ )
+ # If the vector index doesn't exist yet
+ if not index_type:
+ store.create_new_node_index()
+ embedding_dimension, index_type, entity_label, entity_property = (
+ store.retrieve_existing_node_index()
+ )
+ # If the index already exists, check if embedding dimensions match
+ elif (
+ embedding_dimension and not store.embedding_dimension == embedding_dimension
+ ):
+ raise ValueError(
+ f"A Vector index for {entity_label} on {entity_property} exists"
+ "The provided embedding function and vector index "
+ "dimensions do not match.\n"
+ f"Embedding function dimension: {store.embedding_dimension}\n"
+ f"Vector index dimension: {embedding_dimension}"
+ )
+ if search_type == SearchType.HYBRID:
+ fts_node_label = store.retrieve_existing_fts_index()
+ # If the FTS index doesn't exist yet
+ if not fts_node_label:
+ store.create_new_keyword_index()
+ else: # Validate that FTS and Vector Index use the same information
+ if not fts_node_label == store.node_label:
+ raise ValueError(
+ "Vector and keyword index don't index the same node label"
+ )
+ store.add_embeddings(
+ texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs
+ )
+ return store
+ @classmethod
+ def from_existing_index(
+ cls: Type[FalkorDBVector],
+ embedding: Embeddings,
+ node_label: str,
+ search_type: SearchType = DEFAULT_SEARCH_TYPE,
+ **kwargs: Any,
+ ) -> FalkorDBVector:
+ """
+ Get instance of an existing FalkorDB vector index. This method will
+ return the instance of the store without inserting any new
+ embeddings.
+ """
+ store = cls(
+ embedding=embedding,
+ node_label=node_label,
+ search_type=search_type,
+ **kwargs,
+ )
+ embedding_dimension, index_type, entity_label, entity_property = (
+ store.retrieve_existing_node_index()
+ )
+ # Raise error if relationship index type
+ if index_type == "RELATIONSHIP":
+ raise ValueError(
+ "Relationship vector index is not supported with "
+ "`from_existing_index` method. Please use the "
+ "`from_existing_relationship_index` method."
+ )
+ if not index_type:
+ raise ValueError(
+ f"The specified vector index node label `{node_label}` does not exist. "
+ "Make sure to check if you spelled the node label correctly"
+ )
+ # Check if embedding function and vector index dimensions match
+ if embedding_dimension and not store.embedding_dimension == embedding_dimension:
+ raise ValueError(
+ "The provided embedding function and vector index "
+ "dimensions do not match.\n"
+ f"Embedding function dimension: {store.embedding_dimension}\n"
+ f"Vector index dimension: {embedding_dimension}"
+ )
+ if search_type == SearchType.HYBRID:
+ fts_node_label = store.retrieve_existing_fts_index()
+ # If the FTS index doesn't exist yet
+ if not fts_node_label:
+ raise ValueError(
+ "The specified keyword index name does not exist. "
+ "Make sure to check if you spelled it correctly"
+ )
+ else: # Validate that FTS and Vector index use the same information
+ if not fts_node_label == store.node_label:
+ raise ValueError(
+ "Vector and keyword index don't index the same node label"
+ )
+ return store
+ @classmethod
+ def from_existing_relationship_index(
+ cls: Type[FalkorDBVector],
+ embedding: Embeddings,
+ relation_type: str,
+ search_type: SearchType = DEFAULT_SEARCH_TYPE,
+ **kwargs: Any,
+ ) -> FalkorDBVector:
+ """
+ Get instance of an existing FalkorDB relationship vector index.
+ This method will return the instance of the store without
+ inserting any new embeddings.
+ """
+ if search_type == SearchType.HYBRID:
+ raise ValueError(
+ "Hybrid search is not supported in combination "
+ "with relationship vector index"
+ )
+ store = cls(
+ embedding=embedding,
+ relation_type=relation_type,
+ **kwargs,
+ )
+ embedding_dimension, index_type, entity_label, entity_property = (
+ store.retrieve_existing_relationship_index()
+ )
+ if not index_type:
+ raise ValueError(
+ "The specified vector index on the relationship"
+ f" {relation_type} does not exist. "
+ "Make sure to check if you spelled it correctly"
+ )
+ # Raise error if not relationship index type
+ if index_type == "NODE":
+ raise ValueError(
+ "Node vector index is not supported with "
+ "`from_existing_relationship_index` method. Please use the "
+ "`from_existing_index` method."
+ )
+ # Check if embedding function and vector index dimensions match
+ if embedding_dimension and not store.embedding_dimension == embedding_dimension:
+ raise ValueError(
+ "The provided embedding function and vector index "
+ "dimensions do not match.\n"
+ f"Embedding function dimension: {store.embedding_dimension}\n"
+ f"Vector index dimension: {embedding_dimension}"
+ )
+ return store
+ @classmethod
+ def from_existing_graph(
+ cls: Type[FalkorDBVector],
+ embedding: Embeddings,
+ database: str,
+ node_label: str,
+ embedding_node_property: str,
+ text_node_properties: List[str],
+ *,
+ search_type: SearchType = DEFAULT_SEARCH_TYPE,
+ retrieval_query: str = "",
+ **kwargs: Any,
+ ) -> FalkorDBVector:
+ """
+ Initialize and return a FalkorDBVector instance
+ from an existing graph using the database name
+ This method initializes a FalkorDBVector instance
+ using the provided parameters and the existing graph.
+ It validates the existence of the indices and creates
+ new ones if they don't exist.
+ Args:
+ embedding: The `Embeddings` model you would like to use
+ database: The name of the existing graph/database you
+ would like to intialize
+ node_label: The label of the node you want to initialize.
+ embedding_node_property: The name of the property you
+ want your embeddings to be stored in.
+ Returns:
+ FalkorDBVector: An instance of FalkorDBVector initialized
+ with the provided parameters and existing graph.
+ Example:
+ >>> falkordb_vector = FalkorDBVector.from_existing_graph(
+ ... embedding=my_embedding,
+ ... node_label="Document",
+ ... embedding_node_property="embedding",
+ ... text_node_properties=["title", "content"]
+ ... )
+ """
+ # Validate that database and text_node_properties is not empty
+ if not database:
+ raise ValueError("Parameter `database` must be given")
+ if not text_node_properties:
+ raise ValueError(
+ "Parameter `text_node_properties` must not be an empty list"
+ )
+ # Prefer retrieval query from params, otherwise construct it
+ if not retrieval_query:
+ retrieval_query = (
+ f"RETURN reduce(str='', k IN {text_node_properties} |"
+ " str + '\\n' + k + ': ' + coalesce(node[k], '')) AS text, "
+ "node {.*, `"
+ + embedding_node_property
+ + "`: Null, id: Null, "
+ + ", ".join([f"`{prop}`: Null" for prop in text_node_properties])
+ + "} AS metadata, score"
+ )
+ store = cls(
+ database=database,
+ embedding=embedding,
+ search_type=search_type,
+ retrieval_query=retrieval_query,
+ node_label=node_label,
+ embedding_node_property=embedding_node_property,
+ **kwargs,
+ )
+ embedding_dimension, index_type, entity_label, entity_property = (
+ store.retrieve_existing_node_index()
+ )
+ # Raise error if relationship index type
+ if index_type == "RELATIONSHIP":
+ raise ValueError(
+ "`from_existing_graph` method does not support "
+ " existing relationship vector index. "
+ "Please use `from_existing_relationship_index` method"
+ )
+ # If the vector index doesn't exist yet
+ if not index_type:
+ store.create_new_node_index(node_label=node_label)
+ # If the index already exists, check if embedding dimensions match
+ elif (
+ embedding_dimension and not store.embedding_dimension == embedding_dimension
+ ):
+ raise ValueError(
+ f"Index on Node {store.node_label} already exists."
+ "The provided embedding function and vector index "
+ "dimensions do not match.\n"
+ f"Embedding function dimension: {store.embedding_dimension}\n"
+ f"Vector index dimension: {embedding_dimension}"
+ )
+ # FTS index for Hybrid search
+ if search_type == SearchType.HYBRID:
+ fts_node_label = store.retrieve_existing_fts_index()
+ # If the FTS index doesn't exist yet
+ if not fts_node_label:
+ store.create_new_keyword_index(text_node_properties)
+ else: # Validate that FTS and Vector index use the same information
+ if not fts_node_label == store.node_label:
+ raise ValueError(
+ "Vector and keyword index don't index the same node label"
+ )
+ # Populate embeddings
+ while True:
+ fetch_query = (
+ f"MATCH (n:`{node_label}`) "
+ f"WHERE n.`{embedding_node_property}` IS null "
+ "AND any(k IN $props WHERE n[k] IS NOT null) "
+ "RETURN id(n) AS id, "
+ "coalesce(n.text, '') AS text "
+ "LIMIT 1000"
+ )
+ data = store._query(fetch_query, params={"props": text_node_properties})
+ if not data:
+ break
+ text_embeddings = embedding.embed_documents([el[1] for el in data])
+ params = {
+ "data": [
+ {"id": el[0], "embedding": embedding}
+ for el, embedding in zip(data, text_embeddings)
+ ]
+ }
+ store._query(
+ "UNWIND $data AS row "
+ f"MATCH (n:`{node_label}`) "
+ "WHERE id(n) = row.id "
+ f"SET n.`{embedding_node_property}` = vecf32(row.embedding)"
+ "RETURN count(*)",
+ params=params,
+ )
+ # If embedding calculation should be stopped
+ if len(data) < 1000:
+ break
+ return store
+ @classmethod
+ def from_documents(
+ cls: Type[FalkorDBVector],
+ documents: List[Document],
+ embedding: Embeddings,
+ distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
+ ids: Optional[List[str]] = None,
+ **kwargs: Any,
+ ) -> FalkorDBVector:
+ """
+ Return FalkorDBVector initialized from documents and embeddings.
+ """
+ texts = [d.page_content for d in documents]
+ metadatas = [d.metadata for d in documents]
+ return cls.from_texts(
+ texts=texts,
+ embedding=embedding,
+ distance_strategy=distance_strategy,
+ metadatas=metadatas,
+ ids=ids,
+ **kwargs,
+ )
+ @classmethod
+ def from_embeddings(
+ cls,
+ text_embeddings: List[Tuple[str, List[float]]],
+ embedding: Embeddings,
+ metadatas: Optional[List[dict]] = None,
+ distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
+ ids: Optional[List[str]] = None,
+ pre_delete_collection: bool = False,
+ **kwargs: Any,
+ ) -> FalkorDBVector:
+ """Construct FalkorDBVector wrapper from raw documents and pre-
+ generated embeddings.
+ Return FalkorDBVector initialized from documents and embeddings.
+ Example:
+ .. code-block:: python
+ from langchain_community.vectorstores.falkordb_vector import (
+ FalkorDBVector )
+ from langchain_community.embeddings import OpenAIEmbeddings
+ embeddings = OpenAIEmbeddings()
+ text_embeddings = embeddings.embed_documents(texts)
+ text_embedding_pairs = list(zip(texts, text_embeddings))
+ vectorstore = FalkorDBVector.from_embeddings(
+ text_embedding_pairs, embeddings
+ )
+ """
+ texts = [t[0] for t in text_embeddings]
+ embeddings = [t[1] for t in text_embeddings]
+ return cls.__from(
+ texts,
+ embeddings,
+ embedding,
+ metadatas=metadatas,
+ ids=ids,
+ distance_strategy=distance_strategy,
+ pre_delete_collection=pre_delete_collection,
+ **kwargs,
+ )
+ def similarity_search(
+ self,
+ query: str,
+ k: int = 4,
+ params: Dict[str, Any] = {},
+ filter: Optional[Dict[str, Any]] = None,
+ **kwargs: Any,
+ ) -> List[Document]:
+ """Run similarity search with FalkorDBVector.
+ Args:
+ query (str): Query text to search for.
+ k (int): Number of results to return. Defaults to 4.
+ params (Dict[str, Any]): The search params for the index type.
+ Defaults to empty dict.
+ filter (Optional[Dict[str, Any]]): Dictionary of arguments(s) to
+ filter on metadata.
+ Defaults to None.
+ Returns:
+ List of Documents most similar to the query.
+ """
+ embedding = self.embedding.embed_query(text=query)
+ return self.similarity_search_by_vector(
+ embedding=embedding,
+ k=k,
+ query=query,
+ params=params,
+ filter=filter,
+ **kwargs,
+ )
+ def similarity_search_by_vector(
+ self,
+ embedding: List[float],
+ k: int = 4,
+ filter: Optional[Dict[str, Any]] = None,
+ params: Dict[str, Any] = {},
+ **kwargs: Any,
+ ) -> List[Document]:
+ """Return docs most similar to embedding vector.
+ Args:
+ embedding: Embedding to look up documents similar to.
+ k: Number of Documents to return. Defaults to 4.
+ filter (Optional[Dict[str, Any]]): Dictionary of argument(s) to
+ filter on metadata.
+ Defaults to None.
+ params (Dict[str, Any]): The search params for the index type.
+ Defaults to empty dict.
+ Returns:
+ List of Documents most similar to the query vector.
+ """
+ docs_and_scores = self.similarity_search_with_score_by_vector(
+ embedding=embedding, k=k, filter=filter, params=params, **kwargs
+ )
+ return [doc for doc, _ in docs_and_scores]
+ def similarity_search_with_score_by_vector(
+ self,
+ embedding: List[float],
+ k: int = 4,
+ params: Dict[str, Any] = {},
+ filter: Optional[Dict[str, Any]] = None,
+ **kwargs: Any,
+ ) -> List[Tuple[Document, float]]:
+ """
+ Perform a similarity search in the FalkorDB database using a
+ given vector and return the top k similar documents with their scores.
+ This method uses a Cypher query to find the top k documents that
+ are most similar to a given embedding. The similarity is measured
+ using a vector index in the FalkorDB database. The results are returned
+ as a list of tuples, each containing a Document object and its similarity
+ score.
+ Args:
+ embedding (List[float]): The embedding vector to compare against.
+ k (int, optional): The number of top similar documents to retrieve.
+ filter (Optional[Dict[str, Any]]): Dictionary of argument(s) to
+ filter on metadata.
+ Defaults to None.
+ params (Dict[str, Any]): The Search params for the index type.
+ Defaults to empty dict.
+ Returns:
+ List[Tuple[Document, float]]: A list of tuples, each containing
+ a Document object and its similarity score.
+ """
+ if filter:
+ if self.search_type == SearchType.HYBRID:
+ raise ValueError(
+ "Metadata filtering can't be use in combination with "
+ "a hybrid search approach"
+ )
+ base_index_query = (
+ f"MATCH (n:{self.node_label}) WHERE "
+ f"n.{self.embedding_node_property} IS NOT NULL AND "
+ )
+ base_cosine_query = (
+ " WITH n as node, "
+ f" vec.cosineDistance(n.{self.embedding_node_property}"
+ ", vecf32($embedding)) as score "
+ )
+ filter_snippets, filter_params = construct_metadata_filter(filter)
+ index_query = base_index_query + filter_snippets + base_cosine_query
+ else:
+ index_query = _get_search_index_query(self.search_type, self._index_type)
+ filter_params = {}
+ if self._index_type == IndexType.RELATIONSHIP:
+ if kwargs.get("return_embeddings"):
+ if self.metadata:
+ # Construct the metadata part based on self.metadata
+ metadata_fields = ", ".join(
+ f"`{key}`: relationship.{key}" for key in self.metadata
+ )
+ default_retrieval = (
+ f"RETURN relationship.{self.text_node_property} "
+ "AS text, score, "
+ f"{{text: relationship.{self.text_node_property}, "
+ f"embedding: relationship.{self.embedding_node_property}, "
+ f"id: relationship.id, source: relationship.source, "
+ f"{metadata_fields}}} AS metadata"
+ )
+ else:
+ default_retrieval = (
+ f"RETURN relationship.{self.text_node_property}"
+ " AS text, score, "
+ f"{{text: relationship.{self.text_node_property}, "
+ f"embedding: relationship.{self.embedding_node_property}, "
+ f"id: relationship.id, source: relationship.source}}"
+ " AS metadata"
+ )
+ else:
+ if self.metadata:
+ # Construct the metadata part based on self.metadata
+ metadata_fields = ", ".join(
+ f"`{key}`: relationship.{key}" for key in self.metadata
+ )
+ default_retrieval = (
+ f"RETURN relationship.{self.text_node_property} "
+ "AS text, score, "
+ f"{{text: relationship.{self.text_node_property}, "
+ f"id: relationship.id, source: relationship.source, "
+ f"{metadata_fields}}} AS metadata"
+ )
+ else:
+ default_retrieval = (
+ f"RETURN relationship.{self.text_node_property}"
+ " AS text, score, "
+ f"{{text: relationship.{self.text_node_property}, "
+ f"id: relationship.id, source: relationship.source}}"
+ " AS metadata"
+ )
+ else:
+ if kwargs.get("return_embeddings"):
+ if self.metadata:
+ # Construct the metadata part based on self.metadata
+ metadata_fields = ", ".join(
+ f"`{key}`: node.`{key}`" for key in self.metadata
+ )
+ default_retrieval = (
+ f"RETURN node.{self.text_node_property} AS text, score, "
+ f"{{text: node.{self.text_node_property}, "
+ f"embedding: node.{self.embedding_node_property}, "
+ f"id: node.id, source: node.source, "
+ f"{metadata_fields}}} AS metadata"
+ )
+ else:
+ default_retrieval = (
+ f"RETURN node.{self.text_node_property} AS text, score, "
+ f"{{text: node.{self.text_node_property}, "
+ f"embedding: node.{self.embedding_node_property}, "
+ f"id: node.id, source: node.source}} AS metadata"
+ )
+ else:
+ if self.metadata:
+ # Construct the metadata part based on self.metadata
+ metadata_fields = ", ".join(
+ f"`{key}`: node.`{key}`" for key in self.metadata
+ )
+ default_retrieval = (
+ f"RETURN node.{self.text_node_property} AS text, score, "
+ f"{{text: node.{self.text_node_property}, "
+ f"id: node.id, source: node.source, "
+ f"{metadata_fields}}} AS metadata"
+ )
+ else:
+ default_retrieval = (
+ f"RETURN node.{self.text_node_property} AS text, score, "
+ f"{{text: node.{self.text_node_property}, "
+ f"id: node.id, source: node.source}} AS metadata"
+ )
+ retrieval_query = (
+ self.retrieval_query if self.retrieval_query else default_retrieval
+ )
+ read_query = index_query + retrieval_query
+ parameters = {
+ "entity_property": self.embedding_node_property,
+ "k": k,
+ "embedding": embedding,
+ "query": kwargs["query"],
+ **params,
+ **filter_params,
+ }
+ if self._index_type == "NODE":
+ parameters["entity_label"] = self.node_label
+ elif self._index_type == "RELATIONSHIP":
+ parameters["entity_label"] = self.relation_type
+ results = self._query(read_query, params=parameters)
+ if not results:
+ if not self.retrieval_query:
+ raise ValueError(
+ f"Make sure that none of the `{self.text_node_property}` "
+ f"properties on nodes with label `{self.node_label}` "
+ "are missing or empty"
+ )
+ else:
+ raise ValueError(
+ "Inspect the `retrieval_query` and ensure it doesn't "
+ "return None for the `text` column"
+ )
+ elif any(result[0] is None for result in results):
+ if not self.retrieval_query:
+ raise ValueError(
+ f"Make sure that none of the `{self.text_node_property}` "
+ f"properties on nodes with label `{self.node_label}` "
+ "are missing or empty"
+ )
+ else:
+ raise ValueError(
+ "Inspect the `retrieval_query` and ensure it doesn't "
+ "return None for the `text` column"
+ )
+ # Check if embeddings are missing when they are expected
+ if kwargs.get("return_embeddings") and any(
+ result[2]["embedding"] is None for result in results
+ ):
+ if not self.retrieval_query:
+ raise ValueError(
+ f"Make sure that none of the `{self.embedding_node_property}` "
+ f"properties on nodes with label `{self.node_label}` "
+ "are missing or empty"
+ )
+ else:
+ raise ValueError(
+ "Inspect the `retrieval_query` and ensure it doesn't "
+ "return None for the `embedding` metadata column"
+ )
+ try:
+ docs = [
+ (
+ Document(
+ # Use the first element for text
+ page_content=result[0],
+ metadata={
+ k: v for k, v in result[2].items() if v is not None
+ }, # Use the third element for metadata
+ ),
+ result[1], # Use the second element for score
+ )
+ for result in results
+ ]
+ except AttributeError:
+ try:
+ sorted_results = sorted(results, key=lambda r: r[2], reverse=True)
+ docs = [
+ (
+ Document(
+ # Use the first element for text
+ page_content=result[0],
+ metadata={
+ k: v for k, v in result[1].items() if v is not None
+ }, # Use the second element as metadata
+ ),
+ result[2], # Use the second element for score
+ )
+ for result in sorted_results
+ ]
+ except Exception as e:
+ raise ValueError(f"An error occured: {e}")
+ return docs
def similarity_search_with_score(
    self,
    query: str,
    k: int = 4,
    params: Dict[str, Any] = {},
    filter: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> List[Tuple[Document, float]]:
    """Embed a text query and return the closest documents with their scores.

    Args:
        query: Text to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        params (Dict[str, Any]): The search params for the index type.
            Defaults to empty dict. It is only forwarded, never mutated here.
        filter (Optional[Dict[str, Any]]): Dictionary of argument(s) to
            filter on metadata. Defaults to None.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``similarity_search_with_score_by_vector``.

    Returns:
        List of ``(Document, score)`` tuples, exactly as produced by
        ``similarity_search_with_score_by_vector``.
    """
    query_vector = self.embedding.embed_query(query)
    # The raw text travels alongside the vector under the `query` keyword.
    return self.similarity_search_with_score_by_vector(
        embedding=query_vector,
        k=k,
        query=query,
        params=params,
        filter=filter,
        **kwargs,
    )
def similarity_search_with_relevance_scores(
    self,
    query: str,
    k: int = 4,
    filter: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> List[Tuple[Document, float]]:
    """Return documents and scores for ``query``.

    This is a thin pass-through: the scores are whatever
    ``similarity_search_with_score`` reports; no rescaling is applied here.

    Args:
        query: Text to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        filter: Optional metadata filter, forwarded unchanged.
        **kwargs: Extra keyword arguments, forwarded unchanged.

    Returns:
        List of ``(Document, score)`` tuples.
    """
    return self.similarity_search_with_score(
        query=query,
        k=k,
        filter=filter,
        **kwargs,
    )
def max_marginal_relevance_search(
    self,
    query: str,
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    filter: Optional[dict] = None,
    **kwargs: Any,
) -> List[Document]:
    """Return docs selected using the maximal marginal relevance.

    Maximal marginal relevance optimizes for similarity to query AND
    diversity among selected documents.

    Args:
        query: search query text.
        k: Number of Documents to return. Defaults to 4.
        fetch_k: Number of Documents to fetch to pass to MMR algorithm.
        lambda_mult: Number between 0 and 1 that determines the degree
            of diversity among the results with 0 corresponding
            to maximum diversity and 1 to minimum diversity.
            Defaults to 0.5.
        filter: Filter on metadata properties, e.g.
            {
                "str_property": "foo",
                "int_property": 123
            }
        **kwargs: Extra keyword arguments forwarded to
            ``similarity_search_with_score_by_vector``.

    Returns:
        List of Documents selected by maximal marginal relevance.
    """
    query_vector = self.embedding.embed_query(query)

    # Over-fetch `fetch_k` candidates and ask for their stored embeddings,
    # which the MMR step below needs.
    candidates = self.similarity_search_with_score_by_vector(
        embedding=query_vector,
        query=query,
        k=fetch_k,
        return_embeddings=True,
        filter=filter,
        **kwargs,
    )

    candidate_vectors = []
    for candidate_doc, _score in candidates:
        candidate_vectors.append(candidate_doc.metadata["embedding"])

    chosen_positions = maximal_marginal_relevance(
        np.array(query_vector), candidate_vectors, lambda_mult=lambda_mult, k=k
    )

    picked_docs = []
    for position in chosen_positions:
        picked_docs.append(candidates[position][0])

    # The embedding was only needed for the selection; strip it from the
    # metadata of the documents handed back to the caller.
    for picked in picked_docs:
        del picked.metadata["embedding"]
    return picked_docs
def _select_relevance_score_fn(self) -> Callable[[float], float]:
    """Pick the function that maps a raw score to a relevance score.

    The 'correct' relevance function
    may differ depending on a few things, including:
    - the distance / similarity metric used by the VectorStore
    - the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
    - embedding dimensionality
    - etc.

    Returns:
        ``self.override_relevance_score_fn`` when one was supplied; otherwise
        the identity function for the COSINE and EUCLIDEAN_DISTANCE
        strategies (scores are passed through unchanged).

    Raises:
        ValueError: If no override is set and the distance strategy is
            neither COSINE nor EUCLIDEAN_DISTANCE.
    """
    if self.override_relevance_score_fn is not None:
        return self.override_relevance_score_fn

    # Default strategy is to rely on distance strategy provided
    # in vectorstore constructor. Both supported strategies use the raw score.
    if self._distance_strategy in (
        DistanceStrategy.COSINE,
        DistanceStrategy.EUCLIDEAN_DISTANCE,
    ):
        return lambda x: x

    raise ValueError(
        "No supported normalization function"
        f" for distance_strategy of {self._distance_strategy}. "
        "Consider providing relevance_score_fn to FalkorDBVector constructor."
    )
def update_documents(
    self,
    document_id: str,
    document: Document,
) -> None:
    """Update an existing document in the store, looked up by ``document_id``.

    The node's ``text`` property is overwritten with
    ``document.page_content`` and every entry of ``document.metadata`` is
    written as a node property, one query per key.

    Args:
        document_id: The id of the document to be updated.
        document: The new Document instance with the updated content.

    Returns:
        None

    Raises:
        ValueError: If no node with ``document_id`` exists, or if a metadata
            key contains a backtick and therefore cannot be safely quoted
            as a property name.
    """
    # Ensure the document_id exists in the store. The RETURN clause is
    # required: without it the query yields no rows to test against.
    existing_document = self._query(
        """
        MATCH (n)
        WHERE n.id = $document_id
        RETURN n
        """,
        params={"document_id": document_id},
    )
    if not existing_document:
        raise ValueError(f"Document with id {document_id} not found in the store.")

    # Validate metadata keys before writing anything, so a bad key cannot
    # leave the node half-updated.
    metadata = document.metadata or {}
    for key in metadata:
        if "`" in str(key):
            raise ValueError(
                f"Metadata key {key!r} contains a backtick and cannot be "
                "used as a property name."
            )

    # Update the document's text content
    self._query(
        """
        MATCH (n)
        WHERE n.id = $document_id
        SET n.text = $new_content
        """,
        params={"document_id": document_id, "new_content": document.page_content},
    )

    # Property names cannot be passed as query parameters, so the key is
    # embedded in the query text; backtick-quoting keeps keys with spaces or
    # punctuation from altering the query.
    for key, value in metadata.items():
        self._query(
            f"""
            MATCH (n)
            WHERE n.id = $document_id
            SET n.`{key}` = $value
            """,
            params={"document_id": document_id, "value": value},
        )
def delete(
    self,
    ids: Optional[List[str]] = None,  # Make `ids` optional
    **kwargs: Any,
) -> Optional[bool]:  # Return type matches the superclass signature
    """Delete the nodes whose ``id`` property matches one of ``ids``.

    Ids are processed in order. If an id is not found, a ``ValueError`` is
    raised at that point; ids handled earlier in the list have already been
    deleted.

    Args:
        ids: A list of IDs of the documents to be deleted. Must not be None.
        **kwargs: Accepted for signature compatibility; not used.

    Returns:
        Optional[bool]: True once every requested id has been processed.

    Raises:
        ValueError: If ``ids`` is None, or if one of the ids does not match
            any node in the store.
    """
    if ids is None:
        raise ValueError("You must provide at least one ID to delete.")

    for item_id in ids:
        # Ensure the document exists in the store. The RETURN clause is
        # required so the lookup actually yields rows.
        existing_document = self._query(
            """
            MATCH (n)
            WHERE n.id = $item_id
            RETURN n
            """,
            params={"item_id": item_id},
        )
        if not existing_document:
            raise ValueError(f"Document with id {item_id} not found in the store.")

        # Delete the document node from the store
        self._query(
            """
            MATCH (n)
            WHERE n.id = $item_id
            DELETE n
            """,
            params={"item_id": item_id},
        )
    return True
diff --git a/libs/community/tests/integration_tests/vectorstores/test_falkordb_vector_integration.py b/libs/community/tests/integration_tests/vectorstores/test_falkordb_vector_integration.py
new file mode 100644
index 0000000000000..9220033f01b35
--- /dev/null
+++ b/libs/community/tests/integration_tests/vectorstores/test_falkordb_vector_integration.py
@@ -0,0 +1,671 @@
"""
Integration tests for FalkorDB vector store functionality.

These tests validate the end-to-end process of constructing, indexing,
and searching vector embeddings in a FalkorDB instance. They include:
- Setting up the FalkorDB vector store with a local instance.
- Indexing documents with fake embeddings.
- Performing vector searches and validating results.

These tests are conducted using a local FalkorDB instance but can also
be run against a Cloud FalkorDB instance. Ensure that appropriate host
and port configurations are set up before running the tests.
"""
+import os
+from math import isclose
+from typing import Any, Dict, List
+from dotenv import load_dotenv
+from langchain_core.documents import Document
+from langchain_community.vectorstores.falkordb_vector import (
+ FalkorDBVector,
+ SearchType,
+ process_index_data,
+from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
# Load environment variables from .env file
load_dotenv()

# Connection settings; both can be overridden through the environment.
host = os.getenv("FALKORDB_HOST", "localhost")
port = int(os.getenv("FALKORDB_PORT", 6379))

# Length of every fake embedding vector produced in this module. It is also
# the dimension passed when a vector index is created explicitly, so the two
# always agree.
# NOTE(review): the exact value is not pinned by anything visible in this
# file - confirm against the intended index dimension.
OS_TOKEN_COUNT = 1536

texts = ["foo", "bar", "baz", "It is the end of the world. Take shelter!"]
def drop_vector_indexes(store: FalkorDBVector) -> None:
    """Drop every vector index reported by ``CALL db.indexes()``.

    The index listing is fetched through ``store._query`` and normalised by
    ``process_index_data``. All matching entries are collected first; the
    drops are issued afterwards on ``store._database``.

    Args:
        store: The vector store whose database should be cleaned up.
    """
    raw_listing = store._query(
        """
        CALL db.indexes()
        """
    )
    index_records: List[Dict[str, Any]] = process_index_data(raw_listing)

    # (label, property, entity type) for each VECTOR index found.
    vector_targets: List[Any] = []
    if isinstance(index_records, list):
        for record in index_records:
            if not isinstance(record, dict):
                continue
            if record.get("index_type") != "VECTOR":
                continue
            vector_targets.append(
                (
                    record["entity_label"],
                    record["entity_property"],
                    record["entity_type"],
                )
            )

    for label, prop, kind in vector_targets:
        if kind == "NODE":
            store._database.drop_node_vector_index(
                label=label,
                attribute=prop,
            )
        elif kind == "RELATIONSHIP":
            store._database.drop_edge_vector_index(
                label=label,
                attribute=prop,
            )
class FakeEmbeddingsWithOsDimension(FakeEmbeddings):
    """Deterministic fake embeddings of length ``OS_TOKEN_COUNT``.

    Every vector is all ones except for its last component, which encodes a
    1-based position.
    """

    def embed_documents(self, embedding_texts: List[str]) -> List[List[float]]:
        """Return one vector per text; the last component is the text's
        1-based position in ``embedding_texts``."""
        vectors: List[List[float]] = []
        for position, _ in enumerate(embedding_texts):
            vectors.append([float(1.0)] * (OS_TOKEN_COUNT - 1) + [float(position + 1)])
        return vectors

    def embed_query(self, text: str) -> List[float]:
        """Return the vector for ``text``; the last component is its 1-based
        position in the module-level ``texts`` list.

        ``texts.index`` raises ``ValueError`` for a string not in that list.
        """
        position = texts.index(text)
        return [float(1.0)] * (OS_TOKEN_COUNT - 1) + [float(position + 1)]
def test_falkordbvector() -> None:
    """Build a store from raw texts and check the top hit for "foo"."""
    store = FalkorDBVector.from_texts(
        texts=texts,
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        pre_delete_collection=True,
    )
    hits = store.similarity_search("foo", k=1)
    assert type(hits) is list
    top_hit = hits[0]
    assert type(top_hit) is Document
    assert top_hit.page_content == "foo"
    drop_vector_indexes(store)
def test_falkordbvector_embeddings() -> None:
    """Build a store from precomputed (text, embedding) pairs and search it."""
    vectors = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    pairs = list(zip(texts, vectors))
    store = FalkorDBVector.from_embeddings(
        text_embeddings=pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        pre_delete_collection=True,
    )
    hits = store.similarity_search("foo", k=1)
    assert type(hits) is list
    top_hit = hits[0]
    assert type(top_hit) is Document
    assert top_hit.page_content == "foo"
    drop_vector_indexes(store)
def test_falkordbvector_catch_wrong_node_label() -> None:
    """Loading an existing index with a misspelled node label must fail.

    The lookup has to raise a ``ValueError`` with the exact message below.
    If nothing is raised the test fails explicitly instead of passing
    silently, and the indexes are dropped even when an assertion fails.
    """
    text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    text_embedding_pairs = list(zip(texts, text_embeddings))
    docsearch = FalkorDBVector.from_embeddings(
        text_embeddings=text_embedding_pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        pre_delete_collection=True,
    )
    try:
        FalkorDBVector.from_existing_index(
            embedding=FakeEmbeddingsWithOsDimension(),
            host=host,
            port=port,
            node_label="test",
        )
    except Exception as e:
        assert type(e) is ValueError
        assert str(e) == (
            "The specified vector index node label "
            + "`test` does not exist. Make sure to"
            + " check if you spelled the node label correctly"
        )
    else:
        # Without this branch a missing exception would go unnoticed.
        raise AssertionError(
            "from_existing_index did not raise for an unknown node label"
        )
    finally:
        drop_vector_indexes(docsearch)
def test_falkordbvector_with_metadatas() -> None:
    """Metadata supplied at construction time comes back on the top hit."""
    page_metadata = [{"page": str(idx)} for idx, _ in enumerate(texts)]
    store = FalkorDBVector.from_texts(
        texts=texts,
        embedding=FakeEmbeddingsWithOsDimension(),
        metadatas=page_metadata,
        host=host,
        port=port,
        pre_delete_collection=True,
    )
    hits = store.similarity_search("foo", k=1)
    assert type(hits) is list
    top_hit = hits[0]
    assert type(top_hit) is Document
    assert top_hit.metadata.get("page") == "0"
    drop_vector_indexes(store)
def test_falkordbvector_with_metadatas_with_scores() -> None:
    """Scored search returns the expected document; score rounded to 1 dp."""
    page_metadata = [{"page": str(idx)} for idx, _ in enumerate(texts)]
    store = FalkorDBVector.from_texts(
        texts=texts,
        embedding=FakeEmbeddingsWithOsDimension(),
        metadatas=page_metadata,
        host=host,
        port=port,
        pre_delete_collection=True,
    )

    rounded_hits = []
    for doc, score in store.similarity_search_with_score("foo", k=1):
        rounded_hits.append((doc, round(score, 1)))

    expected_doc = Document(
        metadata={
            "text": "foo",
            "id": "acbd18db4cc2f85cedef654fccc4a4d8",
            "page": "0",
        },
        page_content="foo",
    )
    assert rounded_hits == [(expected_doc, 0.0)]
    drop_vector_indexes(store)
def test_falkordb_relevance_score() -> None:
    """Check documents and scores from similarity_search_with_relevance_scores.

    The three nearest documents to "foo" are expected in order with scores
    0.0, 1.0 and 2.0. Scores are compared with both a relative and an
    absolute tolerance: a relative tolerance alone would require the first
    score to be exactly 0.0.
    """
    metadatas = [{"page": str(i)} for i in range(len(texts))]
    docsearch = FalkorDBVector.from_texts(
        texts=texts,
        embedding=FakeEmbeddingsWithOsDimension(),
        metadatas=metadatas,
        host=host,
        port=port,
        pre_delete_collection=True,
    )
    output = docsearch.similarity_search_with_relevance_scores("foo", k=3)

    # (text, id, page, expected score) for each expected hit, in order.
    expected_rows = [
        ("foo", "acbd18db4cc2f85cedef654fccc4a4d8", "0", 0.0),
        ("bar", "37b51d194a7513e45b56f6524f2d51f2", "1", 1.0),
        ("baz", "73feffa4b7f6bb68e44cf984c85f6e88", "2", 2.0),
    ]

    # Check if the length of the outputs matches
    assert len(output) == len(expected_rows)

    # Check if each document and its relevance score is close to the expected value
    for (doc, score), (text, doc_id, page, expected_score) in zip(
        output, expected_rows
    ):
        assert doc.page_content == text
        assert doc.metadata == {"text": text, "id": doc_id, "page": page}
        assert isclose(score, expected_score, rel_tol=1e-5, abs_tol=1e-5)
    drop_vector_indexes(docsearch)
def test_falkordbvector_retriever_search_threshold() -> None:
    """A similarity_score_threshold retriever returns the "foo" document."""
    page_metadata = [{"page": str(idx)} for idx, _ in enumerate(texts)]
    store = FalkorDBVector.from_texts(
        texts=texts,
        embedding=FakeEmbeddingsWithOsDimension(),
        metadatas=page_metadata,
        host=host,
        port=port,
        pre_delete_collection=True,
    )
    threshold_retriever = store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"k": 1, "score_threshold": 0.9999},
    )
    retrieved = threshold_retriever.invoke("foo")
    expected_doc = Document(
        metadata={
            "text": "foo",
            "id": "acbd18db4cc2f85cedef654fccc4a4d8",
            "page": "0",
        },
        page_content="foo",
    )
    assert retrieved == [expected_doc]
    drop_vector_indexes(store)
def test_custom_return_falkordbvector() -> None:
    """A custom retrieval_query determines the text and metadata returned."""
    custom_query = "RETURN 'foo' AS text, score, {test: 'test'} AS metadata"
    store = FalkorDBVector.from_texts(
        texts=["test"],
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        pre_delete_collection=True,
        retrieval_query=custom_query,
    )
    hits = store.similarity_search("foo", k=1)
    expected_doc = Document(page_content="foo", metadata={"test": "test"})
    assert hits == [expected_doc]
    drop_vector_indexes(store)
def test_falkordb_hybrid() -> None:
    """Hybrid search over precomputed embeddings returns "foo" as top hit."""
    vectors = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    pairs = list(zip(texts, vectors))
    store = FalkorDBVector.from_embeddings(
        text_embeddings=pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        pre_delete_collection=True,
        search_type=SearchType.HYBRID,
    )
    hits = store.similarity_search("foo", k=1)
    expected_doc = Document(
        metadata={"text": "foo", "id": "acbd18db4cc2f85cedef654fccc4a4d8"},
        page_content="foo",
    )
    assert hits == [expected_doc]
    drop_vector_indexes(store)
def test_falkordb_hybrid_deduplicate() -> None:
    """Hybrid search with k=3 returns three distinct documents in order."""
    vectors = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    pairs = list(zip(texts, vectors))
    store = FalkorDBVector.from_embeddings(
        text_embeddings=pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        pre_delete_collection=True,
        search_type=SearchType.HYBRID,
    )
    hits = store.similarity_search("foo", k=3)

    doc_baz = Document(
        metadata={"text": "baz", "id": "73feffa4b7f6bb68e44cf984c85f6e88"},
        page_content="baz",
    )
    doc_foo = Document(
        metadata={"text": "foo", "id": "acbd18db4cc2f85cedef654fccc4a4d8"},
        page_content="foo",
    )
    doc_bar = Document(
        metadata={"text": "bar", "id": "37b51d194a7513e45b56f6524f2d51f2"},
        page_content="bar",
    )
    assert hits == [doc_baz, doc_foo, doc_bar]
    drop_vector_indexes(store)
def test_falkordb_hybrid_retrieval_query() -> None:
    """A custom retrieval_query is honoured in hybrid search mode."""
    vectors = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    pairs = list(zip(texts, vectors))
    custom_query = "RETURN 'moo' AS text, score, {test: 'test'} AS metadata"
    store = FalkorDBVector.from_embeddings(
        text_embeddings=pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        pre_delete_collection=True,
        search_type=SearchType.HYBRID,
        retrieval_query=custom_query,
    )
    hits = store.similarity_search("foo", k=1)
    expected_doc = Document(page_content="moo", metadata={"test": "test"})
    assert hits == [expected_doc]
    drop_vector_indexes(store)
def test_falkordbvector_missing_keyword() -> None:
    """Hybrid lookup of a non-existent node label must raise.

    The raised exception's message has to match the text below. If nothing
    is raised the test fails explicitly instead of passing silently, and
    the indexes are dropped even when an assertion fails.
    """
    text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    text_embedding_pairs = list(zip(texts, text_embeddings))
    node_label = "vector"
    docsearch = FalkorDBVector.from_embeddings(
        text_embeddings=text_embedding_pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        pre_delete_collection=True,
    )
    try:
        FalkorDBVector.from_existing_index(
            embedding=FakeEmbeddingsWithOsDimension(),
            host=host,
            port=port,
            node_label=node_label,
            search_type=SearchType.HYBRID,
        )
    except Exception as e:
        assert str(e) == (
            "The specified vector index node label "
            + f"`{node_label}` does not exist. Make sure"
            + " to check if you spelled the node label correctly"
        )
    else:
        # Without this branch a missing exception would go unnoticed.
        raise AssertionError(
            "from_existing_index did not raise for an unknown node label"
        )
    finally:
        drop_vector_indexes(docsearch)
def test_falkordb_hybrid_from_existing() -> None:
    """Re-open a hybrid index via from_existing_index and search through it."""
    vectors = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    pairs = list(zip(texts, vectors))
    original_store = FalkorDBVector.from_embeddings(
        text_embeddings=pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        pre_delete_collection=True,
        search_type=SearchType.HYBRID,
    )
    reopened_store = FalkorDBVector.from_existing_index(
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        node_label="Chunk",  # default node label
        search_type=SearchType.HYBRID,
    )
    hits = reopened_store.similarity_search("foo", k=1)
    expected_doc = Document(
        metadata={"text": "foo", "id": "acbd18db4cc2f85cedef654fccc4a4d8"},
        page_content="foo",
    )
    assert hits == [expected_doc]
    drop_vector_indexes(reopened_store)
    drop_vector_indexes(original_store)
def test_falkordbvector_from_existing_graph() -> None:
    """from_existing_graph with a single text property."""
    seed_store = FalkorDBVector.from_texts(
        texts=["test"],
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        node_label="Foo",
        embedding_node_property="vector",
        text_node_property="info",
        pre_delete_collection=True,
    )
    # Replace the seeded nodes with two plain :Test nodes.
    seed_store._query("MATCH (n) DELETE n")
    seed_store._query("CREATE (:Test {name:'Foo'}), (:Test {name:'Bar'})")
    assert seed_store.database_name, "Database name cannot be empty or None"

    graph_store = FalkorDBVector.from_existing_graph(
        embedding=FakeEmbeddingsWithOsDimension(),
        database=seed_store.database_name,
        host=host,
        port=port,
        node_label="Test",
        text_node_properties=["name"],
        embedding_node_property="embedding",
    )
    hits = graph_store.similarity_search("foo", k=2)
    first_hit = hits[0]
    assert [first_hit] == [Document(page_content="\nname: Foo")]
    drop_vector_indexes(graph_store)
def test_falkordb_from_existing_graph_mulitiple_properties() -> None:
    """from_existing_graph with two text properties."""
    seed_store = FalkorDBVector.from_texts(
        texts=["test"],
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        node_label="Foo",
        embedding_node_property="vector",
        text_node_property="info",
        pre_delete_collection=True,
    )
    # Replace the seeded nodes with two :Test nodes, one carrying `name2`.
    seed_store._query("MATCH (n) DELETE n")
    seed_store._query(
        "CREATE (:Test {name:'Foo', name2: 'Fooz'}), (:Test {name:'Bar'})"
    )
    assert seed_store.database_name, "Database name cannot be empty or None"

    graph_store = FalkorDBVector.from_existing_graph(
        embedding=FakeEmbeddingsWithOsDimension(),
        database=seed_store.database_name,
        host=host,
        port=port,
        node_label="Test",
        text_node_properties=["name", "name2"],
        embedding_node_property="embedding",
    )
    hits = graph_store.similarity_search("foo", k=2)
    first_hit = hits[0]
    assert [first_hit] == [Document(page_content="\nname: Foo\nname2: Fooz")]
    drop_vector_indexes(graph_store)
    drop_vector_indexes(seed_store)
def test_falkordbvector_special_character() -> None:
    """Hybrid search with a query containing punctuation ('.' and '!')."""
    vectors = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    pairs = list(zip(texts, vectors))
    store = FalkorDBVector.from_embeddings(
        text_embeddings=pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        pre_delete_collection=True,
        search_type=SearchType.HYBRID,
    )
    hits = store.similarity_search(
        "It is the end of the world. Take shelter!", k=1
    )
    expected_doc = Document(
        metadata={
            "text": "It is the end of the world. Take shelter!",
            "id": "84768c9c477cbe05fbafbe7247990051",
        },
        page_content="It is the end of the world. Take shelter!",
    )
    assert hits == [expected_doc]
    drop_vector_indexes(store)
def test_falkordb_from_existing_graph_mulitiple_properties_hybrid() -> None:
    """from_existing_graph with two text properties in hybrid search mode."""
    seed_store = FalkorDBVector.from_texts(
        texts=["test"],
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        node_label="Foo",
        embedding_node_property="vector",
        text_node_property="info",
        pre_delete_collection=True,
    )
    # Replace the seeded nodes with two :Test nodes, one carrying `name2`.
    seed_store._query("MATCH (n) DELETE n")
    seed_store._query(
        "CREATE (:Test {name:'Foo', name2: 'Fooz'}), (:Test {name:'Bar'})"
    )
    assert seed_store.database_name, "Database name cannot be empty or None"

    graph_store = FalkorDBVector.from_existing_graph(
        embedding=FakeEmbeddingsWithOsDimension(),
        database=seed_store.database_name,
        host=host,
        port=port,
        node_label="Test",
        text_node_properties=["name", "name2"],
        embedding_node_property="embedding",
        search_type=SearchType.HYBRID,
    )
    hits = graph_store.similarity_search("foo", k=2)
    first_hit = hits[0]
    assert [first_hit] == [Document(page_content="\nname: Foo\nname2: Fooz")]
    drop_vector_indexes(graph_store)
def test_index_fetching() -> None:
    """Create two labelled indexes and confirm each is fetched by its label."""
    vectors = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    pairs = list(zip(texts, vectors))
    shared_embedder = FakeEmbeddingsWithOsDimension()

    def create_store(node_label: str, text_properties: List[str]) -> FalkorDBVector:
        # `text_properties` is accepted but not used when building the store.
        return FalkorDBVector.from_embeddings(
            text_embeddings=pairs,
            embedding=FakeEmbeddingsWithOsDimension(),
            node_label=node_label,
            host=host,
            port=port,
            pre_delete_collection=True,
        )

    def fetch_store(node_label: str) -> FalkorDBVector:
        return FalkorDBVector.from_existing_index(
            embedding=shared_embedder,
            host=host,
            port=port,
            node_label=node_label,
        )

    first_label = "label0"
    create_store(first_label, ["text"])

    # create index 1
    second_label = "label1"
    create_store("label1", ["text"])

    second_store = fetch_store(second_label)
    assert second_store.node_label == second_label

    first_store = fetch_store(first_label)
    assert first_store.node_label == first_label

    drop_vector_indexes(second_store)
    drop_vector_indexes(first_store)
def test_retrieval_params() -> None:
    """Test if we use parameters in retrieval query.

    The custom ``retrieval_query`` references ``$test`` and ``$test1``; the
    values are supplied through ``params`` at search time and must show up
    in the returned text and metadata. The store connects to the configured
    ``host``/``port`` like the other tests in this module.
    """
    docsearch = FalkorDBVector.from_texts(
        texts=texts,
        embedding=FakeEmbeddings(),
        host=host,
        port=port,
        pre_delete_collection=True,
        retrieval_query="""
        RETURN $test as text, score, {test: $test1} AS metadata
        """,
    )
    output = docsearch.similarity_search(
        "Foo", k=2, params={"test": "test", "test1": "test1"}
    )
    assert output == [
        Document(page_content="test", metadata={"test": "test1"}),
        Document(page_content="test", metadata={"test": "test1"}),
    ]
    drop_vector_indexes(docsearch)
def test_falkordb_relationship_index() -> None:
    """Create a vector index on :REL relationships and search through it."""
    embedder = FakeEmbeddingsWithOsDimension()
    store = FalkorDBVector.from_texts(
        texts=texts,
        embedding=embedder,
        host=host,
        port=port,
        pre_delete_collection=True,
    )

    # Ingest data: two :REL relationships carrying text and an embedding.
    ingest_query = (
        "MERGE (p1:Person)"
        "MERGE (p2:Person)"
        "MERGE (p3:Person)"
        "MERGE (p4:Person)"
        "MERGE (p1)-[:REL {text: 'foo', embedding: vecf32($e1)}]->(p2)"
        "MERGE (p3)-[:REL {text: 'far', embedding: vecf32($e2)}]->(p4)"
    )
    ingest_params = {
        "e1": embedder.embed_query("foo"),
        "e2": embedder.embed_query("bar"),
    }
    store._query(ingest_query, params=ingest_params)

    # Create relationship index
    store.create_new_index_on_relationship(
        relation_type="REL",
        embedding_node_property="embedding",
        embedding_dimension=OS_TOKEN_COUNT,
    )

    # NOTE(review): no host/port is passed here, unlike the other
    # constructors in this module - confirm the defaults are intended.
    rel_store = FalkorDBVector.from_existing_relationship_index(
        embedder, relation_type="REL"
    )
    hits = rel_store.similarity_search("foo", k=1)
    expected_doc = Document(metadata={"text": "foo"}, page_content="foo")
    assert hits == [expected_doc]

    drop_vector_indexes(store)
    drop_vector_indexes(rel_store)
diff --git a/libs/community/tests/unit_tests/vectorstores/test_falkordb_vector_utils.py b/libs/community/tests/unit_tests/vectorstores/test_falkordb_vector_utils.py
new file mode 100644
index 0000000000000..bbdcb60f52edf
--- /dev/null
+++ b/libs/community/tests/unit_tests/vectorstores/test_falkordb_vector_utils.py
@@ -0,0 +1,24 @@
+"""Test utils function in falkordb_vector.py"""
+from langchain_community.vectorstores.falkordb_vector import (
+ dict_to_yaml_str,
def test_converting_to_yaml() -> None:
    """dict_to_yaml_str renders scalars, a list and a nested dict as text."""
    person = {
        "name": "John Doe",
        "age": 30,
        "skills": ["Python", "Data Analysis", "Machine Learning"],
        "location": {"city": "Ljubljana", "country": "Slovenia"},
    }
    rendered = dict_to_yaml_str(person)
    # NOTE(review): the leading whitespace on the nested `city` / `country`
    # lines must match dict_to_yaml_str's output exactly - confirm.
    expected_text = (
        "name: John Doe\nage: 30\nskills:\n- Python\n- "
        "Data Analysis\n- Machine Learning\nlocation:\n city: Ljubljana\n"
        " country: Slovenia\n"
    )
    assert rendered == expected_text