diff --git a/CHANGELOG.md b/CHANGELOG.md index 8eb1b98b..1c0227b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ ## Next +### Added +- Introduced a `fail_if_exists` option to index creation functions to control behavior when an index already exists. + +### Changed +- Comprehensive rewrite of the README to improve clarity and provide detailed usage examples. + ## 1.0.0 ### Fixed diff --git a/README.md b/README.md index 95aa5cf0..8b43b3f7 100644 --- a/README.md +++ b/README.md @@ -1,46 +1,55 @@ -# Neo4j GraphRAG package for Python +# Neo4j GraphRAG Package for Python -This repository contains the official Neo4j GraphRAG features for Python. +The official Neo4j GraphRAG package for Python enables developers to build [graph retrieval augmented generation (GraphRAG)](https://neo4j.com/blog/graphrag-manifesto/) applications using the power of Neo4j and Python. +As a first-party library, it offers a robust, feature-rich, and high-performance solution, with the added assurance of long-term support and maintenance directly from Neo4j. -The purpose of this package is to provide a first party package to developers, -where Neo4j can guarantee long term commitment and maintenance as well as being -fast to ship new features and high performing patterns and methods. +## 📄 Documentation -Documentation: https://neo4j.com/docs/neo4j-graphrag-python/ +Documentation can be found [here](https://neo4j.com/docs/neo4j-graphrag-python/). -Python versions supported: +## 🐍 Python Version Support -* Python 3.12 supported. -* Python 3.11 supported. -* Python 3.10 supported. -* Python 3.9 supported. +| Version | Supported? | +| ------- | ---------: | +| 3.12 | ✓ | +| 3.11 | ✓ | +| 3.10 | ✓ | +| 3.9 | ✓ | +| 3.8 | ✗ | -# Usage +## 📦 Installation -## Installation - -This package requires Python (>=3.9). 
- -To install the latest stable version, use: +To install the latest stable version, run: ```shell pip install neo4j-graphrag ``` -### Optional dependencies +### Optional Dependencies #### pygraphviz `pygraphviz` is used for visualizing pipelines. -Follow installation instructions [here](https://pygraphviz.github.io/documentation/stable/install.html). +Installation instructions can be found [here](https://pygraphviz.github.io/documentation/stable/install.html). + +## 💻 Example Usage -## Examples +The scripts below demonstrate how to get started with the package and make use of its key features. +To run these examples, ensure that you have a Neo4j instance up and running and update the `NEO4J_URI`, `NEO4J_USERNAME`, and `NEO4J_PASSWORD` variables in each script with the details of your Neo4j instance. +For the examples, make sure to export your OpenAI key as an environment variable named `OPENAI_API_KEY`. +Additional examples are available in the `examples` folder. -### Knowledge graph construction +### Knowledge Graph Construction **NOTE: The [APOC core library](https://neo4j.com/labs/apoc/) must be installed in your Neo4j instance in order to use this feature** -Assumption: Neo4j running +This package offers two methods for constructing a knowledge graph. + +The `Pipeline` class provides extensive customization options, making it ideal for advanced use cases. +See the `examples/pipeline` folder for examples of how to use this class. + +For a more streamlined approach, the `SimpleKGPipeline` class offers a simplified abstraction layer over the `Pipeline`, making it easier to build knowledge graphs. +Both classes support working directly with text and PDFs. 
```python import asyncio @@ -50,12 +59,14 @@ from neo4j_graphrag.embeddings import OpenAIEmbeddings from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline from neo4j_graphrag.llm.openai_llm import OpenAILLM -# Connect to Neo4j database -URI = "neo4j://localhost:7687" -AUTH = ("neo4j", "password") -driver = GraphDatabase.driver(URI, auth=AUTH) +NEO4J_URI = "neo4j://localhost:7687" +NEO4J_USERNAME = "neo4j" +NEO4J_PASSWORD = "password" -# Instantiate Entity and Relation objects +# Connect to the Neo4j database +driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD)) + +# List the entities and relations the LLM should look for in the text entities = ["Person", "House", "Planet"] relations = ["PARENT_OF", "HEIR_OF", "RULES"] potential_schema = [ @@ -64,7 +75,7 @@ potential_schema = [ ("House", "RULES", "Planet"), ] -# Instantiate an Embedder object +# Create an Embedder object embedder = OpenAIEmbeddings(model="text-embedding-3-large") # Instantiate the LLM @@ -88,137 +99,151 @@ kg_builder = SimpleKGPipeline( from_pdf=False, ) -asyncio.run( - kg_builder.run_async( - text=""""The son of Duke Leto Atreides and the Lady Jessica, Paul is the heir of - House Atreides, an aristocratic family that rules the planet Caladan.""" - ) +# Run the pipeline on a piece of text +text = ( + "The son of Duke Leto Atreides and the Lady Jessica, Paul is the heir of House " + "Atreides, an aristocratic family that rules the planet Caladan." +) +asyncio.run(kg_builder.run_async(text=text)) +driver.close() ``` -Example knowledge graph created using the above code: +Example knowledge graph created using the above script: ![Example knowledge graph](images/kg_construction.svg) +### Creating a Vector Index -### Creating a vector index - -When creating a vector index, make sure you match the number of dimensions in the index with the number of dimensions the embeddings have. 
- -Assumption: Neo4j running +When creating a vector index, make sure you match the number of dimensions in the index with the number of dimensions your embeddings have. ```python from neo4j import GraphDatabase from neo4j_graphrag.indexes import create_vector_index -URI = "neo4j://localhost:7687" -AUTH = ("neo4j", "password") - +NEO4J_URI = "neo4j://localhost:7687" +NEO4J_USERNAME = "neo4j" +NEO4J_PASSWORD = "password" INDEX_NAME = "vector-index-name" -# Connect to Neo4j database -driver = GraphDatabase.driver(URI, auth=AUTH) +# Connect to the Neo4j database +driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD)) -# Creating the index +# Create the index create_vector_index( driver, INDEX_NAME, - label="Document", - embedding_property="vectorProperty", - dimensions=1536, + label="Chunk", + embedding_property="embedding", + dimensions=3072, similarity_fn="euclidean", ) - +driver.close() ``` -### Populating the Neo4j Vector Index +### Populating a Vector Index -Note that the below example is not the only way you can upsert data into your Neo4j database. For example, you could also leverage [the Neo4j Python driver](https://github.com/neo4j/neo4j-python-driver). +This example demonstrates one method for upserting data in your Neo4j database. +It's important to note that there are alternative approaches, such as using the [Neo4j Python driver](https://github.com/neo4j/neo4j-python-driver). -Assumption: Neo4j running with a defined vector index +Ensure that your vector index is created prior to executing this example. 
```python from neo4j import GraphDatabase +from neo4j_graphrag.embeddings import OpenAIEmbeddings from neo4j_graphrag.indexes import upsert_vector -URI = "neo4j://localhost:7687" -AUTH = ("neo4j", "password") +NEO4J_URI = "neo4j://localhost:7687" +NEO4J_USERNAME = "neo4j" +NEO4J_PASSWORD = "password" -# Connect to Neo4j database -driver = GraphDatabase.driver(URI, auth=AUTH) +# Connect to the Neo4j database +driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD)) + +# Create an Embedder object +embedder = OpenAIEmbeddings(model="text-embedding-3-large") + +# Generate an embedding for some text +text = ( + "The son of Duke Leto Atreides and the Lady Jessica, Paul is the heir of House " + "Atreides, an aristocratic family that rules the planet Caladan." +) +vector = embedder.embed_query(text) # Upsert the vector -vector = ... upsert_vector( driver, - node_id=1, - embedding_property="vectorProperty", + node_id=0, + embedding_property="embedding", vector=vector, ) +driver.close() ``` -### Performing a similarity search +### Performing a Similarity Search -Assumption: Neo4j running with populated vector index in place. +Please note that when querying a Neo4j vector index, _approximate_ nearest neighbor search is used, which may not always deliver exact results. +For more information, refer to the Neo4j documentation on [limitations and issues of vector indexes](https://neo4j.com/docs/cypher-manual/current/indexes/semantic-indexes/vector-indexes/#limitations-and-issues). -Limitation: The query over the vector index is an _approximate_ nearest neighbor search and may not give exact results. [See this reference for more details](https://neo4j.com/docs/cypher-manual/current/indexes/semantic-indexes/vector-indexes/#limitations-and-issues). +In the example below, we perform a simple vector search using a retriever that conducts a similarity search over the `vector-index-name` vector index. 
-While the library has more retrievers than shown here, the following examples should be able to get you started. +This library provides more retrievers beyond just the `VectorRetriever`. +See the `examples` folder for examples of how to use these retrievers. -In the following example, we use a simple vector search as retriever, -that will perform a similarity search over the `index-name` vector index -in Neo4j. +Before running this example, make sure your vector index has been created and populated. ```python from neo4j import GraphDatabase -from neo4j_graphrag.retrievers import VectorRetriever -from neo4j_graphrag.llm import OpenAILLM -from neo4j_graphrag.generation import GraphRAG from neo4j_graphrag.embeddings import OpenAIEmbeddings +from neo4j_graphrag.generation import GraphRAG +from neo4j_graphrag.llm import OpenAILLM +from neo4j_graphrag.retrievers import VectorRetriever -URI = "neo4j://localhost:7687" -AUTH = ("neo4j", "password") - +NEO4J_URI = "neo4j://localhost:7687" +NEO4J_USERNAME = "neo4j" +NEO4J_PASSWORD = "password" INDEX_NAME = "vector-index-name" -# Connect to Neo4j database -driver = GraphDatabase.driver(URI, auth=AUTH) +# Connect to the Neo4j database +driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD)) -# Create Embedder object +# Create an Embedder object embedder = OpenAIEmbeddings(model="text-embedding-3-large") # Initialize the retriever retriever = VectorRetriever(driver, INDEX_NAME, embedder) -# Initialize the LLM -# Note: An OPENAI_API_KEY environment variable is required here +# Instantiate the LLM llm = OpenAILLM(model_name="gpt-4o", model_params={"temperature": 0}) -# Initialize the RAG pipeline +# Instantiate the RAG pipeline rag = GraphRAG(retriever=retriever, llm=llm) # Query the graph -query_text = "How do I do similarity search in Neo4j?" +query_text = "Who is Paul Atreides?" 
response = rag.search(query_text=query_text, retriever_config={"top_k": 5}) print(response.answer) +driver.close() ``` -# Development +## 🤝 Contributing -## Install dependencies +You must sign the [contributors license agreement](https://neo4j.com/developer/contributing-code/#sign-cla) in order to make contributions to this project. + +### Install Dependencies + +Our Python dependencies are managed using Poetry. +If Poetry is not yet installed on your system, you can follow the instructions [here](https://python-poetry.org/) to set it up. +To begin development on this project, start by cloning the repository and then install all necessary dependencies, including the development dependencies, with the following command: ```bash -poetry install +poetry install --with dev ``` -## Getting started - -### Issues +### Reporting Issues If you have a bug to report or feature to request, first [search to see if an issue already exists](https://docs.github.com/en/github/searching-for-information-on-github/searching-on-github/searching-issues-and-pull-requests#search-by-the-title-body-or-comments). -If a related issue doesn't exist, please raise a new issue using the relevant -[issue form](https://github.com/neo4j/neo4j-graphrag-python/issues/new/choose). +If a related issue doesn't exist, please raise a new issue using the [issue form](https://github.com/neo4j/neo4j-graphrag-python/issues/new/choose). If you're a Neo4j Enterprise customer, you can also reach out to [Customer Support](http://support.neo4j.com/). @@ -226,54 +251,83 @@ If you don't have a bug to report or feature request, but you need a hand with the library; community support is available via [Neo4j Online Community](https://community.neo4j.com/) and/or [Discord](https://discord.gg/neo4j). -### Make changes +### Workflow for Contributions 1. Fork the repository. 2. Install Python and Poetry. 3. Create a working branch from `main` and start with your changes! 
-### Pull request +### Code Formatting and Linting + +Our codebase follows strict formatting and linting standards using [Ruff](https://docs.astral.sh/ruff/) for code quality checks and [Mypy](https://github.com/python/mypy) for type checking. +Before contributing, ensure that all code is properly formatted, free of linting issues, and includes accurate type annotations. + +- To install Ruff, follow the instructions [here](https://docs.astral.sh/ruff/installation/). +- To set up Mypy, follow the steps outlined [here](https://mypy.readthedocs.io/en/stable/getting_started.html#installing-and-running-mypy). + +Adherence to these standards is required for contributions to be accepted. + +#### Using Pre-commit + +We recommend setting up [pre-commit](https://pre-commit.com/) to automate code quality checks. +This ensures your changes meet our guidelines before committing. + +1. Install pre-commit by following the [installation guide](https://pre-commit.com/#install). +2. Set up the pre-commit hooks by running: + + ```bash + pre-commit install + ``` + +3. To manually check if a file meets the quality requirements, run: + + ```bash + pre-commit run --file path/to/file + ``` + +### Pull Requests -When you're finished with your changes, create a pull request, also known as a PR. +When you're finished with your changes, create a pull request (PR) using the following workflow. -- Ensure that you have [signed the CLA](https://neo4j.com/developer/contributing-code/#sign-cla). -- Ensure that the base of your PR is set to `main`. -- Don't forget to [link your PR to an issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue) +- Ensure you have formatted and linted your code. +- Ensure that you have [signed the CLA](https://neo4j.com/developer/contributing-code/#sign-cla). +- Ensure that the base of your PR is set to `main`. 
+- Don't forget to [link your PR to an issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue) if you are solving one. -- Enable the checkbox to [allow maintainer edits](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/allowing-changes-to-a-pull-request-branch-created-from-a-fork) +- Check the checkbox to [allow maintainer edits](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/allowing-changes-to-a-pull-request-branch-created-from-a-fork) so that maintainers can make any necessary tweaks and update your branch for merge. -- Reviewers may ask for changes to be made before a PR can be merged, either using +- Reviewers may ask for changes to be made before a PR can be merged, either using [suggested changes](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/reviewing-changes-in-pull-requests/incorporating-feedback-in-your-pull-request) or normal pull request comments. You can apply suggested changes directly through - the UI, and any other changes can be made in your fork and committed to the PR branch. -- As you update your PR and apply changes, mark each conversation as [resolved](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/commenting-on-a-pull-request#resolving-conversations). -- Update the `CHANGELOG.md` if you have made significant changes to the project, these include: - - Major changes: - - New features - - Bug fixes with high impact - - Breaking changes - - Minor changes: - - Documentation improvements - - Code refactoring without functional impact - - Minor bug fixes -- Keep `CHANGELOG.md` changes brief and focus on the most important changes. + the UI. Any other changes can be made in your fork and committed to the PR branch. 
+- As you update your PR and apply changes, mark each conversation as [resolved](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/commenting-on-a-pull-request#resolving-conversations). +- Update the `CHANGELOG.md` if you have made significant changes to the project. These include: + - Major changes: + - New features + - Bug fixes with high impact + - Breaking changes + - Minor changes: + - Documentation improvements + - Code refactoring without functional impact + - Minor bug fixes +- Keep `CHANGELOG.md` changes brief and focus on the most important changes. ### Updating the `CHANGELOG.md` -1. When opening a PR, you can generate an edit suggestion by commenting on the GitHub PR [using CodiumAI](https://github.com/CodiumAI-Agent): +1. You can automatically generate a changelog suggestion for your PR by commenting on it [using CodiumAI](https://github.com/CodiumAI-Agent): ``` @CodiumAI-Agent /update_changelog ``` -2. Use this as a suggestion and update the `CHANGELOG.md` content under 'Next'. +2. Edit the suggestion if necessary and update the appropriate subsection in the `CHANGELOG.md` file under 'Next'. 3. Commit the changes. -## Run tests +## 🧪 Tests -### Unit tests +### Unit Tests -This should run out of the box once the dependencies are installed. 
+Install the project dependencies, then run the following command to run the unit tests locally: ```bash poetry run pytest tests/unit @@ -281,27 +335,27 @@ poetry run pytest tests/unit ### E2E tests -To run e2e tests you'd need to have some services running locally: +To execute end-to-end (e2e) tests, you need the following services to be running locally: -- neo4j -- weaviate -- weaviate-text2vec-transformers +- neo4j +- weaviate +- weaviate-text2vec-transformers -The easiest way to get it up and running is via Docker compose: +The simplest way to set these up is by using Docker Compose: ```bash docker compose -f tests/e2e/docker-compose.yml up ``` -_(pro tip: if you suspect something in the databases are cached, run `docker compose -f tests/e2e/docker-compose.yml down` to remove them completely)_ +_(tip: If you encounter any caching issues within the databases, you can completely remove them by running `docker compose -f tests/e2e/docker-compose.yml down`)_ -Once the services are running, execute the following command to run the e2e tests. +Once all the services are running, execute the following command to run the e2e tests: ```bash poetry run pytest tests/e2e ``` -## Further information +## ℹ️ Additional Information -- [The official Neo4j Python driver](https://github.com/neo4j/neo4j-python-driver) -- [Neo4j GenAI integrations](https://neo4j.com/docs/cypher-manual/current/genai-integrations/) +- [The official Neo4j Python driver](https://github.com/neo4j/neo4j-python-driver) +- [Neo4j GenAI integrations](https://neo4j.com/docs/cypher-manual/current/genai-integrations/) diff --git a/docs/source/index.rst b/docs/source/index.rst index bbc31633..85a9a5e7 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -3,8 +3,8 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. 
-Neo4j GraphRAG for Python -========================= +GraphRAG for Python +=================== This package contains the official Neo4j GraphRAG features for Python. diff --git a/docs/source/user_guide_rag.rst b/docs/source/user_guide_rag.rst index 930b0563..38f772e3 100644 --- a/docs/source/user_guide_rag.rst +++ b/docs/source/user_guide_rag.rst @@ -543,11 +543,11 @@ See also :ref:`vectorretriever`. Vector Cypher Retriever ======================= -The `VectorCypherRetriever` allows full utilization of Neo4j's graph nature by -enhancing context through graph traversal. +The `VectorCypherRetriever` fully leverages Neo4j's graph capabilities by combining vector-based similarity searches with graph traversal techniques. It processes a query embedding to perform a similarity search against a specified vector index, retrieves relevant node variables, and then executes a Cypher query to traverse the graph based on these nodes. This integration ensures that retrievals are both semantically meaningful and contextually enriched by the underlying graph structure. + Retrieval Query ------------------------------ +--------------- When crafting the retrieval query, it's important to note two available variables are in the query scope: @@ -560,26 +560,34 @@ certain movie properties, the retrieval query can be structured as follows: .. code:: python + retrieval_query = """ + MATCH + (actor:Actor)-[:ACTED_IN]->(node) + RETURN + node.title AS movie_title, + node.plot AS movie_plot, + collect(actor.name) AS actors; + """ retriever = VectorCypherRetriever( driver, index_name=INDEX_NAME, - retrieval_query="MATCH (node)<-[:ACTED_IN]-(p:Person) RETURN node.title as movieTitle, node.plot as movieDescription, collect(p.name) as actors, score", + retrieval_query=retrieval_query, ) +It is recommended that the retrieval query returns node properties, as opposed to nodes. + + Format the Results ------------------------------ +------------------ .. 
warning:: This API is in beta mode and will be subject to change in the future. -For improved readability and ease in prompt-engineering, formatting the result to suit -specific needs involves providing a `record_formatter` function to the Cypher retrievers. -This function processes the Neo4j record from the retrieval query, returning a -`RetrieverResultItem` with `content` (str) and `metadata` (dict) fields. The `content` -field is used for passing data to the LLM, while `metadata` can serve debugging purposes -and provide additional context. +The result_formatter function customizes the output of Cypher retrievers for improved prompt engineering and readability. It converts each Neo4j record into a RetrieverResultItem with two fields: `content` and `metadata`. + +The `content` field is a formatted string containing the key information intended for the language model, such as movie titles or descriptions. The `metadata` field holds additional details, useful for debugging or providing extra context, like scores or node properties. .. code:: python @@ -738,7 +746,7 @@ Also note that there is an helper function to create a full-text index (see `the .. _hybrid-cypher-retriever-user-guide: Hybrid Cypher Retrievers ------------------------------------- +------------------------ In an hybrid cypher retriever, results are searched for in both a vector and a full-text index. Once the similar nodes are identified, a retrieval query can traverse diff --git a/src/neo4j_graphrag/indexes.py b/src/neo4j_graphrag/indexes.py index cdbc38d7..cf8cdadc 100644 --- a/src/neo4j_graphrag/indexes.py +++ b/src/neo4j_graphrag/indexes.py @@ -38,6 +38,7 @@ def create_vector_index( embedding_property: str, dimensions: int, similarity_fn: Literal["euclidean", "cosine"], + fail_if_exists: bool = False, neo4j_database: Optional[str] = None, ) -> None: """ @@ -46,7 +47,6 @@ def create_vector_index( See Cypher manual on `creating vector indexes `_. 
- Important: This operation will fail if an index with the same name already exists. Ensure that the index name provided is unique within the database context. Example: @@ -72,6 +72,7 @@ def create_vector_index( embedding_property="vectorProperty", dimensions=1536, similarity_fn="euclidean", + fail_if_exists=False, ) @@ -83,6 +84,7 @@ def create_vector_index( dimensions (int): Vector embedding dimension similarity_fn (str): case-insensitive values for the vector similarity function: ``euclidean`` or ``cosine``. + fail_if_exists (bool): If True raise an error if the index already exists. Defaults to False. neo4j_database (Optional[str]): The name of the Neo4j database. If not provided, this defaults to "neo4j" in the database (`see reference to documentation `_). Raises: @@ -105,7 +107,7 @@ def create_vector_index( try: query = ( - f"CREATE VECTOR INDEX $name FOR (n:{label}) ON n.{embedding_property} OPTIONS " + f"CREATE VECTOR INDEX $name {'' if fail_if_exists else 'IF NOT EXISTS'} FOR (n:{label}) ON n.{embedding_property} OPTIONS " "{ indexConfig: { `vector.dimensions`: toInteger($dimensions), `vector.similarity_function`: $similarity_fn } }" ) logger.info(f"Creating vector index named '{name}'") @@ -123,6 +125,7 @@ def create_fulltext_index( name: str, label: str, node_properties: list[str], + fail_if_exists: bool = False, neo4j_database: Optional[str] = None, ) -> None: """ @@ -131,7 +134,6 @@ def create_fulltext_index( See Cypher manual on `creating fulltext indexes `_. - Important: This operation will fail if an index with the same name already exists. Ensure that the index name provided is unique within the database context. Example: @@ -155,6 +157,7 @@ def create_fulltext_index( INDEX_NAME, label="Document", node_properties=["vectorProperty"], + fail_if_exists=False, ) @@ -163,6 +166,7 @@ def create_fulltext_index( name (str): The unique name of the index. label (str): The node label to be indexed. 
node_properties (list[str]): The node properties to create the fulltext index on. + fail_if_exists (bool): If True raise an error if the index already exists. Defaults to False. neo4j_database (Optional[str]): The name of the Neo4j database. If not provided, this defaults to "neo4j" in the database (`see reference to documentation `_). Raises: @@ -180,7 +184,7 @@ def create_fulltext_index( try: query = ( - "CREATE FULLTEXT INDEX $name " + f"CREATE FULLTEXT INDEX $name {'' if fail_if_exists else 'IF NOT EXISTS'} " f"FOR (n:`{label}`) ON EACH " f"[{', '.join(['n.`' + prop + '`' for prop in node_properties])}]" ) diff --git a/src/neo4j_graphrag/retrievers/vector.py b/src/neo4j_graphrag/retrievers/vector.py index 13a7ef9a..63bcd640 100644 --- a/src/neo4j_graphrag/retrievers/vector.py +++ b/src/neo4j_graphrag/retrievers/vector.py @@ -220,6 +220,8 @@ class VectorCypherRetriever(Retriever): Note: `node` is a variable from the base query that can be used in `retrieval_query` as seen in the example below. + The retrieval_query is additional Cypher that can allow for graph traversal after retrieving `node`. + Example: .. code-block:: python @@ -243,6 +245,7 @@ class VectorCypherRetriever(Retriever): result_formatter (Optional[Callable[[neo4j.Record], RetrieverResultItem]]): Provided custom function to transform a neo4j.Record to a RetrieverResultItem. neo4j_database (Optional[str]): The name of the Neo4j database. If not provided, this defaults to "neo4j" in the database (`see reference to documentation `_). + Read more in the :ref:`User Guide `. 
""" def __init__( diff --git a/tests/unit/test_indexes.py b/tests/unit/test_indexes.py index a9f4cd1c..aa6d5c30 100644 --- a/tests/unit/test_indexes.py +++ b/tests/unit/test_indexes.py @@ -30,7 +30,7 @@ def test_create_vector_index_happy_path(driver: MagicMock) -> None: create_query = ( - "CREATE VECTOR INDEX $name FOR (n:People) ON n.name OPTIONS " + "CREATE VECTOR INDEX $name IF NOT EXISTS FOR (n:People) ON n.name OPTIONS " "{ indexConfig: { `vector.dimensions`: toInteger($dimensions), `vector.similarity_function`: $similarity_fn } }" ) @@ -43,9 +43,26 @@ def test_create_vector_index_happy_path(driver: MagicMock) -> None: ) +def test_create_vector_index_fail_if_exists(driver: MagicMock) -> None: + create_query = ( + "CREATE VECTOR INDEX $name FOR (n:People) ON n.name OPTIONS " + "{ indexConfig: { `vector.dimensions`: toInteger($dimensions), `vector.similarity_function`: $similarity_fn } }" + ) + + create_vector_index( + driver, "my-index", "People", "name", 2048, "cosine", fail_if_exists=True + ) + + driver.execute_query.assert_called_once_with( + create_query, + {"name": "my-index", "dimensions": 2048, "similarity_fn": "cosine"}, + database_=None, + ) + + def test_create_vector_index_ensure_escaping(driver: MagicMock) -> None: create_query = ( - "CREATE VECTOR INDEX $name FOR (n:People) ON n.name OPTIONS " + "CREATE VECTOR INDEX $name IF NOT EXISTS FOR (n:People) ON n.name OPTIONS " "{ indexConfig: { `vector.dimensions`: toInteger($dimensions), `vector.similarity_function`: $similarity_fn } }" ) @@ -120,7 +137,7 @@ def test_create_fulltext_index_happy_path(driver: MagicMock) -> None: label = "node-label" text_node_properties = ["property-1", "property-2"] create_query = ( - "CREATE FULLTEXT INDEX $name " + "CREATE FULLTEXT INDEX $name IF NOT EXISTS " f"FOR (n:`{label}`) ON EACH " f"[{', '.join(['n.`' + property + '`' for property in text_node_properties])}]" ) @@ -134,6 +151,26 @@ def test_create_fulltext_index_happy_path(driver: MagicMock) -> None: ) +def 
test_create_fulltext_index_fail_if_exists(driver: MagicMock) -> None: + label = "node-label" + text_node_properties = ["property-1", "property-2"] + create_query = ( + "CREATE FULLTEXT INDEX $name " + f"FOR (n:`{label}`) ON EACH " + f"[{', '.join(['n.`' + property + '`' for property in text_node_properties])}]" + ) + + create_fulltext_index( + driver, "my-index", label, text_node_properties, fail_if_exists=True + ) + + driver.execute_query.assert_called_once_with( + create_query, + {"name": "my-index"}, + database_=None, + ) + + def test_create_fulltext_index_raises_error_with_neo4j_client_error( driver: MagicMock, ) -> None: @@ -159,7 +196,7 @@ def test_create_fulltext_index_ensure_escaping(driver: MagicMock) -> None: label = "node-label" text_node_properties = ["property-1", "property-2"] create_query = ( - "CREATE FULLTEXT INDEX $name " + "CREATE FULLTEXT INDEX $name IF NOT EXISTS " f"FOR (n:`{label}`) ON EACH " f"[{', '.join(['n.`' + property + '`' for property in text_node_properties])}]" )