From 1ee7f764bc0637c753e67f265ab3f73fa4e99eb0 Mon Sep 17 00:00:00 2001 From: willtai Date: Tue, 15 Oct 2024 15:26:04 +0100 Subject: [PATCH] Update docs for VectorCypherRetriever (#178) * Update docs for VectorCypherRetriever * multiline cypher query * add first paragraph for VectorCypherRetriever --- docs/source/user_guide_rag.rst | 32 +++++++++++++++---------- src/neo4j_graphrag/retrievers/vector.py | 3 +++ 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/docs/source/user_guide_rag.rst b/docs/source/user_guide_rag.rst index 930b0563..38f772e3 100644 --- a/docs/source/user_guide_rag.rst +++ b/docs/source/user_guide_rag.rst @@ -543,11 +543,11 @@ See also :ref:`vectorretriever`. Vector Cypher Retriever ======================= -The `VectorCypherRetriever` allows full utilization of Neo4j's graph nature by -enhancing context through graph traversal. +The `VectorCypherRetriever` fully leverages Neo4j's graph capabilities by combining vector-based similarity searches with graph traversal techniques. It processes a query embedding to perform a similarity search against a specified vector index, retrieves relevant node variables, and then executes a Cypher query to traverse the graph based on these nodes. This integration ensures that retrievals are both semantically meaningful and contextually enriched by the underlying graph structure. + Retrieval Query ------------------------------ +--------------- When crafting the retrieval query, it's important to note two available variables are in the query scope: @@ -560,26 +560,34 @@ certain movie properties, the retrieval query can be structured as follows: .. 
code:: python + retrieval_query = """ + MATCH + (actor:Actor)-[:ACTED_IN]->(node) + RETURN + node.title AS movie_title, + node.plot AS movie_plot, + collect(actor.name) AS actors; + """ retriever = VectorCypherRetriever( driver, index_name=INDEX_NAME, - retrieval_query="MATCH (node)<-[:ACTED_IN]-(p:Person) RETURN node.title as movieTitle, node.plot as movieDescription, collect(p.name) as actors, score", + retrieval_query=retrieval_query, ) +It is recommended that the retrieval query returns node properties, as opposed to nodes. + + Format the Results ------------------------------ +------------------ .. warning:: This API is in beta mode and will be subject to change in the future. -For improved readability and ease in prompt-engineering, formatting the result to suit -specific needs involves providing a `record_formatter` function to the Cypher retrievers. -This function processes the Neo4j record from the retrieval query, returning a -`RetrieverResultItem` with `content` (str) and `metadata` (dict) fields. The `content` -field is used for passing data to the LLM, while `metadata` can serve debugging purposes -and provide additional context. +The `result_formatter` function customizes the output of Cypher retrievers for improved prompt engineering and readability. It converts each Neo4j record into a `RetrieverResultItem` with two fields: `content` and `metadata`. + +The `content` field is a formatted string containing the key information intended for the language model, such as movie titles or descriptions. The `metadata` field holds additional details, useful for debugging or providing extra context, like scores or node properties. .. code:: python @@ -738,7 +746,7 @@ Also note that there is an helper function to create a full-text index (see `the ..
_hybrid-cypher-retriever-user-guide: Hybrid Cypher Retrievers ------------------------------------- +------------------------ In an hybrid cypher retriever, results are searched for in both a vector and a full-text index. Once the similar nodes are identified, a retrieval query can traverse diff --git a/src/neo4j_graphrag/retrievers/vector.py b/src/neo4j_graphrag/retrievers/vector.py index 13a7ef9a..63bcd640 100644 --- a/src/neo4j_graphrag/retrievers/vector.py +++ b/src/neo4j_graphrag/retrievers/vector.py @@ -220,6 +220,8 @@ class VectorCypherRetriever(Retriever): Note: `node` is a variable from the base query that can be used in `retrieval_query` as seen in the example below. + The `retrieval_query` is additional Cypher that enables graph traversal after retrieving `node`. + Example: .. code-block:: python @@ -243,6 +245,7 @@ class VectorCypherRetriever(Retriever): result_formatter (Optional[Callable[[neo4j.Record], RetrieverResultItem]]): Provided custom function to transform a neo4j.Record to a RetrieverResultItem. neo4j_database (Optional[str]): The name of the Neo4j database. If not provided, this defaults to "neo4j" in the database (`see reference to documentation <https://neo4j.com/docs/operations-manual/current/database-administration/#manage-databases-default>`_). + Read more in the :ref:`User Guide <vector-cypher-retriever-user-guide>`. """ def __init__(