neo4j · stellasia · Nov 29, 2024 · Nov 29, 2024 · Nov 29, 2024 · Nov 29, 2024
@@ -2,6 +2,12 @@
 
 ## Next
 
+### Fixed
+- IDs for the Document and Chunk nodes in the lexical graph are now randomly generated and unique across multiple runs, fixing issues in the lexical graph where relationships were created between chunks that were created by different pipeline runs.
+
+### Changed
+- The `id_prefix` parameter in the `LexicalGraphConfig` is deprecated.
+
 ## 1.3.0
 
 ### Added

@@ -33,6 +33,12 @@ RagResultModel
 
 .. autoclass:: neo4j_graphrag.generation.types.RagResultModel
 
+DocumentInfo
+============
+
+.. autoclass:: neo4j_graphrag.experimental.components.types.DocumentInfo
+
+
 TextChunk
 =========
 

@@ -998,7 +998,7 @@ without making assumptions about entity similarity. The Entity Resolver
 is responsible for refining the created knowledge graph by merging entity
 nodes that represent the same real-world object.
 
-In practice, this package implements a single resolver that merges nodes
+In practice, this package implements a simple resolver that merges nodes
 with the same label and identical "name" property.
 
 .. warning::
@@ -1018,15 +1018,30 @@ It can be used like this:
 
 .. warning::
 
-    By default, all nodes with the __Entity__ label will be resolved.
-    To exclude specific nodes, a filter_query can be added to the query.
-    For example, if a `:Resolved` label has been applied to already resolved entities
-    in the graph, these entities can be excluded with the following approach:
+    By default, all nodes with the `__Entity__` label will be resolved.
+    This behavior can be controlled using the `filter_query` parameter described below.
 
-    .. code:: python
+Filter Query Parameter
+----------------------
 
-        from neo4j_graphrag.experimental.components.resolver import (
-            SinglePropertyExactMatchResolver,
-        )
-        resolver = SinglePropertyExactMatchResolver(driver, filter_query="WHERE not entity:Resolved")
-        res = await resolver.run()
+To exclude specific nodes from the resolution, a `filter_query` can be added to the query.
+For example, if a `:Resolved` label has been applied to already resolved entities
+in the graph, these entities can be excluded with the following approach:
+
+.. code:: python
+
+    from neo4j_graphrag.experimental.components.resolver import (
+        SinglePropertyExactMatchResolver,
+    )
+    filter_query = "WHERE NOT entity:Resolved"
+    resolver = SinglePropertyExactMatchResolver(driver, filter_query=filter_query)
+    res = await resolver.run()
+
+
+A similar approach can be used to exclude entities created from a previous pipeline
+run on the same document, assuming a label `OldDocument` has been assigned to the
+previously created document node:
+
+.. code:: python
+
+    filter_query = "WHERE NOT EXISTS((entity)-[:FROM_DOCUMENT]->(:OldDocument))"
@@ -23,7 +23,7 @@
 DATABASE = "neo4j"
 
 
-root_dir = Path(__file__).parents[4]
+root_dir = Path(__file__).parents[1]
 file_path = root_dir / "data" / "Harry Potter and the Chamber of Secrets Summary.pdf"
 
 

@@ -4,8 +4,8 @@
     EntityRelationExtractor,
     OnError,
 )
-from neo4j_graphrag.experimental.components.pdf_loader import DocumentInfo
 from neo4j_graphrag.experimental.components.types import (
+    DocumentInfo,
     LexicalGraphConfig,
     Neo4jGraph,
     TextChunks,

@@ -13,7 +13,6 @@ async def main() -> GraphResult:
     # optionally, define a LexicalGraphConfig object
     # shown below with default values
     config = LexicalGraphConfig(
-        id_prefix="",  # used to prefix the chunk and document IDs
         chunk_node_label="Chunk",
         document_node_label="Document",
         chunk_to_document_relationship_type="PART_OF_DOCUMENT",

@@ -3,11 +3,8 @@
 from pathlib import Path
 from typing import Dict, Optional
 
-from neo4j_graphrag.experimental.components.pdf_loader import (
-    DataLoader,
-    DocumentInfo,
-    PdfDocument,
-)
+from neo4j_graphrag.experimental.components.pdf_loader import DataLoader
+from neo4j_graphrag.experimental.components.types import DocumentInfo, PdfDocument
 
 
 class MyLoader(DataLoader):

@@ -33,7 +33,6 @@ async def main(neo4j_driver: neo4j.Driver) -> PipelineResult:
     pipe.add_component(TextChunkEmbedder(embedder=OpenAIEmbeddings()), "chunk_embedder")
     # optional: define some custom node labels for the lexical graph:
     lexical_graph_config = LexicalGraphConfig(
-        id_prefix="example",
         chunk_node_label="TextPart",
     )
     pipe.add_component(

@@ -164,7 +164,6 @@ async def define_and_run_pipeline(
 async def main(driver: neo4j.Driver) -> PipelineResult:
     # optional: define some custom node labels for the lexical graph:
     lexical_graph_config = LexicalGraphConfig(
-        id_prefix="example",
         chunk_node_label="TextPart",
         document_node_label="Text",
     )

@@ -184,7 +184,6 @@ async def read_chunk_and_perform_entity_extraction(
 async def main(driver: neo4j.Driver) -> PipelineResult:
     # optional: define some custom node labels for the lexical graph:
     lexical_graph_config = LexicalGraphConfig(
-        id_prefix="example",
         document_node_label="Book",  # default: "Document"
         chunk_node_label="Chapter",  # default "Chunk"
         chunk_text_property="content",  # default: "text"