From 993b0016bdf56de156dc1081656842753ae79dcd Mon Sep 17 00:00:00 2001 From: Tomaz Bratanic Date: Fri, 13 Dec 2024 20:06:57 +0100 Subject: [PATCH] Deduplicate nodes for gliner graph transformer (#26) --- .../graph_transformers/gliner.py | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/libs/experimental/langchain_experimental/graph_transformers/gliner.py b/libs/experimental/langchain_experimental/graph_transformers/gliner.py index 85566e2..a7745df 100644 --- a/libs/experimental/langchain_experimental/graph_transformers/gliner.py +++ b/libs/experimental/langchain_experimental/graph_transformers/gliner.py @@ -54,21 +54,21 @@ def __init__( import gliner_spacy # type: ignore # noqa: F401 except ImportError: raise ImportError( - "Could not import relik python package. " + "Could not import gliner-spacy python package. " "Please install it with `pip install gliner-spacy`." ) try: import spacy # type: ignore except ImportError: raise ImportError( - "Could not import relik python package. " + "Could not import spacy python package. " "Please install it with `pip install spacy`." ) try: import glirel # type: ignore # noqa: F401 except ImportError: raise ImportError( - "Could not import relik python package. " + "Could not import gliner python package. " "Please install it with `pip install gliner`." ) @@ -101,15 +101,17 @@ def process_document(self, document: Document) -> GraphDocument: [(document.page_content, self.allowed_relationships)], as_tuples=True ) ) - # Convert nodes - nodes = [] - for node in docs[0][0].ents: - nodes.append( - Node( - id=node.text, - type=node.label_, - ) + # Deduplicate nodes + deduplicated_nodes = {(node.text, node.label_) for node in docs[0][0].ents} + + # Step 2: Convert back to Node objects + nodes = [ + Node( + id=node_text, + type=node_label, ) + for node_text, node_label in deduplicated_nodes + ] # Convert relationships relationships = [] relations = docs[0][0]._.relations