Skip to content

Commit

Permalink
Deduplicate nodes for gliner graph transformer (#26)
Browse files — browse the repository at this point in the history
  • Loading branch information
tomasonjo authored Dec 13, 2024
1 parent 7177f46 commit 993b001
Showing 1 changed file with 13 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -54,21 +54,21 @@ def __init__(
import gliner_spacy # type: ignore # noqa: F401
except ImportError:
raise ImportError(
"Could not import relik python package. "
"Could not import gliner-spacy python package. "
"Please install it with `pip install gliner-spacy`."
)
try:
import spacy # type: ignore
except ImportError:
raise ImportError(
"Could not import relik python package. "
"Could not import spacy python package. "
"Please install it with `pip install spacy`."
)
try:
import glirel # type: ignore # noqa: F401
except ImportError:
raise ImportError(
"Could not import relik python package. "
"Could not import gliner python package. "
"Please install it with `pip install gliner`."
)

Expand Down Expand Up @@ -101,15 +101,17 @@ def process_document(self, document: Document) -> GraphDocument:
[(document.page_content, self.allowed_relationships)], as_tuples=True
)
)
# Convert nodes
nodes = []
for node in docs[0][0].ents:
nodes.append(
Node(
id=node.text,
type=node.label_,
)
# Step 1: Deduplicate nodes by their (text, label) pair
deduplicated_nodes = {(node.text, node.label_) for node in docs[0][0].ents}

# Step 2: Convert back to Node objects
nodes = [
Node(
id=node_text,
type=node_label,
)
for node_text, node_label in deduplicated_nodes
]
# Convert relationships
relationships = []
relations = docs[0][0]._.relations
Expand Down

0 comments on commit 993b001

Please sign in to comment.