Skip to content

Commit

Permalink
multidigraph_to_digraph transitive reduction option (#16)
Browse files Browse the repository at this point in the history
merges #16

Background information at
https://twitter.com/larsjuhljensen/status/1450188835032375300

Ontologies such as GO can be reduced when collapsing multiple relationship
types into a single relationship type DiGraph.

Improve multidigraph_to_digraph logging
  • Loading branch information
dhimmel authored Nov 23, 2021
1 parent b7679f2 commit fc2e92f
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 3 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ Counter({'is a': 71509,
'regulates': 3216,
'negatively regulates': 2768,
'positively regulates': 2756})
>>> go_digraph = multidigraph_to_digraph(go_multidigraph)
>>> go_digraph = multidigraph_to_digraph(go_multidigraph, reduce=True)
>>> go_nxo = NXOntology(go_digraph)
>>> # Notice the similarity increases due to the full set of edges
>>> round(go_nxo.similarity("GO:0042552", "GO:0022008").lin, 3)
Expand Down
30 changes: 28 additions & 2 deletions nxontology/imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,10 @@ def pronto_to_multidigraph(


def multidigraph_to_digraph(
graph: nx.MultiDiGraph, rel_types: Optional[List[str]] = None, reverse: bool = True
graph: nx.MultiDiGraph,
rel_types: Optional[List[str]] = None,
reverse: bool = True,
reduce: bool = False,
) -> nx.DiGraph:
"""
Convert a networkx MultiDiGraph to a DiGraph by aggregating edges across relationship types.
Expand All @@ -152,7 +155,13 @@ def multidigraph_to_digraph(
When rel_types is None, all relationship types are preserved. If rel_types is defined,
then the MultiDiGraph is first filtered for edges with that key (relationship type).
If reduce is True, perform a transitive reduction on the DiGraph
to produce a minimum equivalent graph that removes redundant relationships
— i.e. those that are already captured by a more specific ancestral path.
The default is reduce=False since the reduction can be a computationally expensive step.
"""
logging.info(f"Received MultiDiGraph with {graph.number_of_edges():,} edges.")
if rel_types is not None:
graph.remove_edges_from(
[
Expand All @@ -161,11 +170,23 @@ def multidigraph_to_digraph(
if key not in rel_types
]
)
logging.info(
f"Filtered MultiDiGraph to {graph.number_of_edges():,} edges of the following types: {rel_types}."
)
if reverse:
graph = graph.reverse(copy=True)
digraph = nx.DiGraph(graph)
if reduce:
n_edges_before = digraph.number_of_edges()
digraph = nx.transitive_reduction(digraph)
logging.info(
f"Reduced DiGraph by removing {n_edges_before - digraph.number_of_edges():,} redundant edges."
)
for source, target in digraph.edges(data=False):
digraph[source][target]["rel_types"] = sorted(graph[source][target])
logging.info(
f"Converted MultiDiGraph to DiGraph with {digraph.number_of_nodes():,} nodes and {digraph.number_of_edges():,} edges."
)
return digraph


Expand All @@ -179,6 +200,7 @@ def read_gene_ontology(
"negatively regulates",
"positively regulates",
],
reduce: bool = True,
) -> NXOntology[str]:
"""
Load the Gene Ontology into NXOntology,
Expand All @@ -198,10 +220,14 @@ def read_gene_ontology(
else:
date.fromisoformat(release) # check that release is a valid date
url = f"http://release.geneontology.org/{release}/ontology/{source_file}"
logging.info(f"Loading Gene Ontology into Pronto from <{url}>.")
go_pronto = Prontology(handle=url)
go_multidigraph = pronto_to_multidigraph(go_pronto, default_rel_type="is a")
go_digraph = multidigraph_to_digraph(
go_multidigraph, rel_types=rel_types, reverse=True
go_multidigraph,
rel_types=rel_types,
reverse=True,
reduce=reduce,
)
go_nxo: NXOntology[str] = NXOntology(go_digraph)
go_nxo.graph.graph["source_url"] = url
Expand Down
8 changes: 8 additions & 0 deletions nxontology/tests/imports_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,11 @@ def test_read_gene_ontology():
== "http://release.geneontology.org/2021-02-01/ontology/go-basic.json.gz"
)
assert "regulates" in nxo.graph["GO:0006310"]["GO:0000018"]["rel_types"]
# Transitive reduction should remove this edge
# from "defense response to insect" to "negative regulation of defense response to insect"
# since it is redundant with a more specific ancestral path.
# https://github.com/related-sciences/nxontology/pull/16
assert not nxo.graph.has_edge("GO:0002213", "GO:1900366")
# GO:0002213 --> GO:2000068 --> GO:1900366 is more specific
assert nxo.graph.has_edge("GO:0002213", "GO:2000068")
assert nxo.graph.has_edge("GO:2000068", "GO:1900366")

0 comments on commit fc2e92f

Please sign in to comment.