From 9e033d9e338e0a8e5c9d83bf0952f8f6d7fb2e0b Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Fri, 12 Jul 2024 16:00:26 +0200 Subject: [PATCH 1/5] add examples to docstring --- src/nplinker/scoring/link_graph.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/nplinker/scoring/link_graph.py b/src/nplinker/scoring/link_graph.py index 2d3715cc..78257037 100644 --- a/src/nplinker/scoring/link_graph.py +++ b/src/nplinker/scoring/link_graph.py @@ -79,7 +79,7 @@ def __init__(self) -> None: >>> lg[gcf] {spectrum: {"metcalf": Score("metcalf", 1.0, {"cutoff": 0.5})}} - Get all links: + Get all links in the LinkGraph: >>> lg.links [(gcf, spectrum, {"metcalf": Score("metcalf", 1.0, {"cutoff": 0.5})})] @@ -129,6 +129,10 @@ def links( Returns: A list of tuples containing the links between objects. + + Examples: + >>> lg.links + [(gcf, spectrum, {"metcalf": Score("metcalf", 1.0, {"cutoff": 0.5})})] """ return list(self._g.edges(data=True)) @@ -150,6 +154,9 @@ def add_link( data: keyword arguments. At least one scoring method and its data must be provided. The key must be the name of the scoring method defined in `ScoringMethod`, and the value is a `Score` object, e.g. `metcalf=Score("metcalf", 1.0, {"cutoff": 0.5})`. + + Examples: + >>> lg.add_link(gcf, spectrum, metcalf=Score("metcalf", 1.0, {"cutoff": 0.5})) """ # validate the data if not data: @@ -174,6 +181,10 @@ def has_link(self, u: Entity, v: Entity) -> bool: Returns: True if there is a link between the two objects, False otherwise + + Examples: + >>> lg.has_link(gcf, spectrum) + True """ return self._g.has_edge(u, v) @@ -192,5 +203,9 @@ def get_link_data( Returns: A dictionary of scoring methods and their data for the link between the two objects, or None if there is no link between the two objects. + + Examples: + >>> lg.get_link_data(gcf, spectrum) + {"metcalf": Score("metcalf", 1.0, {"cutoff": 0.5})} """ return self._g.get_edge_data(u, v) # type: ignore From 72d42b38cb91ed88a90aa610f257863094798606 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Fri, 12 Jul 2024 16:01:19 +0200 Subject: [PATCH 2/5] add filter method to LinkGraph --- src/nplinker/scoring/link_graph.py | 59 +++++++++++++++++++++++++++ tests/unit/scoring/test_link_graph.py | 29 +++++++++++++ 2 files changed, 88 insertions(+) diff --git a/src/nplinker/scoring/link_graph.py b/src/nplinker/scoring/link_graph.py index 78257037..919c30af 100644 --- a/src/nplinker/scoring/link_graph.py +++ b/src/nplinker/scoring/link_graph.py @@ -1,4 +1,5 @@ from __future__ import annotations +from collections.abc import Sequence from functools import wraps from typing import Union from networkx import Graph @@ -209,3 +210,61 @@ def get_link_data( {"metcalf": Score("metcalf", 1.0, {"cutoff": 0.5})} """ return self._g.get_edge_data(u, v) # type: ignore + + def filter(self, u_nodes: Sequence[Entity], v_nodes: Sequence[Entity] = [], /) -> LinkGraph: + """Return a new LinkGraph object with the filtered links between the given objects. + + The new LinkGraph object will only contain the links between `u_nodes` and `v_nodes`. + + If `u_nodes` or `v_nodes` is empty, the new LinkGraph object will contain the links for + the given objects in `v_nodes` or `u_nodes`, respectively. If both are empty, return an + empty LinkGraph object. + + Note that not all objects in `u_nodes` and `v_nodes` need to be present in the original + LinkGraph. + + Args: + u_nodes: a sequence of objects used as the first object in the links + v_nodes: a sequence of objects used as the second object in the links + + Returns: + A new LinkGraph object with the filtered links between the given objects. + + Examples: + Filter the links for `gcf1` and `gcf2`: + >>> new_lg = lg.filter([gcf1, gcf2]) + Filter the links for `spectrum1` and `spectrum2`: + >>> new_lg = lg.filter([spectrum1, spectrum2]) + Filter the links between two lists of objects: + >>> new_lg = lg.filter([gcf1, gcf2], [spectrum1, spectrum2]) + """ + lg = LinkGraph() + + # exchange u_nodes and v_nodes if u_nodes is empty but v_nodes not + if len(u_nodes) == 0 and len(v_nodes) != 0: + u_nodes = v_nodes + v_nodes = [] + + if len(v_nodes) == 0: + for u in u_nodes: + self._filter_one_node(u, lg) + + for u in u_nodes: + for v in v_nodes: + self._filter_two_nodes(u, v, lg) + + return lg + + @validate_u + def _filter_one_node(self, u: Entity, lg: LinkGraph) -> None: + """Filter the links for a given object and add them to the new LinkGraph object.""" + links = self[u] + for node2, value in links.items(): + lg.add_link(u, node2, **value) + + @validate_uv + def _filter_two_nodes(self, u: Entity, v: Entity, lg: LinkGraph) -> None: + """Filter the links between two objects and add them to the new LinkGraph object.""" + link_data = self.get_link_data(u, v) + if link_data is not None: + lg.add_link(u, v, **link_data) diff --git a/tests/unit/scoring/test_link_graph.py b/tests/unit/scoring/test_link_graph.py index 545fee2c..ce52d769 100644 --- a/tests/unit/scoring/test_link_graph.py +++ b/tests/unit/scoring/test_link_graph.py @@ -83,3 +83,32 @@ def test_has_link(lg, gcfs, spectra): def test_get_link_data(lg, gcfs, spectra, score): assert lg.get_link_data(gcfs[0], spectra[0]) == {"metcalf": score} assert lg.get_link_data(gcfs[0], spectra[1]) is None + + +def test_filter(gcfs, spectra, score): + lg = LinkGraph() + lg.add_link(gcfs[0], spectra[0], metcalf=score) + lg.add_link(gcfs[1], spectra[1], metcalf=score) + + u_nodes = [gcfs[0], gcfs[1]] + v_nodes = [spectra[0], spectra[1]] + + # test filtering with GCFs + lg_filtered = lg.filter(u_nodes) + assert len(lg_filtered) == 4 # number of nodes + + # test filtering with Spectra + lg_filtered = lg.filter(v_nodes) + assert len(lg_filtered) == 4 + + # test empty `u_nodes` argument + lg_filtered = lg.filter([], v_nodes) + assert len(lg_filtered) == 4 + + # test empty `u_nodes` and `v_nodes` arguments + lg_filtered = lg.filter([], []) + assert len(lg_filtered) == 0 + + # test filtering with GCFs and Spectra + lg_filtered = lg.filter(u_nodes, v_nodes) + assert len(lg_filtered) == 4 From 20ce90c59a804d77de9dbff361325a47e25a02ca Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Fri, 12 Jul 2024 16:40:31 +0200 Subject: [PATCH 3/5] use python3.10 for code format check --- .github/workflows/format-typing-check.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/format-typing-check.yml b/.github/workflows/format-typing-check.yml index 9acbba09..50e4dff1 100644 --- a/.github/workflows/format-typing-check.yml +++ b/.github/workflows/format-typing-check.yml @@ -30,6 +30,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - name: Set up Python 3.10 + uses: actions/setup-python@v3 + with: + python-version: '3.10' - name: Install ruff and mypy run: | pip install ruff mypy typing_extensions types-Deprecated types-beautifulsoup4 types-jsonschema pandas-stubs From a1e9beb9fe8061a5d2f616d2c98cdd0c5f0add31 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Fri, 12 Jul 2024 16:55:51 +0200 Subject: [PATCH 4/5] add stub for networkx to github action --- .github/workflows/format-typing-check.yml | 3 ++- pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/format-typing-check.yml b/.github/workflows/format-typing-check.yml index 50e4dff1..a5def2b9 100644 --- a/.github/workflows/format-typing-check.yml +++ b/.github/workflows/format-typing-check.yml @@ -36,7 +36,8 @@ jobs: python-version: '3.10' - name: Install ruff and mypy run: | - pip install ruff mypy typing_extensions types-Deprecated types-beautifulsoup4 types-jsonschema pandas-stubs + pip install ruff mypy typing_extensions \ + types-Deprecated types-beautifulsoup4 types-jsonschema types-networkx pandas-stubs - name: Get all changed python files id: changed-python-files uses: tj-actions/changed-files@v44 diff --git a/pyproject.toml b/pyproject.toml index 36caca3a..849a15a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,7 +57,7 @@ dev = [ # static typing "mypy", "typing_extensions", - # stub packages + # stub packages. Update the `format-typing-check.yml` too if you add more. "types-Deprecated", "types-beautifulsoup4", "types-jsonschema", From a18ed84d6fd507d9f0a89bbedf36c9902d0abad9 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Fri, 12 Jul 2024 17:08:48 +0200 Subject: [PATCH 5/5] fix bug of accessing non-existent objects in link graph --- src/nplinker/scoring/link_graph.py | 10 +++++++--- tests/unit/scoring/test_link_graph.py | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/nplinker/scoring/link_graph.py b/src/nplinker/scoring/link_graph.py index 919c30af..b707df17 100644 --- a/src/nplinker/scoring/link_graph.py +++ b/src/nplinker/scoring/link_graph.py @@ -258,9 +258,13 @@ def filter(self, u_nodes: Sequence[Entity], v_nodes: Sequence[Entity] = [], /) - @validate_u def _filter_one_node(self, u: Entity, lg: LinkGraph) -> None: """Filter the links for a given object and add them to the new LinkGraph object.""" - links = self[u] - for node2, value in links.items(): - lg.add_link(u, node2, **value) + try: + links = self[u] + except KeyError: + pass + else: + for node2, value in links.items(): + lg.add_link(u, node2, **value) @validate_uv def _filter_two_nodes(self, u: Entity, v: Entity, lg: LinkGraph) -> None: diff --git a/tests/unit/scoring/test_link_graph.py b/tests/unit/scoring/test_link_graph.py index ce52d769..9f7c9d7d 100644 --- a/tests/unit/scoring/test_link_graph.py +++ b/tests/unit/scoring/test_link_graph.py @@ -90,8 +90,8 @@ def test_filter(gcfs, spectra, score): lg.add_link(gcfs[0], spectra[0], metcalf=score) lg.add_link(gcfs[1], spectra[1], metcalf=score) - u_nodes = [gcfs[0], gcfs[1]] - v_nodes = [spectra[0], spectra[1]] + u_nodes = [gcfs[0], gcfs[1], gcfs[2]] + v_nodes = [spectra[0], spectra[1], spectra[2]] # test filtering with GCFs lg_filtered = lg.filter(u_nodes)