From d2552f6caca09a47882a9b883a69cc501c20d779 Mon Sep 17 00:00:00 2001 From: Anthony Mahanna Date: Thu, 9 May 2024 22:35:49 -0400 Subject: [PATCH 1/3] address `None` value in metagraph --- python/phenolrs/numpy_loader.py | 19 ++++++++++ python/phenolrs/phenolrs.pyi | 4 +- python/phenolrs/pyg_loader.py | 54 +++++++++++++++------------ python/tests/test_all.py | 65 ++++++++++++++++++++++----------- 4 files changed, 95 insertions(+), 47 deletions(-) diff --git a/python/phenolrs/numpy_loader.py b/python/phenolrs/numpy_loader.py index 6d37097..633a355 100644 --- a/python/phenolrs/numpy_loader.py +++ b/python/phenolrs/numpy_loader.py @@ -46,6 +46,25 @@ def load_graph_to_numpy( if "vertexCollections" not in metagraph: raise PhenolError("vertexCollections not found in metagraph") + # Address the possibility of having something like this: + # "USER": {"x": {"features": None}} + # Should be converted to: + # "USER": {"x": "features"} + entries: dict + for v_col_name, entries in metagraph["vertexCollections"].items(): + for source_name, value in entries.items(): + if isinstance(value, dict): + if len(value) != 1: + m = "Only one feature field should be specified per attribute" + raise PhenolError(f"{m}. Found {value}") + + value_key = list(value.keys())[0] + if value[value_key] is not None: + m = f"Invalid value for feature {source_name}: {value_key}. Found {value[value_key]}" # noqa: E501 + raise PhenolError(m) + + metagraph["vertexCollections"][v_col_name][source_name] = value_key + vertex_collections = [ {"name": v_col_name, "fields": list(entries.values())} for v_col_name, entries in metagraph["vertexCollections"].items() diff --git a/python/phenolrs/phenolrs.pyi b/python/phenolrs/phenolrs.pyi index 3baf9fc..890dd88 100644 --- a/python/phenolrs/phenolrs.pyi +++ b/python/phenolrs/phenolrs.pyi @@ -3,7 +3,9 @@ import typing import numpy as np import numpy.typing as npt -def graph_to_pyg_format(request: dict[str, typing.Any]) -> typing.Tuple[ +def graph_to_pyg_format( + request: dict[str, typing.Any] +) -> typing.Tuple[ dict[str, dict[str, npt.NDArray[np.float64]]], dict[typing.Tuple[str, str, str], npt.NDArray[np.float64]], dict[str, dict[str, int]], diff --git a/python/phenolrs/pyg_loader.py b/python/phenolrs/pyg_loader.py index aa5ee8c..de03774 100644 --- a/python/phenolrs/pyg_loader.py +++ b/python/phenolrs/pyg_loader.py @@ -43,18 +43,21 @@ def load_into_pyg_data( v_col_spec_name = list(metagraph["vertexCollections"].keys())[0] v_col_spec = list(metagraph["vertexCollections"].values())[0] - features_by_col, coo_map, col_to_key_inds, vertex_cols_source_to_output = ( - NumpyLoader.load_graph_to_numpy( - database, - metagraph, - hosts, - user_jwt, - username, - password, - tls_cert, - parallelism, - batch_size, - ) + ( + features_by_col, + coo_map, + col_to_key_inds, + vertex_cols_source_to_output, + ) = NumpyLoader.load_graph_to_numpy( + database, + metagraph, + hosts, + user_jwt, + username, + password, + tls_cert, + parallelism, + batch_size, ) data = Data() @@ -106,18 +109,21 @@ def load_into_pyg_heterodata( if len(metagraph["edgeCollections"]) == 0: raise PhenolError("edgeCollections must map to non-empty dictionary") - features_by_col, coo_map, col_to_key_inds, vertex_cols_source_to_output = ( - NumpyLoader.load_graph_to_numpy( - database, - metagraph, - hosts, - user_jwt, - username, - password, - tls_cert, - parallelism, - batch_size, - ) + ( + features_by_col, + coo_map, + col_to_key_inds, + vertex_cols_source_to_output, + ) = NumpyLoader.load_graph_to_numpy( + database, + metagraph, + hosts, + user_jwt, + username, + password, + tls_cert, + parallelism, + batch_size, ) data = HeteroData() for col in features_by_col.keys(): diff --git a/python/tests/test_all.py b/python/tests/test_all.py index 18f7658..7852b60 100644 --- a/python/tests/test_all.py +++ b/python/tests/test_all.py @@ -22,21 +22,39 @@ def test_phenol_abide_hetero( assert isinstance(result, HeteroData) assert result["Subjects"]["x"].shape == (871, 2000) + result = PygLoader.load_into_pyg_heterodata( + connection_information["dbName"], + { + "vertexCollections": { + "Subjects": {"x": {"brain_fmri_features": None}, "y": "label"} + }, + "edgeCollections": {"medical_affinity_graph": {}}, + }, + [connection_information["url"]], + username=connection_information["username"], + password=connection_information["password"], + ) + assert isinstance(result, HeteroData) + assert result["Subjects"]["x"].shape == (871, 2000) + def test_phenol_abide_numpy( load_abide: None, connection_information: dict[str, str] ) -> None: - features_by_col, coo_map, col_to_key_inds, vertex_cols_source_to_output = ( - NumpyLoader.load_graph_to_numpy( - connection_information["dbName"], - { - "vertexCollections": {"Subjects": {"x": "brain_fmri_features"}}, - "edgeCollections": {"medical_affinity_graph": {}}, - }, - [connection_information["url"]], - username=connection_information["username"], - password=connection_information["password"], - ) + ( + features_by_col, + coo_map, + col_to_key_inds, + vertex_cols_source_to_output, + ) = NumpyLoader.load_graph_to_numpy( + connection_information["dbName"], + { + "vertexCollections": {"Subjects": {"x": "brain_fmri_features"}}, + "edgeCollections": {"medical_affinity_graph": {}}, + }, + [connection_information["url"]], + username=connection_information["username"], + password=connection_information["password"], ) assert features_by_col["Subjects"]["brain_fmri_features"].shape == (871, 2000) @@ -47,17 +65,20 @@ def test_phenol_abide_numpy( assert len(col_to_key_inds["Subjects"]) == 871 assert vertex_cols_source_to_output == {"Subjects": {"brain_fmri_features": "x"}} - features_by_col, coo_map, col_to_key_inds, vertex_cols_source_to_output = ( - NumpyLoader.load_graph_to_numpy( - connection_information["dbName"], - { - "vertexCollections": {"Subjects": {"x": "brain_fmri_features"}}, - # "edgeCollections": {"medical_affinity_graph": {}}, - }, - [connection_information["url"]], - username=connection_information["username"], - password=connection_information["password"], - ) + ( + features_by_col, + coo_map, + col_to_key_inds, + vertex_cols_source_to_output, + ) = NumpyLoader.load_graph_to_numpy( + connection_information["dbName"], + { + "vertexCollections": {"Subjects": {"x": "brain_fmri_features"}}, + # "edgeCollections": {"medical_affinity_graph": {}}, + }, + [connection_information["url"]], + username=connection_information["username"], + password=connection_information["password"], ) assert features_by_col["Subjects"]["brain_fmri_features"].shape == (871, 2000) From 7464cdab041a3ed86960580b3aabeb7bbfdc1059 Mon Sep 17 00:00:00 2001 From: Anthony Mahanna Date: Thu, 9 May 2024 22:37:59 -0400 Subject: [PATCH 2/3] cleanup --- python/phenolrs/numpy_loader.py | 4 ++-- python/phenolrs/phenolrs.pyi | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/python/phenolrs/numpy_loader.py b/python/phenolrs/numpy_loader.py index 633a355..a51f841 100644 --- a/python/phenolrs/numpy_loader.py +++ b/python/phenolrs/numpy_loader.py @@ -55,8 +55,8 @@ def load_graph_to_numpy( for source_name, value in entries.items(): if isinstance(value, dict): if len(value) != 1: - m = "Only one feature field should be specified per attribute" - raise PhenolError(f"{m}. Found {value}") + m = f"Only one feature field should be specified per attribute. Found {value}" # noqa: E501 + raise PhenolError(m) value_key = list(value.keys())[0] if value[value_key] is not None: diff --git a/python/phenolrs/phenolrs.pyi b/python/phenolrs/phenolrs.pyi index 890dd88..3baf9fc 100644 --- a/python/phenolrs/phenolrs.pyi +++ b/python/phenolrs/phenolrs.pyi @@ -3,9 +3,7 @@ import typing import numpy as np import numpy.typing as npt -def graph_to_pyg_format( - request: dict[str, typing.Any] -) -> typing.Tuple[ +def graph_to_pyg_format(request: dict[str, typing.Any]) -> typing.Tuple[ dict[str, dict[str, npt.NDArray[np.float64]]], dict[typing.Tuple[str, str, str], npt.NDArray[np.float64]], dict[str, dict[str, int]], From f8d89205ab1a568e684e4feed464b35d4e7c5711 Mon Sep 17 00:00:00 2001 From: Anthony Mahanna Date: Thu, 9 May 2024 22:39:26 -0400 Subject: [PATCH 3/3] fix lint --- python/phenolrs/numpy_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/phenolrs/numpy_loader.py b/python/phenolrs/numpy_loader.py index a51f841..0e089ff 100644 --- a/python/phenolrs/numpy_loader.py +++ b/python/phenolrs/numpy_loader.py @@ -50,7 +50,7 @@ def load_graph_to_numpy( # "USER": {"x": {"features": None}} # Should be converted to: # "USER": {"x": "features"} - entries: dict + entries: dict[str, typing.Any] for v_col_name, entries in metagraph["vertexCollections"].items(): for source_name, value in entries.items(): if isinstance(value, dict):