diff --git a/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_mnmg.py b/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_mnmg.py index 304c360e..113e332a 100644 --- a/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_mnmg.py +++ b/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_mnmg.py @@ -150,10 +150,10 @@ def load_partitioned_data( ) # Load features - feature_store["node", "x"] = torch.load( + feature_store["node", "x", None] = torch.load( os.path.join(feature_path, f"rank={rank}_x.pt") ) - feature_store["node", "y"] = torch.load( + feature_store["node", "y", None] = torch.load( os.path.join(feature_path, f"rank={rank}_y.pt") ) diff --git a/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_sg.py b/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_sg.py index 736dede1..0f42707e 100644 --- a/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_sg.py +++ b/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_sg.py @@ -128,8 +128,8 @@ def load_data( ] = data.edge_index feature_store = cugraph_pyg.data.TensorDictFeatureStore() - feature_store["node", "x"] = data.x - feature_store["node", "y"] = data.y + feature_store["node", "x", None] = data.x + feature_store["node", "y", None] = data.y return ( (feature_store, graph_store), diff --git a/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_snmg.py b/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_snmg.py index db335dab..dc5ea587 100644 --- a/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_snmg.py +++ b/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_snmg.py @@ -108,8 +108,8 @@ def run_train( ] = ixr feature_store = TensorDictFeatureStore() - feature_store["node", "x"] = data.x - feature_store["node", "y"] = data.y + feature_store["node", "x", None] = data.x + feature_store["node", "y", None] = data.y dist.barrier() diff --git a/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_mnmg.py b/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_mnmg.py index 8566ad71..24bb7d62 100644 --- a/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_mnmg.py +++ b/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_mnmg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -93,7 +93,11 @@ def train(epoch, model, optimizer, train_loader, edge_feature_store, num_steps=N optimizer.zero_grad() for i, batch in enumerate(train_loader): - r = edge_feature_store[("n", "e", "n"), "rel"][batch.e_id].flatten().cuda() + r = ( + edge_feature_store[("n", "e", "n"), "rel", None][batch.e_id] + .flatten() + .cuda() + ) z = model.encode(batch.edge_index, r) loss = model.recon_loss(z, batch.edge_index) @@ -301,13 +305,18 @@ def load_partitioned_data(rank, edge_path, rel_path, pos_path, neg_path, meta_pa feature_store = TensorDictFeatureStore() edge_feature_store = WholeFeatureStore() + with open(meta_path, "r") as f: + meta = json.load(f) + + print("num nodes:", meta["num_nodes"]) + # Load edge index - graph_store[("n", "e", "n"), "coo"] = torch.load( - os.path.join(edge_path, f"rank={rank}.pt") - ) + graph_store[ + ("n", "e", "n"), "coo", False, (meta["num_nodes"], meta["num_nodes"]) + ] = torch.load(os.path.join(edge_path, f"rank={rank}.pt")) # Load edge rel type - edge_feature_store[("n", "e", "n"), "rel"] = torch.load( + edge_feature_store[("n", "e", "n"), "rel", None] = torch.load( os.path.join(rel_path, f"rank={rank}.pt") ) @@ -333,9 +342,6 @@ def load_partitioned_data(rank, edge_path, rel_path, pos_path, neg_path, meta_pa splits[stage]["tail_neg"] = tail_neg splits[stage]["relation"] = relation - with open(meta_path, "r") as f: - meta = json.load(f) - return (feature_store, graph_store), edge_feature_store, splits, meta diff --git a/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_sg.py b/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_sg.py index 6823fe72..6188ba5f 100644 --- a/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_sg.py +++ b/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_sg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -90,8 +90,10 @@ def load_data( edge_feature_store = TensorDictFeatureStore() meta = {} - graph_store[("n", "e", "n"), "coo"] = dataset.edge_index - edge_feature_store[("n", "e", "n"), "rel"] = dataset.edge_reltype.pin_memory() + graph_store[ + ("n", "e", "n"), "coo", False, (dataset.num_nodes, dataset.num_nodes) + ] = dataset.edge_index + edge_feature_store[("n", "e", "n"), "rel", None] = dataset.edge_reltype.pin_memory() meta["num_nodes"] = dataset.num_nodes meta["num_rels"] = dataset.edge_reltype.max() + 1 diff --git a/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_snmg.py b/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_snmg.py index 131dfeab..5eb419db 100644 --- a/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_snmg.py +++ b/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_snmg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -111,11 +111,13 @@ def load_data( feature_store = TensorDictFeatureStore() # empty fs required by PyG edge_feature_store = WholeFeatureStore() - graph_store[("n", "e", "n"), "coo"] = torch.tensor_split( - data.edge_index.cuda(), world_size, dim=1 - )[rank] + print("num nodes:", data.num_nodes) + + graph_store[ + ("n", "e", "n"), "coo", False, (data.num_nodes, data.num_nodes) + ] = torch.tensor_split(data.edge_index.cuda(), world_size, dim=1)[rank] - edge_feature_store[("n", "e", "n"), "rel"] = torch.tensor_split( + edge_feature_store[("n", "e", "n"), "rel", None] = torch.tensor_split( data.edge_reltype.cuda(), world_size, )[rank] diff --git a/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py b/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py index ab5f1e21..9cfb2466 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -30,9 +30,9 @@ def test_tensordict_feature_store_basic_api(): other_features = torch.randint(1024, (10, 5)) - feature_store["node", "feat0"] = node_features_0 - feature_store["node", "feat1"] = node_features_1 - feature_store["other", "feat"] = other_features + feature_store["node", "feat0", None] = node_features_0 + feature_store["node", "feat1", None] = node_features_1 + feature_store["other", "feat", None] = other_features assert (feature_store["node"]["feat0"][:] == node_features_0).all() assert (feature_store["node"]["feat1"][:] == node_features_1).all() @@ -40,5 +40,5 @@ def test_tensordict_feature_store_basic_api(): assert len(feature_store.get_all_tensor_attrs()) == 3 - del feature_store["node", "feat0"] + del feature_store["node", "feat0", None] assert len(feature_store.get_all_tensor_attrs()) == 2 diff --git a/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store_mg.py b/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store_mg.py index f1f51456..f8b645f4 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store_mg.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -46,22 +46,24 @@ def run_test_wholegraph_feature_store_basic_api(rank, world_size, dtype): features = features.reshape((features.numel() // 100, 100)).to(torch_dtype) tensordict_store = TensorDictFeatureStore() - tensordict_store["node", "fea"] = features + tensordict_store["node", "fea", None] = features whole_store = WholeFeatureStore() - whole_store["node", "fea"] = torch.tensor_split(features, world_size)[rank] + whole_store["node", "fea", None] = torch.tensor_split(features, world_size)[rank] ix = torch.arange(features.shape[0]) assert ( - whole_store["node", "fea"][ix].cpu() == tensordict_store["node", "fea"][ix] + whole_store["node", "fea", None][ix].cpu() + == tensordict_store["node", "fea", None][ix] ).all() label = torch.arange(0, features.shape[0]).reshape((features.shape[0], 1)) - tensordict_store["node", "label"] = label - whole_store["node", "label"] = torch.tensor_split(label, world_size)[rank] + tensordict_store["node", "label", None] = label + whole_store["node", "label", None] = torch.tensor_split(label, world_size)[rank] assert ( - whole_store["node", "fea"][ix].cpu() == tensordict_store["node", "fea"][ix] + whole_store["node", "fea", None][ix].cpu() + == tensordict_store["node", "fea", None][ix] ).all() pylibwholegraph.torch.initialize.finalize() diff --git a/python/cugraph-pyg/cugraph_pyg/tests/data/test_graph_store.py b/python/cugraph-pyg/cugraph_pyg/tests/data/test_graph_store.py index a8b93665..cb078081 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/data/test_graph_store.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/data/test_graph_store.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -30,8 +30,12 @@ def test_graph_store_basic_api(): ei = torch.stack([dst, src]) + num_nodes = karate.number_of_nodes() + graph_store = GraphStore() - graph_store.put_edge_index(ei, ("person", "knows", "person"), "coo") + graph_store.put_edge_index( + ei, ("person", "knows", "person"), "coo", False, (num_nodes, num_nodes) + ) rei = graph_store.get_edge_index(("person", "knows", "person"), "coo") diff --git a/python/cugraph-pyg/cugraph_pyg/tests/data/test_graph_store_mg.py b/python/cugraph-pyg/cugraph_pyg/tests/data/test_graph_store_mg.py index 14540b7e..d13cb084 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/data/test_graph_store_mg.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/data/test_graph_store_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -30,8 +30,12 @@ def test_graph_store_basic_api_mg(): ei = torch.stack([dst, src]) + num_nodes = karate.number_of_nodes() + graph_store = GraphStore(is_multi_gpu=True) - graph_store.put_edge_index(ei, ("person", "knows", "person"), "coo") + graph_store.put_edge_index( + ei, ("person", "knows", "person"), "coo", False, (num_nodes, num_nodes) + ) rei = graph_store.get_edge_index(("person", "knows", "person"), "coo") diff --git a/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader.py b/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader.py index 7938e6f1..fb11588e 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader.py @@ -37,11 +37,15 @@ def test_neighbor_loader(): ei = torch.stack([dst, src]) + num_nodes = karate.number_of_nodes() + graph_store = GraphStore() - graph_store.put_edge_index(ei, ("person", "knows", "person"), "coo") + graph_store.put_edge_index( + ei, ("person", "knows", "person"), "coo", False, (num_nodes, num_nodes) + ) feature_store = TensorDictFeatureStore() - feature_store["person", "feat"] = torch.randint(128, (34, 16)) + feature_store["person", "feat", None] = torch.randint(128, (34, 16)) loader = NeighborLoader( (feature_store, graph_store), @@ -51,7 +55,7 @@ def test_neighbor_loader(): for batch in loader: assert isinstance(batch, torch_geometric.data.Data) - assert (feature_store["person", "feat"][batch.n_id] == batch.feat).all() + assert (feature_store["person", "feat", None][batch.n_id] == batch.feat).all() @pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") @@ -64,12 +68,16 @@ def test_neighbor_loader_biased(): ] ) + num_nodes = 6 + graph_store = GraphStore() - graph_store.put_edge_index(eix, ("person", "knows", "person"), "coo") + graph_store.put_edge_index( + eix, ("person", "knows", "person"), "coo", False, (num_nodes, num_nodes) + ) feature_store = TensorDictFeatureStore() - feature_store["person", "feat"] = torch.randint(128, (6, 12)) - feature_store[("person", "knows", "person"), "bias"] = torch.tensor( + feature_store["person", "feat", None] = torch.randint(128, (6, 12)) + feature_store[("person", "knows", "person"), "bias", None] = torch.tensor( [0, 12, 14], dtype=torch.float32 ) @@ -104,7 +112,7 @@ def test_link_neighbor_loader_basic( feature_store = TensorDictFeatureStore() eix = torch.randperm(num_edges)[:select_edges] - graph_store[("n", "e", "n"), "coo"] = torch.stack( + graph_store[("n", "e", "n"), "coo", False, (num_nodes, num_nodes)] = torch.stack( [ torch.randint(0, num_nodes, (num_edges,)), torch.randint(0, num_nodes, (num_edges,)), @@ -140,7 +148,7 @@ def test_link_neighbor_loader_negative_sampling_basic(batch_size): feature_store = TensorDictFeatureStore() eix = torch.randperm(num_edges)[:select_edges] - graph_store[("n", "e", "n"), "coo"] = torch.stack( + graph_store[("n", "e", "n"), "coo", False, (num_nodes, num_nodes)] = torch.stack( [ torch.randint(0, num_nodes, (num_edges,)), torch.randint(0, num_nodes, (num_edges,)), @@ -174,7 +182,7 @@ def test_link_neighbor_loader_negative_sampling_uneven(batch_size): feature_store = TensorDictFeatureStore() eix = torch.randperm(num_edges)[:select_edges] - graph_store[("n", "e", "n"), "coo"] = torch.stack( + graph_store[("n", "e", "n"), "coo", False, (num_nodes, num_nodes)] = torch.stack( [ torch.randint(0, num_nodes, (num_edges,)), torch.randint(0, num_nodes, (num_edges,)), @@ -205,11 +213,19 @@ def test_neighbor_loader_hetero_basic(): asrc = torch.tensor([0, 1, 2, 3, 3, 0]) # author adst = torch.tensor([0, 1, 2, 3, 4, 5]) # paper + num_authors = 4 + num_papers = 6 + graph_store = GraphStore() feature_store = TensorDictFeatureStore() - graph_store[("paper", "cites", "paper"), "coo"] = [src, dst] - graph_store[("author", "writes", "paper"), "coo"] = [asrc, adst] + graph_store[("paper", "cites", "paper"), "coo", False, (num_papers, num_papers)] = [ + src, + dst, + ] + graph_store[ + ("author", "writes", "paper"), "coo", False, (num_authors, num_papers) + ] = [asrc, adst] from cugraph_pyg.loader import NeighborLoader @@ -235,11 +251,19 @@ def test_neighbor_loader_hetero_single_etype(): asrc = torch.tensor([0, 1, 2, 3, 3, 0]) # author adst = torch.tensor([0, 1, 2, 3, 4, 5]) # paper + num_authors = 4 + num_papers = 6 + graph_store = GraphStore() feature_store = TensorDictFeatureStore() - graph_store[("paper", "cites", "paper"), "coo"] = [src, dst] - graph_store[("author", "writes", "paper"), "coo"] = [asrc, adst] + graph_store[("paper", "cites", "paper"), "coo", False, (num_papers, num_papers)] = [ + src, + dst, + ] + graph_store[ + ("author", "writes", "paper"), "coo", False, (num_authors, num_papers) + ] = [asrc, adst] from cugraph_pyg.loader import NeighborLoader diff --git a/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader_mg.py b/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader_mg.py index 8d13ec4f..831ee0d6 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader_mg.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -79,7 +79,7 @@ def run_test_neighbor_loader_mg(rank, uid, world_size, specify_size): graph_store.put_edge_index(ei, ("person", "knows", "person"), "coo", False, sz) feature_store = TensorDictFeatureStore() - feature_store["person", "feat"] = torch.randint(128, (34, 16)) + feature_store["person", "feat", None] = torch.randint(128, (34, 16)) ix_train = torch.tensor_split(torch.arange(34), world_size, axis=0)[rank] @@ -91,7 +91,7 @@ def run_test_neighbor_loader_mg(rank, uid, world_size, specify_size): for batch in loader: assert isinstance(batch, torch_geometric.data.Data) - assert (feature_store["person", "feat"][batch.n_id] == batch.feat).all() + assert (feature_store["person", "feat", None][batch.n_id] == batch.feat).all() cugraph_comms_shutdown() @@ -134,8 +134,8 @@ def run_test_neighbor_loader_biased_mg(rank, uid, world_size): graph_store.put_edge_index(eix, ("person", "knows", "person"), "coo") feature_store = TensorDictFeatureStore() - feature_store["person", "feat"] = torch.randint(128, (6 * world_size, 12)) - feature_store[("person", "knows", "person"), "bias"] = torch.concat( + feature_store["person", "feat", None] = torch.randint(128, (6 * world_size, 12)) + feature_store[("person", "knows", "person"), "bias", None] = torch.concat( [torch.tensor([0, 1, 1], dtype=torch.float32) for _ in range(world_size)] ) @@ -199,7 +199,7 @@ def run_test_link_neighbor_loader_basic_mg( feature_store = TensorDictFeatureStore() eix = torch.randperm(num_edges)[:select_edges] - graph_store[("n", "e", "n"), "coo"] = torch.stack( + graph_store[("n", "e", "n"), "coo", False, (num_nodes, num_nodes)] = torch.stack( [ torch.randint(0, num_nodes, (num_edges,)), torch.randint(0, num_nodes, (num_edges,)), @@ -225,7 +225,6 @@ def run_test_link_neighbor_loader_basic_mg( cugraph_comms_shutdown() -@pytest.mark.skip(reason="deleteme") @pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") @pytest.mark.mg @pytest.mark.parametrize("select_edges", [64, 128]) @@ -255,16 +254,18 @@ def test_link_neighbor_loader_basic_mg(select_edges, batch_size, depth): ) -def run_test_link_neighbor_loader_uneven_mg(rank, uid, world_size, edge_index): +def run_test_link_neighbor_loader_uneven_mg( + rank, uid, world_size, edge_index, num_nodes +): init_pytorch_worker(rank, world_size, uid) graph_store = GraphStore(is_multi_gpu=True) feature_store = TensorDictFeatureStore() batch_size = 1 - graph_store[("n", "e", "n"), "coo"] = torch.tensor_split( - edge_index, world_size, dim=-1 - )[rank] + graph_store[ + ("n", "e", "n"), "coo", False, (num_nodes, num_nodes) + ] = torch.tensor_split(edge_index, world_size, dim=-1)[rank] elx = graph_store[("n", "e", "n"), "coo"] # select all edges on each worker loader = LinkNeighborLoader( @@ -285,7 +286,7 @@ def run_test_link_neighbor_loader_uneven_mg(rank, uid, world_size, edge_index): cugraph_comms_shutdown() -@pytest.mark.skip(reason="deleteme") +@pytest.mark.skip(reason="broken") @pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") @pytest.mark.mg def test_link_neighbor_loader_uneven_mg(): @@ -305,6 +306,7 @@ def test_link_neighbor_loader_uneven_mg(): uid, world_size, edge_index, + 13, ), nprocs=world_size, )