Merge pull request #32 from aimclub/link_predict
Link predict
ShikovEgor authored Jan 11, 2024
2 parents f355128 + ec9867f commit 7af183b
Showing 22 changed files with 1,027,437 additions and 955 deletions.
37 changes: 30 additions & 7 deletions stable_gnn/embedding/embedding_factory.py
@@ -1,4 +1,4 @@
from typing import Any, Dict
from typing import Any, Dict, List

import torch
from numpy.typing import NDArray
@@ -13,9 +13,13 @@ class EmbeddingFactory:
"""Producing unsupervised embeddings for a given dataset"""

@staticmethod
def _build_embeddings(loss: Dict[str, Any], data: Graph, conv: str, device: device) -> NDArray:
optuna_training = OptunaTrainEmbeddings(data=data, conv=conv, device=device, loss_function=loss)
best_values = optuna_training.run(number_of_trials=50)
def _build_embeddings(
loss: Dict[str, Any], data: Graph, conv: str, device: device, number_of_trials: int, tune_out: bool = False
) -> NDArray:
optuna_training = OptunaTrainEmbeddings(
data=data, conv=conv, device=device, loss_function=loss, tune_out=tune_out
)
best_values = optuna_training.run(number_of_trials=number_of_trials)

loss_trgt = dict()
for par in loss:
@@ -28,7 +32,9 @@ def _build_embeddings(loss: Dict[str, Any], data: Graph, conv: str, device: devi
if "lmbda" in loss_trgt:
loss_trgt["lmbda"] = best_values["lmbda"]

model_training = ModelTrainEmbeddings(data=data, conv=conv, device=device, loss_function=loss_trgt)
model_training = ModelTrainEmbeddings(
data=data, conv=conv, device=device, loss_function=loss_trgt, tune_out=tune_out
)
out = model_training.run(best_values)
torch.cuda.empty_cache()
return out.detach().cpu().numpy()
@@ -77,15 +83,32 @@ def _get_emb_settings(loss_name: str) -> Dict[str, Any]:
else:
raise NameError

def build_embeddings(self, loss_name: str, conv: str, data: Graph, device: device) -> NDArray:
def build_embeddings(
self,
loss_name: str,
conv: str,
data: List[Graph],
device: device,
number_of_trials: int,
tune_out: bool = False,
) -> NDArray:
"""Build embeddings based on passed dataset and settings
:param loss_name: (str): Name of loss function for embedding learning in GeomGCN layer
:param conv: (str) Name of convolution used in unsupervied embeddings
:param data: (Graph): Input Graph
:param device: (device): Device 'cuda' or 'cpu'
:param number_of_trials (int): Number of trials for optuna tuning embeddings
:param tune_out (bool): Flag if you want tune out layer of embeddings
:returns: (NDArray) embeddings NumPy array of (N_nodes) x (N_emb_dim)
"""
loss_params = self._get_emb_settings(loss_name)
emb = self._build_embeddings(loss=loss_params, data=data[0], conv=conv, device=device)
emb = self._build_embeddings(
loss=loss_params,
data=data[0],
conv=conv,
device=device,
number_of_trials=number_of_trials,
tune_out=tune_out,
)
return emb
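For context, a minimal sketch of how the updated factory API might be called after this change; the dataset variable and the loss name are assumptions, not part of this PR:

```python
import torch

from stable_gnn.embedding.embedding_factory import EmbeddingFactory

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# `graphs` is assumed to be a List[Graph] built elsewhere with stable_gnn's
# dataset utilities; only graphs[0] is used inside build_embeddings.
factory = EmbeddingFactory()
embeddings = factory.build_embeddings(
    loss_name="APP",         # hypothetical loss name; must be one known to _get_emb_settings
    conv="GCN",
    data=graphs,
    device=device,
    number_of_trials=50,     # previously hard-coded to 50, now exposed as a parameter
    tune_out=True,           # let Optuna also search the output embedding dimension
)
print(embeddings.shape)      # (N_nodes, N_emb_dim)
```

With tune_out=False the output dimension stays at the previous fixed value of 2, so existing callers keep their behaviour as long as they pass an explicit number_of_trials.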
20 changes: 16 additions & 4 deletions stable_gnn/embedding/model_train_embeddings.py
@@ -21,21 +21,25 @@ class ModelTrainEmbeddings:
:param loss_function: (dict): Dict of parameters of unsupervised loss function
:param conv: (str): Name of convolution (default:'GCN')
:param device: (device): Either 'cuda' or 'cpu' (default:'cuda')
:param tune_out: (bool): Whether to tune the size of the output layer; if False it is fixed to 2 (as for GeomGCN)
"""

def __init__(self, data: Graph, loss_function: Dict, device: device, conv: str = "GCN") -> None:
def __init__(
self, data: Graph, loss_function: Dict, device: device, conv: str = "GCN", tune_out: bool = False
) -> None:
self.conv = conv
self.device = device
self.x = data.x
self.y = data.y.squeeze()
self.data = data.to(device)
self.train_mask = torch.Tensor([True] * data.num_nodes)
self.loss = loss_function
self.tune_out = tune_out
super(ModelTrainEmbeddings, self).__init__()

def _sampling(self, sampler: BaseSampler, epoch: int, nodes: Tensor) -> None:
if epoch == 0:
self.samples = sampler.sample(nodes)
self.samples = sampler.sample(nodes.to(self.device))

def _train(
self,
@@ -76,6 +80,10 @@ def run(self, params: Dict) -> Tensor:
:return: (Tensor): The output embeddings
"""
hidden_layer = params["hidden_layer"]
if self.tune_out:
out_layer = params["out_layer"]
else:
out_layer = 2
dropout = params["dropout"]
size = params["size of network, number of convs"]
learning_rate = params["lr"]
Expand All @@ -90,7 +98,7 @@ def run(self, params: Dict) -> Tensor:
loss_function=self.loss,
device=self.device,
hidden_layer=hidden_layer,
out_layer=2,
out_layer=out_layer,
num_layers=size,
dropout=dropout,
)
@@ -120,6 +128,10 @@ def _objective(self, trial: Trial) -> Tensor:
dropout = trial.suggest_float("dropout", 0.0, 0.5, step=0.1)
size = trial.suggest_categorical("size of network, number of convs", [1, 2, 3])
learning_rate = trial.suggest_float("lr", 5e-3, 1e-2)
if self.tune_out:
out_layer = trial.suggest_categorical("out_layer", [32, 64, 128])
else:
out_layer = 2

loss_to_train = {}
for name in self.loss:
@@ -158,7 +170,7 @@ def _objective(self, trial: Trial) -> Tensor:
loss_function=loss_to_train,
device=self.device,
hidden_layer=hidden_layer,
out_layer=2,
out_layer=out_layer,
num_layers=size,
dropout=dropout,
)
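The tune_out flag follows a standard Optuna pattern: a hyperparameter is suggested only when the flag enables it and otherwise falls back to a fixed default (2 here). A self-contained sketch of that pattern with a toy objective, not the stable_gnn trainer itself:

```python
import optuna


def make_objective(tune_out: bool):
    def objective(trial: optuna.Trial) -> float:
        hidden_layer = trial.suggest_categorical("hidden_layer", [64, 128, 256])
        # Search the output size only when requested; otherwise keep the fixed
        # value of 2 that the original code always used.
        out_layer = trial.suggest_categorical("out_layer", [32, 64, 128]) if tune_out else 2
        # Toy score standing in for the training loss of the embedding model.
        return abs(hidden_layer / out_layer - 4.0)

    return objective


study = optuna.create_study(direction="minimize")
study.optimize(make_objective(tune_out=True), n_trials=10)
print(study.best_params)
```

When the flag is off, best_params contains no "out_layer" key, which is why run() only reads params["out_layer"] behind the same tune_out check.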
2 changes: 1 addition & 1 deletion stable_gnn/embedding/sampling/abstract_samplers.py
@@ -99,7 +99,7 @@ def _sample_negative(self, batch: Tensor, num_negative_samples: int) -> Tensor:
:param num_negative_samples: (int): number of negative samples for each edge
:return: (Tensor): Negative samples
"""
a, _ = subgraph(batch, self.data.edge_index)
a, _ = subgraph(batch, self.data.edge_index.to(self.device))
adj = self._adj_list(a)
g = dict()
batch = batch.tolist()
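The change above moves edge_index onto the sampler's device before calling subgraph, avoiding a device-mismatch error when batches live on CUDA. A small standalone illustration of the same call with consistent placement (toy graph, CPU fallback if CUDA is unavailable):

```python
import torch
from torch_geometric.utils import subgraph

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Toy graph: 4 nodes, undirected chain 0-1-2-3 stored as directed pairs.
edge_index = torch.tensor([[0, 1, 1, 2, 2, 3],
                           [1, 0, 2, 1, 3, 2]], dtype=torch.long)
batch = torch.tensor([1, 2, 3], device=device)  # nodes kept in the induced subgraph

# Mirror the fix: bring edge_index to the batch's device before slicing.
sub_edge_index, _ = subgraph(batch, edge_index.to(device))
print(sub_edge_index)  # only edges with both endpoints in {1, 2, 3}
```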
36 changes: 34 additions & 2 deletions stable_gnn/embedding/sampling/samplers.py
@@ -22,6 +22,39 @@
from stable_gnn.embedding.sampling.abstract_samplers import BaseSampler, BaseSamplerWithNegative


class NegativeSampler(BaseSamplerWithNegative):
"""
Sampler for negative edges for a batch of nodes
:param data: (Graph): Input dataset
:param device: (device): Either 'cuda' or 'cpu'
:param loss_info: (dict): Dict of parameters of unsupervised loss function
"""

def __init__(self, data: Graph, device: device, loss_info: Dict) -> None:
super().__init__(data, device, loss_info)

def _neg_sample(self, batch: Tensor) -> Tensor:
a, _ = subgraph(batch.tolist(), self.data.edge_index)
batch = batch.repeat(self.num_negative_samples)
neg_batch = self._sample_negative(batch, num_negative_samples=self.num_negative_samples)
return neg_batch

def negative_sample(self, batch: Batch) -> Tensor:
"""
Sample negative edges for the batch nodes
:param batch: (Batch): Nodes to draw negative samples for
:return: (Tensor): negative samples
"""
if not isinstance(batch, torch.Tensor):
batch = torch.tensor(batch, dtype=torch.long).to(self.device)
return self._neg_sample(batch)

def _pos_sample(self, batch: Tensor) -> Tensor:
pass


class SamplerRandomWalk(BaseSamplerWithNegative):
"""
Sampler for positive and negative edges using random walk based methods
@@ -274,8 +307,7 @@ def _pos_sample(self, batch: Tensor) -> Tensor:
len_batch = len(batch)
mask = torch.tensor([False] * len(self.data.x))
mask[batch.tolist()] = True

a, _ = subgraph(batch, self.data.edge_index)
a, _ = subgraph(batch, self.data.edge_index.to(self.device))
row, col = a
row = row.to(self.device)
col = col.to(self.device)
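Finally, a hedged sketch of driving the new NegativeSampler; the graph variable and the loss_info keys are assumptions (the base sampler is expected to read the number of negative samples from that dict), and only the constructor and negative_sample signatures come from this diff:

```python
import torch

from stable_gnn.embedding.sampling.samplers import NegativeSampler

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# `graph` is assumed to be a stable_gnn Graph already moved to `device`;
# the key below is a guess at how BaseSamplerWithNegative reads its settings.
loss_info = {"num_negative_samples": 5}

sampler = NegativeSampler(data=graph, device=device, loss_info=loss_info)

batch = torch.arange(32, device=device)     # node ids of the current mini-batch
negatives = sampler.negative_sample(batch)  # negative samples for these nodes
print(negatives.shape)
```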
