diff --git a/stable_gnn/model_link_predict.py b/stable_gnn/model_link_predict.py index f6e107a..3190039 100644 --- a/stable_gnn/model_link_predict.py +++ b/stable_gnn/model_link_predict.py @@ -26,7 +26,6 @@ class ModelLinkPrediction(): def __init__( self, - dataset: Graph, number_of_trials: int, device: torch.device = "cuda", loss_name: str = "APP", @@ -34,23 +33,25 @@ def __init__( ) -> None: super().__init__() - self.data = dataset[0] - self.data.edge_index = self.data.edge_index.type(torch.LongTensor) - # это для того чтоб тестовые негативные примеры не включали - train_edges, test_edges = self._train_test_edges(self.data) - self.data.edge_index = torch.LongTensor(train_edges).T - self.positive_edges = test_edges + self.number_of_trials=number_of_trials + self.loss_name=loss_name + self.emb_conv_name=emb_conv_name self.device = device - self.neg_samples_test = self._neg_samples(self.positive_edges, self.data) - self.neg_samples_train = self._neg_samples(train_edges, self.data) - self.embeddings = EmbeddingFactory().build_embeddings( - loss_name=loss_name, conv=emb_conv_name, data=dataset, device=device, number_of_trials=number_of_trials, tune_out=True - ) - def _train_test_edges(self, data: Graph) -> (List[int], List[int]): - all_edges = data.edge_index.T.tolist() + + def train_test_edges(self, dataset: Graph) -> (List[List[int]], List[List[int]],List[List[int]],List[List[int]]): + ''' + Split dataset to train and test and calculate negative samples + + :param dataset: (Graph): Data to split on train, test and negatives + :return: (Tuple): Tuple of four lists of train edges, negative train samples, test and negative test samples edges + ''' + self.data = dataset[0] + self.data.edge_index = self.data.edge_index.type(torch.LongTensor) + + all_edges = self.data.edge_index.T.tolist() train_edges = [] test_edges = [] indices_train_edges = random.sample(range(len(all_edges)), int(len(all_edges) * 0.8)) @@ -59,7 +60,11 @@ def _train_test_edges(self, data: Graph) -> 
(List[int], List[int]): train_edges.append(edge) else: test_edges.append(edge) - return train_edges, test_edges + + neg_samples_train =self._neg_samples(train_edges, self.data) + neg_samples_test = self._neg_samples(test_edges, self.data) + self.data.edge_index = torch.LongTensor(train_edges).T + return train_edges, neg_samples_train, test_edges, neg_samples_test def _neg_samples(self, positive_edges: List[int], data: Graph) -> List[int]: num_neg_samples_test = int(len(positive_edges) / len(self.data.x)) @@ -70,44 +75,48 @@ def _neg_samples(self, positive_edges: List[int], data: Graph) -> List[int]: ) return neg_edges - def train_cl(self) -> BaseEstimator: + def train_cl(self, train_edges: List[List[int]], neg_samples_train: List[List[int]]) -> BaseEstimator: ''' Train classifier for link prediction + :param train_edges: (List): List of existing edges + :param neg_samples_train: (List): List of negative samples to train :return: (BaseEstimator): Classifier which support fit predict notation ''' + self.embeddings = EmbeddingFactory().build_embeddings( + loss_name=self.loss_name, conv=self.emb_conv_name, data=[self.data], device=self.device, number_of_trials=self.number_of_trials, + tune_out=True + ) + emb_norm = torch.nn.functional.normalize(torch.tensor(self.embeddings)) self.clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.2, max_depth=5, random_state=0) x_pred = [] - for edge in self.data.edge_index.T: + for edge in train_edges: x_pred.append(torch.concat((emb_norm[edge[0]], emb_norm[edge[1]])).tolist()) - for edge in self.neg_samples_train: + for edge in neg_samples_train: x_pred.append(torch.concat((emb_norm[edge[0]], emb_norm[edge[1]])).tolist()) - true_train = [1] * len(self.data.edge_index.T) + [0] * len(self.neg_samples_train) + true_train = [1] * len(train_edges) + [0] * len(neg_samples_train) self.clf.fit(x_pred, true_train) return self.clf - def test(self) -> float: + def test(self, clf: BaseEstimator, test_edges: List[List[int]], 
neg_samples_test: List[List[int]] ) -> float: ''' Calculate f1 measure for test edges - + + :param clf: (BaseEstimator): Trained classifier used to predict the test edges + :param test_edges: (List): List of existing edges to test on + :param neg_samples_test: (List): List of negative samples to test on :return: (float): Value of f1 measure ''' emb_norm = torch.nn.functional.normalize(torch.tensor(self.embeddings)) pred_test = [] - for edge in self.positive_edges: + for edge in test_edges: pred_test.append(torch.concat((emb_norm[edge[0]], emb_norm[edge[1]])).tolist()) - for edge in self.neg_samples_test: + for edge in neg_samples_test: pred_test.append(torch.concat((emb_norm[edge[0]], emb_norm[edge[1]])).tolist()) - y_pred = self.clf.predict(pred_test) - y_true = [1] * len(self.positive_edges) + [0] * len(self.neg_samples_test) + y_pred = clf.predict(pred_test) + y_true = [1] * len(test_edges) + [0] * len(neg_samples_test) return f1_score(y_true, y_pred) - -if __name__ == "__main__": - data = Planetoid(root="/tmp/" + str("name"), name="Citeseer", transform=T.NormalizeFeatures()) - model = ModelLinkPrediction(data) - clf = model.train_cl() - print("f1", (model.test())) diff --git a/tutorials/tutorial_link_prediction.ipynb b/tutorials/tutorial_link_prediction.ipynb index 9e60cbb..baa84e1 100644 --- a/tutorials/tutorial_link_prediction.ipynb +++ b/tutorials/tutorial_link_prediction.ipynb @@ -1,13 +1,393 @@ { "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Оптимизация Сети Добычи\n", + "модель для предсказания оптимальных связей между нефтяными и газовыми скважинами и центрами сбора или переработки. Здесь узлы - это скважины, а рёбра - логистические или технологические связи. Каждому месторождению соответствует вектор атрибутов: название, страна, регион месторождения, тектонический режим, тип углеводорода, структурные свойства, литология, литологический период, проницаемость, долгота и широта, валовая прибыль." 
+ ], + "metadata": { + "collapsed": false + } + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, + "outputs": [], + "source": [ + "import torch_geometric.transforms as T\n", + "from stable_gnn.model_link_predict import ModelLinkPrediction\n", + "from stable_gnn.graph import Graph" + ], "metadata": { - "collapsed": true - }, + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-13T13:25:06.483464700Z", + "start_time": "2023-12-13T13:25:06.466264600Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Загружаем данные исходного графа" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 34, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing...\n", + "Done!\n" + ] + } + ], + "source": [ + "root = '../data_validation/'\n", + "name = 'oil_fields'\n", + "adjust_flag = False\n", + "dataset = Graph(root=root + str(name), name=name, transform=T.NormalizeFeatures(),adjust_flag=adjust_flag)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-13T13:25:07.599269500Z", + "start_time": "2023-12-13T13:25:07.552208100Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Обучаем модель link prediction\n", + "### Разбиение данных на train и test" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 46, "outputs": [], - "source": [] + "source": [ + "model = ModelLinkPrediction(number_of_trials=50)\n", + "train_edges, train_negative, test_edges, test_negative = model.train_test_edges(dataset)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-13T14:14:53.979008400Z", + "start_time": "2023-12-13T14:14:53.562055200Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### Обучаем классификатор на тренировочных ребрах, который выдает 1 если ребро есть и 0 -- если нет" + ], + "metadata": { + "collapsed": false + } + }, + 
{ + "cell_type": "code", + "execution_count": 47, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001B[32m[I 2023-12-13 17:14:54,483]\u001B[0m A new study created in memory with name: no-name-cf34933a-e673-4735-97e9-5c4cd15df50b\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:15:00,561]\u001B[0m Trial 0 finished with value: 4.94843864440918 and parameters: {'hidden_layer': 64, 'dropout': 0.0, 'size of network, number of convs': 2, 'lr': 0.006455795076076083, 'out_layer': 64, 'num_negative_samples': 16, 'alpha': 0.8}. Best is trial 0 with value: 4.94843864440918.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:15:10,028]\u001B[0m Trial 1 finished with value: 4.445287704467773 and parameters: {'hidden_layer': 128, 'dropout': 0.30000000000000004, 'size of network, number of convs': 3, 'lr': 0.009848055947467238, 'out_layer': 64, 'num_negative_samples': 16, 'alpha': 0.4}. Best is trial 1 with value: 4.445287704467773.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:15:20,654]\u001B[0m Trial 2 finished with value: 4.561388969421387 and parameters: {'hidden_layer': 32, 'dropout': 0.0, 'size of network, number of convs': 3, 'lr': 0.009228921694007648, 'out_layer': 128, 'num_negative_samples': 6, 'alpha': 0.5}. Best is trial 1 with value: 4.445287704467773.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:15:27,311]\u001B[0m Trial 3 finished with value: 5.290963172912598 and parameters: {'hidden_layer': 64, 'dropout': 0.5, 'size of network, number of convs': 3, 'lr': 0.0063481527786354575, 'out_layer': 32, 'num_negative_samples': 21, 'alpha': 0.9}. Best is trial 1 with value: 4.445287704467773.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:15:37,156]\u001B[0m Trial 4 finished with value: 4.566917419433594 and parameters: {'hidden_layer': 32, 'dropout': 0.1, 'size of network, number of convs': 2, 'lr': 0.00998369433122475, 'out_layer': 32, 'num_negative_samples': 6, 'alpha': 0.5}. 
Best is trial 1 with value: 4.445287704467773.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:15:45,111]\u001B[0m Trial 5 finished with value: 4.792712211608887 and parameters: {'hidden_layer': 64, 'dropout': 0.2, 'size of network, number of convs': 3, 'lr': 0.00877028805841402, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.7}. Best is trial 1 with value: 4.445287704467773.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:15:50,127]\u001B[0m Trial 6 finished with value: 5.364152908325195 and parameters: {'hidden_layer': 32, 'dropout': 0.5, 'size of network, number of convs': 1, 'lr': 0.0073156450272589485, 'out_layer': 32, 'num_negative_samples': 16, 'alpha': 0.9}. Best is trial 1 with value: 4.445287704467773.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:15:56,291]\u001B[0m Trial 7 finished with value: 5.350238800048828 and parameters: {'hidden_layer': 32, 'dropout': 0.2, 'size of network, number of convs': 3, 'lr': 0.009417660292546476, 'out_layer': 128, 'num_negative_samples': 21, 'alpha': 0.9}. Best is trial 1 with value: 4.445287704467773.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:16:04,765]\u001B[0m Trial 8 finished with value: 4.74601936340332 and parameters: {'hidden_layer': 64, 'dropout': 0.1, 'size of network, number of convs': 1, 'lr': 0.008665879957820886, 'out_layer': 64, 'num_negative_samples': 21, 'alpha': 0.6}. Best is trial 1 with value: 4.445287704467773.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:16:09,999]\u001B[0m Trial 9 finished with value: 5.346479892730713 and parameters: {'hidden_layer': 128, 'dropout': 0.2, 'size of network, number of convs': 1, 'lr': 0.007708739388309707, 'out_layer': 32, 'num_negative_samples': 1, 'alpha': 0.9}. 
Best is trial 1 with value: 4.445287704467773.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:16:21,794]\u001B[0m Trial 10 finished with value: 4.455364227294922 and parameters: {'hidden_layer': 128, 'dropout': 0.4, 'size of network, number of convs': 3, 'lr': 0.00546316784883674, 'out_layer': 64, 'num_negative_samples': 16, 'alpha': 0.4}. Best is trial 1 with value: 4.445287704467773.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:16:32,832]\u001B[0m Trial 11 finished with value: 4.428609848022461 and parameters: {'hidden_layer': 128, 'dropout': 0.4, 'size of network, number of convs': 3, 'lr': 0.005166785413998819, 'out_layer': 64, 'num_negative_samples': 16, 'alpha': 0.4}. Best is trial 11 with value: 4.428609848022461.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:16:43,843]\u001B[0m Trial 12 finished with value: 4.426614761352539 and parameters: {'hidden_layer': 128, 'dropout': 0.4, 'size of network, number of convs': 3, 'lr': 0.005069658723722782, 'out_layer': 64, 'num_negative_samples': 16, 'alpha': 0.4}. Best is trial 12 with value: 4.426614761352539.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:16:55,014]\u001B[0m Trial 13 finished with value: 4.439720630645752 and parameters: {'hidden_layer': 128, 'dropout': 0.4, 'size of network, number of convs': 3, 'lr': 0.005198033966918222, 'out_layer': 64, 'num_negative_samples': 16, 'alpha': 0.4}. Best is trial 12 with value: 4.426614761352539.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:17:06,948]\u001B[0m Trial 14 finished with value: 4.2630438804626465 and parameters: {'hidden_layer': 128, 'dropout': 0.4, 'size of network, number of convs': 3, 'lr': 0.00586960814312501, 'out_layer': 64, 'num_negative_samples': 1, 'alpha': 0.3}. 
Best is trial 14 with value: 4.2630438804626465.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:17:19,565]\u001B[0m Trial 15 finished with value: 4.25166130065918 and parameters: {'hidden_layer': 128, 'dropout': 0.30000000000000004, 'size of network, number of convs': 2, 'lr': 0.005882501082471112, 'out_layer': 64, 'num_negative_samples': 1, 'alpha': 0.3}. Best is trial 15 with value: 4.25166130065918.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:17:31,959]\u001B[0m Trial 16 finished with value: 4.23830509185791 and parameters: {'hidden_layer': 128, 'dropout': 0.30000000000000004, 'size of network, number of convs': 2, 'lr': 0.006125626756898033, 'out_layer': 64, 'num_negative_samples': 1, 'alpha': 0.3}. Best is trial 16 with value: 4.23830509185791.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:17:45,215]\u001B[0m Trial 17 finished with value: 4.236704349517822 and parameters: {'hidden_layer': 128, 'dropout': 0.30000000000000004, 'size of network, number of convs': 2, 'lr': 0.006998223976782317, 'out_layer': 64, 'num_negative_samples': 1, 'alpha': 0.3}. Best is trial 17 with value: 4.236704349517822.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:17:59,943]\u001B[0m Trial 18 finished with value: 3.929490804672241 and parameters: {'hidden_layer': 128, 'dropout': 0.30000000000000004, 'size of network, number of convs': 2, 'lr': 0.007208336626029247, 'out_layer': 64, 'num_negative_samples': 1, 'alpha': 0.2}. Best is trial 18 with value: 3.929490804672241.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:18:14,760]\u001B[0m Trial 19 finished with value: 3.9347195625305176 and parameters: {'hidden_layer': 128, 'dropout': 0.1, 'size of network, number of convs': 2, 'lr': 0.007031797317306148, 'out_layer': 128, 'num_negative_samples': 1, 'alpha': 0.2}. 
Best is trial 18 with value: 3.929490804672241.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:18:30,760]\u001B[0m Trial 20 finished with value: 3.926802158355713 and parameters: {'hidden_layer': 128, 'dropout': 0.1, 'size of network, number of convs': 2, 'lr': 0.007925269843306321, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.2}. Best is trial 20 with value: 3.926802158355713.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:18:44,109]\u001B[0m Trial 21 finished with value: 3.910865306854248 and parameters: {'hidden_layer': 128, 'dropout': 0.1, 'size of network, number of convs': 2, 'lr': 0.007941411094600424, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.2}. Best is trial 21 with value: 3.910865306854248.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:18:57,547]\u001B[0m Trial 22 finished with value: 3.942229747772217 and parameters: {'hidden_layer': 128, 'dropout': 0.1, 'size of network, number of convs': 2, 'lr': 0.0079933011339157, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.2}. Best is trial 21 with value: 3.910865306854248.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:19:10,097]\u001B[0m Trial 23 finished with value: 3.9330413341522217 and parameters: {'hidden_layer': 128, 'dropout': 0.0, 'size of network, number of convs': 2, 'lr': 0.008119987802593634, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.2}. Best is trial 21 with value: 3.910865306854248.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:19:25,947]\u001B[0m Trial 24 finished with value: 3.9403295516967773 and parameters: {'hidden_layer': 128, 'dropout': 0.2, 'size of network, number of convs': 2, 'lr': 0.008239223367160507, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.2}. 
Best is trial 21 with value: 3.910865306854248.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:19:42,314]\u001B[0m Trial 25 finished with value: 3.3673112392425537 and parameters: {'hidden_layer': 128, 'dropout': 0.1, 'size of network, number of convs': 2, 'lr': 0.007545581606752518, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.1}. Best is trial 25 with value: 3.3673112392425537.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:19:58,559]\u001B[0m Trial 26 finished with value: 3.353905439376831 and parameters: {'hidden_layer': 128, 'dropout': 0.1, 'size of network, number of convs': 2, 'lr': 0.007641487813069402, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.1}. Best is trial 26 with value: 3.353905439376831.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:20:13,141]\u001B[0m Trial 27 finished with value: 3.3766632080078125 and parameters: {'hidden_layer': 128, 'dropout': 0.0, 'size of network, number of convs': 2, 'lr': 0.00677991316719361, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.1}. Best is trial 26 with value: 3.353905439376831.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:20:27,313]\u001B[0m Trial 28 finished with value: 3.379089832305908 and parameters: {'hidden_layer': 32, 'dropout': 0.0, 'size of network, number of convs': 2, 'lr': 0.006668822148772528, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.1}. Best is trial 26 with value: 3.353905439376831.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:20:41,926]\u001B[0m Trial 29 finished with value: 3.3641624450683594 and parameters: {'hidden_layer': 64, 'dropout': 0.0, 'size of network, number of convs': 2, 'lr': 0.0065933182842379204, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.1}. 
Best is trial 26 with value: 3.353905439376831.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:20:54,242]\u001B[0m Trial 30 finished with value: 3.4526753425598145 and parameters: {'hidden_layer': 64, 'dropout': 0.0, 'size of network, number of convs': 1, 'lr': 0.00755630852260146, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.1}. Best is trial 26 with value: 3.353905439376831.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:21:06,912]\u001B[0m Trial 31 finished with value: 3.3651294708251953 and parameters: {'hidden_layer': 64, 'dropout': 0.0, 'size of network, number of convs': 2, 'lr': 0.006701706665296095, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.1}. Best is trial 26 with value: 3.353905439376831.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:21:19,696]\u001B[0m Trial 32 finished with value: 3.372197151184082 and parameters: {'hidden_layer': 64, 'dropout': 0.0, 'size of network, number of convs': 2, 'lr': 0.006578439870054117, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.1}. Best is trial 26 with value: 3.353905439376831.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:21:24,658]\u001B[0m Trial 33 finished with value: 4.945215702056885 and parameters: {'hidden_layer': 64, 'dropout': 0.0, 'size of network, number of convs': 2, 'lr': 0.00739336404144688, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.8}. Best is trial 26 with value: 3.353905439376831.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:21:37,608]\u001B[0m Trial 34 finished with value: 3.3701438903808594 and parameters: {'hidden_layer': 64, 'dropout': 0.1, 'size of network, number of convs': 2, 'lr': 0.006914685197044972, 'out_layer': 128, 'num_negative_samples': 6, 'alpha': 0.1}. 
Best is trial 26 with value: 3.353905439376831.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:21:49,925]\u001B[0m Trial 35 finished with value: 3.3725249767303467 and parameters: {'hidden_layer': 64, 'dropout': 0.0, 'size of network, number of convs': 2, 'lr': 0.006404478474776761, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.1}. Best is trial 26 with value: 3.353905439376831.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:22:03,043]\u001B[0m Trial 36 finished with value: 3.3712146282196045 and parameters: {'hidden_layer': 64, 'dropout': 0.1, 'size of network, number of convs': 2, 'lr': 0.00843088479589685, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.1}. Best is trial 26 with value: 3.353905439376831.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:22:08,525]\u001B[0m Trial 37 finished with value: 4.766565322875977 and parameters: {'hidden_layer': 64, 'dropout': 0.0, 'size of network, number of convs': 2, 'lr': 0.0061430255154214415, 'out_layer': 32, 'num_negative_samples': 6, 'alpha': 0.7}. Best is trial 26 with value: 3.353905439376831.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:22:22,897]\u001B[0m Trial 38 finished with value: 3.3745875358581543 and parameters: {'hidden_layer': 64, 'dropout': 0.1, 'size of network, number of convs': 2, 'lr': 0.007637698708851515, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.1}. Best is trial 26 with value: 3.353905439376831.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:22:30,297]\u001B[0m Trial 39 finished with value: 4.711760520935059 and parameters: {'hidden_layer': 32, 'dropout': 0.2, 'size of network, number of convs': 1, 'lr': 0.00911537885268011, 'out_layer': 128, 'num_negative_samples': 21, 'alpha': 0.5}. 
Best is trial 26 with value: 3.353905439376831.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:22:37,068]\u001B[0m Trial 40 finished with value: 4.660437107086182 and parameters: {'hidden_layer': 64, 'dropout': 0.1, 'size of network, number of convs': 2, 'lr': 0.007206397063845573, 'out_layer': 32, 'num_negative_samples': 11, 'alpha': 0.6}. Best is trial 26 with value: 3.353905439376831.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:22:48,174]\u001B[0m Trial 41 finished with value: 3.3524560928344727 and parameters: {'hidden_layer': 64, 'dropout': 0.1, 'size of network, number of convs': 2, 'lr': 0.00681989820226611, 'out_layer': 128, 'num_negative_samples': 6, 'alpha': 0.1}. Best is trial 41 with value: 3.3524560928344727.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:23:00,992]\u001B[0m Trial 42 finished with value: 3.3824143409729004 and parameters: {'hidden_layer': 64, 'dropout': 0.0, 'size of network, number of convs': 2, 'lr': 0.006207013583699559, 'out_layer': 128, 'num_negative_samples': 6, 'alpha': 0.1}. Best is trial 41 with value: 3.3524560928344727.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:23:13,184]\u001B[0m Trial 43 finished with value: 3.374380588531494 and parameters: {'hidden_layer': 64, 'dropout': 0.2, 'size of network, number of convs': 2, 'lr': 0.006734797755369286, 'out_layer': 128, 'num_negative_samples': 6, 'alpha': 0.1}. Best is trial 41 with value: 3.3524560928344727.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:23:18,249]\u001B[0m Trial 44 finished with value: 4.9087653160095215 and parameters: {'hidden_layer': 64, 'dropout': 0.2, 'size of network, number of convs': 2, 'lr': 0.007713080813908965, 'out_layer': 128, 'num_negative_samples': 6, 'alpha': 0.8}. 
Best is trial 41 with value: 3.3524560928344727.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:23:32,674]\u001B[0m Trial 45 finished with value: 3.4580602645874023 and parameters: {'hidden_layer': 64, 'dropout': 0.1, 'size of network, number of convs': 1, 'lr': 0.006466608362728475, 'out_layer': 128, 'num_negative_samples': 21, 'alpha': 0.1}. Best is trial 41 with value: 3.3524560928344727.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:23:45,102]\u001B[0m Trial 46 finished with value: 3.37900972366333 and parameters: {'hidden_layer': 32, 'dropout': 0.0, 'size of network, number of convs': 2, 'lr': 0.005788151481289066, 'out_layer': 128, 'num_negative_samples': 6, 'alpha': 0.1}. Best is trial 41 with value: 3.3524560928344727.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:23:51,305]\u001B[0m Trial 47 finished with value: 4.768192291259766 and parameters: {'hidden_layer': 64, 'dropout': 0.1, 'size of network, number of convs': 2, 'lr': 0.007141939231466729, 'out_layer': 32, 'num_negative_samples': 11, 'alpha': 0.7}. Best is trial 41 with value: 3.3524560928344727.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:24:00,062]\u001B[0m Trial 48 finished with value: 4.5736002922058105 and parameters: {'hidden_layer': 64, 'dropout': 0.0, 'size of network, number of convs': 2, 'lr': 0.006869209433631206, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.5}. Best is trial 41 with value: 3.3524560928344727.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:24:16,190]\u001B[0m Trial 49 finished with value: 3.445894718170166 and parameters: {'hidden_layer': 32, 'dropout': 0.2, 'size of network, number of convs': 1, 'lr': 0.007423384248427025, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.1}. 
Best is trial 41 with value: 3.3524560928344727.\u001B[0m\n" + ] + } + ], + "source": [ + "cl = model.train_cl(train_edges,train_negative)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-13T14:25:40.707760800Z", + "start_time": "2023-12-13T14:14:54.483095600Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### Проведем тестирование и узнаем точность модели" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 48, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "f1 measure 0.8305010893246187\n" + ] + } + ], + "source": [ + "print(\"f1 measure\", (model.test(cl,test_edges,test_negative)))" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-13T14:25:40.824498500Z", + "start_time": "2023-12-13T14:25:40.708762100Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Загружаем данные и уточняем граф с помощью флага adjust_flag = True" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 49, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing...\n", + "Done!\n" + ] + } + ], + "source": [ + "root = '../data_validation/'\n", + "name='oil_fields'\n", + "adjust_flag = True\n", + "\n", + "dataset = Graph(root=root + str(name), name=name, transform=T.NormalizeFeatures(), adjust_flag=adjust_flag)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-13T14:26:17.173527400Z", + "start_time": "2023-12-13T14:26:12.828307600Z" + } + } + }, + { + "cell_type": "code", + "execution_count": 50, + "outputs": [ + { + "data": { + "text/plain": "396" + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(dataset[0].x)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-13T14:26:17.206761500Z", + "start_time": 
"2023-12-13T14:26:17.173527400Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### Обучаем классификатор на тренировочных ребрах" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 51, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001B[32m[I 2023-12-13 17:26:19,290]\u001B[0m A new study created in memory with name: no-name-524615ab-2416-46f0-88b0-85aa3f39024d\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:26:23,044]\u001B[0m Trial 0 finished with value: 3.8365793228149414 and parameters: {'hidden_layer': 64, 'dropout': 0.0, 'size of network, number of convs': 3, 'lr': 0.006066350278122684, 'out_layer': 128, 'num_negative_samples': 16, 'alpha': 0.5}. Best is trial 0 with value: 3.8365793228149414.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:26:28,573]\u001B[0m Trial 1 finished with value: 2.628662347793579 and parameters: {'hidden_layer': 64, 'dropout': 0.5, 'size of network, number of convs': 1, 'lr': 0.005182580427943447, 'out_layer': 64, 'num_negative_samples': 6, 'alpha': 0.1}. Best is trial 1 with value: 2.628662347793579.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:26:36,475]\u001B[0m Trial 2 finished with value: 2.884071111679077 and parameters: {'hidden_layer': 128, 'dropout': 0.0, 'size of network, number of convs': 2, 'lr': 0.005040593204677637, 'out_layer': 32, 'num_negative_samples': 16, 'alpha': 0.2}. Best is trial 1 with value: 2.628662347793579.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:26:44,089]\u001B[0m Trial 3 finished with value: 2.594714403152466 and parameters: {'hidden_layer': 128, 'dropout': 0.1, 'size of network, number of convs': 3, 'lr': 0.005487124219919353, 'out_layer': 32, 'num_negative_samples': 11, 'alpha': 0.1}. 
Best is trial 3 with value: 2.594714403152466.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:26:45,939]\u001B[0m Trial 4 finished with value: 4.846668720245361 and parameters: {'hidden_layer': 32, 'dropout': 0.0, 'size of network, number of convs': 3, 'lr': 0.005997275587839929, 'out_layer': 64, 'num_negative_samples': 1, 'alpha': 0.7}. Best is trial 3 with value: 2.594714403152466.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:26:53,861]\u001B[0m Trial 5 finished with value: 2.640279531478882 and parameters: {'hidden_layer': 64, 'dropout': 0.5, 'size of network, number of convs': 1, 'lr': 0.0060640682760954995, 'out_layer': 64, 'num_negative_samples': 1, 'alpha': 0.1}. Best is trial 3 with value: 2.594714403152466.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:26:56,627]\u001B[0m Trial 6 finished with value: 4.9044013023376465 and parameters: {'hidden_layer': 32, 'dropout': 0.1, 'size of network, number of convs': 3, 'lr': 0.008867947129031797, 'out_layer': 128, 'num_negative_samples': 6, 'alpha': 0.7}. Best is trial 3 with value: 2.594714403152466.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:26:57,761]\u001B[0m Trial 7 finished with value: 7.476437568664551 and parameters: {'hidden_layer': 64, 'dropout': 0.0, 'size of network, number of convs': 3, 'lr': 0.008145511322228705, 'out_layer': 64, 'num_negative_samples': 11, 'alpha': 0.9}. Best is trial 3 with value: 2.594714403152466.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:27:00,527]\u001B[0m Trial 8 finished with value: 3.9020090103149414 and parameters: {'hidden_layer': 32, 'dropout': 0.4, 'size of network, number of convs': 1, 'lr': 0.006597745494807822, 'out_layer': 64, 'num_negative_samples': 6, 'alpha': 0.5}. 
Best is trial 3 with value: 2.594714403152466.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:27:02,156]\u001B[0m Trial 9 finished with value: 5.761407375335693 and parameters: {'hidden_layer': 128, 'dropout': 0.5, 'size of network, number of convs': 1, 'lr': 0.005331959893582404, 'out_layer': 64, 'num_negative_samples': 11, 'alpha': 0.8}. Best is trial 3 with value: 2.594714403152466.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:27:06,806]\u001B[0m Trial 10 finished with value: 3.498806953430176 and parameters: {'hidden_layer': 128, 'dropout': 0.2, 'size of network, number of convs': 2, 'lr': 0.009772885305423726, 'out_layer': 32, 'num_negative_samples': 21, 'alpha': 0.4}. Best is trial 3 with value: 2.594714403152466.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:27:14,772]\u001B[0m Trial 11 finished with value: 2.6005759239196777 and parameters: {'hidden_layer': 64, 'dropout': 0.30000000000000004, 'size of network, number of convs': 1, 'lr': 0.007234108814106155, 'out_layer': 32, 'num_negative_samples': 6, 'alpha': 0.1}. Best is trial 3 with value: 2.594714403152466.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:27:21,022]\u001B[0m Trial 12 finished with value: 3.176621437072754 and parameters: {'hidden_layer': 128, 'dropout': 0.30000000000000004, 'size of network, number of convs': 3, 'lr': 0.007354758523398173, 'out_layer': 32, 'num_negative_samples': 11, 'alpha': 0.3}. Best is trial 3 with value: 2.594714403152466.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:27:29,239]\u001B[0m Trial 13 finished with value: 2.6037607192993164 and parameters: {'hidden_layer': 128, 'dropout': 0.2, 'size of network, number of convs': 1, 'lr': 0.007357916287855205, 'out_layer': 32, 'num_negative_samples': 21, 'alpha': 0.1}. 
Best is trial 3 with value: 2.594714403152466.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:27:31,760]\u001B[0m Trial 14 finished with value: 4.285075664520264 and parameters: {'hidden_layer': 64, 'dropout': 0.30000000000000004, 'size of network, number of convs': 2, 'lr': 0.008256424678016109, 'out_layer': 32, 'num_negative_samples': 11, 'alpha': 0.6}. Best is trial 3 with value: 2.594714403152466.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:27:40,107]\u001B[0m Trial 15 finished with value: 2.612947463989258 and parameters: {'hidden_layer': 64, 'dropout': 0.1, 'size of network, number of convs': 1, 'lr': 0.006752925383049627, 'out_layer': 32, 'num_negative_samples': 6, 'alpha': 0.1}. Best is trial 3 with value: 2.594714403152466.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:27:48,372]\u001B[0m Trial 16 finished with value: 2.5859835147857666 and parameters: {'hidden_layer': 128, 'dropout': 0.1, 'size of network, number of convs': 3, 'lr': 0.008056923617184216, 'out_layer': 32, 'num_negative_samples': 11, 'alpha': 0.1}. Best is trial 16 with value: 2.5859835147857666.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:27:51,394]\u001B[0m Trial 17 finished with value: 4.295757293701172 and parameters: {'hidden_layer': 128, 'dropout': 0.1, 'size of network, number of convs': 3, 'lr': 0.0080762047647032, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.6}. Best is trial 16 with value: 2.5859835147857666.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:27:55,155]\u001B[0m Trial 18 finished with value: 3.500119924545288 and parameters: {'hidden_layer': 128, 'dropout': 0.2, 'size of network, number of convs': 3, 'lr': 0.008923461621817865, 'out_layer': 32, 'num_negative_samples': 11, 'alpha': 0.4}. 
Best is trial 16 with value: 2.5859835147857666.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:28:01,422]\u001B[0m Trial 19 finished with value: 2.8932600021362305 and parameters: {'hidden_layer': 128, 'dropout': 0.1, 'size of network, number of convs': 3, 'lr': 0.009642107770557937, 'out_layer': 32, 'num_negative_samples': 11, 'alpha': 0.2}. Best is trial 16 with value: 2.5859835147857666.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:28:02,719]\u001B[0m Trial 20 finished with value: 7.552063941955566 and parameters: {'hidden_layer': 128, 'dropout': 0.1, 'size of network, number of convs': 3, 'lr': 0.00871044725674313, 'out_layer': 32, 'num_negative_samples': 11, 'alpha': 0.9}. Best is trial 16 with value: 2.5859835147857666.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:28:11,732]\u001B[0m Trial 21 finished with value: 2.5899999141693115 and parameters: {'hidden_layer': 128, 'dropout': 0.30000000000000004, 'size of network, number of convs': 3, 'lr': 0.006862495826264894, 'out_layer': 32, 'num_negative_samples': 6, 'alpha': 0.1}. Best is trial 16 with value: 2.5859835147857666.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:28:21,272]\u001B[0m Trial 22 finished with value: 2.5821590423583984 and parameters: {'hidden_layer': 128, 'dropout': 0.4, 'size of network, number of convs': 3, 'lr': 0.006930482248672371, 'out_layer': 32, 'num_negative_samples': 1, 'alpha': 0.1}. Best is trial 22 with value: 2.5821590423583984.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:28:29,272]\u001B[0m Trial 23 finished with value: 2.5841684341430664 and parameters: {'hidden_layer': 128, 'dropout': 0.4, 'size of network, number of convs': 3, 'lr': 0.0068301549923037224, 'out_layer': 32, 'num_negative_samples': 1, 'alpha': 0.1}. 
Best is trial 22 with value: 2.5821590423583984.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:28:30,743]\u001B[0m Trial 24 finished with value: 5.676145076751709 and parameters: {'hidden_layer': 128, 'dropout': 0.4, 'size of network, number of convs': 3, 'lr': 0.00779586882504882, 'out_layer': 32, 'num_negative_samples': 1, 'alpha': 0.8}. Best is trial 22 with value: 2.5821590423583984.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:28:35,843]\u001B[0m Trial 25 finished with value: 3.174121379852295 and parameters: {'hidden_layer': 128, 'dropout': 0.4, 'size of network, number of convs': 3, 'lr': 0.007703333192573318, 'out_layer': 128, 'num_negative_samples': 1, 'alpha': 0.3}. Best is trial 22 with value: 2.5821590423583984.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:28:44,343]\u001B[0m Trial 26 finished with value: 2.5938143730163574 and parameters: {'hidden_layer': 32, 'dropout': 0.4, 'size of network, number of convs': 2, 'lr': 0.006472995927606249, 'out_layer': 32, 'num_negative_samples': 1, 'alpha': 0.1}. Best is trial 22 with value: 2.5821590423583984.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:28:53,472]\u001B[0m Trial 27 finished with value: 2.584397315979004 and parameters: {'hidden_layer': 128, 'dropout': 0.4, 'size of network, number of convs': 3, 'lr': 0.007052397602615945, 'out_layer': 32, 'num_negative_samples': 1, 'alpha': 0.1}. Best is trial 22 with value: 2.5821590423583984.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:29:02,043]\u001B[0m Trial 28 finished with value: 2.5900774002075195 and parameters: {'hidden_layer': 128, 'dropout': 0.4, 'size of network, number of convs': 3, 'lr': 0.007036024356707842, 'out_layer': 32, 'num_negative_samples': 1, 'alpha': 0.1}. 
Best is trial 22 with value: 2.5821590423583984.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:29:04,560]\u001B[0m Trial 29 finished with value: 3.8442800045013428 and parameters: {'hidden_layer': 128, 'dropout': 0.5, 'size of network, number of convs': 3, 'lr': 0.0063345936356496765, 'out_layer': 128, 'num_negative_samples': 1, 'alpha': 0.5}. Best is trial 22 with value: 2.5821590423583984.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:29:11,705]\u001B[0m Trial 30 finished with value: 2.590701103210449 and parameters: {'hidden_layer': 32, 'dropout': 0.4, 'size of network, number of convs': 3, 'lr': 0.005844868064214588, 'out_layer': 32, 'num_negative_samples': 16, 'alpha': 0.1}. Best is trial 22 with value: 2.5821590423583984.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:29:17,976]\u001B[0m Trial 31 finished with value: 2.592123508453369 and parameters: {'hidden_layer': 128, 'dropout': 0.30000000000000004, 'size of network, number of convs': 3, 'lr': 0.006958718030021918, 'out_layer': 32, 'num_negative_samples': 1, 'alpha': 0.1}. Best is trial 22 with value: 2.5821590423583984.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:29:24,788]\u001B[0m Trial 32 finished with value: 2.5886518955230713 and parameters: {'hidden_layer': 128, 'dropout': 0.5, 'size of network, number of convs': 3, 'lr': 0.00761336433350577, 'out_layer': 32, 'num_negative_samples': 1, 'alpha': 0.1}. Best is trial 22 with value: 2.5821590423583984.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:29:32,060]\u001B[0m Trial 33 finished with value: 2.584606885910034 and parameters: {'hidden_layer': 128, 'dropout': 0.4, 'size of network, number of convs': 3, 'lr': 0.006304781606218184, 'out_layer': 32, 'num_negative_samples': 16, 'alpha': 0.1}. 
Best is trial 22 with value: 2.5821590423583984.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:29:37,188]\u001B[0m Trial 34 finished with value: 2.8760554790496826 and parameters: {'hidden_layer': 128, 'dropout': 0.4, 'size of network, number of convs': 3, 'lr': 0.0063597091926625195, 'out_layer': 32, 'num_negative_samples': 16, 'alpha': 0.2}. Best is trial 22 with value: 2.5821590423583984.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:29:43,705]\u001B[0m Trial 35 finished with value: 2.595762252807617 and parameters: {'hidden_layer': 128, 'dropout': 0.5, 'size of network, number of convs': 2, 'lr': 0.00563304939132375, 'out_layer': 32, 'num_negative_samples': 16, 'alpha': 0.1}. Best is trial 22 with value: 2.5821590423583984.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:29:50,388]\u001B[0m Trial 36 finished with value: 2.568971872329712 and parameters: {'hidden_layer': 128, 'dropout': 0.4, 'size of network, number of convs': 3, 'lr': 0.006236595557379001, 'out_layer': 128, 'num_negative_samples': 16, 'alpha': 0.1}. Best is trial 36 with value: 2.568971872329712.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:29:51,738]\u001B[0m Trial 37 finished with value: 4.8685431480407715 and parameters: {'hidden_layer': 128, 'dropout': 0.5, 'size of network, number of convs': 3, 'lr': 0.007123608715763118, 'out_layer': 128, 'num_negative_samples': 1, 'alpha': 0.7}. Best is trial 36 with value: 2.568971872329712.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:29:55,326]\u001B[0m Trial 38 finished with value: 3.181826591491699 and parameters: {'hidden_layer': 128, 'dropout': 0.30000000000000004, 'size of network, number of convs': 3, 'lr': 0.005911363940668768, 'out_layer': 128, 'num_negative_samples': 1, 'alpha': 0.3}. 
Best is trial 36 with value: 2.568971872329712.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:29:58,088]\u001B[0m Trial 39 finished with value: 3.8554091453552246 and parameters: {'hidden_layer': 32, 'dropout': 0.4, 'size of network, number of convs': 3, 'lr': 0.006775805646261942, 'out_layer': 128, 'num_negative_samples': 16, 'alpha': 0.5}. Best is trial 36 with value: 2.568971872329712.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:29:59,008]\u001B[0m Trial 40 finished with value: 7.322974681854248 and parameters: {'hidden_layer': 64, 'dropout': 0.5, 'size of network, number of convs': 2, 'lr': 0.005031790991720356, 'out_layer': 128, 'num_negative_samples': 21, 'alpha': 0.9}. Best is trial 36 with value: 2.568971872329712.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:30:04,505]\u001B[0m Trial 41 finished with value: 2.582902193069458 and parameters: {'hidden_layer': 128, 'dropout': 0.4, 'size of network, number of convs': 3, 'lr': 0.006332089728356647, 'out_layer': 64, 'num_negative_samples': 16, 'alpha': 0.1}. Best is trial 36 with value: 2.568971872329712.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:30:10,488]\u001B[0m Trial 42 finished with value: 2.5909156799316406 and parameters: {'hidden_layer': 128, 'dropout': 0.4, 'size of network, number of convs': 3, 'lr': 0.006148719930793415, 'out_layer': 64, 'num_negative_samples': 16, 'alpha': 0.1}. Best is trial 36 with value: 2.568971872329712.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:30:15,743]\u001B[0m Trial 43 finished with value: 2.58264422416687 and parameters: {'hidden_layer': 128, 'dropout': 0.4, 'size of network, number of convs': 3, 'lr': 0.005660016343723836, 'out_layer': 64, 'num_negative_samples': 16, 'alpha': 0.1}. 
Best is trial 36 with value: 2.568971872329712.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:30:17,171]\u001B[0m Trial 44 finished with value: 5.6556243896484375 and parameters: {'hidden_layer': 128, 'dropout': 0.30000000000000004, 'size of network, number of convs': 3, 'lr': 0.0056224256283455894, 'out_layer': 64, 'num_negative_samples': 16, 'alpha': 0.8}. Best is trial 36 with value: 2.568971872329712.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:30:20,071]\u001B[0m Trial 45 finished with value: 3.5055856704711914 and parameters: {'hidden_layer': 128, 'dropout': 0.4, 'size of network, number of convs': 3, 'lr': 0.0053478891806771804, 'out_layer': 64, 'num_negative_samples': 16, 'alpha': 0.4}. Best is trial 36 with value: 2.568971872329712.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:30:21,838]\u001B[0m Trial 46 finished with value: 4.865943908691406 and parameters: {'hidden_layer': 128, 'dropout': 0.5, 'size of network, number of convs': 3, 'lr': 0.00574891176951331, 'out_layer': 64, 'num_negative_samples': 16, 'alpha': 0.7}. Best is trial 36 with value: 2.568971872329712.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:30:27,204]\u001B[0m Trial 47 finished with value: 2.61961030960083 and parameters: {'hidden_layer': 32, 'dropout': 0.4, 'size of network, number of convs': 1, 'lr': 0.0066507307696702265, 'out_layer': 64, 'num_negative_samples': 16, 'alpha': 0.1}. Best is trial 36 with value: 2.568971872329712.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:30:29,388]\u001B[0m Trial 48 finished with value: 4.265909194946289 and parameters: {'hidden_layer': 64, 'dropout': 0.30000000000000004, 'size of network, number of convs': 3, 'lr': 0.006530468035083998, 'out_layer': 64, 'num_negative_samples': 16, 'alpha': 0.6}. 
Best is trial 36 with value: 2.568971872329712.\u001B[0m\n", + "\u001B[32m[I 2023-12-13 17:30:35,526]\u001B[0m Trial 49 finished with value: 2.594179391860962 and parameters: {'hidden_layer': 128, 'dropout': 0.5, 'size of network, number of convs': 2, 'lr': 0.0060436815739752375, 'out_layer': 64, 'num_negative_samples': 21, 'alpha': 0.1}. Best is trial 36 with value: 2.568971872329712.\u001B[0m\n" + ] + } + ], + "source": [ + "model = ModelLinkPrediction(number_of_trials=50)\n", + "train_edges, train_negative, test_edges, test_negative = model.train_test_edges(dataset)\n", + "cl = model.train_cl(train_edges,train_negative)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-13T14:30:47.104600700Z", + "start_time": "2023-12-13T14:26:19.277802800Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### Проведем тестирование и узнаем точность модели для уточненного графа" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 52, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "f1 measure 0.5714285714285714\n" + ] + } + ], + "source": [ + "print(\"f1 measure\", (model.test(cl,test_edges,test_negative)))" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-12-13T15:00:29.628031800Z", + "start_time": "2023-12-13T15:00:29.592042800Z" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + } } ], "metadata": {