Typing-Monkeys · ncvescera · May 28, 2023 · May 29, 2023 · May 29, 2023 · May 29, 2023
diff --git a/cnlp/dimensionality_reduction_methods.py b/cnlp/dimensionality_reduction_methods.py
@@ -4,6 +4,28 @@
 problem in machine learning.
 Some researchers employ dimension reduction techniques
 to tackle the above problem and apply it in the link prediction scenario.
+
+References
+----------
+- [Link prediction on evolving data using matrix and tensor
+factorizations](https://doi.org/10.1109/ICDMW.2009.54)
+- [Nonnegative matrix factorization algorithms for link prediction in temporal
+networks using graph
+communicability](https://doi.org/10.1016/j.patcog.2017.06.025)
+- [Temporal-relational classifiers for prediction in
+evolving domains](https://doi.org/10.1109/ICDM.2008.125)
+- [Link prediction via matrix
+factorization](http://dl.acm.org/citation.cfm?id=2034117.2034146)
+- [Link prediction based on non-negative matrix
+factorization](https://doi.org/10.1371/journal.pone.0182968)
+- [A perturbation-based framework for
+link prediction via non-negative matrix
+factorization](https://doi.org/10.1038/srep38938)
+- [Link prediction in dynamic networks based on non-negative matrix
+factorization](https://doi.org/10.26599/BDMA.2017.9020002)
+- [Link prediction algorithm by
+matrix factorization based on importance of
+edges](https://doi.org/10.16451/j.cnki.issn1003-6059.201802006)
 """
 import networkx as nx
 import numpy as np
@@ -54,6 +76,11 @@ def link_prediction_svd(G: nx.Graph,
 
     In most of the works, non-negative matrix factorization has been used.
     Some authors also applied the singular value decomposition technique.
+
+    References
+    ----------
+    [Link prediction using matrix factorization with
+    bagging](https://doi.org/10.1109/ICIS.2016.7550942)
     """
 
     # Create the adjacency matrix of the graph
@@ -125,6 +152,11 @@ def link_prediction_nmf(graph: nx.Graph,
 
     In most of the works, non-negative matrix factorization has been used.
     Some authors also applied the singular value decomposition technique.
+
+    References
+    ----------
+    [Convex and semi-nonnegative matrix
+    factorizations](https://doi.org/10.1109/TPAMI.2008.277)
     """
 
     adj_matrix = to_adjacency_matrix(graph, sparse=False)

diff --git a/cnlp/other_methods/information_theory.py b/cnlp/other_methods/information_theory.py
@@ -27,6 +27,32 @@
 correlation among the degrees of the nodes in the network.
 
 And many many more.
+
+
+References
+----------
+- [Entropy measures for networks: Toward an information theory of complex
+topologies](https://doi.org/10.1103/PhysRevE.80.045102)
+- [Information Theory of Complex Networks: On Evolution and Architectural
+Constraints](https://doi.org/10.1007/978-3-540-44485-5_9)
+- [Maximal entropy random networks with given
+degree distribution](https://arxiv.org/abs/cond-mat/0206150)
+- [Link prediction in complex networks: A mutual
+information perspective](https://doi.org/10.1371/journal.pone.0107056)
+- [An information-theoretic model for link prediction in
+complex networks](https://doi.org/10.1038/srep13707)
+- [Link prediction based on path
+entropy](https://doi.org/10.1016/j.physa.2016.03.091)
+- [Entropy-based link prediction
+in weighted networks](http://stacks.iop.org/1674-1056/26/i=1/a=018902)
+- [Link Prediction in Evolving Networks Based on Popularity of
+Nodes](https://doi.org/10.1038/s41598-017-07315-4)
+- [An evidential link prediction method and link predictability based on
+shannon entropy](https://doi.org/10.1016/j.physa.2017.04.106)
+- [Entropy-based approach to missing-links
+prediction](https://doi.org/10.1007/s41109-018-0073-4)
+- [Maximal entropy random walk on heterogenous network for mirna-disease
+association prediction](https://doi.org/10.1016/j.mbs.2018.10.004)
 """
 import networkx as nx
 import numpy as np
@@ -70,6 +96,11 @@ def MI(G: nx.Graph) -> scipy.csr_matrix:
     Returns
     -------
     res_sparse: csr_matrix : the Similarity Matrix (in sparse format)
+
+    References
+    ----------
+    [Link Prediction in Complex Networks: A Mutual Information
+    Perspective](https://doi.org/10.1371/journal.pone.0107056)
     """
 
     def overlap_info(G: nx.Graph, x, y, edge_num: int) -> float:
@@ -208,6 +239,11 @@ def path_entropy(G: nx.Graph, max_path: int = 3) -> csr_matrix:
     Returns
     -------
     similarity_matrix: csr_matrix: the Similarity Matrix (in sparse format)
+
+    References
+    ----------
+    [Link prediction based on path
+    entropy](https://doi.org/10.1016/j.physa.2016.03.091)
     """
 
     def simple_path_entropy(paths: Generator[list, None, None],

diff --git a/cnlp/probabilistic_methods.py b/cnlp/probabilistic_methods.py
@@ -67,6 +67,11 @@ def stochastic_block_model(G: nx.Graph,
     Returns
     -------
     R: csr_matrix : the Similarity Matrix (in sparse format)
+
+    References
+    ----------
+    [Missing and spurious interactions and the reconstruction of complex
+    networks](https://doi.org/10.1073/pnas.0908366106)
     """
 
     def __random_changes(A_0: csr_matrix, p: float = .05) -> lil_matrix:

diff --git a/cnlp/similarity_methods/global_similarity.py b/cnlp/similarity_methods/global_similarity.py
@@ -46,6 +46,11 @@ def katz_index(G: nx.Graph, beta: int = 1) -> csr_matrix:
     The computational complexity of the given metric is high,
     and it can be roughly estimated to be cubic complexity
     which is not feasible for a large network.
+
+    References
+    ----------
+    [A new status index derived from sociometric
+    analysis](https://link.springer.com/article/10.1007/BF02289026)
     """
 
     def __power_method(A: csr_matrix,
@@ -133,11 +138,17 @@ def link_prediction_rwr(G: nx.Graph,
 
     The transition matrix \\(P\\) can be expressed as
 
-    .. math::
+    \\[
         P_{xy} = \\begin{cases}
                 \\frac{1}{k_x} & \\text{if } x \\text{ and } y \\text{ are connected,} \\\\
                 0 & \\text{otherwise.}
             \\end{cases}
+    \\]
+
+    References
+    ----------
+    [Fast Random Walk with Restart and Its
+    Applications](https://doi.org/10.1109/ICDM.2006.70)
     """
 
     def random_walk_with_restart(e: lil_array,
@@ -275,6 +286,11 @@ def rooted_page_rank(G: nx.Graph, alpha: float = .5) -> csr_matrix:
     walker moves to an arbitrary neighboring vertex with
     probability \\(\\alpha\\)
     and returns to \\(x\\) with probability \\( ( 1 - \\alpha )\\).
+
+    References
+    ----------
+    [The anatomy of a large-scale hypertextual Web search
+    engine](https://doi.org/10.1016/S0169-7552(98)00110-X)
     """
     A = to_adjacency_matrix(G)
     D = lil_matrix(A.shape)
@@ -322,6 +338,11 @@ def shortest_path(G: nx.Graph, cutoff: int = None) -> csr_matrix:
 
     The prediction accuracy
     of this index is low compared to most local indices.
+
+    References
+    ----------
+    [The link prediction problem for social
+    networks](https://doi.org/10.1145/956863.956972)
     """
     dim = G.number_of_nodes()
     if cutoff is None:
@@ -378,6 +399,11 @@ def sim_rank(G: nx.Graph,
     Returns
     -------
     sim_matrix: csr_matrix : the Similarity Matrix (in sparse format)
+
+    References
+    ----------
+    [SimRank: a measure of structural-context
+    similarity](https://doi.org/10.1145/775047.775126)
     """
 
     def init_similarity_matrix(G: nx.Graph, n: int) -> lil_matrix:

diff --git a/cnlp/similarity_methods/local_similarity.py b/cnlp/similarity_methods/local_similarity.py
@@ -37,6 +37,11 @@ def adamic_adar(G: nx.Graph) -> csr_matrix:
     This is also intuitive in the real-world scenario: for example,
     a person with many friends spends less time/resources on each
     individual friend than a person with only a few friends.
+
+    References
+    ----------
+    [Friends and neighbors on the
+    Web](https://doi.org/10.1016/S0378-8733(03)00009-1)
     """
 
     def __adamic_adar(G: nx.Graph, x, y) -> float:
@@ -83,6 +88,11 @@ def common_neighbors(G: nx.Graph) -> csr_matrix:
     -----
     The likelihood of the existence of a link between \\(x\\)
     and \\(y\\) increases with the number of common neighbors between them.
+
+    References
+    ----------
+    [Clustering and preferential attachment in growing
+    networks](https://doi.org/10.1103/PhysRevE.64.025102)
     """
     size = G.number_of_nodes()
     S = lil_matrix((size, size))
@@ -123,6 +133,11 @@ def cosine_similarity(G: nx.Graph) -> csr_matrix:
     This similarity index between two nodes is measured by
     calculating the Cosine of the angle between them.
     The metric is all about the orientation and not magnitude.
+
+    References
+    ----------
+    [Introduction to modern information
+    retrieval](https://archive.org/details/introductiontomo00salt/)
     """
 
     def __cosine_similarity(G: nx.Graph, x, y) -> float:
@@ -170,6 +185,11 @@ def hub_depressed(G: nx.Graph) -> csr_matrix:
     links between hubs and low degree nodes in the networks.
     The Hub depressed index promotes the links evolution
     between the hubs as well as the low degree nodes.
+
+    References
+    ----------
+    [Hierarchical Organization of Modularity in Metabolic
+    Networks](https://doi.org/10.1126/science.1073374)
     """
 
     def __hub_depressed(G: nx.Graph, x, y) -> float:
@@ -214,6 +234,11 @@ def hub_promoted(G: nx.Graph) -> csr_matrix:
     This similarity index promotes the formation of links between
     the sparsely connected nodes and hubs.
     It also tries to prevent link formation between the hub nodes.
+
+    References
+    ----------
+    [Hierarchical Organization of Modularity in Metabolic
+    Networks](https://doi.org/10.1126/science.1073374)
     """
 
     def __hub_promoted(G: nx.Graph, x, y) -> float:
@@ -260,6 +285,12 @@ def jaccard(G: nx.Graph) -> csr_matrix:
     common neighbors between the two vertices considered. Some researchers
     (**Liben-Nowell et al.**) demonstrated that this similarity metric
     performs worse than Common Neighbors.
+
+    References
+    ----------
+    [Distribution de la Flore Alpine dans le Bassin des Dranses
+    et dans quelques
+    régions voisines](http://dx.doi.org/10.5169/seals-266440)
     """
 
     def __jaccard(G: nx.Graph, x, y) -> float:
@@ -316,6 +347,11 @@ def node_clustering(G: nx.Graph) -> csr_matrix:
     of the network in which the clustering coefficients of all
     the common neighbors of a seed node pair are computed
     and summed to find the final similarity score of the pair.
+
+    References
+    ----------
+    [Link prediction with node clustering
+    coefficient](https://doi.org/10.1016/j.physa.2016.01.038)
     """
 
     def __t(G: nx.Graph, z) -> int:
@@ -388,6 +424,11 @@ def preferential_attachment(G: nx.Graph, sum: bool = False) -> csr_matrix:
 
     In the above equation, summation can also be used instead of
     multiplication as an aggregate function (`sum = True`).
+
+    References
+    ----------
+    [Evolution of the social network of scientific
+    collaborations](https://doi.org/10.1016/S0378-4371(02)00736-7)
     """
 
     def __preferential_attachment(G: nx.Graph,
@@ -448,6 +489,11 @@ def resource_allocation(G: nx.Graph) -> csr_matrix:
     This index shows good performance on heterogeneous
     networks with a high clustering coefficient, especially
     on transportation networks.
+
+    References
+    ----------
+    [Predicting missing links via local
+    information](https://doi.org/10.1140/epjb/e2009-00335-8)
     """
 
     def __resource_allocation(G: nx.Graph, x, y) -> float:
@@ -491,6 +537,13 @@ def sorensen(G: nx.Graph) -> csr_matrix:
     -----
     It is very similar to the Jaccard index. **McCune et al.** show
     that it is more robust than Jaccard against outliers.
+
+    References
+    ----------
+    [A method of establishing groups of equal amplitude in plant sociology
+    based on similarity of species content and its application to analyses
+    of the vegetation on Danish
+    commons](https://www.semanticscholar.org/paper/A-method-of-establishing-group-of-equal-amplitude-S%C3%B8rensen-S%C3%B8rensen/d8d3e6d95b60ec6ac8f91f42a6914a87b13a6bc1)
     """
 
     def __sorensen(G: nx.Graph, x, y) -> float:

diff --git a/cnlp/similarity_methods/quasi_local_similarity.py b/cnlp/similarity_methods/quasi_local_similarity.py
@@ -52,6 +52,11 @@ def local_path_index(G: nx.Graph, epsilon: float, n: int) -> csr_matrix:
     Computing this index becomes more complicated with the increasing
     value of \\(n\\). The LP index outperforms the proximity-based indices,
     such as RA, AA, and CN.
+
+    References
+    ----------
+    [Similarity index based on local paths for link prediction of complex
+    networks](https://doi.org/10.1103/PhysRevE.80.046122)
     """
     A = to_adjacency_matrix(G)
     A = A @ A
@@ -97,6 +102,11 @@ def path_of_length_three(G: nx.Graph) -> csr_matrix:
     governs protein interaction. The TCP predicts well the interactions of
     self-interacting proteins (SIPs), which are rare (4%) in PPI networks,
     and fails to predict those between SIPs and non-SIPs, which make up 96%.
+
+    References
+    ----------
+    [Network-based prediction of protein
+    interactions](https://doi.org/10.1101/275529)
     """
 
     def __path_of_length_three_iter(G: nx.Graph, x, y) -> float: