From 86035e003aea876586ea07ff55d4b8238a8fd4b7 Mon Sep 17 00:00:00 2001
From: kbonney <kirkb1998@gmail.com>
Date: Mon, 14 Oct 2024 11:41:05 -0400
Subject: [PATCH 1/3] Fix minor syntax issues in documentation.

---
 pyrocs/information_theory/entropy.py       |  2 +-
 pyrocs/information_theory/kl_divergence.py | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/pyrocs/information_theory/entropy.py b/pyrocs/information_theory/entropy.py
index 2c5b595..06c956e 100644
--- a/pyrocs/information_theory/entropy.py
+++ b/pyrocs/information_theory/entropy.py
@@ -18,7 +18,7 @@ def discrete_entropy(
     
         H = - \\sum_{i=1}^N [p_i * \\log p_i]
     
-    where :math:`H` = entropy, :math:`p` = discrete probability of the occurrence of an event from the :math:`i`th category, 
+    where :math:`H` = entropy, :math:`p` = discrete probability of the occurrence of an event from the :math:`i^{\\mathrm{th}}` category, 
     and :math:`N` is the total number of categories. Low entropy values indicate a higher state of disorder 
     while higher entropy values indicate a well-ordered system. The maximum possible value of the
     entropy for a given system is :math:`log(N)`, and is thus varies by group size. Please see 
diff --git a/pyrocs/information_theory/kl_divergence.py b/pyrocs/information_theory/kl_divergence.py
index 1e210f5..25ca02e 100644
--- a/pyrocs/information_theory/kl_divergence.py
+++ b/pyrocs/information_theory/kl_divergence.py
@@ -17,16 +17,16 @@ def kl_divergence(p: np.array, q: np.array, base: int = 2) -> np.array:
         
     where :math:`D` is the KLD value, :math:`N` is the total number of categories, 
     and :math:`p_i` and :math:`q_i` reflect the discrete probability of the occurrence 
-    of an event from the :math:`i`th category of the sample distribution and 
+    of an event from the :math:`i^{\\mathrm{th}}` category of the sample distribution and 
     reference distribution respectively.
 
     The function is able to calculate KLD for cases where not all categories from the reference distribution are present within the sample distribution. 
 
     Args:
         p,q (numpy.ndarray): arrays, where rows for each constitute the two
-        probability distributions from which to calculate divergence. p
-        contains the distributions holding probabilities in the numerator of the
-        KL divergence summand.
+            probability distributions from which to calculate divergence. p
+            contains the distributions holding probabilities in the numerator of the
+            KL divergence summand.
         base: log base to compute from; base 2 (bits), base 10 (decimal/whole numbers), or base e (ecology, earth systems)
 
     Returns:
@@ -66,8 +66,8 @@ def novelty_transience_resonance(thetas_arr : np.array, window : int) -> list:
     
     .. math::
     
-        N_w(p_i) &= (1/w)Sum(1 \\leq k \\leq w)[D(p_i || p_(i-k))]\\\\
-        T_w(p_i) &= (1/w)Sum(1 \\leq k \\leq w)[D(p_i || p_(i+k))]\\\\
+        N_w(p_i) &= (1/w)\\sum(1 \\leq k \\leq w)[D(p_i || p_(i-k))]\\\\
+        T_w(p_i) &= (1/w)\\sum(1 \\leq k \\leq w)[D(p_i || p_(i+k))]\\\\
         R_w(p_i) &= N_w(p_i) - T_w(p_i)
         
     where :math:`N` is novelty, :math:`T` is transience, :math:`R` is resonance, 

From 949e92411edf9cf3d69abe889efe3c5af2e19ce5 Mon Sep 17 00:00:00 2001
From: kbonney <kirkb1998@gmail.com>
Date: Fri, 18 Oct 2024 12:31:02 -0400
Subject: [PATCH 2/3] additional docstring syntax updates

---
 pyrocs/biosciences/affinity.py                | 15 +++++-------
 pyrocs/biosciences/functional_redundancy.py   | 20 +++++++---------
 pyrocs/biosciences/hill_diversity.py          | 20 ++++++++--------
 pyrocs/complex_systems/causal_complexity.py   | 18 +++++++-------
 .../complex_systems/fluctuation_complexity.py |  8 +++----
 pyrocs/complex_systems/grc.py                 |  6 ++---
 pyrocs/information_theory/entropy.py          | 14 +++++------
 pyrocs/information_theory/kl_divergence.py    | 24 ++++++++-----------
 pyrocs/information_theory/mutual_info.py      | 22 ++++++++---------
 9 files changed, 66 insertions(+), 81 deletions(-)

diff --git a/pyrocs/biosciences/affinity.py b/pyrocs/biosciences/affinity.py
index 344a243..6f04691 100644
--- a/pyrocs/biosciences/affinity.py
+++ b/pyrocs/biosciences/affinity.py
@@ -1,10 +1,9 @@
 
 from itertools import repeat
-from typing import Union
 import numpy as np
 from pandas import DataFrame
 
-def affinity(data: Union[np.array, DataFrame], weights=None, to_bool=bool) -> np.array:
+def affinity(data: np.ndarray, weights=None) -> float:
     """
     Returns the affinity between all pairs of columns in binary data.
 
@@ -24,13 +23,11 @@ def affinity(data: Union[np.array, DataFrame], weights=None, to_bool=bool) -> np
     results in a binary implementation of affinity within this software.
     
     Args:
-        data: array or dataframe 
-        weights: (optional) float or array 
-        to_bool: boolean type
+        data (array) 
+        weights (optional array) 
         
     Returns:
-        affinity between columns in data
-    
+        float
     """
     
     num_cols = data.shape[1]
@@ -49,9 +46,9 @@ def affinity(data: Union[np.array, DataFrame], weights=None, to_bool=bool) -> np
     counter = {}
     for row, weight in zip(rows, weights):
         for i in range(num_cols):
-            i_val = to_bool(row[i])
+            i_val = bool(row[i])
             for j in range(i, num_cols):
-                j_val = to_bool(row[j])
+                j_val = bool(row[j])
                 key = (i, j, i_val, j_val)
                 counter[key] = counter.get(key, 0) + weight
                 
diff --git a/pyrocs/biosciences/functional_redundancy.py b/pyrocs/biosciences/functional_redundancy.py
index 7267097..c6ac433 100644
--- a/pyrocs/biosciences/functional_redundancy.py
+++ b/pyrocs/biosciences/functional_redundancy.py
@@ -1,6 +1,6 @@
 import numpy as np
 
-def functional_redundancy(p: np.array, delta: np.array) -> float:
+def functional_redundancy(p: np.ndarray, delta: np.ndarray) -> float:
     '''
     This metric evaluates how interchangeable groups within a population are based 
     on the specific function they perform. As a biological concept, 
@@ -18,19 +18,15 @@ def functional_redundancy(p: np.array, delta: np.array) -> float:
         D &= \\sum_i(p_i*(1-p_i))
 
     Args:
-    ----------
-    p : np.array
-        Relative abundances p[i] (i = 1, 2,…,N) with 0 < p[i] ≤ 1 and where the constraint 0 < p[i]
-        means that all calculations involve only those species that are actually present in 
-        the assemblage with nonzero abundances.
-    delta : np.array
-        :math:`δ_{ij}` symmetric array of pairwise functional dissimilarities between species i and j 
+        p (array): Relative abundances p[i] (i = 1, 2,…,N) with 0 < p[i] ≤ 1 
+            and where the constraint 0 < p[i]
+            means that all calculations involve only those species that 
+            are actually present in the assemblage with nonzero abundances.
+        delta (array): :math:`δ_{ij}` symmetric array of pairwise functional 
+            dissimilarities between species i and j 
 
     Returns:
-    --------
-    FR : float
-        Functional Redundancy Score
-
+        float
     '''
    
     dim = len(p)
diff --git a/pyrocs/biosciences/hill_diversity.py b/pyrocs/biosciences/hill_diversity.py
index f3fc4b7..152705f 100644
--- a/pyrocs/biosciences/hill_diversity.py
+++ b/pyrocs/biosciences/hill_diversity.py
@@ -2,7 +2,7 @@
 import numpy as np
 
 
-def hill_shannon(p: np.array) -> float:
+def hill_shannon(p: np.ndarray) -> float:
     """
     The Hill-Shannon number is a specific instance (i.e. the Perplexity) of Hill Diversity, 
     which prioritizes neither common nor rare species. 
@@ -20,15 +20,15 @@ def hill_shannon(p: np.array) -> float:
     where :math:`q` approaches :math:`1` and the mean is the geometric mean
     
     Args:
-        p: p[i] is the proportion of all individuals that belong to species i
+        p (array): p[i] is the proportion of all individuals that belong to species i
     Returns:
-        A metric for effective count of species (diversity)
+        float
     """
     entropy = -sum(x * np.log(x) for x in p if x > 0)
     return math.exp(entropy)
 
 
-def hill_simpson(p: np.array) -> float:
+def hill_simpson(p: np.ndarray) -> float:
     """
     The Hill-Simpson number is a specific instance (i.e. the Inverse Simpson Index) 
     of Hill Diversity that prioritizes the common species. 
@@ -45,14 +45,14 @@ def hill_simpson(p: np.array) -> float:
     where :math:`q=2` and the mean is the usual arithmetic mean
 
     Args:
-        p: p[i] is the proportion of all individuals that belong to species i
+        p (array): p[i] is the proportion of all individuals that belong to species i
     Returns:
-        A metric for effective count of species (diversity)
+        float
     """
     return 1.0 / p.dot(p)
 
 
-def hill_diversity(p: np.array, q: float) -> float:
+def hill_diversity(p: np.ndarray, q: float) -> float:
     """
     The Hill Numbers are a family of diversity metrics describing "effective number of species".
     
@@ -87,11 +87,11 @@ def hill_diversity(p: np.array, q: float) -> float:
     species :math:`i`, :math:`q` is the exponent that determines the rarity scale on which the mean is taken
     
     Args:
-        p: p[i] is the proportion of all individuals that belong to species i, 
-        q: The exponent that determines the rarity scale on which the mean is taken.
+        p (array): p[i] is the proportion of all individuals that belong to species i, 
+        q (float): The exponent that determines the rarity scale on which the mean is taken.
             Species richness (q=0), Hill-Simpson diversity (q=2), Hill-Shannon diversity (q=1), 
     Returns:
-        D: a metric for effective count of species (diversity) 
+        float
     """
 
     # Special cases
diff --git a/pyrocs/complex_systems/causal_complexity.py b/pyrocs/complex_systems/causal_complexity.py
index 0d1e46d..2984526 100644
--- a/pyrocs/complex_systems/causal_complexity.py
+++ b/pyrocs/complex_systems/causal_complexity.py
@@ -2,7 +2,7 @@
 import networkx as nx
 
 
-def cyclomatic_complexity(A : np.ndarray, directed : bool = False):
+def cyclomatic_complexity(A : np.ndarray, directed : bool = False) -> float:
     '''
     Cyclomatic complexity reflects the number of linearly 
     independent paths within a system of interest 
@@ -23,9 +23,9 @@ def cyclomatic_complexity(A : np.ndarray, directed : bool = False):
     higher cyclomatic complexity values).     
     
     Args:
-        A: array
+        A (array)
     Returns:
-        cyclomatic complexity of the graph   
+        float
     '''
 
     if directed:
@@ -43,7 +43,7 @@ def cyclomatic_complexity(A : np.ndarray, directed : bool = False):
 
     return E - N + 2.0 * P 
 
-def feedback_density(A : np.ndarray, directed : bool = False):
+def feedback_density(A : np.ndarray, directed : bool = False) -> float:
     '''
     Feedback density captures the fraction of edges :math:`(E_{loop})` 
     and nodes (:math:`N_{loop}`) that are involved in at least one feedback loop.
@@ -64,9 +64,9 @@ def feedback_density(A : np.ndarray, directed : bool = False):
     edges are included in one or more feedback loops.
     
     Args:
-        A: array
+        A (array)
     Returns:
-        feedback density of the graph   
+        float
     '''
 
     if directed: 
@@ -95,7 +95,7 @@ def feedback_density(A : np.ndarray, directed : bool = False):
 
     return (Eloop + Nloop) / (Etot + Ntot)
 
-def causal_complexity(A: np.ndarray, directed : bool = False):
+def causal_complexity(A: np.ndarray, directed : bool = False) -> float:
     '''
     Causal complexity measures the underlying causal structure 
     of a system by considering both the system’s intricacy as
@@ -124,9 +124,9 @@ def causal_complexity(A: np.ndarray, directed : bool = False):
     of causal complexity than those systems with lower feedback density.
     
     Args:
-        A: array
+        A (array)
     Returns:
-        causal complexity of the graph
+        float
     '''
     M = cyclomatic_complexity(A, directed=directed)
     D = feedback_density(A, directed=directed)
diff --git a/pyrocs/complex_systems/fluctuation_complexity.py b/pyrocs/complex_systems/fluctuation_complexity.py
index a877484..3c0cfac 100644
--- a/pyrocs/complex_systems/fluctuation_complexity.py
+++ b/pyrocs/complex_systems/fluctuation_complexity.py
@@ -2,7 +2,7 @@
 from collections import Counter
 from functools import lru_cache
 
-def fluctuation_complexity(A : list, L : int = 1):
+def fluctuation_complexity(A, L : int = 1) -> float:
     '''
     
     Fluctuating complexity extends the characterization of discrete entropy 
@@ -24,10 +24,10 @@ def fluctuation_complexity(A : list, L : int = 1):
     respective frequencies of event :math:`i` and :math:`j` within the series. 
     
     Args:
-        A: Sequence of symbols
-        L: If > 1, groups symbols into short subsequences of length L.
+        A (array): Sequence of symbols
+        L (int): If > 1, groups symbols into short subsequences of length L.
     Returns:
-        The Fluctuation Complexity of the sequence
+        float
     '''
     if L > 1:
         A = [tuple(A[i: i + L]) for i in range(len(A) + 1 - L)]
diff --git a/pyrocs/complex_systems/grc.py b/pyrocs/complex_systems/grc.py
index 37d10ff..3fdda0c 100644
--- a/pyrocs/complex_systems/grc.py
+++ b/pyrocs/complex_systems/grc.py
@@ -2,7 +2,7 @@
 import numpy as np
 
 
-def grc(A : np.ndarray, directed : bool):
+def grc(A : np.ndarray, directed : bool) -> float:
     """
     Global reaching centrality (GRC) measures the level of hierarchy within a network based on flow. 
     The equation within the package follows the formulations from 
@@ -23,11 +23,11 @@ def grc(A : np.ndarray, directed : bool):
     versa :cite:p:`lakkaraju_complexity_2019`.
 
     Args:
-        A: Square matrix of adjacencies in the network
+        A (array): Square matrix of adjacencies in the network
         directed (bool): If true, assume A represents a directed graph (row -> column).
             If false, assume A represents an undirected graph.
     Returns:
-        Global reaching centrality of the graph 
+        float 
     """
 
     if directed:
diff --git a/pyrocs/information_theory/entropy.py b/pyrocs/information_theory/entropy.py
index 06c956e..21fe3c3 100644
--- a/pyrocs/information_theory/entropy.py
+++ b/pyrocs/information_theory/entropy.py
@@ -1,15 +1,13 @@
 
 from collections import Counter
-from collections.abc import Sequence
-
 from scipy.stats import entropy
 import numpy as np
 
 
 def discrete_entropy(
-        values: Sequence,
-        counts: Sequence = None,
-        base: int = 2) -> float:
+    values: np.ndarray, 
+    counts: np.ndarray = None, 
+    base: int = 2) -> float:
     """
     Entropy is often used to measure the state of disorder/randomness in a system. 
     The general equation follows the form:
@@ -33,11 +31,11 @@ def discrete_entropy(
     `scipy documentation <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.entropy.html>`_ as well as the references noted above. 
 
     Args:
-        values (Sequence): Sequence of observed values from a random process
-        counts (Sequence[int]): Number of times each value was observed
+        values (array): Sequence of observed values from a random process
+        counts (array[int]): Number of times each value was observed
         base (int): Base of returned entropy (default returns number of bits)
     Returns:
-        mutual information between x and y
+        float
     """
     
     if counts is None:
diff --git a/pyrocs/information_theory/kl_divergence.py b/pyrocs/information_theory/kl_divergence.py
index 25ca02e..e3b7011 100644
--- a/pyrocs/information_theory/kl_divergence.py
+++ b/pyrocs/information_theory/kl_divergence.py
@@ -1,7 +1,6 @@
 import numpy as np
-# from scipy.special import xlogy
 
-def kl_divergence(p: np.array, q: np.array, base: int = 2) -> np.array:
+def kl_divergence(p: np.ndarray, q: np.ndarray, base: int = 2) -> float:
     """
     Sometimes called relative entropy, the Kullback-Leibler Divergence (KLD) 
     measures the similarity between two distributions 
@@ -23,15 +22,12 @@ def kl_divergence(p: np.array, q: np.array, base: int = 2) -> np.array:
     The function is able to calculate KLD for cases where not all categories from the reference distribution are present within the sample distribution. 
 
     Args:
-        p,q (numpy.ndarray): arrays, where rows for each constitute the two
-            probability distributions from which to calculate divergence. p
-            contains the distributions holding probabilities in the numerator of the
-            KL divergence summand.
-        base: log base to compute from; base 2 (bits), base 10 (decimal/whole numbers), or base e (ecology, earth systems)
+        p (array): discrete probability distribution
+        q (array): discrete probability distribution
+        base (int): log base to compute from; base 2 (bits), base 10 (decimal/whole numbers), or base e (ecology, earth systems)
 
     Returns:
-        numpy.ndarray: KL divergences, where the second array's rows are the
-        distributions in the numerator of the log in KL divergence
+        float
     """
 
     assert p.shape == q.shape, 'p and q shapes must be identical'
@@ -52,7 +48,9 @@ def kl_divergence(p: np.array, q: np.array, base: int = 2) -> np.array:
     return kl_div
 
 
-def novelty_transience_resonance(thetas_arr : np.array, window : int) -> list:
+def novelty_transience_resonance(
+    thetas_arr: np.ndarray, 
+    window: int) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
     """
     These three related metrics extend the Kullback-Leibler Divergence formulation to consider how 
     a distribution differs from past and future distributions within a sequence. Specifically, novelty 
@@ -78,12 +76,10 @@ def novelty_transience_resonance(thetas_arr : np.array, window : int) -> list:
     equation for the KLD.
 
     Args:
-        thetas_arr (numpy.ndarray): rows are topic mixtures
+        thetas_arr (array): rows are topic mixtures
         window (int): positive integer defining scale or scale size
     Returns:
-        novelties 
-        transiences 
-        resonances    
+        tuple(array): novelties, transiences, resonances   
     """
 
     # Find the first and last center speech offset, given window size.
diff --git a/pyrocs/information_theory/mutual_info.py b/pyrocs/information_theory/mutual_info.py
index 2595532..21f3808 100644
--- a/pyrocs/information_theory/mutual_info.py
+++ b/pyrocs/information_theory/mutual_info.py
@@ -1,15 +1,11 @@
-
-from collections.abc import Sequence
-import os
-import sys
-    
+import numpy as np
 from pyrocs.information_theory import discrete_entropy
 
 
 def mutual_info(
-        x: Sequence,
-        y: Sequence,
-        counts: Sequence = None,
+        x: np.ndarray,
+        y: np.ndarray,
+        counts: np.ndarray = None,
         base: int = 2) -> float:
     """
     Mutual information measures how much knowledge is gained about one random variable when another is observed.
@@ -36,14 +32,16 @@ def mutual_info(
     when the other is observed.
 
     Args:
-        x,y (numpy.ndarray): arrays, discretized observations from random
-            distributions x \in X and y \in Y
-        counts (Sequence[int]): If present, the number of times each (x,y) pair was
+        x (array): discretized observations from random
+            distribution x \\in X
+        y (array): discretized observations from random
+            distribution y \\in Y
+        counts (array[int]): If present, the number of times each (x,y) pair was
             observed
         base (int): If present the base in which to return the entropy
 
     Returns:
-        mutual information between x and y
+        float
     """
     x_entropy = discrete_entropy(x, counts, base)
     y_entropy = discrete_entropy(y, counts, base)

From fe6c8916fd91a59f86976994877cb72869026c8f Mon Sep 17 00:00:00 2001
From: kbonney <kirkb1998@gmail.com>
Date: Fri, 18 Oct 2024 12:40:13 -0400
Subject: [PATCH 3/3] adding to_bool back to affinity func

---
 pyrocs/biosciences/affinity.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pyrocs/biosciences/affinity.py b/pyrocs/biosciences/affinity.py
index 6f04691..1312e96 100644
--- a/pyrocs/biosciences/affinity.py
+++ b/pyrocs/biosciences/affinity.py
@@ -3,7 +3,7 @@
 import numpy as np
 from pandas import DataFrame
 
-def affinity(data: np.ndarray, weights=None) -> float:
+def affinity(data: np.ndarray, weights=None, to_bool=bool) -> float:
     """
     Returns the affinity between all pairs of columns in binary data.
 
@@ -25,6 +25,7 @@ def affinity(data: np.ndarray, weights=None) -> float:
     Args:
         data (array) 
         weights (optional array) 
+        to_bool: function or type to convert array values to boolean
         
     Returns:
         float
@@ -46,9 +47,9 @@ def affinity(data: np.ndarray, weights=None) -> float:
     counter = {}
     for row, weight in zip(rows, weights):
         for i in range(num_cols):
-            i_val = bool(row[i])
+            i_val = to_bool(row[i])
             for j in range(i, num_cols):
-                j_val = bool(row[j])
+                j_val = to_bool(row[j])
                 key = (i, j, i_val, j_val)
                 counter[key] = counter.get(key, 0) + weight