py-why · adam2392 · Apr 19, 2023 · Apr 11, 2023 · Apr 11, 2023 · Apr 11, 2023
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -63,7 +63,7 @@ jobs:
             sudo apt install libspatialindex-dev xdg-utils
       - python/install-packages:
           pkg-manager: poetry
-          args: "-E graph_func -E viz --with docs"
+          args: "--with docs"
           cache-version: "v1" # change to clear cache
       - run:
           name: Check poetry package versions

diff --git a/doc/api.rst b/doc/api.rst
@@ -28,4 +28,8 @@ Conditional Independence Testing
 Testing for conditional independence among variables is a core part
 of many data analysis procedures.
 
-TBD...
+.. currentmodule:: pywhy_stats
+.. autosummary::
+   :toctree: generated/
+
+   fisherz
diff --git a/doc/whats_new/v0.1.rst b/doc/whats_new/v0.1.rst
@@ -26,6 +26,7 @@ Version 0.1
 Changelog
 ---------
 
+- |Feature| Implement partial correlation test :func:`pywhy_stats.fisherz`, by `Adam Li`_ (:pr:`7`)
 
 
 Code and Documentation Contributors

diff --git a/poetry.lock b/poetry.lock
diff --git a/pywhy_stats/__init__.py b/pywhy_stats/__init__.py
@@ -1 +1,2 @@
+from . import fisherz
 from ._version import __version__  # noqa: F401
diff --git a/pywhy_stats/fisherz.py b/pywhy_stats/fisherz.py
@@ -0,0 +1,116 @@
+from math import log, sqrt
+from typing import Optional
+
+import numpy as np
+from numpy.typing import ArrayLike
+from scipy.stats import norm
+
+from .p_value_result import PValueResult
+
+
+def ind(X: ArrayLike, Y: ArrayLike, correlation_matrix: Optional[ArrayLike] = None) -> PValueResult:
+    """Perform an independence test using Fisher-Z's test.
+
+    Works on Gaussian random variables. This test is also known as the
+    correlation test.
+
+    Parameters
+    ----------
+    X : ArrayLike of shape (n_samples,)
+        The first node variable.
+    Y : ArrayLike of shape (n_samples,)
+        The second node variable.
+    correlation_matrix : ArrayLike of shape (2, 2), optional
+        The precomputed correlation matrix between X and Y., by default None.
+
+    Returns
+    -------
+    X : float
+        The test statistic.
+    p : float
+        The p-value of the test.
+    """
+    return _fisherz(X, Y, condition_on=None, correlation_matrix=correlation_matrix)
+
+
+def condind(
+    X: ArrayLike,
+    Y: ArrayLike,
+    condition_on: ArrayLike,
+    correlation_matrix: Optional[ArrayLike] = None,
+) -> PValueResult:
+    """Perform an independence test using Fisher-Z's test.
+
+    Works on Gaussian random variables. This test is also known as the
+    correlation test.
+
+    Parameters
+    ----------
+    X : ArrayLike of shape (n_samples,)
+        The first node variable.
+    Y : ArrayLike of shape (n_samples,)
+        The second node variable.
+    condition_on : ArrayLike of shape (n_samples, n_variables)
+        The conditioning set.
+    correlation_matrix : ArrayLike of shape (2 + n_variables, 2 + n_variables), optional
+        The precomputed correlation matrix between X, Y and ``condition_on``, by default None.
+
+    Returns
+    -------
+    X : float
+        The test statistic.
+    p : float
+        The p-value of the test.
+    """
+    return _fisherz(X, Y, condition_on=condition_on, correlation_matrix=correlation_matrix)
+
+
+def _fisherz(
+    X: ArrayLike,
+    Y: ArrayLike,
+    condition_on: Optional[ArrayLike] = None,
+    correlation_matrix: Optional[ArrayLike] = None,
+) -> PValueResult:
+    """Perform an independence test using Fisher-Z's test.
+
+    Works on Gaussian random variables. This test is also known as the
+    partial correlation test.
+
+    Parameters
+    ----------
+    X : ArrayLike of shape (n_samples,)
+        The first node variable.
+    Y : ArrayLike of shape (n_samples,)
+        The second node variable.
+    condition_on : ArrayLike of shape (n_samples, n_variables)
+        If `None` (default), will run a marginal independence test.
+    correlation_matrix : np.ndarray of shape (n_variables, n_variables), optional
+        ``None`` means without the parameter of correlation matrix and
+        the correlation will be computed from the data., by default None.
+
+    Returns
+    -------
+    X : float
+        The test statistic.
+    p : float
+        The p-value of the test.
+    """
+    if condition_on is None:
+        condition_on = np.empty((X.shape[0], 0))
+
+    # compute the correlation matrix within the specified data
+    data = np.hstack((X, Y, condition_on))
+    sample_size = data.shape[0]
+    if correlation_matrix is None:
+        correlation_matrix = np.corrcoef(data.T)
+
+    inv = np.linalg.pinv(correlation_matrix)
+    r = -inv[0, 1] / sqrt(inv[0, 0] * inv[1, 1])
+
+    # apply the Fisher Z-transformation
+    Z = 0.5 * log((1 + r) / (1 - r))
+
+    # compute the test statistic
+    statistic = sqrt(sample_size - condition_on.shape[1] - 3) * abs(Z)
+    p = 2 * (1 - norm.cdf(abs(statistic)))
+    return PValueResult(statistic, p)
diff --git a/pywhy_stats/independence.py b/pywhy_stats/independence.py
@@ -1,7 +1,10 @@
 from enum import Enum
+from types import ModuleType
 from typing import Optional
 
-from numpy.testing import ArrayLike
+from numpy.typing import ArrayLike
+
+from pywhy_stats import fisherz
 
 from .p_value_result import PValueResult
 
@@ -10,6 +13,7 @@ class Methods(Enum):
     """Methods for independence testing."""
 
     AUTO = 0
+    FISHERZ = fisherz
 
 
 def independence_test(
@@ -26,11 +30,11 @@ def independence_test(
 
     Parameters
     ----------
-    X : numpy.ndarray, shape (n, d)
+    X : ArrayLike, shape (n_samples, n_features_x)
         Data matrix for X.
-    Y : numpy.ndarray, shape (n, m)
+    Y : ArrayLike, shape (n_samples, n_features_y)
         Data matrix for Y.
-    condition_on : numpy.ndarray or None, shape (n, k), optional
+    condition_on : ArrayLike or None, shape (n_samples, n_features_z), optional
         Data matrix for the conditioning variables. If None is given, an unconditional test
         is performed.
     method : Methods, optional
@@ -44,5 +48,18 @@ def independence_test(
     result : PValueResult
         An instance of the PValueResult data class, containing the p-value, test statistic,
         and any additional information related to the independence test.
+
+    See Also
+    --------
+    fisherz : Fisher's Z test for independence
     """
-    pass
+    method_func: ModuleType
+    if method == Methods.AUTO:
+        method_func = Methods.FISHERZ
+    else:
+        method_func = method
+
+    if condition_on is None:
+        return method_func.ind(X, Y, method, **kwargs)
+    else:
+        return method_func.condind(X, Y, condition_on, method, **kwargs)
diff --git a/pywhy_stats/p_value_result.py b/pywhy_stats/p_value_result.py
@@ -1,7 +1,7 @@
 from dataclasses import dataclass
 from typing import Optional, Union
 
-from numpy.testing import ArrayLike
+from numpy.typing import ArrayLike
 
 
 @dataclass
@@ -10,16 +10,16 @@ class PValueResult:
 
     Attributes
     ----------
-    p_value: float
+    pvalue : float
         The p-value represents the probability of observing the given test statistic, or more
         extreme results, under a certain null hypothesis.
-    test_statistic: float or numpy.ndarray or None
+    statistic : float or ArrayLike or None
         The test statistic of the hypothesis test, which might not always be available.
-    additional_information: object or None
+    additional_information : object or None
         Any additional information or metadata relevant to the specific test conducted. These could
         also be a state of the method to re-use it.
     """
 
-    p_value: float
-    test_statistic: Optional[Union[float, ArrayLike]] = None
+    pvalue: float
+    statistic: Optional[Union[float, ArrayLike]] = None
     additional_information: Optional[object] = None
diff --git a/tests/test_fisherz_test.py b/tests/test_fisherz_test.py
@@ -0,0 +1,44 @@
+import flaky
+import numpy as np
+
+from pywhy_stats import fisherz
+
+
+@flaky.flaky(max_runs=3, min_passes=1)
+def test_fisherz_marg_ind():
+    """Test FisherZ marginal independence test for Gaussian data."""
+    rng = np.random.default_rng()
+
+    # We construct a SCM where X1 -> Y <- X and Y -> Z
+    # so X1 is independent from X, but conditionally dependent
+    # given Y or Z
+    X = rng.standard_normal((300, 1))
+    X1 = rng.standard_normal((300, 1))
+    Y = X + X1 + 0.5 * rng.standard_normal((300, 1))
+    Z = Y + 0.1 * rng.standard_normal((300, 1))
+
+    res = fisherz.ind(X, X1)
+    assert res.pvalue > 0.05
+    res = fisherz.ind(X, Z)
+    assert res.pvalue < 0.05
+
+
+@flaky.flaky(max_runs=3, min_passes=1)
+def test_fisherz_cond_ind():
+    """Test FisherZ conditional independence test for Gaussian data."""
+    rng = np.random.default_rng()
+
+    # We construct a SCM where X1 -> Y <- X and Y -> Z
+    # so X1 is independent from X, but conditionally dependent
+    # given Y or Z
+    X = rng.standard_normal((300, 1))
+    X1 = rng.standard_normal((300, 1))
+    Y = X + X1 + 0.5 * rng.standard_normal((300, 1))
+    Z = Y + 0.1 * rng.standard_normal((300, 1))
+
+    res = fisherz.condind(X, X1, Z)
+    assert res.pvalue < 0.05
+    res = fisherz.condind(X, X1, Y)
+    assert res.pvalue < 0.05
+    res = fisherz.condind(X, Z, Y)
+    assert res.pvalue > 0.05
-Original file line number
+Diff line change
@@ Expand Up / @@ -26,6 +26,7 @@ Version 0.1 @@
     Changelog
     ---------
+    - |Feature| Implement partial correlation test :func:`pywhy_stats.fisherz`, by `Adam Li`_ (:pr:`7`)
     Code and Documentation Contributors
@@ Expand Down @@
Original file line number	Diff line number	Diff line change
		@@ -1 +1,2 @@
		from . import fisherz
		from ._version import __version__ # noqa: F401