Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DRAFT] Introduce basic time-series causal discovery and experimental research code showing robust learning #86

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions doc/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@ @article{Colombo2012
url = {https://doi.org/10.1214/11-AOS940}
}

@inproceedings{Entner2010causal,
title = {On causal discovery from time series data using FCI},
author = {Entner, Doris and Hoyer, Patrik O},
booktitle = {Proceedings of the 5th European Workshop on Probabilistic Graphical Models (PGM)},
pages = {121--128},
year = {2010}
}

@article{Lopez2016revisiting,
title = {Revisiting classifier two-sample tests},
author = {Lopez-Paz, David and Oquab, Maxime},
Expand Down
9 changes: 9 additions & 0 deletions doc/tutorials/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,12 @@ no latent confounders.

markovian/example-pc-algo

[Experimental] Time-series causal discovery
===========================================
We include some tutorials on novel time-series causal discovery algorithms.

.. toctree::
:maxdepth: 1
:titlesonly:

timeseries/lpcmci-with-summary
Empty file.
2 changes: 1 addition & 1 deletion dodiscover/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@
from ._protocol import EquivalenceClass, Graph
from ._version import __version__ # noqa: F401
from .constraint import FCI, PC
from .context_builder import ContextBuilder, make_context
from .context_builder import ContextBuilder, make_context, make_ts_context
37 changes: 35 additions & 2 deletions dodiscover/_protocol.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from typing import Dict, FrozenSet, Iterable, Protocol
from typing import Dict, FrozenSet, Iterable, List, Optional, Protocol

import networkx as nx

from .typing import Column


class Graph(Protocol):
"""Protocol for graphs to work with dodiscover algorithms."""
Expand All @@ -27,7 +29,7 @@ def remove_node(self, u) -> None:
"""Remove a node from the graph."""
pass

def remove_edges_from(self, edges) -> None:
def remove_edges_from(self, edges: List) -> None:
"""Remove a set of edges from the graph."""
pass

Expand Down Expand Up @@ -60,6 +62,33 @@ def copy(self):
pass


class TimeSeriesGraph(Graph, Protocol):
    """A protocol for time-series graphs.

    Extends the base :class:`Graph` protocol with lag-aware queries, so
    constraint-based discovery algorithms can distinguish edges within a
    time point from edges that cross time points.
    """

    @property
    def max_lag(self) -> int:
        """The maximum time lag represented in the graph."""
        pass

    def lagged_neighbors(self, u: Column) -> List[Column]:
        """Return neighbors of u that are in a previous time point."""
        pass

    def contemporaneous_neighbors(self, u: Column) -> List[Column]:
        """Return neighbors of u that are in the same time point."""
        pass

    # TODO: refactor to
    # 1. remove_forward_homologous_edges(self, u, v)
    # 2. remove_backward_homologous_edges(self, u, v)
    def set_auto_removal(self, auto: Optional[str]) -> None:
        """Specify how to auto-remove homologous edges.

        NOTE(review): ``auto`` presumably selects a removal strategy by
        name (or ``None`` to disable) — confirm against implementations.
        """
        pass

    def nodes_at(self, t):
        """Nodes at specific time point.

        NOTE(review): ``t`` is presumably an integer lag/time index —
        confirm against implementations before annotating.
        """
        pass


class EquivalenceClass(Graph, Protocol):
"""Protocol for equivalence class of graphs."""

Expand Down Expand Up @@ -88,6 +117,10 @@ def bidirected_edge_name(self) -> str:
"""Name of the bidirected edges."""
pass

def get_graphs(self, graph_name: Optional[str] = None):
    """Get edge sub-graph.

    NOTE(review): when ``graph_name`` is None, implementations
    presumably return all internal sub-graphs; otherwise the sub-graph
    for that edge type — confirm against concrete graph classes.
    """
    pass

def orient_uncertain_edge(self, u, v) -> None:
"""Orients an uncertain edge in the equivalence class to directed ``'u'*->'v'``."""
pass
Expand Down
3 changes: 1 addition & 2 deletions dodiscover/ci/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
import sklearn.utils
from numpy.typing import ArrayLike

from dodiscover.typing import Column

from ..typing import Column
from .monte_carlo import generate_knn_in_subspace, restricted_nbr_permutation


Expand Down
6 changes: 3 additions & 3 deletions dodiscover/ci/oracle.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from typing import Optional, Set
from typing import Optional, Set, Union

import networkx as nx
import numpy as np
import pandas as pd

from dodiscover.typing import Column

from .._protocol import Graph
from .._protocol import Graph, TimeSeriesGraph
from .base import BaseConditionalIndependenceTest


Expand All @@ -23,7 +23,7 @@ class Oracle(BaseConditionalIndependenceTest):

_allow_multivariate_input: bool = True

def __init__(self, graph: Graph) -> None:
def __init__(self, graph: Union[Graph, TimeSeriesGraph]) -> None:
    """Initialize the oracle.

    Parameters
    ----------
    graph : Graph | TimeSeriesGraph
        The graph the oracle consults when answering (conditional)
        independence queries; may be a static or time-series graph.
    """
    self.graph = graph

def test(
Expand Down
8 changes: 7 additions & 1 deletion dodiscover/ci/simulate.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Callable, Tuple
from typing import Callable, Tuple, Optional

import numpy as np
from numpy.typing import NDArray
Expand Down Expand Up @@ -106,3 +106,9 @@ def nonlinear_additive_gaussian(
Y = nonlinear_func(freq * (2 * Axy * X + Z * Azy + std * Y_noise + cause_var))

return X, Y, Z


def vector_auto_regressive_from_summary(
    summary_G, max_lag: int=1, n_times: int=1000, random_state: Optional[int] = None
):
    """Simulate a vector auto-regressive (VAR) process from a summary graph.

    Placeholder stub: not implemented yet and currently returns ``None``.

    Parameters
    ----------
    summary_G : Graph
        The summary causal graph the VAR process should be consistent
        with — presumably edges define which lagged terms are non-zero;
        TODO confirm once implemented.
    max_lag : int, optional
        Maximum lag of the VAR process, by default 1.
    n_times : int, optional
        Number of time points to simulate, by default 1000.
    random_state : int, optional
        Seed for reproducible simulation, by default None.
    """
    pass
4 changes: 3 additions & 1 deletion dodiscover/constraint/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from .config import SkeletonMethods
from .fcialg import FCI
from .pcalg import PC
from .skeleton import LearnSemiMarkovianSkeleton, LearnSkeleton, SkeletonMethods
from .skeleton import LearnSemiMarkovianSkeleton, LearnSkeleton
from .timeseries import LearnTimeSeriesSkeleton, TimeSeriesFCI, TimeSeriesPC
2 changes: 2 additions & 0 deletions dodiscover/constraint/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,5 @@ class SkeletonMethods(Enum, metaclass=MetaEnum):
NBRS_PATH = "neighbors_path"
PDS = "pds"
PDS_PATH = "pds_path"
PDS_T = "pds_t"
PDS_T_PATH = "pds_t_path"
4 changes: 3 additions & 1 deletion dodiscover/constraint/fcialg.py
Original file line number Diff line number Diff line change
Expand Up @@ -838,7 +838,9 @@ def learn_skeleton(
d.pop("test_stat")
if "pvalue" in d:
d.pop("pvalue")
context = make_context(context).graph(new_init_graph).state_variable("PAG", pag).build()
context = (
make_context(context).init_graph(new_init_graph).state_variable("PAG", pag).build()
)

# # now compute all possibly d-separating sets and learn a better skeleton
skel_alg = LearnSemiMarkovianSkeleton(
Expand Down
91 changes: 55 additions & 36 deletions dodiscover/constraint/pcalg.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
import logging
from itertools import combinations
from typing import Optional
from typing import Iterator, Optional

import networkx as nx

from dodiscover.ci.base import BaseConditionalIndependenceTest
from dodiscover.constraint.config import SkeletonMethods
from dodiscover.constraint.utils import is_in_sep_set
from dodiscover.typing import Column, SeparatingSet

from .._protocol import EquivalenceClass
from ._classes import BaseConstraintDiscovery
from .config import SkeletonMethods

logger = logging.getLogger()

Expand Down Expand Up @@ -121,6 +121,24 @@ def convert_skeleton_graph(self, graph: nx.Graph) -> EquivalenceClass:
graph = CPDAG(incoming_undirected_edges=graph)
return graph

def _orientable_triples(self, graph: EquivalenceClass) -> Iterator:
    """Generate candidate triples ``(v_i, u, v_j)`` for collider orientation.

    Each unordered pair of neighbors of a node ``u`` is emitted exactly
    once, with ``u`` as the middle element of the triple.
    """
    for middle in graph.nodes:
        nbrs = list(graph.neighbors(middle))
        for left_idx, left in enumerate(nbrs):
            for right in nbrs[left_idx + 1 :]:
                yield (left, middle, right)

def _orientable_edges(self, graph: EquivalenceClass) -> Iterator:
    """Generate ordered node pairs ``(i, j)`` where ``j`` neighbors ``i``.

    Used when sweeping Meek's orientation rules over every edge.
    """
    for src in graph.nodes:
        yield from ((src, nbr) for nbr in graph.neighbors(src))

def orient_edges(self, graph: EquivalenceClass) -> None:
"""Orient edges in a skeleton graph to estimate the causal DAG, or CPDAG.

Expand All @@ -140,30 +158,27 @@ def orient_edges(self, graph: EquivalenceClass) -> None:
finished = False
while idx < self.max_iter and not finished: # type: ignore
change_flag = False
for i in graph.nodes:
for j in graph.neighbors(i):
if i == j:
continue
# Rule 1: Orient i-j into i->j whenever there is an arrow k->i
# such that k and j are nonadjacent.
r1_add = self._apply_meek_rule1(graph, i, j)

# Rule 2: Orient i-j into i->j whenever there is a chain
# i->k->j.
r2_add = self._apply_meek_rule2(graph, i, j)

# Rule 3: Orient i-j into i->j whenever there are two chains
# i-k->j and i-l->j such that k and l are nonadjacent.
r3_add = self._apply_meek_rule3(graph, i, j)

# Rule 4: Orient i-j into i->j whenever there are two chains
# i-k->l and k->l->j such that k and j are nonadjacent.
#
# However, this rule is not necessary when the PC-algorithm
# is used to estimate a DAG.

if any([r1_add, r2_add, r3_add]) and not change_flag:
change_flag = True
for (i, j) in self._orientable_edges(graph):
# Rule 1: Orient i-j into i->j whenever there is an arrow k->i
# such that k and j are nonadjacent.
r1_add = self._apply_meek_rule1(graph, i, j)

# Rule 2: Orient i-j into i->j whenever there is a chain
# i->k->j.
r2_add = self._apply_meek_rule2(graph, i, j)

# Rule 3: Orient i-j into i->j whenever there are two chains
# i-k->j and i-l->j such that k and l are nonadjacent.
r3_add = self._apply_meek_rule3(graph, i, j)

# Rule 4: Orient i-j into i->j whenever there are two chains
# i-k->l and k->l->j such that k and j are nonadjacent.
#
# However, this rule is not necessary when the PC-algorithm
# is used to estimate a DAG.

if any([r1_add, r2_add, r3_add]) and not change_flag:
change_flag = True
if not change_flag:
finished = True
logger.info(f"Finished applying R1-3, with {idx} iterations")
Expand All @@ -177,6 +192,11 @@ def orient_unshielded_triples(
) -> None:
"""Orient colliders given a graph and separation set.

Unshielded triples are (i, j, k), such that there is no edge (i, k).
This will be oriented as a collider, if 'i' is not conditionally
independent to 'j' given 'k', but is conditionally independent without
'k'.

Parameters
----------
graph : EquivalenceClass
Expand All @@ -185,16 +205,15 @@ def orient_unshielded_triples(
The separating set between any two nodes.
"""
# for every node in the PAG, evaluate neighbors that have any edge
for u in graph.nodes:
for v_i, v_j in combinations(graph.neighbors(u), 2):
# Check that there is no edge of any type between
# v_i and v_j, else this is a "shielded" collider.
# Then check to see if 'u' is in "any" separating
# set. If it is not, then there is a collider.
if v_j not in graph.neighbors(v_i) and not is_in_sep_set(
u, sep_set, v_i, v_j, mode="any"
):
self._orient_collider(graph, v_i, u, v_j)
for v_i, u, v_j in self._orientable_triples(graph):
# Check that there is no edge of any type between
# v_i and v_j, else this is a "shielded" collider.
# Then check to see if 'u' is in "any" separating
# set. If it is not, then there is a collider.
if v_j not in graph.neighbors(v_i) and not is_in_sep_set(
u, sep_set, v_i, v_j, mode="any"
):
self._orient_collider(graph, v_i, u, v_j)

def _orient_collider(
self, graph: EquivalenceClass, v_i: Column, u: Column, v_j: Column
Expand Down
Loading