diff --git a/.codespellignore b/.codespellignore
index e4ab998d..0002cbd2 100644
--- a/.codespellignore
+++ b/.codespellignore
@@ -1,2 +1,3 @@
 raison
-wee
\ No newline at end of file
+wee
+ges
\ No newline at end of file
diff --git a/dodiscover/score/__init__.py b/dodiscover/score/__init__.py
new file mode 100644
index 00000000..04fed9f9
--- /dev/null
+++ b/dodiscover/score/__init__.py
@@ -0,0 +1 @@
+from .ges_alg import GES
diff --git a/dodiscover/score/ges_alg.py b/dodiscover/score/ges_alg.py
new file mode 100644
index 00000000..86331551
--- /dev/null
+++ b/dodiscover/score/ges_alg.py
@@ -0,0 +1,98 @@
+from typing import Callable, Optional, Union
+
+import networkx as nx
+import pandas as pd
+from pywhy_graphs.array.export import clearn_arr_to_graph
+
+from dodiscover.context import Context
+
+
+# XXX: see https://github.com/juangamella/ges
+class GES:
+    """Thin wrapper around causal-learn's ``ges`` search routine.
+
+    Parameters
+    ----------
+    scoring_method : callable | str
+        Score function forwarded to causal-learn as ``score_func``. The short
+        name ``"bic"`` (the default) is translated to causal-learn's
+        ``"local_score_BIC"``; every other value is passed through unchanged.
+    max_indegree : int, optional
+        Forwarded to causal-learn as ``maxP``. ``None`` leaves the choice to
+        causal-learn.
+    **scoring_method_kwargs
+        Extra keyword arguments forwarded verbatim to causal-learn's ``ges``.
+
+    Attributes
+    ----------
+    graph_ : nx.DiGraph | None
+        Graph built from the last ``fit`` call, with one node per data frame
+        column. ``None`` until ``fit`` has run.
+    causal_learn_graph_ : object | None
+        Graph object returned by causal-learn (the ``"G"`` record entry).
+    score_ : float | None
+        The ``"score"`` entry of the causal-learn result record.
+    """
+
+    # Short, user-facing score names mapped to causal-learn's identifiers.
+    _SCORE_ALIASES = {"bic": "local_score_BIC"}
+
+    graph_: Optional[nx.DiGraph]
+    causal_learn_graph_: Optional[object]
+    score_: Optional[float]
+
+    def __init__(
+        self,
+        scoring_method: Union[Callable, str] = "bic",
+        max_indegree: Optional[int] = None,
+        **scoring_method_kwargs,
+    ) -> None:
+        self.scoring_method = scoring_method
+        self.max_indegree = max_indegree
+        self.scoring_method_kwargs = scoring_method_kwargs
+
+        # Fitted attributes; populated by ``fit``.
+        self.graph_ = None
+        self.causal_learn_graph_ = None
+        self.score_ = None
+
+    def fit(self, df: pd.DataFrame, ctx: Context):
+        """Run GES on ``df`` and store the resulting graph and score.
+
+        Parameters
+        ----------
+        df : pd.DataFrame
+            Data to search over; converted to a NumPy array for causal-learn.
+        ctx : Context
+            Currently unused by this wrapper.
+
+        Returns
+        -------
+        self : GES
+            The fitted instance.
+        """
+        # Imported here so causal-learn is only needed once ``fit`` is called.
+        from causallearn.search.ScoreBased.GES import ges
+
+        X = df.to_numpy()
+
+        # Translate short names such as "bic"; anything else goes through as-is.
+        score_func = self.scoring_method
+        if isinstance(score_func, str):
+            score_func = self._SCORE_ALIASES.get(score_func, score_func)
+
+        # run causal-learn
+        ges_record = ges(
+            X, score_func=score_func, maxP=self.max_indegree, **self.scoring_method_kwargs
+        )
+
+        causal_learn_graph = ges_record["G"]
+        # causal-learn only receives the bare array, so its own node names do not
+        # reflect ``df``; label the nodes with the data frame's columns instead.
+        names = list(df.columns)
+        adjmat = causal_learn_graph.graph
+
+        self.causal_learn_graph_ = causal_learn_graph
+        self.score_ = ges_record["score"]
+        self.graph_ = clearn_arr_to_graph(adjmat, arr_idx=names, graph_type="DiGraph")
+        return self
diff --git a/tests/unit_tests/replearning/test_gin.py b/tests/unit_tests/replearning/test_gin.py
index 0ed9153d..de5728f2 100644
--- a/tests/unit_tests/replearning/test_gin.py
+++ b/tests/unit_tests/replearning/test_gin.py
@@ -9,8 +9,7
@@ def test_estimate_gin_testdata(): - """Test the wrapper to the causal-learn GIN algorithm for estimating the causal DAG. - """ + """Test the wrapper to the causal-learn GIN algorithm for estimating the causal DAG.""" # Sim data np.random.seed(123) @@ -18,16 +17,18 @@ def test_estimate_gin_testdata(): # First latent is a uniform latent_var_1 = np.random.uniform(0, 100, num_samples) # Second latent is caused by first via nonlinear transform - latent_var_2 = np.array(list(map( - lambda u: 100 * .03 * u / (1 + .03 * u), - latent_var_1))) + latent_var_2 = np.array(list(map(lambda u: 100 * 0.03 * u / (1 + 0.03 * u), latent_var_1))) # Observed variables. X1 and X2 are caused by L1, X3 and X4 are caused by L2 - observed_vars = np.array([ - latent_var_1 + np.random.normal(0, 1, num_samples), # X1 caused by L1 - (100 - latent_var_1) + np.random.normal(0, 1, num_samples), # X2 caused by L1, mirros X1 - latent_var_2 + np.random.normal(0, 1, num_samples), # X3 caused by L2 - (100 - latent_var_2) + np.random.normal(0, 1, num_samples), # X4 caused by L2, mirrors X3 - ]).transpose() + observed_vars = np.array( + [ + latent_var_1 + np.random.normal(0, 1, num_samples), # X1 caused by L1 + (100 - latent_var_1) + + np.random.normal(0, 1, num_samples), # X2 caused by L1, mirrors X1 + latent_var_2 + np.random.normal(0, 1, num_samples), # X3 caused by L2 + (100 - latent_var_2) + + np.random.normal(0, 1, num_samples), # X4 caused by L2, mirrors X3 + ] + ).transpose() data = pd.DataFrame(observed_vars, columns=["X1", "X2", "X3", "X4"]) g_answer = CPDAG( @@ -36,9 +37,10 @@ def test_estimate_gin_testdata(): ("L1", "X2"), ("L2", "X3"), ("L2", "X4"), - ], [ + ], + [ ("L1", "L2"), - ] + ], ) context = make_context().variables(data=data).build()