From 08f6c9805066800f71ef25c963b9ee0866c8d663 Mon Sep 17 00:00:00 2001 From: Aleksandr Michuda Date: Mon, 24 Jun 2024 15:12:45 -0400 Subject: [PATCH 1/6] add rpy2 to pyproject dev dependencies --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index dfec8b6..f934802 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ pymdown-extensions = ">=10.0" mkdocstrings-python-legacy = "^0.2.3" mkdocstrings = {version = "^0.19.0", extras = ["python"], optional = true } pymdown-extensions = ">=10.0" +rpy2 = "^3.5.16" [build-system] From 0fcfc2824940990963cb87271309d034c2cfa214 Mon Sep 17 00:00:00 2001 From: Aleksandr Michuda Date: Mon, 24 Jun 2024 15:13:37 -0400 Subject: [PATCH 2/6] add test to compare adding seed and rng with same seed --- tests/test_seeds.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test_seeds.py b/tests/test_seeds.py index 26ea201..abe6cab 100644 --- a/tests/test_seeds.py +++ b/tests/test_seeds.py @@ -44,3 +44,16 @@ def test_results_from_same_seed(data): np.random.seed(123) b2 = wildboottest(model, param = "X1", cluster = x, B= 999) pd.testing.assert_frame_equal(a2,b2) + +def test_seeds_and_rng(data): + model = sm.ols(formula='Y ~ X1 + X2', data=data) + + cluster_list = [data.cluster, None] + + for x in cluster_list: + + # specifying seed and rng with that seed -> same results + a = wildboottest(model, param = "X1", cluster = x, B= 999, seed=876587) + rng = np.random.default_rng(seed=876587) + b = wildboottest(model, param = "X1", cluster = x, B= 999, seed=rng) + pd.testing.assert_frame_equal(a,b) \ No newline at end of file From e83bf5b8ff68e3c5b7df77206b7d1720feb3a983 Mon Sep 17 00:00:00 2001 From: Aleksandr Michuda Date: Mon, 24 Jun 2024 15:14:10 -0400 Subject: [PATCH 3/6] add random seed to data fixture for cleanliness --- tests/test_weights.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_weights.py b/tests/test_weights.py 
index 090af40..002217b 100644 --- a/tests/test_weights.py +++ b/tests/test_weights.py @@ -4,7 +4,6 @@ import numpy as np import pandas as pd -np.random.seed(89756) ts = list(wild_draw_fun_dict.keys()) full_enum = [True, False] @@ -13,6 +12,7 @@ @pytest.fixture def data(): + np.random.seed(12315) N = 100 k = 2 G= 20 @@ -46,9 +46,11 @@ def test_different_weights(data): X, y, cluster, bootcluster, R, B = data results_dict = {} + + rng = np.random.default_rng(seed=0) for w in ts: - boot = WildboottestCL(X = X, Y = y, cluster = cluster, bootcluster = bootcluster, R = R, B = 99999, seed = 12341) + boot = WildboottestCL(X = X, Y = y, cluster = cluster, bootcluster = bootcluster, R = R, B = 99999, seed = rng) boot.get_scores(bootstrap_type = "11", impose_null = True) boot.get_weights(weights_type = w) boot.get_numer() @@ -60,7 +62,9 @@ def test_different_weights(data): results_dict[w] = boot.pvalue results_series = pd.Series(results_dict) + print(results_series) mapd = (results_series - results_series.mean()).abs().mean() / results_series.mean() + print(mapd) assert mapd <= .1# make sure mean absolute percentage deviation is less than 10% (ad hoc) \ No newline at end of file From b466df9231207af5bc64bf71c3bdfa4674d7b93e Mon Sep 17 00:00:00 2001 From: Aleksandr Michuda Date: Mon, 24 Jun 2024 15:14:43 -0400 Subject: [PATCH 4/6] allow seed argument to be a numpy random generator; change function description --- wildboottest/wildboottest.py | 41 +++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/wildboottest/wildboottest.py b/wildboottest/wildboottest.py index 4733c11..7a413cf 100644 --- a/wildboottest/wildboottest.py +++ b/wildboottest/wildboottest.py @@ -5,6 +5,7 @@ from wildboottest.weights import draw_weights import warnings from typing import Union, Tuple, Callable +from numpy.random import Generator class WildDrawFunctionException(Exception): pass @@ -55,7 +56,7 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, 
pd.Series], R : Union[np.ndarray, pd.DataFrame], r: Union[np.ndarray, float], B: int, - seed: Union[int, None] = None) -> None: + seed: Union[int, Generator, None] = None) -> None: """Initializes the Heteroskedastic Wild Bootstrap Class @@ -64,7 +65,9 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series], Y (Union[np.ndarray, pd.DataFrame, pd.Series]): Endogenous variable array or dataframe R (Union[np.ndarray, pd.DataFrame]): Constraint matrix for running bootstrap B (int): bootstrap iterations - seed (Union[int, None], optional): Random seed for random weight types. Defaults to None. + seed (Union[int, Generator, None], optional): Random seed for random weight types. + If an integer, will be used as a seed in a numpy default random generator, or a numpy random generator + can also be specified and used. Defaults to None. Raises: TypeError: Raise if input arrays are lists @@ -85,10 +88,12 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series], else: self.Y = Y - if seed is None: - seed = np.random.randint(low = 1, high = (2**32 - 1), size = 1, dtype=np.int64) - - self.rng = np.random.default_rng(seed = seed) + if isinstance(seed, int): + self.rng = np.random.default_rng(seed=seed) + elif isinstance(seed, Generator): + self.rng = seed + else: + self.rng = np.random.default_rng() self.N = X.shape[0] self.k = X.shape[1] @@ -274,7 +279,7 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series], R : Union[np.ndarray, pd.DataFrame], B: int, bootcluster: Union[np.ndarray, pd.DataFrame, pd.Series, None] = None, - seed: Union[int, None] = None, + seed: Union[int, Generator, None] = None, parallel: bool = True) -> None: """Initializes the Wild Cluster Bootstrap Class @@ -285,7 +290,9 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series], R (Union[np.ndarray, pd.DataFrame]): Constraint matrix for running bootstrap B (int): bootstrap iterations bootcluster (Union[np.ndarray, pd.DataFrame, pd.Series, None], optional): 
Sub-cluster array. Defaults to None. - seed (Union[int, None], optional): Random seed for random weight types. Defaults to None. + seed (Union[int, Generator, None], optional): Random seed for random weight types. + If an integer, will be used as a seed in a numpy default random generator, or a numpy random generator + can also be specified and used. Defaults to None. parallel (bool, optional): Whether to run the bootstrap in parallel. Defaults to True. Raises: TypeError: Raise if input arrays are lists @@ -326,11 +333,13 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series], self.bootclustid = np.unique(bootcluster) self.bootcluster = bootcluster - if seed is None: - seed = np.random.randint(low = 1, high = (2**32 - 1), size = 1, dtype=np.int64) - - self.rng = np.random.default_rng(seed = seed) - + if isinstance(seed, int): + self.rng = np.random.default_rng(seed=seed) + elif isinstance(seed, Generator): + self.rng = seed + else: + self.rng = np.random.default_rng() + self.N_G_bootcluster = len(self.bootclustid) self.G = len(self.clustid) @@ -647,7 +656,7 @@ def wildboottest(model : 'OLS', weights_type: str = 'rademacher', impose_null: bool = True, bootstrap_type: str = '11', - seed: Union[str, None] = None, + seed: Union[int, Generator, None] = None, adj: bool = True, cluster_adj: bool = True, parallel: bool = True, @@ -666,7 +675,9 @@ def wildboottest(model : 'OLS', Defaults to True. bootstrap_type (str, optional):A string of length one. Allows to choose the bootstrap type to be run. Either '11', '31', '13' or '33'. '11' by default. Defaults to '11'. - seed (Union[str, None], optional): Option to provide a random seed. Defaults to None. + seed (Union[int, Generator, None], optional): Random seed for random weight types. + If an integer, will be used as a seed in a numpy default random generator, or a numpy random generator + can also be specified and used. Defaults to None. adj (bool, optional): Whether to adjust for small sample. 
Defaults to True. cluster_adj (bool, optional): Whether to do a cluster-robust small sample correction. Defaults to True. parallel (bool, optional): Whether to run the bootstrap in parallel. Defaults to True. From 8a32023ae0c914910d53b70e222ebfe4307071b8 Mon Sep 17 00:00:00 2001 From: Aleksandr Michuda Date: Tue, 25 Jun 2024 22:12:59 -0400 Subject: [PATCH 5/6] fix type hint for wildboottest; create tuple of allowed models and raise `NotImplementedError` if those models aren't used --- wildboottest/wildboottest.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/wildboottest/wildboottest.py b/wildboottest/wildboottest.py index 7a413cf..9805ddd 100644 --- a/wildboottest/wildboottest.py +++ b/wildboottest/wildboottest.py @@ -6,6 +6,12 @@ import warnings from typing import Union, Tuple, Callable from numpy.random import Generator +from statsmodels.regression.linear_model import OLS + + +_allowed_models = ( + OLS, +) class WildDrawFunctionException(Exception): pass @@ -649,7 +655,7 @@ def get_pvalue(self, pval_type = "two-tailed"): self.pvalue = np.mean(self.t_stat > self.t_boot) -def wildboottest(model : 'OLS', +def wildboottest(model : OLS, B:int, cluster : Union[np.ndarray, pd.Series, pd.DataFrame, None] = None, param : Union[str, None] = None, @@ -713,6 +719,9 @@ def wildboottest(model : 'OLS', >>> wildboottest(model, param = "X1", cluster = cluster, B = 9999) >>> wildboottest(model, cluster = cluster, B = 9999) """ + + if not isinstance(model, _allowed_models): + raise NotImplementedError(f"Only allow models of type {' ,'.join([str(i) for i in _allowed_models])}") # does model.exog already exclude missing values? 
X = model.exog From 87ea15c2e6947c7d798b6824284a39d7d3a3af6f Mon Sep 17 00:00:00 2001 From: Aleksandr Michuda Date: Wed, 26 Jun 2024 19:57:34 -0400 Subject: [PATCH 6/6] update ci-test --- .github/workflows/ci-tests.yaml | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index c4f9dcf..077e5ce 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -21,16 +21,13 @@ jobs: - name: Checkout source uses: actions/checkout@v2 - - name: Setup R - uses: r-lib/actions/setup-r@v2 - with: - r-version: '4.2.0' + - name: Setup r2u + uses: eddelbuettel/github-actions/r2u-setup@master - - name: install fwildclusterboot for testing - run: Rscript -e 'install.packages("fwildclusterboot", repos="https://cloud.r-project.org")' + - name: install R packages + run: Rscript -e 'install.packages(c("fwildclusterboot"))' shell: bash - - name: Setup python uses: actions/setup-python@v2 with: