Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Raise if unsupported #111

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ pymdown-extensions = ">=10.0"
mkdocstrings-python-legacy = "^0.2.3"
mkdocstrings = {version = "^0.19.0", extras = ["python"], optional = true }
pymdown-extensions = ">=10.0"
rpy2 = "^3.5.16"


[build-system]
Expand Down
13 changes: 13 additions & 0 deletions tests/test_seeds.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,16 @@ def test_results_from_same_seed(data):
np.random.seed(123)
b2 = wildboottest(model, param = "X1", cluster = x, B= 999)
pd.testing.assert_frame_equal(a2,b2)

def test_seeds_and_rng(data):
    """Check that an integer seed and a Generator built from that seed agree.

    The comparison is run twice: once with ``data.cluster`` as the cluster
    variable and once with ``cluster=None``.
    """
    model = sm.ols(formula='Y ~ X1 + X2', data=data)
    seed_value = 876587

    for cluster in (data.cluster, None):
        # Result obtained from the plain integer seed.
        from_int = wildboottest(model, param="X1", cluster=cluster, B=999, seed=seed_value)

        # A Generator created from the same integer must give the same frame.
        generator = np.random.default_rng(seed=seed_value)
        from_generator = wildboottest(model, param="X1", cluster=cluster, B=999, seed=generator)

        pd.testing.assert_frame_equal(from_int, from_generator)
8 changes: 6 additions & 2 deletions tests/test_weights.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import numpy as np
import pandas as pd

np.random.seed(89756)

ts = list(wild_draw_fun_dict.keys())
full_enum = [True, False]
Expand All @@ -13,6 +12,7 @@

@pytest.fixture
def data():
np.random.seed(12315)
N = 100
k = 2
G= 20
Expand Down Expand Up @@ -46,9 +46,11 @@ def test_different_weights(data):
X, y, cluster, bootcluster, R, B = data

results_dict = {}

rng = np.random.default_rng(seed=0)

for w in ts:
boot = WildboottestCL(X = X, Y = y, cluster = cluster, bootcluster = bootcluster, R = R, B = 99999, seed = 12341)
boot = WildboottestCL(X = X, Y = y, cluster = cluster, bootcluster = bootcluster, R = R, B = 99999, seed = rng)
boot.get_scores(bootstrap_type = "11", impose_null = True)
boot.get_weights(weights_type = w)
boot.get_numer()
Expand All @@ -60,7 +62,9 @@ def test_different_weights(data):
results_dict[w] = boot.pvalue

results_series = pd.Series(results_dict)
print(results_series)

mapd = (results_series - results_series.mean()).abs().mean() / results_series.mean()
print(mapd)

assert mapd <= .1# make sure mean absolute percentage deviation is less than 10% (ad hoc)
52 changes: 36 additions & 16 deletions wildboottest/wildboottest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@
from wildboottest.weights import draw_weights
import warnings
from typing import Union, Tuple, Callable
from numpy.random import Generator
from statsmodels.regression.linear_model import OLS


# Model classes that wildboottest() accepts; it raises NotImplementedError for any other type.
_allowed_models = (OLS,)

class WildDrawFunctionException(Exception):
    """Exception type specific to this module.

    NOTE(review): the raise sites are not visible in this view; presumably
    this signals an invalid wild draw (weights) function -- confirm.
    """
Expand Down Expand Up @@ -55,7 +62,7 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series],
R : Union[np.ndarray, pd.DataFrame],
r: Union[np.ndarray, float],
B: int,
seed: Union[int, None] = None) -> None:
seed: Union[int, Generator, None] = None) -> None:

"""Initializes the Heteroskedastic Wild Bootstrap Class

Expand All @@ -64,7 +71,9 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series],
Y (Union[np.ndarray, pd.DataFrame, pd.Series]): Endogenous variable array or dataframe
R (Union[np.ndarray, pd.DataFrame]): Constraint matrix for running bootstrap
B (int): bootstrap iterations
seed (Union[int, None], optional): Random seed for random weight types. Defaults to None.
seed (Union[int, Generator, None], optional): Source of randomness for random weight types.
If an integer, it is used to seed a new numpy default random generator; if a numpy random
Generator, it is used directly. Defaults to None, in which case a freshly initialized generator is used.

Raises:
TypeError: Raise if input arrays are lists
Expand All @@ -85,10 +94,12 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series],
else:
self.Y = Y

if seed is None:
seed = np.random.randint(low = 1, high = (2**32 - 1), size = 1, dtype=np.int64)

self.rng = np.random.default_rng(seed = seed)
if isinstance(seed, int):
self.rng = np.random.default_rng(seed=seed)
elif isinstance(seed, Generator):
self.rng = seed
else:
self.rng = np.random.default_rng()

self.N = X.shape[0]
self.k = X.shape[1]
Expand Down Expand Up @@ -274,7 +285,7 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series],
R : Union[np.ndarray, pd.DataFrame],
B: int,
bootcluster: Union[np.ndarray, pd.DataFrame, pd.Series, None] = None,
seed: Union[int, None] = None,
seed: Union[int, Generator, None] = None,
parallel: bool = True) -> None:
"""Initializes the Wild Cluster Bootstrap Class

Expand All @@ -285,7 +296,9 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series],
R (Union[np.ndarray, pd.DataFrame]): Constraint matrix for running bootstrap
B (int): bootstrap iterations
bootcluster (Union[np.ndarray, pd.DataFrame, pd.Series, None], optional): Sub-cluster array. Defaults to None.
seed (Union[int, None], optional): Random seed for random weight types. Defaults to None.
seed (Union[int, Generator, None], optional): Source of randomness for random weight types.
If an integer, it is used to seed a new numpy default random generator; if a numpy random
Generator, it is used directly. Defaults to None, in which case a freshly initialized generator is used.
parallel (bool, optional): Whether to run the bootstrap in parallel. Defaults to True.
Raises:
TypeError: Raise if input arrays are lists
Expand Down Expand Up @@ -326,11 +339,13 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series],
self.bootclustid = np.unique(bootcluster)
self.bootcluster = bootcluster

if seed is None:
seed = np.random.randint(low = 1, high = (2**32 - 1), size = 1, dtype=np.int64)

self.rng = np.random.default_rng(seed = seed)

if isinstance(seed, int):
self.rng = np.random.default_rng(seed=seed)
elif isinstance(seed, Generator):
self.rng = seed
else:
self.rng = np.random.default_rng()

self.N_G_bootcluster = len(self.bootclustid)
self.G = len(self.clustid)

Expand Down Expand Up @@ -640,14 +655,14 @@ def get_pvalue(self, pval_type = "two-tailed"):
self.pvalue = np.mean(self.t_stat > self.t_boot)


def wildboottest(model : 'OLS',
def wildboottest(model : OLS,
B:int,
cluster : Union[np.ndarray, pd.Series, pd.DataFrame, None] = None,
param : Union[str, None] = None,
weights_type: str = 'rademacher',
impose_null: bool = True,
bootstrap_type: str = '11',
seed: Union[str, None] = None,
seed: Union[int, Generator, None] = None,
adj: bool = True,
cluster_adj: bool = True,
parallel: bool = True,
Expand All @@ -666,7 +681,9 @@ def wildboottest(model : 'OLS',
Defaults to True.
bootstrap_type (str, optional): A two-character string that selects the bootstrap type
to be run. Either '11', '31', '13' or '33'. Defaults to '11'.
seed (Union[str, None], optional): Option to provide a random seed. Defaults to None.
seed (Union[int, Generator, None], optional): Source of randomness for random weight types.
If an integer, it is used to seed a numpy default random generator; a numpy random
Generator can also be passed and is then used directly. Defaults to None.
adj (bool, optional): Whether to adjust for small sample. Defaults to True.
cluster_adj (bool, optional): Whether to do a cluster-robust small sample correction. Defaults to True.
parallel (bool, optional): Whether to run the bootstrap in parallel. Defaults to True.
Expand Down Expand Up @@ -702,6 +719,9 @@ def wildboottest(model : 'OLS',
>>> wildboottest(model, param = "X1", cluster = cluster, B = 9999)
>>> wildboottest(model, cluster = cluster, B = 9999)
"""

if not isinstance(model, _allowed_models):
raise NotImplementedError(f"Only allow models of type {' ,'.join([str(i) for i in _allowed_models])}")

# does model.exog already exclude missing values?
X = model.exog
Expand Down
Loading