-
Notifications
You must be signed in to change notification settings - Fork 21
/
chapter8.py
52 lines (39 loc) · 1.79 KB
/
chapter8.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import numpy as np
from scipy.stats import norm
from .chapter7 import CI
def bootstrap(observations, statistic, iters, random_state=0):
    """
    Lazily generate the statistic evaluated on bootstrapped samples.

    Every resample is drawn with replacement and has as many rows as
    the original data. Resample number i (counting from zero) is seeded
    with random_state + i, so a given random_state always reproduces
    the same sequence of replicates.

    :observations: DataFrame,
        cols are random variables
        rows are observations
    :statistic: callable applied to each resampled DataFrame
    :iters: int, the number of replicates to generate
    :random_state: int, seed used for the first resample
    """
    size = len(observations)

    def replicate(offset):
        # One draw: resample all rows with replacement, then evaluate.
        resample = observations.sample(n=size, replace=True,
                                       random_state=random_state + offset)
        return statistic(resample)

    # map() is lazy, so replicates are still produced one at a time.
    yield from map(replicate, range(iters))
def bootstrap_variance(observations, statistic, iters, random_state=0):
    """
    Estimate the variance of a statistic from its bootstrap replicates.

    :observations: data passed to bootstrap() for resampling
    :statistic: callable evaluated on each resample
    :iters: int, the number of bootstrap replicates
    :random_state: seed forwarded to bootstrap()
    :returns: np.var of the replicates along axis 0, i.e. across
        replicates, with numpy's default normalisation
    """
    replicates = [*bootstrap(observations, statistic, iters,
                             random_state=random_state)]
    return np.var(replicates, axis=0)
def bootstrap_ci_normal(observations, statistic, iters, a, random_state=0):
    """
    Normal-approximation bootstrap confidence interval.

    The interval is centred on the statistic of the original data and
    extends z standard errors to each side, where the standard error is
    the square root of the bootstrap variance and z is the absolute
    value of the standard normal quantile at a / 2.

    :observations: data passed to the statistic and to the resampler
    :statistic: callable evaluated on the data and on each resample
    :iters: int, the number of bootstrap replicates
    :a: level used for the normal quantile (a / 2 in each tail)
    :random_state: seed forwarded to bootstrap_variance()
    :returns: CI(lower, upper)
    """
    estimate = statistic(observations)
    std_err = np.sqrt(
        bootstrap_variance(observations, statistic, iters,
                           random_state=random_state)
    )
    # ppf(a / 2) is negative for a < 1; abs() turns it into the
    # positive critical value.
    critical = np.abs(norm.ppf(a / 2))
    margin = critical * std_err
    return CI(estimate - margin, estimate + margin)
def bootstrap_ci_pivot(observations, statistic, iters, a, random_state=0):
    """
    Pivotal bootstrap confidence interval.

    With theta the statistic on the original data and q_lo, q_hi the
    a / 2 and 1 - a / 2 quantiles of the bootstrap replicates, the
    interval is (2*theta - q_hi, 2*theta - q_lo).

    :observations: data passed to the statistic and to the resampler
    :statistic: callable evaluated on the data and on each resample
    :iters: int, the number of bootstrap replicates
    :a: level of the interval (a / 2 in each tail)
    :random_state: seed forwarded to bootstrap()
    :returns: CI(lower, upper)
    """
    theta = statistic(observations)
    boots = list(bootstrap(observations, statistic, iters,
                           random_state=random_state))
    # axis=0 takes the quantiles across replicates, separately for each
    # component of the statistic. Without it numpy flattens vector-valued
    # replicates into one pool and returns a single scalar. Scalar
    # statistics are unaffected.
    q_hi = np.percentile(boots, 100 - 100*a / 2, axis=0)
    q_lo = np.percentile(boots, 100*a / 2, axis=0)
    # The upper quantile sets the lower endpoint and vice versa.
    return CI(2*theta - q_hi, 2*theta - q_lo)
def bootstrap_ci_percentile(observations, statistic, iters, a, random_state=0):
    """
    Percentile bootstrap confidence interval.

    The endpoints are the a / 2 and 1 - a / 2 quantiles of the bootstrap
    replicates, taken along axis 0 (across replicates).

    :observations: data passed to the resampler
    :statistic: callable evaluated on each resample
    :iters: int, the number of bootstrap replicates
    :a: level of the interval (a / 2 in each tail)
    :random_state: seed forwarded to bootstrap()
    :returns: CI(lower, upper)
    """
    replicates = [*bootstrap(observations, statistic, iters,
                             random_state=random_state)]
    tail = 100*a / 2  # percentage of replicates cut off in each tail
    lower = np.percentile(replicates, tail, axis=0)
    upper = np.percentile(replicates, 100 - tail, axis=0)
    return CI(lower, upper)