-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathInference_functions.R
182 lines (164 loc) · 7.65 KB
/
Inference_functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# function "ConfidenceRegion" does the following things:
# 1) computes confidence regions for the true mean of a normal population
# 2) computes confidence regions for the true mean of any population (n large)
# 3) simultaneous confidence intervals (F)
# 4) Bonferroni simultaneous confidence intervals (t)
#
# REMARK: 1)when the gaussian hp is used it is always check using mcshapiro test, the corresponding
# p-value is printed.
# 2) call the function with the following different values of argument
# to.do:
# "CR" = confidence region
# "T2" = F simulataneous conf intervals
# "Bonf" = Bonferroni simultaneous conf intervals
# # INPUT:
# X = dataframe
# large_n = if TRUE asymptotic analysis, otherwihe gaussian hp
# S = sample covariance matrix
# X_bar = sample mean vector
# alpha = type 1 error
# k = number of Bonferroni intrervals (maybe removable)
# to.do = string for telling the function to do different things
# "CR" --> confidence region @ level alpha
# "T2" --> F simultaneous confidence intervals
# "Bonf" --> Bonferroni confidence intervals
# A = matrix whose rows are the vector where we want to compute
# the simultaneous confidence intervals.
# Example: A[i,] = e_i if you want the CI for the i-th component
# plot = TRUE if you want the CI plot
# OUTPUT:
# IC = matrix with the simultaneous conf intervals
#
ConfidenceRegion <- function(X = NULL,
large_n = F,
S = NULL,
X_bar = NULL,
alpha = 0.05,
k = NULL,
to.do = NULL,
A = NULL,
print.plot = F){
# if not given as input, compute sample mean and sample covariance
if(is.null(X_bar)){X_bar <- colMeans(X)}
if(is.null(S)){S <- cov(X)}
if(is.null(to.do)){stop("to.do not specified, i don't know what to do")}
# sample cardinality # population dimension
n <- dim(X)[1]; p <- dim(X)[2]
# range dataframe value, it's a global variable
# if A, the matrix of the directions for sim conf intervals, is not specified; by default let's
# compute the interval for the mean components
if(is.null(A)) A <- diag(p)
if(!large_n){# n is small, relay on Gaussian hp
# check if the gaussian hp is supported by data (function defined below)
check_gaussianity_test(X, alpha)
# let's start inference analysis
switch(to.do,
CR = {# case "confidence region for the mean"
# plot the confidence region @ level 1-alpha for the mean
plot_ellipse(X_bar, S, alpha = alpha, sample = T, n = n)
# add the projection of the ellipse along the axis: in other
# words compute the F simultaneous intervals and plot them
IC <- simult_CI(X_bar, S, alpha = alpha, A, n, print.plot = print.plot)
segments(IC[1,1],0,IC[1,2],0,lty=1,lwd=2,col='red')
segments(0,IC[2,1],0,IC[2,2],lty=1,lwd=2,col='red')
},
T2 = {# case "simulatneous confidence interval"
# IC is a matrix dim(A)[1]x2, each row is a CI (function defined below)
IC <- simult_CI(X_bar, S, alpha = alpha, A, n, print.plot = print.plot)
# print the results
print("################# T2 CONFIDENCE INTERVALS ###############")
cat("Simultaneus confidence interval @ level alpha = ",alpha,"\n")
print(IC)
return(IC)
},
Bonf = {# case "Bonferroni confidence interval"
# IC is a matrix dim(A)[1]x2, each row is a CI
IC <- simult_CI(X_bar, S, alpha = alpha, A, n, Bonf = T, print.plot = print.plot)
# print the results
print("################# BONFERRONI CONFIDENCE INTERVALS ###############")
cat("Bonferroni confidence interval @ level alpha = ",alpha,"\n")
print(IC)
return(IC)
},
{
print("default on switch \n")
}
) # end switch
}
else{# n is large, build an asyntotic confidence region for the mean
if(p == 2 && print.plot){
# plot the confidence region @ level 1-alpha for the mean
plot_ellipse(X_bar, S, alpha = alpha, sample = T, n = n,large_n = T)
}
# costruzione intervalli confidenza chi-squared simultanei usando
# l'hp di n grande e on quella di gaussianità
k <- dim(A)[1] # numero CI
CI <- data.frame(matrix(nrow = k,ncol = 2))
cfr.chisq <- qchisq(1-alpha,p)
for(i in 1:k){
a <- A[i,]
CI[i,] <- c(a%*%X_bar-sqrt(cfr.chisq)*sqrt(a%*%S%*%a/n),
a%*%X_bar+sqrt(cfr.chisq)*sqrt(a%*%S%*%a/n))
}
colnames(CI) <- c("LB","UB")
print("############# CHI-SQUARED ASYNTOTIC CI ######################")
cat("I seguenti CI sono per combinazione lineari a*mu ","\n")
cat("intervalli di confidenza al livello alpha = ", alpha, "\n")
print(CI)
}
} # end function mean_inference
# compute simultaneous confidence interval for linear combnations of mu
simult_CI <- function(X_bar, S, alpha = 0.05, A, n, Bonf = FALSE,print.plot = FALSE){
# INPUT:
# X_bar = sample mean
# S = sample covariance
# 1-alpha = confidence level
# A = matrix whose ROWS ARE the direction for the linear comb
# n = sample cardinality
# Bonf = if TRUE Bonferroni interval otherwise F intervals
# plot = if TRUE plots the simultanueous intervals
# OUTPUT:
# IC = matrix whose row are confidence intervals
#
# population dimension # number of simultaneous interval
p <- dim(S)[2]; k <- dim(A)[1]
# initialization: IC will contain the simulataneous intervals
IC <- matrix(nrow = k, ncol = 3)
# coefficient that appears in the IC formula, it depends on Bonf or F
csi <- ifelse(Bonf,qt(1 - alpha/(2*k),n-1), sqrt(p*(n-1)/(n-p) * qf(1-alpha, p, n-p)))
for(i in 1:k){
# select the direction for the linear combination
a <- A[i,]
IC[i,] <- c(inf = t(a)%*% X_bar - sqrt(t(a)%*%S%*%a / n) * csi,
M = a%*%X_bar,
sup = t(a)%*% X_bar + sqrt(t(a)%*%S%*%a / n) * csi)
}
# plot the intervals
if(print.plot){plot_intervals(IC, k)}
return(IC)
}# end simult_CI
# definition function "plot_intervals"
plot_intervals <- function(IC, k){
#
# INPUT :
# IC = confidence intervals matrix
# X_bar = vectof of sample means
# k = number of confidence intervals
#
# open graphic device
x11()
# plot just the framework so as to use "segments" and "points"
matplot(1:k, 1:k, pch='', ylim = range(IC), xlab='Variables',ylab='T2 for a component',
main='Simultaneous T2 conf. int. for the components')
for(i in 1:k) segments(i,IC[i,1],i,IC[i,3],lwd=3,col=i)
# add sample means
points(1:k, IC[,2], pch=16, col=1:k)
} # end function plot_intervals
check_gaussianity_test <- function(X, alpha){
# check for gaussianity
mctest <- mcshapiro.test(X)
if(mctest$pvalue < alpha)
warning("WARNING: gaussian hp not supported by data, shapiro p-value = ",mctest$pvalue,"\n")
else
cat("You are assuming gaussianity, there's no evidence to say the contrary, shapiro p-value = ",mctest$pvalue,"\n")
}# end function "check_gaussianity_test"