Commit 0aacbda

Create LOOCV Reg.R

File tree

1 file changed
+51 -0 lines changed

LOOCV Reg.R
#-------------------------------------------------------------
# Leave-one-out Cross-validation (LOOCV) for regression models
# in R
#-------------------------------------------------------------

# 1. generate artificial data
set.seed(2023)
n <- 300
x <- rnorm(n = n, mean = 5, sd = 3)
y <- x^2 + rnorm(n = n, mean = 0, sd = 8)
data <- data.frame(x = x, y = y)

# 2. plot the models that we want to fit
par(mfrow = c(1, 2), pty = "s")

model1 <- lm(y ~ x)
plot(x = x, y = y, main = 'Linear model', cex = 1.1, pch = 1, lwd = 1.2)
yhat1 <- model1$coef[1] + model1$coef[2] * x
lines(x, yhat1, lwd = 2.5, col = 'red')

model2 <- lm(y ~ x + I(x^2))
plot(x = x, y = y, main = 'Quadratic model', cex = 1.1, pch = 1, lwd = 1.2)
yhat2 <- model2$coef[1] + model2$coef[2] * x + model2$coef[3] * x^2
lines(x[order(x)], yhat2[order(x)], lwd = 2.5, col = 'red')

# 3. Cross-Validation
# fit the models on leave-one-out samples
pred.cv.mod1 <- pred.cv.mod2 <- numeric(n)

for (i in 1:n) {

  # linear model
  mod1 <- lm(y ~ x, subset = -i)
  pred.cv.mod1[i] <- predict(mod1, data[i, ])

  # quadratic model
  mod2 <- lm(y ~ x + I(x^2), subset = -i)
  pred.cv.mod2[i] <- predict(mod2, data[i, ])
}

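# aside (a minimal sketch for checking the loop): for ordinary least squares,
# the leave-one-out residual y[i] - pred.cv[i] has the closed form
# residuals(fit)[i] / (1 - hatvalues(fit)[i]), so the loop can be verified
# against the full-data fits model1 and model2 from step 2 without refitting:
all.equal(as.numeric(y - pred.cv.mod1),
          as.numeric(residuals(model1) / (1 - hatvalues(model1))))
all.equal(as.numeric(y - pred.cv.mod2),
          as.numeric(residuals(model2) / (1 - hatvalues(model2))))
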
MSE1 <- (1/n) * sum((y - pred.cv.mod1)^2) # theta_hat_pe
MSE2 <- (1/n) * sum((y - pred.cv.mod2)^2) # theta_hat_pe

# Root Mean Squared Error (RMSE)
sqrt(c(MSE1, MSE2))
# [1] 15.68599 7.99332
# The second model (quadratic) has the lowest RMSE and is therefore preferred.

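# optional cross-check (a sketch, assuming the recommended 'boot' package is
# installed): cv.glm() with its default K performs the same leave-one-out
# procedure, so its raw delta component should match MSE1 and MSE2 above.
library(boot)
cv.glm(data, glm(y ~ x, data = data))$delta[1]           # compare with MSE1
cv.glm(data, glm(y ~ x + I(x^2), data = data))$delta[1]  # compare with MSE2
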
#----
# end
#----
