-
Notifications
You must be signed in to change notification settings - Fork 0
/
lasso_cv.R
66 lines (52 loc) · 2.11 KB
/
lasso_cv.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
setwd('/data/petryshen/yoel/lasso')
X <- read.csv('endo_mat_1035_170.csv')
X <- as.matrix(sapply(X, as.matrix))
y <- read.csv('reduced_y.txt', header=FALSE)
y <- as.vector(as.matrix(y))
if (dim(X)[1] != length(y)) {
for (i in 1:10) {
print("X and y do not have the same number of samples")
}
} else {
print("X and y have the same number of samples")
}
auc_accs <- c()
class_accs <- c()
for (x in 1:100) {
Flds <- createFolds(y, k=5, list=TRUE)
models_auc <- list()
models_class <- list()
auc_best_lambdas <- c()
class_best_lambdas <- c()
fold_vars <- sprintf("Flds$Fold%01d", seq(5))
# compute models
for (i in seq(3)) {
models_auc[[i]] <- cv.glmnet(X[eval(parse(text=fold_vars[i])),],
y[eval(parse(text=fold_vars[i]))],
family='binomial',
type.measure='auc',
nfolds=4)
models_class[[i]] <- cv.glmnet(X[eval(parse(text=fold_vars[i])),],
y[eval(parse(text=fold_vars[i]))],
family='binomial',
type.measure='class',
nfolds=4)
}
# get the bestscores and lambdas
for (i in seq(3)) {
auc_best_lambdas[i] <- models_auc[[i]]$lambda.min
class_best_lambdas[i] <- models_class[[i]]$lambda.min
}
# cross-validate on the left out fold using lambdas from the 3 models computed
mod_auc_4 <- cv.glmnet(X[Flds$Fold4,], y[Flds$Fold4], family='binomial', type.measure='auc', lambda=auc_best_lambdas)
mod_class_4 <- cv.glmnet(X[Flds$Fold4,], y[Flds$Fold4], family='binomial', type.measure='class', lambda=class_best_lambdas)
# final test set on the 5th fold
auc_pred <- predict(mod_auc_4, newx= X[Flds$Fold5,], type="class")
auc_accs[x] <- sum(as.numeric(auc_pred) == y[Flds$Fold5]) / length(y[Flds$Fold5])
class_pred <- predict(mod_class_4, newx= X[Flds$Fold5,], type="class")
class_accs[x] <- sum(as.numeric(class_pred) == y[Flds$Fold5]) / length(y[Flds$Fold5])
}
#> mean(auc_accs)
#[1] 0.6535266
#> mean(class_accs)
#[1] 0.6563768