-
Notifications
You must be signed in to change notification settings - Fork 0
/
LogitClassifier.R
62 lines (55 loc) · 2.38 KB
/
LogitClassifier.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
## Logistic regression classifier
library(glmnet)
library(leaps)
library(pROC)
# Read the comma-separated feature files (each with a header row) and convert
# them into matrices for the matrix-based fitters used further down.
data_ <- read.table("training-features.txt", sep = ",", header = TRUE)
dev <- read.table("development-features.txt", sep = ",", header = TRUE)
test <- read.table("test-features.txt", sep = ",", header = TRUE)

# Columns 1 and 40 are dropped from every feature matrix.
# NOTE(review): presumably these are the `Id` and `label` columns -- confirm
# against the file headers.
non_feature_cols <- c(1, 40)
Xtrain <- as.matrix(data_[, -non_feature_cols])
Xdev <- as.matrix(dev[, -non_feature_cols])
Xtest <- as.matrix(test[, -non_feature_cols])

# Responses are hard-coded by row position: the first half of each set is
# labelled 1 and the second half 0.
# NOTE(review): this assumes the files are sorted that way; verify that it
# agrees with the `label` column before relying on it.
Ytrain <- rep(c(1, 0), each = 20000)
Ydev <- rep(c(1, 0), each = 1000)

# Fail fast if the training file does not have the row count assumed above.
stopifnot(length(Ytrain) == nrow(Xtrain))
# Build a logistic regression classifier (full model): `label` is regressed
# on every column of the training data except `Id`.
new_logit <- glm(
  label ~ . - Id,
  data = data_,
  family = binomial(link = "logit"),
  maxit = 2000
)

# Score the development set with predicted probabilities and compute its ROC.
pred <- predict(new_logit, dev, type = "response")
dev_ROC <- roc(dev$label, pred, direction = "<")
# AUC of development data is 0.98
dev_ROC$auc

# Make predictions for the test file.
Prediction <- predict(new_logit, test, type = "response")

# The original call wrote `cbind(id, predictions)`, but neither object exists
# at this point in the script; the test-set probabilities live in `Prediction`.
# NOTE(review): the identifier is taken from `test$Id`, assuming the test file
# carries the same `Id` column as the training data -- confirm.
write.csv(
  x = cbind(id = test$Id, predictions = Prediction),
  file = "newpredict_R3.csv",
  row.names = FALSE
)
# Forward stepwise selection over the feature matrix, keeping the best subset
# of each size up to 20 predictors. The original passed `best = 1`, which is
# not a regsubsets() argument; the parameter is spelled `nbest`.
lm7.fit <- regsubsets(Xtrain, Ytrain, method = "forward", nvmax = 20,
                      nbest = 1)

# RSS against subset size, used to pick the number of features by eye.
plot(summary(lm7.fit)$rss, xlab = "Number of predictors", ylab = "RSS")

# Choose the first 12 features
# f6, f14, f15, f16, f18, f19, f20, f21, f22, f32, f36, f38
lm7_coef <- coef(lm7.fit, 12)

# Test on the development data
# AUC 0.92
# A column of ones named "(Intercept)" is prepended so that the columns can be
# selected by the coefficient names; the score is the plain linear predictor.
Xdev_new <- cbind("(Intercept)" = 1, Xdev)
Ydev_new <- Xdev_new[, names(lm7_coef)] %*% lm7_coef
dev_ROC_new <- roc(dev$label, as.numeric(Ydev_new), direction = "<")
dev_ROC_new$auc

# Generate test data
# Scores 0.85 on test data
Xtest_new <- cbind("(Intercept)" = 1, Xtest)
predictions <- Xtest_new[, names(lm7_coef)] %*% lm7_coef

# write.csv() ignores `col.names` (with a warning), so the argument is dropped;
# the file keeps its header line exactly as before. Use write.table() with
# sep = "," if a header-less file is actually wanted.
write.csv(x = predictions, file = "predict_forward_selection.csv",
          row.names = FALSE)
# LASSO regularisation
# Pick the penalty by 10-fold cross-validation on misclassification error.
# NOTE(review): no seed is set before cv.glmnet(), so the chosen lambda may
# differ between runs.
lambda.opt <- cv.glmnet(
  Xtrain, Ytrain,
  family = "binomial",
  type.measure = "class",
  nfolds = 10,
  alpha = 1
)$lambda.min

# Refit on the full training set at the selected penalty only.
lasso.fit <- glmnet(
  Xtrain, Ytrain,
  family = "binomial",
  lambda = lambda.opt,
  alpha = 1
)

# `lasso.fit` is a plain glmnet fit, not a cv.glmnet object, so the penalty is
# passed as the numeric value rather than the string "lambda.min".
Y3.predict <- predict(lasso.fit, Xtest, s = lambda.opt, type = "class")
#head(Y3.predict)

# write.csv() ignores `col.names` (with a warning), so the argument is dropped;
# the file keeps its header line exactly as before.
write.csv(x = Y3.predict, file = "predict_lasso.csv", row.names = FALSE)