-
Notifications
You must be signed in to change notification settings - Fork 0
/
bayes.classifier.nc.scram.R
91 lines (84 loc) · 3.44 KB
/
bayes.classifier.nc.scram.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
suppressMessages(library(e1071))
suppressMessages(library(caret))
suppressMessages(library(ROCR))
suppressMessages(library(DescTools))
suppressMessages(library(mltools))
# Fix the RNG seed so the random fold assignment and the label shuffling
# performed later in this script are reproducible between runs.
set.seed(2)

# Command-line interface:
#   Rscript bayes.classifier.nc.scram.R <outd> <feats>
#     outd  - directory holding the input score table; all outputs are
#             written there as well
#     feats - comma-separated names of the feature columns to train on
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1L || !nzchar(args[1])) {
  # Without an output directory there is nowhere to write the status file,
  # so fail immediately with a usage message instead of building "NA/..."
  # paths and failing later with an unrelated error.
  stop(
    "Usage: Rscript bayes.classifier.nc.scram.R <outd> <feats>",
    call. = FALSE
  )
}
outd <- args[1]
# A missing <feats> yields NA here; the error it causes downstream is reported
# through the status file like any other runtime failure.
feats <- args[2]

# Status file: receives "SUCCESS" or the text of the error that aborted the
# run. The connection is only opened when it is first written to.
outputFile <- file(file.path(outd, "error.scram.txt"))
# Scrambled-label control run of the naive Bayes classifier.
#
# Reads the score table from `outd`, keeps CLASS plus the feature columns
# named in `feats`, and runs 5-fold cross-validation in which the training
# labels of every fold are randomly permuted before the model is fitted.
# Per-fold sensitivity, specificity, accuracy, AUC and MCC are saved to
# "classifer.stats.scram", the per-fold ROC curves to "classifer.roc.scram",
# and mean(sd) summaries are printed to stdout. The status file receives
# "SUCCESS" on completion or the error text on failure.
#
# Uses `outd`, `feats` and `outputFile` defined earlier in the script.
tryCatch({
  if (is.na(feats) || !nzchar(feats)) {
    stop("no feature columns supplied (expected a comma-separated list)",
         call. = FALSE)
  }

  # Tab-separated table without a header row; the first column supplies the
  # row names and the remaining eight columns are named below.
  d2 <- read.csv(file.path(outd, "PI.scores.nc.both.bayes.na.yn"),
                 sep = "\t", header = FALSE, row.names = 1)
  names(d2) <- c("ET", "EV", "P", "H", "CP", "CF", "D", "CLASS")

  # Keep only the class label and the requested feature columns.
  pv <- trimws(unlist(strsplit(feats, ",")))
  unknown <- setdiff(pv, names(d2))
  if (length(unknown) > 0) {
    stop("unknown feature column(s): ", paste(unknown, collapse = ", "),
         call. = FALSE)
  }
  model_cols <- c("CLASS", pv)
  d2 <- d2[, model_cols, drop = FALSE]

  # Assign every row to one of the folds at random. The call (including the
  # explicit `prob`) is kept as it was so the seeded random stream, and with
  # it every downstream number, is unchanged.
  n_folds <- 5L
  d2$fold <- sample(n_folds, nrow(d2),
                    prob = rep(1 / n_folds, n_folds), replace = TRUE)

  # Preallocated per-fold results (index = fold number).
  fold_sen <- numeric(n_folds)
  fold_spe <- numeric(n_folds)
  fold_acc <- numeric(n_folds)
  fold_auc <- numeric(n_folds)
  fold_mcc <- numeric(n_folds)
  roc_by_fold <- vector("list", n_folds)

  for (i in seq_len(n_folds)) {
    in_test <- d2$fold == i
    train <- d2[!in_test, model_cols, drop = FALSE]
    test <- d2[in_test, model_cols, drop = FALSE]

    # Scramble the training labels: this is what makes the run a null
    # control rather than a real classifier evaluation.
    train$CLASS <- sample(train$CLASS)

    fit <- e1071::naiveBayes(CLASS ~ ., data = train)
    pred_class <- predict(fit, test)
    pred_prob <- predict(fit, test, type = "raw")

    # NOTE(review): confusionMatrix() uses the first level of the table as the
    # positive class unless `positive` is given. If the labels are
    # "non"/"pos" that is "non", whereas MCC and the ROC below treat "pos" as
    # positive - confirm this is intended.
    cm <- caret::confusionMatrix(table(pred_class, test$CLASS))
    fold_sen[i] <- cm$byClass[["Sensitivity"]]
    fold_spe[i] <- cm$byClass[["Specificity"]]
    fold_acc[i] <- cm$overall[["Accuracy"]]

    # MCC on logical vectors, with "pos" as TRUE.
    fold_mcc[i] <- mltools::mcc(pred_class == "pos", test$CLASS == "pos")

    # ROC curve from the posterior probability of the "pos" class. ROCR keeps
    # the curve coordinates in the x.values / y.values slots of the
    # performance object.
    rocr_pred <- ROCR::prediction(pred_prob[, "pos"], test$CLASS)
    perf <- ROCR::performance(rocr_pred, "tpr", "fpr")
    roc <- data.frame(fpr = unlist(perf@x.values),
                      tpr = unlist(perf@y.values))
    fold_auc[i] <- DescTools::AUC(roc$fpr, roc$tpr)
    roc$fold <- paste("Fold", as.character(i), sep = " ")
    roc$method <- "Non-coding"
    roc_by_fold[[i]] <- roc
  }

  # ROC points of all folds stacked in fold order. The object must be called
  # `pdfc` because save() stores objects under their variable name.
  pdfc <- do.call(rbind, roc_by_fold)

  # Long-format metric table. Rows are emitted last-fold-first and
  # last-measure-first, which is the layout earlier versions of this script
  # wrote, so existing readers of the saved file see the same row order.
  measure_rows <- function(values, label) {
    data.frame(value = rev(values), measure = label, method = "Naive Bayes")
  }
  # As with `pdfc`, the variable name `sim.res` is part of the saved file.
  sim.res <- rbind(
    measure_rows(fold_mcc, "MCC"),
    measure_rows(fold_auc, "AUC"),
    measure_rows(fold_acc, "Accuracy"),
    measure_rows(fold_spe, "Specificity"),
    measure_rows(fold_sen, "Sensitivity")
  )

  # File names (including their spelling) are kept exactly as they were.
  save(sim.res, file = file.path(outd, "classifer.stats.scram"))
  save(pdfc, file = file.path(outd, "classifer.roc.scram"))

  # Print "<measure>: <mean>(<sd>)" across folds for every measure.
  report <- function(label) {
    values <- sim.res$value[sim.res$measure == label]
    cat(label, ": ", mean(values), "(", sd(values), ")", "\n", sep = "")
  }
  for (label in c("Sensitivity", "Specificity", "Accuracy", "AUC", "MCC")) {
    report(label)
  }

  writeLines("SUCCESS", outputFile)
}, error = function(e) {
  # Record the failure in the status file instead of aborting the script.
  writeLines(as.character(e), outputFile)
}, finally = {
  # Release the connection on every path, including a failing error handler.
  close(outputFile)
})