-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.r
102 lines (78 loc) · 2.8 KB
/
main.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# Environment setup.
# JAVA_HOME has to be in the environment BEFORE the Java-backed packages are
# attached: rmcfs pulls in rJava, which looks for the JVM when it is loaded, so
# setting the variable after library(rmcfs) is too late to influence it.
# NOTE(review): this JDK path is machine-specific -- adjust per workstation.
Sys.setenv(JAVA_HOME = "C:\\Program Files\\Java\\jdk-12.0.2")

library(devtools)
library(rmcfs)
library(dplyr)
library(R.ROSETTA)
library(VisuNet)
# ---------------------------------------------------------------------------
# Input parameters
# ---------------------------------------------------------------------------

# Input table handed to load_protein_IS() and the path reserved for results.
# (filename_results is not referenced anywhere else in the visible script.)
filename <- "data/Project5.csv"
filename_results <- "output/mcfs.rds"

# MCFS settings; each one is forwarded to the mcfs() call further down.
nr_projections <- "auto"
projection_size <- "auto"
cutoff_method <- "criticalAngle"
cutoff_permutations <- 20
splits <- 5
splitset_size <- 0.66

# Rosetta settings; each one is forwarded to the rosetta() call further down.
classifier <- "StandardVoter"
cvNum <- 10
reducer <- "Genetic"

# Option list passed to rosetta() as JohnsonParam.
JohnsonParam <- list(
  Modulo = TRUE,
  BRT = FALSE,
  BRTprec = 0.9,
  Precompute = FALSE,
  Approximate = TRUE,
  Fraction = 0.95
)

# Option list passed to rosetta() as GeneticParam; same entries as above plus
# the Algorithm choice.
GeneticParam <- list(
  Modulo = TRUE,
  BRT = FALSE,
  BRTprec = 0.9,
  Precompute = FALSE,
  Approximate = TRUE,
  Fraction = 0.95,
  Algorithm = "Simple"
)

# Undersampling switches passed to rosetta(); disabled here.
underSample <- FALSE
underSampleNum <- 0
underSampleSize <- 0

# ROC settings: value passed to rosetta() as clroc (alongside roc = TRUE).
host_clroc <- "human"
###############################
# #
# Load dataset #
# #
###############################
# Read a tab-separated table whose first line holds the column names.
#
# The file is parsed with header = FALSE, so the header line comes in as an
# ordinary first data row; that row is then removed and reused as the column
# names. The original author notes this works around T and F proteins being
# interpreted as booleans.
#
# filename: path of the tab-separated file to read.
# Returns the table without its header row, with unused factor levels dropped.
load_protein_IS <- function(filename) {
  raw <- read.table(filename, sep = "\t", header = FALSE)

  # Pull the header values out of row 1 as a plain character vector.
  header_values <- as.character(unlist(raw[1, ]))

  # Everything after the first row is the actual data.
  body <- raw[-1, ]
  colnames(body) <- header_values

  # Remove factor levels that only existed because of the header row.
  droplevels(body)
}
# Load the dataset and take a first look at it: dimensions, data-frame
# attributes and the distribution of the Host column.
data <- load_protein_IS(filename)
dim(data)
attributes(data)
table(data$Host)

# Column names of the loaded table (the names attribute of the data frame).
attr <- attributes(data)$names

# Report how many attributes go into one MCFS projection ("tree").
# This line used to multiply by `proj_size`, which is never defined; the
# configured setting is `projection_size`, and it can be a non-numeric value
# such as "auto", which cannot be multiplied.
# NOTE(review): numeric values below 1 are treated as a fraction of the
# attribute count and larger values as an absolute count, following the rmcfs
# description of projectionSize -- confirm against the installed version.
attrs_per_tree <- if (!is.numeric(projection_size)) {
  projection_size
} else if (projection_size < 1) {
  length(attr) * projection_size
} else {
  projection_size
}
print(paste("Attributes per tree:", attrs_per_tree))
# ---------------------------------------------------------------------------
# MCFS
# ---------------------------------------------------------------------------

# Show the help page for mcfs().
?mcfs

# Run MCFS with Host as the decision column and every other column as a
# predictor; all tunables come from the parameter section, except the thread
# count, which is fixed at 8 here.
mcfs_result <- mcfs(
  Host ~ .,
  data,
  projections = nr_projections,
  projectionSize = projection_size,
  splits = splits,
  splitSetSize = splitset_size,
  cutoffMethod = cutoff_method,
  cutoffPermutations = cutoff_permutations,
  threadsNumber = 8
)

# Inspect the first rows of the RI table and plot the "distances" view.
head(mcfs_result$RI)
plot(mcfs_result, type = "distances")

# Build the ID graph with size = 20 and draw it.
gid <- build.idgraph(mcfs_result, size = 20)
plot.idgraph(gid, label_dist = 0.3)
# ---------------------------------------------------------------------------
# Rosetta
# ---------------------------------------------------------------------------

# Show the help page for rosetta().
?rosetta

# Build the decision table handed to rosetta(). `rule_df` was used by the call
# below but never defined in this script, so the call could not run.
# NOTE(review): the table is assumed to be the top-ranked MCFS features (the
# first `cutoff_value` rows of the RI table) followed by the Host column, with
# Host placed last because rosetta() is expected to read the decision from the
# last column -- confirm this matches the intended analysis.
n_top <- mcfs_result$cutoff_value
stopifnot(is.numeric(n_top), length(n_top) == 1, !is.na(n_top), n_top >= 1)
top_features <- as.character(head(mcfs_result$RI$attribute, n_top))

# Fail loudly if MCFS reports a feature name that is not a column of `data`
# (e.g. because names were altered on the way in).
missing_features <- setdiff(top_features, colnames(data))
if (length(missing_features) > 0) {
  stop(
    "MCFS features not found in data: ",
    paste(missing_features, collapse = ", "),
    call. = FALSE
  )
}
rule_df <- data[, c(top_features, "Host"), drop = FALSE]

# Train the rule-based classifier. `reducer` used to be supplied twice, which
# R rejects ("formal argument matched by multiple actual arguments"); it is
# now passed exactly once.
ross_results <- rosetta(
  rule_df,
  discrete = TRUE,
  roc = TRUE,
  clroc = host_clroc,
  classifier = classifier,
  cvNum = cvNum,
  reducer = reducer,
  JohnsonParam = JohnsonParam,
  GeneticParam = GeneticParam,
  underSample = underSample,
  underSampleNum = underSampleNum,
  underSampleSize = underSampleSize
)