forked from jmhayes3/predicting-chronic-kidney-disease
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdecision_tree_final.R
59 lines (46 loc) · 1.41 KB
/
decision_tree_final.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# Load and clean the chronic kidney disease data ----
# NOTE(review): the working directory is picked interactively. choose.dir()
# is, to my knowledge, only provided on Windows builds of R -- confirm before
# running this script on another platform.
setwd(choose.dir())
# "?" is read as a missing value. stringsAsFactors = TRUE keeps text columns
# as factors on R >= 4.0.0, where read.csv() would otherwise return character
# vectors; the classification tree fitted on `class` later in this script
# presumably needs factor columns -- confirm against the data.
ckd <- read.csv(
  file = "chronic_kidney_disease_dataset/chronic_kidney_disease.csv",
  header = TRUE, sep = ",", dec = ".",
  na.strings = "?", strip.white = TRUE,
  stringsAsFactors = TRUE
)
# dataset with dropped columns
# (sg, al and su are removed here, so they are not converted to factors first;
# convert them with as.factor() if they are ever kept)
ckd <- subset(ckd, select = -c(sg, al, su, rbc, pc,
                               bgr, sod, pot, hemo,
                               pcv, wbcc, rbcc))
# remove rows with missing values
ckd <- na.omit(ckd)
# Decision tree fitted on every row of the cleaned data ----
library(tree)
# Model `class` from all remaining columns of ckd.
tree.ckd <- tree(formula = class ~ ., data = ckd)
summary(object = tree.ckd)
tree.ckd
# Draw the tree skeleton, then add the node labels on top of it.
plot(x = tree.ckd)
text(x = tree.ckd, pretty = 0)
# train/test
# Draw 80% of the row indices for training; the remaining rows form the test
# set. The size is derived from nrow(ckd) so the split stays correct if the
# number of complete rows changes.
set.seed(1)
train <- sample(seq_len(nrow(ckd)), floor(0.8 * nrow(ckd)))
test <- ckd[-train, ]
# Refit the tree on the training rows only, then predict the held-out rows.
tree.ckd <- tree(class ~ ., data = ckd, subset = train)
tree.pred <- predict(tree.ckd, test, type = "class")
# Confusion table: predicted class (rows) against actual class (columns).
table(tree.pred, ckd$class[-train])
# Test-set accuracy, computed from the predictions instead of being typed in
# by hand from one particular run of the table above.
mean(tree.pred == test$class)
# pruning
# Cross-validate the training tree, pruning with prune.misclass.
set.seed(1)
cv.ckd <- cv.tree(tree.ckd, FUN = prune.misclass)
cv.ckd
# Show cv.ckd$dev against tree size and against k side by side, then put the
# plotting layout back to whatever it was before.
old_par <- par(mfrow = c(1, 2))
plot(cv.ckd$size, cv.ckd$dev, type = "b")
plot(cv.ckd$k, cv.ckd$dev, type = "b")
par(old_par)
# prune the tree down to best = 4
# NOTE(review): 4 was presumably read off the cross-validation output above;
# re-check it whenever the data or the seed changes.
prune.ckd <- prune.misclass(tree.ckd, best = 4)
plot(prune.ckd)
text(prune.ckd, pretty = 0)
# use predict to check performance
tree.pred <- predict(prune.ckd, test, type = "class")
table(tree.pred, ckd$class[-train])
# Test-set misclassification rate of the pruned tree, computed from the
# predictions instead of being typed in by hand.
mean(tree.pred != test$class)
# NOTE(review): average of ten hand-recorded values; presumably error rates
# from repeated runs -- confirm where they came from.
mean(c(0.014, 0.056, 0.042, 0.042, 0.098, 0.056, 0.042, 0.07, 0.028, 0.084))