forked from dmlc/xgboost
-
Notifications
You must be signed in to change notification settings - Fork 0
/
train.R
64 lines (48 loc) · 1.51 KB
/
train.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
library(data.table)
library(xgboost)
# Fetch the UCI dermatology data set on the first run; later runs reuse the
# local copy in the working directory.
if (!file.exists("dermatology.data")) {
  # No `method` argument: R's default download method handles https URLs, so
  # the script does not require an external `curl` executable on the PATH.
  download_status <- tryCatch(
    download.file(
      "https://archive.ics.uci.edu/ml/machine-learning-databases/dermatology/dermatology.data",
      "dermatology.data"
    ),
    error = function(e) {
      message("Download error: ", conditionMessage(e))
      1L
    }
  )
  # download.file() reports success as 0; anything else is a failure.
  if (!identical(as.integer(download_status), 0L)) {
    # Remove any partial file so that the next run retries the download
    # instead of silently reading a truncated data set.
    unlink("dermatology.data")
    stop("Could not download dermatology.data", call. = FALSE)
  }
}
# Read the raw comma-separated file. It has no header row, so fread assigns
# its default column names (V1, V2, ...).
df <- fread("dermatology.data", sep = ",", header = FALSE)
# V34 uses "?" as a placeholder; swap it for 0 and store the column as integer.
# NOTE(review): "?" presumably marks a missing value -- confirm that 0 is the
# intended encoding rather than NA.
df[, V34 := as.integer(replace(V34, V34 == "?", 0L))]
# Shift V35 down by one; it is used as the class label further down, and the
# booster is configured with num_class = 6 (presumably labels 0..5).
df[, V35 := V35 - 1L]
# Draw 70% of the row indices at random (without replacement) for training;
# the remaining rows form the test set. No seed is set, so the split -- and
# therefore the reported error rates -- differ between runs.
idx <- sample.int(nrow(df), size = round(0.7 * nrow(df)), replace = FALSE)
train <- df[idx, ]
test <- df[-idx, ]

# Columns 1..34 are the features; V35 is the label.
train_y <- train[["V35"]]
train_x <- train[, 1:34]
test_y <- test[["V35"]]
test_x <- test[, 1:34]

# xgb.DMatrix is given a plain matrix, so convert the data.table slices first.
xg_train <- xgb.DMatrix(data = as.matrix(train_x), label = train_y)
xg_test <- xgb.DMatrix(data = as.matrix(test_x), label = test_y)
# Booster settings. With the multi:softmax objective the predictions are
# compared directly against the labels below, i.e. predict() yields one class
# per row.
params <- list(
  objective = "multi:softmax",
  num_class = 6,
  max_depth = 6,
  nthread = 4,
  eta = 0.1
)

# Data sets passed to xgb.train as the watchlist for this run.
watchlist <- list(train = xg_train, test = xg_test)

bst <- xgb.train(
  data = xg_train,
  params = params,
  nrounds = 5,
  watchlist = watchlist
)

# Share of test rows whose predicted class differs from the true label.
pred <- predict(bst, xg_test)
error_rate <- sum(pred != test_y) / length(test_y)
print(paste("Test error using softmax =", error_rate))
# Repeat the experiment, but have the booster output per-class probabilities
# instead of hard class labels.
params$objective <- "multi:softprob"
bst <- xgb.train(
  params = params,
  data = xg_train,
  nrounds = 5,
  watchlist = watchlist
)
pred_prob <- predict(bst, xg_test)

# Depending on the installed xgboost version, predict() returns the
# probabilities either as a flat vector (num_class consecutive values per
# row) or as an already-shaped rows x classes matrix. Only the flat form is
# reshaped: re-filling an existing matrix with byrow = TRUE would scramble it.
if (is.matrix(pred_prob)) {
  pred_mat <- pred_prob
} else {
  pred_mat <- matrix(pred_prob, ncol = params$num_class, byrow = TRUE)
}

# Sanity check (run interactively): each row of probabilities should sum to 1.
# rowSums(pred_mat)

# which.max gives a 1-based column index, whereas test_y is compared as a
# 0-based class id, hence the - 1L.
pred_label <- apply(pred_mat, 1, which.max) - 1L
error_rate <- sum(pred_label != test_y) / length(test_y)
print(paste("Test error using softprob =", error_rate))