-
Notifications
You must be signed in to change notification settings - Fork 7
/
script.R
151 lines (125 loc) · 4.62 KB
/
script.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
##### Process image #####
library(keras)
library(EBImage)
library(stringr)
library(pbapply)
secondCat <- readImage("train/cat.1.jpg")
display(secondCat)
# Set image size
width <- 50
height <- 50
extract_feature <- function(dir_path, width, height, labelsExist = T) {
img_size <- width * height
## List images in path
images_names <- list.files(dir_path)
if(labelsExist){
## Select only cats or dogs images
catdog <- str_extract(images_names, "^(cat|dog)")
# Set cat == 0 and dog == 1
key <- c("cat" = 0, "dog" = 1)
y <- key[catdog]
}
print(paste("Start processing", length(images_names), "images"))
## This function will resize an image, turn it into greyscale
feature_list <- pblapply(images_names, function(imgname) {
## Read image
img <- readImage(file.path(dir_path, imgname))
## Resize image
img_resized <- resize(img, w = width, h = height)
## Set to grayscale (normalized to max)
grayimg <- channel(img_resized, "gray")
## Get the image as a matrix
img_matrix <- [email protected]
## Coerce to a vector (row-wise)
img_vector <- as.vector(t(img_matrix))
return(img_vector)
})
## bind the list of vector into matrix
feature_matrix <- do.call(rbind, feature_list)
feature_matrix <- as.data.frame(feature_matrix)
## Set names
names(feature_matrix) <- paste0("pixel", c(1:img_size))
if(labelsExist){
return(list(X = feature_matrix, y = y))
}else{
return(feature_matrix)
}
}
# Takes approx. 15min
trainData <- extract_feature("train/", width, height)
# Takes slightly less
testData <- extract_feature("test1/", width, height, labelsExist = F)
# Check processing on second cat
par(mar = rep(0, 4))
testCat <- t(matrix(as.numeric(trainData$X[2,]),
nrow = width, ncol = height, T))
image(t(apply(testCat, 2, rev)), col = gray.colors(12),
axes = F)
# Save / load
save(trainData, testData, file = "catdogData.RData")
# load("catdogData.RData")
##### Fit NN #####
# Fix structure for 2d CNN
train_array <- t(trainData$X)
dim(train_array) <- c(50, 50, nrow(trainData$X), 1)
# Reorder dimensions
train_array <- aperm(train_array, c(3,1,2,4))
test_array <- t(testData)
dim(test_array) <- c(50, 50, nrow(testData), 1)
# Reorder dimensions
test_array <- aperm(test_array, c(3,1,2,4))
# Check cat again
testCat <- train_array[2,,,]
image(t(apply(testCat, 2, rev)), col = gray.colors(12),
axes = F)
# Build CNN model
model <- keras_model_sequential()
model %>%
layer_conv_2d(kernel_size = c(3, 3), filter = 32,
activation = "relu", padding = "same",
input_shape = c(50, 50, 1),
data_format = "channels_last") %>%
layer_conv_2d(kernel_size = c(3, 3), filter = 32,
activation = "relu", padding = "valid") %>%
layer_max_pooling_2d(pool_size = 2) %>%
layer_dropout(rate = 0.25) %>%
layer_conv_2d(kernel_size = c(3, 3), filter = 64, strides = 2,
activation = "relu", padding = "same") %>%
layer_conv_2d(kernel_size = c(3, 3), filter = 64,
activation = "relu", padding = "valid") %>%
layer_max_pooling_2d(pool_size = 2) %>%
layer_dropout(rate = 0.25) %>%
layer_flatten() %>%
layer_dense(units = 50, activation = "relu") %>%
layer_dropout(rate = 0.25) %>%
layer_dense(units = 1, activation = "sigmoid")
summary(model)
model %>% compile(
loss = 'binary_crossentropy',
optimizer = "adam",
metrics = c('accuracy')
)
history <- model %>% fit(
x = train_array, y = as.numeric(trainData$y),
epochs = 30, batch_size = 100,
validation_split = 0.2
)
plot(history)
# Compute probabilities and predictions on test set
predictions <- predict_classes(model, test_array)
probabilities <- predict_proba(model, test_array)
# Visual inspection of 32 cases
set.seed(100)
random <- sample(1:nrow(testData), 32)
preds <- predictions[random,]
probs <- as.vector(round(probabilities[random,], 2))
par(mfrow = c(4, 8), mar = rep(0, 4))
for(i in 1:length(random)){
image(t(apply(test_array[random[i],,,], 2, rev)),
col = gray.colors(12), axes = F)
legend("topright", legend = ifelse(preds[i] == 0, "Cat", "Dog"),
text.col = ifelse(preds[i] == 0, 2, 4), bty = "n", text.font = 2)
legend("topleft", legend = probs[i], bty = "n", col = "white")
}
# Save model
save(model, file = "CNNmodel.RData")