-
Notifications
You must be signed in to change notification settings - Fork 61
/
bf_classification.py
executable file
·316 lines (284 loc) · 12.7 KB
/
bf_classification.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
##
# @file bf_classification.py
# @author Kyeong Soo (Joseph) Kim <[email protected]>
# @date 2017-08-17
#
# @brief Build and evaluate a deep-learning-based building-floor
# classification system using Wi-Fi fingerprinting
#
# @remarks This work is based on the <a href="https://keras.io/">Keras</a>-based
# implementation of the system described in "<a
# href="https://arxiv.org/abs/1611.02049v2">Low-effort place
# recognition with WiFi fingerprints using deep learning</a>".
#
### import modules (except keras and its backend)
import argparse
import datetime
import os
import numpy as np
import pandas as pd
import sys
from sklearn.preprocessing import scale
from timeit import default_timer as timer
### global constant variables
#------------------------------------------------------------------------
# general
#------------------------------------------------------------------------
TRAINING_RATIO = 0.9  # ratio of training data to overall data (threshold of the random split)
INPUT_DIM = 520  # width of the SAE input layer and of its reconstruction layer
OUTPUT_DIM = 13  # number of labels (units of the final softmax layer)
VERBOSE = 1  # forwarded to model.fit(); 0 for turning off logging
#------------------------------------------------------------------------
# stacked auto encoder (sae)
#------------------------------------------------------------------------
# alternative kept for experiments:
# SAE_ACTIVATION = 'tanh'
SAE_ACTIVATION = 'relu'
SAE_BIAS = False
SAE_OPTIMIZER = 'adam'
SAE_LOSS = 'mse'
#------------------------------------------------------------------------
# classifier
#------------------------------------------------------------------------
# alternative kept for experiments:
# CLASSIFIER_ACTIVATION = 'relu'
CLASSIFIER_ACTIVATION = 'tanh'
CLASSIFIER_BIAS = False
CLASSIFIER_OPTIMIZER = 'adam'
# CLASSIFIER_OPTIMIZER = 'rmsprop'
CLASSIFIER_LOSS = 'categorical_crossentropy'
#------------------------------------------------------------------------
# input files
#------------------------------------------------------------------------
path_train = '../data/UJIIndoorLoc/trainingData2.csv'  # '-110' for the lack of AP.
path_validation = '../data/UJIIndoorLoc/validationData2.csv'  # ditto
#------------------------------------------------------------------------
# output files
#------------------------------------------------------------------------
# every output path starts with '../results/<name of this script without extension>'
path_base = '../results/%s' % os.path.splitext(os.path.basename(__file__))[0]
path_out = '%s_out' % path_base
path_sae_model = '%s_sae_model.hdf5' % path_base
if __name__ == "__main__":
    # Command-line options, one row per option:
    # (short flag, long flag, keyword arguments for add_argument)
    option_table = [
        ("-G", "--gpu_id", dict(
            help="ID of GPU device to run this script; default is 0; set it to a negative number for CPU (i.e., no GPU)",
            default=0,
            type=int)),
        ("-R", "--random_seed", dict(
            help="random seed",
            default=0,
            type=int)),
        ("-E", "--epochs", dict(
            help="number of epochs; default is 20",
            default=20,
            type=int)),
        ("-B", "--batch_size", dict(
            help="batch size; default is 10",
            default=10,
            type=int)),
        ("-S", "--sae_hidden_layers", dict(
            help="comma-separated numbers of units in SAE hidden layers; default is '256,128,64,128,256'",
            default='256,128,64,128,256',
            type=str)),
        ("-C", "--classifier_hidden_layers", dict(
            help="comma-separated numbers of units in classifier hidden layers; default '' (i.e., no hidden layer)",
            default='',
            type=str)),
        # the string default is converted by argparse through type=float
        ("-D", "--dropout", dict(
            help="dropout rate before and after classifier hidden layers; default 0.0",
            default='0.0',
            type=float)),
    ]
    parser = argparse.ArgumentParser()
    for short_flag, long_flag, settings in option_table:
        parser.add_argument(short_flag, long_flag, **settings)
    args = parser.parse_args()
# copy the parsed command-line arguments into script-level variables
gpu_id, random_seed = args.gpu_id, args.random_seed
epochs, batch_size = args.epochs, args.batch_size
dropout = args.dropout
# "256,128,64" -> [256, 128, 64]
sae_hidden_layers = list(map(int, args.sae_hidden_layers.split(',')))
# an empty option value is kept as '' (no classifier hidden layers); the
# result report compares against '' to print "N/A"
classifier_hidden_layers = (
    list(map(int, args.classifier_hidden_layers.split(',')))
    if args.classifier_hidden_layers != ''
    else ''
)
### initialize random seed generator
np.random.seed(random_seed)
#------------------------------------------------------------------------
# import keras and its backend (e.g., tensorflow)
#------------------------------------------------------------------------
# CUDA_VISIBLE_DEVICES is set before the backend is imported so that the
# backend only sees the requested device.
if gpu_id >= 0:
    # os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
else:
    # A negative ID means "CPU only" (see the --gpu_id help text). Leaving the
    # variable unset would still expose every GPU, so hide all of them.
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# os.environ['TF_CPP_MIN_LOG_LEVEL']='2' # supress warning messages
import tensorflow as tf
from keras.layers import Dense, Dropout
from keras.models import Sequential, load_model
# read the training CSV; header=0 takes the column names from its first line
train_df = pd.read_csv(path_train, header=0)
# keep only the first 19930 rows
train_df = train_df[:19930]
# the first 520 columns are used as the input features
train_AP_strengths = train_df.iloc[:, :520]
# cast to float, then standardize with axis=1: each row is centered on its own
# mean and scaled to unit variance
train_AP_features = scale(np.asarray(train_AP_strengths).astype(float), axis=1)

# label of a row = "<BUILDINGID>-<FLOOR>" (both are pandas Series of strings here)
building_ids_str = train_df['BUILDINGID'].map(str)
building_floors_str = train_df['FLOOR'].map(str)
train_labels = np.asarray(building_ids_str + '-' + building_floors_str)
# one-hot encode: dummy_labels is a DataFrame with one column per distinct
# label (the label strings are its column names)
dummy_labels = pd.get_dummies(train_labels)
# plain array version: one row per sample, one column per distinct label
train_labels = np.asarray(dummy_labels)

# Random split: draw one uniform number in [0, 1) per row; rows whose draw is
# below TRAINING_RATIO (True in the mask) are used for training.
train_val_split = np.random.rand(len(train_AP_features)) < TRAINING_RATIO
val_rows = ~train_val_split
# We aren't given a formal testing set, so the given validation set is treated
# as the testing set and the given training set is split into
# training + validation.
train_X, train_y = train_AP_features[train_val_split], train_labels[train_val_split]
val_X, val_y = train_AP_features[val_rows], train_labels[val_rows]
# turn the given validation set into a testing set
test_df = pd.read_csv(path_validation, header=0)
# same preprocessing as the training features: first 520 columns, cast to
# float, standardized per row (axis=1)
test_AP_features = scale(np.asarray(test_df.iloc[:, 0:520]).astype(float), axis=1)
# Build the labels in the same "<BUILDINGID>-<FLOOR>" format as the training
# labels so that they can be matched against the training label columns.
test_labels = np.asarray(test_df["BUILDINGID"].map(str) + '-' + test_df["FLOOR"].map(str))
# One-hot encode against the *training* label columns. Encoding the test labels
# on their own would shift the column positions whenever a building-floor
# combination is absent from the test file, silently mislabelling samples.
# Combinations missing from the test file become all-zero columns; a test label
# never seen in training is dropped (its row is all zeros).
test_dummies = pd.get_dummies(test_labels).reindex(columns=dummy_labels.columns, fill_value=0)
# keep the same dtype as the training labels even when filler columns were added
test_labels = np.asarray(test_dummies).astype(train_labels.dtype)
### build SAE encoder model
print("\nPart 1: buidling SAE encoder model ...")
# Loading a previously saved encoder is switched off; the condition that used
# to guard it is kept below for reference.
if False:
    # if os.path.isfile(path_sae_model) and (os.path.getmtime(path_sae_model) > os.path.getmtime(__file__)):
    model = load_model(path_sae_model)
else:
    # stacked autoencoder (SAE): the hidden layers followed by a layer that is
    # INPUT_DIM wide again, so the network can be trained to reproduce its input
    model = Sequential()
    model.add(Dense(sae_hidden_layers[0], input_dim=INPUT_DIM,
                    activation=SAE_ACTIVATION, use_bias=SAE_BIAS))
    for width in sae_hidden_layers[1:] + [INPUT_DIM]:
        model.add(Dense(width, activation=SAE_ACTIVATION, use_bias=SAE_BIAS))
    model.compile(optimizer=SAE_OPTIMIZER, loss=SAE_LOSS)

    # train the autoencoder: the input is also the target
    model.fit(train_X, train_X, batch_size=batch_size, epochs=epochs, verbose=VERBOSE)

    # remove the decoder part by popping layers from the end of the stack
    num_to_remove = (len(sae_hidden_layers) + 1) // 2
    for _ in range(num_to_remove):
        model.pop()

    # # set all layers (i.e., SAE encoder) to non-trainable (weights will not be updated)
    # for layer in model.layers[:]:
    #     layer.trainable = False

    # save the model for later use
    model.save(path_sae_model)
### build and evaluate a complete model with the trained SAE encoder and a new classifier
print("\nPart 2: buidling a complete model ...")
# The custom accuracy functions defined below are based on the following
# one-hot encoding (class index: label, where the 1st digit of the label is the
# building and the 2nd digit is the floor):
#
#    0: 00
#    1: 01
#    2: 02
#    3: 03
#    4: 10
#    5: 11
#    6: 12
#    7: 13
#    8: 20
#    9: 21
#   10: 22
#   11: 23
#   12: 24
import keras.backend as K
from keras.metrics import categorical_accuracy
def bld_idx(x):
    """Map a scalar class-index tensor to its building index.

    Indices below 4 give 0, indices below 8 give 1, and everything else gives
    2. The result has the same dtype as ``x``.
    """
    first_of_building_1 = tf.constant(4, dtype=x.dtype)
    first_of_building_2 = tf.constant(8, dtype=x.dtype)
    # exclusive=False: the first predicate that holds decides the result
    branches = [
        (tf.less(x, first_of_building_1), lambda: tf.constant(0, dtype=x.dtype)),
        (tf.less(x, first_of_building_2), lambda: tf.constant(1, dtype=x.dtype)),
    ]
    return tf.case(branches,
                   default=lambda: tf.constant(2, dtype=x.dtype),
                   exclusive=False)
def building_accuracy(y_true, y_pred):
    """Keras metric: per-sample 1.0/0.0 for a correctly predicted building.

    Both arguments are reduced to class indices with an argmax over the last
    axis, each index is mapped to a building index with ``bld_idx``, and the
    element-wise equality is returned cast to the Keras float type.
    """
    true_buildings, predicted_buildings = (
        tf.map_fn(bld_idx, K.argmax(scores, axis=-1))
        for scores in (y_true, y_pred)
    )
    hits = K.equal(true_buildings, predicted_buildings)
    return K.cast(hits, K.floatx())
def flr_idx(x):
    """Map a scalar class-index tensor to its floor index within the building.

    Indices below 4 are returned unchanged, indices below 8 are reduced by 4,
    and everything else is reduced by 8.
    """
    offset_building_1 = tf.constant(4, dtype=x.dtype)
    offset_building_2 = tf.constant(8, dtype=x.dtype)
    # exclusive=False: the first predicate that holds decides the result
    branches = [
        (tf.less(x, offset_building_1), lambda: x),
        (tf.less(x, offset_building_2), lambda: tf.subtract(x, offset_building_1)),
    ]
    return tf.case(branches,
                   default=lambda: tf.subtract(x, offset_building_2),
                   exclusive=False)
def floor_accuracy(y_true, y_pred):
    """Keras metric: per-sample 1.0/0.0 for a correctly predicted floor.

    Both arguments are reduced to class indices with an argmax over the last
    axis, each index is mapped to a floor index with ``flr_idx``, and the
    element-wise equality is returned cast to the Keras float type. Only the
    floor indices are compared; the building is not taken into account.
    """
    true_floors, predicted_floors = (
        tf.map_fn(flr_idx, K.argmax(scores, axis=-1))
        for scores in (y_true, y_pred)
    )
    hits = K.equal(true_floors, predicted_floors)
    return K.cast(hits, K.floatx())
# Classifier stacked on the encoder: dropout, then every hidden layer followed
# by dropout, then the softmax output layer. With no hidden layers configured
# (classifier_hidden_layers == '') the loop body never runs.
classifier_stack = [Dropout(dropout)]
for width in classifier_hidden_layers:
    classifier_stack.append(Dense(width, activation=CLASSIFIER_ACTIVATION, use_bias=CLASSIFIER_BIAS))
    classifier_stack.append(Dropout(dropout))
classifier_stack.append(Dense(OUTPUT_DIM, activation='softmax', use_bias=CLASSIFIER_BIAS))
for layer in classifier_stack:
    model.add(layer)
model.compile(
    optimizer=CLASSIFIER_OPTIMIZER,
    loss=CLASSIFIER_LOSS,
    metrics=['accuracy', building_accuracy, floor_accuracy])

# train the model and measure how long the training takes
startTime = timer()
model.fit(
    train_X,
    train_y,
    validation_data=(val_X, val_y),
    batch_size=batch_size,
    epochs=epochs,
    verbose=VERBOSE)
elapsedTime = timer() - startTime
print("Model trained in %e s." % elapsedTime)

# evaluate the model on the testing set; the values come back in the order
# loss, then the three metrics given to compile()
loss, acc, acc_bld, acc_flr = model.evaluate(test_AP_features, test_labels)
### print out final results
# The report is written to "<path_out>_<YYYYMMDD-HHMMSS>.org".
now = datetime.datetime.now()
path_out += "_" + now.strftime("%Y%m%d-%H%M%S") + ".org"

# layer sizes are reported as dash-separated lists, e.g. "256-128-64"
sae_layers_text = '-'.join("%d" % units for units in sae_hidden_layers)
if classifier_hidden_layers == '':
    classifier_layers_text = "N/A"
else:
    classifier_layers_text = '-'.join("%d" % units for units in classifier_hidden_layers)

# the with-statement closes the file even if one of the writes fails
with open(path_out, 'w') as f:
    f.write("#+STARTUP: showall\n")  # unfold everything when opening
    f.write("* System parameters\n")
    f.write(" - Numpy random number seed: %d\n" % random_seed)
    f.write(" - Ratio of training data to overall data: %.2f\n" % TRAINING_RATIO)
    f.write(" - Number of epochs: %d\n" % epochs)
    f.write(" - Batch size: %d\n" % batch_size)
    f.write(" - SAE hidden layers: %s\n" % sae_layers_text)
    f.write(" - SAE activation: %s\n" % SAE_ACTIVATION)
    f.write(" - SAE bias: %s\n" % SAE_BIAS)
    f.write(" - SAE optimizer: %s\n" % SAE_OPTIMIZER)
    f.write(" - SAE loss: %s\n" % SAE_LOSS)
    f.write(" - Classifier hidden layers: %s\n" % classifier_layers_text)
    f.write(" - Classifier hidden layer activation: %s\n" % CLASSIFIER_ACTIVATION)
    f.write(" - Classifier bias: %s\n" % CLASSIFIER_BIAS)
    f.write(" - Classifier optimizer: %s\n" % CLASSIFIER_OPTIMIZER)
    f.write(" - Classifier loss: %s\n" % CLASSIFIER_LOSS)
    f.write(" - Classifier dropout rate: %.2f\n" % dropout)
    f.write("* Performance\n")
    f.write(" - Loss = %e\n" % loss)
    f.write(" - Accuracy (overall) = %e\n" % acc)
    f.write(" - Accuracy (building) = %e\n" % acc_bld)
    f.write(" - Accuracy (floor) = %e\n" % acc_flr)