diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e0e4e77
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+__pycache__/
+venv/
+LOGS/*
+!LOGS/.placeholder
+
+*.log*
+
+Gatesizing_NAS.py
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..2b7e46d
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "python.formatting.provider": "yapf"
+}
\ No newline at end of file
diff --git a/CONSTANTS.py b/CONSTANTS.py
index 3d17b0d..db23b33 100644
--- a/CONSTANTS.py
+++ b/CONSTANTS.py
@@ -1,10 +1,10 @@
 ########################################################
 #                   NAS PARAMETERS                     #
 ########################################################
-CONTROLLER_SAMPLING_EPOCHS = 10
+CONTROLLER_SAMPLING_EPOCHS = 1000
 SAMPLES_PER_CONTROLLER_EPOCH = 10
 CONTROLLER_TRAINING_EPOCHS = 10
-ARCHITECTURE_TRAINING_EPOCHS = 10
+ARCHITECTURE_TRAINING_EPOCHS = 5
 CONTROLLER_LOSS_ALPHA = 0.9

 ########################################################
@@ -38,3 +38,9 @@
 #                 OUTPUT PARAMETERS                    #
 ########################################################
 TOP_N = 5
+
+########################################################
+#                    RANDOM SEED                       #
+########################################################
+
+RANDOM_SEED = 1
\ No newline at end of file
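For scale, note what the new constants imply: each controller epoch samples 10 architectures, so the search now trains 1000 x 10 = 10,000 candidate MLPs, each for 5 epochs. A quick sanity check on the budget:

    CONTROLLER_SAMPLING_EPOCHS = 1000
    SAMPLES_PER_CONTROLLER_EPOCH = 10
    ARCHITECTURE_TRAINING_EPOCHS = 5

    total_architectures = CONTROLLER_SAMPLING_EPOCHS * SAMPLES_PER_CONTROLLER_EPOCH
    total_child_epochs = total_architectures * ARCHITECTURE_TRAINING_EPOCHS
    print(total_architectures, total_child_epochs)  # 10000 50000
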
diff --git a/LOGS/.placeholder b/LOGS/.placeholder
new file mode 100644
index 0000000..e69de29
diff --git a/LOGS/event1051/controller_weights.h5 b/LOGS/event1051/controller_weights.h5
deleted file mode 100644
index 1c25958..0000000
Binary files a/LOGS/event1051/controller_weights.h5 and /dev/null differ
diff --git a/LOGS/event1051/nas_data.pkl b/LOGS/event1051/nas_data.pkl
deleted file mode 100644
index 24461f1..0000000
Binary files a/LOGS/event1051/nas_data.pkl and /dev/null differ
diff --git a/LOGS/event1265/controller_weights.h5 b/LOGS/event1265/controller_weights.h5
deleted file mode 100644
index 9b99fc5..0000000
Binary files a/LOGS/event1265/controller_weights.h5 and /dev/null differ
diff --git a/LOGS/event1265/nas_data.pkl b/LOGS/event1265/nas_data.pkl
deleted file mode 100644
index 0136d68..0000000
Binary files a/LOGS/event1265/nas_data.pkl and /dev/null differ
diff --git a/LOGS/event1265/shared_weights.pkl b/LOGS/event1265/shared_weights.pkl
deleted file mode 100644
index 37d86df..0000000
Binary files a/LOGS/event1265/shared_weights.pkl and /dev/null differ
diff --git a/LOGS/event1547/controller_weights.h5 b/LOGS/event1547/controller_weights.h5
deleted file mode 100644
index fa917fb..0000000
Binary files a/LOGS/event1547/controller_weights.h5 and /dev/null differ
diff --git a/LOGS/event1547/nas_data.pkl b/LOGS/event1547/nas_data.pkl
deleted file mode 100644
index 52201dd..0000000
Binary files a/LOGS/event1547/nas_data.pkl and /dev/null differ
diff --git a/LOGS/event1547/shared_weights.pkl b/LOGS/event1547/shared_weights.pkl
deleted file mode 100644
index 8b74fff..0000000
Binary files a/LOGS/event1547/shared_weights.pkl and /dev/null differ
diff --git a/LOGS/event3868/hybrid_weights.h5 b/LOGS/event3868/hybrid_weights.h5
deleted file mode 100755
index ffebcd7..0000000
Binary files a/LOGS/event3868/hybrid_weights.h5 and /dev/null differ
diff --git a/LOGS/event3868/nas_data.pkl b/LOGS/event3868/nas_data.pkl
deleted file mode 100755
index 1b1ad41..0000000
Binary files a/LOGS/event3868/nas_data.pkl and /dev/null differ
diff --git a/LOGS/event3868/shared_weights.pkl b/LOGS/event3868/shared_weights.pkl
deleted file mode 100755
index d00d8fa..0000000
Binary files a/LOGS/event3868/shared_weights.pkl and /dev/null differ
diff --git a/LOGS/event9208/controller_weights.h5 b/LOGS/event9208/controller_weights.h5
deleted file mode 100644
index 5827a99..0000000
Binary files a/LOGS/event9208/controller_weights.h5 and /dev/null differ
diff --git a/LOGS/event9208/nas_data.pkl b/LOGS/event9208/nas_data.pkl
deleted file mode 100644
index 0db71ea..0000000
Binary files a/LOGS/event9208/nas_data.pkl and /dev/null differ
diff --git a/controller.py b/controller.py
index 1b1352a..71c073e 100644
--- a/controller.py
+++ b/controller.py
@@ -1,9 +1,13 @@
 import os
 import numpy as np
+
+np.set_printoptions(precision=10)
 from keras import optimizers
 from keras.layers import Dense, LSTM
 from keras.models import Model
 from keras.engine.input_layer import Input
+import tensorflow as tf
+from tensorflow.keras.utils import to_categorical
 from keras.preprocessing.sequence import pad_sequences

 from mlp_generator import MLPSearchSpace
@@ -30,6 +34,7 @@ def __init__(self):
         super().__init__(TARGET_CLASSES)

         self.controller_classes = len(self.vocab) + 1
+        self.eps = 0.5

     def sample_architecture_sequences(self, model, number_of_samples):
         final_layer_id = len(self.vocab)
@@ -39,78 +44,126 @@ def sample_architecture_sequences(self, model, number_of_samples):
         print("GENERATING ARCHITECTURE SAMPLES...")
         print('------------------------------------------------------')
         while len(samples) < number_of_samples:
-            seed = []
+            seed = [0]
+            # initialize with the 'start' token
+            sequence = pad_sequences([seed],
+                                     maxlen=self.max_len,
+                                     padding='post',
+                                     value=0)
+            sequence = sequence.reshape(1, self.max_len, 1)
+            sequence = to_categorical(sequence, self.controller_classes)
+            if self.use_predictor:
+                (probab, _) = model.predict(sequence)
+            else:
+                probab = model.predict(sequence)
+            probab = probab[0]
             while len(seed) < self.max_len:
-                sequence = pad_sequences([seed], maxlen=self.max_len - 1, padding='post')
-                sequence = sequence.reshape(1, 1, self.max_len - 1)
-                if self.use_predictor:
-                    (probab, _) = model.predict(sequence)
-                else:
-                    probab = model.predict(sequence)
-                probab = probab[0][0]
-                next = np.random.choice(vocab_idx, size=1, p=probab)[0]
-                if next == dropout_id and len(seed) == 0:
+                next = np.random.choice(vocab_idx, size=1,
+                                        p=probab[len(seed)])[0]
+                if next == 0:
+                    continue
+                if next == dropout_id and len(seed) == 1:
                     continue
-                if next == final_layer_id and len(seed) == 0:
+                if next == final_layer_id and len(seed) == 1:
                     continue
                 if next == final_layer_id:
                     seed.append(next)
                     break
-                if len(seed) == self.max_len - 1:
-                    seed.append(final_layer_id)
-                    break
-                if not next == 0:
+                if not next == 1:
                     seed.append(next)
-            if seed not in self.seq_data:
-                samples.append(seed)
-                self.seq_data.append(seed)
+            if len(seed) == self.max_len and seed[-1] != final_layer_id:
+                seed.append(final_layer_id)
+            # # search unexplored architecture
+            # # use epsilon-greedy
+            # p = np.random.random()
+            # if (seed not in self.seq_data):
+            #     samples.append(seed[1:])
+            #     self.seq_data.append(seed)
+            # elif (p < self.eps):
+            #     samples.append(seed[1:])
+            samples.append(seed[1:])
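The rewritten sampler above makes a single forward pass per architecture: it feeds a one-hot 'start' sequence through the LSTM, reads off one softmax distribution per position, and then draws each layer token from the distribution at its position (token 0 is reserved for 'start'/padding). A minimal standalone sketch of that scheme, assuming probab has shape (max_len, n_classes) and vocab_idx lists all n_classes token ids (the extra position-1 restrictions in the hunk are omitted):

    import numpy as np

    def sample_from_position_probs(probab, vocab_idx, max_len, final_layer_id):
        # draw one token per position; skip token 0 (start/padding)
        seq = []
        for pos in range(max_len):
            token = np.random.choice(vocab_idx, p=probab[pos])
            if token == 0:
                continue
            seq.append(token)
            if token == final_layer_id:  # output layer terminates the sequence
                break
        return seq
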
+            for idx in range(self.max_len):
+                probab_percent = probab[idx] * 100.0
+                print(
+                    "Controller prob {}: mean: {:.10f}, std: {:.10f}, top5:{}".
+                    format(idx, tf.math.reduce_mean(probab_percent),
+                           tf.math.reduce_std(probab_percent),
+                           tf.math.top_k(probab_percent, 5).indices.numpy()))
         return samples

     def control_model(self, controller_input_shape, controller_batch_size):
-        main_input = Input(shape=controller_input_shape, batch_shape=controller_batch_size, name='main_input')
+        main_input = Input(shape=controller_input_shape, name='main_input')
         x = LSTM(self.controller_lstm_dim, return_sequences=True)(main_input)
-        main_output = Dense(self.controller_classes, activation='softmax', name='main_output')(x)
+        main_output = Dense(self.controller_classes,
+                            activation='softmax',
+                            name='main_output')(x)
         model = Model(inputs=[main_input], outputs=[main_output])
         return model

-    def train_control_model(self, model, x_data, y_data, loss_func, controller_batch_size, nb_epochs):
+    def train_control_model(self, model, x_data, y_data, loss_func,
+                            controller_batch_size, nb_epochs):
         if self.controller_optimizer == 'sgd':
-            optim = optimizers.SGD(lr=self.controller_lr, decay=self.controller_decay, momentum=self.controller_momentum, clipnorm=1.0)
+            optim = optimizers.SGD(lr=self.controller_lr,
+                                   decay=self.controller_decay,
+                                   momentum=self.controller_momentum,
+                                   clipnorm=1.0)
         else:
-            optim = getattr(optimizers, self.controller_optimizer)(lr=self.controller_lr, decay=self.controller_decay, clipnorm=1.0)
+            optim = getattr(optimizers, self.controller_optimizer)(
+                lr=self.controller_lr,
+                decay=self.controller_decay,
+                clipnorm=1.0)
         model.compile(optimizer=optim, loss={'main_output': loss_func})
         if os.path.exists(self.controller_weights):
             model.load_weights(self.controller_weights)
         print("TRAINING CONTROLLER...")
-        model.fit({'main_input': x_data},
-                  {'main_output': y_data.reshape(len(y_data), 1, self.controller_classes)},
+        model.fit({'main_input': x_data}, {'main_output': y_data},
                   epochs=nb_epochs,
                   batch_size=controller_batch_size,
                   verbose=0)
         model.save_weights(self.controller_weights)

-    def hybrid_control_model(self, controller_input_shape, controller_batch_size):
-        main_input = Input(shape=controller_input_shape, batch_shape=controller_batch_size, name='main_input')
+    def hybrid_control_model(self, controller_input_shape,
+                             controller_batch_size):
+        main_input = Input(shape=controller_input_shape, name='main_input')
         x = LSTM(self.controller_lstm_dim, return_sequences=True)(main_input)
-        predictor_output = Dense(1, activation='sigmoid', name='predictor_output')(x)
-        main_output = Dense(self.controller_classes, activation='softmax', name='main_output')(x)
-        model = Model(inputs=[main_input], outputs=[main_output, predictor_output])
+        predictor_output = Dense(1,
+                                 activation='sigmoid',
+                                 name='predictor_output')(x)
+        main_output = Dense(self.controller_classes,
+                            activation='softmax',
+                            name='main_output')(x)
+        model = Model(inputs=[main_input],
+                      outputs=[main_output, predictor_output])
         return model

-    def train_hybrid_model(self, model, x_data, y_data, pred_target, loss_func, controller_batch_size, nb_epochs):
+    def train_hybrid_model(self, model, x_data, y_data, pred_target, loss_func,
+                           controller_batch_size, nb_epochs):
         if self.controller_optimizer == 'sgd':
-            optim = optimizers.SGD(lr=self.controller_lr, decay=self.controller_decay, momentum=self.controller_momentum, clipnorm=1.0)
+            optim = optimizers.SGD(lr=self.controller_lr,
+                                   decay=self.controller_decay,
+                                   momentum=self.controller_momentum,
+                                   clipnorm=1.0)
         else:
-            optim = getattr(optimizers, self.controller_optimizer)(lr=self.controller_lr, decay=self.controller_decay, clipnorm=1.0)
+            optim = getattr(optimizers, self.controller_optimizer)(
+                lr=self.controller_lr,
+                decay=self.controller_decay,
+                clipnorm=1.0)
         model.compile(optimizer=optim,
-                      loss={'main_output': loss_func, 'predictor_output': 'mse'},
-                      loss_weights={'main_output': 1, 'predictor_output': 1})
+                      loss={
+                          'main_output': loss_func,
+                          'predictor_output': 'mse'
+                      },
+                      loss_weights={
+                          'main_output': 1,
+                          'predictor_output': 1
+                      })
         if os.path.exists(self.controller_weights):
             model.load_weights(self.controller_weights)
         print("TRAINING CONTROLLER...")
-        model.fit({'main_input': x_data},
-                  {'main_output': y_data.reshape(len(y_data), 1, self.controller_classes),
-                   'predictor_output': np.array(pred_target).reshape(len(pred_target), 1, 1)},
+        model.fit({'main_input': x_data}, {
+            'main_output': y_data,
+            'predictor_output': pred_target
+        },
                   epochs=nb_epochs,
                   batch_size=controller_batch_size,
                   verbose=0)
@@ -119,8 +172,13 @@ def train_hybrid_model(self, model, x_data, y_data, pred_target, loss_func, cont
     def get_predicted_accuracies_hybrid_model(self, model, seqs):
         pred_accuracies = []
         for seq in seqs:
-            control_sequences = pad_sequences([seq], maxlen=self.max_len, padding='post')
-            xc = control_sequences[:, :-1].reshape(len(control_sequences), 1, self.max_len - 1)
-            (_, pred_accuracy) = [x[0][0] for x in model.predict(xc)]
-            pred_accuracies.append(pred_accuracy[0])
+            control_sequences = pad_sequences([seq],
+                                              maxlen=self.max_len + 1,
+                                              padding='pre',
+                                              value=0)
+            xc = control_sequences[:, :-1].reshape(len(control_sequences),
+                                                   self.max_len, 1)
+            xc = to_categorical(xc, self.controller_classes)
+            _, pred_accuracy = model.predict(xc)
+            pred_accuracies.append(pred_accuracy[0][-1])
         return pred_accuracies
diff --git a/mlp_generator.py b/mlp_generator.py
index 722bf6d..7ff0b52 100644
--- a/mlp_generator.py
+++ b/mlp_generator.py
@@ -19,6 +19,7 @@ def vocab_dict(self):
         nodes = [8, 16, 32, 64, 128, 256, 512]
         act_funcs = ['sigmoid', 'tanh', 'relu', 'elu']
         layer_params = []
+        # NOTE: id 0 is the 'start' token
         layer_id = []
         for i in range(len(nodes)):
             for j in range(len(act_funcs)):
@@ -65,10 +66,12 @@ def __init__(self):
         super().__init__(TARGET_CLASSES)
-
         if self.mlp_one_shot:
             self.weights_file = 'LOGS/shared_weights.pkl'
-            self.shared_weights = pd.DataFrame({'bigram_id': [], 'weights': []})
+            self.shared_weights = pd.DataFrame({
+                'bigram_id': [],
+                'weights': []
+            })
             if not os.path.exists(self.weights_file):
                 print("Initializing shared weights dictionary...")
                 self.shared_weights.to_pickle(self.weights_file)
@@ -82,23 +85,34 @@ def create_model(self, sequence, mlp_input_shape):
                 if layer_conf is 'dropout':
                     model.add(Dropout(self.mlp_dropout, name='dropout'))
                 else:
-                    model.add(Dense(units=layer_conf[0], activation=layer_conf[1]))
+                    model.add(
+                        Dense(units=layer_conf[0], activation=layer_conf[1]))
         else:
             for i, layer_conf in enumerate(layer_configs):
                 if i == 0:
-                    model.add(Dense(units=layer_conf[0], activation=layer_conf[1], input_shape=mlp_input_shape))
+                    model.add(
+                        Dense(units=layer_conf[0],
+                              activation=layer_conf[1],
+                              input_shape=mlp_input_shape))
                 elif layer_conf is 'dropout':
                     model.add(Dropout(self.mlp_dropout, name='dropout'))
                 else:
-                    model.add(Dense(units=layer_conf[0], activation=layer_conf[1]))
+                    model.add(
+                        Dense(units=layer_conf[0], activation=layer_conf[1]))
         return model

     def compile_model(self, model):
         if self.mlp_optimizer == 'sgd':
-            optim = optimizers.SGD(lr=self.mlp_lr, decay=self.mlp_decay, momentum=self.mlp_momentum)
+            optim = optimizers.SGD(lr=self.mlp_lr,
+                                   decay=self.mlp_decay,
+                                   momentum=self.mlp_momentum)
         else:
-            optim = getattr(optimizers, self.mlp_optimizer)(lr=self.mlp_lr, decay=self.mlp_decay)
-        model.compile(loss=self.mlp_loss_func, optimizer=optim, metrics=self.metrics)
+            optim = getattr(optimizers,
+                            self.mlp_optimizer)(lr=self.mlp_lr,
+                                                decay=self.mlp_decay)
+        model.compile(loss=self.mlp_loss_func,
+                      optimizer=optim,
+                      metrics=self.metrics)
         return model

     def update_weights(self, model):
@@ -107,7 +121,8 @@ def update_weights(self, model):
             if 'flatten' in layer.name:
                 layer_configs.append(('flatten'))
             elif 'dropout' not in layer.name:
-                layer_configs.append((layer.get_config()['units'], layer.get_config()['activation']))
+                layer_configs.append((layer.get_config()['units'],
+                                      layer.get_config()['activation']))
         config_ids = []
         for i in range(1, len(layer_configs)):
             config_ids.append((layer_configs[i - 1], layer_configs[i]))
@@ -121,21 +136,37 @@ def update_weights(self, model):
                     if config_ids[j] == bigram_ids[i]:
                         search_index.append(i)
                 if len(search_index) == 0:
-                    self.shared_weights = self.shared_weights.append({'bigram_id': config_ids[j],
-                                                                      'weights': layer.get_weights()},
-                                                                     ignore_index=True)
+                    self.shared_weights = self.shared_weights.append(
+                        {
+                            'bigram_id': config_ids[j],
+                            'weights': layer.get_weights()
+                        },
+                        ignore_index=True)
                 else:
-                    self.shared_weights.at[search_index[0], 'weights'] = layer.get_weights()
+                    self.shared_weights.at[search_index[0],
+                                           'weights'] = layer.get_weights()
                 j += 1
         self.shared_weights.to_pickle(self.weights_file)

+    def load_shared_weights(self, model):
+        all_subdirs = [
+            'LOGS/' + d for d in os.listdir('LOGS')
+            if os.path.isdir('LOGS/' + d)
+        ]
+        latest_subdir = max(all_subdirs, key=os.path.getmtime)
+        old_weights_file = os.path.basename(self.weights_file)
+        old_weights_file = os.path.join(latest_subdir, old_weights_file)
+        self.shared_weights = pd.read_pickle(old_weights_file)
+        self.set_model_weights(model)
+
     def set_model_weights(self, model):
         layer_configs = ['input']
         for layer in model.layers:
             if 'flatten' in layer.name:
                 layer_configs.append(('flatten'))
             elif 'dropout' not in layer.name:
-                layer_configs.append((layer.get_config()['units'], layer.get_config()['activation']))
+                layer_configs.append((layer.get_config()['units'],
+                                      layer.get_config()['activation']))
         config_ids = []
         for i in range(1, len(layer_configs)):
             config_ids.append((layer_configs[i - 1], layer_configs[i]))
@@ -150,10 +181,17 @@ def set_model_weights(self, model):
                         search_index.append(i)
                 if len(search_index) > 0:
                     print("Transferring weights for layer:", config_ids[j])
-                    layer.set_weights(self.shared_weights['weights'].values[search_index[0]])
+                    layer.set_weights(
+                        self.shared_weights['weights'].values[search_index[0]])
                 j += 1

-    def train_model(self, model, x_data, y_data, nb_epochs, validation_split=0.1, callbacks=None):
+    def train_model(self,
+                    model,
+                    x_data,
+                    y_data,
+                    nb_epochs,
+                    validation_split=0.1,
+                    callbacks=None):
         if self.mlp_one_shot:
             self.set_model_weights(model)
             history = model.fit(x_data,
@@ -171,3 +209,7 @@ def train_model(self, model, x_data, y_data, nb_epochs, validation_split=0.1, ca
                                 callbacks=callbacks,
                                 verbose=0)
         return history
+
+    def inference_model(self, model, x_data, y_data):
+        results = model.evaluate(x_data, y_data, verbose=0)
+        return results
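The one-shot scheme above keys shared weights by layer 'bigrams': a (previous layer config, current layer config) pair such as ((128, 'relu'), (64, 'relu')). Any sampled architecture containing the same adjacent pair inherits that Dense layer's weights instead of training from scratch. A dict-based sketch of the same idea (illustrative helpers, not the repo's API):

    shared = {}  # (prev_config, cur_config) -> list of numpy weight arrays

    def store_weights(layers, configs):
        for i, layer in enumerate(layers):
            bigram = ('input' if i == 0 else configs[i - 1], configs[i])
            shared[bigram] = layer.get_weights()

    def transfer_weights(layers, configs):
        for i, layer in enumerate(layers):
            bigram = ('input' if i == 0 else configs[i - 1], configs[i])
            if bigram in shared:
                layer.set_weights(shared[bigram])  # reuse previously trained weights

The pandas DataFrame in update_weights/set_model_weights implements the same lookup, persisted to LOGS/shared_weights.pkl between architectures.
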
+import keras +import numpy as np import keras.backend as K -from keras.utils import to_categorical +from tensorflow.keras.utils import to_categorical from keras.preprocessing.sequence import pad_sequences from CONSTANTS import * @@ -32,52 +41,113 @@ def __init__(self, x, y): self.model_generator = MLPGenerator() self.controller_batch_size = len(self.data) - self.controller_input_shape = (1, MAX_ARCHITECTURE_LENGTH - 1) + self.controller_input_shape = (MAX_ARCHITECTURE_LENGTH, + self.controller_classes) if self.use_predictor: - self.controller_model = self.hybrid_control_model(self.controller_input_shape, self.controller_batch_size) + self.controller_model = self.hybrid_control_model( + self.controller_input_shape, self.controller_batch_size) else: - self.controller_model = self.control_model(self.controller_input_shape, self.controller_batch_size) + self.controller_model = self.control_model( + self.controller_input_shape, self.controller_batch_size) + + self.target_dev = "llvm -mcpu=core-avx2" + #self.target_dev = "cuda" + #self.target_dev = "cuda -libs=cudnn" + self.tvm_opt_level = 3 + self.tvm_module = '' def create_architecture(self, sequence): if self.target_classes == 2: self.model_generator.loss_func = 'binary_crossentropy' - model = self.model_generator.create_model(sequence, np.shape(self.x[0])) + model = self.model_generator.create_model(sequence, + np.shape(self.x[0])) model = self.model_generator.compile_model(model) return model def train_architecture(self, model): x, y = unison_shuffled_copies(self.x, self.y) - history = self.model_generator.train_model(model, x, y, self.architecture_train_epochs) + history = self.model_generator.train_model( + model, x, y, self.architecture_train_epochs) return history + def load_shared_weights(self, model): + self.model_generator.load_shared_weights(model) + + def to_tvm_module(self, model): + shape_dict = {model.input_names[0]: (1, self.x.shape[1])} + mod, params = relay.frontend.from_keras(model, shape_dict) + + target = tvm.target.Target(self.target_dev) + dev = tvm.device(str(target), 0) + + with tvm.transform.PassContext(opt_level=self.tvm_opt_level): + lib = relay.build_module.build(mod, target=target, params=params) + module = graph_executor.GraphModule(lib["default"](dev)) + data_tvm = tvm.nd.array( + (np.random.uniform(size=(1, self.x.shape[1]))).astype("float32")) + module.set_input(model.input_names[0], data_tvm) + return module, dev + + def evaluate_latency(self, model): + module, dev = self.to_tvm_module(model) + return module.benchmark(dev, repeat=10, min_repeat_ms=500) + + def inference_architecture(self, model): + results = self.model_generator.inference_model(model, self.x, self.y) + return results + def append_model_metrics(self, sequence, history, pred_accuracy=None): if len(history.history['val_accuracy']) == 1: if pred_accuracy: - self.data.append([sequence, - history.history['val_accuracy'][0], - pred_accuracy]) + self.data.append([ + sequence, history.history['val_accuracy'][0], pred_accuracy + ]) + print('predict accuracy: ', pred_accuracy, ' (', + pred_accuracy - history.history['val_accuracy'][0], ')') else: - self.data.append([sequence, - history.history['val_accuracy'][0]]) + self.data.append( + [sequence, history.history['val_accuracy'][0]]) print('validation accuracy: ', history.history['val_accuracy'][0]) else: - val_acc = np.ma.average(history.history['val_accuracy'], - weights=np.arange(1, len(history.history['val_accuracy']) + 1), - axis=-1) + val_acc = np.ma.average( + history.history['val_accuracy'], + 
     def append_model_metrics(self, sequence, history, pred_accuracy=None):
         if len(history.history['val_accuracy']) == 1:
             if pred_accuracy:
-                self.data.append([sequence,
-                                  history.history['val_accuracy'][0],
-                                  pred_accuracy])
+                self.data.append([
+                    sequence, history.history['val_accuracy'][0], pred_accuracy
+                ])
+                print('predict accuracy: ', pred_accuracy, ' (',
+                      pred_accuracy - history.history['val_accuracy'][0], ')')
             else:
-                self.data.append([sequence,
-                                  history.history['val_accuracy'][0]])
+                self.data.append(
+                    [sequence, history.history['val_accuracy'][0]])
             print('validation accuracy: ', history.history['val_accuracy'][0])
         else:
-            val_acc = np.ma.average(history.history['val_accuracy'],
-                                    weights=np.arange(1, len(history.history['val_accuracy']) + 1),
-                                    axis=-1)
+            val_acc = np.ma.average(
+                history.history['val_accuracy'],
+                weights=np.arange(1,
+                                  len(history.history['val_accuracy']) + 1),
+                axis=-1)
             if pred_accuracy:
-                self.data.append([sequence,
-                                  val_acc,
-                                  pred_accuracy])
+                self.data.append([sequence, val_acc, pred_accuracy])
+                print('predict accuracy: ', pred_accuracy, ' (',
+                      pred_accuracy - val_acc, ')')
             else:
-                self.data.append([sequence,
-                                  val_acc])
+                self.data.append([sequence, val_acc])
             print('validation accuracy: ', val_acc)

     def prepare_controller_data(self, sequences):
-        controller_sequences = pad_sequences(sequences, maxlen=self.max_len, padding='post')
-        xc = controller_sequences[:, :-1].reshape(len(controller_sequences), 1, self.max_len - 1)
-        yc = to_categorical(controller_sequences[:, -1], self.controller_classes)
-        val_acc_target = [item[1] for item in self.data]
+        final_layer_id = len(self.vocab)
+        # pad with 'start' token
+        controller_sequences = pad_sequences(sequences,
+                                             maxlen=self.max_len,
+                                             padding='post',
+                                             value=final_layer_id)
+        controller_sequences = pad_sequences(sequences,
+                                             maxlen=self.max_len + 1,
+                                             padding='pre',
+                                             value=0)
+        xc = controller_sequences[:, :-1].reshape(len(controller_sequences),
+                                                  self.max_len, 1)
+        xc = to_categorical(xc, self.controller_classes)
+        yc = controller_sequences[:, 1:].reshape(len(controller_sequences),
+                                                 self.max_len, 1)
+        val_acc_target = []
+        for idx in range(len(self.data)):
+            data_arch = self.data[idx][0]
+            data_arch = np.pad(data_arch, (0, self.max_len - len(data_arch)),
+                               constant_values=final_layer_id)
+            data_acc = np.ones([self.max_len],
+                               dtype=np.float) * self.data[idx][1]
+            data_acc = np.where(data_arch == final_layer_id, data_acc, 0.0)
+            val_acc_target.append(data_acc)
+        val_acc_target = np.array(val_acc_target)
         return xc, yc, val_acc_target

     def get_discounted_reward(self, rewards):
@@ -89,56 +159,68 @@
                 running_add += self.controller_loss_alpha**exp * r
                 exp += 1
             discounted_r[t] = running_add
-        discounted_r = (discounted_r - discounted_r.mean()) / discounted_r.std()
+        discounted_r = (discounted_r -
+                        discounted_r.mean()) / discounted_r.std()
         return discounted_r

     def custom_loss(self, target, output):
-        baseline = 0.5
-        reward = np.array([item[1] - baseline for item in self.data[-self.samples_per_controller_epoch:]]).reshape(
-            self.samples_per_controller_epoch, 1)
-        discounted_reward = self.get_discounted_reward(reward)
-        loss = - K.log(output) * discounted_reward[:, None]
+        reward = np.array([
+            item[1] for item in self.data[-self.samples_per_controller_epoch:]
+        ]).reshape(self.samples_per_controller_epoch, 1)
+        reward_norm = (reward - reward.mean()) / (reward.std())
+
+        discounted_reward = self.get_discounted_reward(reward_norm)
+        # select action probability for each layer from target
+        # and get action probability for each architecture with
+        # conditional probability
+        sel_output = tf.gather(output, target, axis=2)
+        sel_output = tf.math.reduce_prod(sel_output, axis=1)
+        loss = -K.log(sel_output) * discounted_reward[:, None]
         return loss
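custom_loss above is a REINFORCE-style policy gradient: target carries the token chosen at each position, tf.gather pulls out the softmax probability the controller assigned to each chosen token, reduce_prod multiplies them into a per-architecture sequence probability, and -log of that probability is scaled by the normalized, discounted validation-accuracy reward. A toy sketch of the same computation with explicit shapes (illustrative only; it uses batch_dims=2 to index per position, rather than the hunk's exact gather call):

    import numpy as np
    import tensorflow as tf

    batch, max_len, n_classes = 2, 4, 6
    logits = tf.random.uniform((batch, max_len, n_classes))
    output = tf.nn.softmax(logits, axis=-1)                # controller softmax
    target = tf.constant(np.random.randint(0, n_classes, (batch, max_len, 1)),
                         dtype=tf.int32)                   # sampled tokens
    reward = tf.constant([[0.8], [0.3]])                   # discounted rewards

    sel = tf.gather(output, target, axis=2, batch_dims=2)  # (batch, max_len, 1)
    seq_prob = tf.reduce_prod(sel, axis=1)                 # (batch, 1)
    loss = -tf.math.log(seq_prob) * reward                 # higher reward => stronger push
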
     def train_controller(self, model, x, y, pred_accuracy=None):
         if self.use_predictor:
-            self.train_hybrid_model(model,
-                                    x,
-                                    y,
-                                    pred_accuracy,
-                                    self.custom_loss,
-                                    len(self.data),
+            self.train_hybrid_model(model, x, y, pred_accuracy,
+                                    self.custom_loss, len(self.data),
                                     self.controller_train_epochs)
         else:
-            self.train_control_model(model,
-                                     x,
-                                     y,
-                                     self.custom_loss,
+            self.train_control_model(model, x, y, self.custom_loss,
                                      len(self.data),
                                      self.controller_train_epochs)

     def search(self):
         for controller_epoch in range(self.controller_sampling_epochs):
-            print('------------------------------------------------------------------')
-            print('                       CONTROLLER EPOCH: {}'.format(controller_epoch))
-            print('------------------------------------------------------------------')
-            sequences = self.sample_architecture_sequences(self.controller_model, self.samples_per_controller_epoch)
+            print(
+                '------------------------------------------------------------------'
+            )
+            print('                       CONTROLLER EPOCH: {}'.format(
+                controller_epoch))
+            print(
+                '------------------------------------------------------------------'
+            )
+            sequences = self.sample_architecture_sequences(
+                self.controller_model, self.samples_per_controller_epoch)
+            sys.stdout.flush()
             if self.use_predictor:
-                pred_accuracies = self.get_predicted_accuracies_hybrid_model(self.controller_model, sequences)
+                pred_accuracies = self.get_predicted_accuracies_hybrid_model(
+                    self.controller_model, sequences)
             for i, sequence in enumerate(sequences):
-                print('Architecture: ', self.decode_sequence(sequence))
+                print('Architecture: ', self.decode_sequence(sequence),
+                      sequence)
                 model = self.create_architecture(sequence)
                 history = self.train_architecture(model)
                 if self.use_predictor:
-                    self.append_model_metrics(sequence, history, pred_accuracies[i])
+                    self.append_model_metrics(sequence, history,
+                                              pred_accuracies[i][0])
                 else:
                     self.append_model_metrics(sequence, history)
                 print('------------------------------------------------------')
+                sys.stdout.flush()
             xc, yc, val_acc_target = self.prepare_controller_data(sequences)
-            self.train_controller(self.controller_model,
-                                  xc,
-                                  yc,
-                                  val_acc_target[-self.samples_per_controller_epoch:])
+            self.train_controller(
+                self.controller_model, xc, yc,
+                val_acc_target[-self.samples_per_controller_epoch:])
+            sys.stdout.flush()
         with open(self.nas_data_log, 'wb') as f:
             pickle.dump(self.data, f)
         log_event()
diff --git a/requriments.txt b/requriments.txt
new file mode 100644
index 0000000..f9080e2
--- /dev/null
+++ b/requriments.txt
@@ -0,0 +1,48 @@
+absl-py==1.0.0
+astunparse==1.6.3
+cachetools==4.2.4
+certifi==2021.10.8
+charset-normalizer==2.0.11
+cycler==0.11.0
+dataclasses==0.8
+gast==0.3.3
+google-auth==2.6.0
+google-auth-oauthlib==0.4.6
+google-pasta==0.2.0
+grpcio==1.43.0
+h5py==2.10.0
+idna==3.3
+importlib-metadata==4.8.3
+Keras==2.4.3
+Keras-Preprocessing==1.1.2
+kiwisolver==1.3.1
+Markdown==3.3.6
+matplotlib==3.3.4
+numpy==1.18.5
+oauthlib==3.2.0
+opt-einsum==3.3.0
+pandas==1.1.5
+Pillow==8.4.0
+protobuf==3.19.4
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pyparsing==3.0.7
+python-dateutil==2.8.2
+pytz==2021.3
+PyYAML==6.0
+requests==2.27.1
+requests-oauthlib==1.3.1
+rsa==4.8
+scipy==1.5.4
+six==1.16.0
+tensorboard==2.8.0
+tensorboard-data-server==0.6.1
+tensorboard-plugin-wit==1.8.1
+tensorflow==2.3.1
+tensorflow-estimator==2.3.0
+termcolor==1.1.0
+typing_extensions==4.0.1
+urllib3==1.26.8
+Werkzeug==2.0.3
+wrapt==1.13.3
+zipp==3.6.0
diff --git a/run.py b/run.py
index 29f0cdd..90ec2b1 100644
--- a/run.py
+++ b/run.py
@@ -1,15 +1,43 @@
 import pandas as pd
+import tensorflow as tf
+
+from CONSTANTS import TOP_N, RANDOM_SEED
+
+from numpy.random import seed
+
+seed(RANDOM_SEED)
+tf.compat.v2.random.set_seed(RANDOM_SEED)

 from utils import *
 from mlpnas import MLPNAS
-from CONSTANTS import TOP_N

+gpus = tf.config.experimental.list_physical_devices('GPU')
+if gpus:
+    try:
+        for gpu in gpus:
+            tf.config.experimental.set_memory_growth(gpu, True)
+    except RuntimeError as e:
+        print(e)

 data = pd.read_csv('DATASETS/wine-quality.csv')
 x = data.drop('quality_label', axis=1, inplace=False).values
 y = pd.get_dummies(data['quality_label']).values

 nas_object = MLPNAS(x, y)
+#print(nas_object.controller_model.summary())
 data = nas_object.search()

 get_top_n_architectures(TOP_N)
+
+# data = load_nas_data()
+# data = sort_search_data(data)
+# for seq_data in data[:TOP_N]:
+#     print('Model')
+#     model = nas_object.create_architecture(seq_data[0])
+#     print(model.summary())
+#     print("Evaluate inference time cost...")
+#     latency_results = nas_object.evaluate_latency(model)
+#     print(latency_results)
+#     nas_object.load_shared_weights(model)
+#     results = nas_object.inference_architecture(model)
+#     print("test loss, test acc:", results)
diff --git a/utils.py b/utils.py
index 65829db..0850fac 100644
--- a/utils.py
+++ b/utils.py
@@ -8,7 +8,6 @@
 from CONSTANTS import *
 from mlp_generator import MLPSearchSpace

-
 ########################################################
 #                   DATA PROCESSING                    #
 ########################################################
@@ -40,11 +39,13 @@ def log_event():
     filelist = os.listdir('LOGS')
     for file in filelist:
         if os.path.isfile('LOGS/{}'.format(file)):
-            shutil.move('LOGS/{}'.format(file),dest)
+            shutil.move('LOGS/{}'.format(file), dest)


 def get_latest_event_id():
-    all_subdirs = ['LOGS/' + d for d in os.listdir('LOGS') if os.path.isdir('LOGS/' + d)]
+    all_subdirs = [
+        'LOGS/' + d for d in os.listdir('LOGS') if os.path.isdir('LOGS/' + d)
+    ]
     latest_subdir = max(all_subdirs, key=os.path.getmtime)
     return int(latest_subdir.replace('LOGS/event', ''))

@@ -68,17 +69,20 @@ def sort_search_data(nas_data):
     nas_data = [nas_data[x] for x in sorted_idx]
     return nas_data

+
 ########################################################
 #                 EVALUATION AND PLOTS                 #
 ########################################################

+
 def get_top_n_architectures(n):
     data = load_nas_data()
     data = sort_search_data(data)
     search_space = MLPSearchSpace(TARGET_CLASSES)
     print('Top {} Architectures:'.format(n))
     for seq_data in data[:n]:
-        print('Architecture', search_space.decode_sequence(seq_data[0]))
+        print('Architecture', search_space.decode_sequence(seq_data[0]),
+              seq_data[0])
         print('Validation Accuracy:', seq_data[1])

@@ -92,12 +96,9 @@
 def get_accuracy_distribution():
     event = get_latest_event_id()
     data = load_nas_data()
-    accuracies = [x[1]*100. for x in data]
+    accuracies = [x[1] * 100. for x in data]
     accuracies = [int(x) for x in accuracies]
     sorted_accs = np.sort(accuracies)
     count_dict = {k: len(list(v)) for k, v in groupby(sorted_accs)}
     plt.bar(list(count_dict.keys()), list(count_dict.values()))
     plt.show()
-
-
-
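For reference, re-enabling the commented-out block at the end of run.py yields an evaluation pass over the top architectures (the same calls, uncommented; it assumes a completed search, so LOGS/ holds nas_data.pkl and shared_weights.pkl):

    data = load_nas_data()
    data = sort_search_data(data)
    for seq_data in data[:TOP_N]:
        model = nas_object.create_architecture(seq_data[0])
        print(model.summary())
        print("Evaluate inference time cost...")
        print(nas_object.evaluate_latency(model))
        nas_object.load_shared_weights(model)
        results = nas_object.inference_architecture(model)
        print("test loss, test acc:", results)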