custom emodb dataset #5

Open
wants to merge 8 commits into base: master
8 changes: 8 additions & 0 deletions .gitignore
@@ -0,0 +1,8 @@
__pycache__/
venv/
LOGS/*
!LOGS/.placeholder

*.log*

Gatesizing_NAS.py
3 changes: 3 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,3 @@
{
"python.formatting.provider": "yapf"
}
10 changes: 8 additions & 2 deletions CONSTANTS.py
@@ -1,10 +1,10 @@
########################################################
# NAS PARAMETERS #
########################################################
CONTROLLER_SAMPLING_EPOCHS = 10
CONTROLLER_SAMPLING_EPOCHS = 1000
SAMPLES_PER_CONTROLLER_EPOCH = 10
CONTROLLER_TRAINING_EPOCHS = 10
ARCHITECTURE_TRAINING_EPOCHS = 10
ARCHITECTURE_TRAINING_EPOCHS = 5
CONTROLLER_LOSS_ALPHA = 0.9

########################################################
@@ -38,3 +38,9 @@
# OUTPUT PARAMETERS #
########################################################
TOP_N = 5

########################################################
# RANDOM SEED #
########################################################

RANDOM_SEED = 1
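
Note: this diff only adds the RANDOM_SEED constant; the call sites that consume it are not shown. A minimal sketch of how it could be applied, assuming numpy and TensorFlow are the only sources of randomness in the search:

import random

import numpy as np
import tensorflow as tf

from CONSTANTS import RANDOM_SEED

# Seed every RNG the search touches so controller sampling and Keras weight
# initialization are repeatable across runs (sketch only; not part of this diff).
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)  # tf.set_random_seed(...) on TF 1.x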
Empty file added LOGS/.placeholder
Binary file removed LOGS/event1051/controller_weights.h5
Binary file removed LOGS/event1051/nas_data.pkl
Binary file removed LOGS/event1265/controller_weights.h5
Binary file removed LOGS/event1265/nas_data.pkl
Binary file removed LOGS/event1265/shared_weights.pkl
Binary file removed LOGS/event1547/controller_weights.h5
Binary file removed LOGS/event1547/nas_data.pkl
Binary file removed LOGS/event1547/shared_weights.pkl
Binary file removed LOGS/event3868/hybrid_weights.h5
Binary file removed LOGS/event3868/nas_data.pkl
Binary file removed LOGS/event3868/shared_weights.pkl
Binary file removed LOGS/event9208/controller_weights.h5
Binary file removed LOGS/event9208/nas_data.pkl
142 changes: 100 additions & 42 deletions controller.py
@@ -1,9 +1,13 @@
import os
import numpy as np

np.set_printoptions(precision=10)
from keras import optimizers
from keras.layers import Dense, LSTM
from keras.models import Model
from keras.engine.input_layer import Input
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences

from mlp_generator import MLPSearchSpace
@@ -30,6 +34,7 @@ def __init__(self):
super().__init__(TARGET_CLASSES)

self.controller_classes = len(self.vocab) + 1
self.eps = 0.5

def sample_architecture_sequences(self, model, number_of_samples):
final_layer_id = len(self.vocab)
@@ -39,78 +44,126 @@ def sample_architecture_sequences(self, model, number_of_samples):
print("GENERATING ARCHITECTURE SAMPLES...")
print('------------------------------------------------------')
while len(samples) < number_of_samples:
seed = []
seed = [0]
# initialize with the 'start' token
sequence = pad_sequences([seed],
maxlen=self.max_len,
padding='post',
value=0)
sequence = sequence.reshape(1, self.max_len, 1)
sequence = to_categorical(sequence, self.controller_classes)
if self.use_predictor:
(probab, _) = model.predict(sequence)
else:
probab = model.predict(sequence)
probab = probab[0]
while len(seed) < self.max_len:
sequence = pad_sequences([seed], maxlen=self.max_len - 1, padding='post')
sequence = sequence.reshape(1, 1, self.max_len - 1)
if self.use_predictor:
(probab, _) = model.predict(sequence)
else:
probab = model.predict(sequence)
probab = probab[0][0]
next = np.random.choice(vocab_idx, size=1, p=probab)[0]
if next == dropout_id and len(seed) == 0:
next = np.random.choice(vocab_idx, size=1,
p=probab[len(seed)])[0]
if next == 0:
continue
if next == dropout_id and len(seed) == 1:
continue
if next == final_layer_id and len(seed) == 0:
if next == final_layer_id and len(seed) == 1:
continue
if next == final_layer_id:
seed.append(next)
break
if len(seed) == self.max_len - 1:
seed.append(final_layer_id)
break
if not next == 0:
if not next == 1:
seed.append(next)
if seed not in self.seq_data:
samples.append(seed)
self.seq_data.append(seed)
if len(seed) == self.max_len and seed[-1] != final_layer_id:
seed.append(final_layer_id)
# # search unexplored architecture
# # use epsilon-greedy
# p = np.random.random()
# if (seed not in self.seq_data):
# samples.append(seed[1:])
# self.seq_data.append(seed)
# elif (p < self.eps):
# samples.append(seed[1:])
samples.append(seed[1:])
for idx in range(self.max_len):
probab_percent = probab[idx] * 100.0
print(
"Controller prob {}: mean: {:.10f}, std: {:.10f}, top5:{}".
format(idx, tf.math.reduce_mean(probab_percent),
tf.math.reduce_std(probab_percent),
tf.math.top_k(probab_percent, 5).indices.numpy()))
return samples

def control_model(self, controller_input_shape, controller_batch_size):
main_input = Input(shape=controller_input_shape, batch_shape=controller_batch_size, name='main_input')
main_input = Input(shape=controller_input_shape, name='main_input')
x = LSTM(self.controller_lstm_dim, return_sequences=True)(main_input)
main_output = Dense(self.controller_classes, activation='softmax', name='main_output')(x)
main_output = Dense(self.controller_classes,
activation='softmax',
name='main_output')(x)
model = Model(inputs=[main_input], outputs=[main_output])
return model

def train_control_model(self, model, x_data, y_data, loss_func, controller_batch_size, nb_epochs):
def train_control_model(self, model, x_data, y_data, loss_func,
controller_batch_size, nb_epochs):
if self.controller_optimizer == 'sgd':
optim = optimizers.SGD(lr=self.controller_lr, decay=self.controller_decay, momentum=self.controller_momentum, clipnorm=1.0)
optim = optimizers.SGD(lr=self.controller_lr,
decay=self.controller_decay,
momentum=self.controller_momentum,
clipnorm=1.0)
else:
optim = getattr(optimizers, self.controller_optimizer)(lr=self.controller_lr, decay=self.controller_decay, clipnorm=1.0)
optim = getattr(optimizers, self.controller_optimizer)(
lr=self.controller_lr,
decay=self.controller_decay,
clipnorm=1.0)
model.compile(optimizer=optim, loss={'main_output': loss_func})
if os.path.exists(self.controller_weights):
model.load_weights(self.controller_weights)
print("TRAINING CONTROLLER...")
model.fit({'main_input': x_data},
{'main_output': y_data.reshape(len(y_data), 1, self.controller_classes)},
model.fit({'main_input': x_data}, {'main_output': y_data},
epochs=nb_epochs,
batch_size=controller_batch_size,
verbose=0)
model.save_weights(self.controller_weights)

def hybrid_control_model(self, controller_input_shape, controller_batch_size):
main_input = Input(shape=controller_input_shape, batch_shape=controller_batch_size, name='main_input')
def hybrid_control_model(self, controller_input_shape,
controller_batch_size):
main_input = Input(shape=controller_input_shape, name='main_input')
x = LSTM(self.controller_lstm_dim, return_sequences=True)(main_input)
predictor_output = Dense(1, activation='sigmoid', name='predictor_output')(x)
main_output = Dense(self.controller_classes, activation='softmax', name='main_output')(x)
model = Model(inputs=[main_input], outputs=[main_output, predictor_output])
predictor_output = Dense(1,
activation='sigmoid',
name='predictor_output')(x)
main_output = Dense(self.controller_classes,
activation='softmax',
name='main_output')(x)
model = Model(inputs=[main_input],
outputs=[main_output, predictor_output])
return model

def train_hybrid_model(self, model, x_data, y_data, pred_target, loss_func, controller_batch_size, nb_epochs):
def train_hybrid_model(self, model, x_data, y_data, pred_target, loss_func,
controller_batch_size, nb_epochs):
if self.controller_optimizer == 'sgd':
optim = optimizers.SGD(lr=self.controller_lr, decay=self.controller_decay, momentum=self.controller_momentum, clipnorm=1.0)
optim = optimizers.SGD(lr=self.controller_lr,
decay=self.controller_decay,
momentum=self.controller_momentum,
clipnorm=1.0)
else:
optim = getattr(optimizers, self.controller_optimizer)(lr=self.controller_lr, decay=self.controller_decay, clipnorm=1.0)
optim = getattr(optimizers, self.controller_optimizer)(
lr=self.controller_lr,
decay=self.controller_decay,
clipnorm=1.0)
model.compile(optimizer=optim,
loss={'main_output': loss_func, 'predictor_output': 'mse'},
loss_weights={'main_output': 1, 'predictor_output': 1})
loss={
'main_output': loss_func,
'predictor_output': 'mse'
},
loss_weights={
'main_output': 1,
'predictor_output': 1
})
if os.path.exists(self.controller_weights):
model.load_weights(self.controller_weights)
print("TRAINING CONTROLLER...")
model.fit({'main_input': x_data},
{'main_output': y_data.reshape(len(y_data), 1, self.controller_classes),
'predictor_output': np.array(pred_target).reshape(len(pred_target), 1, 1)},
model.fit({'main_input': x_data}, {
'main_output': y_data,
'predictor_output': pred_target
},
epochs=nb_epochs,
batch_size=controller_batch_size,
verbose=0)
@@ -119,8 +172,13 @@ def train_hybrid_model(self, model, x_data, y_data, pred_target, loss_func, cont
def get_predicted_accuracies_hybrid_model(self, model, seqs):
pred_accuracies = []
for seq in seqs:
control_sequences = pad_sequences([seq], maxlen=self.max_len, padding='post')
xc = control_sequences[:, :-1].reshape(len(control_sequences), 1, self.max_len - 1)
(_, pred_accuracy) = [x[0][0] for x in model.predict(xc)]
pred_accuracies.append(pred_accuracy[0])
control_sequences = pad_sequences([seq],
maxlen=self.max_len + 1,
padding='pre',
value=0)
xc = control_sequences[:, :-1].reshape(len(control_sequences),
self.max_len, 1)
xc = to_categorical(xc, self.controller_classes)
_, pred_accuracy = model.predict(xc)
pred_accuracies.append(pred_accuracy[0][-1])
return pred_accuracies
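
Note: the reworked sampler now seeds the sequence with the new 'start' token (id 0), post-pads it to max_len, one-hot encodes it, and reads the next-token distribution at position len(seed). A standalone sketch of that encoding step, with an illustrative length and vocabulary size rather than the values from CONSTANTS.py:

import numpy as np
from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

max_len = 5              # assumed architecture length limit
controller_classes = 30  # assumed len(vocab) + 1

seed = [0]  # 'start' token; sampled layer ids are appended one at a time
sequence = pad_sequences([seed], maxlen=max_len, padding='post', value=0)
sequence = sequence.reshape(1, max_len, 1)
one_hot = to_categorical(sequence, controller_classes)
# to_categorical drops the trailing singleton axis, so the LSTM controller sees
# a (1, max_len, controller_classes) tensor; probab[len(seed)] is then the
# distribution over the token that follows the current prefix.
print(one_hot.shape)  # (1, 5, 30)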
74 changes: 58 additions & 16 deletions mlp_generator.py
@@ -19,6 +19,7 @@ def vocab_dict(self):
nodes = [8, 16, 32, 64, 128, 256, 512]
act_funcs = ['sigmoid', 'tanh', 'relu', 'elu']
layer_params = []
# NOTE: id 0 is the 'start' token
layer_id = []
for i in range(len(nodes)):
for j in range(len(act_funcs)):
@@ -65,10 +66,12 @@ def __init__(self):

super().__init__(TARGET_CLASSES)


if self.mlp_one_shot:
self.weights_file = 'LOGS/shared_weights.pkl'
self.shared_weights = pd.DataFrame({'bigram_id': [], 'weights': []})
self.shared_weights = pd.DataFrame({
'bigram_id': [],
'weights': []
})
if not os.path.exists(self.weights_file):
print("Initializing shared weights dictionary...")
self.shared_weights.to_pickle(self.weights_file)
@@ -82,23 +85,34 @@ def create_model(self, sequence, mlp_input_shape):
if layer_conf is 'dropout':
model.add(Dropout(self.mlp_dropout, name='dropout'))
else:
model.add(Dense(units=layer_conf[0], activation=layer_conf[1]))
model.add(
Dense(units=layer_conf[0], activation=layer_conf[1]))
else:
for i, layer_conf in enumerate(layer_configs):
if i == 0:
model.add(Dense(units=layer_conf[0], activation=layer_conf[1], input_shape=mlp_input_shape))
model.add(
Dense(units=layer_conf[0],
activation=layer_conf[1],
input_shape=mlp_input_shape))
elif layer_conf is 'dropout':
model.add(Dropout(self.mlp_dropout, name='dropout'))
else:
model.add(Dense(units=layer_conf[0], activation=layer_conf[1]))
model.add(
Dense(units=layer_conf[0], activation=layer_conf[1]))
return model

def compile_model(self, model):
if self.mlp_optimizer == 'sgd':
optim = optimizers.SGD(lr=self.mlp_lr, decay=self.mlp_decay, momentum=self.mlp_momentum)
optim = optimizers.SGD(lr=self.mlp_lr,
decay=self.mlp_decay,
momentum=self.mlp_momentum)
else:
optim = getattr(optimizers, self.mlp_optimizer)(lr=self.mlp_lr, decay=self.mlp_decay)
model.compile(loss=self.mlp_loss_func, optimizer=optim, metrics=self.metrics)
optim = getattr(optimizers,
self.mlp_optimizer)(lr=self.mlp_lr,
decay=self.mlp_decay)
model.compile(loss=self.mlp_loss_func,
optimizer=optim,
metrics=self.metrics)
return model

def update_weights(self, model):
@@ -107,7 +121,8 @@ def update_weights(self, model):
if 'flatten' in layer.name:
layer_configs.append(('flatten'))
elif 'dropout' not in layer.name:
layer_configs.append((layer.get_config()['units'], layer.get_config()['activation']))
layer_configs.append((layer.get_config()['units'],
layer.get_config()['activation']))
config_ids = []
for i in range(1, len(layer_configs)):
config_ids.append((layer_configs[i - 1], layer_configs[i]))
@@ -121,21 +136,37 @@ def update_weights(self, model):
if config_ids[j] == bigram_ids[i]:
search_index.append(i)
if len(search_index) == 0:
self.shared_weights = self.shared_weights.append({'bigram_id': config_ids[j],
'weights': layer.get_weights()},
ignore_index=True)
self.shared_weights = self.shared_weights.append(
{
'bigram_id': config_ids[j],
'weights': layer.get_weights()
},
ignore_index=True)
else:
self.shared_weights.at[search_index[0], 'weights'] = layer.get_weights()
self.shared_weights.at[search_index[0],
'weights'] = layer.get_weights()
j += 1
self.shared_weights.to_pickle(self.weights_file)

def load_shared_weights(self, model):
all_subdirs = [
'LOGS/' + d for d in os.listdir('LOGS')
if os.path.isdir('LOGS/' + d)
]
latest_subdir = max(all_subdirs, key=os.path.getmtime)
old_weights_file = os.path.basename(self.weights_file)
old_weights_file = os.path.join(latest_subdir, old_weights_file)
self.shared_weights = pd.read_pickle(old_weights_file)
self.set_model_weights(model)

def set_model_weights(self, model):
layer_configs = ['input']
for layer in model.layers:
if 'flatten' in layer.name:
layer_configs.append(('flatten'))
elif 'dropout' not in layer.name:
layer_configs.append((layer.get_config()['units'], layer.get_config()['activation']))
layer_configs.append((layer.get_config()['units'],
layer.get_config()['activation']))
config_ids = []
for i in range(1, len(layer_configs)):
config_ids.append((layer_configs[i - 1], layer_configs[i]))
@@ -150,10 +181,17 @@ def set_model_weights(self, model):
search_index.append(i)
if len(search_index) > 0:
print("Transferring weights for layer:", config_ids[j])
layer.set_weights(self.shared_weights['weights'].values[search_index[0]])
layer.set_weights(
self.shared_weights['weights'].values[search_index[0]])
j += 1

def train_model(self, model, x_data, y_data, nb_epochs, validation_split=0.1, callbacks=None):
def train_model(self,
model,
x_data,
y_data,
nb_epochs,
validation_split=0.1,
callbacks=None):
if self.mlp_one_shot:
self.set_model_weights(model)
history = model.fit(x_data,
@@ -171,3 +209,7 @@ def train_model(self, x_data, y_data, nb_epochs, validation_split=0.1, ca
callbacks=callbacks,
verbose=0)
return history

def inference_model(self, model, x_data, y_data):
results = model.evaluate(x_data, y_data, verbose=0)
return results
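
Note: load_shared_weights and inference_model are only defined here; the driver-side calls are not part of this diff. A hypothetical usage sketch with placeholder data, assuming the generator class defined in this file is MLPGenerator:

import numpy as np

from mlp_generator import MLPGenerator

# Placeholder inputs -- shapes, class count, and the encoded architecture are illustrative.
x_train = np.random.rand(64, 40)               # e.g. 40 acoustic features per EMO-DB sample
y_train = np.random.randint(0, 7, size=(64,))  # e.g. 7 emotion classes
sequence = [3, 12, 28]                         # an architecture sampled by the controller
                                               # (must end with the final-layer token in practice)

generator = MLPGenerator()
model = generator.create_model(sequence, mlp_input_shape=(x_train.shape[1],))
model = generator.compile_model(model)
# Warm-start from the most recently modified LOGS/event* directory
# (only meaningful when mlp_one_shot weight sharing is enabled).
generator.load_shared_weights(model)
history = generator.train_model(model, x_train, y_train, nb_epochs=5)
# inference_model() is a thin wrapper around model.evaluate().
results = generator.inference_model(model, x_train, y_train)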