Add mitigation of training information
JMGaljaard committed Jun 7, 2021
1 parent 2a9e236 commit f26a8fc
Showing 5 changed files with 21 additions and 10 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
@@ -29,7 +29,7 @@ RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install -r require
ADD configs configs

ADD fltk fltk

COPY cloud_configs/cloud_experiment.yaml configs/cloud_config.yaml
# Install newest version of library
RUN python3 -m setup install

3 changes: 1 addition & 2 deletions configs/local_experiment.yaml
@@ -1,5 +1,5 @@
# Experiment configuration
total_epochs: 130
total_epochs: 1
epochs_per_cycle: 1
wait_for_clients: true
net: Cifar10CNN
@@ -30,6 +30,5 @@ poison:
type: "flip"
config:
- 5: 3
- 3: 5
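
The surviving flip entry maps a source class to a target class, so with `5: 3` every sample of class 5 is relabelled as class 3 during poisoned training. A minimal sketch of how such a mapping could be applied to a batch of labels is shown below; the exact behaviour of fltk's PoisonPill.poison_output is an assumption here, not taken from the repository.

# Sketch only: assumes a flip config like {5: 3}; PoisonPill's real implementation may differ.
import torch

def apply_label_flip(labels: torch.Tensor, flip_map: dict) -> torch.Tensor:
    """Return a copy of `labels` with every source class replaced by its target class."""
    flipped = labels.clone()
    for source, target in flip_map.items():
        flipped[labels == source] = target
    return flipped

# With the config above, every label 5 becomes a 3:
print(apply_label_flip(torch.tensor([5, 1, 5, 3]), {5: 3}))  # tensor([3, 1, 3, 3])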


17 changes: 13 additions & 4 deletions fltk/client.py
@@ -99,9 +99,16 @@ def reset_model(self):
self.args.get_scheduler_step_size(),
self.args.get_scheduler_gamma(),
self.args.get_min_lr())
# Reset logger
self.args.init_logger(logging)
# Reset the epoch counter
self.epoch_counter = 0
self.finished_init = True
self.finished_init = False
# Dataset will be re-initialized
del self.dataset
# This will be set afterwards, but we delete possible gradient information.
del self.net
self.set_net(self.load_default_model())

def ping(self):
"""
@@ -215,6 +222,7 @@ def update_nn_parameters(self, new_params):
:param new_params: New weights for the neural network
:type new_params: dict
"""

self.net.load_state_dict(copy.deepcopy(new_params), strict=True)
if self.log_rref:
self.remote_log(f'Weights of the model are updated')
@@ -235,6 +243,7 @@ def train(self, epoch, pill: PoisonPill = None):
if self.args.distributed:
self.dataset.train_sampler.set_epoch(epoch)


for i, (inputs, labels) in enumerate(self.dataset.get_train_loader(), 0):
inputs, labels = inputs.to(self.device), labels.to(self.device)
# TODO: check if these parameters are correct, labels or outputs?
@@ -243,7 +252,7 @@
inputs, labels = pill.poison_output(inputs, labels)

# zero the parameter gradients
self.optimizer.zero_grad()
self.optimizer.zero_grad(set_to_none=True)

# forward + backward + optimize

@@ -254,7 +263,7 @@
self.optimizer.step()

# print statistics
running_loss += loss.item()
running_loss += loss.detach().item()
if i % self.args.get_log_interval() == 0:
self.args.get_logger().info('[%d, %5d] loss: %.3f' % (epoch, i, running_loss / self.args.get_log_interval()))
final_running_loss = running_loss / self.args.get_log_interval()
@@ -327,7 +336,7 @@ def run_epochs(self, num_epoch, pill: PoisonPill = None):

# Copy GPU tensors to CPU
for k, v in weights.items():
weights[k] = v.cpu()
weights[k] = v.cpu().detach()
return data, weights

def save_model(self, epoch, suffix):
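Taken together, the client changes keep training state from leaking between experiment runs: reset_model now drops the dataset and network and reloads the default model, gradients are freed by setting them to None, and the loss statistic and reported weights are detached before they leave the training loop. A self-contained sketch of that detach-before-report pattern, using stand-in model and loader objects rather than fltk's own classes, looks roughly like this:

# Sketch only: net, loader, optimizer, and criterion are generic PyTorch objects,
# not fltk's Client internals.
import torch
import torch.nn as nn

def train_one_epoch(net: nn.Module, loader, optimizer, criterion, device="cpu"):
    running_loss = 0.0
    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)
        # set_to_none=True frees the old gradient tensors instead of filling them with zeros.
        optimizer.zero_grad(set_to_none=True)
        loss = criterion(net(inputs), labels)
        loss.backward()
        optimizer.step()
        # detach() drops the autograd history before the value is accumulated.
        running_loss += loss.detach().item()
    # Report detached CPU copies so no GPU tensors or graph references escape the client.
    weights = {k: v.cpu().detach() for k, v in net.state_dict().items()}
    return running_loss / max(len(loader), 1), weights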
7 changes: 5 additions & 2 deletions fltk/federator.py
@@ -114,12 +114,13 @@ def create_clients(self, client_id_triple):
def update_clients(self, ratio):
# Prevent abrupt ending of the client
self.tb_writer.close()
self.tb_writer = SummaryWriter(f'{self.tb_path_base}/{self.config.experiment_prefix}_federator')
self.tb_writer = SummaryWriter(f'{self.tb_path_base}/{self.config.experiment_prefix}_federator_{ratio}')
for client in self.clients:
# Create new writer and close old writer
writer = SummaryWriter(f'{self.tb_path_base}/{self.config.experiment_prefix}_client_{client.name}_{ratio}')
client.tb_writer.close()
client.tb_writer = writer
# Clear client updates afterwards
self.client_data[client.name] = []


@@ -277,7 +278,7 @@ def save_epoch_data(self, ratio = None):
def ensure_path_exists(self, path):
Path(path).mkdir(parents=True, exist_ok=True)

def run(self, ratios = [0.06, 0.12, 0.18] ):
def run(self, ratios = [0.0, 0.06, 0.12, 0.18]):
"""
Main loop of the Federator
:return:
@@ -292,6 +293,7 @@ def run(self, ratios = [0.06, 0.12, 0.18] ):
model = initialize_default_model(self.config, self.config.get_net())
# Re-use the functionality to update
self.distribute_new_model(model.state_dict())

# Update the clients to point to the newer version.
self.update_clients(rat)
if self.attack:
@@ -308,6 +310,7 @@ def run(self, ratios = [0.06, 0.12, 0.18] ):
addition = 0
epoch_to_run = self.config.epochs
epoch_size = self.config.epochs_per_cycle
print(f"Running a total of {epoch_to_run} epochs...")
for epoch in range(epoch_to_run):
print(f'Running epoch {epoch}')
# Get new model during run, update iteratively. The model is needed to calculate the
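Appending the poison ratio to every TensorBoard log directory, and clearing the per-client update buffers when the clients are reset, gives each ratio in run (including the new 0.0 clean baseline) its own isolated set of logs. A small sketch of that naming scheme is shown below; the base path and experiment prefix are placeholder values, not the repository's configuration.

# Sketch only: tb_path_base and experiment_prefix are placeholders.
from torch.utils.tensorboard import SummaryWriter

tb_path_base = "logging"
experiment_prefix = "exp"

def writer_for(ratio: float, client_name: str = None) -> SummaryWriter:
    """One log directory per (ratio, participant), so successive runs never overwrite each other."""
    if client_name is None:
        return SummaryWriter(f"{tb_path_base}/{experiment_prefix}_federator_{ratio}")
    return SummaryWriter(f"{tb_path_base}/{experiment_prefix}_client_{client_name}_{ratio}")

for ratio in [0.0, 0.06, 0.12, 0.18]:  # 0.0 serves as the unpoisoned baseline
    federator_writer = writer_for(ratio)
    client_writer = writer_for(ratio, "client1")
    # ... run the experiment for this ratio, then close both writers ...
    federator_writer.close()
    client_writer.close()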
2 changes: 1 addition & 1 deletion fltk/util/fed_avg.py
@@ -7,6 +7,6 @@ def average_nn_parameters(parameters):
"""
new_params = {}
for name in parameters[0].keys():
new_params[name] = sum([param[name].data for param in parameters]) / len(parameters)
new_params[name] = sum([param[name].data.clone() for param in parameters]) / len(parameters)

return new_params
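
Cloning each client tensor before the sum guarantees that the averaged state dict owns fresh storage and never aliases a client's parameters. A standalone sketch, assuming parameters is a list of PyTorch state dicts and using nn.Linear as a stand-in for the federated model, is shown below.

# Sketch only: nn.Linear stands in for the federated model.
import torch.nn as nn

def average_nn_parameters(parameters):
    """Average a list of state dicts; clone() keeps the result independent of every input."""
    new_params = {}
    for name in parameters[0].keys():
        new_params[name] = sum(param[name].data.clone() for param in parameters) / len(parameters)
    return new_params

net_a, net_b = nn.Linear(4, 2), nn.Linear(4, 2)
averaged = average_nn_parameters([net_a.state_dict(), net_b.state_dict()])
print({name: tensor.shape for name, tensor in averaged.items()})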
