From ebebb26c5c3372c1d6311e7d59770f9d6a2d99f2 Mon Sep 17 00:00:00 2001 From: Bart Cox Date: Fri, 7 May 2021 16:07:35 +0200 Subject: [PATCH 01/73] Add timing callbacks --- fltk/client.py | 12 ++++--- fltk/federator.py | 84 ++++++++++++++++++--------------------------- fltk/util/remote.py | 54 +++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+), 54 deletions(-) create mode 100644 fltk/util/remote.py diff --git a/fltk/client.py b/fltk/client.py index de3095e3..99b17271 100644 --- a/fltk/client.py +++ b/fltk/client.py @@ -171,10 +171,14 @@ def update_nn_parameters(self, new_params): :param new_params: New weights for the neural network :type new_params: dict """ + start_time = time.time() self.net.load_state_dict(copy.deepcopy(new_params), strict=True) if self.log_rref: self.remote_log(f'Weights of the model are updated') + end_time = time.time() + return end_time - start_time + def train(self, epoch): """ :param epoch: Current epoch # @@ -248,10 +252,10 @@ def test(self): self.args.get_logger().debug('Test set: Accuracy: {}/{} ({:.0f}%)'.format(correct, total, accuracy)) self.args.get_logger().debug('Test set: Loss: {}'.format(loss)) - self.args.get_logger().debug("Classification Report:\n" + classification_report(targets_, pred_)) - self.args.get_logger().debug("Confusion Matrix:\n" + str(confusion_mat)) - self.args.get_logger().debug("Class precision: {}".format(str(class_precision))) - self.args.get_logger().debug("Class recall: {}".format(str(class_recall))) + # self.args.get_logger().debug("Classification Report:\n" + classification_report(targets_, pred_)) + # self.args.get_logger().debug("Confusion Matrix:\n" + str(confusion_mat)) + # self.args.get_logger().debug("Class precision: {}".format(str(class_precision))) + # self.args.get_logger().debug("Class recall: {}".format(str(class_recall))) return accuracy, loss, class_precision, class_recall diff --git a/fltk/federator.py b/fltk/federator.py index 88ccf31d..cbb7ac97 100644 --- a/fltk/federator.py 
+++ b/fltk/federator.py @@ -1,4 +1,3 @@ -import datetime import time from typing import List @@ -6,50 +5,18 @@ from torch.distributed import rpc from fltk.client import Client -from fltk.datasets.data_distribution import distribute_batches_equally from fltk.strategy.client_selection import random_selection -from fltk.util.arguments import Arguments -from fltk.util.base_config import BareConfig -from fltk.util.data_loader_utils import load_train_data_loader, load_test_data_loader, \ - generate_data_loaders_from_distributed_dataset from fltk.util.fed_avg import average_nn_parameters from fltk.util.log import FLLogger -from torchsummary import summary from torch.utils.tensorboard import SummaryWriter from pathlib import Path import logging +from fltk.util.remote import ClientRef, _remote_method, _remote_method_async, AsyncCall, time_remote_async_call from fltk.util.results import EpochData -from fltk.util.tensor_converter import convert_distributed_data_into_numpy logging.basicConfig(level=logging.DEBUG) -def _call_method(method, rref, *args, **kwargs): - return method(rref.local_value(), *args, **kwargs) - - -def _remote_method(method, rref, *args, **kwargs): - args = [method, rref] + list(args) - return rpc.rpc_sync(rref.owner(), _call_method, args=args, kwargs=kwargs) - -def _remote_method_async(method, rref, *args, **kwargs): - args = [method, rref] + list(args) - return rpc.rpc_async(rref.owner(), _call_method, args=args, kwargs=kwargs) - -class ClientRef: - ref = None - name = "" - data_size = 0 - tb_writer = None - - def __init__(self, name, ref, tensorboard_writer): - self.name = name - self.ref = ref - self.tb_writer = tensorboard_writer - - def __repr__(self): - return self.name - class Federator: """ Central component of the Federated Learning System: The Federator @@ -137,31 +104,42 @@ def clients_ready(self): logging.info('All clients are ready') def remote_run_epoch(self, epochs): - responses = [] + + responses: List[AsyncCall] = [] client_weights = [] 
selected_clients = self.select_clients(self.config.clients_per_round) for client in selected_clients: - responses.append((client, _remote_method_async(Client.run_epochs, client.ref, num_epoch=epochs))) + response = time_remote_async_call(client, Client.run_epochs, client.ref, num_epoch=epochs) + responses.append(response) + self.epoch_counter += epochs + durations = [] for res in responses: - epoch_data, weights = res[1].wait() + res.future.wait() + epoch_data, weights = res.future.wait() + fed_stop_time = time.time() self.client_data[epoch_data.client_id].append(epoch_data) - logging.info(f'{res[0]} had a loss of {epoch_data.loss}') - logging.info(f'{res[0]} had a epoch data of {epoch_data}') + logging.info(f'{res.client.name} had a loss of {epoch_data.loss}') + logging.info(f'{res.client.name} had a epoch data of {epoch_data}') + logging.info(f'[TIMING FUT]\t{res.client.name} had a epoch duration of {res.duration()}') + fed_duration = fed_stop_time - res.end_time + logging.info(f'[TIMING LOCAL]\t{res.client.name} had a epoch duration of {fed_duration}') + durations.append((res.client.name, res.duration(), fed_duration)) - res[0].tb_writer.add_scalar('training loss', + + res.client.tb_writer.add_scalar('training loss', epoch_data.loss_train, # for every 1000 minibatches - self.epoch_counter * res[0].data_size) + self.epoch_counter * res.client.data_size) - res[0].tb_writer.add_scalar('accuracy', + res.client.tb_writer.add_scalar('accuracy', epoch_data.accuracy, # for every 1000 minibatches - self.epoch_counter * res[0].data_size) + self.epoch_counter * res.client.data_size) - res[0].tb_writer.add_scalar('training loss per epoch', + res.client.tb_writer.add_scalar('training loss per epoch', epoch_data.loss_train, # for every 1000 minibatches self.epoch_counter) - res[0].tb_writer.add_scalar('accuracy per epoch', + res.client.tb_writer.add_scalar('accuracy per epoch', epoch_data.accuracy, # for every 1000 minibatches self.epoch_counter) @@ -172,20 +150,26 @@ def 
remote_run_epoch(self, epochs): logging.info("Testing on global test set") self.test_data.update_nn_parameters(updated_model) accuracy, loss, class_precision, class_recall = self.test_data.test() - # self.tb_writer.add_scalar('training loss', loss, self.epoch_counter * self.test_data.get_client_datasize()) # does not seem to work :( ) self.tb_writer.add_scalar('accuracy', accuracy, self.epoch_counter * self.test_data.get_client_datasize()) self.tb_writer.add_scalar('accuracy per epoch', accuracy, self.epoch_counter) - responses = [] for client in self.clients: - responses.append( - (client, _remote_method_async(Client.update_nn_parameters, client.ref, new_params=updated_model))) + response = time_remote_async_call(client, Client.update_nn_parameters, client.ref, new_params=updated_model) + responses.append(response) for res in responses: - res[1].wait() + func_duration = res.future.wait() + print(f'[Client:: {res.client.name}] internal weights copied in {func_duration}') + print(f'[Client:: {res.client.name}] model transfer time: {res.duration()}') logging.info('Weights are updated') + print('Duration timing') + for name, fut_time, fed_time in durations: + print(f'Client: {name} has these timings:') + print(f'FUT:\t{fut_time}') + print(f'Fed:\t{fed_time}') + def update_client_data_sizes(self): responses = [] for client in self.clients: diff --git a/fltk/util/remote.py b/fltk/util/remote.py new file mode 100644 index 00000000..9e5475c7 --- /dev/null +++ b/fltk/util/remote.py @@ -0,0 +1,54 @@ +import time + +from torch.distributed import rpc +from dataclasses import dataclass +from torch.futures import Future + +def _call_method(method, rref, *args, **kwargs): + return method(rref.local_value(), *args, **kwargs) + +def _remote_method(method, rref, *args, **kwargs): + args = [method, rref] + list(args) + return rpc.rpc_sync(rref.owner(), _call_method, args=args, kwargs=kwargs) + +def _remote_method_async(method, rref, *args, **kwargs): + args = [method, rref] + 
list(args) + return rpc.rpc_async(rref.owner(), _call_method, args=args, kwargs=kwargs) + +class ClientRef: + ref = None + name = "" + data_size = 0 + tb_writer = None + + def __init__(self, name, ref, tensorboard_writer): + self.name = name + self.ref = ref + self.tb_writer = tensorboard_writer + + def __repr__(self): + return self.name + +@dataclass +class AsyncCall: + future: Future + client: ClientRef + start_time: float = 0 + end_time: float = 0 + + def duration(self): + return self.end_time - self.start_time + + +def bind_timing_cb(response_obj: AsyncCall): + def callback(fut): + stop_time = time.time() + response_obj.end_time = stop_time + response_obj.future.then(callback) + +def time_remote_async_call(client, method, rref, *args, **kwargs): + start_time = time.time() + fut = _remote_method_async(method, rref, *args, **kwargs) + response = AsyncCall(fut, client, start_time=start_time) + bind_timing_cb(response) + return response \ No newline at end of file From 9754e0cd0d429bb344cbc6d8eccddd4b1a5b5371 Mon Sep 17 00:00:00 2001 From: Bart Cox Date: Sat, 8 May 2021 16:22:15 +0200 Subject: [PATCH 02/73] Add timing profiling --- fltk/client.py | 4 +- fltk/federator.py | 86 ++++++++++++++++++++++++---------------- fltk/util/base_config.py | 4 ++ fltk/util/remote.py | 16 +++++++- 4 files changed, 72 insertions(+), 38 deletions(-) diff --git a/fltk/client.py b/fltk/client.py index 99b17271..ffcb1534 100644 --- a/fltk/client.py +++ b/fltk/client.py @@ -214,6 +214,8 @@ def train(self, epoch): final_running_loss = running_loss / self.args.get_log_interval() running_loss = 0.0 + break + self.scheduler.step() # save model @@ -308,4 +310,4 @@ def get_client_datasize(self): return len(self.dataset.get_train_sampler()) def __del__(self): - print(f'Client {self.id} is stopping') + logging.info(f'Client {self.id} is stopping') diff --git a/fltk/federator.py b/fltk/federator.py index cbb7ac97..e073b720 100644 --- a/fltk/federator.py +++ b/fltk/federator.py @@ -12,7 +12,7 @@ 
from pathlib import Path import logging -from fltk.util.remote import ClientRef, _remote_method, _remote_method_async, AsyncCall, time_remote_async_call +from fltk.util.remote import ClientRef, AsyncCall, timed_remote_async_call, _remote_method, TimingRecord from fltk.util.results import EpochData logging.basicConfig(level=logging.DEBUG) @@ -70,17 +70,18 @@ def ping_all(self): answer = _remote_method(Client.ping, client.ref) t_end = time.time() duration = (t_end - t_start)*1000 + client.timing_data.append(TimingRecord(f'{client.name}', 'ping', duration)) logging.info(f'Ping to {client} is {duration:.3}ms') def rpc_test_all(self): for client in self.clients: - res = _remote_method_async(Client.rpc_test, client.ref) - while not res.done(): + res = timed_remote_async_call(client, Client.rpc_test, client.ref) + while not res.future.done(): pass def client_load_data(self): for client in self.clients: - _remote_method_async(Client.init_dataloader, client.ref) + timed_remote_async_call(client, Client.init_dataloader, client.ref) def clients_ready(self): all_ready = False @@ -89,15 +90,16 @@ def clients_ready(self): responses = [] for client in self.clients: if client.name not in ready_clients: - responses.append((client, _remote_method_async(Client.is_ready, client.ref))) + response = timed_remote_async_call(client, Client.is_ready, client.ref) + responses.append(response) all_ready = True for res in responses: - result = res[1].wait() + result = res.future.wait() if result: - logging.info(f'{res[0]} is ready') - ready_clients.append(res[0]) + logging.info(f'{res.client} is ready') + ready_clients.append(res.client) else: - logging.info(f'Waiting for {res[0]}') + logging.info(f'Waiting for {res.client}') all_ready = False time.sleep(2) @@ -109,7 +111,7 @@ def remote_run_epoch(self, epochs): client_weights = [] selected_clients = self.select_clients(self.config.clients_per_round) for client in selected_clients: - response = time_remote_async_call(client, Client.run_epochs, 
client.ref, num_epoch=epochs) + response = timed_remote_async_call(client, Client.run_epochs, client.ref, num_epoch=epochs) responses.append(response) self.epoch_counter += epochs @@ -121,11 +123,7 @@ def remote_run_epoch(self, epochs): self.client_data[epoch_data.client_id].append(epoch_data) logging.info(f'{res.client.name} had a loss of {epoch_data.loss}') logging.info(f'{res.client.name} had a epoch data of {epoch_data}') - logging.info(f'[TIMING FUT]\t{res.client.name} had a epoch duration of {res.duration()}') - fed_duration = fed_stop_time - res.end_time - logging.info(f'[TIMING LOCAL]\t{res.client.name} had a epoch duration of {fed_duration}') - durations.append((res.client.name, res.duration(), fed_duration)) - + res.client.timing_data.append(TimingRecord(f'{res.client.name}', 'epoch_time_round_trip', res.duration())) res.client.tb_writer.add_scalar('training loss', epoch_data.loss_train, # for every 1000 minibatches @@ -155,40 +153,36 @@ def remote_run_epoch(self, epochs): responses = [] for client in self.clients: - response = time_remote_async_call(client, Client.update_nn_parameters, client.ref, new_params=updated_model) + response = timed_remote_async_call(client, Client.update_nn_parameters, client.ref, new_params=updated_model) responses.append(response) for res in responses: func_duration = res.future.wait() - print(f'[Client:: {res.client.name}] internal weights copied in {func_duration}') - print(f'[Client:: {res.client.name}] model transfer time: {res.duration()}') + res.client.timing_data.append(TimingRecord(res.client.name, 'update_param_inner', func_duration)) + res.client.timing_data.append(TimingRecord(f'{res.client.name}', 'update_param_round_trip', res.duration())) logging.info('Weights are updated') - print('Duration timing') - for name, fut_time, fed_time in durations: - print(f'Client: {name} has these timings:') - print(f'FUT:\t{fut_time}') - print(f'Fed:\t{fed_time}') - def update_client_data_sizes(self): responses = [] for client in 
self.clients: - responses.append((client, _remote_method_async(Client.get_client_datasize, client.ref))) + response = timed_remote_async_call(client, Client.get_client_datasize, client.ref) + responses.append(response) for res in responses: - res[0].data_size = res[1].wait() - logging.info(f'{res[0]} had a result of datasize={res[0].data_size}') + res.client.data_size = res.future.wait() + logging.info(f'{res.client.name} had a result of datasize={res.client.data_size}') def remote_test_sync(self): responses = [] for client in self.clients: - responses.append((client, _remote_method_async(Client.test, client.ref))) + response = timed_remote_async_call(client, Client.test, client.ref) + responses.append(response) for res in responses: - accuracy, loss, class_precision, class_recall = res[1].wait() - logging.info(f'{res[0]} had a result of accuracy={accuracy}') + accuracy, loss, class_precision, class_recall = res.future.wait() + logging.info(f'{res.client.name} had a result of accuracy={accuracy}') def save_epoch_data(self): - file_output = f'./{self.config.output_location}' + file_output = f'./{self.config.output_location}/{self.config.experiment_prefix}_data' self.ensure_path_exists(file_output) for key in self.client_data: filename = f'{file_output}/{key}_epochs.csv' @@ -197,6 +191,18 @@ def save_epoch_data(self): w = DataclassWriter(f, self.client_data[key], EpochData) w.write() + def save_profiling_data(self): + file_output = f'./{self.config.output_location}/{self.config.experiment_prefix}_data' + filename = f'{file_output}/profiling_data.csv' + self.ensure_path_exists(file_output) + with open(filename, "w") as f: + for client in self.clients: + for record in client.timing_data: + w = DataclassWriter(f, [record], TimingRecord) + w.write() + + + def ensure_path_exists(self, path): Path(path).mkdir(parents=True, exist_ok=True) @@ -216,13 +222,23 @@ def run(self): epoch_to_run = self.config.epochs epoch_size = self.config.epochs_per_cycle for epoch in 
range(epoch_to_run): - print(f'Running epoch {epoch}') + logging.info(f'Running epoch {epoch}') self.remote_run_epoch(epoch_size) addition += 1 - logging.info('Printing client data') - print(self.client_data) + logging.info('Available clients with data') + logging.info(self.client_data.keys()) - logging.info(f'Saving data') + logging.info('Saving data') self.save_epoch_data() + + logging.info('Printing all clients timing data') + for client in self.clients: + logging.info(f"Timing data for client {client}") + for record in client.timing_data: + logging.info(f'{record}') + + logging.info('Saving profiling data') + self.save_profiling_data() + logging.info(f'Federator is stopping') diff --git a/fltk/util/base_config.py b/fltk/util/base_config.py index c814965f..5f87840a 100644 --- a/fltk/util/base_config.py +++ b/fltk/util/base_config.py @@ -1,3 +1,5 @@ +from datetime import datetime + import torch import json @@ -109,6 +111,8 @@ def merge_yaml(self, cfg = {}): self.dataset_name = cfg['dataset'] if 'experiment_prefix' in cfg: self.experiment_prefix = cfg['experiment_prefix'] + else: + self.experiment_prefix = f'{datetime.now()}' if 'output_location' in cfg: self.output_location = cfg['output_location'] if 'tensor_board_active' in cfg: diff --git a/fltk/util/remote.py b/fltk/util/remote.py index 9e5475c7..0202f92f 100644 --- a/fltk/util/remote.py +++ b/fltk/util/remote.py @@ -1,7 +1,8 @@ import time +from typing import Any, List from torch.distributed import rpc -from dataclasses import dataclass +from dataclasses import dataclass, field from torch.futures import Future def _call_method(method, rref, *args, **kwargs): @@ -15,16 +16,27 @@ def _remote_method_async(method, rref, *args, **kwargs): args = [method, rref] + list(args) return rpc.rpc_async(rref.owner(), _call_method, args=args, kwargs=kwargs) +@dataclass +class TimingRecord: + client_id: str + metric: str + value: Any + epoch: int = None + timestamp: float = field(default_factory=time.time) + + class 
ClientRef: ref = None name = "" data_size = 0 tb_writer = None + timing_data: List[TimingRecord] = [] def __init__(self, name, ref, tensorboard_writer): self.name = name self.ref = ref self.tb_writer = tensorboard_writer + self.timing_data = [] def __repr__(self): return self.name @@ -46,7 +58,7 @@ def callback(fut): response_obj.end_time = stop_time response_obj.future.then(callback) -def time_remote_async_call(client, method, rref, *args, **kwargs): +def timed_remote_async_call(client, method, rref, *args, **kwargs): start_time = time.time() fut = _remote_method_async(method, rref, *args, **kwargs) response = AsyncCall(fut, client, start_time=start_time) From ff03acd142be630ab28f58f1951b2c7866a691d9 Mon Sep 17 00:00:00 2001 From: Bart Cox Date: Sat, 8 May 2021 16:24:09 +0200 Subject: [PATCH 03/73] remove debug log lines --- fltk/federator.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/fltk/federator.py b/fltk/federator.py index e073b720..de36c6bd 100644 --- a/fltk/federator.py +++ b/fltk/federator.py @@ -228,15 +228,9 @@ def run(self): logging.info('Available clients with data') logging.info(self.client_data.keys()) + # Save experiment data logging.info('Saving data') self.save_epoch_data() - - logging.info('Printing all clients timing data') - for client in self.clients: - logging.info(f"Timing data for client {client}") - for record in client.timing_data: - logging.info(f'{record}') - logging.info('Saving profiling data') self.save_profiling_data() From 4e75481abc6b4a6c43c85a7f2f8a7dda9358fbb5 Mon Sep 17 00:00:00 2001 From: Bart Cox Date: Sat, 8 May 2021 21:26:27 +0200 Subject: [PATCH 04/73] Fix profiling data export --- fltk/client.py | 20 +++---------- fltk/federator.py | 72 ++++++++++++++++++++++++++++------------------- 2 files changed, 47 insertions(+), 45 deletions(-) diff --git a/fltk/client.py b/fltk/client.py index ffcb1534..12ad0b56 100644 --- a/fltk/client.py +++ b/fltk/client.py @@ -93,9 +93,6 @@ def remote_log(self, 
message): def local_log(self, message): logging.info(f'[{self.id}: {time.time()}]: {message}') - def set_configuration(self, config: str): - yaml_config = yaml.safe_load(config) - def init(self): pass @@ -115,11 +112,6 @@ def set_net(self, net): self.net = net self.net.to(self.device) - def load_model_from_file(self, model_file_path): - model_class = self.args.get_net() - default_model_path = os.path.join(self.args.get_default_model_folder_path(), model_class.__name__ + ".model") - return self.load_model_from_file(default_model_path) - def get_nn_parameters(self): """ Return the NN's parameters. @@ -184,8 +176,6 @@ def train(self, epoch): :param epoch: Current epoch # :type epoch: int """ - # self.net.train() - # save model if self.args.should_save_model(epoch): self.save_model(epoch, self.args.get_epoch_save_start_suffix()) @@ -214,8 +204,6 @@ def train(self, epoch): final_running_loss = running_loss / self.args.get_log_interval() running_loss = 0.0 - break - self.scheduler.step() # save model @@ -254,10 +242,10 @@ def test(self): self.args.get_logger().debug('Test set: Accuracy: {}/{} ({:.0f}%)'.format(correct, total, accuracy)) self.args.get_logger().debug('Test set: Loss: {}'.format(loss)) - # self.args.get_logger().debug("Classification Report:\n" + classification_report(targets_, pred_)) - # self.args.get_logger().debug("Confusion Matrix:\n" + str(confusion_mat)) - # self.args.get_logger().debug("Class precision: {}".format(str(class_precision))) - # self.args.get_logger().debug("Class recall: {}".format(str(class_recall))) + self.args.get_logger().debug("Classification Report:\n" + classification_report(targets_, pred_)) + self.args.get_logger().debug("Confusion Matrix:\n" + str(confusion_mat)) + self.args.get_logger().debug("Class precision: {}".format(str(class_precision))) + self.args.get_logger().debug("Class recall: {}".format(str(class_recall))) return accuracy, loss, class_precision, class_recall diff --git a/fltk/federator.py b/fltk/federator.py 
index de36c6bd..47acf0af 100644 --- a/fltk/federator.py +++ b/fltk/federator.py @@ -1,6 +1,9 @@ +import copy +import os import time from typing import List +import torch from dataclass_csv import DataclassWriter from torch.distributed import rpc @@ -52,7 +55,6 @@ def __init__(self, client_id_triple, num_epochs = 3, config=None): self.test_data.init_dataloader() config.data_sampler = copy_sampler - def create_clients(self, client_id_triple): for id, rank, world_size in client_id_triple: client = rpc.remote(id, Client, kwargs=dict(id=id, log_rref=self.log_rref, rank=rank, world_size=world_size, config=self.config)) @@ -106,42 +108,68 @@ def clients_ready(self): logging.info('All clients are ready') def remote_run_epoch(self, epochs): + """ + Federated Learning steps: + 1. Client selection + 2. Selected clients download model + 3. Local training + 4. Model aggregation + Repeat + :param epochs: + :return: + """ + + # 1. Client selection + selected_clients = self.select_clients(self.config.clients_per_round) + + # 2. Selected clients download model + responses = [] + for client in selected_clients: + response = timed_remote_async_call(client, Client.update_nn_parameters, client.ref, + new_params=self.test_data.get_nn_parameters()) + responses.append(response) + + for res in responses: + func_duration = res.future.wait() + res.client.timing_data.append(TimingRecord(res.client.name, 'update_param_inner', func_duration)) + res.client.timing_data.append(TimingRecord(f'{res.client.name}', 'update_param_round_trip', res.duration())) + logging.info('Weights are updated') + # 3. 
Local training responses: List[AsyncCall] = [] client_weights = [] - selected_clients = self.select_clients(self.config.clients_per_round) for client in selected_clients: response = timed_remote_async_call(client, Client.run_epochs, client.ref, num_epoch=epochs) responses.append(response) self.epoch_counter += epochs - durations = [] for res in responses: res.future.wait() epoch_data, weights = res.future.wait() - fed_stop_time = time.time() self.client_data[epoch_data.client_id].append(epoch_data) logging.info(f'{res.client.name} had a loss of {epoch_data.loss}') logging.info(f'{res.client.name} had a epoch data of {epoch_data}') res.client.timing_data.append(TimingRecord(f'{res.client.name}', 'epoch_time_round_trip', res.duration())) res.client.tb_writer.add_scalar('training loss', - epoch_data.loss_train, # for every 1000 minibatches - self.epoch_counter * res.client.data_size) + epoch_data.loss_train, # for every 1000 minibatches + self.epoch_counter * res.client.data_size) res.client.tb_writer.add_scalar('accuracy', - epoch_data.accuracy, # for every 1000 minibatches - self.epoch_counter * res.client.data_size) + epoch_data.accuracy, # for every 1000 minibatches + self.epoch_counter * res.client.data_size) res.client.tb_writer.add_scalar('training loss per epoch', - epoch_data.loss_train, # for every 1000 minibatches - self.epoch_counter) + epoch_data.loss_train, # for every 1000 minibatches + self.epoch_counter) res.client.tb_writer.add_scalar('accuracy per epoch', - epoch_data.accuracy, # for every 1000 minibatches - self.epoch_counter) + epoch_data.accuracy, # for every 1000 minibatches + self.epoch_counter) client_weights.append(weights) + + # 3. 
Model aggregation updated_model = average_nn_parameters(client_weights) # test global model @@ -151,17 +179,6 @@ def remote_run_epoch(self, epochs): self.tb_writer.add_scalar('accuracy', accuracy, self.epoch_counter * self.test_data.get_client_datasize()) self.tb_writer.add_scalar('accuracy per epoch', accuracy, self.epoch_counter) - responses = [] - for client in self.clients: - response = timed_remote_async_call(client, Client.update_nn_parameters, client.ref, new_params=updated_model) - responses.append(response) - - for res in responses: - func_duration = res.future.wait() - res.client.timing_data.append(TimingRecord(res.client.name, 'update_param_inner', func_duration)) - res.client.timing_data.append(TimingRecord(f'{res.client.name}', 'update_param_round_trip', res.duration())) - logging.info('Weights are updated') - def update_client_data_sizes(self): responses = [] for client in self.clients: @@ -195,13 +212,10 @@ def save_profiling_data(self): file_output = f'./{self.config.output_location}/{self.config.experiment_prefix}_data' filename = f'{file_output}/profiling_data.csv' self.ensure_path_exists(file_output) + records = [data for client in self.clients for data in client.timing_data] with open(filename, "w") as f: - for client in self.clients: - for record in client.timing_data: - w = DataclassWriter(f, [record], TimingRecord) - w.write() - - + w = DataclassWriter(f, records, TimingRecord) + w.write() def ensure_path_exists(self, path): Path(path).mkdir(parents=True, exist_ok=True) From 4e1416caaa32045a1c893a17adec29c10f3d7b9e Mon Sep 17 00:00:00 2001 From: Bart Cox Date: Sat, 8 May 2021 21:27:53 +0200 Subject: [PATCH 05/73] Version bump --- fltk/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fltk/__init__.py b/fltk/__init__.py index aa6e546b..d1eb1a0f 100644 --- a/fltk/__init__.py +++ b/fltk/__init__.py @@ -1,2 +1,2 @@ -__version__ = '0.3.1' \ No newline at end of file +__version__ = '0.3.2' \ No newline at end of file 
From e458dfc240d2fb07b03866950e14b900917e7f4d Mon Sep 17 00:00:00 2001 From: Bart Cox Date: Sat, 8 May 2021 22:08:16 +0200 Subject: [PATCH 06/73] Extend profiling data --- fltk/client.py | 15 +++++++-------- fltk/federator.py | 37 ++++++++++++++++++++++++------------- fltk/util/results.py | 5 +++-- 3 files changed, 34 insertions(+), 23 deletions(-) diff --git a/fltk/client.py b/fltk/client.py index 12ad0b56..90acc8ed 100644 --- a/fltk/client.py +++ b/fltk/client.py @@ -250,25 +250,24 @@ def test(self): return accuracy, loss, class_precision, class_recall def run_epochs(self, num_epoch): - start_time_train = datetime.datetime.now() + start_time_train = time.time() self.dataset.get_train_sampler().set_epoch_size(num_epoch) loss, weights = self.train(self.epoch_counter) self.epoch_counter += num_epoch - elapsed_time_train = datetime.datetime.now() - start_time_train - train_time_ms = int(elapsed_time_train.total_seconds()*1000) + elapsed_train_time = time.time() - start_time_train - start_time_test = datetime.datetime.now() + start_time_test = time.time() accuracy, test_loss, class_precision, class_recall = self.test() - elapsed_time_test = datetime.datetime.now() - start_time_test - test_time_ms = int(elapsed_time_test.total_seconds()*1000) + elapsed_test_time = time.time() - start_time_test - data = EpochData(self.epoch_counter, train_time_ms, test_time_ms, loss, accuracy, test_loss, class_precision, class_recall, client_id=self.id) + data = EpochData(self.epoch_counter, num_epoch, elapsed_train_time, elapsed_test_time, loss, accuracy, test_loss, class_precision, class_recall, client_id=self.id) self.epoch_results.append(data) # Copy GPU tensors to CPU for k, v in weights.items(): weights[k] = v.cpu() - return data, weights + end_func_time = time.time() - start_time_train + return data, weights, end_func_time def save_model(self, epoch, suffix): """ diff --git a/fltk/federator.py b/fltk/federator.py index 47acf0af..fe41363f 100644 --- a/fltk/federator.py +++ 
b/fltk/federator.py @@ -107,7 +107,7 @@ def clients_ready(self): time.sleep(2) logging.info('All clients are ready') - def remote_run_epoch(self, epochs): + def remote_run_epoch(self, epochs_subset): """ Federated Learning steps: 1. Client selection @@ -131,25 +131,33 @@ def remote_run_epoch(self, epochs): for res in responses: func_duration = res.future.wait() - res.client.timing_data.append(TimingRecord(res.client.name, 'update_param_inner', func_duration)) - res.client.timing_data.append(TimingRecord(f'{res.client.name}', 'update_param_round_trip', res.duration())) + res.client.timing_data.append(TimingRecord(res.client.name, 'update_param_inner', func_duration, epochs_subset[0])) + res.client.timing_data.append(TimingRecord(res.client.name, 'update_param_round_trip', res.duration(), epochs_subset[0])) + communication_duration_2way = res.duration() - func_duration + res.client.timing_data.append( + TimingRecord(res.client.name, 'communication_2way', communication_duration_2way, epochs_subset[0])) logging.info('Weights are updated') # 3. 
Local training responses: List[AsyncCall] = [] client_weights = [] for client in selected_clients: - response = timed_remote_async_call(client, Client.run_epochs, client.ref, num_epoch=epochs) + response = timed_remote_async_call(client, Client.run_epochs, client.ref, num_epoch=len(epochs_subset)) responses.append(response) - self.epoch_counter += epochs + self.epoch_counter += len(epochs_subset) for res in responses: res.future.wait() - epoch_data, weights = res.future.wait() + epoch_data, weights, func_duration = res.future.wait() self.client_data[epoch_data.client_id].append(epoch_data) logging.info(f'{res.client.name} had a loss of {epoch_data.loss}') logging.info(f'{res.client.name} had a epoch data of {epoch_data}') - res.client.timing_data.append(TimingRecord(f'{res.client.name}', 'epoch_time_round_trip', res.duration())) + res.client.timing_data.append(TimingRecord(res.client.name, 'epoch_time_inner', func_duration, epochs_subset[0])) + res.client.timing_data.append(TimingRecord(res.client.name, 'epoch_time_train', epoch_data.duration_train, epochs_subset[0])) + res.client.timing_data.append(TimingRecord(res.client.name, 'epoch_time_test', epoch_data.duration_test, epochs_subset[0])) + res.client.timing_data.append(TimingRecord(res.client.name, 'epoch_time_round_trip', res.duration(), epochs_subset[0])) + communication_duration_2way = res.duration() - func_duration + res.client.timing_data.append(TimingRecord(res.client.name, 'communication_2way', communication_duration_2way, epochs_subset[0])) res.client.tb_writer.add_scalar('training loss', epoch_data.loss_train, # for every 1000 minibatches @@ -231,14 +239,17 @@ def run(self): self.clients_ready() self.update_client_data_sizes() - epoch_to_run = self.num_epoch - addition = 0 + + + # Get total epoch to run epoch_to_run = self.config.epochs epoch_size = self.config.epochs_per_cycle - for epoch in range(epoch_to_run): - logging.info(f'Running epoch {epoch}') - self.remote_run_epoch(epoch_size) - addition += 
1 + + epochs = list(range(1, epoch_to_run + 1)) + epoch_chunks = [epochs[x:x + epoch_size] for x in range(0, len(epochs), epoch_size)] + for epoch_subset in epoch_chunks: + logging.info(f'Running epochs {epoch_subset}') + self.remote_run_epoch(epoch_subset) logging.info('Available clients with data') logging.info(self.client_data.keys()) diff --git a/fltk/util/results.py b/fltk/util/results.py index af560479..cf762b8a 100644 --- a/fltk/util/results.py +++ b/fltk/util/results.py @@ -4,8 +4,9 @@ @dataclass class EpochData: epoch_id: int - duration_train: int - duration_test: int + num_epochs: int + duration_train: float + duration_test: float loss_train: float accuracy: float loss: float From 2dd4da13efb6cbf175cc81a7cfa9c2cd8f7c293a Mon Sep 17 00:00:00 2001 From: bacox Date: Tue, 11 Jan 2022 02:27:15 +0100 Subject: [PATCH 07/73] Add profiler --- Dockerfile | 18 +- configs/experiment.yaml | 17 +- configs/experiment_deadline.yaml | 25 ++ configs/experiment_vanilla.yaml | 25 ++ deploy/templates/client_stub_default.yml | 6 +- deploy/templates/client_stub_medium.yml | 4 +- deploy/templates/client_stub_slow.yml | 4 +- deploy/templates/system_stub.yml | 2 + fltk/client.py | 200 +++++++++++++- fltk/federator.py | 324 ++++++++++++++++++++--- fltk/launch.py | 4 +- fltk/strategy/aggregation.py | 14 +- fltk/util/analyze.py | 15 ++ fltk/util/base_config.py | 2 +- fltk/util/generate_data_distribution.py | 9 +- fltk/util/generate_docker_compose.py | 8 +- fltk/util/profile_plots.py | 151 +++++++++++ fltk/util/profiler.py | 189 +++++++++++++ requirements.txt | 13 + 19 files changed, 953 insertions(+), 77 deletions(-) create mode 100644 configs/experiment_deadline.yaml create mode 100644 configs/experiment_vanilla.yaml create mode 100644 fltk/util/analyze.py create mode 100644 fltk/util/profile_plots.py create mode 100644 fltk/util/profiler.py create mode 100644 requirements.txt diff --git a/Dockerfile b/Dockerfile index b42cd68c..8ad4937b 100644 --- a/Dockerfile +++ b/Dockerfile @@ 
-19,16 +19,19 @@ RUN apt-get update \ # Copy the current folder to the working directory COPY setup.py ./ +COPY requirements.txt ./ +#COPY fltk ./fltk +#COPY configs ./configs # Install all required packages for the generator -RUN pip3 setup.py install +RUN python3 -m pip install -r requirements.txt #RUN mkdir -p ./data/MNIST #COPY ./data/MNIST ../data/MNIST -ADD fltk ./fedsim +#ADD fltk ./fedsim #RUN ls -la -COPY federated_learning.py ./ -COPY custom_mnist.py ./ +#COPY federated_learning.py ./ +#COPY custom_mnist.py ./ #RUN ls -la ./fedsim # Expose the container's port to the host OS @@ -38,4 +41,9 @@ EXPOSE 5000 # CMD ["python3", "/opt/Generatrix/rpc_parameter_server.py", "--world_size=2", "--rank=0", "--master_addr=192.168.144.2"] #CMD python3 /opt/federation-lab/rpc_parameter_server.py --world_size=$WORLD_SIZE --rank=$RANK --master_addr=10.5.0.11 -CMD python3 /opt/federation-lab/federated_learning.py $RANK $WORLD_SIZE 10.5.0.11 \ No newline at end of file +#CMD python3 /opt/federation-lab/federated_learning.py $RANK $WORLD_SIZE 10.5.0.11 +COPY fltk ./fltk +COPY configs ./configs +#CMD python3 ./fltk/__main__.py single configs/experiment.yaml --rank=$RANK +CMD python3 -m fltk single configs/experiment.yaml --rank=$RANK +#CMD python3 setup.py \ No newline at end of file diff --git a/configs/experiment.yaml b/configs/experiment.yaml index a59786bf..c8e30bce 100644 --- a/configs/experiment.yaml +++ b/configs/experiment.yaml @@ -1,19 +1,24 @@ --- # Experiment configuration -total_epochs: 5 +total_epochs: 4 epochs_per_cycle: 1 wait_for_clients: true net: Cifar10CNN dataset: cifar10 # Use cuda is available; setting to false will force CPU -cuda: true +cuda: false experiment_prefix: 'experiment_sample' output_location: 'output' tensor_board_active: true -clients_per_round: 1 +clients_per_round: 2 +# sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" 
(default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused system: federator: - hostname: '131.180.40.72' - nic: 'wlo1' + hostname: '10.5.0.11' + nic: 'eth0' clients: - amount: 1 + amount: 2 diff --git a/configs/experiment_deadline.yaml b/configs/experiment_deadline.yaml new file mode 100644 index 00000000..5ffdca23 --- /dev/null +++ b/configs/experiment_deadline.yaml @@ -0,0 +1,25 @@ +--- +# Experiment configuration +total_epochs: 4 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'offloading_deadline' +offload_stategy: offload +output_location: 'output' +tensor_board_active: true +clients_per_round: 2 +# sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + hostname: '10.5.0.11' + nic: 'eth0' + clients: + amount: 2 diff --git a/configs/experiment_vanilla.yaml b/configs/experiment_vanilla.yaml new file mode 100644 index 00000000..90fcb77b --- /dev/null +++ b/configs/experiment_vanilla.yaml @@ -0,0 +1,25 @@ +--- +# Experiment configuration +total_epochs: 4 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'offloading_vanilla' +offload_stategy: vanilla +output_location: 'output' +tensor_board_active: true +clients_per_round: 2 +# sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability 
|| alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + hostname: '10.5.0.11' + nic: 'eth0' + clients: + amount: 2 diff --git a/deploy/templates/client_stub_default.yml b/deploy/templates/client_stub_default.yml index d65e2624..d8955310 100644 --- a/deploy/templates/client_stub_default.yml +++ b/deploy/templates/client_stub_default.yml @@ -3,9 +3,11 @@ client_name: # name can be anything restart: "no" # if it crashes for example build: . # look for the docker file where this file is currently located volumes: - - ./docker_data:/opt/federation-lab/data +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data - ./default_models:/opt/federation-lab/default_models - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk environment: - PYTHONUNBUFFERED=1 - RANK={rank} @@ -17,5 +19,5 @@ client_name: # name can be anything deploy: resources: limits: - cpus: '1.25' + cpus: '2' memory: 1024M diff --git a/deploy/templates/client_stub_medium.yml b/deploy/templates/client_stub_medium.yml index 7083a3b6..8f07f46b 100644 --- a/deploy/templates/client_stub_medium.yml +++ b/deploy/templates/client_stub_medium.yml @@ -3,9 +3,11 @@ client_name: # name can be anything restart: "no" # if it crashes for example build: . 
# look for the docker file where this file is currently located volumes: - - ./docker_data:/opt/federation-lab/data +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data - ./default_models:/opt/federation-lab/default_models - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk environment: - PYTHONUNBUFFERED=1 - RANK={rank} diff --git a/deploy/templates/client_stub_slow.yml b/deploy/templates/client_stub_slow.yml index 03a3fe48..7d541d65 100644 --- a/deploy/templates/client_stub_slow.yml +++ b/deploy/templates/client_stub_slow.yml @@ -3,9 +3,11 @@ client_name: # name can be anything restart: "no" # if it crashes for example build: . # look for the docker file where this file is currently located volumes: - - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data +# - ./docker_data:/opt/federation-lab/data - ./default_models:/opt/federation-lab/default_models - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk environment: - PYTHONUNBUFFERED=1 - RANK={rank} diff --git a/deploy/templates/system_stub.yml b/deploy/templates/system_stub.yml index eda5fc6d..4f05dbfc 100644 --- a/deploy/templates/system_stub.yml +++ b/deploy/templates/system_stub.yml @@ -7,7 +7,9 @@ services: build: . 
# look for the docker file where this file is currently located volumes: # - ./data/MNIST:/opt/federation-lab/data/MNIST + - ./data:/opt/federation-lab/data - ./output:/opt/federation-lab/output + - ./fltk:/opt/federation-lab/fltk environment: - PYTHONUNBUFFERED=1 - RANK=0 diff --git a/fltk/client.py b/fltk/client.py index de3095e3..f841a332 100644 --- a/fltk/client.py +++ b/fltk/client.py @@ -12,16 +12,23 @@ import numpy as np from sklearn.metrics import confusion_matrix from sklearn.metrics import classification_report +from torch.distributed.rpc import RRef + from fltk.schedulers import MinCapableStepLR from fltk.util.arguments import Arguments +from fltk.util.fed_avg import average_nn_parameters from fltk.util.log import FLLogger import yaml +from fltk.util.profiler import Profiler from fltk.util.results import EpochData logging.basicConfig(level=logging.DEBUG) +global_dict = {} +global_model_weights = {} +global_offload_received = False def _call_method(method, rref, *args, **kwargs): @@ -37,21 +44,38 @@ def _remote_method(method, rref, *args, **kwargs): args = [method, rref] + list(args) return rpc.rpc_sync(rref.owner(), _call_method, args=args, kwargs=kwargs) -def _remote_method_async(method, rref, *args, **kwargs): + +def _remote_method_async(method, rref, *args, **kwargs) -> torch.Future: args = [method, rref] + list(args) return rpc.rpc_async(rref.owner(), _call_method, args=args, kwargs=kwargs) + +def _remote_method_async_by_info(method, worker_info, *args, **kwargs): + args = [method, worker_info] + list(args) + return rpc.rpc_async(worker_info, _call_method, args=args, kwargs=kwargs) + class Client: counter = 0 finished_init = False dataset = None epoch_results: List[EpochData] = [] epoch_counter = 0 + server_ref = None + + # Model offloading + received_offload_model = False + offloaded_model_weights = None + call_to_offload = False + client_to_offload_to : str = None def __init__(self, id, log_rref, rank, world_size, config = None): 
logging.info(f'Welcome to client {id}') self.id = id + global_dict['id'] = id + global global_model_weights, global_offload_received + global_model_weights = None + global_offload_received = False self.log_rref = log_rref self.rank = rank self.world_size = world_size @@ -75,9 +99,20 @@ def init_device(self): else: return torch.device("cpu") + def send_reference(self, server_ref): + self.local_log(f'Got worker_info from server {server_ref}') + self.server_ref = server_ref + + @staticmethod + def static_ping(): + print(f'Got static ping with global_dict={global_dict}') + def ping(self): + self.local_log(f'Pong!') + self.local_log(f'Pong2! {self.id}') return 'pong' + def rpc_test(self): sleep_time = random.randint(1, 5) time.sleep(sleep_time) @@ -109,7 +144,8 @@ def init_dataloader(self, ): logging.info('Done with init') def is_ready(self): - return self.finished_init + logging.info("Client is ready") + return self.finished_init, RRef(self) def set_net(self, net): self.net = net @@ -175,13 +211,62 @@ def update_nn_parameters(self, new_params): if self.log_rref: self.remote_log(f'Weights of the model are updated') - def train(self, epoch): + def report_performance_async(self, performance_data): + self.local_log('Reporting performance') + from fltk.federator import Federator + return _remote_method_async(Federator.perf_metric_endpoint, self.server_ref, self.id, performance_data) + + def report_performance_estimate(self, performance_data): + self.local_log('Reporting performance estimate') + from fltk.federator import Federator + return _remote_method_async(Federator.perf_est_endpoint, self.server_ref, self.id, performance_data) + + @staticmethod + def offload_receive_endpoint(model_weights): + print(f'Got the offload_receive_endpoint endpoint') + global global_model_weights, global_offload_received + global_model_weights = copy.deepcopy(model_weights.copy()) + global_offload_received = True + + @staticmethod + def offload_receive_endpoint_2(string): + print(f'Got the 
offload_receive_endpoint endpoint') + print(f'Got the offload_receive_endpoint endpoint with arg={string}') + # global global_model_weights, global_offload_received + # global_model_weights = model_weights.copy(deep=True) + # global_offload_received = True + + + def call_to_offload_endpoint(self, client_to_offload: RRef): + self.local_log(f'Got the call to offload endpoint to {client_to_offload}') + self.client_to_offload_to = client_to_offload + self.call_to_offload = True + + def freeze_layers(self, until): + ct = 0 + for child in self.net.children(): + ct += 1 + if ct < until: + for param in child.parameters(): + param.requires_grad = False + + def unfreeze_layers(self): + for param in self.net.parameters(): + param.requires_grad = True + + def train(self, epoch, deadline_time: int = None): """ :param epoch: Current epoch # :type epoch: int """ - # self.net.train() + # Ignore profiler for now + # p = Profiler() + # p.attach(self.net) + + # self.net.train() + global global_model_weights, global_offload_received + deadline_time = None # save model if self.args.should_save_model(epoch): self.save_model(epoch, self.args.get_epoch_save_start_suffix()) @@ -190,19 +275,78 @@ def train(self, epoch): final_running_loss = 0.0 if self.args.distributed: self.dataset.train_sampler.set_epoch(epoch) + self.args.get_logger().info(f'{self.id}: Number of training samples: {len(list(self.dataset.get_train_loader()))}') + number_of_training_samples = len(list(self.dataset.get_train_loader())) + # Ignore profiler for now + # performance_metric_interval = 20 + # perf_resp = None + + profiling_size = 40 + profiling_data = np.zeros(profiling_size) + active_profiling = True + control_start_time = time.time() for i, (inputs, labels) in enumerate(self.dataset.get_train_loader(), 0): - inputs, labels = inputs.to(self.device), labels.to(self.device) + start_train_time = time.time() + + # Check if there is a call to offload + if self.call_to_offload: + self.args.get_logger().info('Got call 
to offload model') + model_weights = self.get_nn_parameters() + # print(self.client_to_offload_to) + # r_ref = rpc.remote(self.client_to_offload_to, Client.static_ping, args=()) + # print(f'Result of rref: {r_ref.to_here()}') + # ret = rpc.rpc_sync(self.client_to_offload_to, Client.static_ping, args=()) + # print(f'Result of rref: {ret}') + # ret = rpc.rpc_sync(self.client_to_offload_to, Client.offload_receive_endpoint_2, args=(["Hello"])) + # print(f'Result of rref: {ret}') + + ret = rpc.rpc_sync(self.client_to_offload_to, Client.offload_receive_endpoint, args=([model_weights])) + print(f'Result of rref: {ret}') + + # r_ref = rpc.remote(self.client_to_offload_to, Client.static_ping, args=()) + # r_ref = rpc.remote(self.client_to_offload_to, Client.offload_receive_endpoint_2, args=("Hello world")) + # _remote_method_async(Client.static_ping, self.client_to_offload_to) + # fut1 = rpc.rpc_async(self.client_to_offload_to, Client.ping) + # _remote_method_async_by_info(Client.offload_receive_endpoint, self.client_to_offload_to, model_weights) + self.call_to_offload = False + self.client_to_offload_to = None + # This number only works for cifar10cnn + self.freeze_layers(15) + + # Check if there is a model to incorporate + if global_offload_received: + self.args.get_logger().info('Merging offloaded model') + self.args.get_logger().info('FedAvg locally with offloaded model') + updated_weights = average_nn_parameters([self.get_nn_parameters(), global_model_weights]) + self.args.get_logger().info('Updating local weights due to offloading') + self.update_nn_parameters(updated_weights) + global_offload_received = False + global_model_weights = None + + + if deadline_time is not None: + if time.time() >= deadline_time: + self.args.get_logger().info('Stopping training due to deadline time') + break + else: + self.args.get_logger().info(f'Time to deadline: {deadline_time - time.time()}') + inputs, labels = inputs.to(self.device), labels.to(self.device) # zero the parameter 
gradients self.optimizer.zero_grad() + # Ignore profile for now + # p.set_warmup(False) + # p.signal_forward_start() # forward + backward + optimize outputs = self.net(inputs) loss = self.loss_function(outputs, labels) + + # Ignore profiler for now + # p.signal_backward_start() loss.backward() self.optimizer.step() - # print statistics running_loss += loss.item() if i % self.args.get_log_interval() == 0: @@ -210,8 +354,43 @@ def train(self, epoch): final_running_loss = running_loss / self.args.get_log_interval() running_loss = 0.0 + # Ignore profiler for now + # p.set_warmup(True) + # if i % performance_metric_interval == 0: + # # perf_metrics = p.calc_metric(15) + # perf_metrics = p.export_data() + # self.args.get_logger().info(f'Number of events = {len(perf_metrics)}') + # perf_resp = self.report_performance_async(perf_metrics) + # p.reset() + if active_profiling: + # print(i) + end_train_time = time.time() + batch_duration = end_train_time - start_train_time + profiling_data[i] = batch_duration + if i == profiling_size-1: + active_profiling = False + time_per_batch = profiling_data.mean() + logging.info(f'Average batch duration is {time_per_batch}') + + # Estimated training time + est_total_time = number_of_training_samples * time_per_batch + logging.info(f'Estimated training time is {est_total_time}') + self.report_performance_estimate((time_per_batch, est_total_time, number_of_training_samples)) + # logging.info(f'Batch time is {batch_duration}') + + + if i > 50: + break + + control_end_time = time.time() + + logging.info(f'Measure end time is {(control_end_time - control_start_time)}') + self.scheduler.step() + # Reset the layers + self.unfreeze_layers() + # save model if self.args.should_save_model(epoch): self.save_model(epoch, self.args.get_epoch_save_end_suffix()) @@ -255,10 +434,15 @@ def test(self): return accuracy, loss, class_precision, class_recall - def run_epochs(self, num_epoch): + def run_epochs(self, num_epoch, deadline: int = None): + 
start_time = time.time() + deadline_threshold = 10 start_time_train = datetime.datetime.now() + train_stop_time = None + if deadline is not None: + train_stop_time = start_time + deadline - deadline_threshold self.dataset.get_train_sampler().set_epoch_size(num_epoch) - loss, weights = self.train(self.epoch_counter) + loss, weights = self.train(self.epoch_counter, train_stop_time) self.epoch_counter += num_epoch elapsed_time_train = datetime.datetime.now() - start_time_train train_time_ms = int(elapsed_time_train.total_seconds()*1000) diff --git a/fltk/federator.py b/fltk/federator.py index 88ccf31d..f70f30cc 100644 --- a/fltk/federator.py +++ b/fltk/federator.py @@ -1,9 +1,14 @@ import datetime import time +from dataclasses import dataclass from typing import List +import pandas as pd +import torch from dataclass_csv import DataclassWriter from torch.distributed import rpc +from torch.distributed.rpc import RRef, get_worker_info +from torch.utils.data._utils.worker import WorkerInfo from fltk.client import Client from fltk.datasets.data_distribution import distribute_batches_equally @@ -19,28 +24,44 @@ from pathlib import Path import logging +from fltk.util.profile_plots import stability_plot, parse_stability_data from fltk.util.results import EpochData from fltk.util.tensor_converter import convert_distributed_data_into_numpy logging.basicConfig(level=logging.DEBUG) + def _call_method(method, rref, *args, **kwargs): return method(rref.local_value(), *args, **kwargs) +def _call_method_2(method, rref, *args, **kwargs): + print(method) + return method(rref, *args, **kwargs) + def _remote_method(method, rref, *args, **kwargs): args = [method, rref] + list(args) return rpc.rpc_sync(rref.owner(), _call_method, args=args, kwargs=kwargs) -def _remote_method_async(method, rref, *args, **kwargs): + +def _remote_method_async(method, rref, *args, **kwargs) -> torch.Future: args = [method, rref] + list(args) return rpc.rpc_async(rref.owner(), _call_method, args=args, 
kwargs=kwargs) + +def _remote_method_async_by_name(method, client_name, *args, **kwargs) -> torch.Future: + args = [method, client_name] + list(args) + print(client_name) + print(_call_method_2) + return rpc.rpc_sync(client_name, _call_method_2, args=args, kwargs=kwargs) + + class ClientRef: ref = None name = "" data_size = 0 tb_writer = None + available = False def __init__(self, name, ref, tensorboard_writer): self.name = name @@ -50,6 +71,25 @@ def __init__(self, name, ref, tensorboard_writer): def __repr__(self): return self.name +@dataclass +class ClientResponse: + id: int + client: ClientRef + future: torch.Future + start_time: float = time.time() + end_time: float = 0 + done: bool = False + dropped = True + + def finish(self): + self.end_time = time.time() + self.done = True + self.dropped = False + + def duration(self): + return self.end_time - self.start_time + + class Federator: """ Central component of the Federated Learning System: The Federator @@ -66,6 +106,15 @@ class Federator: clients: List[ClientRef] = [] epoch_counter = 0 client_data = {} + response_list : List[ClientResponse] = [] + response_id = 0 + + reference_lookup = {} + performance_estimate = {} + + + # Keep track of the experiment data + exp_data_general = [] def __init__(self, client_id_triple, num_epochs = 3, config=None): log_rref = rpc.RRef(FLLogger()) @@ -77,13 +126,15 @@ def __init__(self, client_id_triple, num_epochs = 3, config=None): self.tb_writer = SummaryWriter(f'{self.tb_path}/{config.experiment_prefix}_federator') self.create_clients(client_id_triple) self.config.init_logger(logging) + self.performance_data = {} logging.info("Creating test client") copy_sampler = config.data_sampler config.data_sampler = "uniform" self.test_data = Client("test", None, 1, 2, config) - self.test_data.init_dataloader() config.data_sampler = copy_sampler + self.reference_lookup[get_worker_info().name] = RRef(self) + def create_clients(self, client_id_triple): @@ -94,7 +145,8 @@ def 
create_clients(self, client_id_triple): self.client_data[id] = [] def select_clients(self, n = 2): - return random_selection(self.clients, n) + available_clients = list(filter(lambda x : x.available, self.clients)) + return random_selection(available_clients, n) def ping_all(self): for client in self.clients: @@ -125,67 +177,236 @@ def clients_ready(self): responses.append((client, _remote_method_async(Client.is_ready, client.ref))) all_ready = True for res in responses: - result = res[1].wait() + result, client_ref = res[1].wait() if result: + self.reference_lookup[res[0].name] = client_ref logging.info(f'{res[0]} is ready') ready_clients.append(res[0]) + # Set the client to available + res[0].available = True else: logging.info(f'Waiting for {res[0]}') all_ready = False time.sleep(2) + + # WorkerInfo(id=1, name="client1").local_value() + # rpc.rpc_sync(self.nameclients[0].ref.owner(), Client.ping, args=(self.clients[0].ref)) + logging.info(f'Sending a ping to client {self.clients[0].name}') + r_ref = rpc.remote(self.clients[0].name, Client.static_ping, args=()) + print(f'Result of rref: {r_ref.to_here()}') logging.info('All clients are ready') + for idx, c in enumerate(self.clients): + logging.info(f'[{idx}]={c}') - def remote_run_epoch(self, epochs): - responses = [] - client_weights = [] - selected_clients = self.select_clients(self.config.clients_per_round) - for client in selected_clients: - responses.append((client, _remote_method_async(Client.run_epochs, client.ref, num_epoch=epochs))) - self.epoch_counter += epochs - for res in responses: - epoch_data, weights = res[1].wait() - self.client_data[epoch_data.client_id].append(epoch_data) - logging.info(f'{res[0]} had a loss of {epoch_data.loss}') - logging.info(f'{res[0]} had a epoch data of {epoch_data}') - res[0].tb_writer.add_scalar('training loss', - epoch_data.loss_train, # for every 1000 minibatches - self.epoch_counter * res[0].data_size) + def perf_metric_endpoint(self, node_id, perf_data): + if 
node_id not in self.performance_data.keys(): + self.performance_data[node_id] = [] + self.performance_data[node_id].append(perf_data) - res[0].tb_writer.add_scalar('accuracy', - epoch_data.accuracy, # for every 1000 minibatches - self.epoch_counter * res[0].data_size) + def perf_est_endpoint(self, node_id, performance_data): + logging.info(f'Received performance estimate of node {node_id}') + self.performance_estimate[node_id] = performance_data - res[0].tb_writer.add_scalar('training loss per epoch', - epoch_data.loss_train, # for every 1000 minibatches - self.epoch_counter) + def send_clients_ref(self): - res[0].tb_writer.add_scalar('accuracy per epoch', - epoch_data.accuracy, # for every 1000 minibatches - self.epoch_counter) + for c in self.clients: + # _remote_method_async(Client.send_reference, c.ref, rpc.get_worker_info()) + _remote_method_async(Client.send_reference, c.ref, RRef(self)) - client_weights.append(weights) - updated_model = average_nn_parameters(client_weights) + def num_available_clients(self): + return sum(c.available == True for c in self.clients) - # test global model - logging.info("Testing on global test set") - self.test_data.update_nn_parameters(updated_model) - accuracy, loss, class_precision, class_recall = self.test_data.test() - # self.tb_writer.add_scalar('training loss', loss, self.epoch_counter * self.test_data.get_client_datasize()) # does not seem to work :( ) - self.tb_writer.add_scalar('accuracy', accuracy, self.epoch_counter * self.test_data.get_client_datasize()) - self.tb_writer.add_scalar('accuracy per epoch', accuracy, self.epoch_counter) + def process_response_list(self): + for resp in self.response_list: + if resp.future.done(): + resp.finish() + resp.client.available = True + self.response_list = list(filter(lambda x: not x.done, self.response_list)) + + def ask_client_to_offload(self, client1_ref, client2_ref): + logging.info(f'Offloading call from {client1_ref} to {client2_ref}') + # args = [method, rref] + 
list(args) + # rpc.rpc_sync(client1_ref, Client.call_to_offload_endpoint, args=(client2_ref)) + # print(_remote_method_async_by_name(Client.client_to_offload_to, client1_ref, client2_ref)) + _remote_method(Client.call_to_offload_endpoint, client1_ref, client2_ref) + logging.info(f'Done with call to offload') + + def remote_run_epoch(self, epochs): + start_epoch_time = time.time() + deadline = 400 + """ + 1. Client selection + 2. Run local updates + 3. Retrieve data + 4. Aggregate data + """ + + client_weights = [] + while self.num_available_clients() < self.config.clients_per_round: + logging.warning(f'Waiting for enough clients to become available. # Available Clients = {self.num_available_clients()}, but need {self.config.clients_per_round}') + self.process_response_list() + time.sleep(1) + #### Client Selection #### + selected_clients = self.select_clients(self.config.clients_per_round) + + #### Send model to clients #### responses = [] - for client in self.clients: + for client in selected_clients: + logging.info(f'Send updated model to selected client: {client.name}') responses.append( - (client, _remote_method_async(Client.update_nn_parameters, client.ref, new_params=updated_model))) + (client, _remote_method_async(Client.update_nn_parameters, client.ref, new_params=self.test_data.get_nn_parameters()))) for res in responses: res[1].wait() logging.info('Weights are updated') + responses: List[ClientResponse] = [] + for client in selected_clients: + cr = ClientResponse(self.response_id, client, _remote_method_async(Client.run_epochs, client.ref, num_epoch=epochs, deadline=deadline)) + self.response_id += 1 + self.response_list.append(cr) + responses.append(cr) + client.available = False + # responses.append((client, time.time(), _remote_method_async(Client.run_epochs, client.ref, num_epoch=epochs))) + self.epoch_counter += epochs + + deadline_time = 400 + # deadline_time = None + # Wait loop with deadline + start = time.time() + def reached_deadline(): + if 
deadline_time is None: + return False + # logging.info(f'{(time.time() - start)} >= {deadline_time}') + return (time.time() -start) >= deadline_time + + logging.info('Starting waiting period') + # Wait loop without deadline + all_finished = False + + # Debug for testing! + has_not_called = True + + show_perf_data = True + while not all_finished and not reached_deadline(): + + # if has_not_called and (time.time() -start) > 10: + # logging.info('Sending call to offload') + # has_not_called = False + # + # self.ask_client_to_offload(self.reference_lookup[selected_clients[0].name], selected_clients[1].name) + + # Check if all performance data has come in + has_all_perf_data = True + + if show_perf_data: + for sc in selected_clients: + if sc.name not in self.performance_estimate.keys(): + has_all_perf_data = False + + if has_all_perf_data: + logging.info('Got all performance data') + print(self.performance_estimate) + show_perf_data = False + + # Make offloading call + # @NOTE: this will only work for the two node scenario + + lowest_est_time = 0 + est_keys = list(self.performance_estimate.keys()) + + # for k, v in self.performance_estimate.items(): + # if v[1] > lowest_est_time: + # lowest_est_time = v[1] + # weak_client = k + # else: + # strong_client = k + + weak_client = est_keys[0] + strong_client = est_keys[1] + if self.performance_estimate[est_keys[1]][1] > self.performance_estimate[est_keys[0]][1]: + weak_client = est_keys[1] + strong_client = est_keys[0] + + logging.info(f'Offloading from {weak_client} -> {strong_client} due to {self.performance_estimate[weak_client]} and {self.performance_estimate[strong_client]}') + logging.info('Sending call to offload') + self.ask_client_to_offload(self.reference_lookup[selected_clients[0].name], selected_clients[1].name) + # selected_clients[0] + # logging.info(f'Status of all_finished={all_finished} and deadline={reached_deadline()}') + all_finished = True + for client_response in responses: + if 
client_response.future.done(): + if not client_response.done: + client_response.finish() + else: + all_finished = False + logging.info(f'Stopped waiting due to all_finished={all_finished} and deadline={reached_deadline()}') + + for client_response in responses: + + client = client_response.client + logging.info(f'{client} had a exec time of {client_response.duration()} dropped?={client_response.dropped}') + if client_response.dropped: + client_response.end_time = time.time() + logging.info( + f'{client} had a exec time of {client_response.duration()} dropped?={client_response.dropped}') + + if not client_response.dropped: + client.available = True + epoch_data, weights = client_response.future.wait() + self.client_data[epoch_data.client_id].append(epoch_data) + logging.info(f'{client} had a loss of {epoch_data.loss}') + logging.info(f'{client} had a epoch data of {epoch_data}') + + client.tb_writer.add_scalar('training loss', + epoch_data.loss_train, # for every 1000 minibatches + self.epoch_counter * client.data_size) + + client.tb_writer.add_scalar('accuracy', + epoch_data.accuracy, # for every 1000 minibatches + self.epoch_counter * client.data_size) + + client.tb_writer.add_scalar('training loss per epoch', + epoch_data.loss_train, # for every 1000 minibatches + self.epoch_counter) + + client.tb_writer.add_scalar('accuracy per epoch', + epoch_data.accuracy, # for every 1000 minibatches + self.epoch_counter) + + client_weights.append(weights) + + self.performance_estimate = {} + if len(client_weights): + updated_model = average_nn_parameters(client_weights) + + # test global model + logging.info("Testing on global test set") + self.test_data.update_nn_parameters(updated_model) + accuracy, loss, class_precision, class_recall = self.test_data.test() + # self.tb_writer.add_scalar('training loss', loss, self.epoch_counter * self.test_data.get_client_datasize()) # does not seem to work :( ) + self.tb_writer.add_scalar('accuracy', accuracy, self.epoch_counter * 
self.test_data.get_client_datasize()) + self.tb_writer.add_scalar('accuracy per epoch', accuracy, self.epoch_counter) + end_epoch_time = time.time() + duration = end_epoch_time - start_epoch_time + + self.exp_data_general.append([self.epoch_counter, duration, accuracy, loss, class_precision, class_recall]) + + + def save_experiment_data(self): + p = Path(f'./{self.config.output_location}') + file_output = f'./{self.config.output_location}' + exp_prefix = self.config.experiment_prefix + self.ensure_path_exists(file_output) + file_output /= f'{exp_prefix}-general_data.csv' + # general_filename = f'{file_output}/general_data.csv' + df = pd.DataFrame(self.exp_data_general, columns=['epoch', 'duration', 'accuracy', 'loss', 'class_precision', 'class_recall']) + df.to_csv(file_output) + def update_client_data_sizes(self): responses = [] for client in self.clients: @@ -193,6 +414,7 @@ def update_client_data_sizes(self): for res in responses: res[0].data_size = res[1].wait() logging.info(f'{res[0]} had a result of datasize={res[0].data_size}') + # @TODO: Use datasize in aggregation method def remote_test_sync(self): responses = [] @@ -216,13 +438,25 @@ def save_epoch_data(self): def ensure_path_exists(self, path): Path(path).mkdir(parents=True, exist_ok=True) + def run(self): """ Main loop of the Federator :return: + + + + Steps in federated learning process + + 1. Client selection + 2. Run local updates + 3. Retrieve data + 4. 
Aggregate data """ # # Make sure the clients have loaded all the data + self.send_clients_ref() self.client_load_data() + self.test_data.init_dataloader() self.ping_all() self.clients_ready() self.update_client_data_sizes() @@ -232,13 +466,19 @@ def run(self): epoch_to_run = self.config.epochs epoch_size = self.config.epochs_per_cycle for epoch in range(epoch_to_run): - print(f'Running epoch {epoch}') + self.process_response_list() + logging.info(f'Running epoch {epoch}') self.remote_run_epoch(epoch_size) addition += 1 - logging.info('Printing client data') - print(self.client_data) + logging.info(f'Saving data') self.save_epoch_data() + self.save_experiment_data() + + # Ignore profiler for now + # logging.info(f'Reporting profile data') + # for key in self.performance_data.keys(): + # parse_stability_data(self.performance_data[key], save_to_file=True) logging.info(f'Federator is stopping') diff --git a/fltk/launch.py b/fltk/launch.py index d0e49904..8e5c783a 100644 --- a/fltk/launch.py +++ b/fltk/launch.py @@ -29,8 +29,8 @@ def run_single(rank, world_size, host = None, args = None, nic = None): os.environ['GLOO_SOCKET_IFNAME'] = nic os.environ['TP_SOCKET_IFNAME'] = nic else: - os.environ['GLOO_SOCKET_IFNAME'] = 'wlo1' - os.environ['TP_SOCKET_IFNAME'] = 'wlo1' + os.environ['GLOO_SOCKET_IFNAME'] = 'eth0' + os.environ['TP_SOCKET_IFNAME'] = 'eth0' logging.info(f'Starting with host={os.environ["MASTER_ADDR"]} and port={os.environ["MASTER_PORT"]}') options = rpc.TensorPipeRpcBackendOptions( num_worker_threads=16, diff --git a/fltk/strategy/aggregation.py b/fltk/strategy/aggregation.py index e062d78c..81726d9f 100644 --- a/fltk/strategy/aggregation.py +++ b/fltk/strategy/aggregation.py @@ -1,5 +1,17 @@ +def average_nn_parameters_simple(parameters): + """ + Averages passed parameters. 
+ :param parameters: nn model named parameters + :type parameters: list + """ + new_params = {} + for name in parameters[0].keys(): + new_params[name] = sum([param[name].data for param in parameters]) / len(parameters) + + return new_params + def average_nn_parameters(parameters): """ @@ -13,7 +25,7 @@ def average_nn_parameters(parameters): return new_params -def fed_average_nn_parameters(parameters, sizes): +def average_nn_parameters(parameters, sizes): new_params = {} sum_size = 0 for client in parameters: diff --git a/fltk/util/analyze.py b/fltk/util/analyze.py new file mode 100644 index 00000000..985bc080 --- /dev/null +++ b/fltk/util/analyze.py @@ -0,0 +1,15 @@ +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns + + + + +if __name__ == '__main__': + df = pd.read_csv('output/general_data.csv') + print(df) + + + plt.figure() + sns.pointplot(data=df, x='epoch', y='accuracy') + plt.show() \ No newline at end of file diff --git a/fltk/util/base_config.py b/fltk/util/base_config.py index c814965f..f15cdedf 100644 --- a/fltk/util/base_config.py +++ b/fltk/util/base_config.py @@ -12,7 +12,7 @@ class BareConfig: def __init__(self): # self.logger = logger - self.batch_size = 10 + self.batch_size = 1 self.test_batch_size = 1000 self.epochs = 1 self.lr = 0.001 diff --git a/fltk/util/generate_data_distribution.py b/fltk/util/generate_data_distribution.py index 34135c0b..047aa202 100644 --- a/fltk/util/generate_data_distribution.py +++ b/fltk/util/generate_data_distribution.py @@ -2,7 +2,8 @@ import os import logging -from fltk.datasets import CIFAR10Dataset, FashionMNISTDataset, CIFAR100Dataset +from fltk.datasets.distributed import DistCIFAR10Dataset, DistCIFAR100Dataset, DistFashionMNISTDataset +# from fltk.datasets import CIFAR10Dataset, FashionMNISTDataset, CIFAR100Dataset from fltk.util.arguments import Arguments from fltk.util.data_loader_utils import generate_train_loader, generate_test_loader, save_data_loader_to_file @@ -15,7 +16,7 @@ # 
--------------------------------- # ------------ CIFAR10 ------------ # --------------------------------- - dataset = CIFAR10Dataset(args) + dataset = DistCIFAR10Dataset(args) TRAIN_DATA_LOADER_FILE_PATH = "data_loaders/cifar10/train_data_loader.pickle" TEST_DATA_LOADER_FILE_PATH = "data_loaders/cifar10/test_data_loader.pickle" @@ -34,7 +35,7 @@ # --------------------------------- # --------- Fashion-MNIST --------- # --------------------------------- - dataset = FashionMNISTDataset(args) + dataset = DistFashionMNISTDataset(args) TRAIN_DATA_LOADER_FILE_PATH = "data_loaders/fashion-mnist/train_data_loader.pickle" TEST_DATA_LOADER_FILE_PATH = "data_loaders/fashion-mnist/test_data_loader.pickle" @@ -53,7 +54,7 @@ # --------------------------------- # ------------ CIFAR100 ----------- # --------------------------------- - dataset = CIFAR100Dataset(args) + dataset = DistCIFAR100Dataset(args) TRAIN_DATA_LOADER_FILE_PATH = "data_loaders/cifar100/train_data_loader.pickle" TEST_DATA_LOADER_FILE_PATH = "data_loaders/cifar100/test_data_loader.pickle" diff --git a/fltk/util/generate_docker_compose.py b/fltk/util/generate_docker_compose.py index 52a7bb36..5c67c8da 100644 --- a/fltk/util/generate_docker_compose.py +++ b/fltk/util/generate_docker_compose.py @@ -39,10 +39,10 @@ def generate(num_clients: int): for client_id in range(1, num_clients+1): client_type = 'default' - if client_id == 1: - client_type='slow' - if client_id == 2: - client_type='medium' + # if client_id == 1: + # client_type='slow' + # if client_id == 2: + # client_type='medium' client_template: dict = load_client_template(type=client_type) client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type) system_template['services'].update(client_definition) diff --git a/fltk/util/profile_plots.py b/fltk/util/profile_plots.py new file mode 100644 index 00000000..c1637c71 --- /dev/null +++ b/fltk/util/profile_plots.py @@ -0,0 +1,151 @@ +from typing import List + 
+import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt + +def parse_data(df: pd.DataFrame, name: str, meta_data: dict): + # type = 'feature' + first_cls_layer = int(meta_data[name]) + df['type'] = 'feature' + mask = df['layer_id'] >= first_cls_layer + df.loc[mask, 'type'] = 'classifier' + mask = df['layer_id'] < first_cls_layer + df.loc[mask, 'type'] = 'feature' + + tmp = df.groupby(['execution_id', 'event', 'id_type_combined', 'layer_id', 'type']).time.mean().reset_index() + sorted = tmp.sort_values(['event', 'execution_id'], ascending=[False, True]) + + grouped_df = tmp.groupby(['event', 'type']).sum().reset_index()[['event', 'type', 'time']] + grouped_df['model'] = name + # for idx, row in df.iterrows(): + # print(idx, row) + return grouped_df + + +def parse_stability_data(data: List[pd.DataFrame], save_to_file: bool = False, filename: str = 'stability_data.csv'): + df_list = [] + for idx, df in enumerate(data): + df['idx'] = idx + df_list.append(df) + + combined_df = pd.concat(df_list, ignore_index=True) + if save_to_file: + combined_df.to_csv(filename) + return combined_df + + +def stability_plot(df: pd.DataFrame): + # for idx, df in enumerate(data): + # print(idx) + pass + +def calc_metric(df, start_cls_layer): + df['type'] = 'feature' + mask = df['layer_id'] >= start_cls_layer + df.loc[mask, 'type'] = 'classifier' + mask = df['layer_id'] < start_cls_layer + df.loc[mask, 'type'] = 'feature' + combined = df.groupby(['event', 'type', 'idx']).sum().reset_index() + + features_f: pd.DataFrame = combined[(combined['type'] == 'feature') & (combined['event'] == 'forward')][['time', 'idx']] + classifier_f = combined[(combined['type'] == 'classifier') & (combined['event'] == 'forward')][['time', 'idx']] + features_b = combined[(combined['type'] == 'feature') & (combined['event'] == 'backward')][['time', 'idx']] + classifier_b = combined[(combined['type'] == 'classifier') & (combined['event'] == 'backward')][['time', 'idx']] + + features_f2: 
pd.DataFrame = combined[(combined['type'] == 'feature') & (combined['event'] == 'forward')] + classifier_f2 = combined[(combined['type'] == 'classifier') & (combined['event'] == 'forward')] + features_b2 = combined[(combined['type'] == 'feature') & (combined['event'] == 'backward')] + classifier_b2 = combined[(combined['type'] == 'classifier') & (combined['event'] == 'backward')] + + plt.figure() + # sns.lineplot(data=pd.concat([features_b2, features_f2, classifier_b2, classifier_f2], ignore_index=True), x='idx', y='time', hue='type') + sns.lineplot(data=pd.concat([features_f2, classifier_b2, classifier_f2], ignore_index=True), x='idx', y='time', hue='event') + plt.title('Weak offloaded Client') + plt.show() + + plt.figure() + # sns.lineplot(data=pd.concat([features_b2, features_f2, classifier_b2, classifier_f2], ignore_index=True), x='idx', y='time', hue='type') + sns.lineplot(data=pd.concat([features_f2, features_b2, classifier_b2, classifier_f2], ignore_index=True), x='idx', y='time', + hue='event') + plt.title('Original Weak client') + plt.show() + plt.figure() + sns.lineplot(data=pd.concat([features_f2, features_b2], ignore_index=True), x='idx', y='time', hue='event') + plt.title('Offload') + plt.show() + + plt.figure() + # sns.lineplot(data=pd.concat([features_b2, features_f2, classifier_b2, classifier_f2], ignore_index=True), x='idx', y='time', hue='type') + sns.lineplot(data=pd.concat([features_f2, classifier_b2, classifier_f2], ignore_index=True), x='idx', y='time') + plt.title('Weak offloaded Client #2') + plt.show() + + plt.figure() + # sns.lineplot(data=pd.concat([features_b2, features_f2, classifier_b2, classifier_f2], ignore_index=True), x='idx', y='time', hue='type') + sns.lineplot(data=pd.concat([features_f2, features_b2, classifier_b2, classifier_f2], ignore_index=True), x='idx', + y='time') + plt.title('Original Weak client #2') + plt.show() + plt.figure() + sns.lineplot(data=pd.concat([features_f2, features_b2], ignore_index=True), x='idx', 
y='time') + plt.title('Offload #2') + plt.show() + + + features_f.rename(columns={'time': 'time_f_f'}, inplace=True) + classifier_f.rename(columns={'time': 'time_c_f'}, inplace=True) + features_b.rename(columns={'time': 'time_f_b'}, inplace=True) + classifier_b.rename(columns={'time': 'time_c_b'}, inplace=True) + + combined_df = features_f.copy(deep=True) + combined_df = combined_df.merge(classifier_f, on='idx') + combined_df = combined_df.merge(features_b, on='idx') + combined_df = combined_df.merge(classifier_b, on='idx') + + combined_df['offload_time'] = combined_df['time_f_f'] + combined_df['time_f_b'] + combined_df['gained_time'] = combined_df['time_c_f'] + combined_df['time_f_f'] + combined_df['time_f_b'] + + data_list = [] + for _, row in combined_df.iterrows(): + data_list.append([row['offload_time'], 'offload', row['idx']]) + data_list.append([row['gained_time'], 'gained', row['idx']]) + # offload = features_f.copy(deep=True) + # frozen = features_f.copy(deep=True) + # + # offload['time'] += features_b['time'] + # frozen['time'] = classifier_f['time'].values + classifier_b['time'].values + # Compute time of part that is offloaded to strong node + + return pd.DataFrame(data_list, columns=['time', 'type', 'idx']) + +if __name__ == '__main__': + print('Hello world') + + + df = pd.read_csv('stability_data.csv') + calc = calc_metric(df, 15) + + plt.figure() + sns.lineplot(data=calc, x='idx', y='time', hue='type') + plt.show() + # first = df.head(10) + # groups = df.groupby(['idx', 'layer_id', 'event']) + # # df['layer_id'] = pd.to_ + # df['layer_id'] = df['layer_id'].astype(str) + # plt.figure() + # # sns.lineplot(data=df, x='idx', y='time', hue='layer_id') + # g = sns.FacetGrid(df, col="event", hue='layer_id') + # g.map(sns.lineplot, "idx", "time") + # plt.show() + + # for i in groups.groups: + # print(groups.groups[i]) +# +# clean_df = parse_data(df, model_name, meta_data) +# meta_data = { +# 'lenet-5': 6, +# 'alexnet': 13, +# 'vgg16': 13, +# 'cifar_10_cnn': 
15 +# } \ No newline at end of file diff --git a/fltk/util/profiler.py b/fltk/util/profiler.py new file mode 100644 index 00000000..ceb30a90 --- /dev/null +++ b/fltk/util/profiler.py @@ -0,0 +1,189 @@ +from dataclasses import dataclass + +import torch +from torch.nn import Module +import time +import pandas as pd + +@dataclass +class Event: + time: int + layer_id: int + name: str + event: str + execution_id: int + + def to_list(self): + return [self.time, self.layer_id, self.name, self.event, f'{self.layer_id}-{self.name}', self.execution_id] + +class Profiler: + current_layer = 0 + event_list = [] + last_time = 0 + execution_id = 0 + last_forward_event = None + warmup = False + hook_handles = [] + + def add(self, event: Event): + if event.layer_id >= 100: + print('Error') + print(event) + for e in self.event_list[-150:]: + print(e) + assert(event.layer_id < 100) + self.event_list.append(event) + + def pre_forward(self, other, input): + if self.warmup: + return None + # print(f'Pre forward: {other.__class__.__name__}') + # self.event_list.append(Event(time.time_ns(), self.current_layer, other.__class__.__name__, "pre_forward")) + self.last_forward_event = Event(time.time_ns(), self.current_layer, other.__class__.__name__, "forward", self.execution_id) + + def forward(self, other, input, output): + if self.warmup: + return None + # print(f'Forward: {other.__class__.__name__}') + self.last_forward_event.time = time.time_ns() - self.last_forward_event.time + # self.event_list.append(self.last_forward_event) + self.add(self.last_forward_event) + self.current_layer += 1 + self.execution_id += 1 + + def backward(self, module, grad_input, grad_output): + # pass + if self.warmup: + return None + # print(f'Backward: {module.__class__.__name__}') + # self.event_list.append(Event(time.time_ns() - self.last_time, self.current_layer, module.__class__.__name__, "backward", self.execution_id)) + self.add(Event(time.time_ns() - self.last_time, self.current_layer, 
module.__class__.__name__, "backward", self.execution_id)) + self.current_layer -= 1 + self.execution_id += 1 + self.last_time = time.time_ns() + return None + + def signal_backward_start(self): + self.current_layer -= 1 + self.last_time = time.time_ns() + + def signal_forward_start(self): + self.current_layer = 0 + self.execution_id = 0 + self.last_time = None + self.last_forward_event = None + + def print_events(self): + for e in self.event_list: + print(e) + + def to_dataframe(self) -> pd.DataFrame: + data = [x.to_list() for x in self.event_list] + return pd.DataFrame(data, columns = ['time', 'layer_id', 'layer_type', 'event', 'id_type_combined', 'execution_id']) + + def export_data(self): + return self.to_dataframe().groupby(['event', 'layer_id']).mean().reset_index()[['event', 'layer_id', 'time']] + + def reset(self): + self.event_list = [] + + def calc_metric(self, start_cls_layer): + df = self.to_dataframe() + df['type'] = 'feature' + mask = df['layer_id'] >= start_cls_layer + df.loc[mask, 'type'] = 'classifier' + mask = df['layer_id'] < start_cls_layer + df.loc[mask, 'type'] = 'feature' + combined = df.groupby(['event', 'type']).sum().reset_index() + + features_f = combined[(combined['type'] == 'feature') & (combined['event'] == 'forward')]['time'].values[0] + classifier_f = combined[(combined['type'] == 'classifier') & (combined['event'] == 'forward')]['time'].values[0] + features_b = combined[(combined['type'] == 'feature') & (combined['event'] == 'backward')]['time'].values[0] + classifier_b = combined[(combined['type'] == 'classifier') & (combined['event'] == 'backward')]['time'].values[0] + return features_f, features_b, classifier_f, classifier_b + + + def set_warmup(self, value): + self.warmup = value + + def printnorm(self, other, input, output): + # input is a tuple of packed inputs + # output is a Tensor. 
output.data is the Tensor we are interested + print('Inside ' + other.__class__.__name__ + ' forward') + # print('') + # print('input: ', type(input)) + # print('input[0]: ', type(input[0])) + # print('output: ', type(output)) + # print('') + # print('input size:', input[0].size()) + # print('output size:', output.data.size()) + # print('output norm:', output.data.norm()) + + def remove_all_handles(self): + for handle in self.hook_handles: + handle.remove() + + def attach(self, module: Module): + + def get_children(model: torch.nn.Module): + # get children form model! + children = list(model.children()) + flatt_children = [] + if children == []: + # if model has no children; model is last child! :O + return model + else: + # look for children from children... to the last child! + for child in children: + try: + flatt_children.extend(get_children(child)) + except TypeError: + flatt_children.append(get_children(child)) + return flatt_children + + kids = get_children(module) + + print(module) + for k in kids: + print(f'Registrating hooks for layer {k}') + h1 = k.register_forward_hook(self.forward) + self.hook_handles.append(h1) + h2 = k.register_forward_pre_hook(self.pre_forward) + self.hook_handles.append(h2) + h3 = k.register_backward_hook(self.backward) + self.hook_handles.append(h3) + # module.register_forward_hook(self.printnorm) + # for name, m in module.named_children(): + # print(f'>> Name: {name}') + # print(f'>> Content: {m.parameters()}') + # for child in module.children(): + # print(f'Registrating hooks for layer {child}') + # child.register_forward_hook(self.forward) + # child.register_forward_pre_hook(self.pre_forward) + # child.register_backward_hook(self.backward) + # child.register_full_backward_hook(self.backward) + + def profile_run(self, module, input, iterations, warmup_time = 0) -> pd.DataFrame: + output = module(input) + g0 = torch.rand_like(output) + + self.attach(module) + module.train() + self.set_warmup(True) + for i in range(warmup_time): # 
warmup + print('warmup cycle') + self.signal_forward_start() + output = module(input) + self.signal_backward_start() + output.backward(g0) + self.set_warmup(False) + for i in range(iterations): + print(i, end='') + self.signal_forward_start() + output = module(input) + self.signal_backward_start() + output.backward(g0) + print('') + self.print_events() + + return self.to_dataframe() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..b01b714e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,13 @@ +tqdm==4.49.0 +scikit-learn==0.23.2 +pandas==1.1.2 +numpy>=1.20.0 +torch==1.7.1 +torchvision==0.8.2 +scipy==1.4.1 +h5py==2.10.0 +requests +pyyaml +torchsummary +dataclass-csv +tensorboard \ No newline at end of file From 16a4e91e8eb39d7c96e5cac65a5c6355830cb4a8 Mon Sep 17 00:00:00 2001 From: bacox Date: Wed, 12 Jan 2022 10:24:13 +0100 Subject: [PATCH 08/73] Enable offloading --- Dockerfile | 2 +- configs/experiment.yaml | 9 +- configs/experiment_vanilla.yaml | 10 +- deploy/templates/client_stub_default.yml | 2 +- deploy/templates/client_stub_medium.yml | 2 +- fltk/client.py | 185 +++++++++++++++-------- fltk/federator.py | 100 +++++++++--- fltk/strategy/aggregation.py | 17 +++ fltk/strategy/offloading.py | 22 +++ fltk/util/base_config.py | 10 ++ fltk/util/generate_docker_compose.py | 35 ++++- fltk/util/results.py | 1 + requirements.txt | 4 +- 13 files changed, 301 insertions(+), 98 deletions(-) create mode 100644 fltk/strategy/offloading.py diff --git a/Dockerfile b/Dockerfile index 8ad4937b..006c97d0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -45,5 +45,5 @@ EXPOSE 5000 COPY fltk ./fltk COPY configs ./configs #CMD python3 ./fltk/__main__.py single configs/experiment.yaml --rank=$RANK -CMD python3 -m fltk single configs/experiment.yaml --rank=$RANK +CMD python3 -m fltk single configs/experiment_vanilla.yaml --rank=$RANK #CMD python3 setup.py \ No newline at end of file diff --git a/configs/experiment.yaml 
b/configs/experiment.yaml index c8e30bce..62ee3a93 100644 --- a/configs/experiment.yaml +++ b/configs/experiment.yaml @@ -1,6 +1,6 @@ --- # Experiment configuration -total_epochs: 4 +total_epochs: 30 epochs_per_cycle: 1 wait_for_clients: true net: Cifar10CNN @@ -8,11 +8,14 @@ dataset: cifar10 # Use cuda is available; setting to false will force CPU cuda: false experiment_prefix: 'experiment_sample' +offload_stategy: vanilla +profiling_time: 100 +deadline: 500 output_location: 'output' tensor_board_active: true clients_per_round: 2 -# sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) + sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) sampler_args: - 0.07 # label limit || q probability || alpha || unused - 42 # random seed || random seed || random seed || unused diff --git a/configs/experiment_vanilla.yaml b/configs/experiment_vanilla.yaml index 90fcb77b..a8c10a79 100644 --- a/configs/experiment_vanilla.yaml +++ b/configs/experiment_vanilla.yaml @@ -1,19 +1,21 @@ --- # Experiment configuration -total_epochs: 4 +total_epochs: 20 epochs_per_cycle: 1 wait_for_clients: true net: Cifar10CNN dataset: cifar10 # Use cuda is available; setting to false will force CPU cuda: false -experiment_prefix: 'offloading_vanilla' +experiment_prefix: 'exp_offload_vanilla' offload_stategy: vanilla +profiling_time: 100 +deadline: 500 output_location: 'output' tensor_board_active: true clients_per_round: 2 -# sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || 
"dirichlet" || "uniform" (default) sampler_args: - 0.07 # label limit || q probability || alpha || unused - 42 # random seed || random seed || random seed || unused diff --git a/deploy/templates/client_stub_default.yml b/deploy/templates/client_stub_default.yml index d8955310..838cf699 100644 --- a/deploy/templates/client_stub_default.yml +++ b/deploy/templates/client_stub_default.yml @@ -20,4 +20,4 @@ client_name: # name can be anything resources: limits: cpus: '2' - memory: 1024M +# memory: 1024M diff --git a/deploy/templates/client_stub_medium.yml b/deploy/templates/client_stub_medium.yml index 8f07f46b..6037ce44 100644 --- a/deploy/templates/client_stub_medium.yml +++ b/deploy/templates/client_stub_medium.yml @@ -19,5 +19,5 @@ client_name: # name can be anything deploy: resources: limits: - cpus: '0.75' + cpus: '1' memory: 1024M diff --git a/fltk/client.py b/fltk/client.py index f841a332..7c5fa710 100644 --- a/fltk/client.py +++ b/fltk/client.py @@ -15,6 +15,7 @@ from torch.distributed.rpc import RRef from fltk.schedulers import MinCapableStepLR +from fltk.strategy.offloading import OffloadingStrategy from fltk.util.arguments import Arguments from fltk.util.fed_avg import average_nn_parameters from fltk.util.log import FLLogger @@ -68,6 +69,8 @@ class Client: call_to_offload = False client_to_offload_to : str = None + strategy = OffloadingStrategy.VANILLA + def __init__(self, id, log_rref, rank, world_size, config = None): logging.info(f'Welcome to client {id}') @@ -92,6 +95,43 @@ def __init__(self, id, log_rref, rank, world_size, config = None): self.args.get_scheduler_step_size(), self.args.get_scheduler_gamma(), self.args.get_min_lr()) + self.strategy = OffloadingStrategy.Parse(config.offload_strategy) + self.configure_strategy(self.strategy) + + + def configure_strategy(self, strategy : OffloadingStrategy): + if strategy == OffloadingStrategy.VANILLA: + logging.info('Running with offloading strategy: VANILLA') + self.deadline_enabled = False + 
self.swyh_enabled = False + self.freeze_layers_enabled = False + self.offload_enabled = False + if strategy == OffloadingStrategy.DEADLINE: + logging.info('Running with offloading strategy: DEADLINE') + self.deadline_enabled = True + self.swyh_enabled = False + self.freeze_layers_enabled = False + self.offload_enabled = False + if strategy == OffloadingStrategy.SWYH: + logging.info('Running with offloading strategy: SWYH') + self.deadline_enabled = True + self.swyh_enabled = True + self.freeze_layers_enabled = False + self.offload_enabled = False + if strategy == OffloadingStrategy.FREEZE: + logging.info('Running with offloading strategy: FREEZE') + self.deadline_enabled = True + self.swyh_enabled = False + self.freeze_layers_enabled = True + self.offload_enabled = False + if strategy == OffloadingStrategy.MODEL_OFFLOAD: + logging.info('Running with offloading strategy: MODEL_OFFLOAD') + self.deadline_enabled = True + self.swyh_enabled = False + self.freeze_layers_enabled = True + self.offload_enabled = True + logging.info(f'Offload strategy params: deadline={self.deadline_enabled}, swyh={self.swyh_enabled}, freeze={self.freeze_layers_enabled}, offload={self.offload_enabled}') + def init_device(self): if self.args.cuda and torch.cuda.is_available(): @@ -254,11 +294,36 @@ def unfreeze_layers(self): for param in self.net.parameters(): param.requires_grad = True - def train(self, epoch, deadline_time: int = None): + def train(self, epoch, deadline: int = None): """ + + Different modes: + 1. Vanilla + 2. Deadline + 3. SWYH + 4. Just Freeze + 5. 
Model Offload + + + :: Vanilla + Disable deadline + Disable swyh + Disable offload + + :: Deadline + We need to keep track of the incoming deadline + We don't need to send data before the deadline + :param epoch: Current epoch # :type epoch: int """ + start_time = time.time() + deadline_threshold = 5 + train_stop_time = None + if self.deadline_enabled and deadline is not None: + train_stop_time = start_time + deadline - deadline_threshold + + strategy = OffloadingStrategy.VANILLA # Ignore profiler for now # p = Profiler() @@ -266,7 +331,7 @@ def train(self, epoch, deadline_time: int = None): # self.net.train() global global_model_weights, global_offload_received - deadline_time = None + # deadline_time = None # save model if self.args.should_save_model(epoch): self.save_model(epoch, self.args.get_epoch_save_start_suffix()) @@ -281,65 +346,58 @@ def train(self, epoch, deadline_time: int = None): # performance_metric_interval = 20 # perf_resp = None - profiling_size = 40 + # Profiling parameters + profiling_size = self.args.profiling_size profiling_data = np.zeros(profiling_size) active_profiling = True control_start_time = time.time() + training_process = 0 for i, (inputs, labels) in enumerate(self.dataset.get_train_loader(), 0): start_train_time = time.time() - # Check if there is a call to offload - if self.call_to_offload: - self.args.get_logger().info('Got call to offload model') - model_weights = self.get_nn_parameters() - # print(self.client_to_offload_to) - # r_ref = rpc.remote(self.client_to_offload_to, Client.static_ping, args=()) - # print(f'Result of rref: {r_ref.to_here()}') - # ret = rpc.rpc_sync(self.client_to_offload_to, Client.static_ping, args=()) - # print(f'Result of rref: {ret}') - # ret = rpc.rpc_sync(self.client_to_offload_to, Client.offload_receive_endpoint_2, args=(["Hello"])) - # print(f'Result of rref: {ret}') - - ret = rpc.rpc_sync(self.client_to_offload_to, Client.offload_receive_endpoint, args=([model_weights])) - print(f'Result of rref: 
{ret}') - - # r_ref = rpc.remote(self.client_to_offload_to, Client.static_ping, args=()) - # r_ref = rpc.remote(self.client_to_offload_to, Client.offload_receive_endpoint_2, args=("Hello world")) - # _remote_method_async(Client.static_ping, self.client_to_offload_to) - # fut1 = rpc.rpc_async(self.client_to_offload_to, Client.ping) - # _remote_method_async_by_info(Client.offload_receive_endpoint, self.client_to_offload_to, model_weights) - self.call_to_offload = False - self.client_to_offload_to = None - # This number only works for cifar10cnn - self.freeze_layers(15) - - # Check if there is a model to incorporate - if global_offload_received: - self.args.get_logger().info('Merging offloaded model') - self.args.get_logger().info('FedAvg locally with offloaded model') - updated_weights = average_nn_parameters([self.get_nn_parameters(), global_model_weights]) - self.args.get_logger().info('Updating local weights due to offloading') - self.update_nn_parameters(updated_weights) - global_offload_received = False - global_model_weights = None - - - if deadline_time is not None: - if time.time() >= deadline_time: - self.args.get_logger().info('Stopping training due to deadline time') - break - else: - self.args.get_logger().info(f'Time to deadline: {deadline_time - time.time()}') + if self.offload_enabled: + # Check if there is a call to offload + if self.call_to_offload: + self.args.get_logger().info('Got call to offload model') + model_weights = self.get_nn_parameters() + + ret = rpc.rpc_sync(self.client_to_offload_to, Client.offload_receive_endpoint, args=([model_weights])) + print(f'Result of rref: {ret}') + + self.call_to_offload = False + self.client_to_offload_to = None + # This number only works for cifar10cnn + # @TODO: Make this dynamic for other networks + self.freeze_layers(15) + + # Check if there is a model to incorporate + if global_offload_received: + self.args.get_logger().info('Merging offloaded model') + self.args.get_logger().info('FedAvg locally with 
offloaded model') + updated_weights = average_nn_parameters([self.get_nn_parameters(), global_model_weights]) + self.args.get_logger().info('Updating local weights due to offloading') + self.update_nn_parameters(updated_weights) + global_offload_received = False + global_model_weights = None + + if self.deadline_enabled: + # Deadline + if train_stop_time is not None: + if time.time() >= train_stop_time: + self.args.get_logger().info('Stopping training due to deadline time') + break + # else: + # self.args.get_logger().info(f'Time to deadline: {train_stop_time - time.time()}') + + + inputs, labels = inputs.to(self.device), labels.to(self.device) + training_process = i # zero the parameter gradients self.optimizer.zero_grad() - # Ignore profile for now - # p.set_warmup(False) - # p.signal_forward_start() - # forward + backward + optimize outputs = self.net(inputs) loss = self.loss_function(outputs, labels) @@ -376,15 +434,25 @@ def train(self, epoch, deadline_time: int = None): est_total_time = number_of_training_samples * time_per_batch logging.info(f'Estimated training time is {est_total_time}') self.report_performance_estimate((time_per_batch, est_total_time, number_of_training_samples)) - # logging.info(f'Batch time is {batch_duration}') + if self.freeze_layers_enabled: + logging.info(f'Checking if need to freeze layers ? 
{est_total_time} > {deadline}') + if est_total_time > deadline: + logging.info('Will freeze layers to speed up computation') + # This number only works for cifar10cnn + # @TODO: Make this dynamic for other networks + self.freeze_layers(15) + # logging.info(f'Batch time is {batch_duration}') - if i > 50: - break + # Break away from loop for debug purposes + # if i > 50: + # break control_end_time = time.time() logging.info(f'Measure end time is {(control_end_time - control_start_time)}') + logging.info(f'Trained on {training_process} samples') + self.scheduler.step() @@ -395,7 +463,7 @@ def train(self, epoch, deadline_time: int = None): if self.args.should_save_model(epoch): self.save_model(epoch, self.args.get_epoch_save_end_suffix()) - return final_running_loss, self.get_nn_parameters() + return final_running_loss, self.get_nn_parameters(), training_process def test(self): self.net.eval() @@ -435,14 +503,11 @@ def test(self): return accuracy, loss, class_precision, class_recall def run_epochs(self, num_epoch, deadline: int = None): - start_time = time.time() - deadline_threshold = 10 start_time_train = datetime.datetime.now() - train_stop_time = None - if deadline is not None: - train_stop_time = start_time + deadline - deadline_threshold + self.dataset.get_train_sampler().set_epoch_size(num_epoch) - loss, weights = self.train(self.epoch_counter, train_stop_time) + # Train locally + loss, weights, training_process = self.train(self.epoch_counter, deadline) self.epoch_counter += num_epoch elapsed_time_train = datetime.datetime.now() - start_time_train train_time_ms = int(elapsed_time_train.total_seconds()*1000) @@ -452,7 +517,7 @@ def run_epochs(self, num_epoch, deadline: int = None): elapsed_time_test = datetime.datetime.now() - start_time_test test_time_ms = int(elapsed_time_test.total_seconds()*1000) - data = EpochData(self.epoch_counter, train_time_ms, test_time_ms, loss, accuracy, test_loss, class_precision, class_recall, client_id=self.id) + data = 
EpochData(self.epoch_counter, num_epoch, train_time_ms, test_time_ms, loss, accuracy, test_loss, class_precision, class_recall, training_process, self.id) self.epoch_results.append(data) # Copy GPU tensors to CPU diff --git a/fltk/federator.py b/fltk/federator.py index f70f30cc..8790747c 100644 --- a/fltk/federator.py +++ b/fltk/federator.py @@ -12,7 +12,9 @@ from fltk.client import Client from fltk.datasets.data_distribution import distribute_batches_equally +from fltk.strategy.aggregation import FedAvg from fltk.strategy.client_selection import random_selection +from fltk.strategy.offloading import OffloadingStrategy from fltk.util.arguments import Arguments from fltk.util.base_config import BareConfig from fltk.util.data_loader_utils import load_train_data_loader, load_test_data_loader, \ @@ -112,6 +114,14 @@ class Federator: reference_lookup = {} performance_estimate = {} + # Strategies + deadline_enabled = False + swyh_enabled = False + freeze_layers_enabled = False + offload_enabled = False + + strategy = OffloadingStrategy.VANILLA + # Keep track of the experiment data exp_data_general = [] @@ -134,8 +144,43 @@ def __init__(self, client_id_triple, num_epochs = 3, config=None): self.test_data = Client("test", None, 1, 2, config) config.data_sampler = copy_sampler self.reference_lookup[get_worker_info().name] = RRef(self) - - + self.strategy = OffloadingStrategy.Parse(config.offload_strategy) + self.configure_strategy(self.strategy) + + + + def configure_strategy(self, strategy : OffloadingStrategy): + if strategy == OffloadingStrategy.VANILLA: + logging.info('Running with offloading strategy: VANILLA') + self.deadline_enabled = False + self.swyh_enabled = False + self.freeze_layers_enabled = False + self.offload_enabled = False + if strategy == OffloadingStrategy.DEADLINE: + logging.info('Running with offloading strategy: DEADLINE') + self.deadline_enabled = True + self.swyh_enabled = False + self.freeze_layers_enabled = False + self.offload_enabled = False + 
if strategy == OffloadingStrategy.SWYH: + logging.info('Running with offloading strategy: SWYH') + self.deadline_enabled = True + self.swyh_enabled = True + self.freeze_layers_enabled = False + self.offload_enabled = False + if strategy == OffloadingStrategy.FREEZE: + logging.info('Running with offloading strategy: FREEZE') + self.deadline_enabled = True + self.swyh_enabled = False + self.freeze_layers_enabled = True + self.offload_enabled = False + if strategy == OffloadingStrategy.MODEL_OFFLOAD: + logging.info('Running with offloading strategy: MODEL_OFFLOAD') + self.deadline_enabled = True + self.swyh_enabled = False + self.freeze_layers_enabled = True + self.offload_enabled = True + logging.info(f'Offload strategy params: deadline={self.deadline_enabled}, swyh={self.swyh_enabled}, freeze={self.freeze_layers_enabled}, offload={self.offload_enabled}') def create_clients(self, client_id_triple): for id, rank, world_size in client_id_triple: @@ -235,8 +280,8 @@ def ask_client_to_offload(self, client1_ref, client2_ref): def remote_run_epoch(self, epochs): start_epoch_time = time.time() - deadline = 400 - + deadline = self.config.deadline + deadline_time = self.config.deadline """ 1. Client selection 2. Run local updates @@ -245,6 +290,9 @@ def remote_run_epoch(self, epochs): """ client_weights = [] + + client_weights_dict = {} + client_training_process_dict = {} while self.num_available_clients() < self.config.clients_per_round: logging.warning(f'Waiting for enough clients to become available. 
# Available Clients = {self.num_available_clients()}, but need {self.config.clients_per_round}') self.process_response_list() @@ -264,6 +312,10 @@ def remote_run_epoch(self, epochs): res[1].wait() logging.info('Weights are updated') + # Let clients train locally + + if not self.deadline_enabled: + deadline = 0 responses: List[ClientResponse] = [] for client in selected_clients: cr = ClientResponse(self.response_id, client, _remote_method_async(Client.run_epochs, client.ref, num_epoch=epochs, deadline=deadline)) @@ -274,7 +326,6 @@ def remote_run_epoch(self, epochs): # responses.append((client, time.time(), _remote_method_async(Client.run_epochs, client.ref, num_epoch=epochs))) self.epoch_counter += epochs - deadline_time = 400 # deadline_time = None # Wait loop with deadline start = time.time() @@ -292,8 +343,8 @@ def reached_deadline(): has_not_called = True show_perf_data = True - while not all_finished and not reached_deadline(): - + while not all_finished and not (self.deadline_enabled and reached_deadline()): + # if self.deadline_enabled and reached_deadline() # if has_not_called and (time.time() -start) > 10: # logging.info('Sending call to offload') # has_not_called = False @@ -325,16 +376,17 @@ def reached_deadline(): # weak_client = k # else: # strong_client = k + if self.offload_enabled: + weak_client = est_keys[0] + strong_client = est_keys[1] + if self.performance_estimate[est_keys[1]][1] > self.performance_estimate[est_keys[0]][1]: + weak_client = est_keys[1] + strong_client = est_keys[0] - weak_client = est_keys[0] - strong_client = est_keys[1] - if self.performance_estimate[est_keys[1]][1] > self.performance_estimate[est_keys[0]][1]: - weak_client = est_keys[1] - strong_client = est_keys[0] + logging.info(f'Offloading from {weak_client} -> {strong_client} due to {self.performance_estimate[weak_client]} and {self.performance_estimate[strong_client]}') + logging.info('Sending call to offload') + 
self.ask_client_to_offload(self.reference_lookup[selected_clients[0].name], selected_clients[1].name) - logging.info(f'Offloading from {weak_client} -> {strong_client} due to {self.performance_estimate[weak_client]} and {self.performance_estimate[strong_client]}') - logging.info('Sending call to offload') - self.ask_client_to_offload(self.reference_lookup[selected_clients[0].name], selected_clients[1].name) # selected_clients[0] # logging.info(f'Status of all_finished={all_finished} and deadline={reached_deadline()}') all_finished = True @@ -344,6 +396,7 @@ def reached_deadline(): client_response.finish() else: all_finished = False + time.sleep(0.1) logging.info(f'Stopped waiting due to all_finished={all_finished} and deadline={reached_deadline()}') for client_response in responses: @@ -361,6 +414,7 @@ def reached_deadline(): self.client_data[epoch_data.client_id].append(epoch_data) logging.info(f'{client} had a loss of {epoch_data.loss}') logging.info(f'{client} had a epoch data of {epoch_data}') + logging.info(f'{client} has trained on {epoch_data.training_process} samples') client.tb_writer.add_scalar('training loss', epoch_data.loss_train, # for every 1000 minibatches @@ -379,10 +433,13 @@ def reached_deadline(): self.epoch_counter) client_weights.append(weights) + client_weights_dict[client.name] = weights + client_training_process_dict[client.name] = epoch_data.training_process self.performance_estimate = {} if len(client_weights): - updated_model = average_nn_parameters(client_weights) + updated_model = FedAvg(client_weights_dict, client_training_process_dict) + # updated_model = average_nn_parameters(client_weights) # test global model logging.info("Testing on global test set") @@ -399,13 +456,13 @@ def reached_deadline(): def save_experiment_data(self): p = Path(f'./{self.config.output_location}') - file_output = f'./{self.config.output_location}' + # file_output = f'./{self.config.output_location}' exp_prefix = self.config.experiment_prefix - 
self.ensure_path_exists(file_output) - file_output /= f'{exp_prefix}-general_data.csv' + self.ensure_path_exists(p) + p /= f'{exp_prefix}-general_data.csv' # general_filename = f'{file_output}/general_data.csv' df = pd.DataFrame(self.exp_data_general, columns=['epoch', 'duration', 'accuracy', 'loss', 'class_precision', 'class_recall']) - df.to_csv(file_output) + df.to_csv(p) def update_client_data_sizes(self): responses = [] @@ -427,9 +484,10 @@ def remote_test_sync(self): def save_epoch_data(self): file_output = f'./{self.config.output_location}' + exp_prefix = self.config.experiment_prefix self.ensure_path_exists(file_output) for key in self.client_data: - filename = f'{file_output}/{key}_epochs.csv' + filename = f'{file_output}/{exp_prefix}_{key}_epochs.csv' logging.info(f'Saving data at {filename}') with open(filename, "w") as f: w = DataclassWriter(f, self.client_data[key], EpochData) diff --git a/fltk/strategy/aggregation.py b/fltk/strategy/aggregation.py index 81726d9f..10a9975c 100644 --- a/fltk/strategy/aggregation.py +++ b/fltk/strategy/aggregation.py @@ -25,6 +25,23 @@ def average_nn_parameters(parameters): return new_params +def FedAvg(parameters, sizes): + new_params = {} + sum_size = 0 + for client in parameters: + for name in parameters[client].keys(): + try: + new_params[name].data += (parameters[client][name].data * sizes[client]) + except: + new_params[name] = (parameters[client][name].data * sizes[client]) + sum_size += sizes[client] + + for name in new_params: + # @TODO: Is .long() really required? 
+ new_params[name].data = new_params[name].data.long() / sum_size + + return new_params + def average_nn_parameters(parameters, sizes): new_params = {} sum_size = 0 diff --git a/fltk/strategy/offloading.py b/fltk/strategy/offloading.py new file mode 100644 index 00000000..4473ad90 --- /dev/null +++ b/fltk/strategy/offloading.py @@ -0,0 +1,22 @@ +from enum import Enum + + +class OffloadingStrategy(Enum): + VANILLA = 1 + DEADLINE = 2 + SWYH = 3 + FREEZE = 4 + MODEL_OFFLOAD = 5 + + @classmethod + def Parse(cls, string_value): + if string_value == 'vanilla': + return OffloadingStrategy.VANILLA + if string_value == 'deadline': + return OffloadingStrategy.DEADLINE + if string_value == 'swyh': + return OffloadingStrategy.SWYH + if string_value == 'freeze': + return OffloadingStrategy.FREEZE + if string_value == 'offload': + return OffloadingStrategy.MODEL_OFFLOAD \ No newline at end of file diff --git a/fltk/util/base_config.py b/fltk/util/base_config.py index a5a3b74b..e41b92b9 100644 --- a/fltk/util/base_config.py +++ b/fltk/util/base_config.py @@ -43,6 +43,10 @@ def __init__(self): self.num_workers = 50 # self.num_poisoned_workers = 10 + self.offload_strategy = 'vanilla' + self.profiling_size = 100 + self.deadline = 400 + self.federator_host = '0.0.0.0' self.rank = 0 self.world_size = 0 @@ -109,6 +113,12 @@ def merge_yaml(self, cfg = {}): self.set_net_by_name(cfg['net']) if 'dataset' in cfg: self.dataset_name = cfg['dataset'] + if 'offload_stategy' in cfg: + self.offload_strategy = cfg['offload_stategy'] + if 'profiling_size' in cfg: + self.profiling_size = cfg['profiling_size'] + if 'deadline' in cfg: + self.deadline = cfg['deadline'] if 'experiment_prefix' in cfg: self.experiment_prefix = cfg['experiment_prefix'] else: diff --git a/fltk/util/generate_docker_compose.py b/fltk/util/generate_docker_compose.py index 5c67c8da..8d910446 100644 --- a/fltk/util/generate_docker_compose.py +++ b/fltk/util/generate_docker_compose.py @@ -29,6 +29,28 @@ def generate_client(id, 
template: dict, world_size: int, type='default'): return local_template, container_name +def generate_offload_exp(): + num_clients = 2 + world_size = num_clients + 1 + system_template: dict = load_system_template() + + for key, item in enumerate(system_template['services']['fl_server']['environment']): + if item == 'WORLD_SIZE={world_size}': + system_template['services']['fl_server']['environment'][key] = item.format(world_size=world_size) + + for client_id in range(1, num_clients + 1): + client_type = 'default' + if client_id == 1: + client_type = 'medium' + # if client_id == 2: + # client_type = 'slow' + client_template: dict = load_client_template(type=client_type) + client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type) + system_template['services'].update(client_definition) + + with open(r'./docker-compose.yml', 'w') as file: + yaml.dump(system_template, file, sort_keys=False) + def generate(num_clients: int): world_size = num_clients + 1 system_template :dict = load_system_template() @@ -39,10 +61,10 @@ def generate(num_clients: int): for client_id in range(1, num_clients+1): client_type = 'default' - # if client_id == 1: - # client_type='slow' - # if client_id == 2: - # client_type='medium' + if client_id == 1: + client_type='slow' + if client_id == 2: + client_type='medium' client_template: dict = load_client_template(type=client_type) client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type) system_template['services'].update(client_definition) @@ -53,7 +75,8 @@ def generate(num_clients: int): if __name__ == '__main__': - num_clients = int(sys.argv[1]) - generate(num_clients) + # num_clients = int(sys.argv[1]) + # generate(num_clients) + generate_offload_exp() print('Done') diff --git a/fltk/util/results.py b/fltk/util/results.py index cf762b8a..a37fc8ad 100644 --- a/fltk/util/results.py +++ b/fltk/util/results.py @@ -12,6 +12,7 @@ class EpochData: 
loss: float class_precision: Any class_recall: Any + training_process: int client_id: str = None def to_csv_line(self): diff --git a/requirements.txt b/requirements.txt index b01b714e..e87e007e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,6 @@ requests pyyaml torchsummary dataclass-csv -tensorboard \ No newline at end of file +tensorboard +seaborn +matplotlib \ No newline at end of file From 902d8256882eaa60c5ea7f8b1af4f5a50daf48e7 Mon Sep 17 00:00:00 2001 From: Lydia Date: Thu, 13 Jan 2022 10:57:29 +0100 Subject: [PATCH 09/73] Update offloading --- Dockerfile | 1 + configs/experiment.yaml | 1 + configs/experiment_deadline.yaml | 13 ++++--- configs/experiment_freeze.yaml | 28 ++++++++++++++ configs/experiment_offload.yaml | 28 ++++++++++++++ configs/experiment_swyh.yaml | 28 ++++++++++++++ configs/experiment_swyh_first_long.yaml | 29 ++++++++++++++ configs/experiment_swyh_warmup.yaml | 28 ++++++++++++++ configs/experiment_vanilla.yaml | 1 + deploy/templates/client_stub_default.yml | 1 + deploy/templates/client_stub_medium.yml | 2 +- deploy/templates/client_stub_slow.yml | 4 +- deploy/templates/system_stub.yml | 1 + fltk/client.py | 47 +++++++++++++---------- fltk/federator.py | 41 ++++++++++++++++---- fltk/util/analyze.py | 33 +++++++++++++++- fltk/util/base_config.py | 6 +++ run_multi_exp.bash | 48 ++++++++++++++++++++++++ 18 files changed, 303 insertions(+), 37 deletions(-) create mode 100644 configs/experiment_freeze.yaml create mode 100644 configs/experiment_offload.yaml create mode 100644 configs/experiment_swyh.yaml create mode 100644 configs/experiment_swyh_first_long.yaml create mode 100644 configs/experiment_swyh_warmup.yaml create mode 100644 run_multi_exp.bash diff --git a/Dockerfile b/Dockerfile index 006c97d0..abb7ce0a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -46,4 +46,5 @@ COPY fltk ./fltk COPY configs ./configs #CMD python3 ./fltk/__main__.py single configs/experiment.yaml --rank=$RANK CMD python3 -m fltk single 
configs/experiment_vanilla.yaml --rank=$RANK +CMD python3 -m fltk single $EXP_CONFIG --rank=$RANK #CMD python3 setup.py \ No newline at end of file diff --git a/configs/experiment.yaml b/configs/experiment.yaml index 62ee3a93..70296073 100644 --- a/configs/experiment.yaml +++ b/configs/experiment.yaml @@ -11,6 +11,7 @@ experiment_prefix: 'experiment_sample' offload_stategy: vanilla profiling_time: 100 deadline: 500 +warmup_round: true output_location: 'output' tensor_board_active: true clients_per_round: 2 diff --git a/configs/experiment_deadline.yaml b/configs/experiment_deadline.yaml index 5ffdca23..c038e3a6 100644 --- a/configs/experiment_deadline.yaml +++ b/configs/experiment_deadline.yaml @@ -1,19 +1,22 @@ --- # Experiment configuration -total_epochs: 4 +total_epochs: 20 epochs_per_cycle: 1 wait_for_clients: true net: Cifar10CNN dataset: cifar10 # Use cuda is available; setting to false will force CPU cuda: false -experiment_prefix: 'offloading_deadline' -offload_stategy: offload +experiment_prefix: 'exp_offload_deadline' +offload_stategy: deadline +profiling_time: 50 +deadline: 140 +warmup_round: false output_location: 'output' tensor_board_active: true clients_per_round: 2 -# sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) sampler_args: - 0.07 # label limit || q probability || alpha || unused - 42 # random seed || random seed || random seed || unused diff --git a/configs/experiment_freeze.yaml b/configs/experiment_freeze.yaml new file mode 100644 index 00000000..78631070 --- /dev/null +++ b/configs/experiment_freeze.yaml @@ -0,0 +1,28 @@ +--- +# Experiment configuration +total_epochs: 20 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN 
+dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_freeze_deadline' +offload_stategy: freeze +profiling_time: 50 +deadline: 140 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 2 +sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + hostname: '10.5.0.11' + nic: 'eth0' + clients: + amount: 2 diff --git a/configs/experiment_offload.yaml b/configs/experiment_offload.yaml new file mode 100644 index 00000000..ccf8c0c1 --- /dev/null +++ b/configs/experiment_offload.yaml @@ -0,0 +1,28 @@ +--- +# Experiment configuration +total_epochs: 1 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_model_offload_deadline_fedavg_test' +offload_stategy: offload +profiling_time: 50 +deadline: 140 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 2 +sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + hostname: '10.5.0.11' + nic: 'eth0' + clients: + amount: 2 diff --git a/configs/experiment_swyh.yaml b/configs/experiment_swyh.yaml new file mode 100644 index 00000000..86b185fd --- /dev/null +++ b/configs/experiment_swyh.yaml @@ -0,0 +1,28 @@ +--- +# Experiment configuration +total_epochs: 20 +epochs_per_cycle: 1 +wait_for_clients: true +net: 
Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_swyh_deadline' +offload_stategy: swyh +profiling_time: 50 +deadline: 140 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 2 +sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + hostname: '10.5.0.11' + nic: 'eth0' + clients: + amount: 2 diff --git a/configs/experiment_swyh_first_long.yaml b/configs/experiment_swyh_first_long.yaml new file mode 100644 index 00000000..7089d52b --- /dev/null +++ b/configs/experiment_swyh_first_long.yaml @@ -0,0 +1,29 @@ +--- +# Experiment configuration +total_epochs: 20 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_swyh_first_long_deadline' +offload_stategy: swyh +profiling_time: 50 +deadline: 140 +first_deadline: 400 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 2 +sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + hostname: '10.5.0.11' + nic: 'eth0' + clients: + amount: 2 diff --git a/configs/experiment_swyh_warmup.yaml b/configs/experiment_swyh_warmup.yaml new file mode 100644 index 00000000..72400588 --- /dev/null +++ b/configs/experiment_swyh_warmup.yaml @@ -0,0 +1,28 @@ +--- +# Experiment configuration +total_epochs: 
20 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_swyh__warmup_deadline' +offload_stategy: swyh +profiling_time: 50 +deadline: 140 +warmup_round: true +output_location: 'output' +tensor_board_active: true +clients_per_round: 2 +sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + hostname: '10.5.0.11' + nic: 'eth0' + clients: + amount: 2 diff --git a/configs/experiment_vanilla.yaml b/configs/experiment_vanilla.yaml index a8c10a79..2ab96331 100644 --- a/configs/experiment_vanilla.yaml +++ b/configs/experiment_vanilla.yaml @@ -11,6 +11,7 @@ experiment_prefix: 'exp_offload_vanilla' offload_stategy: vanilla profiling_time: 100 deadline: 500 +warmup_round: false output_location: 'output' tensor_board_active: true clients_per_round: 2 diff --git a/deploy/templates/client_stub_default.yml b/deploy/templates/client_stub_default.yml index 838cf699..5ff5eeb5 100644 --- a/deploy/templates/client_stub_default.yml +++ b/deploy/templates/client_stub_default.yml @@ -12,6 +12,7 @@ client_name: # name can be anything - PYTHONUNBUFFERED=1 - RANK={rank} - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} ports: - "5002:5000" # {machine-port}:{docker-port} depends_on: diff --git a/deploy/templates/client_stub_medium.yml b/deploy/templates/client_stub_medium.yml index 6037ce44..0d3ded62 100644 --- a/deploy/templates/client_stub_medium.yml +++ b/deploy/templates/client_stub_medium.yml @@ -12,6 +12,7 @@ client_name: # name can be anything - PYTHONUNBUFFERED=1 - RANK={rank} - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} ports: - "5002:5000" # 
{machine-port}:{docker-port} depends_on: @@ -20,4 +21,3 @@ client_name: # name can be anything resources: limits: cpus: '1' - memory: 1024M diff --git a/deploy/templates/client_stub_slow.yml b/deploy/templates/client_stub_slow.yml index 7d541d65..5f39b9b3 100644 --- a/deploy/templates/client_stub_slow.yml +++ b/deploy/templates/client_stub_slow.yml @@ -12,6 +12,7 @@ client_name: # name can be anything - PYTHONUNBUFFERED=1 - RANK={rank} - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} ports: - "5002:5000" # {machine-port}:{docker-port} depends_on: @@ -19,5 +20,4 @@ client_name: # name can be anything deploy: resources: limits: - cpus: '0.5' - memory: 1024M + cpus: '0.5' \ No newline at end of file diff --git a/deploy/templates/system_stub.yml b/deploy/templates/system_stub.yml index 4f05dbfc..53159b83 100644 --- a/deploy/templates/system_stub.yml +++ b/deploy/templates/system_stub.yml @@ -14,6 +14,7 @@ services: - PYTHONUNBUFFERED=1 - RANK=0 - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} ports: - "5000:5000" # {machine-port}:{docker-port} networks: diff --git a/fltk/client.py b/fltk/client.py index 7c5fa710..2826c053 100644 --- a/fltk/client.py +++ b/fltk/client.py @@ -15,6 +15,7 @@ from torch.distributed.rpc import RRef from fltk.schedulers import MinCapableStepLR +from fltk.strategy.aggregation import FedAvg from fltk.strategy.offloading import OffloadingStrategy from fltk.util.arguments import Arguments from fltk.util.fed_avg import average_nn_parameters @@ -25,10 +26,15 @@ from fltk.util.profiler import Profiler from fltk.util.results import EpochData -logging.basicConfig(level=logging.DEBUG) +logging.basicConfig( + level=logging.DEBUG, + + format='%(asctime)s %(levelname)s %(module)s - %(funcName)s: %(message)s', +) global_dict = {} global_model_weights = {} +global_model_data_size = 0 global_offload_received = False @@ -76,9 +82,10 @@ def __init__(self, id, log_rref, rank, world_size, config = None): logging.info(f'Welcome to client 
{id}') self.id = id global_dict['id'] = id - global global_model_weights, global_offload_received + global global_model_weights, global_offload_received, global_model_data_size global_model_weights = None global_offload_received = False + global_model_data_size = 0 self.log_rref = log_rref self.rank = rank self.world_size = world_size @@ -262,10 +269,11 @@ def report_performance_estimate(self, performance_data): return _remote_method_async(Federator.perf_est_endpoint, self.server_ref, self.id, performance_data) @staticmethod - def offload_receive_endpoint(model_weights): + def offload_receive_endpoint(model_weights, num_train_samples): print(f'Got the offload_receive_endpoint endpoint') - global global_model_weights, global_offload_received + global global_model_weights, global_offload_received, global_model_data_size global_model_weights = copy.deepcopy(model_weights.copy()) + global_model_data_size = num_train_samples global_offload_received = True @staticmethod @@ -294,7 +302,7 @@ def unfreeze_layers(self): for param in self.net.parameters(): param.requires_grad = True - def train(self, epoch, deadline: int = None): + def train(self, epoch, deadline: int = None, warmup=False): """ Different modes: @@ -318,13 +326,11 @@ def train(self, epoch, deadline: int = None): :type epoch: int """ start_time = time.time() - deadline_threshold = 5 + deadline_threshold = 10 train_stop_time = None if self.deadline_enabled and deadline is not None: train_stop_time = start_time + deadline - deadline_threshold - strategy = OffloadingStrategy.VANILLA - # Ignore profiler for now # p = Profiler() # p.attach(self.net) @@ -356,13 +362,13 @@ def train(self, epoch, deadline: int = None): for i, (inputs, labels) in enumerate(self.dataset.get_train_loader(), 0): start_train_time = time.time() - if self.offload_enabled: + if self.offload_enabled and not warmup: # Check if there is a call to offload if self.call_to_offload: self.args.get_logger().info('Got call to offload model') 
model_weights = self.get_nn_parameters() - ret = rpc.rpc_sync(self.client_to_offload_to, Client.offload_receive_endpoint, args=([model_weights])) + ret = rpc.rpc_sync(self.client_to_offload_to, Client.offload_receive_endpoint, args=([model_weights, i])) print(f'Result of rref: {ret}') self.call_to_offload = False @@ -375,13 +381,15 @@ def train(self, epoch, deadline: int = None): if global_offload_received: self.args.get_logger().info('Merging offloaded model') self.args.get_logger().info('FedAvg locally with offloaded model') - updated_weights = average_nn_parameters([self.get_nn_parameters(), global_model_weights]) + updated_weights = FedAvg({'own': self.get_nn_parameters(), 'remote': global_model_weights}, {'own': i, 'remote': global_model_data_size}) + + # updated_weights = average_nn_parameters([self.get_nn_parameters(), global_model_weights]) self.args.get_logger().info('Updating local weights due to offloading') self.update_nn_parameters(updated_weights) global_offload_received = False global_model_weights = None - if self.deadline_enabled: + if self.deadline_enabled and not warmup: # Deadline if train_stop_time is not None: if time.time() >= train_stop_time: @@ -435,7 +443,7 @@ def train(self, epoch, deadline: int = None): logging.info(f'Estimated training time is {est_total_time}') self.report_performance_estimate((time_per_batch, est_total_time, number_of_training_samples)) - if self.freeze_layers_enabled: + if self.freeze_layers_enabled and not warmup: logging.info(f'Checking if need to freeze layers ? 
{est_total_time} > {deadline}') if est_total_time > deadline: logging.info('Will freeze layers to speed up computation') @@ -445,7 +453,7 @@ def train(self, epoch, deadline: int = None): # logging.info(f'Batch time is {batch_duration}') # Break away from loop for debug purposes - # if i > 50: + # if i > 5: # break control_end_time = time.time() @@ -453,8 +461,8 @@ def train(self, epoch, deadline: int = None): logging.info(f'Measure end time is {(control_end_time - control_start_time)}') logging.info(f'Trained on {training_process} samples') - - self.scheduler.step() + if not warmup: + self.scheduler.step() # Reset the layers self.unfreeze_layers() @@ -502,13 +510,14 @@ def test(self): return accuracy, loss, class_precision, class_recall - def run_epochs(self, num_epoch, deadline: int = None): + def run_epochs(self, num_epoch, deadline: int = None, warmup=False): start_time_train = datetime.datetime.now() self.dataset.get_train_sampler().set_epoch_size(num_epoch) # Train locally - loss, weights, training_process = self.train(self.epoch_counter, deadline) - self.epoch_counter += num_epoch + loss, weights, training_process = self.train(self.epoch_counter, deadline, warmup) + if not warmup: + self.epoch_counter += num_epoch elapsed_time_train = datetime.datetime.now() - start_time_train train_time_ms = int(elapsed_time_train.total_seconds()*1000) diff --git a/fltk/federator.py b/fltk/federator.py index 8790747c..b9f72ffc 100644 --- a/fltk/federator.py +++ b/fltk/federator.py @@ -26,11 +26,14 @@ from pathlib import Path import logging -from fltk.util.profile_plots import stability_plot, parse_stability_data +# from fltk.util.profile_plots import stability_plot, parse_stability_data from fltk.util.results import EpochData from fltk.util.tensor_converter import convert_distributed_data_into_numpy -logging.basicConfig(level=logging.DEBUG) +logging.basicConfig( + level=logging.DEBUG, + format='%(asctime)s %(levelname)s %(module)s - %(funcName)s: %(message)s', +) def 
_call_method(method, rref, *args, **kwargs): @@ -119,6 +122,9 @@ class Federator: swyh_enabled = False freeze_layers_enabled = False offload_enabled = False + warmup_active = False + + exp_start_time = 0 strategy = OffloadingStrategy.VANILLA @@ -278,10 +284,15 @@ def ask_client_to_offload(self, client1_ref, client2_ref): _remote_method(Client.call_to_offload_endpoint, client1_ref, client2_ref) logging.info(f'Done with call to offload') - def remote_run_epoch(self, epochs): + def remote_run_epoch(self, epochs, warmup=False, first_epoch=False): + if warmup: + logging.info('This is a WARMUP round') start_epoch_time = time.time() deadline = self.config.deadline deadline_time = self.config.deadline + if first_epoch: + deadline = self.config.first_deadline + deadline_time = self.config.first_deadline """ 1. Client selection 2. Run local updates @@ -318,7 +329,7 @@ def remote_run_epoch(self, epochs): deadline = 0 responses: List[ClientResponse] = [] for client in selected_clients: - cr = ClientResponse(self.response_id, client, _remote_method_async(Client.run_epochs, client.ref, num_epoch=epochs, deadline=deadline)) + cr = ClientResponse(self.response_id, client, _remote_method_async(Client.run_epochs, client.ref, num_epoch=epochs, deadline=deadline, warmup=warmup)) self.response_id += 1 self.response_list.append(cr) responses.append(cr) @@ -343,7 +354,7 @@ def reached_deadline(): has_not_called = True show_perf_data = True - while not all_finished and not (self.deadline_enabled and reached_deadline()): + while not all_finished and not ((self.deadline_enabled and reached_deadline()) or warmup): # if self.deadline_enabled and reached_deadline() # if has_not_called and (time.time() -start) > 10: # logging.info('Sending call to offload') @@ -376,7 +387,7 @@ def reached_deadline(): # weak_client = k # else: # strong_client = k - if self.offload_enabled: + if self.offload_enabled and not warmup: weak_client = est_keys[0] strong_client = est_keys[1] if 
self.performance_estimate[est_keys[1]][1] > self.performance_estimate[est_keys[0]][1]: @@ -398,9 +409,9 @@ def reached_deadline(): all_finished = False time.sleep(0.1) logging.info(f'Stopped waiting due to all_finished={all_finished} and deadline={reached_deadline()}') - for client_response in responses: - + if warmup: + break client = client_response.client logging.info(f'{client} had a exec time of {client_response.duration()} dropped?={client_response.dropped}') if client_response.dropped: @@ -415,6 +426,7 @@ def reached_deadline(): logging.info(f'{client} had a loss of {epoch_data.loss}') logging.info(f'{client} had a epoch data of {epoch_data}') logging.info(f'{client} has trained on {epoch_data.training_process} samples') + elapsed_time = client_response.end_time - self.exp_start_time client.tb_writer.add_scalar('training loss', epoch_data.loss_train, # for every 1000 minibatches @@ -424,6 +436,9 @@ def reached_deadline(): epoch_data.accuracy, # for every 1000 minibatches self.epoch_counter * client.data_size) + client.tb_writer.add_scalar('accuracy wall time', + epoch_data.accuracy, # for every 1000 minibatches + elapsed_time) client.tb_writer.add_scalar('training loss per epoch', epoch_data.loss_train, # for every 1000 minibatches self.epoch_counter) @@ -448,6 +463,10 @@ def reached_deadline(): # self.tb_writer.add_scalar('training loss', loss, self.epoch_counter * self.test_data.get_client_datasize()) # does not seem to work :( ) self.tb_writer.add_scalar('accuracy', accuracy, self.epoch_counter * self.test_data.get_client_datasize()) self.tb_writer.add_scalar('accuracy per epoch', accuracy, self.epoch_counter) + elapsed_time = time.time() - self.exp_start_time + self.tb_writer.add_scalar('accuracy wall time', + accuracy, # for every 1000 minibatches + elapsed_time) end_epoch_time = time.time() duration = end_epoch_time - start_epoch_time @@ -523,6 +542,12 @@ def run(self): addition = 0 epoch_to_run = self.config.epochs epoch_size = 
self.config.epochs_per_cycle + + if self.config.warmup_round: + logging.info('Running warmup round') + self.remote_run_epoch(epoch_size, warmup=True) + + self.exp_start_time = time.time() for epoch in range(epoch_to_run): self.process_response_list() logging.info(f'Running epoch {epoch}') diff --git a/fltk/util/analyze.py b/fltk/util/analyze.py index 985bc080..5c304464 100644 --- a/fltk/util/analyze.py +++ b/fltk/util/analyze.py @@ -6,10 +6,39 @@ if __name__ == '__main__': - df = pd.read_csv('output/general_data.csv') + exp_name = 'output/exp_offload_vanilla' + + general_file = f'{exp_name}-general_data.csv' + print(f'Loading data file: {general_file}') + df = pd.read_csv(general_file) print(df) plt.figure() sns.pointplot(data=df, x='epoch', y='accuracy') - plt.show() \ No newline at end of file + plt.title('Accuracy per epoch') + plt.show() + + plt.figure() + # sns.pointplot(data=df[df['epoch'] > 1], x='epoch', y='duration') + sns.pointplot(data=df, x='epoch', y='duration') + plt.title('Train time per epoch') + plt.show() + + dfs = [] + for file in [f'{exp_name}_client1_epochs.csv', f'{exp_name}_client2_epochs.csv']: + dfs.append(pd.read_csv(file)) + client_df = pd.concat(dfs, ignore_index=True) + + print('Loading client data') + plt.figure() + # sns.pointplot(data=client_df[client_df['epoch_id'] > 1], x='epoch_id', y='duration_train', hue='client_id') + sns.pointplot(data=client_df, x='epoch_id', y='duration_train', hue='client_id') + plt.title('Train time per epoch clients') + plt.show() + + plt.figure() + sns.pointplot(data=client_df, x='epoch_id', y='accuracy', hue='client_id') + plt.title('Accuracy per epoch clients') + plt.show() + diff --git a/fltk/util/base_config.py b/fltk/util/base_config.py index e41b92b9..284a3f51 100644 --- a/fltk/util/base_config.py +++ b/fltk/util/base_config.py @@ -46,6 +46,8 @@ def __init__(self): self.offload_strategy = 'vanilla' self.profiling_size = 100 self.deadline = 400 + self.first_deadline = 400 + self.warmup_round = False 
self.federator_host = '0.0.0.0' self.rank = 0 @@ -119,6 +121,10 @@ def merge_yaml(self, cfg = {}): self.profiling_size = cfg['profiling_size'] if 'deadline' in cfg: self.deadline = cfg['deadline'] + if 'first_deadline' in cfg: + self.first_deadline = cfg['first_deadline'] + if 'warmup_round' in cfg: + self.warmup_round = cfg['warmup_round'] if 'experiment_prefix' in cfg: self.experiment_prefix = cfg['experiment_prefix'] else: diff --git a/run_multi_exp.bash b/run_multi_exp.bash new file mode 100644 index 00000000..b833e6a0 --- /dev/null +++ b/run_multi_exp.bash @@ -0,0 +1,48 @@ +#!/bin/bash + +## declare an array variable +declare -a arr=("configs/experiment_vanilla.yaml" + # "configs/experiment_deadline.yaml" + # "configs/experiment_swyh.yaml" + # "configs/experiment_freeze.yaml" + # "configs/experiment_offload.yaml" + ) +EVENT_FILE="exp_events.txt" +# Check if all files are present +for i in "${arr[@]}" +do +# echo "$i" + if [ ! -f $i ]; then + echo "File not found! Cannot find: $i" +# exit + fi + # or do whatever with individual element of the array +done + +read -p "Do you wish to continue? (y/n)?" 
choice +case "$choice" in + y|Y ) ;; + n|N ) exit;; + * ) exit;; +esac + +echo "" > $EVENT_FILE + +# Start running experiments +## now loop through the above array +for i in "${arr[@]}" +do + export EXP_CONFIG_FILE="$i" + echo "[$(date +"%T")] Starting $EXP_CONFIG_FILE" + echo "[$(date +"%T")] Starting $EXP_CONFIG_FILE" >> $EVENT_FILE + start_time=$(date +%s) + docker-compose up --build 2>&1 | tee dc_log.txt + end_time=$(date +%s) + # elapsed time with second resolution + elapsed=$(( end_time - start_time )) + echo "[$(date +"%T")] Finished with $EXP_CONFIG_FILE in $elapsed seconds" >> $EVENT_FILE +# docker-compose up + # or do whatever with individual element of the array +done +echo "[$(date +"%T")] Finished all experiments" +echo "[$(date +"%T")] Finished all experiments" >> $EVENT_FILE From 6cb23c3c08991e2f4c9c0b0f4adedab7f30e72cf Mon Sep 17 00:00:00 2001 From: Lydia Date: Mon, 17 Jan 2022 17:40:58 +0100 Subject: [PATCH 10/73] Update docker deployment --- Dockerfile | 2 +- deploy/templates/client_stub_default.yml | 1 + deploy/templates/client_stub_fast.yml | 24 ++++++++++++++++++++++++ deploy/templates/client_stub_medium.yml | 3 ++- deploy/templates/client_stub_slow.yml | 3 ++- deploy/templates/system_stub.yml | 1 + run_multi_exp.bash | 4 +++- 7 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 deploy/templates/client_stub_fast.yml diff --git a/Dockerfile b/Dockerfile index abb7ce0a..6e79f4a4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -45,6 +45,6 @@ EXPOSE 5000 COPY fltk ./fltk COPY configs ./configs #CMD python3 ./fltk/__main__.py single configs/experiment.yaml --rank=$RANK -CMD python3 -m fltk single configs/experiment_vanilla.yaml --rank=$RANK +# CMD python3 -m fltk single configs/experiment_vanilla.yaml --rank=$RANK CMD python3 -m fltk single $EXP_CONFIG --rank=$RANK #CMD python3 setup.py \ No newline at end of file diff --git a/deploy/templates/client_stub_default.yml b/deploy/templates/client_stub_default.yml index 5ff5eeb5..3a1774cf 
100644 --- a/deploy/templates/client_stub_default.yml +++ b/deploy/templates/client_stub_default.yml @@ -1,5 +1,6 @@ client_name: # name can be anything # container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} restart: "no" # if it crashes for example build: . # look for the docker file where this file is currently located volumes: diff --git a/deploy/templates/client_stub_fast.yml b/deploy/templates/client_stub_fast.yml new file mode 100644 index 00000000..f03012ff --- /dev/null +++ b/deploy/templates/client_stub_fast.yml @@ -0,0 +1,24 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . # look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '3' diff --git a/deploy/templates/client_stub_medium.yml b/deploy/templates/client_stub_medium.yml index 0d3ded62..49abdeb2 100644 --- a/deploy/templates/client_stub_medium.yml +++ b/deploy/templates/client_stub_medium.yml @@ -1,5 +1,6 @@ client_name: # name can be anything # container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} restart: "no" # if it crashes for example build: . 
# look for the docker file where this file is currently located volumes: @@ -20,4 +21,4 @@ client_name: # name can be anything deploy: resources: limits: - cpus: '1' + cpus: '2' diff --git a/deploy/templates/client_stub_slow.yml b/deploy/templates/client_stub_slow.yml index 5f39b9b3..baf2ac30 100644 --- a/deploy/templates/client_stub_slow.yml +++ b/deploy/templates/client_stub_slow.yml @@ -1,5 +1,6 @@ client_name: # name can be anything # container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} restart: "no" # if it crashes for example build: . # look for the docker file where this file is currently located volumes: @@ -20,4 +21,4 @@ client_name: # name can be anything deploy: resources: limits: - cpus: '0.5' \ No newline at end of file + cpus: '0.75' \ No newline at end of file diff --git a/deploy/templates/system_stub.yml b/deploy/templates/system_stub.yml index 53159b83..4d161fc0 100644 --- a/deploy/templates/system_stub.yml +++ b/deploy/templates/system_stub.yml @@ -3,6 +3,7 @@ version: "3.3" services: fl_server: # name can be anything container_name: federation-lab-server # what the name for this container would be + cpuset: '0' restart: "no" # if it crashes for example build: . 
# look for the docker file where this file is currently located volumes: diff --git a/run_multi_exp.bash b/run_multi_exp.bash index b833e6a0..c86dba06 100644 --- a/run_multi_exp.bash +++ b/run_multi_exp.bash @@ -1,7 +1,9 @@ #!/bin/bash ## declare an array variable -declare -a arr=("configs/experiment_vanilla.yaml" +declare -a arr=( + "configs/exp_p2_vanilla.yaml" + # "configs/experiment_vanilla.yaml" # "configs/experiment_deadline.yaml" # "configs/experiment_swyh.yaml" # "configs/experiment_freeze.yaml" From 9f23e026ceaa1bdecd5f7e1920b84dbdab722de0 Mon Sep 17 00:00:00 2001 From: Lydia Date: Mon, 17 Jan 2022 17:41:16 +0100 Subject: [PATCH 11/73] Update exp descriptions --- configs/exp_p2_vanilla.yaml | 30 +++++++++++++++++++++ configs/exp_p2_w_4_s2_vanilla.yaml | 30 +++++++++++++++++++++ configs/exp_p2_w_4_s4_vanilla.yaml | 30 +++++++++++++++++++++ configs/exp_p3_w_4_s4_baseline.yaml | 30 +++++++++++++++++++++ configs/exp_p3_w_4_s4_baseline_check.yaml | 30 +++++++++++++++++++++ configs/exp_p3_w_4_s4_baseline_long.yaml | 30 +++++++++++++++++++++ configs/exp_p3_w_4_s4_deadline.yaml | 30 +++++++++++++++++++++ configs/exp_p3_w_4_s4_deadline_check.yaml | 30 +++++++++++++++++++++ configs/exp_p3_w_4_s4_deadline_long.yaml | 30 +++++++++++++++++++++ configs/exp_p3_w_4_s4_freeze.yaml | 30 +++++++++++++++++++++ configs/exp_p3_w_4_s4_freeze_long.yaml | 30 +++++++++++++++++++++ configs/exp_p3_w_4_s4_slow_baseline.yaml | 30 +++++++++++++++++++++ configs/exp_p3_w_4_s4_slow_deadline.yaml | 30 +++++++++++++++++++++ configs/exp_p3_w_4_s4_swyh.yaml | 30 +++++++++++++++++++++ configs/exp_p3_w_4_s4_swyh_long.yaml | 30 +++++++++++++++++++++ configs/exp_p4_w_4_s2_swyh_non_iid.yaml | 30 +++++++++++++++++++++ configs/exp_p4_w_4_s2_vanilla_non_iid.yaml | 30 +++++++++++++++++++++ configs/exp_p5_w_4_s4_deadline_non_iid.yaml | 30 +++++++++++++++++++++ configs/exp_p5_w_4_s4_freeze_non_iid.yaml | 30 +++++++++++++++++++++ configs/exp_p5_w_4_s4_swyh_non_iid.yaml | 30 +++++++++++++++++++++ 
configs/exp_p5_w_4_s4_vanilla_non_iid.yaml | 30 +++++++++++++++++++++ configs/exp_p6_w_4_s4_deadline_non_iid.yaml | 30 +++++++++++++++++++++ configs/exp_p6_w_4_s4_swyh_non_iid.yaml | 30 +++++++++++++++++++++ configs/exp_p7_w_4_s2_deadline_non_iid.yaml | 30 +++++++++++++++++++++ configs/exp_p7_w_4_s2_freeze_non_iid.yaml | 30 +++++++++++++++++++++ configs/exp_p7_w_4_s2_swyh_non_iid.yaml | 30 +++++++++++++++++++++ configs/exp_p7_w_4_s2_vanilla_non_iid.yaml | 30 +++++++++++++++++++++ configs/experiment.yaml | 8 +++--- 28 files changed, 814 insertions(+), 4 deletions(-) create mode 100644 configs/exp_p2_vanilla.yaml create mode 100644 configs/exp_p2_w_4_s2_vanilla.yaml create mode 100644 configs/exp_p2_w_4_s4_vanilla.yaml create mode 100644 configs/exp_p3_w_4_s4_baseline.yaml create mode 100644 configs/exp_p3_w_4_s4_baseline_check.yaml create mode 100644 configs/exp_p3_w_4_s4_baseline_long.yaml create mode 100644 configs/exp_p3_w_4_s4_deadline.yaml create mode 100644 configs/exp_p3_w_4_s4_deadline_check.yaml create mode 100644 configs/exp_p3_w_4_s4_deadline_long.yaml create mode 100644 configs/exp_p3_w_4_s4_freeze.yaml create mode 100644 configs/exp_p3_w_4_s4_freeze_long.yaml create mode 100644 configs/exp_p3_w_4_s4_slow_baseline.yaml create mode 100644 configs/exp_p3_w_4_s4_slow_deadline.yaml create mode 100644 configs/exp_p3_w_4_s4_swyh.yaml create mode 100644 configs/exp_p3_w_4_s4_swyh_long.yaml create mode 100644 configs/exp_p4_w_4_s2_swyh_non_iid.yaml create mode 100644 configs/exp_p4_w_4_s2_vanilla_non_iid.yaml create mode 100644 configs/exp_p5_w_4_s4_deadline_non_iid.yaml create mode 100644 configs/exp_p5_w_4_s4_freeze_non_iid.yaml create mode 100644 configs/exp_p5_w_4_s4_swyh_non_iid.yaml create mode 100644 configs/exp_p5_w_4_s4_vanilla_non_iid.yaml create mode 100644 configs/exp_p6_w_4_s4_deadline_non_iid.yaml create mode 100644 configs/exp_p6_w_4_s4_swyh_non_iid.yaml create mode 100644 configs/exp_p7_w_4_s2_deadline_non_iid.yaml create mode 100644 
configs/exp_p7_w_4_s2_freeze_non_iid.yaml create mode 100644 configs/exp_p7_w_4_s2_swyh_non_iid.yaml create mode 100644 configs/exp_p7_w_4_s2_vanilla_non_iid.yaml diff --git a/configs/exp_p2_vanilla.yaml b/configs/exp_p2_vanilla.yaml new file mode 100644 index 00000000..3ef45031 --- /dev/null +++ b/configs/exp_p2_vanilla.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 250 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p2_vanilla' +offload_stategy: vanilla +profiling_time: 100 +deadline: 500 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 5 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 19 diff --git a/configs/exp_p2_w_4_s2_vanilla.yaml b/configs/exp_p2_w_4_s2_vanilla.yaml new file mode 100644 index 00000000..d306a678 --- /dev/null +++ b/configs/exp_p2_w_4_s2_vanilla.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 100 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p2_w4_s2_vanilla' +offload_stategy: vanilla +profiling_time: 100 +deadline: 500 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 2 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + 
- 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p2_w_4_s4_vanilla.yaml b/configs/exp_p2_w_4_s4_vanilla.yaml new file mode 100644 index 00000000..66a45d77 --- /dev/null +++ b/configs/exp_p2_w_4_s4_vanilla.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 100 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p2_w4_s4_vanilla' +offload_stategy: vanilla +profiling_time: 100 +deadline: 500 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p3_w_4_s4_baseline.yaml b/configs/exp_p3_w_4_s4_baseline.yaml new file mode 100644 index 00000000..89c91bf4 --- /dev/null +++ b/configs/exp_p3_w_4_s4_baseline.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 30 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p3_w4_s4_baseline' +offload_stategy: vanilla +profiling_time: 100 +deadline: 500 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" 
(default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p3_w_4_s4_baseline_check.yaml b/configs/exp_p3_w_4_s4_baseline_check.yaml new file mode 100644 index 00000000..38903f2e --- /dev/null +++ b/configs/exp_p3_w_4_s4_baseline_check.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 30 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p3_w4_s4_baseline_check' +offload_stategy: vanilla +profiling_time: 100 +deadline: 500 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p3_w_4_s4_baseline_long.yaml b/configs/exp_p3_w_4_s4_baseline_long.yaml new file mode 100644 index 00000000..70c1373c --- /dev/null +++ b/configs/exp_p3_w_4_s4_baseline_long.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 250 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p3_w4_s4_baseline_long' +offload_stategy: vanilla +profiling_time: 100 +deadline: 35 +warmup_round: 
false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p3_w_4_s4_deadline.yaml b/configs/exp_p3_w_4_s4_deadline.yaml new file mode 100644 index 00000000..6e72e0f0 --- /dev/null +++ b/configs/exp_p3_w_4_s4_deadline.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 30 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p3_w4_s4_deadline' +offload_stategy: deadline +profiling_time: 100 +deadline: 35 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p3_w_4_s4_deadline_check.yaml b/configs/exp_p3_w_4_s4_deadline_check.yaml new file mode 100644 index 00000000..d0bf9250 --- /dev/null +++ b/configs/exp_p3_w_4_s4_deadline_check.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 30 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force 
CPU +cuda: false +experiment_prefix: 'exp_p3_w4_s4_deadline_check' +offload_stategy: deadline +profiling_time: 100 +deadline: 35 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p3_w_4_s4_deadline_long.yaml b/configs/exp_p3_w_4_s4_deadline_long.yaml new file mode 100644 index 00000000..24c814a8 --- /dev/null +++ b/configs/exp_p3_w_4_s4_deadline_long.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 250 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p3_w4_s4_deadline_long' +offload_stategy: deadline +profiling_time: 100 +deadline: 35 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p3_w_4_s4_freeze.yaml b/configs/exp_p3_w_4_s4_freeze.yaml new file mode 100644 index 00000000..e0aaa770 --- /dev/null +++ b/configs/exp_p3_w_4_s4_freeze.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration 
+total_epochs: 30 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p3_w4_s4_freeze' +offload_stategy: freeze +profiling_time: 100 +deadline: 35 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p3_w_4_s4_freeze_long.yaml b/configs/exp_p3_w_4_s4_freeze_long.yaml new file mode 100644 index 00000000..c91144ad --- /dev/null +++ b/configs/exp_p3_w_4_s4_freeze_long.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 250 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p3_w4_s4_freeze_long' +offload_stategy: freeze +profiling_time: 100 +deadline: 35 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p3_w_4_s4_slow_baseline.yaml b/configs/exp_p3_w_4_s4_slow_baseline.yaml new file mode 
100644 index 00000000..8e6bea48 --- /dev/null +++ b/configs/exp_p3_w_4_s4_slow_baseline.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 30 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p3_w4_s4_slow_baseline' +offload_stategy: vanilla +profiling_time: 100 +deadline: 500 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p3_w_4_s4_slow_deadline.yaml b/configs/exp_p3_w_4_s4_slow_deadline.yaml new file mode 100644 index 00000000..6e72e0f0 --- /dev/null +++ b/configs/exp_p3_w_4_s4_slow_deadline.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 30 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p3_w4_s4_deadline' +offload_stategy: deadline +profiling_time: 100 +deadline: 35 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 
'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p3_w_4_s4_swyh.yaml b/configs/exp_p3_w_4_s4_swyh.yaml new file mode 100644 index 00000000..e43ce120 --- /dev/null +++ b/configs/exp_p3_w_4_s4_swyh.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 30 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p3_w4_s4_swyh' +offload_stategy: swyh +profiling_time: 100 +deadline: 35 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p3_w_4_s4_swyh_long.yaml b/configs/exp_p3_w_4_s4_swyh_long.yaml new file mode 100644 index 00000000..6e898a94 --- /dev/null +++ b/configs/exp_p3_w_4_s4_swyh_long.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 250 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p3_w4_s4_swyh_long' +offload_stategy: swyh +profiling_time: 100 +deadline: 35 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random 
seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p4_w_4_s2_swyh_non_iid.yaml b/configs/exp_p4_w_4_s2_swyh_non_iid.yaml new file mode 100644 index 00000000..afdcef17 --- /dev/null +++ b/configs/exp_p4_w_4_s2_swyh_non_iid.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 30 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p4_w4_s2_swyh_non_iid' +offload_stategy: swyh +profiling_time: 100 +deadline: 37 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 2 +sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p4_w_4_s2_vanilla_non_iid.yaml b/configs/exp_p4_w_4_s2_vanilla_non_iid.yaml new file mode 100644 index 00000000..187a816b --- /dev/null +++ b/configs/exp_p4_w_4_s2_vanilla_non_iid.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 30 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p4_w4_s2_vanilla_non_iid' +offload_stategy: vanilla +profiling_time: 100 +deadline: 37 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 2 +sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit 
labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p5_w_4_s4_deadline_non_iid.yaml b/configs/exp_p5_w_4_s4_deadline_non_iid.yaml new file mode 100644 index 00000000..81cdbf31 --- /dev/null +++ b/configs/exp_p5_w_4_s4_deadline_non_iid.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 100 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p5_w4_s4_deadline_non_iid' +offload_stategy: deadline +profiling_time: 100 +deadline: 37 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p5_w_4_s4_freeze_non_iid.yaml b/configs/exp_p5_w_4_s4_freeze_non_iid.yaml new file mode 100644 index 00000000..ec7f11b6 --- /dev/null +++ b/configs/exp_p5_w_4_s4_freeze_non_iid.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 100 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p5_w4_s4_freeze_non_iid' +offload_stategy: freeze +profiling_time: 100 +deadline: 37 +warmup_round: false 
+output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p5_w_4_s4_swyh_non_iid.yaml b/configs/exp_p5_w_4_s4_swyh_non_iid.yaml new file mode 100644 index 00000000..49f4e6ed --- /dev/null +++ b/configs/exp_p5_w_4_s4_swyh_non_iid.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 100 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p5_w4_s4_swyh_non_iid' +offload_stategy: swyh +profiling_time: 100 +deadline: 37 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p5_w_4_s4_vanilla_non_iid.yaml b/configs/exp_p5_w_4_s4_vanilla_non_iid.yaml new file mode 100644 index 00000000..b3290d2d --- /dev/null +++ b/configs/exp_p5_w_4_s4_vanilla_non_iid.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 100 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is 
available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p5_w4_s4_vanilla_non_iid' +offload_stategy: vanilla +profiling_time: 100 +deadline: 37 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p6_w_4_s4_deadline_non_iid.yaml b/configs/exp_p6_w_4_s4_deadline_non_iid.yaml new file mode 100644 index 00000000..07e8c38e --- /dev/null +++ b/configs/exp_p6_w_4_s4_deadline_non_iid.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p6_w4_s4_deadline_non_iid' +offload_stategy: deadline +profiling_time: 100 +deadline: 37 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p6_w_4_s4_swyh_non_iid.yaml b/configs/exp_p6_w_4_s4_swyh_non_iid.yaml new file mode 100644 index 00000000..31859721 --- /dev/null +++ 
b/configs/exp_p6_w_4_s4_swyh_non_iid.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p6_w4_s4_swhy_non_iid' +offload_stategy: swyh +profiling_time: 100 +deadline: 37 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p7_w_4_s2_deadline_non_iid.yaml b/configs/exp_p7_w_4_s2_deadline_non_iid.yaml new file mode 100644 index 00000000..ef30cb9d --- /dev/null +++ b/configs/exp_p7_w_4_s2_deadline_non_iid.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p7_w4_s2_deadline_non_iid' +offload_stategy: deadline +profiling_time: 100 +deadline: 37 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 2 +sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 
'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p7_w_4_s2_freeze_non_iid.yaml b/configs/exp_p7_w_4_s2_freeze_non_iid.yaml new file mode 100644 index 00000000..1a72f579 --- /dev/null +++ b/configs/exp_p7_w_4_s2_freeze_non_iid.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p7_w4_s2_freeze_non_iid' +offload_stategy: freeze +profiling_time: 100 +deadline: 37 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 2 +sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p7_w_4_s2_swyh_non_iid.yaml b/configs/exp_p7_w_4_s2_swyh_non_iid.yaml new file mode 100644 index 00000000..7f8c7332 --- /dev/null +++ b/configs/exp_p7_w_4_s2_swyh_non_iid.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p7_w4_s2_swyh_non_iid' +offload_stategy: swyh +profiling_time: 100 +deadline: 37 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 2 +sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || 
alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p7_w_4_s2_vanilla_non_iid.yaml b/configs/exp_p7_w_4_s2_vanilla_non_iid.yaml new file mode 100644 index 00000000..2662656e --- /dev/null +++ b/configs/exp_p7_w_4_s2_vanilla_non_iid.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p7_w4_s2_vanilla_non_iid' +offload_stategy: vanilla +profiling_time: 100 +deadline: 37 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 2 +sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/experiment.yaml b/configs/experiment.yaml index 70296073..9fad4710 100644 --- a/configs/experiment.yaml +++ b/configs/experiment.yaml @@ -1,6 +1,6 @@ --- # Experiment configuration -total_epochs: 30 +total_epochs: 20 epochs_per_cycle: 1 wait_for_clients: true net: Cifar10CNN @@ -14,8 +14,8 @@ deadline: 500 warmup_round: true output_location: 'output' tensor_board_active: true -clients_per_round: 2 - sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +clients_per_round: 4 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) #sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" 
|| "uniform" (default) sampler_args: - 0.07 # label limit || q probability || alpha || unused @@ -25,4 +25,4 @@ system: hostname: '10.5.0.11' nic: 'eth0' clients: - amount: 2 + amount: 4 From 4f89331d723e3f77d80efc382852aa77e0c7ce44 Mon Sep 17 00:00:00 2001 From: Lydia Date: Mon, 17 Jan 2022 17:42:12 +0100 Subject: [PATCH 12/73] Update core for model transfer --- fltk/__main__.py | 5 +++ fltk/client.py | 15 ++++--- fltk/federator.py | 20 +++++++++- fltk/strategy/data_samplers.py | 58 ++++++++++++++++++++++++++++ fltk/util/analyze.py | 4 +- fltk/util/generate_docker_compose.py | 34 ++++++++++++---- 6 files changed, 121 insertions(+), 15 deletions(-) diff --git a/fltk/__main__.py b/fltk/__main__.py index ac8cf05c..263a7fa5 100644 --- a/fltk/__main__.py +++ b/fltk/__main__.py @@ -1,5 +1,8 @@ import os +import random import sys +import time + import torch.distributed.rpc as rpc import logging @@ -55,6 +58,8 @@ def main(): run_single(rank=args.rank, world_size=world_size, host=master_address, args=None, nic=nic) else: with open(args.config) as file: + sleep_time = random.uniform(0, 5.0) + time.sleep(sleep_time) cfg = BareConfig() yaml_data = yaml.load(file, Loader=yaml.FullLoader) cfg.merge_yaml(yaml_data) diff --git a/fltk/client.py b/fltk/client.py index 2826c053..ce29a0ae 100644 --- a/fltk/client.py +++ b/fltk/client.py @@ -389,7 +389,7 @@ def train(self, epoch, deadline: int = None, warmup=False): global_offload_received = False global_model_weights = None - if self.deadline_enabled and not warmup: + if self.swyh_enabled and not warmup: # Deadline if train_stop_time is not None: if time.time() >= train_stop_time: @@ -457,12 +457,17 @@ def train(self, epoch, deadline: int = None, warmup=False): # break control_end_time = time.time() - logging.info(f'Measure end time is {(control_end_time - control_start_time)}') logging.info(f'Trained on {training_process} samples') if not warmup: self.scheduler.step() + # logging.info(self.optimizer.param_groups) + scheduler_data = 
{ + 'lr': self.scheduler.optimizer.param_groups[0]['lr'], + 'momentum': self.scheduler.optimizer.param_groups[0]['momentum'], + 'wd': self.scheduler.optimizer.param_groups[0]['weight_decay'], + } # Reset the layers self.unfreeze_layers() @@ -471,7 +476,7 @@ def train(self, epoch, deadline: int = None, warmup=False): if self.args.should_save_model(epoch): self.save_model(epoch, self.args.get_epoch_save_end_suffix()) - return final_running_loss, self.get_nn_parameters(), training_process + return final_running_loss, self.get_nn_parameters(), training_process, scheduler_data def test(self): self.net.eval() @@ -515,7 +520,7 @@ def run_epochs(self, num_epoch, deadline: int = None, warmup=False): self.dataset.get_train_sampler().set_epoch_size(num_epoch) # Train locally - loss, weights, training_process = self.train(self.epoch_counter, deadline, warmup) + loss, weights, training_process, scheduler_data = self.train(self.epoch_counter, deadline, warmup) if not warmup: self.epoch_counter += num_epoch elapsed_time_train = datetime.datetime.now() - start_time_train @@ -532,7 +537,7 @@ def run_epochs(self, num_epoch, deadline: int = None, warmup=False): # Copy GPU tensors to CPU for k, v in weights.items(): weights[k] = v.cpu() - return data, weights + return data, weights, scheduler_data def save_model(self, epoch, suffix): """ diff --git a/fltk/federator.py b/fltk/federator.py index b9f72ffc..40b5311f 100644 --- a/fltk/federator.py +++ b/fltk/federator.py @@ -90,6 +90,7 @@ def finish(self): self.end_time = time.time() self.done = True self.dropped = False + print(f'>>>> \t\tClient {self.id} has a duration of {self.duration()}') def duration(self): return self.end_time - self.start_time @@ -330,6 +331,7 @@ def remote_run_epoch(self, epochs, warmup=False, first_epoch=False): responses: List[ClientResponse] = [] for client in selected_clients: cr = ClientResponse(self.response_id, client, _remote_method_async(Client.run_epochs, client.ref, num_epoch=epochs, deadline=deadline, 
warmup=warmup)) + cr.start_time = time.time() self.response_id += 1 self.response_list.append(cr) responses.append(cr) @@ -421,7 +423,7 @@ def reached_deadline(): if not client_response.dropped: client.available = True - epoch_data, weights = client_response.future.wait() + epoch_data, weights, scheduler_data = client_response.future.wait() self.client_data[epoch_data.client_id].append(epoch_data) logging.info(f'{client} had a loss of {epoch_data.loss}') logging.info(f'{client} had a epoch data of {epoch_data}') @@ -447,6 +449,22 @@ def reached_deadline(): epoch_data.accuracy, # for every 1000 minibatches self.epoch_counter) + client.tb_writer.add_scalar('Client time per epoch', + client_response.duration(), # for every 1000 minibatches + self.epoch_counter) + + client.tb_writer.add_scalar('learning rate', + scheduler_data['lr'], + self.epoch_counter) + + client.tb_writer.add_scalar('momentum', + scheduler_data['momentum'], + self.epoch_counter) + + client.tb_writer.add_scalar('weight decay', + scheduler_data['wd'], + self.epoch_counter) + client_weights.append(weights) client_weights_dict[client.name] = weights client_training_process_dict[client.name] = epoch_data.training_process diff --git a/fltk/strategy/data_samplers.py b/fltk/strategy/data_samplers.py index 98eba58e..4256ae06 100644 --- a/fltk/strategy/data_samplers.py +++ b/fltk/strategy/data_samplers.py @@ -59,6 +59,61 @@ def __iter__(self) -> Iterator[int]: def __len__(self) -> int: return len(self.indices) + +class LimitLabelsSamplerFlex(DistributedSamplerWrapper): + """ + A sampler that limits the number of labels per client + The number of clients must <= than number of labels + """ + + def __init__(self, dataset, num_replicas, rank, args=(5, 42)): + limit, seed = args + super().__init__(dataset, num_replicas, rank, seed) + + labels_per_client = int(np.floor(self.n_labels / self.n_clients)) + remaining_labels = self.n_labels - labels_per_client + labels = list(range(self.n_labels)) # list of labels to 
distribute + clients = list(range(self.n_clients)) # keeps track of which clients should still be given a label + client_labels = [set() for n in range(self.n_clients)] # set of labels given to each client + random.seed(seed) # seed, such that the same result can be obtained multiple times + print(client_labels) + + label_order = random.sample(labels, len(labels)) + client_label_dict = {} + for client_id in clients: + client_label_dict[client_id] = [] + for _ in range(labels_per_client): + chosen_label = label_order.pop() + client_label_dict[client_id].append(chosen_label) + client_labels[client_id].add(chosen_label) + client_label_dict['rest'] = label_order + + indices = [] + ordered_by_label = self.order_by_label(dataset) + labels = client_label_dict[self.client_id] + for label in labels: + n_samples = int(len(ordered_by_label[label])) + clients = [c for c, s in enumerate(client_labels) if label in s] # find out which clients have this label + index = clients.index(self.client_id) # find the position of this client + start_index = index * n_samples # inclusive + if rank == self.n_clients: + end_index = len(ordered_by_label[label]) # exclusive + else: + end_index = start_index + n_samples # exclusive + + indices += ordered_by_label[label][start_index:end_index] + + # Last part is uniform sampler + rest_indices = [] + for l in client_label_dict['rest']: + rest_indices += ordered_by_label[l] + filtered_rest_indices = rest_indices[self.rank:self.total_size:self.num_replicas] + indices += filtered_rest_indices + random.seed(seed + self.client_id) # give each client a unique shuffle + random.shuffle(indices) # shuffle indices to spread the labels + + self.indices = indices + class LimitLabelsSampler(DistributedSamplerWrapper): """ A sampler that limits the number of labels per client @@ -251,6 +306,9 @@ def get_sampler(dataset, args): elif method == "limit labels": sampler = LimitLabelsSampler(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), 
args=args.get_sampler_args()) + elif method == "limit labels flex": + sampler = LimitLabelsSamplerFlex(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), + args=args.get_sampler_args()) elif method == "dirichlet": sampler = DirichletSampler(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), args=args.get_sampler_args()) diff --git a/fltk/util/analyze.py b/fltk/util/analyze.py index 5c304464..0784978b 100644 --- a/fltk/util/analyze.py +++ b/fltk/util/analyze.py @@ -6,7 +6,7 @@ if __name__ == '__main__': - exp_name = 'output/exp_offload_vanilla' + exp_name = 'output/exp_p3_w4_s4_deadline' general_file = f'{exp_name}-general_data.csv' print(f'Loading data file: {general_file}') @@ -26,7 +26,7 @@ plt.show() dfs = [] - for file in [f'{exp_name}_client1_epochs.csv', f'{exp_name}_client2_epochs.csv']: + for file in [f'{exp_name}_client1_epochs.csv', f'{exp_name}_client2_epochs.csv', f'{exp_name}_client3_epochs.csv', f'{exp_name}_client4_epochs.csv']: dfs.append(pd.read_csv(file)) client_df = pd.concat(dfs, ignore_index=True) diff --git a/fltk/util/generate_docker_compose.py b/fltk/util/generate_docker_compose.py index 8d910446..f0e533ef 100644 --- a/fltk/util/generate_docker_compose.py +++ b/fltk/util/generate_docker_compose.py @@ -14,7 +14,7 @@ def load_client_template(type='default'): documents = yaml.full_load(file) return documents -def generate_client(id, template: dict, world_size: int, type='default'): +def generate_client(id, template: dict, world_size: int, type='default', cpu_set=''): local_template = copy.deepcopy(template) key_name = list(local_template.keys())[0] container_name = f'client_{type}_{id}' @@ -24,28 +24,48 @@ def generate_client(id, template: dict, world_size: int, type='default'): local_template[container_name]['environment'][key] = item.format(rank=id) if item == 'WORLD_SIZE={world_size}': local_template[container_name]['environment'][key] = item.format(world_size=world_size) + # for key, item in 
enumerate(local_template[container_name]): + # if item == 'cpuset: {cpu_set}': + # local_template[container_name][key] = item.format(cpu_set=cpu_set) local_template[container_name]['ports'] = [f'{5000+id}:5000'] + local_template[container_name]['cpuset'] = f'{cpu_set}' return local_template, container_name def generate_offload_exp(): - num_clients = 2 + num_clients = 4 + cpu_per_client = 1 + num_cpus = 20 world_size = num_clients + 1 system_template: dict = load_system_template() for key, item in enumerate(system_template['services']['fl_server']['environment']): if item == 'WORLD_SIZE={world_size}': system_template['services']['fl_server']['environment'][key] = item.format(world_size=world_size) - + cpu_set = 0 + cpu_idx = 1 for client_id in range(1, num_clients + 1): + # client_type = 'medium' client_type = 'default' - if client_id == 1: + if client_id == 1 or client_id == 2: client_type = 'medium' - # if client_id == 2: - # client_type = 'slow' + cpu_set = f'{cpu_idx}-{cpu_idx+1}' + cpu_idx += 2 + elif client_id == 3: + client_type = 'slow' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + elif client_id == 4: + client_type = 'fast' + cpu_set = f'{cpu_idx}-{cpu_idx + 2}' + cpu_idx += 3 + else: + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + client_template: dict = load_client_template(type=client_type) - client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type) + client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type, cpu_set=cpu_set) system_template['services'].update(client_definition) with open(r'./docker-compose.yml', 'w') as file: From 74a467cb76d5d2e5c1875235ff18d7a167dfb87a Mon Sep 17 00:00:00 2001 From: Lydia Date: Tue, 18 Jan 2022 16:01:12 +0100 Subject: [PATCH 13/73] Add other datasets --- configs/experiment.yaml | 6 ++--- configs/experiment_cifar100.yaml | 28 ++++++++++++++++++++++ configs/experiment_fmnist.yaml | 28 ++++++++++++++++++++++ 
deploy/templates/client_stub_slow.yml | 2 +- deploy/templates/system_stub.yml | 2 +- fltk/util/generate_docker_compose.py | 34 ++++++++++++++------------- 6 files changed, 79 insertions(+), 21 deletions(-) create mode 100644 configs/experiment_cifar100.yaml create mode 100644 configs/experiment_fmnist.yaml diff --git a/configs/experiment.yaml b/configs/experiment.yaml index 9fad4710..1f95b0d6 100644 --- a/configs/experiment.yaml +++ b/configs/experiment.yaml @@ -11,10 +11,10 @@ experiment_prefix: 'experiment_sample' offload_stategy: vanilla profiling_time: 100 deadline: 500 -warmup_round: true +warmup_round: false output_location: 'output' tensor_board_active: true -clients_per_round: 4 +clients_per_round: 10 sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) #sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) sampler_args: @@ -25,4 +25,4 @@ system: hostname: '10.5.0.11' nic: 'eth0' clients: - amount: 4 + amount: 15 diff --git a/configs/experiment_cifar100.yaml b/configs/experiment_cifar100.yaml new file mode 100644 index 00000000..b579942a --- /dev/null +++ b/configs/experiment_cifar100.yaml @@ -0,0 +1,28 @@ +--- +# Experiment configuration +total_epochs: 60 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar100ResNet +dataset: cifar100 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'experiment_cifar100' +offload_stategy: vanilla +profiling_time: 100 +deadline: 500 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + hostname: '10.5.0.11' + nic: 'eth0' + clients: + 
amount: 4 diff --git a/configs/experiment_fmnist.yaml b/configs/experiment_fmnist.yaml new file mode 100644 index 00000000..ed14131d --- /dev/null +++ b/configs/experiment_fmnist.yaml @@ -0,0 +1,28 @@ +--- +# Experiment configuration +total_epochs: 20 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'experiment_fmnist' +offload_stategy: vanilla +profiling_time: 100 +deadline: 500 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 15 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + hostname: '10.5.0.11' + nic: 'eth0' + clients: + amount: 15 diff --git a/deploy/templates/client_stub_slow.yml b/deploy/templates/client_stub_slow.yml index baf2ac30..7540d5b7 100644 --- a/deploy/templates/client_stub_slow.yml +++ b/deploy/templates/client_stub_slow.yml @@ -21,4 +21,4 @@ client_name: # name can be anything deploy: resources: limits: - cpus: '0.75' \ No newline at end of file + cpus: '1' \ No newline at end of file diff --git a/deploy/templates/system_stub.yml b/deploy/templates/system_stub.yml index 4d161fc0..c84b2ecb 100644 --- a/deploy/templates/system_stub.yml +++ b/deploy/templates/system_stub.yml @@ -3,7 +3,7 @@ version: "3.3" services: fl_server: # name can be anything container_name: federation-lab-server # what the name for this container would be - cpuset: '0' + cpuset: '0-2' restart: "no" # if it crashes for example build: . 
# look for the docker file where this file is currently located volumes: diff --git a/fltk/util/generate_docker_compose.py b/fltk/util/generate_docker_compose.py index f0e533ef..04cb0d34 100644 --- a/fltk/util/generate_docker_compose.py +++ b/fltk/util/generate_docker_compose.py @@ -32,6 +32,8 @@ def generate_client(id, template: dict, world_size: int, type='default', cpu_set local_template[container_name]['cpuset'] = f'{cpu_set}' return local_template, container_name +def generate_compose_file(): + print() def generate_offload_exp(): num_clients = 4 @@ -44,25 +46,25 @@ def generate_offload_exp(): if item == 'WORLD_SIZE={world_size}': system_template['services']['fl_server']['environment'][key] = item.format(world_size=world_size) cpu_set = 0 - cpu_idx = 1 + cpu_idx = 3 for client_id in range(1, num_clients + 1): # client_type = 'medium' client_type = 'default' - if client_id == 1 or client_id == 2: - client_type = 'medium' - cpu_set = f'{cpu_idx}-{cpu_idx+1}' - cpu_idx += 2 - elif client_id == 3: - client_type = 'slow' - cpu_set = f'{cpu_idx}' - cpu_idx += 1 - elif client_id == 4: - client_type = 'fast' - cpu_set = f'{cpu_idx}-{cpu_idx + 2}' - cpu_idx += 3 - else: - cpu_set = f'{cpu_idx}' - cpu_idx += 1 + # if client_id == 1 or client_id == 2: + # client_type = 'medium' + # cpu_set = f'{cpu_idx}-{cpu_idx+1}' + # cpu_idx += 2 + # elif client_id == 3: + # client_type = 'slow' + # cpu_set = f'{cpu_idx}' + # cpu_idx += 1 + # elif client_id == 4: + client_type = 'fast' + cpu_set = f'{cpu_idx}-{cpu_idx + 2}' + cpu_idx += 3 + # else: + # cpu_set = f'{cpu_idx}' + # cpu_idx += 1 client_template: dict = load_client_template(type=client_type) client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type, cpu_set=cpu_set) From 3d3db17ad7a1da24064b7556a34aaa0f4e73249d Mon Sep 17 00:00:00 2001 From: Lydia Date: Mon, 24 Jan 2022 00:59:56 +0100 Subject: [PATCH 14/73] Update experiments --- 
...exp_p8_w_4_s2_baseline_fmnist_non_iid.yaml | 32 ++ .../exp_p8_w_4_s2_freeze_fmnist_non_iid.yaml | 32 ++ .../exp_p8_w_4_s2_offload_fmnist_non_iid.yaml | 32 ++ .../exp_p8_w_4_s2_swyh_fmnist_non_iid.yaml | 32 ++ configs/exp_p8_w_4_s4_baseline_fmnist.yaml | 32 ++ ...exp_p8_w_4_s4_baseline_fmnist_non_iid.yaml | 32 ++ configs/exp_p8_w_4_s4_freeze_fmnist.yaml | 32 ++ .../exp_p8_w_4_s4_freeze_fmnist_non_iid.yaml | 32 ++ configs/exp_p8_w_4_s4_offload_fmnist.yaml | 32 ++ .../exp_p8_w_4_s4_offload_fmnist_non_iid.yaml | 32 ++ configs/exp_p8_w_4_s4_swyh_fmnist.yaml | 32 ++ .../exp_p8_w_4_s4_swyh_fmnist_non_iid.yaml | 32 ++ configs/exp_p8_w_4_s4_vanilla_.yaml | 30 ++ configs/exp_p8_w_4_s4_vanilla_fmnist.yaml | 32 ++ configs/experiment.yaml | 10 +- configs/experiment_fmnist.yaml | 4 +- configs/experiment_fmnist_offload.yaml | 30 ++ deploy/templates/client_stub_slow.yml | 2 +- fltk/client.py | 314 ++++++++++++++---- fltk/federator.py | 140 +++++++- fltk/util/generate_docker_compose.py | 32 +- fltk/util/profiler.py | 2 +- fltk/util/profilerV2.py | 171 ++++++++++ fltk/util/profilerV3.py | 262 +++++++++++++++ fltk/util/timer.py | 11 + 25 files changed, 1328 insertions(+), 96 deletions(-) create mode 100644 configs/exp_p8_w_4_s2_baseline_fmnist_non_iid.yaml create mode 100644 configs/exp_p8_w_4_s2_freeze_fmnist_non_iid.yaml create mode 100644 configs/exp_p8_w_4_s2_offload_fmnist_non_iid.yaml create mode 100644 configs/exp_p8_w_4_s2_swyh_fmnist_non_iid.yaml create mode 100644 configs/exp_p8_w_4_s4_baseline_fmnist.yaml create mode 100644 configs/exp_p8_w_4_s4_baseline_fmnist_non_iid.yaml create mode 100644 configs/exp_p8_w_4_s4_freeze_fmnist.yaml create mode 100644 configs/exp_p8_w_4_s4_freeze_fmnist_non_iid.yaml create mode 100644 configs/exp_p8_w_4_s4_offload_fmnist.yaml create mode 100644 configs/exp_p8_w_4_s4_offload_fmnist_non_iid.yaml create mode 100644 configs/exp_p8_w_4_s4_swyh_fmnist.yaml create mode 100644 configs/exp_p8_w_4_s4_swyh_fmnist_non_iid.yaml create mode 100644 
configs/exp_p8_w_4_s4_vanilla_.yaml create mode 100644 configs/exp_p8_w_4_s4_vanilla_fmnist.yaml create mode 100644 configs/experiment_fmnist_offload.yaml create mode 100644 fltk/util/profilerV2.py create mode 100644 fltk/util/profilerV3.py create mode 100644 fltk/util/timer.py diff --git a/configs/exp_p8_w_4_s2_baseline_fmnist_non_iid.yaml b/configs/exp_p8_w_4_s2_baseline_fmnist_non_iid.yaml new file mode 100644 index 00000000..218bffdd --- /dev/null +++ b/configs/exp_p8_w_4_s2_baseline_fmnist_non_iid.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +#net: Cifar10CNN +#dataset: cifar10 +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p8_w4_s2_baseline_fmnist_non_iid' +offload_stategy: vanilla +profiling_time: 100 +deadline: 17.5 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 2 +sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p8_w_4_s2_freeze_fmnist_non_iid.yaml b/configs/exp_p8_w_4_s2_freeze_fmnist_non_iid.yaml new file mode 100644 index 00000000..fe437054 --- /dev/null +++ b/configs/exp_p8_w_4_s2_freeze_fmnist_non_iid.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +#net: Cifar10CNN +#dataset: cifar10 +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 
'exp_p8_w4_s2_freeze_fmnist_non_iid' +offload_stategy: freeze +profiling_time: 100 +deadline: 17.5 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 2 +sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p8_w_4_s2_offload_fmnist_non_iid.yaml b/configs/exp_p8_w_4_s2_offload_fmnist_non_iid.yaml new file mode 100644 index 00000000..7194ea73 --- /dev/null +++ b/configs/exp_p8_w_4_s2_offload_fmnist_non_iid.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +#net: Cifar10CNN +#dataset: cifar10 +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p8_w4_s2_offload_fmnist_non_iid' +offload_stategy: offload +profiling_time: 100 +deadline: 17.5 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 2 +sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p8_w_4_s2_swyh_fmnist_non_iid.yaml b/configs/exp_p8_w_4_s2_swyh_fmnist_non_iid.yaml new file mode 100644 index 00000000..836b99c8 --- /dev/null 
+++ b/configs/exp_p8_w_4_s2_swyh_fmnist_non_iid.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +#net: Cifar10CNN +#dataset: cifar10 +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p8_w4_s2_swyh_fmnist_non_iid' +offload_stategy: swyh +profiling_time: 100 +deadline: 17.5 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 2 +sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p8_w_4_s4_baseline_fmnist.yaml b/configs/exp_p8_w_4_s4_baseline_fmnist.yaml new file mode 100644 index 00000000..9b5dbca1 --- /dev/null +++ b/configs/exp_p8_w_4_s4_baseline_fmnist.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +#net: Cifar10CNN +#dataset: cifar10 +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p8_w4_s4_baseline_fmnist' +offload_stategy: vanilla +profiling_time: 100 +deadline: 17.5 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused 
+system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p8_w_4_s4_baseline_fmnist_non_iid.yaml b/configs/exp_p8_w_4_s4_baseline_fmnist_non_iid.yaml new file mode 100644 index 00000000..aca5e153 --- /dev/null +++ b/configs/exp_p8_w_4_s4_baseline_fmnist_non_iid.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +#net: Cifar10CNN +#dataset: cifar10 +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p8_w4_s4_baseline_fmnist_non_iid' +offload_stategy: vanilla +profiling_time: 100 +deadline: 17.5 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p8_w_4_s4_freeze_fmnist.yaml b/configs/exp_p8_w_4_s4_freeze_fmnist.yaml new file mode 100644 index 00000000..50788622 --- /dev/null +++ b/configs/exp_p8_w_4_s4_freeze_fmnist.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +#net: Cifar10CNN +#dataset: cifar10 +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p8_w4_s4_freeze_fmnist' +offload_stategy: freeze +profiling_time: 100 +deadline: 17.5 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: 
"uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p8_w_4_s4_freeze_fmnist_non_iid.yaml b/configs/exp_p8_w_4_s4_freeze_fmnist_non_iid.yaml new file mode 100644 index 00000000..bb4bad80 --- /dev/null +++ b/configs/exp_p8_w_4_s4_freeze_fmnist_non_iid.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +#net: Cifar10CNN +#dataset: cifar10 +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p8_w4_s4_freeze_fmnist_non_iid' +offload_stategy: freeze +profiling_time: 100 +deadline: 17.5 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p8_w_4_s4_offload_fmnist.yaml b/configs/exp_p8_w_4_s4_offload_fmnist.yaml new file mode 100644 index 00000000..04e5ef9d --- /dev/null +++ b/configs/exp_p8_w_4_s4_offload_fmnist.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +#net: Cifar10CNN +#dataset: cifar10 +net: FashionMNISTCNN +dataset: 
fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p8_w4_s4_offload_fmnist' +offload_stategy: offload +profiling_time: 100 +deadline: 17.5 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p8_w_4_s4_offload_fmnist_non_iid.yaml b/configs/exp_p8_w_4_s4_offload_fmnist_non_iid.yaml new file mode 100644 index 00000000..5d2f1618 --- /dev/null +++ b/configs/exp_p8_w_4_s4_offload_fmnist_non_iid.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +#net: Cifar10CNN +#dataset: cifar10 +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p8_w4_s4_offload_fmnist_non_iid' +offload_stategy: offload +profiling_time: 100 +deadline: 17.5 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p8_w_4_s4_swyh_fmnist.yaml 
b/configs/exp_p8_w_4_s4_swyh_fmnist.yaml new file mode 100644 index 00000000..bce29ecd --- /dev/null +++ b/configs/exp_p8_w_4_s4_swyh_fmnist.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +#net: Cifar10CNN +#dataset: cifar10 +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p8_w4_s4_swyh_fmnist' +offload_stategy: swyh +profiling_time: 100 +deadline: 17.5 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p8_w_4_s4_swyh_fmnist_non_iid.yaml b/configs/exp_p8_w_4_s4_swyh_fmnist_non_iid.yaml new file mode 100644 index 00000000..97a5ba7f --- /dev/null +++ b/configs/exp_p8_w_4_s4_swyh_fmnist_non_iid.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +#net: Cifar10CNN +#dataset: cifar10 +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p8_w4_s4_swyh_fmnist_non_iid' +offload_stategy: swyh +profiling_time: 100 +deadline: 17.5 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q 
probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p8_w_4_s4_vanilla_.yaml b/configs/exp_p8_w_4_s4_vanilla_.yaml new file mode 100644 index 00000000..e03d0b4d --- /dev/null +++ b/configs/exp_p8_w_4_s4_vanilla_.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p8_w4_s4_vanilla' +offload_stategy: vanilla +profiling_time: 100 +deadline: 37 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/exp_p8_w_4_s4_vanilla_fmnist.yaml b/configs/exp_p8_w_4_s4_vanilla_fmnist.yaml new file mode 100644 index 00000000..29a819b3 --- /dev/null +++ b/configs/exp_p8_w_4_s4_vanilla_fmnist.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +#net: Cifar10CNN +#dataset: cifar10 +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p8_w4_s4_vanilla_fmnist' +offload_stategy: vanilla +profiling_time: 100 +deadline: 37 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "uniform" # "limit labels" 
|| "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/experiment.yaml b/configs/experiment.yaml index 1f95b0d6..8e180b73 100644 --- a/configs/experiment.yaml +++ b/configs/experiment.yaml @@ -8,13 +8,13 @@ dataset: cifar10 # Use cuda is available; setting to false will force CPU cuda: false experiment_prefix: 'experiment_sample' -offload_stategy: vanilla -profiling_time: 100 -deadline: 500 +offload_stategy: deadline +profiling_time: -1 +deadline: 1000 warmup_round: false output_location: 'output' tensor_board_active: true -clients_per_round: 10 +clients_per_round: 4 sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) #sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) sampler_args: @@ -25,4 +25,4 @@ system: hostname: '10.5.0.11' nic: 'eth0' clients: - amount: 15 + amount: 4 diff --git a/configs/experiment_fmnist.yaml b/configs/experiment_fmnist.yaml index ed14131d..cb9ce75d 100644 --- a/configs/experiment_fmnist.yaml +++ b/configs/experiment_fmnist.yaml @@ -14,7 +14,7 @@ deadline: 500 warmup_round: false output_location: 'output' tensor_board_active: true -clients_per_round: 15 +clients_per_round: 4 sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) #sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) sampler_args: @@ -25,4 +25,4 @@ system: hostname: '10.5.0.11' nic: 'eth0' clients: - amount: 15 + amount: 4 diff --git a/configs/experiment_fmnist_offload.yaml b/configs/experiment_fmnist_offload.yaml new file mode 100644 index 00000000..696808cf --- 
/dev/null +++ b/configs/experiment_fmnist_offload.yaml @@ -0,0 +1,30 @@ +--- +# Experiment configuration +total_epochs: 20 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +#net: Cifar10CNN +#dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'experiment_fmnist_offload' +offload_stategy: offload +profiling_time: 100 +deadline: 13 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + hostname: '10.5.0.11' + nic: 'eth0' + clients: + amount: 4 diff --git a/deploy/templates/client_stub_slow.yml b/deploy/templates/client_stub_slow.yml index 7540d5b7..9cbdabb5 100644 --- a/deploy/templates/client_stub_slow.yml +++ b/deploy/templates/client_stub_slow.yml @@ -21,4 +21,4 @@ client_name: # name can be anything deploy: resources: limits: - cpus: '1' \ No newline at end of file + cpus: '0.5' \ No newline at end of file diff --git a/fltk/client.py b/fltk/client.py index ce29a0ae..51bbd874 100644 --- a/fltk/client.py +++ b/fltk/client.py @@ -24,7 +24,10 @@ import yaml from fltk.util.profiler import Profiler +from fltk.util.profilerV2 import Profiler as P2 +from fltk.util.profilerV3 import Profiler as P3 from fltk.util.results import EpochData +from fltk.util.timer import elapsed_timer logging.basicConfig( level=logging.DEBUG, @@ -35,6 +38,7 @@ global_dict = {} global_model_weights = {} global_model_data_size = 0 +global_sender_id = "" global_offload_received = False @@ -68,18 +72,20 @@ class Client: epoch_results: List[EpochData] = [] epoch_counter = 0 server_ref = None + offloaded_net = None # Model 
offloading received_offload_model = False offloaded_model_weights = None call_to_offload = False client_to_offload_to : str = None + offloaded_model_ready = False strategy = OffloadingStrategy.VANILLA def __init__(self, id, log_rref, rank, world_size, config = None): - logging.info(f'Welcome to client {id}') + # logging.info(f'Welcome to client {id}') self.id = id global_dict['id'] = id global global_model_weights, global_offload_received, global_model_data_size @@ -105,38 +111,60 @@ def __init__(self, id, log_rref, rank, world_size, config = None): self.strategy = OffloadingStrategy.Parse(config.offload_strategy) self.configure_strategy(self.strategy) + def load_offloaded_model(self): + self.offloaded_net = self.load_default_model() + self.offloaded_net.to(self.device) + logging.info('Offloaded network loaded') - def configure_strategy(self, strategy : OffloadingStrategy): + def copy_offloaded_model_weights(self): + self.update_nn_parameters(global_model_weights, True) + logging.info('Parameters of offloaded model updated') + self.offloaded_model_ready = True + + + def parse_strategy(self, strategy: OffloadingStrategy): + deadline_enabled = False + swyh_enabled = False + freeze_layers_enabled = False + offload_enabled = False if strategy == OffloadingStrategy.VANILLA: logging.info('Running with offloading strategy: VANILLA') - self.deadline_enabled = False - self.swyh_enabled = False - self.freeze_layers_enabled = False - self.offload_enabled = False + deadline_enabled = False + swyh_enabled = False + freeze_layers_enabled = False + offload_enabled = False if strategy == OffloadingStrategy.DEADLINE: logging.info('Running with offloading strategy: DEADLINE') - self.deadline_enabled = True - self.swyh_enabled = False - self.freeze_layers_enabled = False - self.offload_enabled = False + deadline_enabled = True + swyh_enabled = False + freeze_layers_enabled = False + offload_enabled = False if strategy == OffloadingStrategy.SWYH: logging.info('Running with offloading 
strategy: SWYH') - self.deadline_enabled = True - self.swyh_enabled = True - self.freeze_layers_enabled = False - self.offload_enabled = False + deadline_enabled = True + swyh_enabled = True + freeze_layers_enabled = False + offload_enabled = False if strategy == OffloadingStrategy.FREEZE: logging.info('Running with offloading strategy: FREEZE') - self.deadline_enabled = True - self.swyh_enabled = False - self.freeze_layers_enabled = True - self.offload_enabled = False + deadline_enabled = True + swyh_enabled = False + freeze_layers_enabled = True + offload_enabled = False if strategy == OffloadingStrategy.MODEL_OFFLOAD: logging.info('Running with offloading strategy: MODEL_OFFLOAD') - self.deadline_enabled = True - self.swyh_enabled = False - self.freeze_layers_enabled = True - self.offload_enabled = True + deadline_enabled = True + swyh_enabled = False + freeze_layers_enabled = True + offload_enabled = True + return deadline_enabled, swyh_enabled, freeze_layers_enabled, offload_enabled + + def configure_strategy(self, strategy : OffloadingStrategy): + deadline_enabled, swyh_enabled, freeze_layers_enabled, offload_enabled = self.parse_strategy(strategy) + self.deadline_enabled = deadline_enabled + self.swyh_enabled = swyh_enabled + self.freeze_layers_enabled = freeze_layers_enabled + self.offload_enabled = offload_enabled logging.info(f'Offload strategy params: deadline={self.deadline_enabled}, swyh={self.swyh_enabled}, freeze={self.freeze_layers_enabled}, offload={self.offload_enabled}') @@ -247,17 +275,21 @@ def get_client_index(self): """ return self.client_idx - def update_nn_parameters(self, new_params): + def update_nn_parameters(self, new_params, is_offloaded_model = False): """ Update the NN's parameters. 
:param new_params: New weights for the neural network :type new_params: dict """ - self.net.load_state_dict(copy.deepcopy(new_params), strict=True) + if is_offloaded_model: + self.offloaded_net.load_state_dict(copy.deepcopy(new_params), strict=True) + else: + self.net.load_state_dict(copy.deepcopy(new_params), strict=True) if self.log_rref: self.remote_log(f'Weights of the model are updated') + def report_performance_async(self, performance_data): self.local_log('Reporting performance') from fltk.federator import Federator @@ -269,11 +301,12 @@ def report_performance_estimate(self, performance_data): return _remote_method_async(Federator.perf_est_endpoint, self.server_ref, self.id, performance_data) @staticmethod - def offload_receive_endpoint(model_weights, num_train_samples): + def offload_receive_endpoint(model_weights, num_train_samples, sender_id): print(f'Got the offload_receive_endpoint endpoint') - global global_model_weights, global_offload_received, global_model_data_size + global global_model_weights, global_offload_received, global_model_data_size, global_sender_id global_model_weights = copy.deepcopy(model_weights.copy()) global_model_data_size = num_train_samples + global_sender_id = sender_id global_offload_received = True @staticmethod @@ -290,6 +323,26 @@ def call_to_offload_endpoint(self, client_to_offload: RRef): self.client_to_offload_to = client_to_offload self.call_to_offload = True + def freeze_layers2(self, until, net): + + def get_children(model: torch.nn.Module): + children = list(model.children()) + flatt_children = [] + if children == []: + return model + else: + for child in children: + try: + flatt_children.extend(get_children(child)) + except TypeError: + flatt_children.append(get_children(child)) + return flatt_children + + for idx, layer in enumerate(get_children(net)): + if idx < until: + print(f'[{idx}] Freezing layer: {layer}') + for param in layer.parameters(): + param.requires_grad = False def freeze_layers(self, until): ct = 0 
for child in self.net.children(): @@ -298,11 +351,12 @@ def freeze_layers(self, until): for param in child.parameters(): param.requires_grad = False + def unfreeze_layers(self): for param in self.net.parameters(): param.requires_grad = True - def train(self, epoch, deadline: int = None, warmup=False): + def train(self, epoch, deadline: int = None, warmup=False, use_offloaded_model=False): """ Different modes: @@ -325,7 +379,20 @@ def train(self, epoch, deadline: int = None, warmup=False): :param epoch: Current epoch # :type epoch: int """ + + + perf_data = { + 'total_duration': 0, + 'p_v2_data': None, + 'p_v1_data': None, + 'n_batches': 0 + } + start_time = time.time() + + if use_offloaded_model: + for param in self.offloaded_net.parameters(): + param.requires_grad = True deadline_threshold = 10 train_stop_time = None if self.deadline_enabled and deadline is not None: @@ -338,28 +405,65 @@ def train(self, epoch, deadline: int = None, warmup=False): # self.net.train() global global_model_weights, global_offload_received # deadline_time = None - # save model - if self.args.should_save_model(epoch): - self.save_model(epoch, self.args.get_epoch_save_start_suffix()) + # # save model + # if self.args.should_save_model(epoch): + # self.save_model(epoch, self.args.get_epoch_save_start_suffix()) running_loss = 0.0 final_running_loss = 0.0 if self.args.distributed: self.dataset.train_sampler.set_epoch(epoch) - self.args.get_logger().info(f'{self.id}: Number of training samples: {len(list(self.dataset.get_train_loader()))}') - number_of_training_samples = len(list(self.dataset.get_train_loader())) + number_of_training_samples = len(self.dataset.get_train_loader()) + self.args.get_logger().info(f'{self.id}: Number of training samples: {number_of_training_samples}') + # self.args.get_logger().info(f'{self.id}: Number of training samples: {len(self.dataset.get_train_loader())}') # Ignore profiler for now # performance_metric_interval = 20 # perf_resp = None # Profiling 
parameters profiling_size = self.args.profiling_size + if profiling_size == -1: + profiling_size = number_of_training_samples profiling_data = np.zeros(profiling_size) + profiling_forwards_data = np.zeros(profiling_size) + profiling_backwards_data = np.zeros(profiling_size) + pre_train_loop_data = np.zeros(profiling_size) + post_train_loop_data = np.zeros(profiling_size) active_profiling = True + p = P2(profiling_size, 7) + p3 = P3(profiling_size, 7) + if use_offloaded_model: + p.attach(self.offloaded_net) + p3.attach(self.offloaded_net) + else: + p.attach(self.net) + p3.attach(self.net) + profiler_active = True control_start_time = time.time() training_process = 0 + + def calc_optimal_offloading_point(profiler_data, time_till_deadline, iterations_left): + logging.info(f'Calc optimal point: profiler_data={profiler_data}, time_till_deadline={time_till_deadline}, iterations_left={iterations_left}') + ff, cf, cb, fb = profiler_data + full_network = ff + cf + cb + fb + frozen_network = ff + cf + cb + split_point = 0 + for z in range(iterations_left, -1, -1): + x = z + y = iterations_left - x + # print(z) + new_est_split = (x * full_network) + (y * frozen_network) + split_point = x + if new_est_split < time_till_deadline: + break + logging.info(f'The offloading point is a iteration: {split_point}') + logging.info(f'Estimated default runtime={full_network* iterations_left}') + logging.info(f'new_est_split={new_est_split}, deadline={deadline}') + + start_loop_time = time.time() for i, (inputs, labels) in enumerate(self.dataset.get_train_loader(), 0): + loop_pre_train_start = time.time() start_train_time = time.time() if self.offload_enabled and not warmup: @@ -368,26 +472,28 @@ def train(self, epoch, deadline: int = None, warmup=False): self.args.get_logger().info('Got call to offload model') model_weights = self.get_nn_parameters() - ret = rpc.rpc_sync(self.client_to_offload_to, Client.offload_receive_endpoint, args=([model_weights, i])) + ret = 
rpc.rpc_async(self.client_to_offload_to, Client.offload_receive_endpoint, args=([model_weights, i, self.id])) print(f'Result of rref: {ret}') - + # self.call_to_offload = False self.client_to_offload_to = None # This number only works for cifar10cnn # @TODO: Make this dynamic for other networks - self.freeze_layers(15) + # self.freeze_layers(5) + self.freeze_layers2(8, self.net) # Check if there is a model to incorporate - if global_offload_received: - self.args.get_logger().info('Merging offloaded model') - self.args.get_logger().info('FedAvg locally with offloaded model') - updated_weights = FedAvg({'own': self.get_nn_parameters(), 'remote': global_model_weights}, {'own': i, 'remote': global_model_data_size}) - - # updated_weights = average_nn_parameters([self.get_nn_parameters(), global_model_weights]) - self.args.get_logger().info('Updating local weights due to offloading') - self.update_nn_parameters(updated_weights) - global_offload_received = False - global_model_weights = None + # Disable for now to offloading testing + # if global_offload_received: + # self.args.get_logger().info('Merging offloaded model') + # self.args.get_logger().info('FedAvg locally with offloaded model') + # updated_weights = FedAvg({'own': self.get_nn_parameters(), 'remote': global_model_weights}, {'own': i, 'remote': global_model_data_size}) + # + # # updated_weights = average_nn_parameters([self.get_nn_parameters(), global_model_weights]) + # self.args.get_logger().info('Updating local weights due to offloading') + # self.update_nn_parameters(updated_weights) + # global_offload_received = False + # global_model_weights = None if self.swyh_enabled and not warmup: # Deadline @@ -403,22 +509,44 @@ def train(self, epoch, deadline: int = None, warmup=False): inputs, labels = inputs.to(self.device), labels.to(self.device) training_process = i + # zero the parameter gradients self.optimizer.zero_grad() - - outputs = self.net(inputs) + loop_pre_train_end = time.time() + if profiler_active: 
+ p.signal_forward_start() + p3.signal_forward_start() + outputs = None + if use_offloaded_model: + outputs = self.offloaded_net(inputs) + else: + outputs = self.net(inputs) loss = self.loss_function(outputs, labels) + post_train_time = time.time() + if active_profiling: + profiling_forwards_data[i] = post_train_time - start_train_time # Ignore profiler for now # p.signal_backward_start() + if profiler_active: + p.signal_backward_start() + p3.signal_forward_end() + p3.signal_backwards_start() loss.backward() self.optimizer.step() + if profiler_active: + p3.signal_backwards_end() + p.step() + p3.step() + loop_post_train_start = time.time() # print statistics running_loss += loss.item() if i % self.args.get_log_interval() == 0: self.args.get_logger().info('[%d, %5d] loss: %.3f' % (epoch, i, running_loss / self.args.get_log_interval())) final_running_loss = running_loss / self.args.get_log_interval() running_loss = 0.0 + if active_profiling: + profiling_backwards_data[i] = time.time() - post_train_time # Ignore profiler for now # p.set_warmup(True) @@ -434,9 +562,17 @@ def train(self, epoch, deadline: int = None, warmup=False): batch_duration = end_train_time - start_train_time profiling_data[i] = batch_duration if i == profiling_size-1: + profiler_active = False active_profiling = False + p.remove_all_handles() + p3.remove_all_handles() time_per_batch = profiling_data.mean() logging.info(f'Average batch duration is {time_per_batch}') + profiler_data = p.aggregate_values() + p3_data = p3.aggregate_values() + logging.info(f'Profiler data: {profiler_data}') + logging.info(f'P3 Profiler data: {p3_data}') + calc_optimal_offloading_point(profiler_data, deadline, number_of_training_samples - i) # Estimated training time est_total_time = number_of_training_samples * time_per_batch @@ -449,17 +585,35 @@ def train(self, epoch, deadline: int = None, warmup=False): logging.info('Will freeze layers to speed up computation') # This number only works for cifar10cnn # @TODO: Make 
this dynamic for other networks - self.freeze_layers(15) + # self.freeze_layers(5) + self.freeze_layers2(8, self.net) # logging.info(f'Batch time is {batch_duration}') # Break away from loop for debug purposes # if i > 5: # break + loop_post_train_end = time.time() + if active_profiling: + pre_train_loop_data[i] = loop_pre_train_end - loop_pre_train_start + post_train_loop_data[i] = loop_post_train_end - loop_post_train_start control_end_time = time.time() + end_loop_time = time.time() logging.info(f'Measure end time is {(control_end_time - control_start_time)}') logging.info(f'Trained on {training_process} samples') - + # logging.info(f'Profiler data: {p.get_values()}') + + perf_data['total_duration'] = control_end_time - control_start_time + perf_data['n_batches'] = len(self.dataset.get_train_loader()) + perf_data['p_v2_data'] = p.get_values() + perf_data['p_v3_data'] = p3.get_values() + perf_data['p_v1_data'] = profiling_data + perf_data['pre_train_loop_data'] = pre_train_loop_data + perf_data['post_train_loop_data'] = post_train_loop_data + perf_data['p_v1_pre_loop'] = start_loop_time - start_time + perf_data['p_v1_forwards'] = profiling_forwards_data + perf_data['p_v1_backwards'] = profiling_backwards_data + perf_data['loop_duration'] = end_loop_time - start_loop_time if not warmup: self.scheduler.step() # logging.info(self.optimizer.param_groups) @@ -475,11 +629,14 @@ def train(self, epoch, deadline: int = None, warmup=False): # save model if self.args.should_save_model(epoch): self.save_model(epoch, self.args.get_epoch_save_end_suffix()) + perf_data['p_v1_post_loop'] = time.time() - control_end_time + return final_running_loss, self.get_nn_parameters(), training_process, scheduler_data, perf_data - return final_running_loss, self.get_nn_parameters(), training_process, scheduler_data - - def test(self): - self.net.eval() + def test(self, use_offloaded_model = False): + if use_offloaded_model: + self.offloaded_net.eval() + else: + self.net.eval() correct = 0 
total = 0 @@ -490,7 +647,11 @@ def test(self): for (images, labels) in self.dataset.get_test_loader(): images, labels = images.to(self.device), labels.to(self.device) - outputs = self.net(images) + if use_offloaded_model: + outputs = self.offloaded_net(images) + else: + outputs = self.net(images) + _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item() @@ -515,29 +676,62 @@ def test(self): return accuracy, loss, class_precision, class_recall + def run_epochs(self, num_epoch, deadline: int = None, warmup=False): + """ + Timing data to measure: + Total execution tim: + """ + start = time.time() + start_time_train = datetime.datetime.now() self.dataset.get_train_sampler().set_epoch_size(num_epoch) # Train locally - loss, weights, training_process, scheduler_data = self.train(self.epoch_counter, deadline, warmup) + loss, weights, training_process, scheduler_data, perf_data = self.train(self.epoch_counter, deadline, warmup) if not warmup: self.epoch_counter += num_epoch elapsed_time_train = datetime.datetime.now() - start_time_train train_time_ms = int(elapsed_time_train.total_seconds()*1000) + post_training_time = time.time() start_time_test = datetime.datetime.now() accuracy, test_loss, class_precision, class_recall = self.test() elapsed_time_test = datetime.datetime.now() - start_time_test test_time_ms = int(elapsed_time_test.total_seconds()*1000) - - data = EpochData(self.epoch_counter, num_epoch, train_time_ms, test_time_ms, loss, accuracy, test_loss, class_precision, class_recall, training_process, self.id) + post_test_time = time.time() + + # Timing data that needs to be send back + duration_train = post_training_time - start + duration_test = post_test_time - post_training_time + logging.info( + f'Time for training={duration_train}, time for testing={duration_test}, total time={duration_train + duration_test}') + data = EpochData(self.epoch_counter, num_epoch, train_time_ms, test_time_ms, loss, accuracy, 
test_loss, + class_precision, class_recall, training_process, self.id) self.epoch_results.append(data) - - # Copy GPU tensors to CPU for k, v in weights.items(): weights[k] = v.cpu() - return data, weights, scheduler_data + response_obj = {'own': [data, weights, scheduler_data, perf_data]} + + global global_offload_received + if self.offload_enabled and global_offload_received: + self.configure_strategy(OffloadingStrategy.SWYH) + logging.info('Processing offloaded model') + self.load_offloaded_model() + self.copy_offloaded_model_weights() + loss_offload, weights_offload, training_process_offload, scheduler_data_offload, perf_data_offload = self.train(self.epoch_counter, deadline, warmup, use_offloaded_model=True) + accuracy, test_loss, class_precision, class_recall = self.test(use_offloaded_model=True) + global global_sender_id + data_offload = EpochData(self.epoch_counter, num_epoch, train_time_ms, test_time_ms, loss_offload, accuracy, test_loss, + class_precision, class_recall, training_process, f'{global_sender_id}-offload') + # Copy GPU tensors to CPU + for k, v in weights_offload.items(): + weights_offload[k] = v.cpu() + response_obj['offload'] = [ data_offload, weights_offload, scheduler_data_offload, perf_data_offload, global_sender_id] + self.configure_strategy(OffloadingStrategy.MODEL_OFFLOAD) + else: + logging.info(f'Not doing offloading due to offload_enabled={self.offload_enabled} and global_offload_received={global_offload_received}') + return response_obj def save_model(self, epoch, suffix): """ diff --git a/fltk/federator.py b/fltk/federator.py index 40b5311f..d3610089 100644 --- a/fltk/federator.py +++ b/fltk/federator.py @@ -66,12 +66,14 @@ class ClientRef: name = "" data_size = 0 tb_writer = None + tb_writer_offload = None available = False - def __init__(self, name, ref, tensorboard_writer): + def __init__(self, name, ref, tensorboard_writer, tensorboard_writer_offload): self.name = name self.ref = ref self.tb_writer = tensorboard_writer + 
self.tb_writer_offload = tensorboard_writer_offload def __repr__(self): return self.name @@ -193,7 +195,8 @@ def create_clients(self, client_id_triple): for id, rank, world_size in client_id_triple: client = rpc.remote(id, Client, kwargs=dict(id=id, log_rref=self.log_rref, rank=rank, world_size=world_size, config=self.config)) writer = SummaryWriter(f'{self.tb_path}/{self.config.experiment_prefix}_client_{id}') - self.clients.append(ClientRef(id, client, tensorboard_writer=writer)) + writer_offload = SummaryWriter(f'{self.tb_path}/{self.config.experiment_prefix}_client_{id}_offload') + self.clients.append(ClientRef(id, client, tensorboard_writer=writer, tensorboard_writer_offload=writer_offload)) self.client_data[id] = [] def select_clients(self, n = 2): @@ -324,6 +327,10 @@ def remote_run_epoch(self, epochs, warmup=False, first_epoch=False): res[1].wait() logging.info('Weights are updated') + ### Clients train locally + # Structure of the async message: + # - Client will respond with two messages: + # Let clients train locally if not self.deadline_enabled: @@ -389,16 +396,49 @@ def reached_deadline(): # weak_client = k # else: # strong_client = k - if self.offload_enabled and not warmup: - weak_client = est_keys[0] - strong_client = est_keys[1] - if self.performance_estimate[est_keys[1]][1] > self.performance_estimate[est_keys[0]][1]: - weak_client = est_keys[1] - strong_client = est_keys[0] - logging.info(f'Offloading from {weak_client} -> {strong_client} due to {self.performance_estimate[weak_client]} and {self.performance_estimate[strong_client]}') + # (time_per_batch, est_total_time, number_of_training_samples) + if self.offload_enabled and not warmup: + first = True + weakest = 0 + strongest = 0 + weak_performance = 0 + strong_performance = 0 + for k, v in self.performance_estimate.items(): + # print(v) + if first: + first = False + est_total_time = v[1] + weakest = k + strongest = k + weak_performance = est_total_time + strong_performance = est_total_time + 
else: + est_total_time = v[1] + if est_total_time > weak_performance: + weak_performance = est_total_time + weakest = k + if est_total_time < strong_performance: + strong_performance = est_total_time + strongest = k + logging.info( + f'Offloading from {weakest} -> {strongest} due to {self.performance_estimate[weakest]} and {self.performance_estimate[strongest]}') logging.info('Sending call to offload') - self.ask_client_to_offload(self.reference_lookup[selected_clients[0].name], selected_clients[1].name) + self.ask_client_to_offload(self.reference_lookup[weakest], + strongest) + + # if self.offload_enabled and not warmup: + # logging.info(f'self.performance_estimate={self.performance_estimate}') + # logging.info(f'est_keys={est_keys}') + # weak_client = est_keys[0] + # strong_client = est_keys[1] + # if self.performance_estimate[est_keys[1]][1] > self.performance_estimate[est_keys[0]][1]: + # weak_client = est_keys[1] + # strong_client = est_keys[0] + # + # logging.info(f'Offloading from {weak_client} -> {strong_client} due to {self.performance_estimate[weak_client]} and {self.performance_estimate[strong_client]}') + # logging.info('Sending call to offload') + # self.ask_client_to_offload(self.reference_lookup[selected_clients[0].name], selected_clients[1].name) # selected_clients[0] # logging.info(f'Status of all_finished={all_finished} and deadline={reached_deadline()}') @@ -423,11 +463,15 @@ def reached_deadline(): if not client_response.dropped: client.available = True - epoch_data, weights, scheduler_data = client_response.future.wait() + logging.info(f'Fetching response for client: {client}') + response_obj = client_response.future.wait() + + epoch_data, weights, scheduler_data, perf_data = response_obj['own'] self.client_data[epoch_data.client_id].append(epoch_data) logging.info(f'{client} had a loss of {epoch_data.loss}') logging.info(f'{client} had a epoch data of {epoch_data}') logging.info(f'{client} has trained on {epoch_data.training_process} samples') 
+ # logging.info(f'{client} has perf data: {perf_data}') elapsed_time = client_response.end_time - self.exp_start_time client.tb_writer.add_scalar('training loss', @@ -464,13 +508,87 @@ def reached_deadline(): client.tb_writer.add_scalar('weight decay', scheduler_data['wd'], self.epoch_counter) + total_time_t1 = perf_data['total_duration'] + loop_duration = perf_data['loop_duration'] + p_v1_time = perf_data['p_v1_data'].mean() * perf_data['n_batches'] + p_v1_time_sum = perf_data['p_v1_data'].sum() + p_v1_pre_loop = perf_data['p_v1_pre_loop'] + p_v1_post_loop = perf_data['p_v1_post_loop'] + pre_train_loop_data = perf_data['pre_train_loop_data'] + post_train_loop_data = perf_data['post_train_loop_data'] + p_v2_forwards = (perf_data['p_v2_data'][0].mean() + perf_data['p_v2_data'][1].mean()) * perf_data['n_batches'] + p_v2_backwards = (perf_data['p_v2_data'][2].mean() + perf_data['p_v2_data'][3].mean()) * perf_data['n_batches'] + p_v3_forwards = (perf_data['p_v3_data'][0].mean() + perf_data['p_v3_data'][1].mean()) * perf_data[ + 'n_batches'] + p_v3_backwards = (perf_data['p_v3_data'][2].mean() + perf_data['p_v3_data'][3].mean()) * perf_data[ + 'n_batches'] + p_v2_time = sum([x.mean() for x in perf_data['p_v2_data']]) * perf_data['n_batches'] + p_v1_forwards = perf_data['p_v1_forwards'].mean() * perf_data['n_batches'] + p_v1_backwards = perf_data['p_v1_backwards'].mean() * perf_data['n_batches'] + logging.info(f'{client} has time estimates: {[total_time_t1, loop_duration, p_v1_time_sum, p_v1_time, p_v2_time, [p_v1_forwards, p_v1_backwards], [p_v2_forwards, p_v2_backwards]]}') + logging.info(f'{client} combined times pre post loop stuff: {[p_v1_pre_loop, loop_duration, p_v1_post_loop]} = {sum([p_v1_pre_loop, loop_duration, p_v1_post_loop])} ? 
{total_time_t1}') + logging.info(f'{client} p3 time = {p_v3_forwards} + {p_v3_backwards} = {p_v3_forwards+ p_v3_backwards}') + logging.info(f'{client} Pre train loop time = {pre_train_loop_data.mean()}, post train loop time = {post_train_loop_data.mean()}') + # logging.info(f'{client} p_v1 data: {perf_data["p_v1_data"]}') + + + + client.tb_writer.add_scalar('train_time_estimate_delta', loop_duration - (p_v3_forwards+ p_v3_backwards), self.epoch_counter) + client.tb_writer.add_scalar('train_time_estimate_delta_2', loop_duration - (p_v2_forwards+ p_v2_backwards), self.epoch_counter) client_weights.append(weights) client_weights_dict[client.name] = weights client_training_process_dict[client.name] = epoch_data.training_process + if 'offload' in response_obj: + epoch_data_offload, weights_offload, scheduler_data_offload, perf_data_offload, sender_id = response_obj['offload'] + if epoch_data_offload.client_id not in self.client_data: + self.client_data[epoch_data_offload.client_id] = [] + self.client_data[epoch_data_offload.client_id].append(epoch_data_offload) + + writer = client.tb_writer_offload + + writer.add_scalar('training loss', + epoch_data_offload.loss_train, # for every 1000 minibatches + self.epoch_counter * client.data_size) + + writer.add_scalar('accuracy', + epoch_data_offload.accuracy, # for every 1000 minibatches + self.epoch_counter * client.data_size) + + writer.add_scalar('accuracy wall time', + epoch_data_offload.accuracy, # for every 1000 minibatches + elapsed_time) + writer.add_scalar('training loss per epoch', + epoch_data_offload.loss_train, # for every 1000 minibatches + self.epoch_counter) + + writer.add_scalar('accuracy per epoch', + epoch_data_offload.accuracy, # for every 1000 minibatches + self.epoch_counter) + + writer.add_scalar('Client time per epoch', + client_response.duration(), # for every 1000 minibatches + self.epoch_counter) + + writer.add_scalar('learning rate', + scheduler_data_offload['lr'], + self.epoch_counter) + + 
writer.add_scalar('momentum', + scheduler_data_offload['momentum'], + self.epoch_counter) + + writer.add_scalar('weight decay', + scheduler_data_offload['wd'], + self.epoch_counter) + client_weights.append(weights_offload) + client_weights_dict[epoch_data_offload.client_id] = weights_offload + client_training_process_dict[epoch_data_offload.client_id] = epoch_data_offload.training_process + self.performance_estimate = {} if len(client_weights): + logging.info(f'Aggregating {len(client_weights)} models') updated_model = FedAvg(client_weights_dict, client_training_process_dict) # updated_model = average_nn_parameters(client_weights) diff --git a/fltk/util/generate_docker_compose.py b/fltk/util/generate_docker_compose.py index 04cb0d34..5adb915a 100644 --- a/fltk/util/generate_docker_compose.py +++ b/fltk/util/generate_docker_compose.py @@ -48,23 +48,23 @@ def generate_offload_exp(): cpu_set = 0 cpu_idx = 3 for client_id in range(1, num_clients + 1): - # client_type = 'medium' + client_type = 'medium' client_type = 'default' - # if client_id == 1 or client_id == 2: - # client_type = 'medium' - # cpu_set = f'{cpu_idx}-{cpu_idx+1}' - # cpu_idx += 2 - # elif client_id == 3: - # client_type = 'slow' - # cpu_set = f'{cpu_idx}' - # cpu_idx += 1 - # elif client_id == 4: - client_type = 'fast' - cpu_set = f'{cpu_idx}-{cpu_idx + 2}' - cpu_idx += 3 - # else: - # cpu_set = f'{cpu_idx}' - # cpu_idx += 1 + if client_id == 1 or client_id == 2: + client_type = 'medium' + cpu_set = f'{cpu_idx}-{cpu_idx+1}' + cpu_idx += 2 + elif client_id == 3: + client_type = 'slow' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + elif client_id == 4: + client_type = 'fast' + cpu_set = f'{cpu_idx}-{cpu_idx + 2}' + cpu_idx += 3 + else: + cpu_set = f'{cpu_idx}' + cpu_idx += 1 client_template: dict = load_client_template(type=client_type) client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type, cpu_set=cpu_set) diff --git a/fltk/util/profiler.py 
import time

import numpy as np
import torch
from torch.nn import Module


class Profiler:
    """
    Per-layer wall-clock profiler driven by forward and backward hooks.

    Every leaf layer of the attached module gets a forward pre-hook, a forward
    hook and a backward hook. The measured durations are accumulated, per
    batch, into four arrays of length ``rounds``:

    ff = feature layers, forward propagation
    cf = classifier layers, forward propagation
    cb = classifier layers, backward propagation
    fb = feature layers, backward propagation

    A layer is counted as a classifier layer when its running layer index is
    strictly greater than ``feature_layers_ends``.
    """

    current_layer = 0
    last_time = 0
    execution_id = 0
    last_forward_time = None
    warmup = False
    # Declared only; the list itself is created per instance in __init__ so
    # that two profilers never share (and never remove) each other's hooks.
    hook_handles: list

    feature_layers_ends: int = 0
    ff: np.ndarray
    fb: np.ndarray
    cf: np.ndarray
    cb: np.ndarray

    batch_idx = 0

    def __init__(self, rounds: int, feature_layers_ends: int):
        """
        :param rounds: number of batches that can be recorded (array length)
        :param feature_layers_ends: index of the last feature layer
        """
        self.round = rounds
        self.ff = np.zeros(self.round)
        self.fb = np.zeros(self.round)
        self.cf = np.zeros(self.round)
        self.cb = np.zeros(self.round)
        self.feature_layers_ends = feature_layers_ends
        self.hook_handles = []

    @staticmethod
    def _leaf_layers(model: Module) -> list:
        """
        Return the leaf layers (modules without children) of ``model`` in
        registration order. A module without children is its own single leaf.
        """
        children = list(model.children())
        if not children:
            return [model]
        leaves = []
        for child in children:
            leaves.extend(Profiler._leaf_layers(child))
        return leaves

    def attach(self, module: Module):
        """
        Register the timing hooks on every leaf layer of ``module``.
        The returned handles are kept so they can be removed again.
        """
        kids = self._leaf_layers(module)

        print(module)
        for k in kids:
            h1 = k.register_forward_hook(self.forward)
            self.hook_handles.append(h1)
            h2 = k.register_forward_pre_hook(self.pre_forward)
            self.hook_handles.append(h2)
            h3 = k.register_backward_hook(self.backward)
            self.hook_handles.append(h3)

    def remove_all_handles(self):
        """
        Remove every hook registered by this profiler and forget the handles.
        """
        for handle in self.hook_handles:
            handle.remove()
        # Drop the stale handles so a later attach() starts from a clean list.
        self.hook_handles = []

    def set_warmup(self, value):
        """
        Enable or disable warmup mode; hooks record nothing while it is on.
        """
        self.warmup = value

    def add(self, layer_id, duration, backprogation: bool = False):
        """
        Add ``duration`` to the bucket of the current batch that matches the
        layer (feature or classifier) and the direction (forward or backward).
        """
        is_cls = layer_id > self.feature_layers_ends
        if is_cls:
            if backprogation:
                self.cb[self.batch_idx] += duration
            else:
                self.cf[self.batch_idx] += duration
        else:
            if backprogation:
                self.fb[self.batch_idx] += duration
            else:
                self.ff[self.batch_idx] += duration

    def pre_forward(self, other, input):
        """
        Forward pre-hook: remember when the layer starts its forward pass.
        """
        if self.warmup:
            return None
        self.last_forward_time = time.time()

    def forward(self, other, input, output):
        """
        Forward hook: record the forward duration of the layer that just ran
        and advance the running layer index.
        """
        if self.warmup:
            return None
        self.last_forward_time = time.time() - self.last_forward_time
        self.add(self.current_layer, self.last_forward_time, False)
        self.current_layer += 1
        self.execution_id += 1

    def backward(self, module, grad_input, grad_output):
        """
        Backward hook: record the time since the previous backward event and
        step the running layer index one layer back.
        """
        if self.warmup:
            return None
        self.add(self.current_layer, time.time() - self.last_time, True)
        self.current_layer -= 1
        self.execution_id += 1
        self.last_time = time.time()
        return None

    def signal_backward_start(self):
        """
        Mark the start of the backward pass. The forward hooks leave
        ``current_layer`` one past the last layer, hence the decrement.
        """
        self.current_layer -= 1
        self.last_time = time.time()

    def signal_forward_start(self):
        """
        Mark the start of the forward pass and reset the per-batch counters.
        """
        self.current_layer = 0
        self.execution_id = 0
        self.last_time = 0

    def step(self):
        """
        Advance to the next batch slot of the result arrays.
        """
        self.batch_idx += 1

    def get_values(self):
        """
        Returns the measured per-batch arrays in the following order: ff, cf, cb, fb
        ff = feature layers forward propagation
        cf = classifier layers forward propagation
        cb = classifier layers backwards propagation
        fb = feature layers backwards propagation
        The order is the execution order of forward and then backward propagation of a network
        """
        return self.ff, self.cf, self.cb, self.fb

    def aggregate_values(self, from_layer: int = 0):
        """
        Returns the mean of the measured values in the same order as
        get_values(): ff, cf, cb, fb

        :param from_layer: index of the first batch slot to include in the
            mean (despite its name it slices batches, not layers)

        Slots that were never written still hold zero and are part of the mean.
        """
        return (self.ff[from_layer:].mean(), self.cf[from_layer:].mean(),
                self.cb[from_layer:].mean(), self.fb[from_layer:].mean())

    def profile_run(self, module, input, iterations, warmup_time = 0):
        """
        Attach to ``module`` and profile ``iterations`` forward/backward
        passes on ``input``, preceded by ``warmup_time`` unrecorded passes.
        """
        output = module(input)
        # Random upstream gradient with the shape of the output.
        g0 = torch.rand_like(output)

        self.attach(module)
        module.train()
        self.set_warmup(True)
        for i in range(warmup_time):  # warmup
            print('warmup cycle')
            self.signal_forward_start()
            output = module(input)
            self.signal_backward_start()
            output.backward(g0)
        self.set_warmup(False)
        for i in range(iterations):
            print(i, end='')
            self.signal_forward_start()
            output = module(input)
            self.signal_backward_start()
            output.backward(g0)
            self.step()
        print('')
import time

import numpy as np
import torch
from torch.nn import Module


class Profiler:
    """
    Coarse-grained wall-clock profiler that splits a network in a feature
    part and a classifier part using only two hooks plus explicit signals.

    Core idea is to find the following segments
    ff = network start              <-> pre_forward_hook(split + 1)
    cf = pre_forward_hook(split+ 1) <-> end forward
    cb = start backwards            <-> full_backwards_hook(split)
    fb = full_backwards_hook(split) <-> backward ends

    where ``split`` is ``feature_layers_ends``. The caller has to invoke
    signal_forward_start / signal_forward_end / signal_backwards_start /
    signal_backwards_end around the passes and step() once per batch.
    """

    current_layer = 0
    last_time = 0
    execution_id = 0
    last_forward_time = None
    warmup = False
    # Declared only; the list itself is created per instance in __init__ so
    # that two profilers never share (and never remove) each other's hooks.
    hook_handles: list

    feature_layers_ends: int = 0
    ff: np.ndarray
    fb: np.ndarray
    cf: np.ndarray
    cb: np.ndarray

    batch_idx = 0

    # Intermediate time values
    forward_start_time: float
    backwards_start_time: float
    forward_end_time: float
    backwards_end_time: float
    pre_forward_post_split_time: float
    backwards_split_time: float

    def __init__(self, rounds: int, feature_layers_ends: int):
        """
        :param rounds: number of batches that can be recorded (array length)
        :param feature_layers_ends: index of the last feature layer
        """
        self.round = rounds
        self.ff = np.zeros(self.round)
        self.fb = np.zeros(self.round)
        self.cf = np.zeros(self.round)
        self.cb = np.zeros(self.round)
        self.feature_layers_ends = feature_layers_ends
        self.hook_handles = []

    @staticmethod
    def _leaf_layers(model: Module) -> list:
        """
        Return the leaf layers (modules without children) of ``model`` in
        registration order. A module without children is its own single leaf.
        """
        children = list(model.children())
        if not children:
            return [model]
        leaves = []
        for child in children:
            leaves.extend(Profiler._leaf_layers(child))
        return leaves

    def attach(self, module: Module):
        """
        Register the two split hooks on the leaf layers of ``module``:
        a backward hook on layer ``feature_layers_ends`` and a forward
        pre-hook on the layer directly after it.
        """
        kids = self._leaf_layers(module)

        print(module)

        for idx, k in enumerate(kids):
            if idx == self.feature_layers_ends:
                handle = k.register_backward_hook(self.full_backwards)
                self.hook_handles.append(handle)
            if idx == self.feature_layers_ends + 1:
                handle = k.register_forward_pre_hook(self.pre_forward)
                self.hook_handles.append(handle)

    def full_backwards(self, module, grad_input, grad_output):
        """
        Backward hook on the split layer: closes the classifier backward
        segment (cb) for the current batch.
        """
        self.backwards_split_time = time.time()
        self.cb[self.batch_idx] = self.backwards_split_time - self.backwards_start_time
        return None

    def pre_forward(self, other, input):
        """
        Forward pre-hook on the first layer after the split: closes the
        feature forward segment (ff) for the current batch.
        """
        self.pre_forward_post_split_time = time.time()
        self.ff[self.batch_idx] = self.pre_forward_post_split_time - self.forward_start_time

    def remove_all_handles(self):
        """
        Remove every hook registered by this profiler and forget the handles.
        """
        for handle in self.hook_handles:
            handle.remove()
        # Drop the stale handles so a later attach() starts from a clean list.
        self.hook_handles = []

    def set_warmup(self, value):
        """
        Store the warmup flag. The hooks of this profiler do not read it;
        values written during warmup are overwritten by the next pass.
        """
        self.warmup = value

    def add(self, layer_id, duration, backprogation: bool = False):
        """
        Add ``duration`` to the bucket of the current batch that matches the
        layer (feature or classifier) and the direction (forward or backward).
        """
        is_cls = layer_id > self.feature_layers_ends
        if is_cls:
            if backprogation:
                self.cb[self.batch_idx] += duration
            else:
                self.cf[self.batch_idx] += duration
        else:
            if backprogation:
                self.fb[self.batch_idx] += duration
            else:
                self.ff[self.batch_idx] += duration

    def signal_forward_start(self):
        """
        Mark the start of the forward pass.
        """
        self.forward_start_time = time.time()

    def signal_forward_end(self):
        """
        Mark the end of the forward pass and store the classifier forward
        segment (cf). Requires that the forward pre-hook fired in this pass.
        """
        self.forward_end_time = time.time()
        self.cf[self.batch_idx] = self.forward_end_time - self.pre_forward_post_split_time

    def signal_backwards_start(self):
        """
        Mark the start of the backward pass.
        """
        self.backwards_start_time = time.time()

    def signal_backwards_end(self):
        """
        Mark the end of the backward pass and store the feature backward
        segment (fb). Requires that the backward hook fired in this pass.
        """
        self.backwards_end_time = time.time()
        self.fb[self.batch_idx] = self.backwards_end_time - self.backwards_split_time

    def step(self):
        """
        Advance to the next batch slot of the result arrays.
        """
        self.batch_idx += 1

    def get_values(self):
        """
        Returns the measured per-batch arrays in the following order: ff, cf, cb, fb
        ff = feature layers forward propagation
        cf = classifier layers forward propagation
        cb = classifier layers backwards propagation
        fb = feature layers backwards propagation
        The order is the execution order of forward and then backward propagation of a network
        """
        return self.ff, self.cf, self.cb, self.fb

    def aggregate_values(self, from_layer: int = 0):
        """
        Returns the mean of the measured values in the same order as
        get_values(): ff, cf, cb, fb

        :param from_layer: index of the first batch slot to include in the
            mean (despite its name it slices batches, not layers)

        Slots that were never written still hold zero and are part of the mean.
        """
        return (self.ff[from_layer:].mean(), self.cf[from_layer:].mean(),
                self.cb[from_layer:].mean(), self.fb[from_layer:].mean())

    def profile_run(self, module, input, iterations, warmup_time = 0):
        """
        Attach to ``module`` and profile ``iterations`` forward/backward
        passes on ``input``, preceded by ``warmup_time`` warmup passes.
        Prints the collected arrays when done.
        """
        output = module(input)
        # Random upstream gradient with the shape of the output.
        g0 = torch.rand_like(output)

        self.attach(module)
        module.train()
        self.set_warmup(True)
        for i in range(warmup_time):  # warmup
            print('warmup cycle')
            self.signal_forward_start()
            output = module(input)
            self.signal_forward_end()
            self.signal_backwards_start()
            output.backward(g0)
            self.signal_backwards_end()
        self.set_warmup(False)
        for i in range(iterations):
            print(i, end='')
            self.signal_forward_start()
            output = module(input)
            self.signal_forward_end()
            self.signal_backwards_start()
            output.backward(g0)
            self.signal_backwards_end()
            self.step()
        print('')
        print(self.get_values())
class LimitLabelsSamplerFlex(DistributedSamplerWrapper):
    """
    A sampler that limits the number of labels per client.
    The number of clients must be less than or equal to the number of labels.

    Every client receives floor(n_labels / n_clients) labels of its own; the
    labels that are left over are spread over the clients by strided slicing.
    """

    def __init__(self, dataset, num_replicas, rank, args=(5, 42)):
        # Only the seed is used; the first entry of ``args`` is ignored here.
        _, seed = args
        super().__init__(dataset, num_replicas, rank, seed)
        # NOTE(review): n_labels, n_clients, client_id, total_size and
        # order_by_label presumably come from DistributedSamplerWrapper,
        # which is not visible here -- confirm against the base class.

        per_client = int(np.floor(self.n_labels / self.n_clients))
        all_labels = list(range(self.n_labels))  # list of labels to distribute
        owned = [set() for _ in range(self.n_clients)]  # set of labels given to each client
        random.seed(seed)  # seed, such that the same result can be obtained multiple times
        print(owned)

        # Hand out labels from the back of one seeded permutation.
        shuffled = random.sample(all_labels, len(all_labels))
        assignment = {}
        for cid in range(self.n_clients):
            picked = [shuffled.pop() for _ in range(per_client)]
            assignment[cid] = picked
            owned[cid].update(picked)
        # Whatever is left belongs to no client in particular.
        assignment['rest'] = shuffled

        by_label = self.order_by_label(dataset)
        selected = []
        for label in assignment[self.client_id]:
            bucket = by_label[label]
            size = int(len(bucket))
            # find out which clients have this label
            holders = [cid for cid, labels in enumerate(owned) if label in labels]
            # position of this client among the holders decides the offset
            offset = holders.index(self.client_id) * size  # inclusive
            if rank == self.n_clients:
                stop = len(bucket)  # exclusive
            else:
                stop = offset + size  # exclusive
            selected += bucket[offset:stop]

        # Last part is uniform sampler
        leftovers = []
        for label in assignment['rest']:
            leftovers += by_label[label]
        selected += leftovers[self.rank:self.total_size:self.num_replicas]

        random.seed(seed + self.client_id)  # give each client a unique shuffle
        random.shuffle(selected)  # shuffle indices to spread the labels

        self.indices = selected
method == "dirichlet": sampler = DirichletSampler(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), args=args.get_sampler_args()) From 489f5b023d92246d59e1ead99ec6b232506cce23 Mon Sep 17 00:00:00 2001 From: bacox Date: Mon, 24 Jan 2022 11:07:30 +0100 Subject: [PATCH 17/73] Add exp configs --- configs/tifl-15/exp_p15_tifl.yaml | 34 +++++++++++++++++++++++++++++++ configs/tifl-15/exp_p3_tifl.yaml | 34 +++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 configs/tifl-15/exp_p15_tifl.yaml create mode 100644 configs/tifl-15/exp_p3_tifl.yaml diff --git a/configs/tifl-15/exp_p15_tifl.yaml b/configs/tifl-15/exp_p15_tifl.yaml new file mode 100644 index 00000000..97f1756d --- /dev/null +++ b/configs/tifl-15/exp_p15_tifl.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 250 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p15_tifl-basic' +offload_stategy: tifl-basic +profiling_time: 100 +deadline: 500 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/tifl-15/exp_p3_tifl.yaml b/configs/tifl-15/exp_p3_tifl.yaml new file mode 100644 index 00000000..bc3d2265 --- /dev/null +++ b/configs/tifl-15/exp_p3_tifl.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 250 +epochs_per_cycle: 1 
+wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p3_tifl-basic' +offload_stategy: tifl-adaptive +profiling_time: 100 +deadline: 500 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 1 +node_groups: + slow: [1, 1] + medium: [2, 2] + fast: [3, 3] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 3 From 7cd44ebfb1aa102b16c99d7c3522a623ab010e41 Mon Sep 17 00:00:00 2001 From: bacox Date: Mon, 24 Jan 2022 11:08:05 +0100 Subject: [PATCH 18/73] Add tifl impl --- deploy/tifl-15/client_stub_default.yml | 26 ++++++++ deploy/tifl-15/client_stub_fast.yml | 25 ++++++++ deploy/tifl-15/client_stub_medium.yml | 25 ++++++++ deploy/tifl-15/client_stub_slow.yml | 25 ++++++++ deploy/tifl-15/system_stub.yml | 27 ++++++++ fltk/client.py | 7 +- fltk/federator.py | 67 ++++++++++++++++++-- fltk/strategy/client_selection.py | 29 ++++++++- fltk/strategy/data_samplers.py | 73 +++++++++++++++++++++ fltk/strategy/offloading.py | 10 ++- fltk/util/base_config.py | 6 ++ fltk/util/generate_docker_compose.py | 88 ++++++++++++++++++++++++-- fltk/util/show_client_distributions.py | 24 +++---- 13 files changed, 405 insertions(+), 27 deletions(-) create mode 100644 deploy/tifl-15/client_stub_default.yml create mode 100644 deploy/tifl-15/client_stub_fast.yml create mode 100644 deploy/tifl-15/client_stub_medium.yml create mode 100644 deploy/tifl-15/client_stub_slow.yml create mode 100644 deploy/tifl-15/system_stub.yml diff --git 
a/deploy/tifl-15/client_stub_default.yml b/deploy/tifl-15/client_stub_default.yml new file mode 100644 index 00000000..43d6c919 --- /dev/null +++ b/deploy/tifl-15/client_stub_default.yml @@ -0,0 +1,26 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . # look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=default + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '1' +# memory: 1024M diff --git a/deploy/tifl-15/client_stub_fast.yml b/deploy/tifl-15/client_stub_fast.yml new file mode 100644 index 00000000..2c40393f --- /dev/null +++ b/deploy/tifl-15/client_stub_fast.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=fast + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '1' diff --git a/deploy/tifl-15/client_stub_medium.yml b/deploy/tifl-15/client_stub_medium.yml new file mode 100644 index 00000000..666e7891 --- /dev/null +++ b/deploy/tifl-15/client_stub_medium.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . # look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=medium + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '0.5' diff --git a/deploy/tifl-15/client_stub_slow.yml b/deploy/tifl-15/client_stub_slow.yml new file mode 100644 index 00000000..ae578071 --- /dev/null +++ b/deploy/tifl-15/client_stub_slow.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: + - ./data:/opt/federation-lab/data +# - ./docker_data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=slow + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '0.25' \ No newline at end of file diff --git a/deploy/tifl-15/system_stub.yml b/deploy/tifl-15/system_stub.yml new file mode 100644 index 00000000..77a19443 --- /dev/null +++ b/deploy/tifl-15/system_stub.yml @@ -0,0 +1,27 @@ +# creating a multi-container docker +version: "3.3" +services: + fl_server: # name can be anything + container_name: federation-lab-server # what the name for this container would be + cpuset: '0-1' + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./data/MNIST:/opt/federation-lab/data/MNIST + - ./data:/opt/federation-lab/data + - ./output:/opt/federation-lab/output + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK=0 + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + ports: + - "5000:5000" # {machine-port}:{docker-port} + networks: + default: + ipv4_address: 10.5.0.11 +networks: + default: + external: + name: local_network_dev \ No newline at end of file diff --git a/fltk/client.py b/fltk/client.py index 51bbd874..e8f0c7f5 100644 --- a/fltk/client.py +++ b/fltk/client.py @@ -663,6 +663,7 @@ def test(self, use_offloaded_model = False): accuracy = 100 * correct / total confusion_mat = confusion_matrix(targets_, pred_) + accuracy_per_class = confusion_mat.diagonal() / confusion_mat.sum(1) class_precision = self.calculate_class_precision(confusion_mat) class_recall = self.calculate_class_recall(confusion_mat) @@ -674,7 +675,7 @@ def test(self, use_offloaded_model = False): self.args.get_logger().debug("Class precision: {}".format(str(class_precision))) self.args.get_logger().debug("Class recall: {}".format(str(class_recall))) - return accuracy, loss, class_precision, class_recall + return accuracy, loss, class_precision, class_recall, accuracy_per_class def run_epochs(self, num_epoch, deadline: int = None, warmup=False): @@ -696,7 +697,7 @@ def run_epochs(self, num_epoch, deadline: int = None, warmup=False): post_training_time = time.time() start_time_test = datetime.datetime.now() - accuracy, test_loss, class_precision, class_recall = self.test() + accuracy, test_loss, class_precision, class_recall, _accuracy_per_class = self.test() elapsed_time_test = datetime.datetime.now() - start_time_test test_time_ms = int(elapsed_time_test.total_seconds()*1000) post_test_time = time.time() @@ -720,7 +721,7 @@ def run_epochs(self, num_epoch, deadline: int = None, warmup=False): 
self.load_offloaded_model() self.copy_offloaded_model_weights() loss_offload, weights_offload, training_process_offload, scheduler_data_offload, perf_data_offload = self.train(self.epoch_counter, deadline, warmup, use_offloaded_model=True) - accuracy, test_loss, class_precision, class_recall = self.test(use_offloaded_model=True) + accuracy, test_loss, class_precision, class_recall, _accuracy_per_class = self.test(use_offloaded_model=True) global global_sender_id data_offload = EpochData(self.epoch_counter, num_epoch, train_time_ms, test_time_ms, loss_offload, accuracy, test_loss, class_precision, class_recall, training_process, f'{global_sender_id}-offload') diff --git a/fltk/federator.py b/fltk/federator.py index d3610089..a023cd37 100644 --- a/fltk/federator.py +++ b/fltk/federator.py @@ -13,7 +13,7 @@ from fltk.client import Client from fltk.datasets.data_distribution import distribute_batches_equally from fltk.strategy.aggregation import FedAvg -from fltk.strategy.client_selection import random_selection +from fltk.strategy.client_selection import random_selection, tifl_update_probs, tifl_select_tier_and_decrement from fltk.strategy.offloading import OffloadingStrategy from fltk.util.arguments import Arguments from fltk.util.base_config import BareConfig @@ -25,6 +25,7 @@ from torch.utils.tensorboard import SummaryWriter from pathlib import Path import logging +import numpy as np # from fltk.util.profile_plots import stability_plot, parse_stability_data from fltk.util.results import EpochData @@ -68,12 +69,14 @@ class ClientRef: tb_writer = None tb_writer_offload = None available = False + rank=None - def __init__(self, name, ref, tensorboard_writer, tensorboard_writer_offload): + def __init__(self, name, ref, tensorboard_writer, tensorboard_writer_offload, rank): self.name = name self.ref = ref self.tb_writer = tensorboard_writer self.tb_writer_offload = tensorboard_writer_offload + self.rank = rank def __repr__(self): return self.name @@ -126,6 +129,10 @@ 
class Federator: freeze_layers_enabled = False offload_enabled = False warmup_active = False + node_groups = {} + tifl_tier_data = [] + tifl_tier_names = [] + tifl_selected_tier = '' exp_start_time = 0 @@ -155,8 +162,21 @@ def __init__(self, client_id_triple, num_epochs = 3, config=None): self.reference_lookup[get_worker_info().name] = RRef(self) self.strategy = OffloadingStrategy.Parse(config.offload_strategy) self.configure_strategy(self.strategy) - - + if self.strategy == OffloadingStrategy.TIFL_BASIC or OffloadingStrategy.TIFL_ADAPTIVE: + for k, v in self.config.node_groups.items(): + self.node_groups[k] = list(range(v[0], v[1]+1)) + self.tifl_tier_names.append(k) + + if self.strategy == OffloadingStrategy.TIFL_ADAPTIVE: + num_tiers = len(self.tifl_tier_names) * 1.0 + start_credits = np.ceil(self.config.epochs / num_tiers) + logging.info(f'Tifl starting credits is {start_credits}') + for tier_name in self.tifl_tier_names: + self.tifl_tier_data.append([tier_name, 0, start_credits, 1 / num_tiers]) + residue = 1 + for t in self.tifl_tier_data: + residue -= t[3] + self.tifl_tier_data[0][3] += residue def configure_strategy(self, strategy : OffloadingStrategy): if strategy == OffloadingStrategy.VANILLA: @@ -189,6 +209,12 @@ def configure_strategy(self, strategy : OffloadingStrategy): self.swyh_enabled = False self.freeze_layers_enabled = True self.offload_enabled = True + if strategy == OffloadingStrategy.TIFL_BASIC: + logging.info('Running with offloading strategy: TIFL_BASIC') + self.deadline_enabled = False + self.swyh_enabled = False + self.freeze_layers_enabled = False + self.offload_enabled = False logging.info(f'Offload strategy params: deadline={self.deadline_enabled}, swyh={self.swyh_enabled}, freeze={self.freeze_layers_enabled}, offload={self.offload_enabled}') def create_clients(self, client_id_triple): @@ -196,11 +222,22 @@ def create_clients(self, client_id_triple): client = rpc.remote(id, Client, kwargs=dict(id=id, log_rref=self.log_rref, rank=rank, 
world_size=world_size, config=self.config)) writer = SummaryWriter(f'{self.tb_path}/{self.config.experiment_prefix}_client_{id}') writer_offload = SummaryWriter(f'{self.tb_path}/{self.config.experiment_prefix}_client_{id}_offload') - self.clients.append(ClientRef(id, client, tensorboard_writer=writer, tensorboard_writer_offload=writer_offload)) + self.clients.append(ClientRef(id, client, tensorboard_writer=writer, tensorboard_writer_offload=writer_offload, rank=rank)) self.client_data[id] = [] def select_clients(self, n = 2): available_clients = list(filter(lambda x : x.available, self.clients)) + if self.strategy == OffloadingStrategy.TIFL_ADAPTIVE: + tifl_update_probs(self.tifl_tier_data) + self.tifl_selected_tier = tifl_select_tier_and_decrement(self.tifl_tier_data) + client_subset = self.node_groups[self.tifl_selected_tier] + available_clients = list(filter(lambda x: x.rank in client_subset, self.clients)) + if self.strategy == OffloadingStrategy.TIFL_BASIC: + self.tifl_selected_tier = np.random.choice(list(self.node_groups.keys()), 1, replace=False)[0] + logging.info(f'TIFL: Sampling from group {self.tifl_selected_tier} out of{list(self.node_groups.keys())}') + client_subset = self.node_groups[self.tifl_selected_tier] + available_clients = list(filter(lambda x : x.rank in client_subset, self.clients)) + logging.info(f'TIFL: Sampling subgroup {available_clients}') return random_selection(available_clients, n) def ping_all(self): @@ -451,6 +488,7 @@ def reached_deadline(): all_finished = False time.sleep(0.1) logging.info(f'Stopped waiting due to all_finished={all_finished} and deadline={reached_deadline()}') + client_accuracies = [] for client_response in responses: if warmup: break @@ -471,6 +509,7 @@ def reached_deadline(): logging.info(f'{client} had a loss of {epoch_data.loss}') logging.info(f'{client} had a epoch data of {epoch_data}') logging.info(f'{client} has trained on {epoch_data.training_process} samples') + 
client_accuracies.append(epoch_data.accuracy) # logging.info(f'{client} has perf data: {perf_data}') elapsed_time = client_response.end_time - self.exp_start_time @@ -540,6 +579,13 @@ def reached_deadline(): client_weights_dict[client.name] = weights client_training_process_dict[client.name] = epoch_data.training_process + if self.strategy == OffloadingStrategy.TIFL_ADAPTIVE: + mean_tier_accuracy = np.mean(client_accuracies) + logging.info(f'TIFL:: the mean accuracy is {mean_tier_accuracy}') + for t in self.tifl_tier_data: + if t[0] == self.tifl_selected_tier: + t[1] = mean_tier_accuracy + if 'offload' in response_obj: epoch_data_offload, weights_offload, scheduler_data_offload, perf_data_offload, sender_id = response_obj['offload'] if epoch_data_offload.client_id not in self.client_data: @@ -595,7 +641,11 @@ def reached_deadline(): # test global model logging.info("Testing on global test set") self.test_data.update_nn_parameters(updated_model) - accuracy, loss, class_precision, class_recall = self.test_data.test() + accuracy, loss, class_precision, class_recall, accuracy_per_class = self.test_data.test() + logging.info('Class precision') + logging.warning(accuracy_per_class) + logging.info('Class names') + logging.info(self.test_data.dataset.test_dataset.class_to_idx) # self.tb_writer.add_scalar('training loss', loss, self.epoch_counter * self.test_data.get_client_datasize()) # does not seem to work :( ) self.tb_writer.add_scalar('accuracy', accuracy, self.epoch_counter * self.test_data.get_client_datasize()) self.tb_writer.add_scalar('accuracy per epoch', accuracy, self.epoch_counter) @@ -603,6 +653,11 @@ def reached_deadline(): self.tb_writer.add_scalar('accuracy wall time', accuracy, # for every 1000 minibatches elapsed_time) + + class_acc_dict = {} + for idx, acc in enumerate(accuracy_per_class): + class_acc_dict[f'{idx}'] = acc + self.tb_writer.add_scalars('accuracy per class', class_acc_dict, self.epoch_counter) end_epoch_time = time.time() duration = 
end_epoch_time - start_epoch_time diff --git a/fltk/strategy/client_selection.py b/fltk/strategy/client_selection.py index 34900ce8..dd71a81c 100644 --- a/fltk/strategy/client_selection.py +++ b/fltk/strategy/client_selection.py @@ -1,4 +1,31 @@ import numpy as np def random_selection(clients, n): - return np.random.choice(clients, n, replace=False) \ No newline at end of file + return np.random.choice(clients, n, replace=False) + + +def tifl_select_tier(tiers): + print([x[3] for x in tiers]) + return np.random.choice([x[0] for x in tiers], 1, p=[x[3] for x in tiers])[0] + +def tifl_update_probs(tiers): + n = len([x for x in tiers if x[2] > 0]) + D = n * (n +1) / 2 + tiers.sort(key=lambda x:x[1]) + idx_decr = 0 + for idx, tier in enumerate(tiers): + if tier[2] > 0: + tier[3] = (n - (idx - idx_decr)) / D + else: + tier[3] = 0 + idx_decr += 1 + +def tifl_select_tier_and_decrement(tiers): + selected_tier = tifl_select_tier(tiers) + for tier in tiers: + if tier[0] == selected_tier: + tier[2] -= 1 + return selected_tier + +def tifl_can_select_tier(tiers): + return len([x for x in tiers if x[2] > 0]) \ No newline at end of file diff --git a/fltk/strategy/data_samplers.py b/fltk/strategy/data_samplers.py index 4256ae06..182bf8cc 100644 --- a/fltk/strategy/data_samplers.py +++ b/fltk/strategy/data_samplers.py @@ -59,6 +59,76 @@ def __iter__(self) -> Iterator[int]: def __len__(self) -> int: return len(self.indices) +class N_Labels(DistributedSamplerWrapper): + """ + A sampler that limits the number of labels per client + The number of clients must <= than number of labels + """ + + def __init__(self, dataset, num_replicas, rank, args=(5, 42)): + limit, seed = args + super().__init__(dataset, num_replicas, rank, seed) + + num_copies = np.ceil((args[0] * self.n_clients) / self.n_labels) + label_dict = {} + for l in range(self.n_labels): + label_dict[l] = num_copies + + def choice_n(l_dict: dict, n): + labels = [k for k, v in label_dict.items() if v] + selected = 
np.random.choice(labels, n, replace=False) + for k, v in l_dict.items(): + if k in selected: + v -= 0 + return selected + + print(f'N Clients={self.n_clients}') + print(f'Num_buckets={num_copies}') + + labels_per_client = int(np.floor(self.n_labels / self.n_clients)) + remaining_labels = self.n_labels - labels_per_client + labels = list(range(self.n_labels)) # list of labels to distribute + clients = list(range(self.n_clients)) # keeps track of which clients should still be given a label + client_labels = [set() for n in range(self.n_clients)] # set of labels given to each client + random.seed(seed) # seed, such that the same result can be obtained multiple times + print(client_labels) + + label_order = random.sample(labels, len(labels)) + client_label_dict = {} + for client_id in clients: + client_label_dict[client_id] = [] + for _ in range(labels_per_client): + chosen_label = label_order.pop() + client_label_dict[client_id].append(chosen_label) + client_labels[client_id].add(chosen_label) + client_label_dict['rest'] = label_order + + indices = [] + ordered_by_label = self.order_by_label(dataset) + labels = client_label_dict[self.client_id] + for label in labels: + n_samples = int(len(ordered_by_label[label])) + clients = [c for c, s in enumerate(client_labels) if label in s] # find out which clients have this label + index = clients.index(self.client_id) # find the position of this client + start_index = index * n_samples # inclusive + if rank == self.n_clients: + end_index = len(ordered_by_label[label]) # exclusive + else: + end_index = start_index + n_samples # exclusive + + indices += ordered_by_label[label][start_index:end_index] + + # Last part is uniform sampler + rest_indices = [] + for l in client_label_dict['rest']: + rest_indices += ordered_by_label[l] + filtered_rest_indices = rest_indices[self.rank:self.total_size:self.num_replicas] + indices += filtered_rest_indices + random.seed(seed + self.client_id) # give each client a unique shuffle + 
random.shuffle(indices) # shuffle indices to spread the labels + + self.indices = indices + class LimitLabelsSamplerFlex(DistributedSamplerWrapper): """ @@ -309,6 +379,9 @@ def get_sampler(dataset, args): elif method == "limit labels flex": sampler = LimitLabelsSamplerFlex(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), args=args.get_sampler_args()) + elif method == "n labels": + sampler = N_Labels(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), + args=args.get_sampler_args()) elif method == "dirichlet": sampler = DirichletSampler(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), args=args.get_sampler_args()) diff --git a/fltk/strategy/offloading.py b/fltk/strategy/offloading.py index 4473ad90..1742da5b 100644 --- a/fltk/strategy/offloading.py +++ b/fltk/strategy/offloading.py @@ -6,7 +6,9 @@ class OffloadingStrategy(Enum): DEADLINE = 2 SWYH = 3 FREEZE = 4 - MODEL_OFFLOAD = 5 + MODEL_OFFLOAD = 5, + TIFL_BASIC = 6, + TIFL_ADAPTIVE = 7 @classmethod def Parse(cls, string_value): @@ -19,4 +21,8 @@ def Parse(cls, string_value): if string_value == 'freeze': return OffloadingStrategy.FREEZE if string_value == 'offload': - return OffloadingStrategy.MODEL_OFFLOAD \ No newline at end of file + return OffloadingStrategy.MODEL_OFFLOAD + if string_value == 'tifl-basic': + return OffloadingStrategy.TIFL_BASIC + if string_value == 'tifl-adaptive': + return OffloadingStrategy.TIFL_ADAPTIVE \ No newline at end of file diff --git a/fltk/util/base_config.py b/fltk/util/base_config.py index 284a3f51..137042de 100644 --- a/fltk/util/base_config.py +++ b/fltk/util/base_config.py @@ -49,6 +49,9 @@ def __init__(self): self.first_deadline = 400 self.warmup_round = False + # FLTK options + self.node_groups = None + self.federator_host = '0.0.0.0' self.rank = 0 self.world_size = 0 @@ -153,6 +156,9 @@ def merge_yaml(self, cfg = {}): self.data_sampler = cfg['sampler'] if 'sampler_args' in cfg: self.data_sampler_args = cfg['sampler_args'] + 
+ if 'node_groups' in cfg: + self.node_groups = cfg['node_groups'] diff --git a/fltk/util/generate_docker_compose.py b/fltk/util/generate_docker_compose.py index 5adb915a..bd0b7fbb 100644 --- a/fltk/util/generate_docker_compose.py +++ b/fltk/util/generate_docker_compose.py @@ -2,18 +2,23 @@ import yaml import copy -template_path = './deploy/templates' +global_template_path = './deploy/templates' + +def load_system_template(template_path = global_template_path): -def load_system_template(): with open(f'{template_path}/system_stub.yml') as file: documents = yaml.full_load(file) return documents -def load_client_template(type='default'): +def load_client_template(type='default', template_path = global_template_path): with open(f'{template_path}/client_stub_{type}.yml') as file: documents = yaml.full_load(file) return documents +def get_deploy_path(name: str): + return f'./deploy/{name}' + + def generate_client(id, template: dict, world_size: int, type='default', cpu_set=''): local_template = copy.deepcopy(template) key_name = list(local_template.keys())[0] @@ -35,6 +40,79 @@ def generate_client(id, template: dict, world_size: int, type='default', cpu_set def generate_compose_file(): print() +def generate_tifl_15(): + template_path = get_deploy_path('tifl-15') + num_clients= 18 + world_size = num_clients + 1 + system_template: dict = load_system_template(template_path=template_path) + + for key, item in enumerate(system_template['services']['fl_server']['environment']): + if item == 'WORLD_SIZE={world_size}': + system_template['services']['fl_server']['environment'][key] = item.format(world_size=world_size) + cpu_idx = 3 + for client_id in range(1, num_clients + 1): + client_type = 'default' + if 0 < client_id <= 6: + client_type = 'slow' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + elif 6 < client_id <= 12: + client_type = 'medium' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + elif 12 < client_id <= 18: + client_type = 'fast' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + else: + 
cpu_set = f'{cpu_idx}' + cpu_idx += 1 + + client_template: dict = load_client_template(type=client_type, template_path=template_path) + client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type, + cpu_set=cpu_set) + system_template['services'].update(client_definition) + + with open(r'./docker-compose.yml', 'w') as file: + yaml.dump(system_template, file, sort_keys=False) + + +def generate_tifl_3(): + template_path = get_deploy_path('tifl-15') + num_clients= 3 + world_size = num_clients + 1 + system_template: dict = load_system_template(template_path=template_path) + + for key, item in enumerate(system_template['services']['fl_server']['environment']): + if item == 'WORLD_SIZE={world_size}': + system_template['services']['fl_server']['environment'][key] = item.format(world_size=world_size) + cpu_idx = 3 + for client_id in range(1, num_clients + 1): + client_type = 'default' + if 0 < client_id <= 1: + client_type = 'slow' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + elif 1 < client_id <= 2: + client_type = 'medium' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + elif 2 < client_id <= 3: + client_type = 'fast' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + else: + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + + client_template: dict = load_client_template(type=client_type, template_path=template_path) + client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type, + cpu_set=cpu_set) + system_template['services'].update(client_definition) + + with open(r'./docker-compose.yml', 'w') as file: + yaml.dump(system_template, file, sort_keys=False) + def generate_offload_exp(): num_clients = 4 cpu_per_client = 1 @@ -99,6 +177,8 @@ def generate(num_clients: int): # num_clients = int(sys.argv[1]) # generate(num_clients) - generate_offload_exp() + # generate_offload_exp() + # generate_tifl_15() + generate_tifl_3() print('Done') diff --git a/fltk/util/show_client_distributions.py 
b/fltk/util/show_client_distributions.py index a0805a81..c2af710a 100644 --- a/fltk/util/show_client_distributions.py +++ b/fltk/util/show_client_distributions.py @@ -10,18 +10,19 @@ level=logging.DEBUG, format='%(asctime)s %(levelname)s %(module)s - %(funcName)s: %(message)s', ) -dist_settings = { - 'uniform':{}, - 'limit labels': {'seed': 1, 'range':[0.1, 1, 0.1]}, - 'q sampler': {'seed': 1, 'range':[0.1, 1, 0.1]}, - 'dirichlet': {'seed': 1, 'range':[0.1, 1, 0.1]}, -} +# dist_settings = { +# 'uniform':{}, +# 'limit labels': {'seed': 1, 'range':[0.1, 1, 0.1]}, +# 'q sampler': {'seed': 1, 'range':[0.1, 1, 0.1]}, +# 'dirichlet': {'seed': 1, 'range':[0.1, 1, 0.1]}, +# } dist_settings = { - 'uniform':{}, - 'limit labels flex': {'seed': 1, 'range':[0.1, 1, 0.1]}, + # 'uniform':{}, + # 'limit labels flex': {'seed': 1, 'range':[0.1, 1, 0.1]}, + 'n labels': {'seed': 1, 'range':[0.1, 1, 0.1]}, # 'q sampler': {'seed': 1, 'range':[0.1, 1, 0.1]}, - 'dirichlet': {'seed': 1, 'range':[0.1, 1, 0.1]}, + # 'dirichlet': {'seed': 1, 'range':[0.1, 1, 0.1]}, } num_clients = 4 @@ -29,10 +30,11 @@ class dummy_args: net = 'Cifar10CNN' dataset_name = 'cifar10' # data_sampler = "uniform" #s = "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) - data_sampler = "limit labels flex" + # data_sampler = "limit labels flex" + data_sampler = "n labels" # sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) # data_sampler_args = [0.07, 42] # random seed || random seed || random seed || unused - data_sampler_args = [0.07 , 42] # random seed || random seed || random seed || unused + data_sampler_args = [7 , 42] # random seed || random seed || random seed || unused DistDatasets = { 'cifar10': DistCIFAR10Dataset, 'cifar100': DistCIFAR100Dataset, From a2cb62e8def8f07da2b6c416270c228efd88e355 Mon Sep 17 00:00:00 2001 From: Lydia Date: Mon, 24 Jan 2022 11:33:29 +0100 Subject: [PATCH 19/73] Update exps --- 
configs/tifl-15/exp_p15_baseline.yaml | 34 +++++++++++++++++++ configs/tifl-15/exp_p15_tifl-adaptive.yaml | 34 +++++++++++++++++++ ..._p15_tifl.yaml => exp_p15_tifl-basic.yaml} | 2 +- fltk/util/generate_docker_compose.py | 6 ++-- 4 files changed, 72 insertions(+), 4 deletions(-) create mode 100644 configs/tifl-15/exp_p15_baseline.yaml create mode 100644 configs/tifl-15/exp_p15_tifl-adaptive.yaml rename configs/tifl-15/{exp_p15_tifl.yaml => exp_p15_tifl-basic.yaml} (98%) diff --git a/configs/tifl-15/exp_p15_baseline.yaml b/configs/tifl-15/exp_p15_baseline.yaml new file mode 100644 index 00000000..a3410567 --- /dev/null +++ b/configs/tifl-15/exp_p15_baseline.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p15_baseline' +offload_stategy: vanilla +profiling_time: 100 +deadline: 500 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/tifl-15/exp_p15_tifl-adaptive.yaml b/configs/tifl-15/exp_p15_tifl-adaptive.yaml new file mode 100644 index 00000000..72d17fb2 --- /dev/null +++ b/configs/tifl-15/exp_p15_tifl-adaptive.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is 
available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p15_tifl-adaptive' +offload_stategy: tifl-adaptive +profiling_time: 100 +deadline: 500 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/tifl-15/exp_p15_tifl.yaml b/configs/tifl-15/exp_p15_tifl-basic.yaml similarity index 98% rename from configs/tifl-15/exp_p15_tifl.yaml rename to configs/tifl-15/exp_p15_tifl-basic.yaml index 97f1756d..872751c7 100644 --- a/configs/tifl-15/exp_p15_tifl.yaml +++ b/configs/tifl-15/exp_p15_tifl-basic.yaml @@ -1,6 +1,6 @@ --- # Experiment configuration -total_epochs: 250 +total_epochs: 50 epochs_per_cycle: 1 wait_for_clients: true net: FashionMNISTCNN diff --git a/fltk/util/generate_docker_compose.py b/fltk/util/generate_docker_compose.py index bd0b7fbb..7acfbf7b 100644 --- a/fltk/util/generate_docker_compose.py +++ b/fltk/util/generate_docker_compose.py @@ -49,7 +49,7 @@ def generate_tifl_15(): for key, item in enumerate(system_template['services']['fl_server']['environment']): if item == 'WORLD_SIZE={world_size}': system_template['services']['fl_server']['environment'][key] = item.format(world_size=world_size) - cpu_idx = 3 + cpu_idx = 2 for client_id in range(1, num_clients + 1): client_type = 'default' if 0 < client_id <= 6: @@ -178,7 +178,7 @@ def generate(num_clients: int): # num_clients = int(sys.argv[1]) # generate(num_clients) # generate_offload_exp() - # generate_tifl_15() - 
generate_tifl_3() + generate_tifl_15() + # generate_tifl_3() print('Done') From 8c2975c1e4f74b91ced8912e2acae9d185f85208 Mon Sep 17 00:00:00 2001 From: Lydia Date: Mon, 24 Jan 2022 21:54:52 +0100 Subject: [PATCH 20/73] Small fix in strat checking --- configs/dev/dev_p2.yaml | 34 ++++++++++++ configs/dev/run.py | 18 +++++++ .../p_freezing-iid_freeze.yaml | 34 ++++++++++++ .../p_freezing-iid_vanilla.yaml | 34 ++++++++++++ configs/effect-freezing/run.py | 18 +++++++ configs/experiment_p18_full.yaml | 28 ++++++++++ configs/tifl-15/exp_p3_tifl.yaml | 2 +- configs/tifl-15/run.py | 18 +++++++ deploy/dev/client_stub_default.yml | 25 +++++++++ deploy/dev/client_stub_fast.yml | 24 +++++++++ deploy/dev/client_stub_medium.yml | 24 +++++++++ deploy/dev/client_stub_slow.yml | 24 +++++++++ deploy/dev/system_stub.yml | 27 ++++++++++ fltk/client.py | 14 ++--- fltk/federator.py | 53 ++++++++++++++----- fltk/util/generate_docker_compose.py | 50 ++++++++++++++--- 16 files changed, 401 insertions(+), 26 deletions(-) create mode 100644 configs/dev/dev_p2.yaml create mode 100644 configs/dev/run.py create mode 100644 configs/effect-freezing/p_freezing-iid_freeze.yaml create mode 100644 configs/effect-freezing/p_freezing-iid_vanilla.yaml create mode 100644 configs/effect-freezing/run.py create mode 100644 configs/experiment_p18_full.yaml create mode 100644 configs/tifl-15/run.py create mode 100644 deploy/dev/client_stub_default.yml create mode 100644 deploy/dev/client_stub_fast.yml create mode 100644 deploy/dev/client_stub_medium.yml create mode 100644 deploy/dev/client_stub_slow.yml create mode 100644 deploy/dev/system_stub.yml diff --git a/configs/dev/dev_p2.yaml b/configs/dev/dev_p2.yaml new file mode 100644 index 00000000..b431c0e3 --- /dev/null +++ b/configs/dev/dev_p2.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force 
CPU +cuda: false +experiment_prefix: 'exp_dev_p2' +offload_stategy: vanilla +profiling_time: 100 +deadline: 500 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 2 +node_groups: + slow: [1, 1] + medium: [2, 2] + fast: [3, 3] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 2 diff --git a/configs/dev/run.py b/configs/dev/run.py new file mode 100644 index 00000000..dc8f65f2 --- /dev/null +++ b/configs/dev/run.py @@ -0,0 +1,18 @@ +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + name = 'dev' + generate_docker(name) + base_path = 'configs/dev' + exp_list = ['dev_p2.yaml'] + exp_list = [f'{base_path}/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + + print('Done') + + diff --git a/configs/effect-freezing/p_freezing-iid_freeze.yaml b/configs/effect-freezing/p_freezing-iid_freeze.yaml new file mode 100644 index 00000000..d078e4ea --- /dev/null +++ b/configs/effect-freezing/p_freezing-iid_freeze.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p_freezing_iid_freeze' +offload_stategy: freeze +profiling_time: 100 +deadline: 1 +warmup_round: false +output_location: 
'output' +tensor_board_active: true +clients_per_round: 2 +node_groups: + slow: [1, 1] + medium: [2, 2] + fast: [3, 3] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 diff --git a/configs/effect-freezing/p_freezing-iid_vanilla.yaml b/configs/effect-freezing/p_freezing-iid_vanilla.yaml new file mode 100644 index 00000000..d1b3592c --- /dev/null +++ b/configs/effect-freezing/p_freezing-iid_vanilla.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p_freezing_iid_vanilla' +offload_stategy: vanilla +profiling_time: 100 +deadline: 1 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 2 +node_groups: + slow: [1, 1] + medium: [2, 2] + fast: [3, 3] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 diff --git a/configs/effect-freezing/run.py b/configs/effect-freezing/run.py new file mode 100644 index 00000000..7dded93e --- /dev/null +++ b/configs/effect-freezing/run.py @@ -0,0 +1,18 @@ +from fltk.util.generate_docker_compose import run as 
generate_docker +import os +if __name__ == '__main__': + name = 'dev' + generate_docker(name, 10, True) + base_path = 'configs/effect-freezing' + exp_list = ['p_freezing-iid_freeze.yaml','p_freezing-iid_vanilla.yaml'] + exp_list = [f'{base_path}/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + + print('Done') + + diff --git a/configs/experiment_p18_full.yaml b/configs/experiment_p18_full.yaml new file mode 100644 index 00000000..a01c944a --- /dev/null +++ b/configs/experiment_p18_full.yaml @@ -0,0 +1,28 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'experiment_p18_full' +offload_stategy: deadline +profiling_time: -1 +deadline: 1000 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 18 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + hostname: '10.5.0.11' + nic: 'eth0' + clients: + amount: 18 diff --git a/configs/tifl-15/exp_p3_tifl.yaml b/configs/tifl-15/exp_p3_tifl.yaml index bc3d2265..f17b4b8d 100644 --- a/configs/tifl-15/exp_p3_tifl.yaml +++ b/configs/tifl-15/exp_p3_tifl.yaml @@ -1,6 +1,6 @@ --- # Experiment configuration -total_epochs: 250 +total_epochs: 50 epochs_per_cycle: 1 wait_for_clients: true net: FashionMNISTCNN diff --git a/configs/tifl-15/run.py b/configs/tifl-15/run.py new file mode 100644 index 00000000..db1a5128 --- /dev/null +++ 
b/configs/tifl-15/run.py @@ -0,0 +1,18 @@ +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + name = 'tifl-15' + generate_docker(name) + base_path = 'configs/tifl-15' + exp_list = ['exp_p15_baseline.yaml', 'exp_p15_tifl-adaptive.yaml', 'exp_p15_tifl-basic.yaml'] + exp_list = [f'{base_path}/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + + print('Done') + + diff --git a/deploy/dev/client_stub_default.yml b/deploy/dev/client_stub_default.yml new file mode 100644 index 00000000..3a1774cf --- /dev/null +++ b/deploy/dev/client_stub_default.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . # look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '2' +# memory: 1024M diff --git a/deploy/dev/client_stub_fast.yml b/deploy/dev/client_stub_fast.yml new file mode 100644 index 00000000..f03012ff --- /dev/null +++ b/deploy/dev/client_stub_fast.yml @@ -0,0 +1,24 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '3' diff --git a/deploy/dev/client_stub_medium.yml b/deploy/dev/client_stub_medium.yml new file mode 100644 index 00000000..49abdeb2 --- /dev/null +++ b/deploy/dev/client_stub_medium.yml @@ -0,0 +1,24 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . # look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '2' diff --git a/deploy/dev/client_stub_slow.yml b/deploy/dev/client_stub_slow.yml new file mode 100644 index 00000000..9cbdabb5 --- /dev/null +++ b/deploy/dev/client_stub_slow.yml @@ -0,0 +1,24 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: + - ./data:/opt/federation-lab/data +# - ./docker_data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '0.5' \ No newline at end of file diff --git a/deploy/dev/system_stub.yml b/deploy/dev/system_stub.yml new file mode 100644 index 00000000..c84b2ecb --- /dev/null +++ b/deploy/dev/system_stub.yml @@ -0,0 +1,27 @@ +# creating a multi-container docker +version: "3.3" +services: + fl_server: # name can be anything + container_name: federation-lab-server # what the name for this container would be + cpuset: '0-2' + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./data/MNIST:/opt/federation-lab/data/MNIST + - ./data:/opt/federation-lab/data + - ./output:/opt/federation-lab/output + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK=0 + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + ports: + - "5000:5000" # {machine-port}:{docker-port} + networks: + default: + ipv4_address: 10.5.0.11 +networks: + default: + external: + name: local_network_dev \ No newline at end of file diff --git a/fltk/client.py b/fltk/client.py index e8f0c7f5..937de6f9 100644 --- a/fltk/client.py +++ b/fltk/client.py @@ -667,13 +667,13 @@ def test(self, use_offloaded_model = False): class_precision = self.calculate_class_precision(confusion_mat) class_recall = self.calculate_class_recall(confusion_mat) - - self.args.get_logger().debug('Test set: Accuracy: {}/{} ({:.0f}%)'.format(correct, total, accuracy)) - self.args.get_logger().debug('Test set: Loss: {}'.format(loss)) - self.args.get_logger().debug("Classification Report:\n" + classification_report(targets_, pred_)) - self.args.get_logger().debug("Confusion Matrix:\n" + str(confusion_mat)) - self.args.get_logger().debug("Class precision: {}".format(str(class_precision))) - self.args.get_logger().debug("Class recall: {}".format(str(class_recall))) + if False: + self.args.get_logger().debug('Test set: Accuracy: {}/{} ({:.0f}%)'.format(correct, total, accuracy)) + self.args.get_logger().debug('Test set: Loss: {}'.format(loss)) + self.args.get_logger().debug("Classification Report:\n" + classification_report(targets_, pred_)) + self.args.get_logger().debug("Confusion Matrix:\n" + str(confusion_mat)) + self.args.get_logger().debug("Class precision: {}".format(str(class_precision))) + self.args.get_logger().debug("Class recall: {}".format(str(class_recall))) return accuracy, loss, class_precision, class_recall, accuracy_per_class diff --git a/fltk/federator.py b/fltk/federator.py index 
a023cd37..c617bf1c 100644 --- a/fltk/federator.py +++ b/fltk/federator.py @@ -142,6 +142,8 @@ class Federator: # Keep track of the experiment data exp_data_general = [] + epoch_events = [] + def __init__(self, client_id_triple, num_epochs = 3, config=None): log_rref = rpc.RRef(FLLogger()) self.log_rref = log_rref @@ -162,7 +164,7 @@ def __init__(self, client_id_triple, num_epochs = 3, config=None): self.reference_lookup[get_worker_info().name] = RRef(self) self.strategy = OffloadingStrategy.Parse(config.offload_strategy) self.configure_strategy(self.strategy) - if self.strategy == OffloadingStrategy.TIFL_BASIC or OffloadingStrategy.TIFL_ADAPTIVE: + if self.strategy == OffloadingStrategy.TIFL_BASIC or self.strategy == OffloadingStrategy.TIFL_ADAPTIVE: for k, v in self.config.node_groups.items(): self.node_groups[k] = list(range(v[0], v[1]+1)) self.tifl_tier_names.append(k) @@ -225,6 +227,9 @@ def create_clients(self, client_id_triple): self.clients.append(ClientRef(id, client, tensorboard_writer=writer, tensorboard_writer_offload=writer_offload, rank=rank)) self.client_data[id] = [] + def record_epoch_event(self, event: str): + self.epoch_events.append(f'{time.time()} - [{self.epoch_counter}] - {event}') + def select_clients(self, n = 2): available_clients = list(filter(lambda x : x.available, self.clients)) if self.strategy == OffloadingStrategy.TIFL_ADAPTIVE: @@ -345,6 +350,9 @@ def remote_run_epoch(self, epochs, warmup=False, first_epoch=False): client_weights_dict = {} client_training_process_dict = {} + + + self.record_epoch_event('Starting new round') while self.num_available_clients() < self.config.clients_per_round: logging.warning(f'Waiting for enough clients to become available. 
# Available Clients = {self.num_available_clients()}, but need {self.config.clients_per_round}') self.process_response_list() @@ -458,6 +466,7 @@ def reached_deadline(): if est_total_time < strong_performance: strong_performance = est_total_time strongest = k + self.record_epoch_event(f'Offloading from {weakest} -> {strongest} due to {self.performance_estimate[weakest]} and {self.performance_estimate[strongest]}') logging.info( f'Offloading from {weakest} -> {strongest} due to {self.performance_estimate[weakest]} and {self.performance_estimate[strongest]}') logging.info('Sending call to offload') @@ -506,9 +515,12 @@ def reached_deadline(): epoch_data, weights, scheduler_data, perf_data = response_obj['own'] self.client_data[epoch_data.client_id].append(epoch_data) - logging.info(f'{client} had a loss of {epoch_data.loss}') - logging.info(f'{client} had a epoch data of {epoch_data}') - logging.info(f'{client} has trained on {epoch_data.training_process} samples') + + # logging.info(f'{client} had a loss of {epoch_data.loss}') + # logging.info(f'{client} had a epoch data of {epoch_data}') + # logging.info(f'{client} has trained on {epoch_data.training_process} samples') + self.record_epoch_event(f'{client} had an accuracy of {epoch_data.accuracy}') + self.record_epoch_event(f'{client} had an duration of {client_response.duration()}') client_accuracies.append(epoch_data.accuracy) # logging.info(f'{client} has perf data: {perf_data}') elapsed_time = client_response.end_time - self.exp_start_time @@ -564,10 +576,11 @@ def reached_deadline(): p_v2_time = sum([x.mean() for x in perf_data['p_v2_data']]) * perf_data['n_batches'] p_v1_forwards = perf_data['p_v1_forwards'].mean() * perf_data['n_batches'] p_v1_backwards = perf_data['p_v1_backwards'].mean() * perf_data['n_batches'] - logging.info(f'{client} has time estimates: {[total_time_t1, loop_duration, p_v1_time_sum, p_v1_time, p_v2_time, [p_v1_forwards, p_v1_backwards], [p_v2_forwards, p_v2_backwards]]}') - 
logging.info(f'{client} combined times pre post loop stuff: {[p_v1_pre_loop, loop_duration, p_v1_post_loop]} = {sum([p_v1_pre_loop, loop_duration, p_v1_post_loop])} ? {total_time_t1}') - logging.info(f'{client} p3 time = {p_v3_forwards} + {p_v3_backwards} = {p_v3_forwards+ p_v3_backwards}') - logging.info(f'{client} Pre train loop time = {pre_train_loop_data.mean()}, post train loop time = {post_train_loop_data.mean()}') + + # logging.info(f'{client} has time estimates: {[total_time_t1, loop_duration, p_v1_time_sum, p_v1_time, p_v2_time, [p_v1_forwards, p_v1_backwards], [p_v2_forwards, p_v2_backwards]]}') + # logging.info(f'{client} combined times pre post loop stuff: {[p_v1_pre_loop, loop_duration, p_v1_post_loop]} = {sum([p_v1_pre_loop, loop_duration, p_v1_post_loop])} ? {total_time_t1}') + # logging.info(f'{client} p3 time = {p_v3_forwards} + {p_v3_backwards} = {p_v3_forwards+ p_v3_backwards}') + # logging.info(f'{client} Pre train loop time = {pre_train_loop_data.mean()}, post train loop time = {post_train_loop_data.mean()}') # logging.info(f'{client} p_v1 data: {perf_data["p_v1_data"]}') @@ -642,12 +655,13 @@ def reached_deadline(): logging.info("Testing on global test set") self.test_data.update_nn_parameters(updated_model) accuracy, loss, class_precision, class_recall, accuracy_per_class = self.test_data.test() - logging.info('Class precision') - logging.warning(accuracy_per_class) - logging.info('Class names') - logging.info(self.test_data.dataset.test_dataset.class_to_idx) + # logging.info('Class precision') + # logging.warning(accuracy_per_class) + # logging.info('Class names') + # logging.info(self.test_data.dataset.test_dataset.class_to_idx) # self.tb_writer.add_scalar('training loss', loss, self.epoch_counter * self.test_data.get_client_datasize()) # does not seem to work :( ) self.tb_writer.add_scalar('accuracy', accuracy, self.epoch_counter * self.test_data.get_client_datasize()) + self.record_epoch_event(f'Global accuracy is {accuracy}') 
self.tb_writer.add_scalar('accuracy per epoch', accuracy, self.epoch_counter) elapsed_time = time.time() - self.exp_start_time self.tb_writer.add_scalar('accuracy wall time', @@ -658,6 +672,7 @@ def reached_deadline(): for idx, acc in enumerate(accuracy_per_class): class_acc_dict[f'{idx}'] = acc self.tb_writer.add_scalars('accuracy per class', class_acc_dict, self.epoch_counter) + self.record_epoch_event(f'Accuracy per class is {class_acc_dict}') end_epoch_time = time.time() duration = end_epoch_time - start_epoch_time @@ -692,6 +707,19 @@ def remote_test_sync(self): accuracy, loss, class_precision, class_recall = res[1].wait() logging.info(f'{res[0]} had a result of accuracy={accuracy}') + def flush_epoch_events(self): + file_output = f'./{self.config.output_location}' + exp_prefix = self.config.experiment_prefix + file_epoch_events = f'{file_output}/{exp_prefix}_federator_events.txt' + self.ensure_path_exists(file_output) + + with open(file_epoch_events, 'a') as f: + for ev in self.epoch_events: + f.write(f'{ev}\n') + f.flush() + + self.epoch_events = [] + def save_epoch_data(self): file_output = f'./{self.config.output_location}' exp_prefix = self.config.experiment_prefix @@ -743,6 +771,7 @@ def run(self): self.process_response_list() logging.info(f'Running epoch {epoch}') self.remote_run_epoch(epoch_size) + self.flush_epoch_events() addition += 1 diff --git a/fltk/util/generate_docker_compose.py b/fltk/util/generate_docker_compose.py index 7acfbf7b..fa46f20f 100644 --- a/fltk/util/generate_docker_compose.py +++ b/fltk/util/generate_docker_compose.py @@ -1,6 +1,7 @@ import sys import yaml import copy +import argparse global_template_path = './deploy/templates' @@ -40,6 +41,33 @@ def generate_client(id, template: dict, world_size: int, type='default', cpu_set def generate_compose_file(): print() + +def generate_dev(num_clients = 2, medium=False): + template_path = get_deploy_path('dev') + world_size = num_clients + 1 + system_template: dict = 
load_system_template(template_path=template_path) + + for key, item in enumerate(system_template['services']['fl_server']['environment']): + if item == 'WORLD_SIZE={world_size}': + system_template['services']['fl_server']['environment'][key] = item.format(world_size=world_size) + cpu_idx = 2 + for client_id in range(1, num_clients + 1): + if not medium: + client_type = 'fast' + cpu_set = f'{cpu_idx}-{cpu_idx + 2}' + cpu_idx += 3 + else: + client_type = 'medium' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + client_template: dict = load_client_template(type=client_type, template_path=template_path) + client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type, + cpu_set=cpu_set) + system_template['services'].update(client_definition) + + with open(r'./docker-compose.yml', 'w') as file: + yaml.dump(system_template, file, sort_keys=False) + def generate_tifl_15(): template_path = get_deploy_path('tifl-15') num_clients= 18 @@ -172,13 +200,23 @@ def generate(num_clients: int): with open(r'./docker-compose.yml', 'w') as file: yaml.dump(system_template, file, sort_keys=False) +def run(name, num_clients = None, medium=False): + exp_dict = { + 'tifl-15': generate_tifl_15, + 'dev': generate_dev -if __name__ == '__main__': + } + if num_clients: + exp_dict[name](num_clients, medium) + else: + exp_dict[name]() - # num_clients = int(sys.argv[1]) - # generate(num_clients) - # generate_offload_exp() - generate_tifl_15() - # generate_tifl_3() +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Generate docker-compose file') + parser.add_argument('name', type=str, + help='Name of an experiment') + parser.add_argument('--clients', type=int, help='Set the number of clients in the system', default=None) + args = parser.parse_args() + run(args.name, args.clients) print('Done') From da5f9463383e9ec28300ad02abdee6220fa33b99 Mon Sep 17 00:00:00 2001 From: bacox Date: Mon, 24 Jan 2022 22:57:43 +0100 Subject: [PATCH 21/73] 
Proto for termination --- configs/terminate/p_terminate_terminate.yaml | 35 +++++++ .../terminate/p_terminate_terminate_swyh.yaml | 35 +++++++ configs/terminate/p_terminate_vanilla.yaml | 35 +++++++ configs/terminate/run.py | 18 ++++ deploy/terminate/client_stub_default.yml | 25 +++++ deploy/terminate/client_stub_fast.yml | 24 +++++ deploy/terminate/client_stub_medium.yml | 24 +++++ deploy/terminate/client_stub_slow.yml | 24 +++++ deploy/terminate/system_stub.yml | 27 ++++++ fltk/client.py | 75 +++++++-------- fltk/federator.py | 96 +++++++++++-------- fltk/strategy/offloading.py | 57 ++++++++++- fltk/util/base_config.py | 5 + fltk/util/generate_docker_compose.py | 29 +++++- 14 files changed, 426 insertions(+), 83 deletions(-) create mode 100644 configs/terminate/p_terminate_terminate.yaml create mode 100644 configs/terminate/p_terminate_terminate_swyh.yaml create mode 100644 configs/terminate/p_terminate_vanilla.yaml create mode 100644 configs/terminate/run.py create mode 100644 deploy/terminate/client_stub_default.yml create mode 100644 deploy/terminate/client_stub_fast.yml create mode 100644 deploy/terminate/client_stub_medium.yml create mode 100644 deploy/terminate/client_stub_slow.yml create mode 100644 deploy/terminate/system_stub.yml diff --git a/configs/terminate/p_terminate_terminate.yaml b/configs/terminate/p_terminate_terminate.yaml new file mode 100644 index 00000000..a5386fb1 --- /dev/null +++ b/configs/terminate/p_terminate_terminate.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p_terminate_terminate' +offload_stategy: dynamic-terminate +profiling_time: 100 +deadline: 1 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +node_groups: + slow: [1, 1] + medium: [2, 2] + fast: [3, 3] 
+termination_percentage: 0.7 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 16 diff --git a/configs/terminate/p_terminate_terminate_swyh.yaml b/configs/terminate/p_terminate_terminate_swyh.yaml new file mode 100644 index 00000000..068c7c22 --- /dev/null +++ b/configs/terminate/p_terminate_terminate_swyh.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p_terminate_terminate_swyh' +offload_stategy: dynamic-terminate-swyh +profiling_time: 100 +deadline: 1 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +node_groups: + slow: [1, 1] + medium: [2, 2] + fast: [3, 3] +termination_percentage: 0.7 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 16 diff --git a/configs/terminate/p_terminate_vanilla.yaml b/configs/terminate/p_terminate_vanilla.yaml new file mode 100644 index 00000000..8591f556 --- /dev/null +++ b/configs/terminate/p_terminate_vanilla.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 
1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p_terminate_vanilla' +offload_stategy: vanilla +profiling_time: 100 +deadline: 1 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 4 +node_groups: + slow: [1, 1] + medium: [2, 2] + fast: [3, 3] +termination_percentage: 0.7 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 16 diff --git a/configs/terminate/run.py b/configs/terminate/run.py new file mode 100644 index 00000000..7dded93e --- /dev/null +++ b/configs/terminate/run.py @@ -0,0 +1,18 @@ +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + name = 'dev' + generate_docker(name, 10, True) + base_path = 'configs/effect-freezing' + exp_list = ['p_freezing-iid_freeze.yaml','p_freezing-iid_vanilla.yaml'] + exp_list = [f'{base_path}/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + + print('Done') + + diff --git a/deploy/terminate/client_stub_default.yml b/deploy/terminate/client_stub_default.yml new file mode 100644 index 00000000..3a1774cf --- /dev/null +++ b/deploy/terminate/client_stub_default.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} 
+ restart: "no" # if it crashes for example + build: . # look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '2' +# memory: 1024M diff --git a/deploy/terminate/client_stub_fast.yml b/deploy/terminate/client_stub_fast.yml new file mode 100644 index 00000000..f03012ff --- /dev/null +++ b/deploy/terminate/client_stub_fast.yml @@ -0,0 +1,24 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . # look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '3' diff --git a/deploy/terminate/client_stub_medium.yml b/deploy/terminate/client_stub_medium.yml new file mode 100644 index 00000000..8ed98ed0 --- /dev/null +++ b/deploy/terminate/client_stub_medium.yml @@ -0,0 +1,24 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '1' diff --git a/deploy/terminate/client_stub_slow.yml b/deploy/terminate/client_stub_slow.yml new file mode 100644 index 00000000..caa4daae --- /dev/null +++ b/deploy/terminate/client_stub_slow.yml @@ -0,0 +1,24 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . # look for the docker file where this file is currently located + volumes: + - ./data:/opt/federation-lab/data +# - ./docker_data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '0.25' \ No newline at end of file diff --git a/deploy/terminate/system_stub.yml b/deploy/terminate/system_stub.yml new file mode 100644 index 00000000..c84b2ecb --- /dev/null +++ b/deploy/terminate/system_stub.yml @@ -0,0 +1,27 @@ +# creating a multi-container docker +version: "3.3" +services: + fl_server: # name can be anything + container_name: federation-lab-server # what the name for this container would be + cpuset: '0-2' + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./data/MNIST:/opt/federation-lab/data/MNIST + - ./data:/opt/federation-lab/data + - ./output:/opt/federation-lab/output + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK=0 + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + ports: + - "5000:5000" # {machine-port}:{docker-port} + networks: + default: + ipv4_address: 10.5.0.11 +networks: + default: + external: + name: local_network_dev \ No newline at end of file diff --git a/fltk/client.py b/fltk/client.py index 937de6f9..9124ebea 100644 --- a/fltk/client.py +++ b/fltk/client.py @@ -16,7 +16,7 @@ from fltk.schedulers import MinCapableStepLR from fltk.strategy.aggregation import FedAvg -from fltk.strategy.offloading import OffloadingStrategy +from fltk.strategy.offloading import OffloadingStrategy, parse_strategy from fltk.util.arguments import Arguments from fltk.util.fed_avg import average_nn_parameters from fltk.util.log import FLLogger @@ -83,6 +83,14 @@ class Client: strategy = OffloadingStrategy.VANILLA + deadline_enabled = False + swyh_enabled = False + freeze_layers_enabled = False + offload_enabled = False + dyn_terminate = False + dyn_terminate_swyh = False + + terminate_training = False def __init__(self, id, log_rref, rank, world_size, config = None): # logging.info(f'Welcome to client {id}') @@ -121,51 +129,19 @@ def copy_offloaded_model_weights(self): logging.info('Parameters of offloaded model updated') self.offloaded_model_ready = True - - def parse_strategy(self, strategy: OffloadingStrategy): - deadline_enabled = False - swyh_enabled = False - freeze_layers_enabled = False - offload_enabled = False - if strategy == OffloadingStrategy.VANILLA: - logging.info('Running with offloading strategy: VANILLA') - deadline_enabled = False - swyh_enabled = False - freeze_layers_enabled = False - offload_enabled = False - if strategy == OffloadingStrategy.DEADLINE: - logging.info('Running 
with offloading strategy: DEADLINE') - deadline_enabled = True - swyh_enabled = False - freeze_layers_enabled = False - offload_enabled = False - if strategy == OffloadingStrategy.SWYH: - logging.info('Running with offloading strategy: SWYH') - deadline_enabled = True - swyh_enabled = True - freeze_layers_enabled = False - offload_enabled = False - if strategy == OffloadingStrategy.FREEZE: - logging.info('Running with offloading strategy: FREEZE') - deadline_enabled = True - swyh_enabled = False - freeze_layers_enabled = True - offload_enabled = False - if strategy == OffloadingStrategy.MODEL_OFFLOAD: - logging.info('Running with offloading strategy: MODEL_OFFLOAD') - deadline_enabled = True - swyh_enabled = False - freeze_layers_enabled = True - offload_enabled = True - return deadline_enabled, swyh_enabled, freeze_layers_enabled, offload_enabled - def configure_strategy(self, strategy : OffloadingStrategy): - deadline_enabled, swyh_enabled, freeze_layers_enabled, offload_enabled = self.parse_strategy(strategy) + deadline_enabled, swyh_enabled, freeze_layers_enabled, offload_enabled, dyn_terminate, dyn_terminate_swyh = parse_strategy(strategy) self.deadline_enabled = deadline_enabled self.swyh_enabled = swyh_enabled self.freeze_layers_enabled = freeze_layers_enabled self.offload_enabled = offload_enabled - logging.info(f'Offload strategy params: deadline={self.deadline_enabled}, swyh={self.swyh_enabled}, freeze={self.freeze_layers_enabled}, offload={self.offload_enabled}') + self.dyn_terminate = dyn_terminate + self.dyn_terminate_swyh = dyn_terminate_swyh + logging.info(f'Offloading strategy={strategy}') + logging.info(f'Offload strategy params: deadline={self.deadline_enabled}, ' + f'swyh={self.swyh_enabled}, freeze={self.freeze_layers_enabled}, ' + f'offload={self.offload_enabled}, dyn_terminate={self.dyn_terminate}, ' + f'dyn_terminate_swyh={self.dyn_terminate_swyh}') def init_device(self): @@ -178,6 +154,11 @@ def send_reference(self, server_ref): 
self.local_log(f'Got worker_info from server {server_ref}') self.server_ref = server_ref + + def terminate_training_endpoint(self): + self.terminate_training = True + + @staticmethod def static_ping(): print(f'Got static ping with global_dict={global_dict}') @@ -466,6 +447,11 @@ def calc_optimal_offloading_point(profiler_data, time_till_deadline, iterations_ loop_pre_train_start = time.time() start_train_time = time.time() + if self.dyn_terminate_swyh or self.dyn_terminate: + if self.terminate_training: + logging.info('Got a call to terminate training') + break + if self.offload_enabled and not warmup: # Check if there is a call to offload if self.call_to_offload: @@ -690,6 +676,13 @@ def run_epochs(self, num_epoch, deadline: int = None, warmup=False): self.dataset.get_train_sampler().set_epoch_size(num_epoch) # Train locally loss, weights, training_process, scheduler_data, perf_data = self.train(self.epoch_counter, deadline, warmup) + if self.dyn_terminate: + logging.info('Not testing data due to termination call') + self.dyn_terminate = False + return {'own': []} + elif self.dyn_terminate_swyh: + self.dyn_terminate_swyh = False + logging.info('Sending back weights due to terminate with swyh') if not warmup: self.epoch_counter += num_epoch elapsed_time_train = datetime.datetime.now() - start_time_train diff --git a/fltk/federator.py b/fltk/federator.py index c617bf1c..77b24b4e 100644 --- a/fltk/federator.py +++ b/fltk/federator.py @@ -14,7 +14,7 @@ from fltk.datasets.data_distribution import distribute_batches_equally from fltk.strategy.aggregation import FedAvg from fltk.strategy.client_selection import random_selection, tifl_update_probs, tifl_select_tier_and_decrement -from fltk.strategy.offloading import OffloadingStrategy +from fltk.strategy.offloading import OffloadingStrategy, parse_strategy from fltk.util.arguments import Arguments from fltk.util.base_config import BareConfig from fltk.util.data_loader_utils import load_train_data_loader, 
load_test_data_loader, \ @@ -128,6 +128,8 @@ class Federator: swyh_enabled = False freeze_layers_enabled = False offload_enabled = False + dyn_terminate = False + dyn_terminate_swyh = False warmup_active = False node_groups = {} tifl_tier_data = [] @@ -180,44 +182,58 @@ def __init__(self, client_id_triple, num_epochs = 3, config=None): residue -= t[3] self.tifl_tier_data[0][3] += residue + # def configure_strategy(self, strategy : OffloadingStrategy): + # if strategy == OffloadingStrategy.VANILLA: + # logging.info('Running with offloading strategy: VANILLA') + # self.deadline_enabled = False + # self.swyh_enabled = False + # self.freeze_layers_enabled = False + # self.offload_enabled = False + # if strategy == OffloadingStrategy.DEADLINE: + # logging.info('Running with offloading strategy: DEADLINE') + # self.deadline_enabled = True + # self.swyh_enabled = False + # self.freeze_layers_enabled = False + # self.offload_enabled = False + # if strategy == OffloadingStrategy.SWYH: + # logging.info('Running with offloading strategy: SWYH') + # self.deadline_enabled = True + # self.swyh_enabled = True + # self.freeze_layers_enabled = False + # self.offload_enabled = False + # if strategy == OffloadingStrategy.FREEZE: + # logging.info('Running with offloading strategy: FREEZE') + # self.deadline_enabled = True + # self.swyh_enabled = False + # self.freeze_layers_enabled = True + # self.offload_enabled = False + # if strategy == OffloadingStrategy.MODEL_OFFLOAD: + # logging.info('Running with offloading strategy: MODEL_OFFLOAD') + # self.deadline_enabled = True + # self.swyh_enabled = False + # self.freeze_layers_enabled = True + # self.offload_enabled = True + # if strategy == OffloadingStrategy.TIFL_BASIC: + # logging.info('Running with offloading strategy: TIFL_BASIC') + # self.deadline_enabled = False + # self.swyh_enabled = False + # self.freeze_layers_enabled = False + # self.offload_enabled = False + # logging.info(f'Offload strategy params: 
deadline={self.deadline_enabled}, swyh={self.swyh_enabled}, freeze={self.freeze_layers_enabled}, offload={self.offload_enabled}') + # def configure_strategy(self, strategy : OffloadingStrategy): - if strategy == OffloadingStrategy.VANILLA: - logging.info('Running with offloading strategy: VANILLA') - self.deadline_enabled = False - self.swyh_enabled = False - self.freeze_layers_enabled = False - self.offload_enabled = False - if strategy == OffloadingStrategy.DEADLINE: - logging.info('Running with offloading strategy: DEADLINE') - self.deadline_enabled = True - self.swyh_enabled = False - self.freeze_layers_enabled = False - self.offload_enabled = False - if strategy == OffloadingStrategy.SWYH: - logging.info('Running with offloading strategy: SWYH') - self.deadline_enabled = True - self.swyh_enabled = True - self.freeze_layers_enabled = False - self.offload_enabled = False - if strategy == OffloadingStrategy.FREEZE: - logging.info('Running with offloading strategy: FREEZE') - self.deadline_enabled = True - self.swyh_enabled = False - self.freeze_layers_enabled = True - self.offload_enabled = False - if strategy == OffloadingStrategy.MODEL_OFFLOAD: - logging.info('Running with offloading strategy: MODEL_OFFLOAD') - self.deadline_enabled = True - self.swyh_enabled = False - self.freeze_layers_enabled = True - self.offload_enabled = True - if strategy == OffloadingStrategy.TIFL_BASIC: - logging.info('Running with offloading strategy: TIFL_BASIC') - self.deadline_enabled = False - self.swyh_enabled = False - self.freeze_layers_enabled = False - self.offload_enabled = False - logging.info(f'Offload strategy params: deadline={self.deadline_enabled}, swyh={self.swyh_enabled}, freeze={self.freeze_layers_enabled}, offload={self.offload_enabled}') + deadline_enabled, swyh_enabled, freeze_layers_enabled, offload_enabled, dyn_terminate, dyn_terminate_swyh = parse_strategy(strategy) + self.deadline_enabled = deadline_enabled + self.swyh_enabled = swyh_enabled + 
self.freeze_layers_enabled = freeze_layers_enabled + self.offload_enabled = offload_enabled + self.dyn_terminate = dyn_terminate + self.dyn_terminate_swyh = dyn_terminate_swyh + logging.info(f'Offloading strategy={strategy}') + logging.info(f'Offload strategy params: deadline={self.deadline_enabled}, ' + f'swyh={self.swyh_enabled}, freeze={self.freeze_layers_enabled}, ' + f'offload={self.offload_enabled}, dyn_terminate={self.dyn_terminate}, ' + f'dyn_terminate_swyh={self.dyn_terminate_swyh}') def create_clients(self, client_id_triple): for id, rank, world_size in client_id_triple: @@ -489,12 +505,16 @@ def reached_deadline(): # selected_clients[0] # logging.info(f'Status of all_finished={all_finished} and deadline={reached_deadline()}') all_finished = True + for client_response in responses: if client_response.future.done(): if not client_response.done: client_response.finish() else: all_finished = False + num_finished_responses = sum([1 for x in responses if x.done]) + percentage = num_finished_responses / len(responses) + logging.info(f'Percentage of finished responses: {percentage}, do terminate ? 
{percentage} > {self.config.termination_percentage} = {percentage > self.config.termination_percentage}') time.sleep(0.1) logging.info(f'Stopped waiting due to all_finished={all_finished} and deadline={reached_deadline()}') client_accuracies = [] diff --git a/fltk/strategy/offloading.py b/fltk/strategy/offloading.py index 1742da5b..586464e2 100644 --- a/fltk/strategy/offloading.py +++ b/fltk/strategy/offloading.py @@ -8,7 +8,9 @@ class OffloadingStrategy(Enum): FREEZE = 4 MODEL_OFFLOAD = 5, TIFL_BASIC = 6, - TIFL_ADAPTIVE = 7 + TIFL_ADAPTIVE = 7, + DYN_TERMINATE = 8, + DYN_TERMINATE_SWYH = 9 @classmethod def Parse(cls, string_value): @@ -25,4 +27,55 @@ def Parse(cls, string_value): if string_value == 'tifl-basic': return OffloadingStrategy.TIFL_BASIC if string_value == 'tifl-adaptive': - return OffloadingStrategy.TIFL_ADAPTIVE \ No newline at end of file + return OffloadingStrategy.TIFL_ADAPTIVE + if string_value == 'dynamic-terminate': + return OffloadingStrategy.DYN_TERMINATE + if string_value == 'dynamic-terminate-swyh': + return OffloadingStrategy.DYN_TERMINATE_SWYH + + +def parse_strategy(strategy: OffloadingStrategy): + deadline_enabled = False + swyh_enabled = False + freeze_layers_enabled = False + offload_enabled = False + dyn_terminate = False + dyn_terminate_swyh = False + if strategy == OffloadingStrategy.VANILLA: + deadline_enabled = False + swyh_enabled = False + freeze_layers_enabled = False + offload_enabled = False + if strategy == OffloadingStrategy.DEADLINE: + deadline_enabled = True + swyh_enabled = False + freeze_layers_enabled = False + offload_enabled = False + if strategy == OffloadingStrategy.SWYH: + deadline_enabled = True + swyh_enabled = True + freeze_layers_enabled = False + offload_enabled = False + if strategy == OffloadingStrategy.FREEZE: + deadline_enabled = True + swyh_enabled = False + freeze_layers_enabled = True + offload_enabled = False + if strategy == OffloadingStrategy.MODEL_OFFLOAD: + deadline_enabled = True + swyh_enabled 
= False + freeze_layers_enabled = True + offload_enabled = True + if strategy == OffloadingStrategy.DYN_TERMINATE: + deadline_enabled = False + swyh_enabled = False + freeze_layers_enabled = False + offload_enabled = False + dyn_terminate = True + if strategy == OffloadingStrategy.DYN_TERMINATE: + deadline_enabled = False + swyh_enabled = False + freeze_layers_enabled = False + offload_enabled = False + dyn_terminate_swyh = True + return deadline_enabled, swyh_enabled, freeze_layers_enabled, offload_enabled, dyn_terminate, dyn_terminate_swyh diff --git a/fltk/util/base_config.py b/fltk/util/base_config.py index 137042de..cf6e9437 100644 --- a/fltk/util/base_config.py +++ b/fltk/util/base_config.py @@ -52,6 +52,9 @@ def __init__(self): # FLTK options self.node_groups = None + # Termination policy data + self.termination_percentage = 1 + self.federator_host = '0.0.0.0' self.rank = 0 self.world_size = 0 @@ -159,6 +162,8 @@ def merge_yaml(self, cfg = {}): if 'node_groups' in cfg: self.node_groups = cfg['node_groups'] + if 'termination_percentage' in cfg: + self.termination_percentage = cfg['termination_percentage'] diff --git a/fltk/util/generate_docker_compose.py b/fltk/util/generate_docker_compose.py index fa46f20f..f04b35cb 100644 --- a/fltk/util/generate_docker_compose.py +++ b/fltk/util/generate_docker_compose.py @@ -41,6 +41,31 @@ def generate_client(id, template: dict, world_size: int, type='default', cpu_set def generate_compose_file(): print() +def generate_terminate(num_clients = 16, medium=False): + template_path = get_deploy_path('terminate') + world_size = num_clients + 1 + system_template: dict = load_system_template(template_path=template_path) + + for key, item in enumerate(system_template['services']['fl_server']['environment']): + if item == 'WORLD_SIZE={world_size}': + system_template['services']['fl_server']['environment'][key] = item.format(world_size=world_size) + cpu_idx = 2 + for client_id in range(1, num_clients + 1): + if client_id < 5: + 
client_type = 'slow' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + else: + client_type = 'medium' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + client_template: dict = load_client_template(type=client_type, template_path=template_path) + client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type, + cpu_set=cpu_set) + system_template['services'].update(client_definition) + + with open(r'./docker-compose.yml', 'w') as file: + yaml.dump(system_template, file, sort_keys=False) def generate_dev(num_clients = 2, medium=False): template_path = get_deploy_path('dev') @@ -203,8 +228,8 @@ def generate(num_clients: int): def run(name, num_clients = None, medium=False): exp_dict = { 'tifl-15': generate_tifl_15, - 'dev': generate_dev - + 'dev': generate_dev, + 'terminate': generate_terminate } if num_clients: exp_dict[name](num_clients, medium) From d784cd42a108201f5cc1b8839aa535c0fe2df3c3 Mon Sep 17 00:00:00 2001 From: Lydia Date: Mon, 24 Jan 2022 23:04:26 +0100 Subject: [PATCH 22/73] Fix typo in exp script --- configs/terminate/run.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/configs/terminate/run.py b/configs/terminate/run.py index 7dded93e..dd2b1b05 100644 --- a/configs/terminate/run.py +++ b/configs/terminate/run.py @@ -1,10 +1,10 @@ from fltk.util.generate_docker_compose import run as generate_docker import os if __name__ == '__main__': - name = 'dev' - generate_docker(name, 10, True) - base_path = 'configs/effect-freezing' - exp_list = ['p_freezing-iid_freeze.yaml','p_freezing-iid_vanilla.yaml'] + name = 'terminate' + generate_docker(name, 16, True) + base_path = 'configs/terminate' + exp_list = ['p_terminate_terminate_swyh.yaml', 'p_terminate_terminate.yaml', 'p_terminate_vanilla.yaml'] exp_list = [f'{base_path}/{x}' for x in exp_list] first_prefix = '--build' for exp_cfg_file in exp_list: From 8432bac998023407d42ea553d0efd86b9b7e0f29 Mon Sep 17 00:00:00 2001 From: bacox Date: Mon, 24 Jan 2022 
23:09:36 +0100 Subject: [PATCH 23/73] Finish termination impl --- fltk/client.py | 4 ++-- fltk/federator.py | 12 +++++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/fltk/client.py b/fltk/client.py index 9124ebea..5e76c9b7 100644 --- a/fltk/client.py +++ b/fltk/client.py @@ -154,11 +154,10 @@ def send_reference(self, server_ref): self.local_log(f'Got worker_info from server {server_ref}') self.server_ref = server_ref - def terminate_training_endpoint(self): + logging.info('I got a call for training termination!') self.terminate_training = True - @staticmethod def static_ping(): print(f'Got static ping with global_dict={global_dict}') @@ -669,6 +668,7 @@ def run_epochs(self, num_epoch, deadline: int = None, warmup=False): Timing data to measure: Total execution tim: """ + self.terminate_training = False start = time.time() start_time_train = datetime.datetime.now() diff --git a/fltk/federator.py b/fltk/federator.py index 77b24b4e..7cfde199 100644 --- a/fltk/federator.py +++ b/fltk/federator.py @@ -512,9 +512,15 @@ def reached_deadline(): client_response.finish() else: all_finished = False - num_finished_responses = sum([1 for x in responses if x.done]) - percentage = num_finished_responses / len(responses) - logging.info(f'Percentage of finished responses: {percentage}, do terminate ? {percentage} > {self.config.termination_percentage} = {percentage > self.config.termination_percentage}') + if self.dyn_terminate or self.dyn_terminate_swyh: + num_finished_responses = sum([1 for x in responses if x.done]) + percentage = num_finished_responses / len(responses) + if percentage > self.config.termination_percentage: + logging.info('Sending termination signal') + for cr in responses: + if not cr.done: + _remote_method_async(Client.terminate_training_endpoint, cr.client.ref) + logging.info(f'Percentage of finished responses: {percentage}, do terminate ? 
{percentage} > {self.config.termination_percentage} = {percentage > self.config.termination_percentage}') time.sleep(0.1) logging.info(f'Stopped waiting due to all_finished={all_finished} and deadline={reached_deadline()}') client_accuracies = [] From 60f7b5d849035dd9cf478b7092261eb83dd65c15 Mon Sep 17 00:00:00 2001 From: Lydia Date: Wed, 26 Jan 2022 15:44:27 +0100 Subject: [PATCH 24/73] Update offloading --- configs/terminate/p_terminate_terminate.yaml | 2 +- .../terminate/p_terminate_terminate_swyh.yaml | 2 +- configs/terminate/p_terminate_vanilla.yaml | 2 +- configs/terminate/p_terminate_vanilla_s3.yaml | 35 +++++++++++++++++++ configs/terminate/run.py | 7 +++- fltk/client.py | 8 ++--- fltk/federator.py | 11 ++++-- fltk/strategy/offloading.py | 2 +- fltk/util/base_config.py | 6 ++-- 9 files changed, 62 insertions(+), 13 deletions(-) create mode 100644 configs/terminate/p_terminate_vanilla_s3.yaml diff --git a/configs/terminate/p_terminate_terminate.yaml b/configs/terminate/p_terminate_terminate.yaml index a5386fb1..4d2e182a 100644 --- a/configs/terminate/p_terminate_terminate.yaml +++ b/configs/terminate/p_terminate_terminate.yaml @@ -14,7 +14,7 @@ deadline: 1 warmup_round: false output_location: 'output' tensor_board_active: true -clients_per_round: 4 +clients_per_round: 12 node_groups: slow: [1, 1] medium: [2, 2] diff --git a/configs/terminate/p_terminate_terminate_swyh.yaml b/configs/terminate/p_terminate_terminate_swyh.yaml index 068c7c22..056630b3 100644 --- a/configs/terminate/p_terminate_terminate_swyh.yaml +++ b/configs/terminate/p_terminate_terminate_swyh.yaml @@ -14,7 +14,7 @@ deadline: 1 warmup_round: false output_location: 'output' tensor_board_active: true -clients_per_round: 4 +clients_per_round: 12 node_groups: slow: [1, 1] medium: [2, 2] diff --git a/configs/terminate/p_terminate_vanilla.yaml b/configs/terminate/p_terminate_vanilla.yaml index 8591f556..e1057a80 100644 --- a/configs/terminate/p_terminate_vanilla.yaml +++ 
b/configs/terminate/p_terminate_vanilla.yaml @@ -14,7 +14,7 @@ deadline: 1 warmup_round: false output_location: 'output' tensor_board_active: true -clients_per_round: 4 +clients_per_round: 12 node_groups: slow: [1, 1] medium: [2, 2] diff --git a/configs/terminate/p_terminate_vanilla_s3.yaml b/configs/terminate/p_terminate_vanilla_s3.yaml new file mode 100644 index 00000000..ad196427 --- /dev/null +++ b/configs/terminate/p_terminate_vanilla_s3.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_p_terminate_vanilla_s3' +offload_stategy: vanilla +profiling_time: 100 +deadline: 1 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 12 +node_groups: + slow: [1, 1] + medium: [2, 2] + fast: [3, 3] +termination_percentage: 0.7 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 16 diff --git a/configs/terminate/run.py b/configs/terminate/run.py index dd2b1b05..c6a9bdad 100644 --- a/configs/terminate/run.py +++ b/configs/terminate/run.py @@ -4,7 +4,12 @@ name = 'terminate' generate_docker(name, 16, True) base_path = 'configs/terminate' - exp_list = ['p_terminate_terminate_swyh.yaml', 'p_terminate_terminate.yaml', 'p_terminate_vanilla.yaml'] + exp_list = [ + 'p_terminate_terminate.yaml', + 'p_terminate_terminate_swyh.yaml', + 'p_terminate_vanilla.yaml', + 'p_terminate_vanilla_s3.yaml' + ] exp_list = [f'{base_path}/{x}' for x in exp_list] 
first_prefix = '--build' for exp_cfg_file in exp_list: diff --git a/fltk/client.py b/fltk/client.py index 5e76c9b7..189d5e4f 100644 --- a/fltk/client.py +++ b/fltk/client.py @@ -676,12 +676,12 @@ def run_epochs(self, num_epoch, deadline: int = None, warmup=False): self.dataset.get_train_sampler().set_epoch_size(num_epoch) # Train locally loss, weights, training_process, scheduler_data, perf_data = self.train(self.epoch_counter, deadline, warmup) - if self.dyn_terminate: + if self.terminate_training and self.dyn_terminate: logging.info('Not testing data due to termination call') - self.dyn_terminate = False + self.terminate_training = False return {'own': []} - elif self.dyn_terminate_swyh: - self.dyn_terminate_swyh = False + elif self.terminate_training and self.dyn_terminate_swyh: + self.terminate_training = False logging.info('Sending back weights due to terminate with swyh') if not warmup: self.epoch_counter += num_epoch diff --git a/fltk/federator.py b/fltk/federator.py index 7cfde199..c1328f6b 100644 --- a/fltk/federator.py +++ b/fltk/federator.py @@ -90,6 +90,7 @@ class ClientResponse: end_time: float = 0 done: bool = False dropped = True + terminated = False def finish(self): self.end_time = time.time() @@ -424,6 +425,7 @@ def reached_deadline(): has_not_called = True show_perf_data = True + has_send_terminate = False while not all_finished and not ((self.deadline_enabled and reached_deadline()) or warmup): # if self.deadline_enabled and reached_deadline() # if has_not_called and (time.time() -start) > 10: @@ -512,14 +514,17 @@ def reached_deadline(): client_response.finish() else: all_finished = False - if self.dyn_terminate or self.dyn_terminate_swyh: + if not has_send_terminate and (self.dyn_terminate or self.dyn_terminate_swyh): num_finished_responses = sum([1 for x in responses if x.done]) percentage = num_finished_responses / len(responses) if percentage > self.config.termination_percentage: logging.info('Sending termination signal') for cr in 
responses: if not cr.done: + if self.dyn_terminate: + cr.terminated = True _remote_method_async(Client.terminate_training_endpoint, cr.client.ref) + has_send_terminate = True logging.info(f'Percentage of finished responses: {percentage}, do terminate ? {percentage} > {self.config.termination_percentage} = {percentage > self.config.termination_percentage}') time.sleep(0.1) logging.info(f'Stopped waiting due to all_finished={all_finished} and deadline={reached_deadline()}') @@ -534,7 +539,7 @@ def reached_deadline(): logging.info( f'{client} had a exec time of {client_response.duration()} dropped?={client_response.dropped}') - if not client_response.dropped: + if not client_response.dropped and not client_response.terminated: client.available = True logging.info(f'Fetching response for client: {client}') response_obj = client_response.future.wait() @@ -686,6 +691,8 @@ def reached_deadline(): # logging.info('Class names') # logging.info(self.test_data.dataset.test_dataset.class_to_idx) # self.tb_writer.add_scalar('training loss', loss, self.epoch_counter * self.test_data.get_client_datasize()) # does not seem to work :( ) + self.tb_writer.add_scalar('Number of clients dropped', sum([1 for x in responses if x.dropped or x.terminated]), self.epoch_counter) + self.tb_writer.add_scalar('accuracy', accuracy, self.epoch_counter * self.test_data.get_client_datasize()) self.record_epoch_event(f'Global accuracy is {accuracy}') self.tb_writer.add_scalar('accuracy per epoch', accuracy, self.epoch_counter) diff --git a/fltk/strategy/offloading.py b/fltk/strategy/offloading.py index 586464e2..3c8bf7e4 100644 --- a/fltk/strategy/offloading.py +++ b/fltk/strategy/offloading.py @@ -72,7 +72,7 @@ def parse_strategy(strategy: OffloadingStrategy): freeze_layers_enabled = False offload_enabled = False dyn_terminate = True - if strategy == OffloadingStrategy.DYN_TERMINATE: + if strategy == OffloadingStrategy.DYN_TERMINATE_SWYH: deadline_enabled = False swyh_enabled = False 
freeze_layers_enabled = False diff --git a/fltk/util/base_config.py b/fltk/util/base_config.py index cf6e9437..c6cd6d40 100644 --- a/fltk/util/base_config.py +++ b/fltk/util/base_config.py @@ -17,8 +17,10 @@ def __init__(self): self.batch_size = 1 self.test_batch_size = 1000 self.epochs = 1 - self.lr = 0.001 - self.momentum = 0.9 + # self.lr = 0.001 + self.lr = 0.0001 + # self.momentum = 0.9 + self.momentum = 0.1 self.cuda = False self.shuffle = False self.log_interval = 10 From ca898755d8ffa0ab1f0fb890844e2bd174b0d952 Mon Sep 17 00:00:00 2001 From: bacox Date: Wed, 26 Jan 2022 16:50:14 +0100 Subject: [PATCH 25/73] Add FedProx and FedNova --- fltk/client.py | 16 +++- fltk/federator.py | 10 +++ fltk/strategy/FedNova.py | 184 +++++++++++++++++++++++++++++++++++++++ fltk/strategy/fedprox.py | 147 +++++++++++++++++++++++++++++++ fltk/util/base_config.py | 25 ++++++ fltk/util/definitions.py | 39 +++++++++ 6 files changed, 418 insertions(+), 3 deletions(-) create mode 100644 fltk/strategy/FedNova.py create mode 100644 fltk/strategy/fedprox.py create mode 100644 fltk/util/definitions.py diff --git a/fltk/client.py b/fltk/client.py index 189d5e4f..5ffaa7d2 100644 --- a/fltk/client.py +++ b/fltk/client.py @@ -109,9 +109,8 @@ def __init__(self, id, log_rref, rank, world_size, config = None): self.device = self.init_device() self.set_net(self.load_default_model()) self.loss_function = self.args.get_loss_function()() - self.optimizer = torch.optim.SGD(self.net.parameters(), - lr=self.args.get_learning_rate(), - momentum=self.args.get_momentum()) + self.optimizer = self.args.get_optimizer()(self.net.parameters(), + **self.args.optimizer_args) self.scheduler = MinCapableStepLR(self.args.get_logger(), self.optimizer, self.args.get_scheduler_step_size(), self.args.get_scheduler_gamma(), @@ -143,6 +142,14 @@ def configure_strategy(self, strategy : OffloadingStrategy): f'offload={self.offload_enabled}, dyn_terminate={self.dyn_terminate}, ' 
f'dyn_terminate_swyh={self.dyn_terminate_swyh}') + def set_tau_eff(self, total): + client_weight = self.get_client_datasize() / total + n = self.get_client_datasize() + E = self.args.epochs_per_round + B = 16 # nicely hardcoded :) + tau_eff = int(E * n / B) * client_weight + if hasattr(self.optimizer, 'set_tau_eff'): + self.optimizer.set_tau_eff(tau_eff) def init_device(self): if self.args.cuda and torch.cuda.is_available(): @@ -718,6 +725,9 @@ def run_epochs(self, num_epoch, deadline: int = None, warmup=False): global global_sender_id data_offload = EpochData(self.epoch_counter, num_epoch, train_time_ms, test_time_ms, loss_offload, accuracy, test_loss, class_precision, class_recall, training_process, f'{global_sender_id}-offload') + + if hasattr(self.optimizer, 'pre_communicate'): # aka fednova or fedprox + self.optimizer.pre_communicate() # Copy GPU tensors to CPU for k, v in weights_offload.items(): weights_offload[k] = v.cpu() diff --git a/fltk/federator.py b/fltk/federator.py index c1328f6b..b29116ed 100644 --- a/fltk/federator.py +++ b/fltk/federator.py @@ -712,6 +712,15 @@ def reached_deadline(): self.exp_data_general.append([self.epoch_counter, duration, accuracy, loss, class_precision, class_recall]) + def set_tau_eff(self): + total = sum(client.data_size for client in self.clients) + responses = [] + for client in self.clients: + responses.append((client, _remote_method_async(Client.set_tau_eff, client.ref, total))) + torch.futures.wait_all([x[1] for x in responses]) + # for client in self.clients: + # client.set_tau_eff(total) + def save_experiment_data(self): p = Path(f'./{self.config.output_location}') # file_output = f'./{self.config.output_location}' @@ -789,6 +798,7 @@ def run(self): self.ping_all() self.clients_ready() self.update_client_data_sizes() + self.set_tau_eff() epoch_to_run = self.num_epoch addition = 0 diff --git a/fltk/strategy/FedNova.py b/fltk/strategy/FedNova.py new file mode 100644 index 00000000..d51895e1 --- /dev/null +++ 
b/fltk/strategy/FedNova.py @@ -0,0 +1,184 @@ +import torch +import torch.distributed as dist +from torch.optim.optimizer import Optimizer, required + + +class FedNova(Optimizer): + r"""Implements federated normalized averaging (FedNova). + + Nesterov momentum is based on the formula from + `On the importance of initialization and momentum in deep learning`__. + + Args: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + ratio (float): relative sample size of client + gmf (float): global/server/slow momentum factor + mu (float): parameter for proximal local SGD + lr (float): learning rate + momentum (float, optional): momentum factor (default: 0) + weight_decay (float, optional): weight decay (L2 penalty) (default: 0) + dampening (float, optional): dampening for momentum (default: 0) + nesterov (bool, optional): enables Nesterov momentum (default: False) + + Example: + >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9) + >>> optimizer.zero_grad() + >>> loss_fn(model(input), target).backward() + >>> optimizer.step() + + __ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf + + .. note:: + The implementation of SGD with Momentum/Nesterov subtly differs from + Sutskever et. al. and implementations in some other frameworks. + + Considering the specific case of Momentum, the update can be written as + + .. math:: + v = \rho * v + g \\ + p = p - lr * v + + where p, g, v and :math:`\rho` denote the parameters, gradient, + velocity, and momentum respectively. + + This is in contrast to Sutskever et. al. and + other frameworks which employ an update of the form + + .. math:: + v = \rho * v + lr * g \\ + p = p - v + + The Nesterov version is analogously modified. 
+ """ + + def __init__(self, params, lr=0.05, momentum=0.9, dampening=0, + weight_decay=0, nesterov=False, variance=0, mu=0): + self.momentum = momentum + self.mu = mu + self.ai_l1_norm = 0 + self.local_counter = 0 + self.local_steps = 0 + + + if lr is not required and lr < 0.0: + raise ValueError("Invalid learning rate: {}".format(lr)) + if momentum < 0.0: + raise ValueError("Invalid momentum value: {}".format(momentum)) + if weight_decay < 0.0: + raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) + + defaults = dict(lr=lr, momentum=momentum, dampening=dampening, + weight_decay=weight_decay, nesterov=nesterov, variance=variance) + + if nesterov and (momentum <= 0 or dampening != 0): + raise ValueError("Nesterov momentum requires a momentum and zero dampening") + super(FedNova, self).__init__(params, defaults) + + def __setstate__(self, state): + super(FedNova, self).__setstate__(state) + for group in self.param_groups: + group.setdefault('nesterov', False) + + def step(self, closure=None): + """Performs a single optimization step. + + Arguments: + closure (callable, optional): A closure that reevaluates the model + and returns the loss. 
+ """ + device = "cuda" if torch.cuda.is_available() else "cpu" + + loss = None + if closure is not None: + loss = closure() + + # scale = 1**self.itr + + for group in self.param_groups: + weight_decay = group['weight_decay'] + momentum = group['momentum'] + dampening = group['dampening'] + nesterov = group['nesterov'] + + for p in group['params']: + if p.grad is None: + continue + d_p = p.grad.data + + if weight_decay != 0: + d_p.add_(p.data, alpha=weight_decay) + + param_state = self.state[p] + if 'old_init' not in param_state: + param_state['old_init'] = torch.clone(p.data).detach() + + local_lr = group['lr'] + + # apply momentum updates + if momentum != 0: + if 'momentum_buffer' not in param_state: + buf = param_state['momentum_buffer'] = torch.clone(d_p).detach() + else: + buf = param_state['momentum_buffer'] + buf.mul_(momentum).add_(d_p, alpha=1 - dampening) + if nesterov: + d_p = d_p.add(momentum, buf) + else: + d_p = buf + + # apply proximal updates + if self.mu != 0: + d_p.add_(p.data - param_state['old_init'], alpha=self.mu) + + # update accumulated local updates + if 'cum_grad' not in param_state: + param_state['cum_grad'] = torch.clone(d_p).detach() + param_state['cum_grad'].mul_(local_lr) + else: + param_state['cum_grad'].add_(d_p, alpha=local_lr) + + p.data.add_(d_p, alpha=-local_lr) + + # compute local normalizing vector a_i ... but it's a scalar? + # should't a_i be applied to cum_grad? + # so this must be the l1 norm? -> this seems correct. 
a_i is not computed directly, only it's l1 norm + if self.momentum != 0: + self.local_counter = self.local_counter * self.momentum + 1 + self.ai_l1_norm += self.local_counter + + self.etamu = local_lr * self.mu + if self.etamu != 0: + self.ai_l1_norm *= (1 - self.etamu) + self.ai_l1_norm += 1 + + if self.momentum == 0 and self.etamu == 0: + self.ai_l1_norm += 1 + + self.local_steps += 1 + + return loss + + def set_tau_eff(self, tau_eff): + self.tau_eff = tau_eff + + def pre_communicate(self): + for group in self.param_groups: + for p in group['params']: + param_state = self.state[p] + + # apply fednova update rule + # learning rate has already been applied + cum_grad = param_state['cum_grad'] + p.data.sub_(cum_grad) # get back to old_init + p.data.add_(cum_grad, alpha=self.tau_eff/self.ai_l1_norm) # rescale changes + + # delete stuff for next round + del param_state['old_init'] + param_state['cum_grad'].zero_() + if 'momentum_buffer' in param_state: + param_state['momentum_buffer'].zero_() + + self.local_counter = 0 + self.ai_l1_norm = 0 + self.local_steps = 0 diff --git a/fltk/strategy/fedprox.py b/fltk/strategy/fedprox.py new file mode 100644 index 00000000..7d0d5fe4 --- /dev/null +++ b/fltk/strategy/fedprox.py @@ -0,0 +1,147 @@ +import torch +from torch.optim.optimizer import Optimizer, required + + +class FedProx(Optimizer): + r"""Implements FedAvg and FedProx. Local Solver can have momentum. + + Nesterov momentum is based on the formula from + `On the importance of initialization and momentum in deep learning`__. 
+ + Args: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + ratio (float): relative sample size of client + gmf (float): global/server/slow momentum factor + mu (float): parameter for proximal local SGD + lr (float): learning rate + momentum (float, optional): momentum factor (default: 0) + weight_decay (float, optional): weight decay (L2 penalty) (default: 0) + dampening (float, optional): dampening for momentum (default: 0) + nesterov (bool, optional): enables Nesterov momentum (default: False) + + Example: + >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9) + >>> optimizer.zero_grad() + >>> loss_fn(model(input), target).backward() + >>> optimizer.step() + + __ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf + + .. note:: + The implementation of SGD with Momentum/Nesterov subtly differs from + Sutskever et. al. and implementations in some other frameworks. + + Considering the specific case of Momentum, the update can be written as + + .. math:: + v = \rho * v + g \\ + p = p - lr * v + + where p, g, v and :math:`\rho` denote the parameters, gradient, + velocity, and momentum respectively. + + This is in contrast to Sutskever et. al. and + other frameworks which employ an update of the form + + .. math:: + v = \rho * v + lr * g \\ + p = p - v + + The Nesterov version is analogously modified. 
+ """ + + def __init__(self, params, lr=0.05, momentum=0.9, dampening=0, + weight_decay=0, nesterov=False, variance=0, mu=0.01): + + self.itr = 0 + self.a_sum = 0 + self.mu = mu + self.loss = None + + if lr is not required and lr < 0.0: + raise ValueError("Invalid learning rate: {}".format(lr)) + if momentum < 0.0: + raise ValueError("Invalid momentum value: {}".format(momentum)) + if weight_decay < 0.0: + raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) + + defaults = dict(lr=lr, momentum=momentum, dampening=dampening, + weight_decay=weight_decay, nesterov=nesterov, variance=variance) + if nesterov and (momentum <= 0 or dampening != 0): + raise ValueError("Nesterov momentum requires a momentum and zero dampening") + super(FedProx, self).__init__(params, defaults) + + + def __setstate__(self, state): + super(FedProx, self).__setstate__(state) + for group in self.param_groups: + group.setdefault('nesterov', False) + + def step(self, closure=None): + """Performs a single optimization step. + + Arguments: + closure (callable, optional): A closure that reevaluates the model + and returns the loss. 
+ """ + + loss = None + if closure is not None: + loss = closure() + + for group in self.param_groups: + weight_decay = group['weight_decay'] + momentum = group['momentum'] + dampening = group['dampening'] + nesterov = group['nesterov'] + + + for p in group['params']: + if p.grad is None: + continue + d_p = p.grad.data + + if weight_decay != 0: + d_p.add_(p.data, alpha=weight_decay) + + param_state = self.state[p] + if 'old_init' not in param_state: + param_state['old_init'] = torch.clone(p.data).detach() + + if momentum != 0: + if 'momentum_buffer' not in param_state: + buf = param_state['momentum_buffer'] = torch.clone(d_p).detach() + else: + buf = param_state['momentum_buffer'] + buf.mul_(momentum).add_(d_p, alpha=1 - dampening) + if nesterov: + d_p = d_p.add(buf, alpha=1 - dampening) + else: + d_p = buf + + # apply proximal update + d_p.add_(p.data - param_state['old_init'], alpha=self.mu) + + p.data.add_(d_p, alpha=-group['lr']) + + # one simple heuristic is to increase μ when seeing + # the loss increasing and decreasing μ when seeing the loss decreasing + if self.loss: + ratio = loss/self.loss # if the new loss is greater, ratio > 1 + self.mu = self.mu*ratio + self.mu = min(1.0, self.mu) + self.mu = max(0.001, self.mu) + self.loss = loss + + + return loss + + def pre_communicate(self): + for group in self.param_groups: + for p in group['params']: + param_state = self.state[p] + if 'old_init' in param_state: + del param_state['old_init'] + if 'momentum_buffer' in param_state: + param_state['momentum_buffer'].zero_() diff --git a/fltk/util/base_config.py b/fltk/util/base_config.py index c6cd6d40..781acb36 100644 --- a/fltk/util/base_config.py +++ b/fltk/util/base_config.py @@ -5,6 +5,9 @@ from fltk.datasets.distributed import DistCIFAR10Dataset, DistCIFAR100Dataset, DistFashionMNISTDataset from fltk.nets import Cifar10CNN, FashionMNISTCNN, Cifar100ResNet, FashionMNISTResNet, Cifar10ResNet, Cifar100VGG +from fltk.strategy.FedNova import FedNova +from 
fltk.strategy.fedprox import FedProx +from fltk.util.definitions import Optimizations SEED = 1 torch.manual_seed(SEED) @@ -32,6 +35,20 @@ def __init__(self): self.scheduler_gamma = 0.5 self.min_lr = 1e-10 + self.loss_function = torch.nn.CrossEntropyLoss + self.optimizer = torch.optim.SGD + + self.optimizers = { + Optimizations.sgd: torch.optim.SGD, + Optimizations.fedprox: FedProx, + Optimizations.fednova: FedNova + } + + self.optimizer_args = { + 'lr': self.lr, + 'momentum': self.momentum + } + self.round_worker_selection_strategy = None self.round_worker_selection_strategy_kwargs = None @@ -157,6 +174,11 @@ def merge_yaml(self, cfg = {}): self.cuda = True else: self.cuda = False + if 'optimizer' in cfg: + self.optimizer = self.optimizers[cfg['optimizer']] + if 'optimizer_args' in cfg: + for k, v in cfg['optimizer_args'].items(): + self.optimizer_args[k] = v if 'sampler' in cfg: self.data_sampler = cfg['sampler'] if 'sampler_args' in cfg: @@ -186,6 +208,9 @@ def set_sampler(self, sampler): def get_sampler(self): return self.data_sampler + + def get_optimizer(self): + return self.optimizer def get_sampler_args(self): return tuple(self.data_sampler_args) diff --git a/fltk/util/definitions.py b/fltk/util/definitions.py new file mode 100644 index 00000000..c11f704e --- /dev/null +++ b/fltk/util/definitions.py @@ -0,0 +1,39 @@ +from dataclasses import dataclass + +# @dataclass +# class Aggregations: +# avg = 'Avg' +# fed_avg = 'FedAvg' +# sum = 'Sum' + +# @dataclass +# class Samplers: +# uniform = "uniform" +# q_sampler = "q sampler" +# limit_labels = "limit labels" +# dirichlet = "dirichlet" +# limit_labels_q = "limit labels q" +# emd_sampler = 'emd sampler' + +@dataclass +class Optimizations: + sgd = 'SGD' + fedprox = 'FedProx' + fednova = 'FedNova' + +# @dataclass +# class Datasets: +# cifar10 = 'cifar10' +# cifar100 = 'cifar100' +# fashion_mnist = 'fashion-mnist' +# mnist = 'mnist' + +# @dataclass +# class Nets: +# cifar100_resnet = "Cifar100ResNet" +# cifar100_vgg 
= "Cifar100VGG" +# cifar10_cnn = "Cifar10CNN" +# cifar10_resnet = "Cifar10ResNet" +# fashion_mnist_cnn = "FashionMNISTCNN" +# fashion_mnist_resnet = "FashionMNISTResNet" +# mnist_cnn = 'MNISTCNN' \ No newline at end of file From 899ccded54bfe0b92605517b1bd636ef3b58eb96 Mon Sep 17 00:00:00 2001 From: bacox Date: Thu, 27 Jan 2022 05:29:11 +0100 Subject: [PATCH 26/73] Add non-iid n-label sampler --- fltk/strategy/data_samplers.py | 132 ++++++++++++++++++++++++--------- 1 file changed, 96 insertions(+), 36 deletions(-) diff --git a/fltk/strategy/data_samplers.py b/fltk/strategy/data_samplers.py index 182bf8cc..ca00dbe4 100644 --- a/fltk/strategy/data_samplers.py +++ b/fltk/strategy/data_samplers.py @@ -74,61 +74,121 @@ def __init__(self, dataset, num_replicas, rank, args=(5, 42)): for l in range(self.n_labels): label_dict[l] = num_copies - def choice_n(l_dict: dict, n): + def choice_n(l_dict: dict, n, seed_offset = 0): labels = [k for k, v in label_dict.items() if v] + # print(f'Available labels: {labels} choose {n}') + np.random.seed(seed + seed_offset) selected = np.random.choice(labels, n, replace=False) for k, v in l_dict.items(): if k in selected: - v -= 0 + # v -= 1 + l_dict[k] -= 1 return selected - print(f'N Clients={self.n_clients}') - print(f'Num_buckets={num_copies}') - labels_per_client = int(np.floor(self.n_labels / self.n_clients)) - remaining_labels = self.n_labels - labels_per_client - labels = list(range(self.n_labels)) # list of labels to distribute - clients = list(range(self.n_clients)) # keeps track of which clients should still be given a label - client_labels = [set() for n in range(self.n_clients)] # set of labels given to each client - random.seed(seed) # seed, such that the same result can be obtained multiple times - print(client_labels) + # print(f'N Clients={self.n_clients}') + # print(f'Num_buckets={num_copies}') - label_order = random.sample(labels, len(labels)) + clients = list(range(self.n_clients)) # keeps track of which clients should 
still be given a label client_label_dict = {} - for client_id in clients: - client_label_dict[client_id] = [] - for _ in range(labels_per_client): - chosen_label = label_order.pop() - client_label_dict[client_id].append(chosen_label) - client_labels[client_id].add(chosen_label) - client_label_dict['rest'] = label_order + for idx, client_id in enumerate(clients): + # client_label_dict[client_id] = [] + label_set = choice_n(label_dict, args[0], idx) + client_label_dict[client_id] = label_set + + client_label_dict['rest'] = [] + # client_label_dict['rest'] = labels = [k for k, v in label_dict.items() if v] + for k, v in label_dict.items(): + for x in range(int(v)): + client_label_dict['rest'].append(int(k)) + + # Order data by label; split into N buckets and select indices based on the order found in the client-label-dict + + reverse_label_dict = {} + for l in range(self.n_labels): + reverse_label_dict[l] = [] + + for k, v in client_label_dict.items(): + # print(f'client {k} has labels {v}') + for l_c in v: + reverse_label_dict[l_c].append(k) indices = [] ordered_by_label = self.order_by_label(dataset) - labels = client_label_dict[self.client_id] - for label in labels: - n_samples = int(len(ordered_by_label[label])) - clients = [c for c, s in enumerate(client_labels) if label in s] # find out which clients have this label - index = clients.index(self.client_id) # find the position of this client - start_index = index * n_samples # inclusive - if rank == self.n_clients: - end_index = len(ordered_by_label[label]) # exclusive - else: - end_index = start_index + n_samples # exclusive - - indices += ordered_by_label[label][start_index:end_index] + indices_per_client = {} + for c in clients: + indices_per_client[c] = [] - # Last part is uniform sampler rest_indices = [] - for l in client_label_dict['rest']: - rest_indices += ordered_by_label[l] - filtered_rest_indices = rest_indices[self.rank:self.total_size:self.num_replicas] - indices += filtered_rest_indices + for group, 
label_list in enumerate(ordered_by_label): + splitted = np.array_split(label_list, num_copies) + client_id_to_distribute = reverse_label_dict[group] + for split_part in splitted: + client_key = client_id_to_distribute.pop() + if client_key == 'rest': + rest_indices.append(split_part) + else: + indices_per_client[client_key].append(split_part) + # for split_part in splitted: + rest_indices = np.concatenate(rest_indices) + rest_splitted = np.array_split(rest_indices, len(indices_per_client)) + + for k, v in indices_per_client.items(): + v.append(rest_splitted.pop()) + indices_per_client[k] = np.concatenate(v) + + indices = indices_per_client[self.client_id] random.seed(seed + self.client_id) # give each client a unique shuffle random.shuffle(indices) # shuffle indices to spread the labels self.indices = indices + # labels_per_client = int(np.floor(self.n_labels / self.n_clients)) + # remaining_labels = self.n_labels - labels_per_client + # labels = list(range(self.n_labels)) # list of labels to distribute + # clients = list(range(self.n_clients)) # keeps track of which clients should still be given a label + # client_labels = [set() for n in range(self.n_clients)] # set of labels given to each client + # random.seed(seed) # seed, such that the same result can be obtained multiple times + # print(client_labels) + # + # label_order = random.sample(labels, len(labels)) + # client_label_dict = {} + # for client_id in clients: + # client_label_dict[client_id] = [] + # for _ in range(labels_per_client): + # chosen_label = label_order.pop() + # client_label_dict[client_id].append(chosen_label) + # client_labels[client_id].add(chosen_label) + # client_label_dict['rest'] = label_order + # + # + # + # indices = [] + # ordered_by_label = self.order_by_label(dataset) + # labels = client_label_dict[self.client_id] + # for label in labels: + # n_samples = int(len(ordered_by_label[label])) + # clients = [c for c, s in enumerate(client_labels) if label in s] # find out which clients 
have this label + # index = clients.index(self.client_id) # find the position of this client + # start_index = index * n_samples # inclusive + # if rank == self.n_clients: + # end_index = len(ordered_by_label[label]) # exclusive + # else: + # end_index = start_index + n_samples # exclusive + # + # indices += ordered_by_label[label][start_index:end_index] + # + # # Last part is uniform sampler + # rest_indices = [] + # for l in client_label_dict['rest']: + # rest_indices += ordered_by_label[l] + # filtered_rest_indices = rest_indices[self.rank:self.total_size:self.num_replicas] + # indices += filtered_rest_indices + # random.seed(seed + self.client_id) # give each client a unique shuffle + # random.shuffle(indices) # shuffle indices to spread the labels + # + # self.indices = indices + class LimitLabelsSamplerFlex(DistributedSamplerWrapper): """ From e0307a61f218a33af047d533afa70a9a74d5ab36 Mon Sep 17 00:00:00 2001 From: Lydia Date: Thu, 27 Jan 2022 11:38:44 +0100 Subject: [PATCH 27/73] Expand on offloading exps --- configs/dev/dev_p2.yaml | 4 +- .../p11_freezoff_iid_dyn_terminate.yaml | 34 +++++++++ .../p11_freezoff_iid_dyn_terminate_swyh.yaml | 34 +++++++++ .../p11_freezoff_iid_fedavg.yaml | 34 +++++++++ .../p11_freezoff_iid_fednova.yaml | 35 +++++++++ .../p11_freezoff_iid_fedprox.yaml | 35 +++++++++ .../p11_freezoff_iid_offload.yaml | 34 +++++++++ .../p11_freezoff_iid_tifl_adaptive.yaml | 34 +++++++++ .../p11_freezoff_iid_tifl_basic.yaml | 34 +++++++++ configs/p11_freezoff_iid/run.py | 29 +++++++ .../p12_freezoff_iid_dyn_terminate_large.yaml | 34 +++++++++ ...freezoff_iid_dyn_terminate_swyh_large.yaml | 34 +++++++++ .../p12_freezoff_iid_fedavg_large.yaml | 34 +++++++++ .../p12_freezoff_iid_fednova_large.yaml | 35 +++++++++ .../p12_freezoff_iid_fedprox_large.yaml | 35 +++++++++ .../p12_freezoff_iid_offload_large.yaml | 34 +++++++++ .../p12_freezoff_iid_tifl_adaptive_large.yaml | 34 +++++++++ .../p12_freezoff_iid_tifl_basic_large.yaml | 34 +++++++++ 
configs/p12_freezoff_iid_large/run.py | 29 +++++++ deploy/p11_freezoff/client_stub_default.yml | 26 +++++++ deploy/p11_freezoff/client_stub_fast.yml | 25 +++++++ deploy/p11_freezoff/client_stub_medium.yml | 25 +++++++ deploy/p11_freezoff/client_stub_slow.yml | 25 +++++++ deploy/p11_freezoff/system_stub.yml | 27 +++++++ fltk/client.py | 39 +++++++--- fltk/federator.py | 75 ++++++++++++------- fltk/util/base_config.py | 6 +- fltk/util/generate_docker_compose.py | 41 +++++++++- 28 files changed, 855 insertions(+), 44 deletions(-) create mode 100644 configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate.yaml create mode 100644 configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate_swyh.yaml create mode 100644 configs/p11_freezoff_iid/p11_freezoff_iid_fedavg.yaml create mode 100644 configs/p11_freezoff_iid/p11_freezoff_iid_fednova.yaml create mode 100644 configs/p11_freezoff_iid/p11_freezoff_iid_fedprox.yaml create mode 100644 configs/p11_freezoff_iid/p11_freezoff_iid_offload.yaml create mode 100644 configs/p11_freezoff_iid/p11_freezoff_iid_tifl_adaptive.yaml create mode 100644 configs/p11_freezoff_iid/p11_freezoff_iid_tifl_basic.yaml create mode 100644 configs/p11_freezoff_iid/run.py create mode 100644 configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_large.yaml create mode 100644 configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_swyh_large.yaml create mode 100644 configs/p12_freezoff_iid_large/p12_freezoff_iid_fedavg_large.yaml create mode 100644 configs/p12_freezoff_iid_large/p12_freezoff_iid_fednova_large.yaml create mode 100644 configs/p12_freezoff_iid_large/p12_freezoff_iid_fedprox_large.yaml create mode 100644 configs/p12_freezoff_iid_large/p12_freezoff_iid_offload_large.yaml create mode 100644 configs/p12_freezoff_iid_large/p12_freezoff_iid_tifl_adaptive_large.yaml create mode 100644 configs/p12_freezoff_iid_large/p12_freezoff_iid_tifl_basic_large.yaml create mode 100644 configs/p12_freezoff_iid_large/run.py create mode 100644 
deploy/p11_freezoff/client_stub_default.yml create mode 100644 deploy/p11_freezoff/client_stub_fast.yml create mode 100644 deploy/p11_freezoff/client_stub_medium.yml create mode 100644 deploy/p11_freezoff/client_stub_slow.yml create mode 100644 deploy/p11_freezoff/system_stub.yml diff --git a/configs/dev/dev_p2.yaml b/configs/dev/dev_p2.yaml index b431c0e3..d3047762 100644 --- a/configs/dev/dev_p2.yaml +++ b/configs/dev/dev_p2.yaml @@ -1,6 +1,6 @@ --- # Experiment configuration -total_epochs: 50 +total_epochs: 5 epochs_per_cycle: 1 wait_for_clients: true net: FashionMNISTCNN @@ -8,7 +8,7 @@ dataset: fashion-mnist # Use cuda is available; setting to false will force CPU cuda: false experiment_prefix: 'exp_dev_p2' -offload_stategy: vanilla +offload_stategy: offload profiling_time: 100 deadline: 500 warmup_round: false diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate.yaml new file mode 100644 index 00000000..a58b5e5c --- /dev/null +++ b/configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p11_freezoff_iid_dyn_terminate' +offload_stategy: dynamic-terminate +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 
'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate_swyh.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate_swyh.yaml new file mode 100644 index 00000000..d1e34592 --- /dev/null +++ b/configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate_swyh.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p11_freezoff_iid_dyn_terminate_swyh' +offload_stategy: dynamic-terminate-swyh +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_fedavg.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_fedavg.yaml new file mode 100644 index 00000000..251453dd --- /dev/null +++ b/configs/p11_freezoff_iid/p11_freezoff_iid_fedavg.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p11_freezoff_iid_fedavg' +offload_stategy: vanilla +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: 
[7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_fednova.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_fednova.yaml new file mode 100644 index 00000000..cc608d43 --- /dev/null +++ b/configs/p11_freezoff_iid/p11_freezoff_iid_fednova.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p11_freezoff_iid_fednova' +offload_stategy: vanilla +optimizer: FedNova +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_fedprox.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_fedprox.yaml new file mode 100644 index 00000000..d1c84b5c --- /dev/null +++ b/configs/p11_freezoff_iid/p11_freezoff_iid_fedprox.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 
+epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p11_freezoff_iid_fedprox' +offload_stategy: vanilla +optimizer: FedProx +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_offload.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_offload.yaml new file mode 100644 index 00000000..cb856cf6 --- /dev/null +++ b/configs/p11_freezoff_iid/p11_freezoff_iid_offload.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p11_freezoff_iid_offload' +offload_stategy: offload +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + 
hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_tifl_adaptive.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_tifl_adaptive.yaml new file mode 100644 index 00000000..6f04b09f --- /dev/null +++ b/configs/p11_freezoff_iid/p11_freezoff_iid_tifl_adaptive.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p11_freezoff_iid_tifl_adaptive' +offload_stategy: tifl-adaptive +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_tifl_basic.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_tifl_basic.yaml new file mode 100644 index 00000000..89e480d9 --- /dev/null +++ b/configs/p11_freezoff_iid/p11_freezoff_iid_tifl_basic.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p11_freezoff_iid_tifl_basic' +offload_stategy: tifl-basic +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output' +tensor_board_active: true +clients_per_round: 3 +node_groups: + 
slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p11_freezoff_iid/run.py b/configs/p11_freezoff_iid/run.py new file mode 100644 index 00000000..31e0bb61 --- /dev/null +++ b/configs/p11_freezoff_iid/run.py @@ -0,0 +1,29 @@ +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + name = 'p11_freezoff' + generate_docker(name) + base_path = 'configs/p11_freezoff_iid' + exp_list = [ + 'p11_freezoff_iid_fedprox.yaml', + 'p11_freezoff_iid_fednova.yaml', + 'p11_freezoff_iid_offload.yaml', + 'p11_freezoff_iid_dyn_terminate_swyh.yaml', + 'p11_freezoff_iid_fedavg.yaml', + 'p11_freezoff_iid_tifl_adaptive.yaml', + 'p11_freezoff_iid_dyn_terminate.yaml', + 'p11_freezoff_iid_tifl_basic.yaml' + ] + exp_list = [f'{base_path}/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + + print('Done') + + + + diff --git a/configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_large.yaml b/configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_large.yaml new file mode 100644 index 00000000..fa064936 --- /dev/null +++ b/configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_large.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is 
available; setting to false will force CPU +cuda: false +experiment_prefix: 'p12_freezoff_iid_dyn_terminate' +offload_stategy: dynamic-terminate +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p12' +tensor_board_active: true +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_swyh_large.yaml b/configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_swyh_large.yaml new file mode 100644 index 00000000..b7292675 --- /dev/null +++ b/configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_swyh_large.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p12_freezoff_iid_dyn_terminate_swyh' +offload_stategy: dynamic-terminate-swyh +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p12' +tensor_board_active: true +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: 
'131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p12_freezoff_iid_large/p12_freezoff_iid_fedavg_large.yaml b/configs/p12_freezoff_iid_large/p12_freezoff_iid_fedavg_large.yaml new file mode 100644 index 00000000..24327e6f --- /dev/null +++ b/configs/p12_freezoff_iid_large/p12_freezoff_iid_fedavg_large.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p12_freezoff_iid_fedavg' +offload_stategy: vanilla +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p12' +tensor_board_active: true +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p12_freezoff_iid_large/p12_freezoff_iid_fednova_large.yaml b/configs/p12_freezoff_iid_large/p12_freezoff_iid_fednova_large.yaml new file mode 100644 index 00000000..cde9e011 --- /dev/null +++ b/configs/p12_freezoff_iid_large/p12_freezoff_iid_fednova_large.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p12_freezoff_iid_fednova' +offload_stategy: vanilla +optimizer: FedNova +profiling_time: 30 +deadline: 50000 +warmup_round: false 
+output_location: 'output/p12' +tensor_board_active: true +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p12_freezoff_iid_large/p12_freezoff_iid_fedprox_large.yaml b/configs/p12_freezoff_iid_large/p12_freezoff_iid_fedprox_large.yaml new file mode 100644 index 00000000..e39700ba --- /dev/null +++ b/configs/p12_freezoff_iid_large/p12_freezoff_iid_fedprox_large.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p12_freezoff_iid_fedprox' +offload_stategy: vanilla +optimizer: FedProx +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p12' +tensor_board_active: true +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p12_freezoff_iid_large/p12_freezoff_iid_offload_large.yaml b/configs/p12_freezoff_iid_large/p12_freezoff_iid_offload_large.yaml 
new file mode 100644 index 00000000..f8045a71 --- /dev/null +++ b/configs/p12_freezoff_iid_large/p12_freezoff_iid_offload_large.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p12_freezoff_iid_offload' +offload_stategy: offload +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p12' +tensor_board_active: true +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p12_freezoff_iid_large/p12_freezoff_iid_tifl_adaptive_large.yaml b/configs/p12_freezoff_iid_large/p12_freezoff_iid_tifl_adaptive_large.yaml new file mode 100644 index 00000000..be7ca59a --- /dev/null +++ b/configs/p12_freezoff_iid_large/p12_freezoff_iid_tifl_adaptive_large.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p12_freezoff_iid_tifl_adaptive' +offload_stategy: tifl-adaptive +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p12' +tensor_board_active: true +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) 
+#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p12_freezoff_iid_large/p12_freezoff_iid_tifl_basic_large.yaml b/configs/p12_freezoff_iid_large/p12_freezoff_iid_tifl_basic_large.yaml new file mode 100644 index 00000000..40b89646 --- /dev/null +++ b/configs/p12_freezoff_iid_large/p12_freezoff_iid_tifl_basic_large.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p12_freezoff_iid_tifl_basic' +offload_stategy: tifl-basic +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p12' +tensor_board_active: true +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p12_freezoff_iid_large/run.py b/configs/p12_freezoff_iid_large/run.py new file mode 100644 index 00000000..2619d167 --- /dev/null +++ b/configs/p12_freezoff_iid_large/run.py @@ -0,0 +1,29 @@ +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + name = 'p11_freezoff' + generate_docker(name) + base_path = 
'configs/p12_freezoff_iid_large' + exp_list = [ + 'p12_freezoff_iid_offload_large.yaml', + 'p12_freezoff_iid_fedprox_large.yaml', + 'p12_freezoff_iid_fednova_large.yaml', + 'p12_freezoff_iid_dyn_terminate_swyh_large.yaml', + 'p12_freezoff_iid_fedavg_large.yaml', + 'p12_freezoff_iid_tifl_adaptive_large.yaml', + 'p12_freezoff_iid_dyn_terminate_large.yaml', + 'p12_freezoff_iid_tifl_basic_large.yaml' + ] + exp_list = [f'{base_path}/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + + print('Done') + + + + diff --git a/deploy/p11_freezoff/client_stub_default.yml b/deploy/p11_freezoff/client_stub_default.yml new file mode 100644 index 00000000..43d6c919 --- /dev/null +++ b/deploy/p11_freezoff/client_stub_default.yml @@ -0,0 +1,26 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=default + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '1' +# memory: 1024M diff --git a/deploy/p11_freezoff/client_stub_fast.yml b/deploy/p11_freezoff/client_stub_fast.yml new file mode 100644 index 00000000..2c40393f --- /dev/null +++ b/deploy/p11_freezoff/client_stub_fast.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . # look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=fast + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '1' diff --git a/deploy/p11_freezoff/client_stub_medium.yml b/deploy/p11_freezoff/client_stub_medium.yml new file mode 100644 index 00000000..677accdf --- /dev/null +++ b/deploy/p11_freezoff/client_stub_medium.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=medium + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '0.75' diff --git a/deploy/p11_freezoff/client_stub_slow.yml b/deploy/p11_freezoff/client_stub_slow.yml new file mode 100644 index 00000000..ae578071 --- /dev/null +++ b/deploy/p11_freezoff/client_stub_slow.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . # look for the docker file where this file is currently located + volumes: + - ./data:/opt/federation-lab/data +# - ./docker_data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=slow + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '0.25' \ No newline at end of file diff --git a/deploy/p11_freezoff/system_stub.yml b/deploy/p11_freezoff/system_stub.yml new file mode 100644 index 00000000..77a19443 --- /dev/null +++ b/deploy/p11_freezoff/system_stub.yml @@ -0,0 +1,27 @@ +# creating a multi-container docker +version: "3.3" +services: + fl_server: # name can be anything + container_name: federation-lab-server # what the name for this container would be + cpuset: '0-1' + 
restart: "no" # if it crashes for example + build: . # look for the docker file where this file is currently located + volumes: +# - ./data/MNIST:/opt/federation-lab/data/MNIST + - ./data:/opt/federation-lab/data + - ./output:/opt/federation-lab/output + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK=0 + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + ports: + - "5000:5000" # {machine-port}:{docker-port} + networks: + default: + ipv4_address: 10.5.0.11 +networks: + default: + external: + name: local_network_dev \ No newline at end of file diff --git a/fltk/client.py b/fltk/client.py index 5ffaa7d2..10bd3b87 100644 --- a/fltk/client.py +++ b/fltk/client.py @@ -40,6 +40,7 @@ global_model_data_size = 0 global_sender_id = "" global_offload_received = False +global_local_updates_left = 0 def _call_method(method, rref, *args, **kwargs): @@ -91,14 +92,16 @@ class Client: dyn_terminate_swyh = False terminate_training = False + offload_release = False def __init__(self, id, log_rref, rank, world_size, config = None): # logging.info(f'Welcome to client {id}') self.id = id global_dict['id'] = id - global global_model_weights, global_offload_received, global_model_data_size + global global_model_weights, global_offload_received, global_model_data_size, global_local_updates_left global_model_weights = None global_offload_received = False + global_local_updates_left = 0 global_model_data_size = 0 self.log_rref = log_rref self.rank = rank @@ -288,12 +291,13 @@ def report_performance_estimate(self, performance_data): return _remote_method_async(Federator.perf_est_endpoint, self.server_ref, self.id, performance_data) @staticmethod - def offload_receive_endpoint(model_weights, num_train_samples, sender_id): + def offload_receive_endpoint(model_weights, num_train_samples, sender_id, local_updates_left): print(f'Got the offload_receive_endpoint endpoint') - global global_model_weights, global_offload_received, global_model_data_size, 
global_sender_id + global global_model_weights, global_offload_received, global_model_data_size, global_sender_id, global_local_updates_left global_model_weights = copy.deepcopy(model_weights.copy()) global_model_data_size = num_train_samples global_sender_id = sender_id + global_local_updates_left = local_updates_left global_offload_received = True @staticmethod @@ -305,10 +309,14 @@ def offload_receive_endpoint_2(string): # global_offload_received = True - def call_to_offload_endpoint(self, client_to_offload: RRef): + def call_to_offload_endpoint(self, client_to_offload: RRef, soft_deadline): self.local_log(f'Got the call to offload endpoint to {client_to_offload}') self.client_to_offload_to = client_to_offload self.call_to_offload = True + + def release_from_offloading_endpoint(self): + logging.info('Got a release signal') + self.offload_release = True def freeze_layers2(self, until, net): @@ -457,14 +465,17 @@ def calc_optimal_offloading_point(profiler_data, time_till_deadline, iterations_ if self.terminate_training: logging.info('Got a call to terminate training') break - + + if use_offloaded_model and i > global_local_updates_left: + logging.info(f'Stoppinng training of offloaded model; no local updates left; Was {global_local_updates_left}') + break if self.offload_enabled and not warmup: # Check if there is a call to offload if self.call_to_offload: self.args.get_logger().info('Got call to offload model') model_weights = self.get_nn_parameters() - - ret = rpc.rpc_async(self.client_to_offload_to, Client.offload_receive_endpoint, args=([model_weights, i, self.id])) + local_updates_left = number_of_training_samples - i + ret = rpc.rpc_async(self.client_to_offload_to, Client.offload_receive_endpoint, args=([model_weights, i, self.id, local_updates_left])) print(f'Result of rref: {ret}') # self.call_to_offload = False @@ -679,7 +690,7 @@ def run_epochs(self, num_epoch, deadline: int = None, warmup=False): start = time.time() start_time_train = 
datetime.datetime.now() - + self.call_to_offload = False self.dataset.get_train_sampler().set_epoch_size(num_epoch) # Train locally loss, weights, training_process, scheduler_data, perf_data = self.train(self.epoch_counter, deadline, warmup) @@ -710,13 +721,21 @@ def run_epochs(self, num_epoch, deadline: int = None, warmup=False): data = EpochData(self.epoch_counter, num_epoch, train_time_ms, test_time_ms, loss, accuracy, test_loss, class_precision, class_recall, training_process, self.id) self.epoch_results.append(data) + if hasattr(self.optimizer, 'pre_communicate'): # aka fednova or fedprox + self.optimizer.pre_communicate() for k, v in weights.items(): weights[k] = v.cpu() response_obj = {'own': [data, weights, scheduler_data, perf_data]} global global_offload_received + if self.offload_enabled: + logging.info('Waiting to receive offload or being released') + while not (global_offload_received or self.offload_release): + time.sleep(0.1) + logging.info(f'Continuing after global_offload_received={global_offload_received} and offload_release={self.offload_release}') if self.offload_enabled and global_offload_received: - self.configure_strategy(OffloadingStrategy.SWYH) + # self.configure_strategy(OffloadingStrategy.SWYH) + self.configure_strategy(OffloadingStrategy.VANILLA) logging.info('Processing offloaded model') self.load_offloaded_model() self.copy_offloaded_model_weights() @@ -726,8 +745,6 @@ def run_epochs(self, num_epoch, deadline: int = None, warmup=False): data_offload = EpochData(self.epoch_counter, num_epoch, train_time_ms, test_time_ms, loss_offload, accuracy, test_loss, class_precision, class_recall, training_process, f'{global_sender_id}-offload') - if hasattr(self.optimizer, 'pre_communicate'): # aka fednova or fedprox - self.optimizer.pre_communicate() # Copy GPU tensors to CPU for k, v in weights_offload.items(): weights_offload[k] = v.cpu() diff --git a/fltk/federator.py b/fltk/federator.py index b29116ed..096365e4 100644 --- a/fltk/federator.py 
+++ b/fltk/federator.py @@ -26,6 +26,7 @@ from pathlib import Path import logging import numpy as np +import copy # from fltk.util.profile_plots import stability_plot, parse_stability_data from fltk.util.results import EpochData @@ -152,9 +153,11 @@ def __init__(self, client_id_triple, num_epochs = 3, config=None): self.log_rref = log_rref self.num_epoch = num_epochs self.config = config - self.tb_path = config.output_location + self.tb_path = f'{config.output_location}/{config.experiment_prefix}' self.ensure_path_exists(self.tb_path) self.tb_writer = SummaryWriter(f'{self.tb_path}/{config.experiment_prefix}_federator') + self.strategy = OffloadingStrategy.Parse(config.offload_strategy) + self.configure_strategy(self.strategy) self.create_clients(client_id_triple) self.config.init_logger(logging) self.performance_data = {} @@ -165,8 +168,7 @@ def __init__(self, client_id_triple, num_epochs = 3, config=None): self.test_data = Client("test", None, 1, 2, config) config.data_sampler = copy_sampler self.reference_lookup[get_worker_info().name] = RRef(self) - self.strategy = OffloadingStrategy.Parse(config.offload_strategy) - self.configure_strategy(self.strategy) + if self.strategy == OffloadingStrategy.TIFL_BASIC or self.strategy == OffloadingStrategy.TIFL_ADAPTIVE: for k, v in self.config.node_groups.items(): self.node_groups[k] = list(range(v[0], v[1]+1)) @@ -240,7 +242,9 @@ def create_clients(self, client_id_triple): for id, rank, world_size in client_id_triple: client = rpc.remote(id, Client, kwargs=dict(id=id, log_rref=self.log_rref, rank=rank, world_size=world_size, config=self.config)) writer = SummaryWriter(f'{self.tb_path}/{self.config.experiment_prefix}_client_{id}') - writer_offload = SummaryWriter(f'{self.tb_path}/{self.config.experiment_prefix}_client_{id}_offload') + writer_offload = None + if self.offload_enabled: + writer_offload = SummaryWriter(f'{self.tb_path}/{self.config.experiment_prefix}_client_{id}_offload') self.clients.append(ClientRef(id, 
client, tensorboard_writer=writer, tensorboard_writer_offload=writer_offload, rank=rank)) self.client_data[id] = [] @@ -339,12 +343,12 @@ def process_response_list(self): resp.client.available = True self.response_list = list(filter(lambda x: not x.done, self.response_list)) - def ask_client_to_offload(self, client1_ref, client2_ref): + def ask_client_to_offload(self, client1_ref, client2_ref, soft_deadline): logging.info(f'Offloading call from {client1_ref} to {client2_ref}') # args = [method, rref] + list(args) # rpc.rpc_sync(client1_ref, Client.call_to_offload_endpoint, args=(client2_ref)) # print(_remote_method_async_by_name(Client.client_to_offload_to, client1_ref, client2_ref)) - _remote_method(Client.call_to_offload_endpoint, client1_ref, client2_ref) + _remote_method(Client.call_to_offload_endpoint, client1_ref, client2_ref, soft_deadline) logging.info(f'Done with call to offload') def remote_run_epoch(self, epochs, warmup=False, first_epoch=False): @@ -467,29 +471,42 @@ def reached_deadline(): strongest = 0 weak_performance = 0 strong_performance = 0 - for k, v in self.performance_estimate.items(): - # print(v) - if first: - first = False - est_total_time = v[1] - weakest = k - strongest = k - weak_performance = est_total_time - strong_performance = est_total_time - else: - est_total_time = v[1] - if est_total_time > weak_performance: - weak_performance = est_total_time + summed_time = 0 + perf_estimate_copy = copy.deepcopy(self.performance_estimate) + offload_calls = [] + for i in range(int(np.floor(len(self.performance_estimate)/2))): + for k, v in perf_estimate_copy.items(): + summed_time += v[1] + # print(v) + if first: + first = False + est_total_time = v[1] weakest = k - if est_total_time < strong_performance: - strong_performance = est_total_time strongest = k - self.record_epoch_event(f'Offloading from {weakest} -> {strongest} due to {self.performance_estimate[weakest]} and {self.performance_estimate[strongest]}') - logging.info( - f'Offloading 
from {weakest} -> {strongest} due to {self.performance_estimate[weakest]} and {self.performance_estimate[strongest]}') + weak_performance = est_total_time + strong_performance = est_total_time + else: + est_total_time = v[1] + if est_total_time > weak_performance: + weak_performance = est_total_time + weakest = k + if est_total_time < strong_performance: + strong_performance = est_total_time + strongest = k + self.record_epoch_event(f'Offloading from {weakest} -> {strongest} due to {self.performance_estimate[weakest]} and {self.performance_estimate[strongest]}') + logging.info( + f'Offloading from {weakest} -> {strongest} due to {self.performance_estimate[weakest]} and {self.performance_estimate[strongest]}') + offload_calls.append([weakest, strongest]) + perf_estimate_copy.pop(weakest, None) + perf_estimate_copy.pop(strongest, None) + mean_time_est_time = (summed_time * 1.0) / len(self.performance_estimate.items()) + logging.info(f'Mean time for offloading={mean_time_est_time}') logging.info('Sending call to offload') - self.ask_client_to_offload(self.reference_lookup[weakest], - strongest) + for weak_node, strong_node in offload_calls: + self.ask_client_to_offload(self.reference_lookup[weak_node], strong_node, mean_time_est_time) + logging.info('Releasing clients') + for client in selected_clients: + _remote_method_async(Client.release_from_offloading_endpoint, client.ref) # if self.offload_enabled and not warmup: # logging.info(f'self.performance_estimate={self.performance_estimate}') @@ -722,7 +739,7 @@ def set_tau_eff(self): # client.set_tau_eff(total) def save_experiment_data(self): - p = Path(f'./{self.config.output_location}') + p = Path(f'./{self.tb_path}') # file_output = f'./{self.config.output_location}' exp_prefix = self.config.experiment_prefix self.ensure_path_exists(p) @@ -750,7 +767,7 @@ def remote_test_sync(self): logging.info(f'{res[0]} had a result of accuracy={accuracy}') def flush_epoch_events(self): - file_output = 
f'./{self.config.output_location}' + file_output = f'./{self.tb_path}' exp_prefix = self.config.experiment_prefix file_epoch_events = f'{file_output}/{exp_prefix}_federator_events.txt' self.ensure_path_exists(file_output) @@ -763,7 +780,7 @@ def flush_epoch_events(self): self.epoch_events = [] def save_epoch_data(self): - file_output = f'./{self.config.output_location}' + file_output = f'./{self.tb_path}' exp_prefix = self.config.experiment_prefix self.ensure_path_exists(file_output) for key in self.client_data: diff --git a/fltk/util/base_config.py b/fltk/util/base_config.py index 781acb36..177cdb07 100644 --- a/fltk/util/base_config.py +++ b/fltk/util/base_config.py @@ -30,6 +30,7 @@ def __init__(self): self.kwargs = {} self.contribution_measurement_round = 1 self.contribution_measurement_metric = 'Influence' + self.epochs_per_round = 1 self.scheduler_step_size = 50 self.scheduler_gamma = 0.5 @@ -63,7 +64,7 @@ def __init__(self): # self.num_poisoned_workers = 10 self.offload_strategy = 'vanilla' - self.profiling_size = 100 + self.profiling_size = 30 self.deadline = 400 self.first_deadline = 400 self.warmup_round = False @@ -188,6 +189,9 @@ def merge_yaml(self, cfg = {}): self.node_groups = cfg['node_groups'] if 'termination_percentage' in cfg: self.termination_percentage = cfg['termination_percentage'] + + if 'epochs_per_round' in cfg: + self.epochs_per_round = cfg['epochs_per_round'] diff --git a/fltk/util/generate_docker_compose.py b/fltk/util/generate_docker_compose.py index f04b35cb..a720b9ca 100644 --- a/fltk/util/generate_docker_compose.py +++ b/fltk/util/generate_docker_compose.py @@ -93,6 +93,44 @@ def generate_dev(num_clients = 2, medium=False): with open(r'./docker-compose.yml', 'w') as file: yaml.dump(system_template, file, sort_keys=False) + +def generate_p11_freezoff(): + template_path = get_deploy_path('p11_freezoff') + num_clients= 18 + world_size = num_clients + 1 + system_template: dict = load_system_template(template_path=template_path) + + for 
key, item in enumerate(system_template['services']['fl_server']['environment']): + if item == 'WORLD_SIZE={world_size}': + system_template['services']['fl_server']['environment'][key] = item.format(world_size=world_size) + cpu_idx = 2 + for client_id in range(1, num_clients + 1): + client_type = 'default' + if 0 < client_id <= 6: + client_type = 'slow' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + elif 6 < client_id <= 12: + client_type = 'medium' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + elif 12 < client_id <= 18: + client_type = 'fast' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + else: + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + + client_template: dict = load_client_template(type=client_type, template_path=template_path) + client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type, + cpu_set=cpu_set) + system_template['services'].update(client_definition) + + with open(r'./docker-compose.yml', 'w') as file: + yaml.dump(system_template, file, sort_keys=False) + + def generate_tifl_15(): template_path = get_deploy_path('tifl-15') num_clients= 18 @@ -229,7 +267,8 @@ def run(name, num_clients = None, medium=False): exp_dict = { 'tifl-15': generate_tifl_15, 'dev': generate_dev, - 'terminate': generate_terminate + 'terminate': generate_terminate, + 'p11_freezoff': generate_p11_freezoff } if num_clients: exp_dict[name](num_clients, medium) From 86fea5530a1fb3b7366115e2bdd6ba944f8808b2 Mon Sep 17 00:00:00 2001 From: bacox Date: Thu, 27 Jan 2022 12:08:39 +0100 Subject: [PATCH 28/73] Add variance dev --- fltk/util/generate_docker_compose.py | 39 +++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/fltk/util/generate_docker_compose.py b/fltk/util/generate_docker_compose.py index a720b9ca..7d355d8c 100644 --- a/fltk/util/generate_docker_compose.py +++ b/fltk/util/generate_docker_compose.py @@ -93,6 +93,42 @@ def generate_dev(num_clients = 2, medium=False): with open(r'./docker-compose.yml', 'w') as 
file: yaml.dump(system_template, file, sort_keys=False) +def generate_p13_w6(): + template_path = get_deploy_path('p11_freezoff') + num_clients= 6 + world_size = num_clients + 1 + system_template: dict = load_system_template(template_path=template_path) + + for key, item in enumerate(system_template['services']['fl_server']['environment']): + if item == 'WORLD_SIZE={world_size}': + system_template['services']['fl_server']['environment'][key] = item.format(world_size=world_size) + cpu_idx = 2 + for client_id in range(1, num_clients + 1): + client_type = 'default' + if 0 < client_id <= 2: + client_type = 'slow' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + elif 2 < client_id <= 4: + client_type = 'medium' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + elif 4 < client_id <= 6: + client_type = 'fast' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + else: + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + + client_template: dict = load_client_template(type=client_type, template_path=template_path) + client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type, + cpu_set=cpu_set) + system_template['services'].update(client_definition) + + with open(r'./docker-compose.yml', 'w') as file: + yaml.dump(system_template, file, sort_keys=False) + def generate_p11_freezoff(): template_path = get_deploy_path('p11_freezoff') @@ -268,7 +304,8 @@ def run(name, num_clients = None, medium=False): 'tifl-15': generate_tifl_15, 'dev': generate_dev, 'terminate': generate_terminate, - 'p11_freezoff': generate_p11_freezoff + 'p11_freezoff': generate_p11_freezoff, + 'p13_w6' : generate_p13_w6 } if num_clients: exp_dict[name](num_clients, medium) From f9baa31c69bf37df266d86a6f580e3c345187ac9 Mon Sep 17 00:00:00 2001 From: bacox Date: Thu, 27 Jan 2022 12:09:23 +0100 Subject: [PATCH 29/73] Update gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 106b5275..8ad635e6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 
+1,7 @@ +*.csv +*.json + + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] From f57c383cb1733248d171977bea712265b8b97be8 Mon Sep 17 00:00:00 2001 From: Lydia Date: Thu, 27 Jan 2022 14:52:53 +0100 Subject: [PATCH 30/73] Small fixes --- fltk/client.py | 13 ++++++++----- fltk/util/base_config.py | 17 +++++++++++++++-- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/fltk/client.py b/fltk/client.py index 10bd3b87..69be173c 100644 --- a/fltk/client.py +++ b/fltk/client.py @@ -425,8 +425,9 @@ def train(self, epoch, deadline: int = None, warmup=False, use_offloaded_model=F pre_train_loop_data = np.zeros(profiling_size) post_train_loop_data = np.zeros(profiling_size) active_profiling = True - p = P2(profiling_size, 7) - p3 = P3(profiling_size, 7) + split_point = self.args.nets_split_point[self.args.net_name] + p = P2(profiling_size, split_point - 1) + p3 = P3(profiling_size, split_point - 1) if use_offloaded_model: p.attach(self.offloaded_net) p3.attach(self.offloaded_net) @@ -483,8 +484,9 @@ def calc_optimal_offloading_point(profiler_data, time_till_deadline, iterations_ # This number only works for cifar10cnn # @TODO: Make this dynamic for other networks # self.freeze_layers(5) - self.freeze_layers2(8, self.net) - + split_point = self.args.nets_split_point[self.args.net_name] + self.freeze_layers2(split_point, self.net) + # Check if there is a model to incorporate # Disable for now to offloading testing # if global_offload_received: @@ -589,7 +591,8 @@ def calc_optimal_offloading_point(profiler_data, time_till_deadline, iterations_ # This number only works for cifar10cnn # @TODO: Make this dynamic for other networks # self.freeze_layers(5) - self.freeze_layers2(8, self.net) + split_point = self.args.nets_split_point[self.args.net_name] + self.freeze_layers2(split_point, self.net) # logging.info(f'Batch time is {batch_duration}') # Break away from loop for debug purposes diff --git a/fltk/util/base_config.py b/fltk/util/base_config.py index 
177cdb07..5db5a9bb 100644 --- a/fltk/util/base_config.py +++ b/fltk/util/base_config.py @@ -89,9 +89,20 @@ def __init__(self): "FashionMNISTCNN": FashionMNISTCNN, "FashionMNISTResNet": FashionMNISTResNet + } + + self.nets_split_point = { + "Cifar100ResNet": 48, + "Cifar100VGG": 28, + "Cifar10CNN": 15, + "Cifar10ResNet": 39, + "FashionMNISTCNN": 7, + "FashionMNISTResNet": 7 + } self.net = None - self.set_net_by_name('Cifar10CNN') + self.net_name = 'Cifar10CNN' + self.set_net_by_name(self.net_name) self.dataset_name = 'cifar10' self.DistDatasets = { @@ -138,6 +149,7 @@ def merge_yaml(self, cfg = {}): if 'wait_for_clients' in cfg: self.wait_for_clients = cfg['wait_for_clients'] if 'net' in cfg: + self.net_name = cfg['net'] self.set_net_by_name(cfg['net']) if 'dataset' in cfg: self.dataset_name = cfg['dataset'] @@ -259,7 +271,8 @@ def get_test_data_loader_pickle_path(self): return self.test_data_loader_pickle_path[self.dataset_name] def set_net_by_name(self, name: str): - self.net = self.available_nets[name] + self.net_name = name + self.net = self.available_nets[self.net_name] def get_cuda(self): return self.cuda From 14c91a8d29aa86450a9195464ded3d401ace9a0e Mon Sep 17 00:00:00 2001 From: bacox Date: Thu, 27 Jan 2022 14:53:38 +0100 Subject: [PATCH 31/73] Add variance exps --- .gitignore | 3 +- .../p13_variance_dev_dyn_terminate_large.yaml | 34 ++++++++++++++++++ ...variance_dev_dyn_terminate_swyh_large.yaml | 34 ++++++++++++++++++ .../p13_variance_dev_fedavg_large.yaml | 34 ++++++++++++++++++ .../p13_variance_dev_fednova_large.yaml | 35 +++++++++++++++++++ .../p13_variance_dev_fedprox_large.yaml | 35 +++++++++++++++++++ .../p13_variance_dev_offload_large.yaml | 34 ++++++++++++++++++ .../p13_variance_dev_tifl_adaptive_large.yaml | 34 ++++++++++++++++++ .../p13_variance_dev_tifl_basic_large.yaml | 34 ++++++++++++++++++ configs/p13_variance_dev/run.py | 29 +++++++++++++++ fltk/util/show_client_distributions.py | 4 +-- 11 files changed, 307 insertions(+), 3 deletions(-) 
create mode 100644 configs/p13_variance_dev/p13_variance_dev_dyn_terminate_large.yaml create mode 100644 configs/p13_variance_dev/p13_variance_dev_dyn_terminate_swyh_large.yaml create mode 100644 configs/p13_variance_dev/p13_variance_dev_fedavg_large.yaml create mode 100644 configs/p13_variance_dev/p13_variance_dev_fednova_large.yaml create mode 100644 configs/p13_variance_dev/p13_variance_dev_fedprox_large.yaml create mode 100644 configs/p13_variance_dev/p13_variance_dev_offload_large.yaml create mode 100644 configs/p13_variance_dev/p13_variance_dev_tifl_adaptive_large.yaml create mode 100644 configs/p13_variance_dev/p13_variance_dev_tifl_basic_large.yaml create mode 100644 configs/p13_variance_dev/run.py diff --git a/.gitignore b/.gitignore index 8ad635e6..6bfa0ca6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ *.csv *.json - +*.png +*.pdf # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/configs/p13_variance_dev/p13_variance_dev_dyn_terminate_large.yaml b/configs/p13_variance_dev/p13_variance_dev_dyn_terminate_large.yaml new file mode 100644 index 00000000..7288b6be --- /dev/null +++ b/configs/p13_variance_dev/p13_variance_dev_dyn_terminate_large.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p13_variance_dev_dyn_terminate' +offload_stategy: dynamic-terminate +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p12' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || 
random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 diff --git a/configs/p13_variance_dev/p13_variance_dev_dyn_terminate_swyh_large.yaml b/configs/p13_variance_dev/p13_variance_dev_dyn_terminate_swyh_large.yaml new file mode 100644 index 00000000..dd78fd8c --- /dev/null +++ b/configs/p13_variance_dev/p13_variance_dev_dyn_terminate_swyh_large.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p13_variance_dev_terminate_swyh' +offload_stategy: dynamic-terminate-swyh +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p12' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 diff --git a/configs/p13_variance_dev/p13_variance_dev_fedavg_large.yaml b/configs/p13_variance_dev/p13_variance_dev_fedavg_large.yaml new file mode 100644 index 00000000..37495688 --- /dev/null +++ b/configs/p13_variance_dev/p13_variance_dev_fedavg_large.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p13_variance_dev_fedavg' +offload_stategy: 
vanilla +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p12' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 diff --git a/configs/p13_variance_dev/p13_variance_dev_fednova_large.yaml b/configs/p13_variance_dev/p13_variance_dev_fednova_large.yaml new file mode 100644 index 00000000..4c705ab2 --- /dev/null +++ b/configs/p13_variance_dev/p13_variance_dev_fednova_large.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p13_variance_dev_fednova' +offload_stategy: vanilla +optimizer: FedNova +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p12' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 diff --git a/configs/p13_variance_dev/p13_variance_dev_fedprox_large.yaml 
b/configs/p13_variance_dev/p13_variance_dev_fedprox_large.yaml new file mode 100644 index 00000000..63213871 --- /dev/null +++ b/configs/p13_variance_dev/p13_variance_dev_fedprox_large.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p13_variance_dev_fedprox' +offload_stategy: vanilla +optimizer: FedProx +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p12' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 diff --git a/configs/p13_variance_dev/p13_variance_dev_offload_large.yaml b/configs/p13_variance_dev/p13_variance_dev_offload_large.yaml new file mode 100644 index 00000000..4ea37194 --- /dev/null +++ b/configs/p13_variance_dev/p13_variance_dev_offload_large.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p13_variance_dev_offload' +offload_stategy: offload +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p12' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" 
|| "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 diff --git a/configs/p13_variance_dev/p13_variance_dev_tifl_adaptive_large.yaml b/configs/p13_variance_dev/p13_variance_dev_tifl_adaptive_large.yaml new file mode 100644 index 00000000..b24dc6ab --- /dev/null +++ b/configs/p13_variance_dev/p13_variance_dev_tifl_adaptive_large.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p13_variance_dev_adaptive' +offload_stategy: tifl-adaptive +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p12' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 diff --git a/configs/p13_variance_dev/p13_variance_dev_tifl_basic_large.yaml b/configs/p13_variance_dev/p13_variance_dev_tifl_basic_large.yaml new file mode 100644 index 00000000..60f06c0c --- /dev/null +++ b/configs/p13_variance_dev/p13_variance_dev_tifl_basic_large.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 
+wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p13_variance_dev_tifl_basic' +offload_stategy: tifl-basic +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p12' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 diff --git a/configs/p13_variance_dev/run.py b/configs/p13_variance_dev/run.py new file mode 100644 index 00000000..085c4d6c --- /dev/null +++ b/configs/p13_variance_dev/run.py @@ -0,0 +1,29 @@ +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + name = 'p13_w6' + generate_docker(name) + base_path = 'configs/p13_variance_dev' + exp_list = [ + 'p13_variance_dev_offload_large.yaml', + # 'p13_variance_dev_fedprox_large.yaml', + # 'p13_variance_dev_fednova_large.yaml', + # 'p13_variance_dev_dyn_terminate_swyh_large.yaml', + # 'p13_variance_dev_fedavg_large.yaml', + # 'p13_variance_dev_tifl_adaptive_large.yaml', + # 'p13_variance_dev_dyn_terminate_large.yaml', + # 'p13_variance_dev_tifl_basic_large.yaml' + ] + exp_list = [f'{base_path}/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + + print('Done') + + + + diff --git 
a/fltk/util/show_client_distributions.py b/fltk/util/show_client_distributions.py index c2af710a..414e799d 100644 --- a/fltk/util/show_client_distributions.py +++ b/fltk/util/show_client_distributions.py @@ -25,7 +25,7 @@ # 'dirichlet': {'seed': 1, 'range':[0.1, 1, 0.1]}, } -num_clients = 4 +num_clients = 6 class dummy_args: net = 'Cifar10CNN' dataset_name = 'cifar10' @@ -34,7 +34,7 @@ class dummy_args: data_sampler = "n labels" # sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) # data_sampler_args = [0.07, 42] # random seed || random seed || random seed || unused - data_sampler_args = [7 , 42] # random seed || random seed || random seed || unused + data_sampler_args = [2 , 42] # random seed || random seed || random seed || unused DistDatasets = { 'cifar10': DistCIFAR10Dataset, 'cifar100': DistCIFAR100Dataset, From 69f6ccefc2e824fa67eead8d14340bb2757e150e Mon Sep 17 00:00:00 2001 From: bacox Date: Thu, 27 Jan 2022 15:08:46 +0100 Subject: [PATCH 32/73] Add offload strict --- .../p11_freezoff_iid_offload_strict.yaml | 35 +++++++++++++++++++ fltk/client.py | 2 +- fltk/strategy/offloading.py | 10 +++++- fltk/util/base_config.py | 3 ++ 4 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 configs/p11_freezoff_iid/p11_freezoff_iid_offload_strict.yaml diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_offload_strict.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_offload_strict.yaml new file mode 100644 index 00000000..fed425ef --- /dev/null +++ b/configs/p11_freezoff_iid/p11_freezoff_iid_offload_strict.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p11_freezoff_iid_offload_strict' +offload_stategy: offload +profiling_time: 30 +deadline: 5 +deadline_threshold: 1 +warmup_round: false +output_location: 'output' 
+tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/fltk/client.py b/fltk/client.py index 69be173c..86754659 100644 --- a/fltk/client.py +++ b/fltk/client.py @@ -388,7 +388,7 @@ def train(self, epoch, deadline: int = None, warmup=False, use_offloaded_model=F if use_offloaded_model: for param in self.offloaded_net.parameters(): param.requires_grad = True - deadline_threshold = 10 + deadline_threshold = self.args.deadline_threshold train_stop_time = None if self.deadline_enabled and deadline is not None: train_stop_time = start_time + deadline - deadline_threshold diff --git a/fltk/strategy/offloading.py b/fltk/strategy/offloading.py index 3c8bf7e4..fb405807 100644 --- a/fltk/strategy/offloading.py +++ b/fltk/strategy/offloading.py @@ -10,7 +10,8 @@ class OffloadingStrategy(Enum): TIFL_BASIC = 6, TIFL_ADAPTIVE = 7, DYN_TERMINATE = 8, - DYN_TERMINATE_SWYH = 9 + DYN_TERMINATE_SWYH = 9, + MODEL_OFFLOAD_STRICT = 10 @classmethod def Parse(cls, string_value): @@ -32,6 +33,8 @@ def Parse(cls, string_value): return OffloadingStrategy.DYN_TERMINATE if string_value == 'dynamic-terminate-swyh': return OffloadingStrategy.DYN_TERMINATE_SWYH + if string_value == 'offload-strict': + return OffloadingStrategy.MODEL_OFFLOAD_STRICT def parse_strategy(strategy: OffloadingStrategy): @@ -78,4 +81,9 @@ def parse_strategy(strategy: OffloadingStrategy): freeze_layers_enabled = False offload_enabled = False dyn_terminate_swyh = True + if strategy == 
OffloadingStrategy.MODEL_OFFLOAD_STRICT: + deadline_enabled = True + swyh_enabled = True + freeze_layers_enabled = True + offload_enabled = True return deadline_enabled, swyh_enabled, freeze_layers_enabled, offload_enabled, dyn_terminate, dyn_terminate_swyh diff --git a/fltk/util/base_config.py b/fltk/util/base_config.py index 5db5a9bb..86a6600f 100644 --- a/fltk/util/base_config.py +++ b/fltk/util/base_config.py @@ -67,6 +67,7 @@ def __init__(self): self.profiling_size = 30 self.deadline = 400 self.first_deadline = 400 + self.deadline_threshold = 10 self.warmup_round = False # FLTK options @@ -159,6 +160,8 @@ def merge_yaml(self, cfg = {}): self.profiling_size = cfg['profiling_size'] if 'deadline' in cfg: self.deadline = cfg['deadline'] + if 'deadline_threshold' in cfg: + self.deadline_threshold = cfg['deadline_threshold'] if 'first_deadline' in cfg: self.first_deadline = cfg['first_deadline'] if 'warmup_round' in cfg: From ec86d927d5a7f20dc309401fd428593f1b32d83f Mon Sep 17 00:00:00 2001 From: bacox Date: Thu, 27 Jan 2022 23:15:23 +0100 Subject: [PATCH 33/73] Save wall_time in csv --- fltk/federator.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fltk/federator.py b/fltk/federator.py index 096365e4..183c151b 100644 --- a/fltk/federator.py +++ b/fltk/federator.py @@ -726,7 +726,8 @@ def reached_deadline(): end_epoch_time = time.time() duration = end_epoch_time - start_epoch_time - self.exp_data_general.append([self.epoch_counter, duration, accuracy, loss, class_precision, class_recall]) + + self.exp_data_general.append([self.epoch_counter, end_epoch_time, duration, accuracy, loss, class_precision, class_recall]) def set_tau_eff(self): @@ -745,7 +746,7 @@ def save_experiment_data(self): self.ensure_path_exists(p) p /= f'{exp_prefix}-general_data.csv' # general_filename = f'{file_output}/general_data.csv' - df = pd.DataFrame(self.exp_data_general, columns=['epoch', 'duration', 'accuracy', 'loss', 'class_precision', 'class_recall']) + df = 
pd.DataFrame(self.exp_data_general, columns=['epoch', 'wall_time', 'duration', 'accuracy', 'loss', 'class_precision', 'class_recall']) df.to_csv(p) def update_client_data_sizes(self): From 948f41e42e0e0ceeba963183ed5af267d6a819a1 Mon Sep 17 00:00:00 2001 From: bacox Date: Thu, 27 Jan 2022 23:27:41 +0100 Subject: [PATCH 34/73] Add wall time to client data --- fltk/client.py | 4 ++-- fltk/federator.py | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/fltk/client.py b/fltk/client.py index 86754659..a0f06abc 100644 --- a/fltk/client.py +++ b/fltk/client.py @@ -722,7 +722,7 @@ def run_epochs(self, num_epoch, deadline: int = None, warmup=False): logging.info( f'Time for training={duration_train}, time for testing={duration_test}, total time={duration_train + duration_test}') data = EpochData(self.epoch_counter, num_epoch, train_time_ms, test_time_ms, loss, accuracy, test_loss, - class_precision, class_recall, training_process, self.id) + class_precision, class_recall, training_process, self.id, client_wall_time=time.time()) self.epoch_results.append(data) if hasattr(self.optimizer, 'pre_communicate'): # aka fednova or fedprox self.optimizer.pre_communicate() @@ -746,7 +746,7 @@ def run_epochs(self, num_epoch, deadline: int = None, warmup=False): accuracy, test_loss, class_precision, class_recall, _accuracy_per_class = self.test(use_offloaded_model=True) global global_sender_id data_offload = EpochData(self.epoch_counter, num_epoch, train_time_ms, test_time_ms, loss_offload, accuracy, test_loss, - class_precision, class_recall, training_process, f'{global_sender_id}-offload') + class_precision, class_recall, training_process, f'{global_sender_id}-offload', client_wall_time=time.time()) # Copy GPU tensors to CPU for k, v in weights_offload.items(): diff --git a/fltk/federator.py b/fltk/federator.py index 183c151b..5acf545d 100644 --- a/fltk/federator.py +++ b/fltk/federator.py @@ -560,8 +560,10 @@ def reached_deadline(): client.available = True 
logging.info(f'Fetching response for client: {client}') response_obj = client_response.future.wait() - + epoch_data : EpochData epoch_data, weights, scheduler_data, perf_data = response_obj['own'] + epoch_data.global_epoch_id = self.epoch_counter + epoch_data.global_wall_time = client_response.end_time self.client_data[epoch_data.client_id].append(epoch_data) # logging.info(f'{client} had a loss of {epoch_data.loss}') @@ -651,6 +653,8 @@ def reached_deadline(): epoch_data_offload, weights_offload, scheduler_data_offload, perf_data_offload, sender_id = response_obj['offload'] if epoch_data_offload.client_id not in self.client_data: self.client_data[epoch_data_offload.client_id] = [] + epoch_data_offload.global_epoch_id = self.epoch_counter + epoch_data_offload.global_wall_time = client_response.end_time self.client_data[epoch_data_offload.client_id].append(epoch_data_offload) writer = client.tb_writer_offload From de3c46b85f2295a936654fba222ba8ef094f7f5c Mon Sep 17 00:00:00 2001 From: Lydia Date: Thu, 27 Jan 2022 23:32:27 +0100 Subject: [PATCH 35/73] Add p11A experiment --- .../p11A_freezoff_iid_dyn_terminate.yaml | 35 +++++++++++++++++++ .../p11A_freezoff_iid_dyn_terminate_swyh.yaml | 35 +++++++++++++++++++ .../p11A_freezoff_iid_fedavg.yaml | 34 ++++++++++++++++++ .../p11A_freezoff_iid_fednova.yaml | 35 +++++++++++++++++++ .../p11A_freezoff_iid_fedprox.yaml | 35 +++++++++++++++++++ .../p11A_freezoff_iid_offload.yaml | 34 ++++++++++++++++++ .../p11A_freezoff_iid_offload_strict.yaml | 35 +++++++++++++++++++ .../p11A_freezoff_iid_tifl_adaptive.yaml | 34 ++++++++++++++++++ .../p11A_freezoff_iid_tifl_basic.yaml | 34 ++++++++++++++++++ configs/p11A_freezoff_iid/run.py | 30 ++++++++++++++++ .../p11_freezoff_iid_dyn_terminate.yaml | 3 +- .../p11_freezoff_iid_dyn_terminate_swyh.yaml | 3 +- .../p11_freezoff_iid_fedavg.yaml | 2 +- .../p11_freezoff_iid_fednova.yaml | 2 +- .../p11_freezoff_iid_fedprox.yaml | 2 +- .../p11_freezoff_iid_offload.yaml | 2 +- 
.../p11_freezoff_iid_offload_strict.yaml | 4 +-- .../p11_freezoff_iid_tifl_adaptive.yaml | 2 +- .../p11_freezoff_iid_tifl_basic.yaml | 2 +- configs/p11_freezoff_iid/run.py | 11 +++--- .../p12_freezoff_iid_dyn_terminate_large.yaml | 1 + ...freezoff_iid_dyn_terminate_swyh_large.yaml | 1 + ...p12_freezoff_iid_offload_strict_large.yaml | 35 +++++++++++++++++++ configs/p12_freezoff_iid_large/run.py | 1 + ..._freezoff_non_iid_dyn_terminate_large.yaml | 35 +++++++++++++++++++ ...zoff_non_iid_dyn_terminate_swyh_large.yaml | 35 +++++++++++++++++++ .../p15_freezoff_non_iid_fedavg_large.yaml | 34 ++++++++++++++++++ .../p15_freezoff_non_iid_fednova_large.yaml | 35 +++++++++++++++++++ .../p15_freezoff_non_iid_fedprox_large.yaml | 35 +++++++++++++++++++ .../p15_freezoff_non_iid_offload_large.yaml | 34 ++++++++++++++++++ ...freezoff_non_iid_offload_strict_large.yaml | 35 +++++++++++++++++++ ..._freezoff_non_iid_tifl_adaptive_large.yaml | 34 ++++++++++++++++++ ...p15_freezoff_non_iid_tifl_basic_large.yaml | 34 ++++++++++++++++++ configs/p15_freezoff_non_iid_large/run.py | 30 ++++++++++++++++ 34 files changed, 738 insertions(+), 15 deletions(-) create mode 100644 configs/p11A_freezoff_iid/p11A_freezoff_iid_dyn_terminate.yaml create mode 100644 configs/p11A_freezoff_iid/p11A_freezoff_iid_dyn_terminate_swyh.yaml create mode 100644 configs/p11A_freezoff_iid/p11A_freezoff_iid_fedavg.yaml create mode 100644 configs/p11A_freezoff_iid/p11A_freezoff_iid_fednova.yaml create mode 100644 configs/p11A_freezoff_iid/p11A_freezoff_iid_fedprox.yaml create mode 100644 configs/p11A_freezoff_iid/p11A_freezoff_iid_offload.yaml create mode 100644 configs/p11A_freezoff_iid/p11A_freezoff_iid_offload_strict.yaml create mode 100644 configs/p11A_freezoff_iid/p11A_freezoff_iid_tifl_adaptive.yaml create mode 100644 configs/p11A_freezoff_iid/p11A_freezoff_iid_tifl_basic.yaml create mode 100644 configs/p11A_freezoff_iid/run.py create mode 100644 
configs/p12_freezoff_iid_large/p12_freezoff_iid_offload_strict_large.yaml create mode 100644 configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_dyn_terminate_large.yaml create mode 100644 configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_dyn_terminate_swyh_large.yaml create mode 100644 configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fedavg_large.yaml create mode 100644 configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fednova_large.yaml create mode 100644 configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fedprox_large.yaml create mode 100644 configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_offload_large.yaml create mode 100644 configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_offload_strict_large.yaml create mode 100644 configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_tifl_adaptive_large.yaml create mode 100644 configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_tifl_basic_large.yaml create mode 100644 configs/p15_freezoff_non_iid_large/run.py diff --git a/configs/p11A_freezoff_iid/p11A_freezoff_iid_dyn_terminate.yaml b/configs/p11A_freezoff_iid/p11A_freezoff_iid_dyn_terminate.yaml new file mode 100644 index 00000000..8262f35c --- /dev/null +++ b/configs/p11A_freezoff_iid/p11A_freezoff_iid_dyn_terminate.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p11A_freezoff_iid_dyn_terminate' +offload_stategy: dynamic-terminate +profiling_time: 20 +deadline: 50000 +warmup_round: false +output_location: 'output/p11A' +tensor_board_active: true +clients_per_round: 3 +termination_percentage: 0.7 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || 
"uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p11A_freezoff_iid/p11A_freezoff_iid_dyn_terminate_swyh.yaml b/configs/p11A_freezoff_iid/p11A_freezoff_iid_dyn_terminate_swyh.yaml new file mode 100644 index 00000000..ff5ccea7 --- /dev/null +++ b/configs/p11A_freezoff_iid/p11A_freezoff_iid_dyn_terminate_swyh.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p11A_freezoff_iid_dyn_terminate_swyh' +offload_stategy: dynamic-terminate-swyh +profiling_time: 20 +deadline: 50000 +warmup_round: false +output_location: 'output/p11A' +tensor_board_active: true +clients_per_round: 3 +termination_percentage: 0.7 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p11A_freezoff_iid/p11A_freezoff_iid_fedavg.yaml b/configs/p11A_freezoff_iid/p11A_freezoff_iid_fedavg.yaml new file mode 100644 index 00000000..190acd98 --- /dev/null +++ b/configs/p11A_freezoff_iid/p11A_freezoff_iid_fedavg.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use 
cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p11A_freezoff_iid_fedavg' +offload_stategy: vanilla +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p11A' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p11A_freezoff_iid/p11A_freezoff_iid_fednova.yaml b/configs/p11A_freezoff_iid/p11A_freezoff_iid_fednova.yaml new file mode 100644 index 00000000..13ca9fd6 --- /dev/null +++ b/configs/p11A_freezoff_iid/p11A_freezoff_iid_fednova.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p11A_freezoff_iid_fednova' +offload_stategy: vanilla +optimizer: FedNova +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p11A' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + 
clients: + amount: 18 diff --git a/configs/p11A_freezoff_iid/p11A_freezoff_iid_fedprox.yaml b/configs/p11A_freezoff_iid/p11A_freezoff_iid_fedprox.yaml new file mode 100644 index 00000000..2515b230 --- /dev/null +++ b/configs/p11A_freezoff_iid/p11A_freezoff_iid_fedprox.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p11A_freezoff_iid_fedprox' +offload_stategy: vanilla +optimizer: FedProx +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p11A' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p11A_freezoff_iid/p11A_freezoff_iid_offload.yaml b/configs/p11A_freezoff_iid/p11A_freezoff_iid_offload.yaml new file mode 100644 index 00000000..942b7daa --- /dev/null +++ b/configs/p11A_freezoff_iid/p11A_freezoff_iid_offload.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p11A_freezoff_iid_offload' +offload_stategy: offload +profiling_time: 20 +deadline: 50000 +warmup_round: false +output_location: 'output/p11A' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: 
[13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p11A_freezoff_iid/p11A_freezoff_iid_offload_strict.yaml b/configs/p11A_freezoff_iid/p11A_freezoff_iid_offload_strict.yaml new file mode 100644 index 00000000..9cc228e6 --- /dev/null +++ b/configs/p11A_freezoff_iid/p11A_freezoff_iid_offload_strict.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p11A_freezoff_iid_offload_strict' +offload_stategy: offload +profiling_time: 20 +deadline: 7 +deadline_threshold: 2 +warmup_round: false +output_location: 'output/p11A' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p11A_freezoff_iid/p11A_freezoff_iid_tifl_adaptive.yaml b/configs/p11A_freezoff_iid/p11A_freezoff_iid_tifl_adaptive.yaml new file mode 100644 index 00000000..41d525d1 --- /dev/null +++ b/configs/p11A_freezoff_iid/p11A_freezoff_iid_tifl_adaptive.yaml @@ -0,0 +1,34 @@ 
+--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p11A_freezoff_iid_tifl_adaptive' +offload_stategy: tifl-adaptive +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p11A' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p11A_freezoff_iid/p11A_freezoff_iid_tifl_basic.yaml b/configs/p11A_freezoff_iid/p11A_freezoff_iid_tifl_basic.yaml new file mode 100644 index 00000000..d432df2c --- /dev/null +++ b/configs/p11A_freezoff_iid/p11A_freezoff_iid_tifl_basic.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p11A_freezoff_iid_tifl_basic' +offload_stategy: tifl-basic +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p11A' +tensor_board_active: true +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random 
seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p11A_freezoff_iid/run.py b/configs/p11A_freezoff_iid/run.py new file mode 100644 index 00000000..d15e51c2 --- /dev/null +++ b/configs/p11A_freezoff_iid/run.py @@ -0,0 +1,30 @@ +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + name = 'p11_freezoff' + generate_docker(name) + base_path = 'configs/p11A_freezoff_iid' + exp_list = [ + # 'p11A_freezoff_iid_fedprox.yaml', + # 'p11A_freezoff_iid_fednova.yaml', + # 'p11A_freezoff_iid_offload.yaml', + 'p11A_freezoff_iid_offload_strict.yaml', + # 'p11A_freezoff_iid_dyn_terminate_swyh.yaml', + 'p11A_freezoff_iid_fedavg.yaml', + 'p11A_freezoff_iid_tifl_adaptive.yaml', + # 'p11A_freezoff_iid_dyn_terminate.yaml', + 'p11A_freezoff_iid_tifl_basic.yaml' + ] + exp_list = [f'{base_path}/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + + print('Done') + + + + diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate.yaml index a58b5e5c..6c791172 100644 --- a/configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate.yaml +++ b/configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate.yaml @@ -12,9 +12,10 @@ offload_stategy: dynamic-terminate profiling_time: 30 deadline: 50000 warmup_round: false -output_location: 'output' +output_location: 'output/p11' tensor_board_active: true clients_per_round: 3 +termination_percentage: 0.7 node_groups: slow: [1, 6] medium: [7, 12] diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate_swyh.yaml 
b/configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate_swyh.yaml index d1e34592..d439fe4a 100644 --- a/configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate_swyh.yaml +++ b/configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate_swyh.yaml @@ -12,9 +12,10 @@ offload_stategy: dynamic-terminate-swyh profiling_time: 30 deadline: 50000 warmup_round: false -output_location: 'output' +output_location: 'output/p11' tensor_board_active: true clients_per_round: 3 +termination_percentage: 0.7 node_groups: slow: [1, 6] medium: [7, 12] diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_fedavg.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_fedavg.yaml index 251453dd..cc912b84 100644 --- a/configs/p11_freezoff_iid/p11_freezoff_iid_fedavg.yaml +++ b/configs/p11_freezoff_iid/p11_freezoff_iid_fedavg.yaml @@ -12,7 +12,7 @@ offload_stategy: vanilla profiling_time: 30 deadline: 50000 warmup_round: false -output_location: 'output' +output_location: 'output/p11' tensor_board_active: true clients_per_round: 3 node_groups: diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_fednova.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_fednova.yaml index cc608d43..d4ed5682 100644 --- a/configs/p11_freezoff_iid/p11_freezoff_iid_fednova.yaml +++ b/configs/p11_freezoff_iid/p11_freezoff_iid_fednova.yaml @@ -13,7 +13,7 @@ optimizer: FedNova profiling_time: 30 deadline: 50000 warmup_round: false -output_location: 'output' +output_location: 'output/p11' tensor_board_active: true clients_per_round: 3 node_groups: diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_fedprox.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_fedprox.yaml index d1c84b5c..0d45f094 100644 --- a/configs/p11_freezoff_iid/p11_freezoff_iid_fedprox.yaml +++ b/configs/p11_freezoff_iid/p11_freezoff_iid_fedprox.yaml @@ -13,7 +13,7 @@ optimizer: FedProx profiling_time: 30 deadline: 50000 warmup_round: false -output_location: 'output' +output_location: 'output/p11' tensor_board_active: true clients_per_round: 3 
node_groups: diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_offload.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_offload.yaml index cb856cf6..b81032a8 100644 --- a/configs/p11_freezoff_iid/p11_freezoff_iid_offload.yaml +++ b/configs/p11_freezoff_iid/p11_freezoff_iid_offload.yaml @@ -12,7 +12,7 @@ offload_stategy: offload profiling_time: 30 deadline: 50000 warmup_round: false -output_location: 'output' +output_location: 'output/p11' tensor_board_active: true clients_per_round: 3 node_groups: diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_offload_strict.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_offload_strict.yaml index fed425ef..39135d17 100644 --- a/configs/p11_freezoff_iid/p11_freezoff_iid_offload_strict.yaml +++ b/configs/p11_freezoff_iid/p11_freezoff_iid_offload_strict.yaml @@ -10,10 +10,10 @@ cuda: false experiment_prefix: 'p11_freezoff_iid_offload_strict' offload_stategy: offload profiling_time: 30 -deadline: 5 +deadline: 26 deadline_threshold: 1 warmup_round: false -output_location: 'output' +output_location: 'output/p11' tensor_board_active: true clients_per_round: 3 node_groups: diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_tifl_adaptive.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_tifl_adaptive.yaml index 6f04b09f..d7a6d012 100644 --- a/configs/p11_freezoff_iid/p11_freezoff_iid_tifl_adaptive.yaml +++ b/configs/p11_freezoff_iid/p11_freezoff_iid_tifl_adaptive.yaml @@ -12,7 +12,7 @@ offload_stategy: tifl-adaptive profiling_time: 30 deadline: 50000 warmup_round: false -output_location: 'output' +output_location: 'output/p11' tensor_board_active: true clients_per_round: 3 node_groups: diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_tifl_basic.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_tifl_basic.yaml index 89e480d9..2c15a6a3 100644 --- a/configs/p11_freezoff_iid/p11_freezoff_iid_tifl_basic.yaml +++ b/configs/p11_freezoff_iid/p11_freezoff_iid_tifl_basic.yaml @@ -12,7 +12,7 @@ offload_stategy: tifl-basic 
profiling_time: 30 deadline: 50000 warmup_round: false -output_location: 'output' +output_location: 'output/p11' tensor_board_active: true clients_per_round: 3 node_groups: diff --git a/configs/p11_freezoff_iid/run.py b/configs/p11_freezoff_iid/run.py index 31e0bb61..b4e7f1db 100644 --- a/configs/p11_freezoff_iid/run.py +++ b/configs/p11_freezoff_iid/run.py @@ -5,13 +5,14 @@ generate_docker(name) base_path = 'configs/p11_freezoff_iid' exp_list = [ - 'p11_freezoff_iid_fedprox.yaml', - 'p11_freezoff_iid_fednova.yaml', - 'p11_freezoff_iid_offload.yaml', - 'p11_freezoff_iid_dyn_terminate_swyh.yaml', + # 'p11_freezoff_iid_fedprox.yaml', + # 'p11_freezoff_iid_fednova.yaml', + # 'p11_freezoff_iid_offload.yaml', + 'p11_freezoff_iid_offload_strict.yaml', + # 'p11_freezoff_iid_dyn_terminate_swyh.yaml', 'p11_freezoff_iid_fedavg.yaml', 'p11_freezoff_iid_tifl_adaptive.yaml', - 'p11_freezoff_iid_dyn_terminate.yaml', + # 'p11_freezoff_iid_dyn_terminate.yaml', 'p11_freezoff_iid_tifl_basic.yaml' ] exp_list = [f'{base_path}/{x}' for x in exp_list] diff --git a/configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_large.yaml b/configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_large.yaml index fa064936..a9567bf1 100644 --- a/configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_large.yaml +++ b/configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_large.yaml @@ -15,6 +15,7 @@ warmup_round: false output_location: 'output/p12' tensor_board_active: true clients_per_round: 6 +termination_percentage: 0.7 node_groups: slow: [1, 6] medium: [7, 12] diff --git a/configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_swyh_large.yaml b/configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_swyh_large.yaml index b7292675..0c089d8e 100644 --- a/configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_swyh_large.yaml +++ b/configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_swyh_large.yaml @@ -15,6 +15,7 @@ warmup_round: false 
output_location: 'output/p12' tensor_board_active: true clients_per_round: 6 +termination_percentage: 0.7 node_groups: slow: [1, 6] medium: [7, 12] diff --git a/configs/p12_freezoff_iid_large/p12_freezoff_iid_offload_strict_large.yaml b/configs/p12_freezoff_iid_large/p12_freezoff_iid_offload_strict_large.yaml new file mode 100644 index 00000000..3ff0a335 --- /dev/null +++ b/configs/p12_freezoff_iid_large/p12_freezoff_iid_offload_strict_large.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p12_freezoff_iid_offload_strict' +offload_stategy: offload +profiling_time: 20 +deadline: 7 +deadline_threshold: 2 +warmup_round: false +output_location: 'output/p12' +tensor_board_active: true +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p12_freezoff_iid_large/run.py b/configs/p12_freezoff_iid_large/run.py index 2619d167..af24f5d5 100644 --- a/configs/p12_freezoff_iid_large/run.py +++ b/configs/p12_freezoff_iid_large/run.py @@ -5,6 +5,7 @@ generate_docker(name) base_path = 'configs/p12_freezoff_iid_large' exp_list = [ + 'p12_freezoff_iid_offload_strict_large.yaml', 'p12_freezoff_iid_offload_large.yaml', 'p12_freezoff_iid_fedprox_large.yaml', 'p12_freezoff_iid_fednova_large.yaml', diff --git a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_dyn_terminate_large.yaml 
b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_dyn_terminate_large.yaml new file mode 100644 index 00000000..3ca4f5f5 --- /dev/null +++ b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_dyn_terminate_large.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p15_freezoff_non_iid_dyn_terminate' +offload_stategy: dynamic-terminate +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p15' +tensor_board_active: true +clients_per_round: 6 +termination_percentage: 0.7 +node_groups: + slow: [1, 2] + medium: [3, 4] + fast: [5, 6] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_dyn_terminate_swyh_large.yaml b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_dyn_terminate_swyh_large.yaml new file mode 100644 index 00000000..ec74786b --- /dev/null +++ b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_dyn_terminate_swyh_large.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p15_freezoff_non_iid_dyn_terminate_swyh' +offload_stategy: dynamic-terminate-swyh +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p15' 
+tensor_board_active: true +clients_per_round: 6 +termination_percentage: 0.7 +node_groups: + slow: [1, 2] + medium: [3, 4] + fast: [5, 6] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fedavg_large.yaml b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fedavg_large.yaml new file mode 100644 index 00000000..de16a2ec --- /dev/null +++ b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fedavg_large.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p15_freezoff_non_iid_fedavg' +offload_stategy: vanilla +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p15' +tensor_board_active: true +clients_per_round: 6 +node_groups: + slow: [1, 2] + medium: [3, 4] + fast: [5, 6] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fednova_large.yaml 
b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fednova_large.yaml new file mode 100644 index 00000000..28458841 --- /dev/null +++ b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fednova_large.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p15_freezoff_non_iid_fednova' +offload_stategy: vanilla +optimizer: FedNova +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p15' +tensor_board_active: true +clients_per_round: 6 +node_groups: + slow: [1, 2] + medium: [3, 4] + fast: [5, 6] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fedprox_large.yaml b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fedprox_large.yaml new file mode 100644 index 00000000..0f70e659 --- /dev/null +++ b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fedprox_large.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p15_freezoff_non_iid_fedprox' +offload_stategy: vanilla +optimizer: FedProx +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p15' +tensor_board_active: true +clients_per_round: 6 +node_groups: + slow: [1, 2] + 
medium: [3, 4] + fast: [5, 6] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_offload_large.yaml b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_offload_large.yaml new file mode 100644 index 00000000..ea0b4f7f --- /dev/null +++ b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_offload_large.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p15_freezoff_non_iid_offload' +offload_stategy: offload +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p15' +tensor_board_active: true +clients_per_round: 6 +node_groups: + slow: [1, 2] + medium: [3, 4] + fast: [5, 6] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_offload_strict_large.yaml b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_offload_strict_large.yaml new file mode 100644 index 00000000..74448f53 --- /dev/null +++ 
b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_offload_strict_large.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p15_freezoff_non_iid_offload_strict_large' +offload_stategy: offload +profiling_time: 30 +deadline: 7 +deadline_threshold: 2 +warmup_round: false +output_location: 'output/p15' +tensor_board_active: true +clients_per_round: 6 +node_groups: + slow: [1, 2] + medium: [3, 4] + fast: [5, 6] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_tifl_adaptive_large.yaml b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_tifl_adaptive_large.yaml new file mode 100644 index 00000000..9bb2c524 --- /dev/null +++ b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_tifl_adaptive_large.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p15_freezoff_non_iid_tifl_adaptive' +offload_stategy: tifl-adaptive +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p15' +tensor_board_active: true +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" 
(default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_tifl_basic_large.yaml b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_tifl_basic_large.yaml new file mode 100644 index 00000000..4a32ca6f --- /dev/null +++ b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_tifl_basic_large.yaml @@ -0,0 +1,34 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'p15_freezoff_non_iid_tifl_basic' +offload_stategy: tifl-basic +profiling_time: 30 +deadline: 50000 +warmup_round: false +output_location: 'output/p15' +tensor_board_active: true +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p15_freezoff_non_iid_large/run.py b/configs/p15_freezoff_non_iid_large/run.py new file mode 100644 index 00000000..7b6797e4 --- /dev/null +++ b/configs/p15_freezoff_non_iid_large/run.py @@ -0,0 +1,30 @@ +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + name = 'p11_freezoff' 
+ generate_docker(name) + base_path = 'configs/p15_freezoff_non_iid_large' + exp_list = [ + # 'p15_freezoff_non_iid_offload_strict_large.yaml', + # 'p15_freezoff_non_iid_offload_large.yaml', + # 'p15_freezoff_non_iid_fedprox_large.yaml', + # 'p15_freezoff_non_iid_fednova_large.yaml', + 'p15_freezoff_non_iid_dyn_terminate_swyh_large.yaml', + # 'p15_freezoff_non_iid_fedavg_large.yaml', + 'p15_freezoff_non_iid_dyn_terminate_large.yaml', + 'p15_freezoff_non_iid_tifl_adaptive_large.yaml', + 'p15_freezoff_non_iid_tifl_basic_large.yaml' + ] + exp_list = [f'{base_path}/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + + print('Done') + + + + From f7ae7a6a6dbc073eb1ef755e182f7ab85fe9087b Mon Sep 17 00:00:00 2001 From: bacox Date: Thu, 27 Jan 2022 23:57:42 +0100 Subject: [PATCH 36/73] Add time fields to epoch data --- fltk/util/results.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fltk/util/results.py b/fltk/util/results.py index a37fc8ad..900f340c 100644 --- a/fltk/util/results.py +++ b/fltk/util/results.py @@ -14,6 +14,9 @@ class EpochData: class_recall: Any training_process: int client_id: str = None + client_wall_time: float = 0 + global_wall_time: float = 0 + global_epoch_id: int = 0 def to_csv_line(self): delimeter = ',' From 62ed884c9af007cd79969ea478e3faaf25179800 Mon Sep 17 00:00:00 2001 From: Lydia Date: Fri, 28 Jan 2022 00:18:41 +0100 Subject: [PATCH 37/73] Rename dev config files --- .../dev/{dev_p2.yaml => dev_p2_fedavg.yaml} | 6 ++-- configs/dev/dev_p2_fedprox.yaml | 35 +++++++++++++++++++ configs/dev/run.py | 5 ++- 3 files changed, 42 insertions(+), 4 deletions(-) rename configs/dev/{dev_p2.yaml => dev_p2_fedavg.yaml} (89%) create mode 100644 configs/dev/dev_p2_fedprox.yaml diff --git a/configs/dev/dev_p2.yaml b/configs/dev/dev_p2_fedavg.yaml 
similarity index 89% rename from configs/dev/dev_p2.yaml rename to configs/dev/dev_p2_fedavg.yaml index d3047762..5984acd0 100644 --- a/configs/dev/dev_p2.yaml +++ b/configs/dev/dev_p2_fedavg.yaml @@ -7,12 +7,12 @@ net: FashionMNISTCNN dataset: fashion-mnist # Use cuda is available; setting to false will force CPU cuda: false -experiment_prefix: 'exp_dev_p2' -offload_stategy: offload +experiment_prefix: 'exp_dev_p2_fedavg' +offload_stategy: vanilla profiling_time: 100 deadline: 500 warmup_round: false -output_location: 'output' +output_location: 'output/dev_p2' tensor_board_active: true clients_per_round: 2 node_groups: diff --git a/configs/dev/dev_p2_fedprox.yaml b/configs/dev/dev_p2_fedprox.yaml new file mode 100644 index 00000000..1140d239 --- /dev/null +++ b/configs/dev/dev_p2_fedprox.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 5 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +experiment_prefix: 'exp_dev_p2_fedprox' +offload_stategy: vanilla +optimizer: FedProx +profiling_time: 100 +deadline: 500 +warmup_round: false +output_location: 'output/dev_p2' +tensor_board_active: true +clients_per_round: 2 +node_groups: + slow: [1, 1] + medium: [2, 2] + fast: [3, 3] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 2 diff --git a/configs/dev/run.py b/configs/dev/run.py index dc8f65f2..006c2f9d 100644 --- a/configs/dev/run.py +++ b/configs/dev/run.py @@ -4,7 +4,10 @@ name = 'dev' generate_docker(name) base_path = 'configs/dev' - exp_list = 
['dev_p2.yaml'] + exp_list = [ + 'dev_p2_fedavg.yaml', + 'dev_p2_fedprox.yaml', + ] exp_list = [f'{base_path}/{x}' for x in exp_list] first_prefix = '--build' for exp_cfg_file in exp_list: From 9969dde85568f9a691e31f3ec7b9cdd611de9e3a Mon Sep 17 00:00:00 2001 From: Lydia Date: Fri, 28 Jan 2022 01:04:12 +0100 Subject: [PATCH 38/73] Update stub files --- deploy/p11_freezoff/client_stub_slow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/p11_freezoff/client_stub_slow.yml b/deploy/p11_freezoff/client_stub_slow.yml index ae578071..75771735 100644 --- a/deploy/p11_freezoff/client_stub_slow.yml +++ b/deploy/p11_freezoff/client_stub_slow.yml @@ -22,4 +22,4 @@ client_name: # name can be anything deploy: resources: limits: - cpus: '0.25' \ No newline at end of file + cpus: '0.1' \ No newline at end of file From 185609b415cf2fe493e57bb14f8be3e9dd072c6f Mon Sep 17 00:00:00 2001 From: bacox Date: Fri, 28 Jan 2022 01:33:40 +0100 Subject: [PATCH 39/73] set p20-22 for ready --- configs/dev/descr.yaml | 31 ++++++++++++++++ configs/dev/dev_p2_fedavg.cfg.yaml | 3 ++ configs/dev/dev_p2_fedprox.cfg.yaml | 4 ++ configs/dev/{ => exps}/dev_p2_fedavg.yaml | 9 +++-- configs/dev/{ => exps}/dev_p2_fedprox.yaml | 12 +++--- configs/dev/gen.py | 27 ++++++++++++++ configs/dev/run.py | 2 +- .../p20_freezoff_iid_fmnist_cnn/descr.yaml | 32 ++++++++++++++++ .../dyn_terminate.cfg.yaml | 3 ++ .../dyn_terminate_swyh.cfg.yaml | 3 ++ .../exps/dyn_terminate.yaml | 36 ++++++++++++++++++ .../exps/dyn_terminate_swyh.yaml | 37 +++++++++++++++++++ .../exps/fedavg.yaml | 36 ++++++++++++++++++ .../exps/fednova.yaml | 37 +++++++++++++++++++ .../exps/fedprox.yaml | 37 +++++++++++++++++++ .../exps/offload.yaml | 36 ++++++++++++++++++ .../exps/offload_strict.yaml | 37 +++++++++++++++++++ .../exps/tifl_adaptive.yaml | 36 ++++++++++++++++++ .../exps/tifl_basic.yaml | 36 ++++++++++++++++++ .../fedavg.cfg.yaml | 3 ++ .../fednova.cfg.yaml | 4 ++ .../fedprox.cfg.yaml | 4 ++ 
configs/p20_freezoff_iid_fmnist_cnn/gen.py | 26 +++++++++++++ .../offload.cfg.yaml | 3 ++ .../offload_strict.cfg.yaml | 4 ++ configs/p20_freezoff_iid_fmnist_cnn/run.py | 30 +++++++++++++++ .../tifl_adaptive.cfg.yaml | 3 ++ .../tifl_basic.cfg.yaml | 3 ++ .../descr.yaml | 32 ++++++++++++++++ .../dyn_terminate.cfg.yaml | 3 ++ .../dyn_terminate_swyh.cfg.yaml | 3 ++ .../fedavg.cfg.yaml | 3 ++ .../fednova.cfg.yaml | 4 ++ .../fedprox.cfg.yaml | 4 ++ .../p21_freezoff_non_iid_fmnist_cnn/gen.py | 26 +++++++++++++ .../offload.cfg.yaml | 3 ++ .../offload_strict.cfg.yaml | 4 ++ .../p21_freezoff_non_iid_fmnist_cnn/run.py | 30 +++++++++++++++ .../tifl_adaptive.cfg.yaml | 3 ++ .../tifl_basic.cfg.yaml | 3 ++ .../p22_freezoff_iid_cifar10_cnn/descr.yaml | 32 ++++++++++++++++ .../dyn_terminate.cfg.yaml | 3 ++ .../dyn_terminate_swyh.cfg.yaml | 3 ++ .../exps/dyn_terminate.yaml | 36 ++++++++++++++++++ .../exps/dyn_terminate_swyh.yaml | 37 +++++++++++++++++++ .../exps/fedavg.yaml | 36 ++++++++++++++++++ .../exps/fednova.yaml | 37 +++++++++++++++++++ .../exps/fedprox.yaml | 37 +++++++++++++++++++ .../exps/offload.yaml | 36 ++++++++++++++++++ .../exps/offload_strict.yaml | 37 +++++++++++++++++++ .../exps/tifl_adaptive.yaml | 36 ++++++++++++++++++ .../exps/tifl_basic.yaml | 36 ++++++++++++++++++ .../fedavg.cfg.yaml | 3 ++ .../fednova.cfg.yaml | 4 ++ .../fedprox.cfg.yaml | 4 ++ configs/p22_freezoff_iid_cifar10_cnn/gen.py | 26 +++++++++++++ .../offload.cfg.yaml | 3 ++ .../offload_strict.cfg.yaml | 4 ++ configs/p22_freezoff_iid_cifar10_cnn/run.py | 30 +++++++++++++++ .../tifl_adaptive.cfg.yaml | 3 ++ .../tifl_basic.cfg.yaml | 3 ++ .../descr.yaml | 32 ++++++++++++++++ .../dyn_terminate.cfg.yaml | 3 ++ .../dyn_terminate_swyh.cfg.yaml | 3 ++ .../exps/dyn_terminate.yaml | 36 ++++++++++++++++++ .../exps/dyn_terminate_swyh.yaml | 37 +++++++++++++++++++ .../exps/fedavg.yaml | 36 ++++++++++++++++++ .../exps/fednova.yaml | 37 +++++++++++++++++++ .../exps/fedprox.yaml | 37 +++++++++++++++++++ 
.../exps/offload.yaml | 36 ++++++++++++++++++ .../exps/offload_strict.yaml | 37 +++++++++++++++++++ .../exps/tifl_adaptive.yaml | 36 ++++++++++++++++++ .../exps/tifl_basic.yaml | 36 ++++++++++++++++++ .../fedavg.cfg.yaml | 3 ++ .../fednova.cfg.yaml | 4 ++ .../fedprox.cfg.yaml | 4 ++ .../p23_freezoff_non_iid_cifar10_cnn/gen.py | 26 +++++++++++++ .../offload.cfg.yaml | 3 ++ .../offload_strict.cfg.yaml | 4 ++ .../p23_freezoff_non_iid_cifar10_cnn/run.py | 30 +++++++++++++++ .../tifl_adaptive.cfg.yaml | 3 ++ .../tifl_basic.cfg.yaml | 3 ++ 82 files changed, 1534 insertions(+), 10 deletions(-) create mode 100644 configs/dev/descr.yaml create mode 100644 configs/dev/dev_p2_fedavg.cfg.yaml create mode 100644 configs/dev/dev_p2_fedprox.cfg.yaml rename configs/dev/{ => exps}/dev_p2_fedavg.yaml (91%) rename configs/dev/{ => exps}/dev_p2_fedprox.yaml (91%) create mode 100644 configs/dev/gen.py create mode 100644 configs/p20_freezoff_iid_fmnist_cnn/descr.yaml create mode 100644 configs/p20_freezoff_iid_fmnist_cnn/dyn_terminate.cfg.yaml create mode 100644 configs/p20_freezoff_iid_fmnist_cnn/dyn_terminate_swyh.cfg.yaml create mode 100644 configs/p20_freezoff_iid_fmnist_cnn/exps/dyn_terminate.yaml create mode 100644 configs/p20_freezoff_iid_fmnist_cnn/exps/dyn_terminate_swyh.yaml create mode 100644 configs/p20_freezoff_iid_fmnist_cnn/exps/fedavg.yaml create mode 100644 configs/p20_freezoff_iid_fmnist_cnn/exps/fednova.yaml create mode 100644 configs/p20_freezoff_iid_fmnist_cnn/exps/fedprox.yaml create mode 100644 configs/p20_freezoff_iid_fmnist_cnn/exps/offload.yaml create mode 100644 configs/p20_freezoff_iid_fmnist_cnn/exps/offload_strict.yaml create mode 100644 configs/p20_freezoff_iid_fmnist_cnn/exps/tifl_adaptive.yaml create mode 100644 configs/p20_freezoff_iid_fmnist_cnn/exps/tifl_basic.yaml create mode 100644 configs/p20_freezoff_iid_fmnist_cnn/fedavg.cfg.yaml create mode 100644 configs/p20_freezoff_iid_fmnist_cnn/fednova.cfg.yaml create mode 100644 
configs/p20_freezoff_iid_fmnist_cnn/fedprox.cfg.yaml create mode 100644 configs/p20_freezoff_iid_fmnist_cnn/gen.py create mode 100644 configs/p20_freezoff_iid_fmnist_cnn/offload.cfg.yaml create mode 100644 configs/p20_freezoff_iid_fmnist_cnn/offload_strict.cfg.yaml create mode 100644 configs/p20_freezoff_iid_fmnist_cnn/run.py create mode 100644 configs/p20_freezoff_iid_fmnist_cnn/tifl_adaptive.cfg.yaml create mode 100644 configs/p20_freezoff_iid_fmnist_cnn/tifl_basic.cfg.yaml create mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/descr.yaml create mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/dyn_terminate.cfg.yaml create mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/dyn_terminate_swyh.cfg.yaml create mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/fedavg.cfg.yaml create mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/fednova.cfg.yaml create mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/fedprox.cfg.yaml create mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/gen.py create mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/offload.cfg.yaml create mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/offload_strict.cfg.yaml create mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/run.py create mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/tifl_adaptive.cfg.yaml create mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/tifl_basic.cfg.yaml create mode 100644 configs/p22_freezoff_iid_cifar10_cnn/descr.yaml create mode 100644 configs/p22_freezoff_iid_cifar10_cnn/dyn_terminate.cfg.yaml create mode 100644 configs/p22_freezoff_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml create mode 100644 configs/p22_freezoff_iid_cifar10_cnn/exps/dyn_terminate.yaml create mode 100644 configs/p22_freezoff_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml create mode 100644 configs/p22_freezoff_iid_cifar10_cnn/exps/fedavg.yaml create mode 100644 configs/p22_freezoff_iid_cifar10_cnn/exps/fednova.yaml create mode 100644 
configs/p22_freezoff_iid_cifar10_cnn/exps/fedprox.yaml create mode 100644 configs/p22_freezoff_iid_cifar10_cnn/exps/offload.yaml create mode 100644 configs/p22_freezoff_iid_cifar10_cnn/exps/offload_strict.yaml create mode 100644 configs/p22_freezoff_iid_cifar10_cnn/exps/tifl_adaptive.yaml create mode 100644 configs/p22_freezoff_iid_cifar10_cnn/exps/tifl_basic.yaml create mode 100644 configs/p22_freezoff_iid_cifar10_cnn/fedavg.cfg.yaml create mode 100644 configs/p22_freezoff_iid_cifar10_cnn/fednova.cfg.yaml create mode 100644 configs/p22_freezoff_iid_cifar10_cnn/fedprox.cfg.yaml create mode 100644 configs/p22_freezoff_iid_cifar10_cnn/gen.py create mode 100644 configs/p22_freezoff_iid_cifar10_cnn/offload.cfg.yaml create mode 100644 configs/p22_freezoff_iid_cifar10_cnn/offload_strict.cfg.yaml create mode 100644 configs/p22_freezoff_iid_cifar10_cnn/run.py create mode 100644 configs/p22_freezoff_iid_cifar10_cnn/tifl_adaptive.cfg.yaml create mode 100644 configs/p22_freezoff_iid_cifar10_cnn/tifl_basic.cfg.yaml create mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/descr.yaml create mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/dyn_terminate.cfg.yaml create mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml create mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate.yaml create mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml create mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/exps/fedavg.yaml create mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/exps/fednova.yaml create mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/exps/fedprox.yaml create mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/exps/offload.yaml create mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/exps/offload_strict.yaml create mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/exps/tifl_adaptive.yaml create mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/exps/tifl_basic.yaml 
create mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/fedavg.cfg.yaml create mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/fednova.cfg.yaml create mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/fedprox.cfg.yaml create mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/gen.py create mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/offload.cfg.yaml create mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/offload_strict.cfg.yaml create mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/run.py create mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/tifl_adaptive.cfg.yaml create mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/tifl_basic.cfg.yaml diff --git a/configs/dev/descr.yaml b/configs/dev/descr.yaml new file mode 100644 index 00000000..fab64941 --- /dev/null +++ b/configs/dev/descr.yaml @@ -0,0 +1,31 @@ +--- +# Experiment configuration +total_epochs: 11 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 100 +warmup_round: false +output_location: 'output/dev_p2' +tensor_board_active: true +clients_per_round: 2 +node_groups: + slow: [1, 1] + medium: [2, 2] + fast: [3, 3] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 2 diff --git a/configs/dev/dev_p2_fedavg.cfg.yaml b/configs/dev/dev_p2_fedavg.cfg.yaml new file mode 100644 index 00000000..ecb5bc3e --- /dev/null +++ b/configs/dev/dev_p2_fedavg.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500 \ No newline at end of 
file diff --git a/configs/dev/dev_p2_fedprox.cfg.yaml b/configs/dev/dev_p2_fedprox.cfg.yaml new file mode 100644 index 00000000..7b4cc2bb --- /dev/null +++ b/configs/dev/dev_p2_fedprox.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500 +optimizer: FedProx diff --git a/configs/dev/dev_p2_fedavg.yaml b/configs/dev/exps/dev_p2_fedavg.yaml similarity index 91% rename from configs/dev/dev_p2_fedavg.yaml rename to configs/dev/exps/dev_p2_fedavg.yaml index 5984acd0..a253f11d 100644 --- a/configs/dev/dev_p2_fedavg.yaml +++ b/configs/dev/exps/dev_p2_fedavg.yaml @@ -1,16 +1,13 @@ --- # Experiment configuration -total_epochs: 5 +total_epochs: 11 epochs_per_cycle: 1 wait_for_clients: true net: FashionMNISTCNN dataset: fashion-mnist # Use cuda is available; setting to false will force CPU cuda: false -experiment_prefix: 'exp_dev_p2_fedavg' -offload_stategy: vanilla profiling_time: 100 -deadline: 500 warmup_round: false output_location: 'output/dev_p2' tensor_board_active: true @@ -32,3 +29,7 @@ system: # nic: 'enp3s0' clients: amount: 2 +# Individual configuration +offload_stategy: vanilla +deadline: 500 +experiment_prefix: 'dev_p2_fedavg' diff --git a/configs/dev/dev_p2_fedprox.yaml b/configs/dev/exps/dev_p2_fedprox.yaml similarity index 91% rename from configs/dev/dev_p2_fedprox.yaml rename to configs/dev/exps/dev_p2_fedprox.yaml index 1140d239..d1884c0a 100644 --- a/configs/dev/dev_p2_fedprox.yaml +++ b/configs/dev/exps/dev_p2_fedprox.yaml @@ -1,17 +1,13 @@ --- # Experiment configuration -total_epochs: 5 +total_epochs: 11 epochs_per_cycle: 1 wait_for_clients: true net: FashionMNISTCNN dataset: fashion-mnist # Use cuda is available; setting to false will force CPU cuda: false -experiment_prefix: 'exp_dev_p2_fedprox' -offload_stategy: vanilla -optimizer: FedProx profiling_time: 100 -deadline: 500 warmup_round: false output_location: 'output/dev_p2' tensor_board_active: true @@ -33,3 +29,9 @@ system: # nic: 'enp3s0' clients: amount: 2 
+# Individual configuration +offload_stategy: vanilla +deadline: 500 +optimizer: FedProx + +experiment_prefix: 'dev_p2_fedprox' diff --git a/configs/dev/gen.py b/configs/dev/gen.py new file mode 100644 index 00000000..267dadf6 --- /dev/null +++ b/configs/dev/gen.py @@ -0,0 +1,27 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = 'configs/dev' + + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/dev/run.py b/configs/dev/run.py index 006c2f9d..4193edce 100644 --- a/configs/dev/run.py +++ b/configs/dev/run.py @@ -8,7 +8,7 @@ 'dev_p2_fedavg.yaml', 'dev_p2_fedprox.yaml', ] - exp_list = [f'{base_path}/{x}' for x in exp_list] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] first_prefix = '--build' for exp_cfg_file in exp_list: cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' diff --git a/configs/p20_freezoff_iid_fmnist_cnn/descr.yaml b/configs/p20_freezoff_iid_fmnist_cnn/descr.yaml new file mode 100644 index 00000000..a1241ede --- /dev/null +++ b/configs/p20_freezoff_iid_fmnist_cnn/descr.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 
+warmup_round: false +output_location: 'output/p20' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p20_freezoff_iid_fmnist_cnn/dyn_terminate.cfg.yaml b/configs/p20_freezoff_iid_fmnist_cnn/dyn_terminate.cfg.yaml new file mode 100644 index 00000000..279369ef --- /dev/null +++ b/configs/p20_freezoff_iid_fmnist_cnn/dyn_terminate.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 \ No newline at end of file diff --git a/configs/p20_freezoff_iid_fmnist_cnn/dyn_terminate_swyh.cfg.yaml b/configs/p20_freezoff_iid_fmnist_cnn/dyn_terminate_swyh.cfg.yaml new file mode 100644 index 00000000..578998b4 --- /dev/null +++ b/configs/p20_freezoff_iid_fmnist_cnn/dyn_terminate_swyh.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 diff --git a/configs/p20_freezoff_iid_fmnist_cnn/exps/dyn_terminate.yaml b/configs/p20_freezoff_iid_fmnist_cnn/exps/dyn_terminate.yaml new file mode 100644 index 00000000..3aad2be6 --- /dev/null +++ b/configs/p20_freezoff_iid_fmnist_cnn/exps/dyn_terminate.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p20' 
+tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 +experiment_prefix: 'p20_freezoff_iid_fmnist_cnn_dyn_terminate' diff --git a/configs/p20_freezoff_iid_fmnist_cnn/exps/dyn_terminate_swyh.yaml b/configs/p20_freezoff_iid_fmnist_cnn/exps/dyn_terminate_swyh.yaml new file mode 100644 index 00000000..94567f35 --- /dev/null +++ b/configs/p20_freezoff_iid_fmnist_cnn/exps/dyn_terminate_swyh.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p20' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration 
+offload_stategy: dynamic-terminate-swyh +deadline: 500000 + +experiment_prefix: 'p20_freezoff_iid_fmnist_cnn_dyn_terminate_swyh' diff --git a/configs/p20_freezoff_iid_fmnist_cnn/exps/fedavg.yaml b/configs/p20_freezoff_iid_fmnist_cnn/exps/fedavg.yaml new file mode 100644 index 00000000..4189dcb5 --- /dev/null +++ b/configs/p20_freezoff_iid_fmnist_cnn/exps/fedavg.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p20' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +experiment_prefix: 'p20_freezoff_iid_fmnist_cnn_fedavg' diff --git a/configs/p20_freezoff_iid_fmnist_cnn/exps/fednova.yaml b/configs/p20_freezoff_iid_fmnist_cnn/exps/fednova.yaml new file mode 100644 index 00000000..d8e9c159 --- /dev/null +++ b/configs/p20_freezoff_iid_fmnist_cnn/exps/fednova.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p20' +tensor_board_active: true 
+termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova +experiment_prefix: 'p20_freezoff_iid_fmnist_cnn_fednova' diff --git a/configs/p20_freezoff_iid_fmnist_cnn/exps/fedprox.yaml b/configs/p20_freezoff_iid_fmnist_cnn/exps/fedprox.yaml new file mode 100644 index 00000000..0599b51f --- /dev/null +++ b/configs/p20_freezoff_iid_fmnist_cnn/exps/fedprox.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p20' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx 
+experiment_prefix: 'p20_freezoff_iid_fmnist_cnn_fedprox' diff --git a/configs/p20_freezoff_iid_fmnist_cnn/exps/offload.yaml b/configs/p20_freezoff_iid_fmnist_cnn/exps/offload.yaml new file mode 100644 index 00000000..624f8fbd --- /dev/null +++ b/configs/p20_freezoff_iid_fmnist_cnn/exps/offload.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p20' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: offload +deadline: 500000 +experiment_prefix: 'p20_freezoff_iid_fmnist_cnn_offload' diff --git a/configs/p20_freezoff_iid_fmnist_cnn/exps/offload_strict.yaml b/configs/p20_freezoff_iid_fmnist_cnn/exps/offload_strict.yaml new file mode 100644 index 00000000..9b4a6d51 --- /dev/null +++ b/configs/p20_freezoff_iid_fmnist_cnn/exps/offload_strict.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p20' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 
+node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: offload +deadline: 7 +deadline_threshold: 2 +experiment_prefix: 'p20_freezoff_iid_fmnist_cnn_offload_strict' diff --git a/configs/p20_freezoff_iid_fmnist_cnn/exps/tifl_adaptive.yaml b/configs/p20_freezoff_iid_fmnist_cnn/exps/tifl_adaptive.yaml new file mode 100644 index 00000000..17f8aac7 --- /dev/null +++ b/configs/p20_freezoff_iid_fmnist_cnn/exps/tifl_adaptive.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p20' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 +experiment_prefix: 
'p20_freezoff_iid_fmnist_cnn_tifl_adaptive' diff --git a/configs/p20_freezoff_iid_fmnist_cnn/exps/tifl_basic.yaml b/configs/p20_freezoff_iid_fmnist_cnn/exps/tifl_basic.yaml new file mode 100644 index 00000000..3da03869 --- /dev/null +++ b/configs/p20_freezoff_iid_fmnist_cnn/exps/tifl_basic.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p20' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 +experiment_prefix: 'p20_freezoff_iid_fmnist_cnn_tifl_basic' diff --git a/configs/p20_freezoff_iid_fmnist_cnn/fedavg.cfg.yaml b/configs/p20_freezoff_iid_fmnist_cnn/fedavg.cfg.yaml new file mode 100644 index 00000000..3b4615d1 --- /dev/null +++ b/configs/p20_freezoff_iid_fmnist_cnn/fedavg.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 \ No newline at end of file diff --git a/configs/p20_freezoff_iid_fmnist_cnn/fednova.cfg.yaml b/configs/p20_freezoff_iid_fmnist_cnn/fednova.cfg.yaml new file mode 100644 index 00000000..ca0e2a55 --- /dev/null +++ b/configs/p20_freezoff_iid_fmnist_cnn/fednova.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration 
+offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova \ No newline at end of file diff --git a/configs/p20_freezoff_iid_fmnist_cnn/fedprox.cfg.yaml b/configs/p20_freezoff_iid_fmnist_cnn/fedprox.cfg.yaml new file mode 100644 index 00000000..f66490e9 --- /dev/null +++ b/configs/p20_freezoff_iid_fmnist_cnn/fedprox.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx \ No newline at end of file diff --git a/configs/p20_freezoff_iid_fmnist_cnn/gen.py b/configs/p20_freezoff_iid_fmnist_cnn/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/p20_freezoff_iid_fmnist_cnn/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/p20_freezoff_iid_fmnist_cnn/offload.cfg.yaml b/configs/p20_freezoff_iid_fmnist_cnn/offload.cfg.yaml new file mode 100644 index 00000000..3febf08b --- /dev/null +++ b/configs/p20_freezoff_iid_fmnist_cnn/offload.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: offload +deadline: 500000 \ No newline at end of file diff --git a/configs/p20_freezoff_iid_fmnist_cnn/offload_strict.cfg.yaml 
b/configs/p20_freezoff_iid_fmnist_cnn/offload_strict.cfg.yaml new file mode 100644 index 00000000..f07a9c58 --- /dev/null +++ b/configs/p20_freezoff_iid_fmnist_cnn/offload_strict.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 7 +deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p20_freezoff_iid_fmnist_cnn/run.py b/configs/p20_freezoff_iid_fmnist_cnn/run.py new file mode 100644 index 00000000..3f289cc1 --- /dev/null +++ b/configs/p20_freezoff_iid_fmnist_cnn/run.py @@ -0,0 +1,30 @@ +from pathlib import Path + +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + name = 'p11_freezoff' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + 'fedavg.yaml', + 'offload_strict.yaml', + 'fednova.yaml', + 'fedprox.yaml', + 'offload.yaml', + 'tifl_adaptive.yaml', + 'tifl_basic.yaml', + 'dyn_terminate_swyh.yaml', + 'dyn_terminate.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + + print('Done') + + diff --git a/configs/p20_freezoff_iid_fmnist_cnn/tifl_adaptive.cfg.yaml b/configs/p20_freezoff_iid_fmnist_cnn/tifl_adaptive.cfg.yaml new file mode 100644 index 00000000..e0ca9fbd --- /dev/null +++ b/configs/p20_freezoff_iid_fmnist_cnn/tifl_adaptive.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 \ No newline at end of file diff --git a/configs/p20_freezoff_iid_fmnist_cnn/tifl_basic.cfg.yaml b/configs/p20_freezoff_iid_fmnist_cnn/tifl_basic.cfg.yaml new file mode 100644 index 00000000..b12b53b3 --- /dev/null +++ b/configs/p20_freezoff_iid_fmnist_cnn/tifl_basic.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration 
+offload_stategy: tifl-basic +deadline: 500000 \ No newline at end of file diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/descr.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/descr.yaml new file mode 100644 index 00000000..0c77063a --- /dev/null +++ b/configs/p21_freezoff_non_iid_fmnist_cnn/descr.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p21' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/dyn_terminate.cfg.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/dyn_terminate.cfg.yaml new file mode 100644 index 00000000..279369ef --- /dev/null +++ b/configs/p21_freezoff_non_iid_fmnist_cnn/dyn_terminate.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 \ No newline at end of file diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/dyn_terminate_swyh.cfg.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/dyn_terminate_swyh.cfg.yaml new file mode 100644 index 00000000..578998b4 --- /dev/null +++ b/configs/p21_freezoff_non_iid_fmnist_cnn/dyn_terminate_swyh.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate-swyh 
+deadline: 500000 diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/fedavg.cfg.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/fedavg.cfg.yaml new file mode 100644 index 00000000..3b4615d1 --- /dev/null +++ b/configs/p21_freezoff_non_iid_fmnist_cnn/fedavg.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 \ No newline at end of file diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/fednova.cfg.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/fednova.cfg.yaml new file mode 100644 index 00000000..ca0e2a55 --- /dev/null +++ b/configs/p21_freezoff_non_iid_fmnist_cnn/fednova.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova \ No newline at end of file diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/fedprox.cfg.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/fedprox.cfg.yaml new file mode 100644 index 00000000..f66490e9 --- /dev/null +++ b/configs/p21_freezoff_non_iid_fmnist_cnn/fedprox.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx \ No newline at end of file diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/gen.py b/configs/p21_freezoff_non_iid_fmnist_cnn/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/p21_freezoff_non_iid_fmnist_cnn/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: 
\'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/offload.cfg.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/offload.cfg.yaml new file mode 100644 index 00000000..3febf08b --- /dev/null +++ b/configs/p21_freezoff_non_iid_fmnist_cnn/offload.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: offload +deadline: 500000 \ No newline at end of file diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/offload_strict.cfg.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/offload_strict.cfg.yaml new file mode 100644 index 00000000..f07a9c58 --- /dev/null +++ b/configs/p21_freezoff_non_iid_fmnist_cnn/offload_strict.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 7 +deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/run.py b/configs/p21_freezoff_non_iid_fmnist_cnn/run.py new file mode 100644 index 00000000..3f289cc1 --- /dev/null +++ b/configs/p21_freezoff_non_iid_fmnist_cnn/run.py @@ -0,0 +1,30 @@ +from pathlib import Path + +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + name = 'p11_freezoff' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + 'fedavg.yaml', + 'offload_strict.yaml', + 'fednova.yaml', + 'fedprox.yaml', + 'offload.yaml', + 'tifl_adaptive.yaml', + 'tifl_basic.yaml', + 'dyn_terminate_swyh.yaml', + 'dyn_terminate.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + + 
print('Done') + + diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/tifl_adaptive.cfg.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/tifl_adaptive.cfg.yaml new file mode 100644 index 00000000..e0ca9fbd --- /dev/null +++ b/configs/p21_freezoff_non_iid_fmnist_cnn/tifl_adaptive.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 \ No newline at end of file diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/tifl_basic.cfg.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/tifl_basic.cfg.yaml new file mode 100644 index 00000000..b12b53b3 --- /dev/null +++ b/configs/p21_freezoff_non_iid_fmnist_cnn/tifl_basic.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 \ No newline at end of file diff --git a/configs/p22_freezoff_iid_cifar10_cnn/descr.yaml b/configs/p22_freezoff_iid_cifar10_cnn/descr.yaml new file mode 100644 index 00000000..b98f9f11 --- /dev/null +++ b/configs/p22_freezoff_iid_cifar10_cnn/descr.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p22' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p22_freezoff_iid_cifar10_cnn/dyn_terminate.cfg.yaml 
b/configs/p22_freezoff_iid_cifar10_cnn/dyn_terminate.cfg.yaml new file mode 100644 index 00000000..279369ef --- /dev/null +++ b/configs/p22_freezoff_iid_cifar10_cnn/dyn_terminate.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 \ No newline at end of file diff --git a/configs/p22_freezoff_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml b/configs/p22_freezoff_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml new file mode 100644 index 00000000..578998b4 --- /dev/null +++ b/configs/p22_freezoff_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 diff --git a/configs/p22_freezoff_iid_cifar10_cnn/exps/dyn_terminate.yaml b/configs/p22_freezoff_iid_cifar10_cnn/exps/dyn_terminate.yaml new file mode 100644 index 00000000..94c0963c --- /dev/null +++ b/configs/p22_freezoff_iid_cifar10_cnn/exps/dyn_terminate.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p22' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 +experiment_prefix: 
'p22_freezoff_iid_cifar10_cnn_dyn_terminate' diff --git a/configs/p22_freezoff_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml b/configs/p22_freezoff_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml new file mode 100644 index 00000000..8ecdd49a --- /dev/null +++ b/configs/p22_freezoff_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p22' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 + +experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_dyn_terminate_swyh' diff --git a/configs/p22_freezoff_iid_cifar10_cnn/exps/fedavg.yaml b/configs/p22_freezoff_iid_cifar10_cnn/exps/fedavg.yaml new file mode 100644 index 00000000..57b3ad7e --- /dev/null +++ b/configs/p22_freezoff_iid_cifar10_cnn/exps/fedavg.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p22' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 
6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_fedavg' diff --git a/configs/p22_freezoff_iid_cifar10_cnn/exps/fednova.yaml b/configs/p22_freezoff_iid_cifar10_cnn/exps/fednova.yaml new file mode 100644 index 00000000..75dc378f --- /dev/null +++ b/configs/p22_freezoff_iid_cifar10_cnn/exps/fednova.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p22' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova +experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_fednova' diff --git 
a/configs/p22_freezoff_iid_cifar10_cnn/exps/fedprox.yaml b/configs/p22_freezoff_iid_cifar10_cnn/exps/fedprox.yaml new file mode 100644 index 00000000..ddba20a8 --- /dev/null +++ b/configs/p22_freezoff_iid_cifar10_cnn/exps/fedprox.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p22' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx +experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_fedprox' diff --git a/configs/p22_freezoff_iid_cifar10_cnn/exps/offload.yaml b/configs/p22_freezoff_iid_cifar10_cnn/exps/offload.yaml new file mode 100644 index 00000000..66785fd7 --- /dev/null +++ b/configs/p22_freezoff_iid_cifar10_cnn/exps/offload.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p22' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # 
"limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: offload +deadline: 500000 +experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_offload' diff --git a/configs/p22_freezoff_iid_cifar10_cnn/exps/offload_strict.yaml b/configs/p22_freezoff_iid_cifar10_cnn/exps/offload_strict.yaml new file mode 100644 index 00000000..d11399f4 --- /dev/null +++ b/configs/p22_freezoff_iid_cifar10_cnn/exps/offload_strict.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p22' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: offload +deadline: 7 +deadline_threshold: 2 +experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_offload_strict' diff --git a/configs/p22_freezoff_iid_cifar10_cnn/exps/tifl_adaptive.yaml 
b/configs/p22_freezoff_iid_cifar10_cnn/exps/tifl_adaptive.yaml new file mode 100644 index 00000000..00519096 --- /dev/null +++ b/configs/p22_freezoff_iid_cifar10_cnn/exps/tifl_adaptive.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p22' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 +experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_tifl_adaptive' diff --git a/configs/p22_freezoff_iid_cifar10_cnn/exps/tifl_basic.yaml b/configs/p22_freezoff_iid_cifar10_cnn/exps/tifl_basic.yaml new file mode 100644 index 00000000..cd2109d2 --- /dev/null +++ b/configs/p22_freezoff_iid_cifar10_cnn/exps/tifl_basic.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p22' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || 
"uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 +experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_tifl_basic' diff --git a/configs/p22_freezoff_iid_cifar10_cnn/fedavg.cfg.yaml b/configs/p22_freezoff_iid_cifar10_cnn/fedavg.cfg.yaml new file mode 100644 index 00000000..3b4615d1 --- /dev/null +++ b/configs/p22_freezoff_iid_cifar10_cnn/fedavg.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 \ No newline at end of file diff --git a/configs/p22_freezoff_iid_cifar10_cnn/fednova.cfg.yaml b/configs/p22_freezoff_iid_cifar10_cnn/fednova.cfg.yaml new file mode 100644 index 00000000..ca0e2a55 --- /dev/null +++ b/configs/p22_freezoff_iid_cifar10_cnn/fednova.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova \ No newline at end of file diff --git a/configs/p22_freezoff_iid_cifar10_cnn/fedprox.cfg.yaml b/configs/p22_freezoff_iid_cifar10_cnn/fedprox.cfg.yaml new file mode 100644 index 00000000..f66490e9 --- /dev/null +++ b/configs/p22_freezoff_iid_cifar10_cnn/fedprox.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx \ No newline at end of file diff --git a/configs/p22_freezoff_iid_cifar10_cnn/gen.py b/configs/p22_freezoff_iid_cifar10_cnn/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/p22_freezoff_iid_cifar10_cnn/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + 
path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/p22_freezoff_iid_cifar10_cnn/offload.cfg.yaml b/configs/p22_freezoff_iid_cifar10_cnn/offload.cfg.yaml new file mode 100644 index 00000000..3febf08b --- /dev/null +++ b/configs/p22_freezoff_iid_cifar10_cnn/offload.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: offload +deadline: 500000 \ No newline at end of file diff --git a/configs/p22_freezoff_iid_cifar10_cnn/offload_strict.cfg.yaml b/configs/p22_freezoff_iid_cifar10_cnn/offload_strict.cfg.yaml new file mode 100644 index 00000000..f07a9c58 --- /dev/null +++ b/configs/p22_freezoff_iid_cifar10_cnn/offload_strict.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 7 +deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p22_freezoff_iid_cifar10_cnn/run.py b/configs/p22_freezoff_iid_cifar10_cnn/run.py new file mode 100644 index 00000000..3f289cc1 --- /dev/null +++ b/configs/p22_freezoff_iid_cifar10_cnn/run.py @@ -0,0 +1,30 @@ +from pathlib import Path + +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + name = 'p11_freezoff' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + 'fedavg.yaml', + 'offload_strict.yaml', + 
'fednova.yaml', + 'fedprox.yaml', + 'offload.yaml', + 'tifl_adaptive.yaml', + 'tifl_basic.yaml', + 'dyn_terminate_swyh.yaml', + 'dyn_terminate.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + + print('Done') + + diff --git a/configs/p22_freezoff_iid_cifar10_cnn/tifl_adaptive.cfg.yaml b/configs/p22_freezoff_iid_cifar10_cnn/tifl_adaptive.cfg.yaml new file mode 100644 index 00000000..e0ca9fbd --- /dev/null +++ b/configs/p22_freezoff_iid_cifar10_cnn/tifl_adaptive.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 \ No newline at end of file diff --git a/configs/p22_freezoff_iid_cifar10_cnn/tifl_basic.cfg.yaml b/configs/p22_freezoff_iid_cifar10_cnn/tifl_basic.cfg.yaml new file mode 100644 index 00000000..b12b53b3 --- /dev/null +++ b/configs/p22_freezoff_iid_cifar10_cnn/tifl_basic.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 \ No newline at end of file diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/descr.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/descr.yaml new file mode 100644 index 00000000..1fd1bf0c --- /dev/null +++ b/configs/p23_freezoff_non_iid_cifar10_cnn/descr.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p23' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" 
+#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/dyn_terminate.cfg.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/dyn_terminate.cfg.yaml new file mode 100644 index 00000000..279369ef --- /dev/null +++ b/configs/p23_freezoff_non_iid_cifar10_cnn/dyn_terminate.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 \ No newline at end of file diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml new file mode 100644 index 00000000..578998b4 --- /dev/null +++ b/configs/p23_freezoff_non_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate.yaml new file mode 100644 index 00000000..fc4aceea --- /dev/null +++ b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p23' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # 
"limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 +experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_dyn_terminate' diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml new file mode 100644 index 00000000..38ec7094 --- /dev/null +++ b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p23' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 + +experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_dyn_terminate_swyh' diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/fedavg.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/fedavg.yaml new file 
mode 100644 index 00000000..fc9fb303 --- /dev/null +++ b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/fedavg.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p23' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_fedavg' diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/fednova.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/fednova.yaml new file mode 100644 index 00000000..9f61a507 --- /dev/null +++ b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/fednova.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p23' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q 
sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova +experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_fednova' diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/fedprox.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/fedprox.yaml new file mode 100644 index 00000000..c53e7753 --- /dev/null +++ b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/fedprox.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p23' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx +experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_fedprox' diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/offload.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/offload.yaml new file mode 100644 index 00000000..c7dd2978 --- /dev/null +++ 
b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/offload.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p23' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: offload +deadline: 500000 +experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_offload' diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/offload_strict.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/offload_strict.yaml new file mode 100644 index 00000000..e52c19d4 --- /dev/null +++ b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/offload_strict.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p23' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || 
"uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: offload +deadline: 7 +deadline_threshold: 2 +experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_offload_strict' diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/tifl_adaptive.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/tifl_adaptive.yaml new file mode 100644 index 00000000..cefa2ae0 --- /dev/null +++ b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/tifl_adaptive.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p23' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 +experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_tifl_adaptive' diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/tifl_basic.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/tifl_basic.yaml new file mode 100644 index 00000000..284f47b5 --- /dev/null +++ 
b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/tifl_basic.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p23' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 +experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_tifl_basic' diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/fedavg.cfg.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/fedavg.cfg.yaml new file mode 100644 index 00000000..3b4615d1 --- /dev/null +++ b/configs/p23_freezoff_non_iid_cifar10_cnn/fedavg.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 \ No newline at end of file diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/fednova.cfg.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/fednova.cfg.yaml new file mode 100644 index 00000000..ca0e2a55 --- /dev/null +++ b/configs/p23_freezoff_non_iid_cifar10_cnn/fednova.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova \ No newline at end of file diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/fedprox.cfg.yaml 
b/configs/p23_freezoff_non_iid_cifar10_cnn/fedprox.cfg.yaml new file mode 100644 index 00000000..f66490e9 --- /dev/null +++ b/configs/p23_freezoff_non_iid_cifar10_cnn/fedprox.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx \ No newline at end of file diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/gen.py b/configs/p23_freezoff_non_iid_cifar10_cnn/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/p23_freezoff_non_iid_cifar10_cnn/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/offload.cfg.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/offload.cfg.yaml new file mode 100644 index 00000000..3febf08b --- /dev/null +++ b/configs/p23_freezoff_non_iid_cifar10_cnn/offload.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: offload +deadline: 500000 \ No newline at end of file diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/offload_strict.cfg.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/offload_strict.cfg.yaml new file mode 100644 index 00000000..f07a9c58 --- /dev/null +++ 
b/configs/p23_freezoff_non_iid_cifar10_cnn/offload_strict.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 7 +deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/run.py b/configs/p23_freezoff_non_iid_cifar10_cnn/run.py new file mode 100644 index 00000000..3f289cc1 --- /dev/null +++ b/configs/p23_freezoff_non_iid_cifar10_cnn/run.py @@ -0,0 +1,30 @@ +from pathlib import Path + +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + name = 'p11_freezoff' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + 'fedavg.yaml', + 'offload_strict.yaml', + 'fednova.yaml', + 'fedprox.yaml', + 'offload.yaml', + 'tifl_adaptive.yaml', + 'tifl_basic.yaml', + 'dyn_terminate_swyh.yaml', + 'dyn_terminate.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + + print('Done') + + diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/tifl_adaptive.cfg.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/tifl_adaptive.cfg.yaml new file mode 100644 index 00000000..e0ca9fbd --- /dev/null +++ b/configs/p23_freezoff_non_iid_cifar10_cnn/tifl_adaptive.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 \ No newline at end of file diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/tifl_basic.cfg.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/tifl_basic.cfg.yaml new file mode 100644 index 00000000..b12b53b3 --- /dev/null +++ b/configs/p23_freezoff_non_iid_cifar10_cnn/tifl_basic.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 \ No newline at end of file From 
f3e03285cb04b4883e5d3cbca6bc6b3c86641128 Mon Sep 17 00:00:00 2001 From: bacox Date: Fri, 28 Jan 2022 18:14:09 +0100 Subject: [PATCH 40/73] Add mnist dataset --- configs/dev_mnist/descr.yaml | 31 ++++++++ configs/dev_mnist/exps/fedavg.yaml | 35 +++++++++ configs/dev_mnist/fedavg.cfg.yaml | 3 + configs/dev_mnist/gen.py | 26 +++++++ configs/dev_mnist/run.py | 20 +++++ fltk/datasets/distributed/mnist.py | 121 +++++++++++++++++++++++++++++ fltk/nets/mnist_cnn.py | 20 +++++ fltk/util/base_config.py | 13 +++- 8 files changed, 266 insertions(+), 3 deletions(-) create mode 100644 configs/dev_mnist/descr.yaml create mode 100644 configs/dev_mnist/exps/fedavg.yaml create mode 100644 configs/dev_mnist/fedavg.cfg.yaml create mode 100644 configs/dev_mnist/gen.py create mode 100644 configs/dev_mnist/run.py create mode 100644 fltk/datasets/distributed/mnist.py create mode 100644 fltk/nets/mnist_cnn.py diff --git a/configs/dev_mnist/descr.yaml b/configs/dev_mnist/descr.yaml new file mode 100644 index 00000000..d4357836 --- /dev/null +++ b/configs/dev_mnist/descr.yaml @@ -0,0 +1,31 @@ +--- +# Experiment configuration +total_epochs: 3 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 100 +warmup_round: false +output_location: 'output/dev_p2' +tensor_board_active: true +clients_per_round: 2 +node_groups: + slow: [1, 1] + medium: [2, 2] + fast: [3, 3] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 2 diff --git a/configs/dev_mnist/exps/fedavg.yaml b/configs/dev_mnist/exps/fedavg.yaml 
new file mode 100644 index 00000000..4c7ef222 --- /dev/null +++ b/configs/dev_mnist/exps/fedavg.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 3 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 100 +warmup_round: false +output_location: 'output/dev_p2' +tensor_board_active: true +clients_per_round: 2 +node_groups: + slow: [1, 1] + medium: [2, 2] + fast: [3, 3] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 2 +# Individual configuration +offload_stategy: vanilla +deadline: 500 +experiment_prefix: 'dev_mnist_fedavg' diff --git a/configs/dev_mnist/fedavg.cfg.yaml b/configs/dev_mnist/fedavg.cfg.yaml new file mode 100644 index 00000000..ecb5bc3e --- /dev/null +++ b/configs/dev_mnist/fedavg.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500 \ No newline at end of file diff --git a/configs/dev_mnist/gen.py b/configs/dev_mnist/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/dev_mnist/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) 
as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/dev_mnist/run.py b/configs/dev_mnist/run.py new file mode 100644 index 00000000..8e1fad76 --- /dev/null +++ b/configs/dev_mnist/run.py @@ -0,0 +1,20 @@ +from pathlib import Path + +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + name = 'dev' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + 'fedavg.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + + print('Done') diff --git a/fltk/datasets/distributed/mnist.py b/fltk/datasets/distributed/mnist.py new file mode 100644 index 00000000..06e6d9e9 --- /dev/null +++ b/fltk/datasets/distributed/mnist.py @@ -0,0 +1,121 @@ +from __future__ import annotations +from fltk.datasets import DistDataset +from torchvision import datasets, transforms +from torch.utils.data import DataLoader +# from fltk.strategy import get_sampler, get_augmentations, get_augmentations_tensor, UnifyingSampler +from fltk.strategy.data_samplers import get_sampler + +from random import choice +from PIL import Image + +# typing: +from typing import TYPE_CHECKING, Tuple, Any, List +if TYPE_CHECKING: + from fltk.util import BareConfig + +# class MNIST(datasets.MNIST): +# def __init__(self, root:str, transform, augment:bool=False): +# super().__init__(root=root, train=True, download=True, transform=transform) +# if augment: +# 
self.augmentation_transforms = get_augmentations() +# self.tensor_augmentations = get_augmentations_tensor() +# +# def __getitem__(self, index: int) -> Tuple[Any, Any]: +# augment = False +# if isinstance(index, str): +# target = int(index) +# index = choice(self.ordedered_by_label[target]) +# augment = True +# +# img, target = self.data[index], int(self.targets[index]) +# +# img = img.numpy() +# if augment: +# img = self.augmentation_transforms(image=img)['image'] +# img = Image.fromarray(img, mode='L') +# img = self.tensor_augmentations(img) +# +# if self.transform is not None: +# img = self.transform(img) +# +# return img, target +# +# def set_available_indices(self, ordedered_by_label:List[int]): +# self.ordedered_by_label = ordedered_by_label +# +# class DistMNISTDataset_2(DistDataset): +# +# def __init__(self, args:BareConfig): +# super(DistMNISTDataset_2, self).__init__(args) +# self.augment = args.augment +# self.augmented_emd = args.augmented_emd +# self.init_train_dataset(args) +# self.init_test_dataset() +# +# def init_train_dataset(self, args:BareConfig): +# dist_loader_text = "distributed" if self.args.get_distributed() else "" +# self.get_args().get_logger().debug(f"Loading '{dist_loader_text}' MNIST train data") +# +# self.train_dataset = MNIST(root=self.get_args().get_data_path(), transform=transforms.ToTensor(), augment=self.augment) +# self.train_sampler = get_sampler(self.train_dataset, self.args) +# self.train_dataset.set_available_indices(self.train_sampler.order_by_label(self.train_dataset)) +# if self.augment: +# self.train_sampler = UnifyingSampler(self.train_dataset, args, self.train_sampler, self.augmented_emd) +# self.train_loader = DataLoader(self.train_dataset, batch_size=16, sampler=self.train_sampler) +# +# def init_test_dataset(self): +# dist_loader_text = "distributed" if self.args.get_distributed() else "" +# self.get_args().get_logger().debug(f"Loading '{dist_loader_text}' MNIST test data") +# self.test_dataset = 
datasets.MNIST(root=self.get_args().get_data_path(), train=False, download=True, +# transform=transforms.Compose([transforms.ToTensor()])) +# self.test_sampler = get_sampler(self.test_dataset, self.args) +# self.test_loader = DataLoader(self.test_dataset, batch_size=16, sampler=self.test_sampler) + + +class DistMNISTDataset(DistDataset): + + def __init__(self, args): + super(DistMNISTDataset, self).__init__(args) + self.init_train_dataset() + self.init_test_dataset() + + def init_train_dataset(self): + dist_loader_text = "distributed" if self.args.get_distributed() else "" + self.get_args().get_logger().debug(f"Loading '{dist_loader_text}' MNIST train data") + + self.train_dataset = datasets.MNIST(root=self.get_args().get_data_path(), train=True, download=True, + transform=transforms.Compose([transforms.ToTensor()])) + self.train_sampler = get_sampler(self.train_dataset, self.args) + self.train_loader = DataLoader(self.train_dataset, batch_size=16, sampler=self.train_sampler) + + def init_test_dataset(self): + dist_loader_text = "distributed" if self.args.get_distributed() else "" + self.get_args().get_logger().debug(f"Loading '{dist_loader_text}' MNIST test data") + self.test_dataset = datasets.MNIST(root=self.get_args().get_data_path(), train=False, download=True, + transform=transforms.Compose([transforms.ToTensor()])) + self.test_sampler = get_sampler(self.test_dataset, self.args) + self.test_loader = DataLoader(self.test_dataset, batch_size=16, sampler=self.test_sampler) + + def load_train_dataset(self): + self.get_args().get_logger().debug("Loading MNIST train data") + + train_dataset = datasets.MNIST(self.get_args().get_data_path(), train=True, download=True, transform=transforms.Compose([transforms.ToTensor()])) + train_loader = DataLoader(train_dataset, batch_size=len(train_dataset)) + + train_data = self.get_tuple_from_data_loader(train_loader) + + self.get_args().get_logger().debug("Finished loading MNIST train data") + + return train_data + + def 
load_test_dataset(self): + self.get_args().get_logger().debug("Loading MNIST test data") + + test_dataset = datasets.MNIST(self.get_args().get_data_path(), train=False, download=True, transform=transforms.Compose([transforms.ToTensor()])) + test_loader = DataLoader(test_dataset, batch_size=len(test_dataset)) + + test_data = self.get_tuple_from_data_loader(test_loader) + + self.get_args().get_logger().debug("Finished loading MNIST test data") + + return test_data \ No newline at end of file diff --git a/fltk/nets/mnist_cnn.py b/fltk/nets/mnist_cnn.py new file mode 100644 index 00000000..5f4b69cd --- /dev/null +++ b/fltk/nets/mnist_cnn.py @@ -0,0 +1,20 @@ +import torch.nn as nn +import torch.nn.functional as F + +class MNIST_CNN(nn.Module): + def __init__(self): + super().__init__() + self.conv1 = nn.Conv2d(1, 10, kernel_size=5) + self.conv2 = nn.Conv2d(10, 20, kernel_size=5) + self.conv2_drop = nn.Dropout2d() + self.fc1 = nn.Linear(320, 50) + self.fc2 = nn.Linear(50, 10) + + def forward(self, x): + x = F.relu(F.max_pool2d(self.conv1(x), 2)) + x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) + x = x.view(-1, 320) + x = F.relu(self.fc1(x)) + x = F.dropout(x, training=self.training) + x = self.fc2(x) + return F.log_softmax(x) \ No newline at end of file diff --git a/fltk/util/base_config.py b/fltk/util/base_config.py index 86a6600f..063f3f41 100644 --- a/fltk/util/base_config.py +++ b/fltk/util/base_config.py @@ -4,7 +4,9 @@ import json from fltk.datasets.distributed import DistCIFAR10Dataset, DistCIFAR100Dataset, DistFashionMNISTDataset +from fltk.datasets.distributed.mnist import DistMNISTDataset from fltk.nets import Cifar10CNN, FashionMNISTCNN, Cifar100ResNet, FashionMNISTResNet, Cifar10ResNet, Cifar100VGG +from fltk.nets.mnist_cnn import MNIST_CNN from fltk.strategy.FedNova import FedNova from fltk.strategy.fedprox import FedProx from fltk.util.definitions import Optimizations @@ -88,7 +90,8 @@ def __init__(self): "Cifar10CNN": Cifar10CNN, 
"Cifar10ResNet": Cifar10ResNet, "FashionMNISTCNN": FashionMNISTCNN, - "FashionMNISTResNet": FashionMNISTResNet + "FashionMNISTResNet": FashionMNISTResNet, + "MNISTCNN": MNIST_CNN, } @@ -98,8 +101,8 @@ def __init__(self): "Cifar10CNN": 15, "Cifar10ResNet": 39, "FashionMNISTCNN": 7, - "FashionMNISTResNet": 7 - + "FashionMNISTResNet": 7, + "MNISTCNN": 2, } self.net = None self.net_name = 'Cifar10CNN' @@ -110,17 +113,21 @@ def __init__(self): 'cifar10': DistCIFAR10Dataset, 'cifar100': DistCIFAR100Dataset, 'fashion-mnist': DistFashionMNISTDataset, + 'mnist': DistMNISTDataset } self.train_data_loader_pickle_path = { 'cifar10': 'data_loaders/cifar10/train_data_loader.pickle', 'fashion-mnist': 'data_loaders/fashion-mnist/train_data_loader.pickle', 'cifar100': 'data_loaders/cifar100/train_data_loader.pickle', + 'mnist' : 'data_loaders/mnist/train_data_loader.pickle', } self.test_data_loader_pickle_path = { 'cifar10': 'data_loaders/cifar10/test_data_loader.pickle', 'fashion-mnist': 'data_loaders/fashion-mnist/test_data_loader.pickle', 'cifar100': 'data_loaders/cifar100/test_data_loader.pickle', + 'mnist' : 'data_loaders/mnist/test_data_loader.pickle', + } self.loss_function = torch.nn.CrossEntropyLoss self.default_model_folder_path = "default_models" From 6bdfaeafc391c8d3bb774c346c92072d3c7af1a2 Mon Sep 17 00:00:00 2001 From: Lydia Date: Fri, 28 Jan 2022 19:18:33 +0100 Subject: [PATCH 41/73] Add cifar10 experiments --- configs/p20_freezoff_iid_fmnist_cnn/run.py | 11 +++-- .../exps/dyn_terminate.yaml | 36 +++++++++++++++ .../exps/dyn_terminate_swyh.yaml | 37 ++++++++++++++++ .../exps/fedavg.yaml | 36 +++++++++++++++ .../exps/fednova.yaml | 37 ++++++++++++++++ .../exps/fedprox.yaml | 37 ++++++++++++++++ .../exps/offload.yaml | 36 +++++++++++++++ .../exps/offload_strict.yaml | 37 ++++++++++++++++ .../exps/tifl_adaptive.yaml | 36 +++++++++++++++ .../exps/tifl_basic.yaml | 36 +++++++++++++++ .../p21_freezoff_non_iid_fmnist_cnn/run.py | 11 +++-- 
configs/p22_freezoff_iid_cifar10_cnn/run.py | 16 +++---- .../descr.yaml | 32 ++++++++++++++ .../dyn_terminate.cfg.yaml | 3 ++ .../dyn_terminate_swyh.cfg.yaml | 3 ++ .../exps/dyn_terminate.yaml | 36 +++++++++++++++ .../exps/dyn_terminate_swyh.yaml | 37 ++++++++++++++++ .../exps/fedavg.yaml | 36 +++++++++++++++ .../exps/fednova.yaml | 37 ++++++++++++++++ .../exps/fedprox.yaml | 37 ++++++++++++++++ .../exps/offload.yaml | 36 +++++++++++++++ .../exps/offload_strict.yaml | 37 ++++++++++++++++ .../exps/offload_strict2.yaml | 37 ++++++++++++++++ .../exps/offload_strict3.yaml | 37 ++++++++++++++++ .../exps/offload_strict4.yaml | 37 ++++++++++++++++ .../exps/tifl_adaptive.yaml | 36 +++++++++++++++ .../exps/tifl_basic.yaml | 36 +++++++++++++++ .../fedavg.cfg.yaml | 3 ++ .../fednova.cfg.yaml | 4 ++ .../fedprox.cfg.yaml | 4 ++ .../p24_freezoff_iid_cifar10_cnn_w9s3/gen.py | 26 +++++++++++ .../offload.cfg.yaml | 3 ++ .../offload_strict.cfg.yaml | 4 ++ .../offload_strict2.cfg.yaml | 4 ++ .../offload_strict3.cfg.yaml | 4 ++ .../offload_strict4.cfg.yaml | 4 ++ .../p24_freezoff_iid_cifar10_cnn_w9s3/run.py | 40 +++++++++++++++++ .../tifl_adaptive.cfg.yaml | 3 ++ .../tifl_basic.cfg.yaml | 3 ++ .../descr.yaml | 32 ++++++++++++++ .../dyn_terminate.cfg.yaml | 3 ++ .../dyn_terminate_swyh.cfg.yaml | 3 ++ .../exps/dyn_terminate.yaml | 36 +++++++++++++++ .../exps/dyn_terminate_swyh.yaml | 37 ++++++++++++++++ .../exps/fedavg.yaml | 36 +++++++++++++++ .../exps/fednova.yaml | 37 ++++++++++++++++ .../exps/fedprox.yaml | 37 ++++++++++++++++ .../exps/offload.yaml | 36 +++++++++++++++ .../exps/offload_strict.yaml | 37 ++++++++++++++++ .../exps/offload_strict2.yaml | 37 ++++++++++++++++ .../exps/offload_strict3.yaml | 37 ++++++++++++++++ .../exps/offload_strict4.yaml | 37 ++++++++++++++++ .../exps/tifl_adaptive.yaml | 36 +++++++++++++++ .../exps/tifl_basic.yaml | 36 +++++++++++++++ .../fedavg.cfg.yaml | 3 ++ .../fednova.cfg.yaml | 4 ++ .../fedprox.cfg.yaml | 4 ++ .../gen.py | 26 +++++++++++ 
.../offload.cfg.yaml | 3 ++ .../offload_strict.cfg.yaml | 4 ++ .../offload_strict2.cfg.yaml | 4 ++ .../offload_strict3.cfg.yaml | 4 ++ .../offload_strict4.cfg.yaml | 4 ++ .../run.py | 40 +++++++++++++++++ .../tifl_adaptive.cfg.yaml | 3 ++ .../tifl_basic.cfg.yaml | 3 ++ .../descr.yaml | 32 ++++++++++++++ .../dyn_terminate.cfg.yaml | 3 ++ .../dyn_terminate_swyh.cfg.yaml | 3 ++ .../exps/dyn_terminate.yaml | 36 +++++++++++++++ .../exps/dyn_terminate_swyh.yaml | 37 ++++++++++++++++ .../exps/fedavg.yaml | 36 +++++++++++++++ .../exps/fednova.yaml | 37 ++++++++++++++++ .../exps/fedprox.yaml | 37 ++++++++++++++++ .../exps/offload.yaml | 36 +++++++++++++++ .../exps/offload_strict.yaml | 37 ++++++++++++++++ .../exps/offload_strict4.yaml | 37 ++++++++++++++++ .../exps/tifl_adaptive.yaml | 36 +++++++++++++++ .../exps/tifl_basic.yaml | 36 +++++++++++++++ .../fedavg.cfg.yaml | 3 ++ .../fednova.cfg.yaml | 4 ++ .../fedprox.cfg.yaml | 4 ++ .../p26_freezoff_iid_mnist_cnn_w9s3/gen.py | 26 +++++++++++ .../offload.cfg.yaml | 3 ++ .../offload_strict.cfg.yaml | 4 ++ .../offload_strict4.cfg.yaml | 4 ++ .../p26_freezoff_iid_mnist_cnn_w9s3/run.py | 38 ++++++++++++++++ .../tifl_adaptive.cfg.yaml | 3 ++ .../tifl_basic.cfg.yaml | 3 ++ .../descr.yaml | 32 ++++++++++++++ .../dyn_terminate.cfg.yaml | 3 ++ .../dyn_terminate_swyh.cfg.yaml | 3 ++ .../exps/dyn_terminate.yaml | 36 +++++++++++++++ .../exps/dyn_terminate_swyh.yaml | 37 ++++++++++++++++ .../exps/fedavg.yaml | 36 +++++++++++++++ .../exps/fednova.yaml | 37 ++++++++++++++++ .../exps/fedprox.yaml | 37 ++++++++++++++++ .../exps/offload.yaml | 36 +++++++++++++++ .../exps/offload_strict.yaml | 37 ++++++++++++++++ .../exps/offload_strict4.yaml | 37 ++++++++++++++++ .../exps/tifl_adaptive.yaml | 36 +++++++++++++++ .../exps/tifl_basic.yaml | 36 +++++++++++++++ .../fedavg.cfg.yaml | 3 ++ .../fednova.cfg.yaml | 4 ++ .../fedprox.cfg.yaml | 4 ++ .../gen.py | 26 +++++++++++ .../offload.cfg.yaml | 3 ++ .../offload_strict.cfg.yaml | 4 ++ 
.../offload_strict4.cfg.yaml | 4 ++ .../run.py | 38 ++++++++++++++++ .../tifl_adaptive.cfg.yaml | 3 ++ .../tifl_basic.cfg.yaml | 3 ++ .../p23_freezoff_w9s3/client_stub_default.yml | 26 +++++++++++ deploy/p23_freezoff_w9s3/client_stub_fast.yml | 25 +++++++++++ .../p23_freezoff_w9s3/client_stub_medium.yml | 25 +++++++++++ deploy/p23_freezoff_w9s3/client_stub_slow.yml | 25 +++++++++++ deploy/p23_freezoff_w9s3/system_stub.yml | 27 ++++++++++++ fltk/client.py | 9 ++-- fltk/strategy/offloading.py | 10 ++++- fltk/util/generate_docker_compose.py | 44 ++++++++++++++++++- 120 files changed, 2685 insertions(+), 20 deletions(-) create mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/exps/dyn_terminate.yaml create mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/exps/dyn_terminate_swyh.yaml create mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/exps/fedavg.yaml create mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/exps/fednova.yaml create mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/exps/fedprox.yaml create mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/exps/offload.yaml create mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/exps/offload_strict.yaml create mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/exps/tifl_adaptive.yaml create mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/exps/tifl_basic.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/descr.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fedavg.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fednova.yaml create mode 100644 
configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fedprox.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/fednova.cfg.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/gen.py create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload.cfg.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/run.py create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml create mode 100644 
configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/gen.py create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/run.py create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml create mode 100644 
configs/p26_freezoff_iid_mnist_cnn_w9s3/descr.yaml create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/dyn_terminate.cfg.yaml create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/dyn_terminate_swyh.cfg.yaml create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/dyn_terminate.yaml create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/dyn_terminate_swyh.yaml create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fedavg.yaml create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fednova.yaml create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fedprox.yaml create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload.yaml create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload_strict.yaml create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload_strict4.yaml create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/tifl_adaptive.yaml create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/tifl_basic.yaml create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/fedavg.cfg.yaml create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/fednova.cfg.yaml create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/fedprox.cfg.yaml create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/gen.py create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/offload.cfg.yaml create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/offload_strict.cfg.yaml create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/offload_strict4.cfg.yaml create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/run.py create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/tifl_adaptive.cfg.yaml create mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/tifl_basic.cfg.yaml create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/descr.yaml create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/dyn_terminate.cfg.yaml create mode 100644 
configs/p27_freezoff_non_iid_mnist_cnn_w9s3/dyn_terminate_swyh.cfg.yaml create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/dyn_terminate.yaml create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/dyn_terminate_swyh.yaml create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fedavg.yaml create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fednova.yaml create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fedprox.yaml create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload.yaml create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload_strict.yaml create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload_strict4.yaml create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/tifl_adaptive.yaml create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/tifl_basic.yaml create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fedavg.cfg.yaml create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fednova.cfg.yaml create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fedprox.cfg.yaml create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/gen.py create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload.cfg.yaml create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload_strict.cfg.yaml create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload_strict4.cfg.yaml create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/run.py create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/tifl_adaptive.cfg.yaml create mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/tifl_basic.cfg.yaml create mode 100644 deploy/p23_freezoff_w9s3/client_stub_default.yml create mode 100644 deploy/p23_freezoff_w9s3/client_stub_fast.yml create mode 100644 deploy/p23_freezoff_w9s3/client_stub_medium.yml create mode 100644 deploy/p23_freezoff_w9s3/client_stub_slow.yml create 
mode 100644 deploy/p23_freezoff_w9s3/system_stub.yml diff --git a/configs/p20_freezoff_iid_fmnist_cnn/run.py b/configs/p20_freezoff_iid_fmnist_cnn/run.py index 3f289cc1..d2b2e028 100644 --- a/configs/p20_freezoff_iid_fmnist_cnn/run.py +++ b/configs/p20_freezoff_iid_fmnist_cnn/run.py @@ -1,8 +1,9 @@ from pathlib import Path - +import time from fltk.util.generate_docker_compose import run as generate_docker import os if __name__ == '__main__': + EVENT_FILE="exp_events.txt" name = 'p11_freezoff' generate_docker(name) base_path = f'configs/{Path(__file__).parent.name}' @@ -21,10 +22,14 @@ first_prefix = '--build' for exp_cfg_file in exp_list: cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') + start = time.time() + + print(f'Running cmd: "{cmd}"') os.system(cmd) first_prefix = '' + elapsed = time.time() - start + os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') print('Done') - - diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/dyn_terminate.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/dyn_terminate.yaml new file mode 100644 index 00000000..b98791f7 --- /dev/null +++ b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/dyn_terminate.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p21' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" 
+sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 +experiment_prefix: 'p21_freezoff_non_iid_fmnist_cnn_dyn_terminate' diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/dyn_terminate_swyh.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/dyn_terminate_swyh.yaml new file mode 100644 index 00000000..a402cc14 --- /dev/null +++ b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/dyn_terminate_swyh.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p21' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 + +experiment_prefix: 'p21_freezoff_non_iid_fmnist_cnn_dyn_terminate_swyh' diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/fedavg.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/fedavg.yaml new file mode 100644 index 00000000..11334b11 --- /dev/null +++ 
b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/fedavg.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p21' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +experiment_prefix: 'p21_freezoff_non_iid_fmnist_cnn_fedavg' diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/fednova.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/fednova.yaml new file mode 100644 index 00000000..9842d72e --- /dev/null +++ b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/fednova.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p21' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" 
(default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova +experiment_prefix: 'p21_freezoff_non_iid_fmnist_cnn_fednova' diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/fedprox.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/fedprox.yaml new file mode 100644 index 00000000..af82207f --- /dev/null +++ b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/fedprox.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p21' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx +experiment_prefix: 'p21_freezoff_non_iid_fmnist_cnn_fedprox' diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/offload.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/offload.yaml new file mode 100644 index 00000000..4e09f4cb --- /dev/null +++ 
b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/offload.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p21' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: offload +deadline: 500000 +experiment_prefix: 'p21_freezoff_non_iid_fmnist_cnn_offload' diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/offload_strict.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/offload_strict.yaml new file mode 100644 index 00000000..fd15fd06 --- /dev/null +++ b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/offload_strict.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p21' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || 
"dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: offload +deadline: 7 +deadline_threshold: 2 +experiment_prefix: 'p21_freezoff_non_iid_fmnist_cnn_offload_strict' diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/tifl_adaptive.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/tifl_adaptive.yaml new file mode 100644 index 00000000..5d5b03fe --- /dev/null +++ b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/tifl_adaptive.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p21' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 +experiment_prefix: 'p21_freezoff_non_iid_fmnist_cnn_tifl_adaptive' diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/tifl_basic.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/tifl_basic.yaml new file mode 100644 index 00000000..ab79764c --- 
/dev/null +++ b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/tifl_basic.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: FashionMNISTCNN +dataset: fashion-mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p21' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 +experiment_prefix: 'p21_freezoff_non_iid_fmnist_cnn_tifl_basic' diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/run.py b/configs/p21_freezoff_non_iid_fmnist_cnn/run.py index 3f289cc1..d2b2e028 100644 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/run.py +++ b/configs/p21_freezoff_non_iid_fmnist_cnn/run.py @@ -1,8 +1,9 @@ from pathlib import Path - +import time from fltk.util.generate_docker_compose import run as generate_docker import os if __name__ == '__main__': + EVENT_FILE="exp_events.txt" name = 'p11_freezoff' generate_docker(name) base_path = f'configs/{Path(__file__).parent.name}' @@ -21,10 +22,14 @@ first_prefix = '--build' for exp_cfg_file in exp_list: cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') + start = time.time() + + print(f'Running cmd: "{cmd}"') os.system(cmd) 
first_prefix = '' + elapsed = time.time() - start + os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') print('Done') - - diff --git a/configs/p22_freezoff_iid_cifar10_cnn/run.py b/configs/p22_freezoff_iid_cifar10_cnn/run.py index 3f289cc1..d3cc22bf 100644 --- a/configs/p22_freezoff_iid_cifar10_cnn/run.py +++ b/configs/p22_freezoff_iid_cifar10_cnn/run.py @@ -8,14 +8,14 @@ base_path = f'configs/{Path(__file__).parent.name}' exp_list = [ 'fedavg.yaml', - 'offload_strict.yaml', - 'fednova.yaml', - 'fedprox.yaml', - 'offload.yaml', - 'tifl_adaptive.yaml', - 'tifl_basic.yaml', - 'dyn_terminate_swyh.yaml', - 'dyn_terminate.yaml', + # 'offload_strict.yaml', + # 'fednova.yaml', + # 'fedprox.yaml', + # 'offload.yaml', + # 'tifl_adaptive.yaml', + # 'tifl_basic.yaml', + # 'dyn_terminate_swyh.yaml', + # 'dyn_terminate.yaml', ] exp_list = [f'{base_path}/exps/{x}' for x in exp_list] first_prefix = '--build' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/descr.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/descr.yaml new file mode 100644 index 00000000..12d65dca --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/descr.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p24' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # 
hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml new file mode 100644 index 00000000..279369ef --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml new file mode 100644 index 00000000..578998b4 --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml new file mode 100644 index 00000000..8198e4f1 --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p24' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: 
'131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 +experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_dyn_terminate' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml new file mode 100644 index 00000000..64a7079e --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p24' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 + +experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_dyn_terminate_swyh' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fedavg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fedavg.yaml new file mode 100644 index 00000000..28d40fca --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fedavg.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN 
+dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p24' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_fedavg' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fednova.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fednova.yaml new file mode 100644 index 00000000..309ec520 --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fednova.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p24' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + 
hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova +experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_fednova' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fedprox.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fedprox.yaml new file mode 100644 index 00000000..7ff26439 --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fedprox.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p24' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx +experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_fedprox' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload.yaml new file mode 100644 index 00000000..5405552e --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to 
false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p24' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 500000 +experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_offload' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml new file mode 100644 index 00000000..6dc599f8 --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p24' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + 
# nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 18 +deadline_threshold: 2 +experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_offload_strict' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml new file mode 100644 index 00000000..ec52f440 --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p24' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 20 +deadline_threshold: 3 +experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_offload_strict2' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml new file mode 100644 index 00000000..4bf53a83 --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is 
available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p24' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 18 +deadline_threshold: 5 +experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_offload_strict3' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml new file mode 100644 index 00000000..c7a821a1 --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p24' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: 
'131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload-strict +deadline: 18 +deadline_threshold: 3 +experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_offload_strict4' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml new file mode 100644 index 00000000..400673ef --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p24' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 +experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_tifl_adaptive' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml new file mode 100644 index 00000000..44c19d0e --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN 
+dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p24' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 +experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_tifl_basic' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml new file mode 100644 index 00000000..3b4615d1 --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/fednova.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/fednova.cfg.yaml new file mode 100644 index 00000000..ca0e2a55 --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/fednova.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml new file mode 100644 index 00000000..f66490e9 --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: 
vanilla +deadline: 500000 +optimizer: FedProx \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/gen.py b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload.cfg.yaml new file mode 100644 index 00000000..3febf08b --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: offload +deadline: 500000 \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml new file mode 100644 index 00000000..1287aa02 --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 18 +deadline_threshold: 2 \ No newline at end of file diff --git 
a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml new file mode 100644 index 00000000..ea5b7853 --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 20 +deadline_threshold: 3 \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml new file mode 100644 index 00000000..7b37118c --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 18 +deadline_threshold: 5 \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml new file mode 100644 index 00000000..871612b2 --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload-strict +deadline: 18 +deadline_threshold: 3 \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/run.py b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/run.py new file mode 100644 index 00000000..7bf9555d --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/run.py @@ -0,0 +1,40 @@ +from pathlib import Path +import time +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + EVENT_FILE="exp_events.txt" + name = 'p23_w9s3' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + # 'fedavg.yaml', + # 'offload_strict.yaml', + # 'offload_strict2.yaml', + # 'offload_strict3.yaml', + 'offload_strict4.yaml', + # 'fednova.yaml', + # 'fedprox.yaml', + # 'offload.yaml', + # 'tifl_adaptive.yaml', + 
# 'tifl_basic.yaml', + # 'dyn_terminate_swyh.yaml', + # 'dyn_terminate.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') + start = time.time() + + + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + elapsed = time.time() - start + os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') + + print('Done') + + diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml new file mode 100644 index 00000000..e0ca9fbd --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml new file mode 100644 index 00000000..b12b53b3 --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml new file mode 100644 index 00000000..7883c126 --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p25' +tensor_board_active: true 
+termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml new file mode 100644 index 00000000..279369ef --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml new file mode 100644 index 00000000..578998b4 --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml new file mode 100644 index 00000000..f2eef747 --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p25' 
+tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 +experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_dyn_terminate' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml new file mode 100644 index 00000000..ba7edf86 --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p25' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual 
configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 + +experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_dyn_terminate_swyh' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml new file mode 100644 index 00000000..612cc586 --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p25' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_fedavg' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml new file mode 100644 index 00000000..f001c2bf --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false 
+output_location: 'output/p25' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova +experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_fednova' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml new file mode 100644 index 00000000..70f3db66 --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p25' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual 
configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx +experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_fedprox' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml new file mode 100644 index 00000000..612d7668 --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p25' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 500000 +experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_offload' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml new file mode 100644 index 00000000..55f85fb4 --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 
+warmup_round: false +output_location: 'output/p25' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 18 +deadline_threshold: 2 +experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml new file mode 100644 index 00000000..adb6b6fc --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p25' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 
'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 20 +deadline_threshold: 3 +experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict2' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml new file mode 100644 index 00000000..afa90fc8 --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p25' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload-strict +deadline: 18 +deadline_threshold: 5 +experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict3' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml new file mode 100644 index 00000000..9aaf58bb --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: 
Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p25' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload-strict +deadline: 18 +deadline_threshold: 3 +experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict4' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml new file mode 100644 index 00000000..d2e1ea7a --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p25' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || 
random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 +experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_tifl_adaptive' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml new file mode 100644 index 00000000..25e05da2 --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p25' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 +experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_tifl_basic' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml new file mode 100644 index 00000000..3b4615d1 --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 \ No 
newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml new file mode 100644 index 00000000..ca0e2a55 --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml new file mode 100644 index 00000000..f66490e9 --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/gen.py b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml 
b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml new file mode 100644 index 00000000..3febf08b --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: offload +deadline: 500000 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml new file mode 100644 index 00000000..1287aa02 --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 18 +deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml new file mode 100644 index 00000000..ea5b7853 --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 20 +deadline_threshold: 3 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml new file mode 100644 index 00000000..8cfc95a8 --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload-strict +deadline: 18 +deadline_threshold: 5 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml new file mode 100644 index 00000000..871612b2 --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload-strict +deadline: 18 
+deadline_threshold: 3 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/run.py b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/run.py new file mode 100644 index 00000000..7bf9555d --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/run.py @@ -0,0 +1,40 @@ +from pathlib import Path +import time +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + EVENT_FILE="exp_events.txt" + name = 'p23_w9s3' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + # 'fedavg.yaml', + # 'offload_strict.yaml', + # 'offload_strict2.yaml', + # 'offload_strict3.yaml', + 'offload_strict4.yaml', + # 'fednova.yaml', + # 'fedprox.yaml', + # 'offload.yaml', + # 'tifl_adaptive.yaml', + # 'tifl_basic.yaml', + # 'dyn_terminate_swyh.yaml', + # 'dyn_terminate.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') + start = time.time() + + + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + elapsed = time.time() - start + os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') + + print('Done') + + diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml new file mode 100644 index 00000000..e0ca9fbd --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml 
b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml new file mode 100644 index 00000000..b12b53b3 --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 \ No newline at end of file diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/descr.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/descr.yaml new file mode 100644 index 00000000..847dd378 --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/descr.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p26' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/dyn_terminate.cfg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/dyn_terminate.cfg.yaml new file mode 100644 index 00000000..279369ef --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/dyn_terminate.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 \ No newline at end of file diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/dyn_terminate_swyh.cfg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/dyn_terminate_swyh.cfg.yaml 
new file mode 100644 index 00000000..578998b4 --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/dyn_terminate_swyh.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/dyn_terminate.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/dyn_terminate.yaml new file mode 100644 index 00000000..0d8ddd4b --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/dyn_terminate.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p26' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 +experiment_prefix: 'p26_freezoff_iid_mnist_cnn_w9s3_dyn_terminate' diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/dyn_terminate_swyh.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/dyn_terminate_swyh.yaml new file mode 100644 index 00000000..f37eae09 --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/dyn_terminate_swyh.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use 
cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p26' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 + +experiment_prefix: 'p26_freezoff_iid_mnist_cnn_w9s3_dyn_terminate_swyh' diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fedavg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fedavg.yaml new file mode 100644 index 00000000..4867e937 --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fedavg.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p26' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: 
'10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +experiment_prefix: 'p26_freezoff_iid_mnist_cnn_w9s3_fedavg' diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fednova.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fednova.yaml new file mode 100644 index 00000000..2276565a --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fednova.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p26' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova +experiment_prefix: 'p26_freezoff_iid_mnist_cnn_w9s3_fednova' diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fedprox.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fedprox.yaml new file mode 100644 index 00000000..de6b1824 --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fedprox.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 
+warmup_round: false +output_location: 'output/p26' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx +experiment_prefix: 'p26_freezoff_iid_mnist_cnn_w9s3_fedprox' diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload.yaml new file mode 100644 index 00000000..b557e0d8 --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p26' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration 
+offload_stategy: offload +deadline: 500000 +experiment_prefix: 'p26_freezoff_iid_mnist_cnn_w9s3_offload' diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload_strict.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload_strict.yaml new file mode 100644 index 00000000..9e239bb2 --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload_strict.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p26' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 9 +deadline_threshold: 1 +experiment_prefix: 'p26_freezoff_iid_mnist_cnn_w9s3_offload_strict' diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload_strict4.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload_strict4.yaml new file mode 100644 index 00000000..ea43cc12 --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload_strict4.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 
'output/p26' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload-strict +deadline: 9 +deadline_threshold: 1 +experiment_prefix: 'p26_freezoff_iid_mnist_cnn_w9s3_offload_strict4' diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/tifl_adaptive.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/tifl_adaptive.yaml new file mode 100644 index 00000000..445b8965 --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/tifl_adaptive.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p26' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration 
+offload_stategy: tifl-adaptive +deadline: 500000 +experiment_prefix: 'p26_freezoff_iid_mnist_cnn_w9s3_tifl_adaptive' diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/tifl_basic.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/tifl_basic.yaml new file mode 100644 index 00000000..a21d3198 --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/tifl_basic.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p26' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 +experiment_prefix: 'p26_freezoff_iid_mnist_cnn_w9s3_tifl_basic' diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/fedavg.cfg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/fedavg.cfg.yaml new file mode 100644 index 00000000..3b4615d1 --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/fedavg.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 \ No newline at end of file diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/fednova.cfg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/fednova.cfg.yaml new file mode 100644 index 00000000..ca0e2a55 --- /dev/null +++ 
b/configs/p26_freezoff_iid_mnist_cnn_w9s3/fednova.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova \ No newline at end of file diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/fedprox.cfg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/fedprox.cfg.yaml new file mode 100644 index 00000000..f66490e9 --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/fedprox.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx \ No newline at end of file diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/gen.py b/configs/p26_freezoff_iid_mnist_cnn_w9s3/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/offload.cfg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/offload.cfg.yaml new file mode 100644 index 00000000..3febf08b --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/offload.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: offload +deadline: 500000 \ No newline 
at end of file diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/offload_strict.cfg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/offload_strict.cfg.yaml new file mode 100644 index 00000000..41073058 --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/offload_strict.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 9 +deadline_threshold: 1 \ No newline at end of file diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/offload_strict4.cfg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/offload_strict4.cfg.yaml new file mode 100644 index 00000000..aa3df65e --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/offload_strict4.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload-strict +deadline: 9 +deadline_threshold: 1 \ No newline at end of file diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/run.py b/configs/p26_freezoff_iid_mnist_cnn_w9s3/run.py new file mode 100644 index 00000000..6daede8b --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/run.py @@ -0,0 +1,38 @@ +from pathlib import Path +import time +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + EVENT_FILE="exp_events.txt" + name = 'p23_w9s3' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + 'fedavg.yaml', + 'offload_strict.yaml', + 'offload_strict4.yaml', + 'fednova.yaml', + 'fedprox.yaml', + 'offload.yaml', + 'tifl_adaptive.yaml', + 'tifl_basic.yaml', + 'dyn_terminate_swyh.yaml', + 'dyn_terminate.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') + start = time.time() + + + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + elapsed = 
time.time() - start + os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') + + print('Done') + + diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/tifl_adaptive.cfg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/tifl_adaptive.cfg.yaml new file mode 100644 index 00000000..e0ca9fbd --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/tifl_adaptive.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 \ No newline at end of file diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/tifl_basic.cfg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/tifl_basic.cfg.yaml new file mode 100644 index 00000000..b12b53b3 --- /dev/null +++ b/configs/p26_freezoff_iid_mnist_cnn_w9s3/tifl_basic.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 \ No newline at end of file diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/descr.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/descr.yaml new file mode 100644 index 00000000..8e10f30e --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/descr.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p27' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: 
'10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/dyn_terminate.cfg.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/dyn_terminate.cfg.yaml new file mode 100644 index 00000000..279369ef --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/dyn_terminate.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 \ No newline at end of file diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/dyn_terminate_swyh.cfg.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/dyn_terminate_swyh.cfg.yaml new file mode 100644 index 00000000..578998b4 --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/dyn_terminate_swyh.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/dyn_terminate.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/dyn_terminate.yaml new file mode 100644 index 00000000..7292e375 --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/dyn_terminate.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p27' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: 
'10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 +experiment_prefix: 'p27_freezoff_non_iid_mnist_cnn_w9s3_dyn_terminate' diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/dyn_terminate_swyh.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/dyn_terminate_swyh.yaml new file mode 100644 index 00000000..4e729a66 --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/dyn_terminate_swyh.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p27' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 + +experiment_prefix: 'p27_freezoff_non_iid_mnist_cnn_w9s3_dyn_terminate_swyh' diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fedavg.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fedavg.yaml new file mode 100644 index 00000000..1cdd98d4 --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fedavg.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# 
Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p27' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +experiment_prefix: 'p27_freezoff_non_iid_mnist_cnn_w9s3_fedavg' diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fednova.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fednova.yaml new file mode 100644 index 00000000..89e075b3 --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fednova.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p27' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: 
'10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova +experiment_prefix: 'p27_freezoff_non_iid_mnist_cnn_w9s3_fednova' diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fedprox.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fedprox.yaml new file mode 100644 index 00000000..f12611a6 --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fedprox.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p27' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx +experiment_prefix: 'p27_freezoff_non_iid_mnist_cnn_w9s3_fedprox' diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload.yaml new file mode 100644 index 00000000..ae96e514 --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to 
false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p27' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 500000 +experiment_prefix: 'p27_freezoff_non_iid_mnist_cnn_w9s3_offload' diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload_strict.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload_strict.yaml new file mode 100644 index 00000000..089ebcaf --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload_strict.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p27' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 
'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 9 +deadline_threshold: 1 +experiment_prefix: 'p27_freezoff_non_iid_mnist_cnn_w9s3_offload_strict' diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload_strict4.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload_strict4.yaml new file mode 100644 index 00000000..ab8cf176 --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload_strict4.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p27' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload-strict +deadline: 9 +deadline_threshold: 1 +experiment_prefix: 'p27_freezoff_non_iid_mnist_cnn_w9s3_offload_strict4' diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/tifl_adaptive.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/tifl_adaptive.yaml new file mode 100644 index 00000000..0602959d --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/tifl_adaptive.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: 
mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p27' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 +experiment_prefix: 'p27_freezoff_non_iid_mnist_cnn_w9s3_tifl_adaptive' diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/tifl_basic.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/tifl_basic.yaml new file mode 100644 index 00000000..b75f25b0 --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/tifl_basic.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 5 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p27' +tensor_board_active: true +termination_percentage: 0.6 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: 
'131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 +experiment_prefix: 'p27_freezoff_non_iid_mnist_cnn_w9s3_tifl_basic' diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fedavg.cfg.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fedavg.cfg.yaml new file mode 100644 index 00000000..3b4615d1 --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fedavg.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 \ No newline at end of file diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fednova.cfg.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fednova.cfg.yaml new file mode 100644 index 00000000..ca0e2a55 --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fednova.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova \ No newline at end of file diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fedprox.cfg.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fedprox.cfg.yaml new file mode 100644 index 00000000..f66490e9 --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fedprox.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx \ No newline at end of file diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/gen.py b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + 
exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload.cfg.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload.cfg.yaml new file mode 100644 index 00000000..3febf08b --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: offload +deadline: 500000 \ No newline at end of file diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload_strict.cfg.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload_strict.cfg.yaml new file mode 100644 index 00000000..41073058 --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload_strict.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 9 +deadline_threshold: 1 \ No newline at end of file diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload_strict4.cfg.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload_strict4.cfg.yaml new file mode 100644 index 00000000..aa3df65e --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload_strict4.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload-strict +deadline: 9 +deadline_threshold: 1 \ No newline at end of file diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/run.py b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/run.py new file mode 100644 index 00000000..6daede8b --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/run.py @@ -0,0 +1,38 @@ +from pathlib import 
Path +import time +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + EVENT_FILE="exp_events.txt" + name = 'p23_w9s3' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + 'fedavg.yaml', + 'offload_strict.yaml', + 'offload_strict4.yaml', + 'fednova.yaml', + 'fedprox.yaml', + 'offload.yaml', + 'tifl_adaptive.yaml', + 'tifl_basic.yaml', + 'dyn_terminate_swyh.yaml', + 'dyn_terminate.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') + start = time.time() + + + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + elapsed = time.time() - start + os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') + + print('Done') + + diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/tifl_adaptive.cfg.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/tifl_adaptive.cfg.yaml new file mode 100644 index 00000000..e0ca9fbd --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/tifl_adaptive.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 \ No newline at end of file diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/tifl_basic.cfg.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/tifl_basic.cfg.yaml new file mode 100644 index 00000000..b12b53b3 --- /dev/null +++ b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/tifl_basic.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 \ No newline at end of file diff --git a/deploy/p23_freezoff_w9s3/client_stub_default.yml b/deploy/p23_freezoff_w9s3/client_stub_default.yml new file mode 100644 index 
00000000..43d6c919 --- /dev/null +++ b/deploy/p23_freezoff_w9s3/client_stub_default.yml @@ -0,0 +1,26 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . # look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=default + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '1' +# memory: 1024M diff --git a/deploy/p23_freezoff_w9s3/client_stub_fast.yml b/deploy/p23_freezoff_w9s3/client_stub_fast.yml new file mode 100644 index 00000000..d7c98ce0 --- /dev/null +++ b/deploy/p23_freezoff_w9s3/client_stub_fast.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=fast + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '3' diff --git a/deploy/p23_freezoff_w9s3/client_stub_medium.yml b/deploy/p23_freezoff_w9s3/client_stub_medium.yml new file mode 100644 index 00000000..f6bded5d --- /dev/null +++ b/deploy/p23_freezoff_w9s3/client_stub_medium.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=medium + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '2' diff --git a/deploy/p23_freezoff_w9s3/client_stub_slow.yml b/deploy/p23_freezoff_w9s3/client_stub_slow.yml new file mode 100644 index 00000000..bdd138f4 --- /dev/null +++ b/deploy/p23_freezoff_w9s3/client_stub_slow.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: + - ./data:/opt/federation-lab/data +# - ./docker_data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=slow + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '0.75' \ No newline at end of file diff --git a/deploy/p23_freezoff_w9s3/system_stub.yml b/deploy/p23_freezoff_w9s3/system_stub.yml new file mode 100644 index 00000000..77a19443 --- /dev/null +++ b/deploy/p23_freezoff_w9s3/system_stub.yml @@ -0,0 +1,27 @@ +# creating a multi-container docker +version: "3.3" +services: + fl_server: # name can be anything + container_name: federation-lab-server # what the name for this container would be + cpuset: '0-1' + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./data/MNIST:/opt/federation-lab/data/MNIST + - ./data:/opt/federation-lab/data + - ./output:/opt/federation-lab/output + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK=0 + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + ports: + - "5000:5000" # {machine-port}:{docker-port} + networks: + default: + ipv4_address: 10.5.0.11 +networks: + default: + external: + name: local_network_dev \ No newline at end of file diff --git a/fltk/client.py b/fltk/client.py index a0f06abc..103262fe 100644 --- a/fltk/client.py +++ b/fltk/client.py @@ -602,7 +602,8 @@ def calc_optimal_offloading_point(profiler_data, time_till_deadline, iterations_ if active_profiling: pre_train_loop_data[i] = loop_pre_train_end - loop_pre_train_start post_train_loop_data[i] = loop_post_train_end - loop_post_train_start - + p.remove_all_handles() + p3.remove_all_handles() control_end_time = time.time() end_loop_time = time.time() logging.info(f'Measure end time is {(control_end_time - control_start_time)}') @@ -737,11 +738,13 @@ def run_epochs(self, num_epoch, deadline: int = None, warmup=False): time.sleep(0.1) logging.info(f'Continuing after global_offload_received={global_offload_received} and offload_release={self.offload_release}') if self.offload_enabled and global_offload_received: - # self.configure_strategy(OffloadingStrategy.SWYH) - self.configure_strategy(OffloadingStrategy.VANILLA) + self.configure_strategy(OffloadingStrategy.SWYH) + # self.configure_strategy(OffloadingStrategy.VANILLA) logging.info('Processing offloaded model') self.load_offloaded_model() self.copy_offloaded_model_weights() + elapsed_time = time.time() - start + deadline -= elapsed_time loss_offload, weights_offload, training_process_offload, scheduler_data_offload, perf_data_offload = self.train(self.epoch_counter, deadline, warmup, use_offloaded_model=True) accuracy, test_loss, class_precision, 
class_recall, _accuracy_per_class = self.test(use_offloaded_model=True) global global_sender_id diff --git a/fltk/strategy/offloading.py b/fltk/strategy/offloading.py index fb405807..3f39f3e2 100644 --- a/fltk/strategy/offloading.py +++ b/fltk/strategy/offloading.py @@ -11,7 +11,8 @@ class OffloadingStrategy(Enum): TIFL_ADAPTIVE = 7, DYN_TERMINATE = 8, DYN_TERMINATE_SWYH = 9, - MODEL_OFFLOAD_STRICT = 10 + MODEL_OFFLOAD_STRICT = 10, + MODEL_OFFLOAD_STRICT_SWYH = 11 @classmethod def Parse(cls, string_value): @@ -35,6 +36,8 @@ def Parse(cls, string_value): return OffloadingStrategy.DYN_TERMINATE_SWYH if string_value == 'offload-strict': return OffloadingStrategy.MODEL_OFFLOAD_STRICT + if string_value == 'offload-strict-swyh': + return OffloadingStrategy.MODEL_OFFLOAD_STRICT_SWYH def parse_strategy(strategy: OffloadingStrategy): @@ -86,4 +89,9 @@ def parse_strategy(strategy: OffloadingStrategy): swyh_enabled = True freeze_layers_enabled = True offload_enabled = True + if strategy == OffloadingStrategy.MODEL_OFFLOAD_STRICT_SWYH: + deadline_enabled = True + swyh_enabled = True + freeze_layers_enabled = True + offload_enabled = True return deadline_enabled, swyh_enabled, freeze_layers_enabled, offload_enabled, dyn_terminate, dyn_terminate_swyh diff --git a/fltk/util/generate_docker_compose.py b/fltk/util/generate_docker_compose.py index 7d355d8c..d7c440e9 100644 --- a/fltk/util/generate_docker_compose.py +++ b/fltk/util/generate_docker_compose.py @@ -6,7 +6,7 @@ global_template_path = './deploy/templates' def load_system_template(template_path = global_template_path): - + print(f'Loading system template from {template_path}/system_stub.yml') with open(f'{template_path}/system_stub.yml') as file: documents = yaml.full_load(file) return documents @@ -41,6 +41,45 @@ def generate_client(id, template: dict, world_size: int, type='default', cpu_set def generate_compose_file(): print() +def generate_p23_freezoff_w9s3(): + template_path = get_deploy_path('p23_freezoff_w9s3') + 
num_clients = 9 + cpu_per_client = 1 + num_cpus = 20 + world_size = num_clients + 1 + system_template: dict = load_system_template(template_path=template_path) + + for key, item in enumerate(system_template['services']['fl_server']['environment']): + if item == 'WORLD_SIZE={world_size}': + system_template['services']['fl_server']['environment'][key] = item.format(world_size=world_size) + cpu_set = 0 + cpu_idx = 2 + for client_id in range(1, num_clients + 1): + client_type = 'default' + if 0 < client_id <= 3: + client_type = 'slow' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + elif 3 < client_id <= 6: + client_type = 'medium' + cpu_set = f'{cpu_idx}-{cpu_idx+1}' + cpu_idx += 2 + elif 6 < client_id <= 9: + client_type = 'fast' + cpu_set = f'{cpu_idx}-{cpu_idx + 2}' + cpu_idx += 3 + else: + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + + client_template: dict = load_client_template(type=client_type, template_path=template_path) + client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type, cpu_set=cpu_set) + system_template['services'].update(client_definition) + + with open(r'./docker-compose.yml', 'w') as file: + yaml.dump(system_template, file, sort_keys=False) + + def generate_terminate(num_clients = 16, medium=False): template_path = get_deploy_path('terminate') world_size = num_clients + 1 @@ -305,7 +344,8 @@ def run(name, num_clients = None, medium=False): 'dev': generate_dev, 'terminate': generate_terminate, 'p11_freezoff': generate_p11_freezoff, - 'p13_w6' : generate_p13_w6 + 'p13_w6' : generate_p13_w6, + 'p23_w9s3': generate_p23_freezoff_w9s3 } if num_clients: exp_dict[name](num_clients, medium) From 648da27f127b02fd130959d77d2394a4acd47b6d Mon Sep 17 00:00:00 2001 From: bacox Date: Fri, 28 Jan 2022 19:23:34 +0100 Subject: [PATCH 42/73] Add exp effect non-iid-ness --- .../descr.yaml | 27 ++++++++++++++ .../exps/fedavg-iid-uniform.yaml | 35 +++++++++++++++++++ .../exps/fedavg-non-iid-1.yaml | 35 +++++++++++++++++++ 
.../exps/fedavg-non-iid-10.yaml | 35 +++++++++++++++++++ .../exps/fedavg-non-iid-2.yaml | 35 +++++++++++++++++++ .../exps/fedavg-non-iid-5.yaml | 35 +++++++++++++++++++ .../exps/fedavg-non-iid-9.yaml | 35 +++++++++++++++++++ .../fedavg-iid-uniform.cfg.yaml | 7 ++++ .../fedavg-non-iid-1.cfg.yaml | 7 ++++ .../fedavg-non-iid-10.cfg.yaml | 7 ++++ .../fedavg-non-iid-2.cfg.yaml | 7 ++++ .../fedavg-non-iid-5.cfg.yaml | 7 ++++ .../p28_effect_of_non_iid_ness_mnist/gen.py | 26 ++++++++++++++ .../p28_effect_of_non_iid_ness_mnist/run.py | 30 ++++++++++++++++ .../client_stub_default.yml | 26 ++++++++++++++ .../p28_non_iid_effect/client_stub_fast.yml | 25 +++++++++++++ .../p28_non_iid_effect/client_stub_medium.yml | 25 +++++++++++++ .../p28_non_iid_effect/client_stub_slow.yml | 25 +++++++++++++ deploy/p28_non_iid_effect/system_stub.yml | 27 ++++++++++++++ fltk/util/generate_docker_compose.py | 28 ++++++++++++++- 20 files changed, 483 insertions(+), 1 deletion(-) create mode 100644 configs/p28_effect_of_non_iid_ness_mnist/descr.yaml create mode 100644 configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-iid-uniform.yaml create mode 100644 configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-1.yaml create mode 100644 configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-10.yaml create mode 100644 configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-2.yaml create mode 100644 configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-5.yaml create mode 100644 configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-9.yaml create mode 100644 configs/p28_effect_of_non_iid_ness_mnist/fedavg-iid-uniform.cfg.yaml create mode 100644 configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-1.cfg.yaml create mode 100644 configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-10.cfg.yaml create mode 100644 configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-2.cfg.yaml create mode 100644 configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-5.cfg.yaml create 
mode 100644 configs/p28_effect_of_non_iid_ness_mnist/gen.py create mode 100644 configs/p28_effect_of_non_iid_ness_mnist/run.py create mode 100644 deploy/p28_non_iid_effect/client_stub_default.yml create mode 100644 deploy/p28_non_iid_effect/client_stub_fast.yml create mode 100644 deploy/p28_non_iid_effect/client_stub_medium.yml create mode 100644 deploy/p28_non_iid_effect/client_stub_slow.yml create mode 100644 deploy/p28_non_iid_effect/system_stub.yml diff --git a/configs/p28_effect_of_non_iid_ness_mnist/descr.yaml b/configs/p28_effect_of_non_iid_ness_mnist/descr.yaml new file mode 100644 index 00000000..b947fd02 --- /dev/null +++ b/configs/p28_effect_of_non_iid_ness_mnist/descr.yaml @@ -0,0 +1,27 @@ +--- +# Experiment configuration +total_epochs: 150 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p28' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-iid-uniform.yaml b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-iid-uniform.yaml new file mode 100644 index 00000000..bf04e605 --- /dev/null +++ b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-iid-uniform.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 150 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p28' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: 
+ # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +experiment_prefix: 'p28_effect_of_non_iid_ness_mnist_fedavg-iid-uniform' diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-1.yaml b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-1.yaml new file mode 100644 index 00000000..fe9e8900 --- /dev/null +++ b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-1.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 150 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p28' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 1 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +experiment_prefix: 'p28_effect_of_non_iid_ness_mnist_fedavg-non-iid-1' diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-10.yaml b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-10.yaml new file mode 100644 index 00000000..fcd78384 --- /dev/null +++ 
b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-10.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 150 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p28' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 10 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +experiment_prefix: 'p28_effect_of_non_iid_ness_mnist_fedavg-non-iid-10' diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-2.yaml b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-2.yaml new file mode 100644 index 00000000..9e6908e0 --- /dev/null +++ b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-2.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 150 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p28' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || 
"dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +experiment_prefix: 'p28_effect_of_non_iid_ness_mnist_fedavg-non-iid-2' diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-5.yaml b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-5.yaml new file mode 100644 index 00000000..dee9afe9 --- /dev/null +++ b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-5.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 150 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p28' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +experiment_prefix: 'p28_effect_of_non_iid_ness_mnist_fedavg-non-iid-5' diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-9.yaml b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-9.yaml new file mode 100644 index 00000000..2faf5386 --- /dev/null +++ b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-9.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 
30 +warmup_round: false +output_location: 'output/p23' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 9 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +experiment_prefix: 'p28_effect_of_non_iid_ness_mnist_fedavg-non-iid-9' diff --git a/configs/p28_effect_of_non_iid_ness_mnist/fedavg-iid-uniform.cfg.yaml b/configs/p28_effect_of_non_iid_ness_mnist/fedavg-iid-uniform.cfg.yaml new file mode 100644 index 00000000..65bda1b5 --- /dev/null +++ b/configs/p28_effect_of_non_iid_ness_mnist/fedavg-iid-uniform.cfg.yaml @@ -0,0 +1,7 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused \ No newline at end of file diff --git a/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-1.cfg.yaml b/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-1.cfg.yaml new file mode 100644 index 00000000..645c08d9 --- /dev/null +++ b/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-1.cfg.yaml @@ -0,0 +1,7 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 1 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused \ No newline at end of file diff --git 
a/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-10.cfg.yaml b/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-10.cfg.yaml new file mode 100644 index 00000000..641a4579 --- /dev/null +++ b/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-10.cfg.yaml @@ -0,0 +1,7 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 10 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused \ No newline at end of file diff --git a/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-2.cfg.yaml b/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-2.cfg.yaml new file mode 100644 index 00000000..876dae51 --- /dev/null +++ b/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-2.cfg.yaml @@ -0,0 +1,7 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused \ No newline at end of file diff --git a/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-5.cfg.yaml b/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-5.cfg.yaml new file mode 100644 index 00000000..79bbc150 --- /dev/null +++ b/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-5.cfg.yaml @@ -0,0 +1,7 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused \ No newline at end of file diff --git a/configs/p28_effect_of_non_iid_ness_mnist/gen.py 
b/configs/p28_effect_of_non_iid_ness_mnist/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/p28_effect_of_non_iid_ness_mnist/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/p28_effect_of_non_iid_ness_mnist/run.py b/configs/p28_effect_of_non_iid_ness_mnist/run.py new file mode 100644 index 00000000..3201df1b --- /dev/null +++ b/configs/p28_effect_of_non_iid_ness_mnist/run.py @@ -0,0 +1,30 @@ +from pathlib import Path + +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + name = 'p28_non_iid_effect' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + 'fedavg.yaml', + 'offload_strict.yaml', + 'fednova.yaml', + 'fedprox.yaml', + 'offload.yaml', + 'tifl_adaptive.yaml', + 'tifl_basic.yaml', + 'dyn_terminate_swyh.yaml', + 'dyn_terminate.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + + 
print('Done') + + diff --git a/deploy/p28_non_iid_effect/client_stub_default.yml b/deploy/p28_non_iid_effect/client_stub_default.yml new file mode 100644 index 00000000..43d6c919 --- /dev/null +++ b/deploy/p28_non_iid_effect/client_stub_default.yml @@ -0,0 +1,26 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . # look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=default + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '1' +# memory: 1024M diff --git a/deploy/p28_non_iid_effect/client_stub_fast.yml b/deploy/p28_non_iid_effect/client_stub_fast.yml new file mode 100644 index 00000000..2c40393f --- /dev/null +++ b/deploy/p28_non_iid_effect/client_stub_fast.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=fast + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '1' diff --git a/deploy/p28_non_iid_effect/client_stub_medium.yml b/deploy/p28_non_iid_effect/client_stub_medium.yml new file mode 100644 index 00000000..677accdf --- /dev/null +++ b/deploy/p28_non_iid_effect/client_stub_medium.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=medium + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '0.75' diff --git a/deploy/p28_non_iid_effect/client_stub_slow.yml b/deploy/p28_non_iid_effect/client_stub_slow.yml new file mode 100644 index 00000000..75771735 --- /dev/null +++ b/deploy/p28_non_iid_effect/client_stub_slow.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: + - ./data:/opt/federation-lab/data +# - ./docker_data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=slow + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '0.1' \ No newline at end of file diff --git a/deploy/p28_non_iid_effect/system_stub.yml b/deploy/p28_non_iid_effect/system_stub.yml new file mode 100644 index 00000000..77a19443 --- /dev/null +++ b/deploy/p28_non_iid_effect/system_stub.yml @@ -0,0 +1,27 @@ +# creating a multi-container docker +version: "3.3" +services: + fl_server: # name can be anything + container_name: federation-lab-server # what the name for this container would be + cpuset: '0-1' + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./data/MNIST:/opt/federation-lab/data/MNIST + - ./data:/opt/federation-lab/data + - ./output:/opt/federation-lab/output + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK=0 + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + ports: + - "5000:5000" # {machine-port}:{docker-port} + networks: + default: + ipv4_address: 10.5.0.11 +networks: + default: + external: + name: local_network_dev \ No newline at end of file diff --git a/fltk/util/generate_docker_compose.py b/fltk/util/generate_docker_compose.py index d7c440e9..3d1dbeed 100644 --- a/fltk/util/generate_docker_compose.py +++ b/fltk/util/generate_docker_compose.py @@ -41,6 +41,31 @@ def generate_client(id, template: dict, world_size: int, type='default', cpu_set def generate_compose_file(): print() +def generate_p28_non_iid_effect(): + template_path = get_deploy_path('p28_non_iid_effect') + num_clients = 6 + cpu_per_client = 3 + num_cpus = 20 + world_size = num_clients + 1 + system_template: dict = load_system_template(template_path=template_path) + + for key, item in enumerate(system_template['services']['fl_server']['environment']): + if item == 'WORLD_SIZE={world_size}': + system_template['services']['fl_server']['environment'][key] = item.format(world_size=world_size) + cpu_set = 0 + cpu_idx = 2 + for client_id in range(1, num_clients + 1): + client_type = 'fast' + cpu_set = f'{cpu_idx}-{cpu_idx + 2}' + cpu_idx += 3 + + client_template: dict = load_client_template(type=client_type, template_path=template_path) + client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type, cpu_set=cpu_set) + system_template['services'].update(client_definition) + + with open(r'./docker-compose.yml', 'w') as file: + yaml.dump(system_template, file, sort_keys=False) + def generate_p23_freezoff_w9s3(): template_path = get_deploy_path('p23_freezoff_w9s3') 
num_clients = 9 @@ -345,7 +370,8 @@ def run(name, num_clients = None, medium=False): 'terminate': generate_terminate, 'p11_freezoff': generate_p11_freezoff, 'p13_w6' : generate_p13_w6, - 'p23_w9s3': generate_p23_freezoff_w9s3 + 'p23_w9s3': generate_p23_freezoff_w9s3, + 'p28_non_iid_effect': generate_p28_non_iid_effect, } if num_clients: exp_dict[name](num_clients, medium) From 38b3ac148b5c3a5cab4bac2cff86ab558105d7c6 Mon Sep 17 00:00:00 2001 From: bacox Date: Fri, 28 Jan 2022 20:34:46 +0100 Subject: [PATCH 43/73] Add freezing effect experiment --- .../descr.yaml | 6 +-- .../exps/fedavg-iid-uniform.yaml | 6 +-- .../exps/fedavg-non-iid-1.yaml | 6 +-- .../exps/fedavg-non-iid-10.yaml | 6 +-- .../exps/fedavg-non-iid-2.yaml | 6 +-- .../exps/fedavg-non-iid-5.yaml | 6 +-- .../p29_effect_of_freezing_mnist/descr.yaml | 27 ++++++++++++++ .../exps/fedavg-iid-freeze-0.yaml | 36 ++++++++++++++++++ .../exps/fedavg-iid-freeze-10.yaml | 36 ++++++++++++++++++ .../exps/fedavg-iid-freeze-100.yaml | 36 ++++++++++++++++++ .../exps/fedavg-iid-freeze-20.yaml | 36 ++++++++++++++++++ .../exps/fedavg-iid-freeze-30.yaml | 36 ++++++++++++++++++ .../exps/fedavg-iid-freeze-40.yaml | 36 ++++++++++++++++++ .../exps/fedavg-iid-freeze-50.yaml | 36 ++++++++++++++++++ .../exps/fedavg-iid-freeze-60.yaml | 36 ++++++++++++++++++ .../exps/fedavg-iid-freeze-70.yaml | 36 ++++++++++++++++++ .../exps/fedavg-iid-freeze-80.yaml | 36 ++++++++++++++++++ .../exps/fedavg-iid-freeze-90.yaml | 36 ++++++++++++++++++ .../exps/fedavg-non_iid-freeze-0.yaml} | 11 +++--- .../exps/fedavg-non_iid-freeze-10.yaml | 36 ++++++++++++++++++ .../exps/fedavg-non_iid-freeze-100.yaml | 36 ++++++++++++++++++ .../exps/fedavg-non_iid-freeze-20.yaml | 36 ++++++++++++++++++ .../exps/fedavg-non_iid-freeze-30.yaml | 36 ++++++++++++++++++ .../exps/fedavg-non_iid-freeze-40.yaml | 36 ++++++++++++++++++ .../exps/fedavg-non_iid-freeze-50.yaml | 36 ++++++++++++++++++ .../exps/fedavg-non_iid-freeze-60.yaml | 36 ++++++++++++++++++ 
.../exps/fedavg-non_iid-freeze-70.yaml | 36 ++++++++++++++++++ .../exps/fedavg-non_iid-freeze-80.yaml | 36 ++++++++++++++++++ .../exps/fedavg-non_iid-freeze-90.yaml | 36 ++++++++++++++++++ .../fedavg-iid-freeze-0.cfg.yaml | 8 ++++ .../fedavg-iid-freeze-10.cfg.yaml | 8 ++++ .../fedavg-iid-freeze-100.cfg.yaml | 8 ++++ .../fedavg-iid-freeze-20.cfg.yaml | 8 ++++ .../fedavg-iid-freeze-30.cfg.yaml | 8 ++++ .../fedavg-iid-freeze-40.cfg.yaml | 8 ++++ .../fedavg-iid-freeze-50.cfg.yaml | 8 ++++ .../fedavg-iid-freeze-60.cfg.yaml | 8 ++++ .../fedavg-iid-freeze-70.cfg.yaml | 8 ++++ .../fedavg-iid-freeze-80.cfg.yaml | 8 ++++ .../fedavg-iid-freeze-90.cfg.yaml | 8 ++++ .../fedavg-non_iid-freeze-0.cfg.yaml | 8 ++++ .../fedavg-non_iid-freeze-10.cfg.yaml | 8 ++++ .../fedavg-non_iid-freeze-100.cfg.yaml | 8 ++++ .../fedavg-non_iid-freeze-20.cfg.yaml | 8 ++++ .../fedavg-non_iid-freeze-30.cfg.yaml | 8 ++++ .../fedavg-non_iid-freeze-40.cfg.yaml | 8 ++++ .../fedavg-non_iid-freeze-50.cfg.yaml | 8 ++++ .../fedavg-non_iid-freeze-60.cfg.yaml | 8 ++++ .../fedavg-non_iid-freeze-70.cfg.yaml | 8 ++++ .../fedavg-non_iid-freeze-80.cfg.yaml | 8 ++++ .../fedavg-non_iid-freeze-90.cfg.yaml | 8 ++++ configs/p29_effect_of_freezing_mnist/gen.py | 26 +++++++++++++ configs/p29_effect_of_freezing_mnist/run.py | 30 +++++++++++++++ configs/p30_freezing_effect_dev/descr.yaml | 27 ++++++++++++++ .../exps/fedavg-iid-freeze-100.yaml | 36 ++++++++++++++++++ .../exps/fedavg-iid-freeze-50.yaml | 36 ++++++++++++++++++ .../fedavg-iid-freeze-100.cfg.yaml | 8 ++++ .../fedavg-iid-freeze-50.cfg.yaml | 8 ++++ configs/p30_freezing_effect_dev/gen.py | 26 +++++++++++++ configs/p30_freezing_effect_dev/run.py | 23 ++++++++++++ fltk/client.py | 21 +++++++---- fltk/util/base_config.py | 5 +++ fltk/util/generate_docker_compose.py | 37 ++++++++++++++++--- 63 files changed, 1254 insertions(+), 35 deletions(-) create mode 100644 configs/p29_effect_of_freezing_mnist/descr.yaml create mode 100644 
configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-0.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-10.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-100.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-20.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-30.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-40.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-50.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-60.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-70.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-80.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-90.yaml rename configs/{p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-9.yaml => p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-0.yaml} (76%) create mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-10.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-100.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-20.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-30.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-40.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-50.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-60.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-70.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-80.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-90.yaml 
create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-0.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-10.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-100.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-20.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-30.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-40.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-50.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-60.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-70.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-80.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-90.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-0.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-10.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-100.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-20.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-30.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-40.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-50.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-60.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-70.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-80.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-90.cfg.yaml create mode 100644 configs/p29_effect_of_freezing_mnist/gen.py 
create mode 100644 configs/p29_effect_of_freezing_mnist/run.py create mode 100644 configs/p30_freezing_effect_dev/descr.yaml create mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-100.yaml create mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-50.yaml create mode 100644 configs/p30_freezing_effect_dev/fedavg-iid-freeze-100.cfg.yaml create mode 100644 configs/p30_freezing_effect_dev/fedavg-iid-freeze-50.cfg.yaml create mode 100644 configs/p30_freezing_effect_dev/gen.py create mode 100644 configs/p30_freezing_effect_dev/run.py diff --git a/configs/p28_effect_of_non_iid_ness_mnist/descr.yaml b/configs/p28_effect_of_non_iid_ness_mnist/descr.yaml index b947fd02..343cd958 100644 --- a/configs/p28_effect_of_non_iid_ness_mnist/descr.yaml +++ b/configs/p28_effect_of_non_iid_ness_mnist/descr.yaml @@ -1,6 +1,6 @@ --- # Experiment configuration -total_epochs: 150 +total_epochs: 50 epochs_per_cycle: 1 wait_for_clients: true net: MNISTCNN @@ -12,7 +12,7 @@ warmup_round: false output_location: 'output/p28' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 6 +clients_per_round: 2 node_groups: slow: [1, 6] medium: [7, 12] @@ -24,4 +24,4 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 18 + amount: 6 diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-iid-uniform.yaml b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-iid-uniform.yaml index bf04e605..e9c89eeb 100644 --- a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-iid-uniform.yaml +++ b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-iid-uniform.yaml @@ -1,6 +1,6 @@ --- # Experiment configuration -total_epochs: 150 +total_epochs: 50 epochs_per_cycle: 1 wait_for_clients: true net: MNISTCNN @@ -12,7 +12,7 @@ warmup_round: false output_location: 'output/p28' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 6 +clients_per_round: 2 node_groups: slow: [1, 6] medium: [7, 12] @@ -24,7 +24,7 @@ system: nic: 'eth0' 
# nic: 'enp3s0' clients: - amount: 18 + amount: 6 # Individual configuration offload_stategy: vanilla deadline: 500000 diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-1.yaml b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-1.yaml index fe9e8900..c684c850 100644 --- a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-1.yaml +++ b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-1.yaml @@ -1,6 +1,6 @@ --- # Experiment configuration -total_epochs: 150 +total_epochs: 50 epochs_per_cycle: 1 wait_for_clients: true net: MNISTCNN @@ -12,7 +12,7 @@ warmup_round: false output_location: 'output/p28' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 6 +clients_per_round: 2 node_groups: slow: [1, 6] medium: [7, 12] @@ -24,7 +24,7 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 18 + amount: 6 # Individual configuration offload_stategy: vanilla deadline: 500000 diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-10.yaml b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-10.yaml index fcd78384..ae1cd0f0 100644 --- a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-10.yaml +++ b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-10.yaml @@ -1,6 +1,6 @@ --- # Experiment configuration -total_epochs: 150 +total_epochs: 50 epochs_per_cycle: 1 wait_for_clients: true net: MNISTCNN @@ -12,7 +12,7 @@ warmup_round: false output_location: 'output/p28' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 6 +clients_per_round: 2 node_groups: slow: [1, 6] medium: [7, 12] @@ -24,7 +24,7 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 18 + amount: 6 # Individual configuration offload_stategy: vanilla deadline: 500000 diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-2.yaml b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-2.yaml index 9e6908e0..7a5a1fc1 100644 --- 
a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-2.yaml +++ b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-2.yaml @@ -1,6 +1,6 @@ --- # Experiment configuration -total_epochs: 150 +total_epochs: 50 epochs_per_cycle: 1 wait_for_clients: true net: MNISTCNN @@ -12,7 +12,7 @@ warmup_round: false output_location: 'output/p28' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 6 +clients_per_round: 2 node_groups: slow: [1, 6] medium: [7, 12] @@ -24,7 +24,7 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 18 + amount: 6 # Individual configuration offload_stategy: vanilla deadline: 500000 diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-5.yaml b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-5.yaml index dee9afe9..4628bb9e 100644 --- a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-5.yaml +++ b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-5.yaml @@ -1,6 +1,6 @@ --- # Experiment configuration -total_epochs: 150 +total_epochs: 50 epochs_per_cycle: 1 wait_for_clients: true net: MNISTCNN @@ -12,7 +12,7 @@ warmup_round: false output_location: 'output/p28' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 6 +clients_per_round: 2 node_groups: slow: [1, 6] medium: [7, 12] @@ -24,7 +24,7 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 18 + amount: 6 # Individual configuration offload_stategy: vanilla deadline: 500000 diff --git a/configs/p29_effect_of_freezing_mnist/descr.yaml b/configs/p29_effect_of_freezing_mnist/descr.yaml new file mode 100644 index 00000000..40f964ef --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/descr.yaml @@ -0,0 +1,27 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p29' 
+tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-0.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-0.yaml new file mode 100644 index 00000000..38642015 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-0.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p29' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [] +experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-iid-freeze-0' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-10.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-10.yaml new file mode 100644 index 00000000..280fde8a --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-10.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will 
force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p29' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1] +experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-iid-freeze-10' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-100.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-100.yaml new file mode 100644 index 00000000..f4371b42 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-100.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p29' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8, 10] +experiment_prefix: 
'p29_effect_of_freezing_mnist_fedavg-iid-freeze-100' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-20.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-20.yaml new file mode 100644 index 00000000..a55e48a4 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-20.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p29' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2] +experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-iid-freeze-20' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-30.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-30.yaml new file mode 100644 index 00000000..3ab8f8a4 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-30.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p29' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 
12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3] +experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-iid-freeze-30' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-40.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-40.yaml new file mode 100644 index 00000000..03500d8e --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-40.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p29' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4] +experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-iid-freeze-40' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-50.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-50.yaml new file mode 100644 index 
00000000..83c97db4 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-50.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p29' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5] +experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-iid-freeze-50' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-60.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-60.yaml new file mode 100644 index 00000000..b47c0bbb --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-60.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p29' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 
500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6] +experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-iid-freeze-60' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-70.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-70.yaml new file mode 100644 index 00000000..3520be09 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-70.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p29' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6, 7] +experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-iid-freeze-70' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-80.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-80.yaml new file mode 100644 index 00000000..021485b6 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-80.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 
+wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p29' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8] +experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-iid-freeze-80' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-90.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-90.yaml new file mode 100644 index 00000000..3c9b2bda --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-90.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p29' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # 
random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8, 9] +experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-iid-freeze-90' diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-9.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-0.yaml similarity index 76% rename from configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-9.yaml rename to configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-0.yaml index 2faf5386..5e41e478 100644 --- a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-9.yaml +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-0.yaml @@ -9,10 +9,10 @@ dataset: mnist cuda: false profiling_time: 30 warmup_round: false -output_location: 'output/p23' +output_location: 'output/p29' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 6 +clients_per_round: 3 node_groups: slow: [1, 6] medium: [7, 12] @@ -24,12 +24,13 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 18 + amount: 10 # Individual configuration offload_stategy: vanilla deadline: 500000 sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" sampler_args: - - 9 # label limit || q probability || alpha || unused + - 2 # label limit || q probability || alpha || unused - 42 # random seed || random seed || random seed || unused -experiment_prefix: 'p28_effect_of_non_iid_ness_mnist_fedavg-non-iid-9' +freeze_clients: [] +experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-0' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-10.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-10.yaml new file mode 100644 index 00000000..4f3cd557 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-10.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true 
+net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p29' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1] +experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-10' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-100.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-100.yaml new file mode 100644 index 00000000..aa4f26af --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-100.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p29' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random 
seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8, 10] +experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-100' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-20.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-20.yaml new file mode 100644 index 00000000..53932748 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-20.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p29' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2] +experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-20' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-30.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-30.yaml new file mode 100644 index 00000000..58b9ac71 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-30.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 
'output/p29' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3] +experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-30' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-40.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-40.yaml new file mode 100644 index 00000000..9437dd2f --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-40.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p29' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4] +experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-40' diff --git 
a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-50.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-50.yaml new file mode 100644 index 00000000..6d680b4c --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-50.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p29' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5] +experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-50' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-60.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-60.yaml new file mode 100644 index 00000000..a54ef747 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-60.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p29' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] 
+system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6] +experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-60' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-70.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-70.yaml new file mode 100644 index 00000000..1a2acd61 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-70.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p29' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6, 7] +experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-70' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-80.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-80.yaml new 
file mode 100644 index 00000000..666a2d67 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-80.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p29' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8] +experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-80' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-90.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-90.yaml new file mode 100644 index 00000000..ec92b09d --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-90.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p29' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual 
configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8, 9] +experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-90' diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-0.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-0.cfg.yaml new file mode 100644 index 00000000..7a53cb0f --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-0.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-10.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-10.cfg.yaml new file mode 100644 index 00000000..b17e30a6 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-10.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-100.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-100.cfg.yaml new file mode 100644 index 00000000..bd803e4f --- /dev/null +++ 
b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-100.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8, 10] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-20.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-20.cfg.yaml new file mode 100644 index 00000000..0a5689e0 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-20.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-30.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-30.cfg.yaml new file mode 100644 index 00000000..6e1c7303 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-30.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-40.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-40.cfg.yaml new file mode 100644 index 00000000..7c856f57 --- /dev/null +++ 
b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-40.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-50.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-50.cfg.yaml new file mode 100644 index 00000000..b69a2dd7 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-50.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-60.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-60.cfg.yaml new file mode 100644 index 00000000..c53f6f96 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-60.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-70.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-70.cfg.yaml new file mode 100644 index 00000000..6539a04c --- /dev/null 
+++ b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-70.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6, 7] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-80.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-80.cfg.yaml new file mode 100644 index 00000000..e8699931 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-80.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-90.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-90.cfg.yaml new file mode 100644 index 00000000..f99391ec --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-90.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8, 9] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-0.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-0.cfg.yaml new file mode 100644 
index 00000000..ab6135eb --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-0.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-10.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-10.cfg.yaml new file mode 100644 index 00000000..295d3428 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-10.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-100.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-100.cfg.yaml new file mode 100644 index 00000000..077ef99a --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-100.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8, 10] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-20.cfg.yaml 
b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-20.cfg.yaml new file mode 100644 index 00000000..3de07281 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-20.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-30.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-30.cfg.yaml new file mode 100644 index 00000000..91a88894 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-30.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-40.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-40.cfg.yaml new file mode 100644 index 00000000..4ceae941 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-40.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4] \ No newline at end of file diff --git 
a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-50.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-50.cfg.yaml new file mode 100644 index 00000000..9f8c7a99 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-50.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-60.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-60.cfg.yaml new file mode 100644 index 00000000..671fb412 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-60.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-70.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-70.cfg.yaml new file mode 100644 index 00000000..75cdd8a2 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-70.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 
4, 5, 6, 7] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-80.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-80.cfg.yaml new file mode 100644 index 00000000..9e82753a --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-80.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-90.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-90.cfg.yaml new file mode 100644 index 00000000..d9055b37 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-90.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8, 9] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/gen.py b/configs/p29_effect_of_freezing_mnist/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 
'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/p29_effect_of_freezing_mnist/run.py b/configs/p29_effect_of_freezing_mnist/run.py new file mode 100644 index 00000000..3201df1b --- /dev/null +++ b/configs/p29_effect_of_freezing_mnist/run.py @@ -0,0 +1,30 @@ +from pathlib import Path + +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + name = 'p28_non_iid_effect' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + 'fedavg.yaml', + 'offload_strict.yaml', + 'fednova.yaml', + 'fedprox.yaml', + 'offload.yaml', + 'tifl_adaptive.yaml', + 'tifl_basic.yaml', + 'dyn_terminate_swyh.yaml', + 'dyn_terminate.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + + print('Done') + + diff --git a/configs/p30_freezing_effect_dev/descr.yaml b/configs/p30_freezing_effect_dev/descr.yaml new file mode 100644 index 00000000..cd26fcfa --- /dev/null +++ b/configs/p30_freezing_effect_dev/descr.yaml @@ -0,0 +1,27 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p30' +tensor_board_active: 
true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-100.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-100.yaml new file mode 100644 index 00000000..55b4adb3 --- /dev/null +++ b/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-100.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p30' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6] +experiment_prefix: 'p30_freezing_effect_dev_fedavg-iid-freeze-100' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-50.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-50.yaml new file mode 100644 index 00000000..38601371 --- /dev/null +++ b/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-50.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 
30 +warmup_round: false +output_location: 'output/p30' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3] +experiment_prefix: 'p30_freezing_effect_dev_fedavg-iid-freeze-50' diff --git a/configs/p30_freezing_effect_dev/fedavg-iid-freeze-100.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-iid-freeze-100.cfg.yaml new file mode 100644 index 00000000..c53f6f96 --- /dev/null +++ b/configs/p30_freezing_effect_dev/fedavg-iid-freeze-100.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-iid-freeze-50.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-iid-freeze-50.cfg.yaml new file mode 100644 index 00000000..6e1c7303 --- /dev/null +++ b/configs/p30_freezing_effect_dev/fedavg-iid-freeze-50.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: 
[1, 2, 3] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/gen.py b/configs/p30_freezing_effect_dev/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/p30_freezing_effect_dev/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/p30_freezing_effect_dev/run.py b/configs/p30_freezing_effect_dev/run.py new file mode 100644 index 00000000..7d2c19d2 --- /dev/null +++ b/configs/p30_freezing_effect_dev/run.py @@ -0,0 +1,23 @@ +from pathlib import Path + +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + name = 'p30_dev' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + 'fedavg-iid-freeze-50.yaml', + 'fedavg-iid-freeze-100.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + + print('Done') + + diff --git a/fltk/client.py b/fltk/client.py index 103262fe..c4cf6ccd 100644 --- 
a/fltk/client.py +++ b/fltk/client.py @@ -375,7 +375,6 @@ def train(self, epoch, deadline: int = None, warmup=False, use_offloaded_model=F :type epoch: int """ - perf_data = { 'total_duration': 0, 'p_v2_data': None, @@ -428,13 +427,21 @@ def train(self, epoch, deadline: int = None, warmup=False, use_offloaded_model=F split_point = self.args.nets_split_point[self.args.net_name] p = P2(profiling_size, split_point - 1) p3 = P3(profiling_size, split_point - 1) - if use_offloaded_model: - p.attach(self.offloaded_net) - p3.attach(self.offloaded_net) + + profiler_active = False + # Freezing effect experiment + if self.rank in self.args.freeze_clients: + logging.info('I need to freeze!') + split_point = self.args.nets_split_point[self.args.net_name] + self.freeze_layers2(split_point, self.net) else: - p.attach(self.net) - p3.attach(self.net) - profiler_active = True + if use_offloaded_model: + p.attach(self.offloaded_net) + p3.attach(self.offloaded_net) + else: + p.attach(self.net) + p3.attach(self.net) + profiler_active = True control_start_time = time.time() training_process = 0 diff --git a/fltk/util/base_config.py b/fltk/util/base_config.py index 063f3f41..8e3f45e3 100644 --- a/fltk/util/base_config.py +++ b/fltk/util/base_config.py @@ -133,6 +133,9 @@ def __init__(self): self.default_model_folder_path = "default_models" self.data_path = "data" + # For freezing effect experiment + self.freeze_clients = [] + ########### # Methods # ########### @@ -214,6 +217,8 @@ def merge_yaml(self, cfg = {}): if 'epochs_per_round' in cfg: self.epochs_per_round = cfg['epochs_per_round'] + if 'freeze_clients' in cfg: + self.freeze_clients = cfg['freeze_clients'] diff --git a/fltk/util/generate_docker_compose.py b/fltk/util/generate_docker_compose.py index 3d1dbeed..a5d4627b 100644 --- a/fltk/util/generate_docker_compose.py +++ b/fltk/util/generate_docker_compose.py @@ -41,10 +41,36 @@ def generate_client(id, template: dict, world_size: int, type='default', cpu_set def 
generate_compose_file(): print() -def generate_p28_non_iid_effect(): + +def generate_p30_freezing_effect_dev(): template_path = get_deploy_path('p28_non_iid_effect') num_clients = 6 - cpu_per_client = 3 + cpu_per_client = 1 + num_cpus = 20 + world_size = num_clients + 1 + system_template: dict = load_system_template(template_path=template_path) + + for key, item in enumerate(system_template['services']['fl_server']['environment']): + if item == 'WORLD_SIZE={world_size}': + system_template['services']['fl_server']['environment'][key] = item.format(world_size=world_size) + cpu_set = 0 + cpu_idx = 2 + for client_id in range(1, num_clients + 1): + client_type = 'default' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + + client_template: dict = load_client_template(type=client_type, template_path=template_path) + client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type, cpu_set=cpu_set) + system_template['services'].update(client_definition) + + with open(r'./docker-compose.yml', 'w') as file: + yaml.dump(system_template, file, sort_keys=False) + +def generate_p28_non_iid_effect(): + template_path = get_deploy_path('p28_non_iid_effect') + num_clients = 10 + cpu_per_client = 1 num_cpus = 20 world_size = num_clients + 1 system_template: dict = load_system_template(template_path=template_path) @@ -55,9 +81,9 @@ def generate_p28_non_iid_effect(): cpu_set = 0 cpu_idx = 2 for client_id in range(1, num_clients + 1): - client_type = 'fast' - cpu_set = f'{cpu_idx}-{cpu_idx + 2}' - cpu_idx += 3 + client_type = 'default' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 client_template: dict = load_client_template(type=client_type, template_path=template_path) client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type, cpu_set=cpu_set) @@ -372,6 +398,7 @@ def run(name, num_clients = None, medium=False): 'p13_w6' : generate_p13_w6, 'p23_w9s3': generate_p23_freezoff_w9s3, 'p28_non_iid_effect': 
generate_p28_non_iid_effect, + 'p30_dev': generate_p30_freezing_effect_dev, } if num_clients: exp_dict[name](num_clients, medium) From 2dcd3fb068171391550f1aadb73cc4075f274bb1 Mon Sep 17 00:00:00 2001 From: bacox Date: Sat, 29 Jan 2022 00:18:10 +0100 Subject: [PATCH 44/73] Add freezing effect epxeriment --- .../exps/fedavg-iid-freeze-0.yaml | 36 +++++++++++++++++++ .../exps/fedavg-iid-freeze-16.yaml | 36 +++++++++++++++++++ .../exps/fedavg-iid-freeze-33.yaml | 36 +++++++++++++++++++ .../exps/fedavg-iid-freeze-66.yaml | 36 +++++++++++++++++++ .../exps/fedavg-iid-freeze-83.yaml | 36 +++++++++++++++++++ .../exps/fedavg-non-iid-freeze-0.yaml | 36 +++++++++++++++++++ .../exps/fedavg-non-iid-freeze-100.yaml | 36 +++++++++++++++++++ .../exps/fedavg-non-iid-freeze-16.yaml | 36 +++++++++++++++++++ .../exps/fedavg-non-iid-freeze-33.yaml | 36 +++++++++++++++++++ .../exps/fedavg-non-iid-freeze-50.yaml | 36 +++++++++++++++++++ .../exps/fedavg-non-iid-freeze-66.yaml | 36 +++++++++++++++++++ .../exps/fedavg-non-iid-freeze-83.yaml | 36 +++++++++++++++++++ .../fedavg-iid-freeze-0.cfg.yaml | 8 +++++ .../fedavg-iid-freeze-16.cfg.yaml | 8 +++++ .../fedavg-iid-freeze-33.cfg.yaml | 8 +++++ .../fedavg-iid-freeze-66.cfg.yaml | 8 +++++ .../fedavg-iid-freeze-83.cfg.yaml | 8 +++++ .../fedavg-non-iid-freeze-0.cfg.yaml | 8 +++++ .../fedavg-non-iid-freeze-100.cfg.yaml | 8 +++++ .../fedavg-non-iid-freeze-16.cfg.yaml | 8 +++++ .../fedavg-non-iid-freeze-33.cfg.yaml | 8 +++++ .../fedavg-non-iid-freeze-50.cfg.yaml | 8 +++++ .../fedavg-non-iid-freeze-66.cfg.yaml | 8 +++++ .../fedavg-non-iid-freeze-83.cfg.yaml | 8 +++++ configs/p30_freezing_effect_dev/run.py | 12 +++++++ 25 files changed, 540 insertions(+) create mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-0.yaml create mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-16.yaml create mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-33.yaml create mode 100644 
configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-66.yaml create mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-83.yaml create mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-0.yaml create mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-100.yaml create mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-16.yaml create mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-33.yaml create mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-50.yaml create mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-66.yaml create mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-83.yaml create mode 100644 configs/p30_freezing_effect_dev/fedavg-iid-freeze-0.cfg.yaml create mode 100644 configs/p30_freezing_effect_dev/fedavg-iid-freeze-16.cfg.yaml create mode 100644 configs/p30_freezing_effect_dev/fedavg-iid-freeze-33.cfg.yaml create mode 100644 configs/p30_freezing_effect_dev/fedavg-iid-freeze-66.cfg.yaml create mode 100644 configs/p30_freezing_effect_dev/fedavg-iid-freeze-83.cfg.yaml create mode 100644 configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-0.cfg.yaml create mode 100644 configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-100.cfg.yaml create mode 100644 configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-16.cfg.yaml create mode 100644 configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-33.cfg.yaml create mode 100644 configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-50.cfg.yaml create mode 100644 configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-66.cfg.yaml create mode 100644 configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-83.cfg.yaml diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-0.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-0.yaml new file mode 100644 index 00000000..bb7e53a6 --- /dev/null +++ 
b/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-0.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p30' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [] +experiment_prefix: 'p30_freezing_effect_dev_fedavg-iid-freeze-0' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-16.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-16.yaml new file mode 100644 index 00000000..3320a44b --- /dev/null +++ b/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-16.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p30' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || 
"uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1] +experiment_prefix: 'p30_freezing_effect_dev_fedavg-iid-freeze-16' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-33.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-33.yaml new file mode 100644 index 00000000..5105d2eb --- /dev/null +++ b/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-33.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p30' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2] +experiment_prefix: 'p30_freezing_effect_dev_fedavg-iid-freeze-33' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-66.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-66.yaml new file mode 100644 index 00000000..3d1c1628 --- /dev/null +++ b/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-66.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 
+warmup_round: false +output_location: 'output/p30' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4] +experiment_prefix: 'p30_freezing_effect_dev_fedavg-iid-freeze-66' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-83.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-83.yaml new file mode 100644 index 00000000..1732f166 --- /dev/null +++ b/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-83.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p30' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5] +experiment_prefix: 'p30_freezing_effect_dev_fedavg-iid-freeze-83' diff --git 
a/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-0.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-0.yaml new file mode 100644 index 00000000..46355b63 --- /dev/null +++ b/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-0.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p30' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [] +experiment_prefix: 'p30_freezing_effect_dev_fedavg-non-iid-freeze-0' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-100.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-100.yaml new file mode 100644 index 00000000..ea0edb03 --- /dev/null +++ b/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-100.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p30' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + 
hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6] +experiment_prefix: 'p30_freezing_effect_dev_fedavg-non-iid-freeze-100' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-16.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-16.yaml new file mode 100644 index 00000000..86a47161 --- /dev/null +++ b/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-16.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p30' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1] +experiment_prefix: 'p30_freezing_effect_dev_fedavg-non-iid-freeze-16' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-33.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-33.yaml new file mode 100644 index 00000000..a382b7ab --- /dev/null +++ 
b/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-33.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p30' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2] +experiment_prefix: 'p30_freezing_effect_dev_fedavg-non-iid-freeze-33' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-50.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-50.yaml new file mode 100644 index 00000000..5fcdc08f --- /dev/null +++ b/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-50.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p30' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q 
sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3] +experiment_prefix: 'p30_freezing_effect_dev_fedavg-non-iid-freeze-50' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-66.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-66.yaml new file mode 100644 index 00000000..69e32d91 --- /dev/null +++ b/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-66.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p30' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4] +experiment_prefix: 'p30_freezing_effect_dev_fedavg-non-iid-freeze-66' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-83.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-83.yaml new file mode 100644 index 00000000..0ff8e473 --- /dev/null +++ b/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-83.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; 
setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p30' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 6 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5] +experiment_prefix: 'p30_freezing_effect_dev_fedavg-non-iid-freeze-83' diff --git a/configs/p30_freezing_effect_dev/fedavg-iid-freeze-0.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-iid-freeze-0.cfg.yaml new file mode 100644 index 00000000..7a53cb0f --- /dev/null +++ b/configs/p30_freezing_effect_dev/fedavg-iid-freeze-0.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-iid-freeze-16.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-iid-freeze-16.cfg.yaml new file mode 100644 index 00000000..b17e30a6 --- /dev/null +++ b/configs/p30_freezing_effect_dev/fedavg-iid-freeze-16.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || 
random seed || random seed || unused +freeze_clients: [1] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-iid-freeze-33.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-iid-freeze-33.cfg.yaml new file mode 100644 index 00000000..0a5689e0 --- /dev/null +++ b/configs/p30_freezing_effect_dev/fedavg-iid-freeze-33.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-iid-freeze-66.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-iid-freeze-66.cfg.yaml new file mode 100644 index 00000000..7c856f57 --- /dev/null +++ b/configs/p30_freezing_effect_dev/fedavg-iid-freeze-66.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-iid-freeze-83.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-iid-freeze-83.cfg.yaml new file mode 100644 index 00000000..b69a2dd7 --- /dev/null +++ b/configs/p30_freezing_effect_dev/fedavg-iid-freeze-83.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 
5] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-0.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-0.cfg.yaml new file mode 100644 index 00000000..ab6135eb --- /dev/null +++ b/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-0.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-100.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-100.cfg.yaml new file mode 100644 index 00000000..671fb412 --- /dev/null +++ b/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-100.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5, 6] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-16.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-16.cfg.yaml new file mode 100644 index 00000000..295d3428 --- /dev/null +++ b/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-16.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1] \ No newline at end of 
file diff --git a/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-33.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-33.cfg.yaml new file mode 100644 index 00000000..3de07281 --- /dev/null +++ b/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-33.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-50.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-50.cfg.yaml new file mode 100644 index 00000000..91a88894 --- /dev/null +++ b/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-50.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-66.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-66.cfg.yaml new file mode 100644 index 00000000..4ceae941 --- /dev/null +++ b/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-66.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4] \ No newline at end of file diff --git 
a/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-83.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-83.cfg.yaml new file mode 100644 index 00000000..9f8c7a99 --- /dev/null +++ b/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-83.cfg.yaml @@ -0,0 +1,8 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +freeze_clients: [1, 2, 3, 4, 5] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/run.py b/configs/p30_freezing_effect_dev/run.py index 7d2c19d2..bfeed62c 100644 --- a/configs/p30_freezing_effect_dev/run.py +++ b/configs/p30_freezing_effect_dev/run.py @@ -7,8 +7,20 @@ generate_docker(name) base_path = f'configs/{Path(__file__).parent.name}' exp_list = [ + 'fedavg-iid-freeze-0.yaml', + 'fedavg-iid-freeze-16.yaml', 'fedavg-iid-freeze-50.yaml', + 'fedavg-iid-freeze-83.yaml', + 'fedavg-non-iid-freeze-100.yaml', + 'fedavg-non-iid-freeze-33.yaml', + 'fedavg-non-iid-freeze-66.yaml', 'fedavg-iid-freeze-100.yaml', + 'fedavg-iid-freeze-33.yaml', + 'fedavg-iid-freeze-66.yaml', + 'fedavg-non-iid-freeze-0.yaml', + 'fedavg-non-iid-freeze-16.yaml', + 'fedavg-non-iid-freeze-50.yaml', + 'fedavg-non-iid-freeze-83.yaml', ] exp_list = [f'{base_path}/exps/{x}' for x in exp_list] first_prefix = '--build' From 0a784f9d0dc6c810d7b78c574bf8ea6480b362c2 Mon Sep 17 00:00:00 2001 From: bacox Date: Sat, 29 Jan 2022 01:10:29 +0100 Subject: [PATCH 45/73] Update non-iids-ness exps --- .../descr.yaml | 4 +-- .../exps/fedavg-iid-uniform.yaml | 4 +-- .../exps/fedavg-non-iid-1.yaml | 4 +-- .../exps/fedavg-non-iid-10.yaml | 4 +-- .../exps/fedavg-non-iid-2.yaml | 4 +-- .../exps/fedavg-non-iid-5.yaml | 4 +-- .../p28_effect_of_non_iid_ness_mnist/run.py | 14 +++----- 
configs/p29_effect_of_freezing_mnist/run.py | 32 +++++++++++++------ 8 files changed, 39 insertions(+), 31 deletions(-) diff --git a/configs/p28_effect_of_non_iid_ness_mnist/descr.yaml b/configs/p28_effect_of_non_iid_ness_mnist/descr.yaml index 343cd958..ba67d24a 100644 --- a/configs/p28_effect_of_non_iid_ness_mnist/descr.yaml +++ b/configs/p28_effect_of_non_iid_ness_mnist/descr.yaml @@ -12,7 +12,7 @@ warmup_round: false output_location: 'output/p28' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 2 +clients_per_round: 3 node_groups: slow: [1, 6] medium: [7, 12] @@ -24,4 +24,4 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 6 + amount: 10 diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-iid-uniform.yaml b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-iid-uniform.yaml index e9c89eeb..aea42ff7 100644 --- a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-iid-uniform.yaml +++ b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-iid-uniform.yaml @@ -12,7 +12,7 @@ warmup_round: false output_location: 'output/p28' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 2 +clients_per_round: 3 node_groups: slow: [1, 6] medium: [7, 12] @@ -24,7 +24,7 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 6 + amount: 10 # Individual configuration offload_stategy: vanilla deadline: 500000 diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-1.yaml b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-1.yaml index c684c850..3ba1e18e 100644 --- a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-1.yaml +++ b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-1.yaml @@ -12,7 +12,7 @@ warmup_round: false output_location: 'output/p28' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 2 +clients_per_round: 3 node_groups: slow: [1, 6] medium: [7, 12] @@ -24,7 +24,7 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 
6 + amount: 10 # Individual configuration offload_stategy: vanilla deadline: 500000 diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-10.yaml b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-10.yaml index ae1cd0f0..f31e76c9 100644 --- a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-10.yaml +++ b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-10.yaml @@ -12,7 +12,7 @@ warmup_round: false output_location: 'output/p28' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 2 +clients_per_round: 3 node_groups: slow: [1, 6] medium: [7, 12] @@ -24,7 +24,7 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 6 + amount: 10 # Individual configuration offload_stategy: vanilla deadline: 500000 diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-2.yaml b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-2.yaml index 7a5a1fc1..bdc94c89 100644 --- a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-2.yaml +++ b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-2.yaml @@ -12,7 +12,7 @@ warmup_round: false output_location: 'output/p28' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 2 +clients_per_round: 3 node_groups: slow: [1, 6] medium: [7, 12] @@ -24,7 +24,7 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 6 + amount: 10 # Individual configuration offload_stategy: vanilla deadline: 500000 diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-5.yaml b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-5.yaml index 4628bb9e..bcacd129 100644 --- a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-5.yaml +++ b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-5.yaml @@ -12,7 +12,7 @@ warmup_round: false output_location: 'output/p28' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 2 +clients_per_round: 3 node_groups: slow: [1, 
6] medium: [7, 12] @@ -24,7 +24,7 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 6 + amount: 10 # Individual configuration offload_stategy: vanilla deadline: 500000 diff --git a/configs/p28_effect_of_non_iid_ness_mnist/run.py b/configs/p28_effect_of_non_iid_ness_mnist/run.py index 3201df1b..43b29a17 100644 --- a/configs/p28_effect_of_non_iid_ness_mnist/run.py +++ b/configs/p28_effect_of_non_iid_ness_mnist/run.py @@ -7,15 +7,11 @@ generate_docker(name) base_path = f'configs/{Path(__file__).parent.name}' exp_list = [ - 'fedavg.yaml', - 'offload_strict.yaml', - 'fednova.yaml', - 'fedprox.yaml', - 'offload.yaml', - 'tifl_adaptive.yaml', - 'tifl_basic.yaml', - 'dyn_terminate_swyh.yaml', - 'dyn_terminate.yaml', + 'fedavg-iid-uniform.yaml', + 'fedavg-non-iid-10.yaml', + 'fedavg-non-iid-1.yaml', + 'fedavg-non-iid-2.yaml', + 'fedavg-non-iid-5.yaml', ] exp_list = [f'{base_path}/exps/{x}' for x in exp_list] first_prefix = '--build' diff --git a/configs/p29_effect_of_freezing_mnist/run.py b/configs/p29_effect_of_freezing_mnist/run.py index 3201df1b..79bde98e 100644 --- a/configs/p29_effect_of_freezing_mnist/run.py +++ b/configs/p29_effect_of_freezing_mnist/run.py @@ -7,15 +7,28 @@ generate_docker(name) base_path = f'configs/{Path(__file__).parent.name}' exp_list = [ - 'fedavg.yaml', - 'offload_strict.yaml', - 'fednova.yaml', - 'fedprox.yaml', - 'offload.yaml', - 'tifl_adaptive.yaml', - 'tifl_basic.yaml', - 'dyn_terminate_swyh.yaml', - 'dyn_terminate.yaml', + 'fedavg-iid-freeze-0.yaml', + 'fedavg-iid-freeze-30.yaml', + 'fedavg-iid-freeze-70.yaml', + 'fedavg-non_iid-freeze-100.yaml', + 'fedavg-non_iid-freeze-40.yaml', + 'fedavg-non_iid-freeze-80.yaml', + 'fedavg-iid-freeze-100.yaml', + 'fedavg-iid-freeze-40.yaml', + 'fedavg-iid-freeze-80.yaml', + 'fedavg-non_iid-freeze-10.yaml', + 'fedavg-non_iid-freeze-50.yaml', + 'fedavg-non_iid-freeze-90.yaml', + 'fedavg-iid-freeze-10.yaml', + 'fedavg-iid-freeze-50.yaml', + 'fedavg-iid-freeze-90.yaml', + 
'fedavg-non_iid-freeze-20.yaml', + 'fedavg-non_iid-freeze-60.yaml', + 'fedavg-iid-freeze-20.yaml', + 'fedavg-iid-freeze-60.yaml', + 'fedavg-non_iid-freeze-0.yaml', + 'fedavg-non_iid-freeze-30.yaml', + 'fedavg-non_iid-freeze-70.yaml', ] exp_list = [f'{base_path}/exps/{x}' for x in exp_list] first_prefix = '--build' @@ -27,4 +40,3 @@ print('Done') - From 2b0c45cc0e80ee55b9513d21ad0471bad4c03c00 Mon Sep 17 00:00:00 2001 From: Lydia Date: Sat, 29 Jan 2022 01:22:05 +0100 Subject: [PATCH 46/73] Fix data sampler error in config --- .../p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-10.cfg.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-10.cfg.yaml b/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-10.cfg.yaml index 641a4579..7a92bbb2 100644 --- a/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-10.cfg.yaml +++ b/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-10.cfg.yaml @@ -1,7 +1,7 @@ # Individual configuration offload_stategy: vanilla deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" sampler_args: - 10 # label limit || q probability || alpha || unused - 42 # random seed || random seed || random seed || unused \ No newline at end of file From a7398c952f576268acf9a855a7cd33b09f1d4124 Mon Sep 17 00:00:00 2001 From: bacox Date: Sat, 29 Jan 2022 01:28:21 +0100 Subject: [PATCH 47/73] Configure p24 p25 --- .../exps/offload_strict5.yaml | 37 +++++++++++++++++++ .../exps/offload_strict6.yaml | 37 +++++++++++++++++++ .../exps/offload_strict7.yaml | 37 +++++++++++++++++++ .../offload_strict5.cfg.yaml | 4 ++ .../offload_strict6.cfg.yaml | 4 ++ .../offload_strict7.cfg.yaml | 4 ++ .../p24_freezoff_iid_cifar10_cnn_w9s3/run.py | 5 ++- .../exps/offload_strict5.yaml | 37 +++++++++++++++++++ 
.../exps/offload_strict6.yaml | 37 +++++++++++++++++++ .../exps/offload_strict7.yaml | 37 +++++++++++++++++++ .../offload_strict5.cfg.yaml | 4 ++ .../offload_strict6.cfg.yaml | 4 ++ .../offload_strict7.cfg.yaml | 4 ++ .../run.py | 5 ++- .../p28_effect_of_non_iid_ness_mnist/run.py | 7 ++++ configs/p29_effect_of_freezing_mnist/run.py | 7 ++++ configs/p30_freezing_effect_dev/run.py | 6 +++ 17 files changed, 274 insertions(+), 2 deletions(-) create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict5.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict6.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict7.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict5.cfg.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict6.cfg.yaml create mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict7.cfg.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict5.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict6.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict7.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict5.cfg.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict6.cfg.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict7.cfg.yaml diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict5.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict5.yaml new file mode 100644 index 00000000..96a22f5a --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict5.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU 
+cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p24' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload-strict +deadline: 19 +deadline_threshold: 3 +experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_offload_strict5' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict6.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict6.yaml new file mode 100644 index 00000000..65fe0dda --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict6.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p24' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' 
+ nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload-strict +deadline: 20 +deadline_threshold: 3 +experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_offload_strict6' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict7.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict7.yaml new file mode 100644 index 00000000..def40d09 --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict7.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p24' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload-strict +deadline: 20 +deadline_threshold: 2 +experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_offload_strict7' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict5.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict5.cfg.yaml new file mode 100644 index 00000000..40874f3d --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict5.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload-strict +deadline: 19 +deadline_threshold: 3 \ No newline at end 
of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict6.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict6.cfg.yaml new file mode 100644 index 00000000..29d51951 --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict6.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload-strict +deadline: 20 +deadline_threshold: 3 \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict7.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict7.cfg.yaml new file mode 100644 index 00000000..b6c7084a --- /dev/null +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict7.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload-strict +deadline: 20 +deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/run.py b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/run.py index 7bf9555d..761eefd8 100644 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/run.py +++ b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/run.py @@ -12,7 +12,10 @@ # 'offload_strict.yaml', # 'offload_strict2.yaml', # 'offload_strict3.yaml', - 'offload_strict4.yaml', + # 'offload_strict4.yaml', + 'offload_strict5.yaml', + 'offload_strict6.yaml', + 'offload_strict7.yaml', # 'fednova.yaml', # 'fedprox.yaml', # 'offload.yaml', diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict5.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict5.yaml new file mode 100644 index 00000000..32a41da9 --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict5.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p25' 
+tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload-strict +deadline: 19 +deadline_threshold: 3 +experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict5' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict6.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict6.yaml new file mode 100644 index 00000000..d1c78b4a --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict6.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p25' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# 
Individual configuration +offload_stategy: offload-strict +deadline: 20 +deadline_threshold: 3 +experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict6' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict7.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict7.yaml new file mode 100644 index 00000000..f49e5dee --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict7.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p25' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload-strict +deadline: 20 +deadline_threshold: 2 +experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict7' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict5.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict5.cfg.yaml new file mode 100644 index 00000000..40874f3d --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict5.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload-strict +deadline: 19 +deadline_threshold: 3 \ No newline at end of file diff --git 
a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict6.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict6.cfg.yaml new file mode 100644 index 00000000..29d51951 --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict6.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload-strict +deadline: 20 +deadline_threshold: 3 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict7.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict7.cfg.yaml new file mode 100644 index 00000000..b6c7084a --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict7.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload-strict +deadline: 20 +deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/run.py b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/run.py index 7bf9555d..761eefd8 100644 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/run.py +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/run.py @@ -12,7 +12,10 @@ # 'offload_strict.yaml', # 'offload_strict2.yaml', # 'offload_strict3.yaml', - 'offload_strict4.yaml', + # 'offload_strict4.yaml', + 'offload_strict5.yaml', + 'offload_strict6.yaml', + 'offload_strict7.yaml', # 'fednova.yaml', # 'fedprox.yaml', # 'offload.yaml', diff --git a/configs/p28_effect_of_non_iid_ness_mnist/run.py b/configs/p28_effect_of_non_iid_ness_mnist/run.py index 43b29a17..28db12b0 100644 --- a/configs/p28_effect_of_non_iid_ness_mnist/run.py +++ b/configs/p28_effect_of_non_iid_ness_mnist/run.py @@ -17,10 +17,17 @@ first_prefix = '--build' for exp_cfg_file in exp_list: cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') + start = time.time() + + print(f'Running cmd: "{cmd}"') os.system(cmd) 
first_prefix = '' + elapsed = time.time() - start + os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') print('Done') + diff --git a/configs/p29_effect_of_freezing_mnist/run.py b/configs/p29_effect_of_freezing_mnist/run.py index 79bde98e..42858e42 100644 --- a/configs/p29_effect_of_freezing_mnist/run.py +++ b/configs/p29_effect_of_freezing_mnist/run.py @@ -34,9 +34,16 @@ first_prefix = '--build' for exp_cfg_file in exp_list: cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') + start = time.time() + + print(f'Running cmd: "{cmd}"') os.system(cmd) first_prefix = '' + elapsed = time.time() - start + os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') print('Done') + diff --git a/configs/p30_freezing_effect_dev/run.py b/configs/p30_freezing_effect_dev/run.py index bfeed62c..510f2fdc 100644 --- a/configs/p30_freezing_effect_dev/run.py +++ b/configs/p30_freezing_effect_dev/run.py @@ -26,9 +26,15 @@ first_prefix = '--build' for exp_cfg_file in exp_list: cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') + start = time.time() + + print(f'Running cmd: "{cmd}"') os.system(cmd) first_prefix = '' + elapsed = time.time() - start + os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') print('Done') From 09e281664ca18f004fa5dce46b1279eb6c229bcf Mon Sep 17 00:00:00 2001 From: Lydia Date: Sat, 29 Jan 2022 01:28:56 +0100 Subject: [PATCH 48/73] Fix typo --- .../exps/fedavg-non-iid-10.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-10.yaml 
b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-10.yaml index f31e76c9..e91a1444 100644 --- a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-10.yaml +++ b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-10.yaml @@ -28,7 +28,7 @@ system: # Individual configuration offload_stategy: vanilla deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" sampler_args: - 10 # label limit || q probability || alpha || unused - 42 # random seed || random seed || random seed || unused From 4a46a50dda508d39a91522b35cb15927cb582309 Mon Sep 17 00:00:00 2001 From: Lydia Date: Sat, 29 Jan 2022 01:34:28 +0100 Subject: [PATCH 49/73] Fix typo in run scripts --- configs/p28_effect_of_non_iid_ness_mnist/run.py | 1 + configs/p29_effect_of_freezing_mnist/run.py | 1 + configs/p30_freezing_effect_dev/run.py | 1 + 3 files changed, 3 insertions(+) diff --git a/configs/p28_effect_of_non_iid_ness_mnist/run.py b/configs/p28_effect_of_non_iid_ness_mnist/run.py index 28db12b0..14a343ec 100644 --- a/configs/p28_effect_of_non_iid_ness_mnist/run.py +++ b/configs/p28_effect_of_non_iid_ness_mnist/run.py @@ -3,6 +3,7 @@ from fltk.util.generate_docker_compose import run as generate_docker import os if __name__ == '__main__': + EVENT_FILE="exp_events.txt" name = 'p28_non_iid_effect' generate_docker(name) base_path = f'configs/{Path(__file__).parent.name}' diff --git a/configs/p29_effect_of_freezing_mnist/run.py b/configs/p29_effect_of_freezing_mnist/run.py index 42858e42..abc46f9a 100644 --- a/configs/p29_effect_of_freezing_mnist/run.py +++ b/configs/p29_effect_of_freezing_mnist/run.py @@ -3,6 +3,7 @@ from fltk.util.generate_docker_compose import run as generate_docker import os if __name__ == '__main__': + EVENT_FILE="exp_events.txt" name = 'p28_non_iid_effect' generate_docker(name) base_path = 
f'configs/{Path(__file__).parent.name}' diff --git a/configs/p30_freezing_effect_dev/run.py b/configs/p30_freezing_effect_dev/run.py index 510f2fdc..28dea172 100644 --- a/configs/p30_freezing_effect_dev/run.py +++ b/configs/p30_freezing_effect_dev/run.py @@ -3,6 +3,7 @@ from fltk.util.generate_docker_compose import run as generate_docker import os if __name__ == '__main__': + EVENT_FILE="exp_events.txt" name = 'p30_dev' generate_docker(name) base_path = f'configs/{Path(__file__).parent.name}' From c3bd6c364360e009570df679c9bb82faecddd799 Mon Sep 17 00:00:00 2001 From: Lydia Date: Sat, 29 Jan 2022 01:35:45 +0100 Subject: [PATCH 50/73] Fix typo in run scripts --- configs/p28_effect_of_non_iid_ness_mnist/run.py | 2 +- configs/p29_effect_of_freezing_mnist/run.py | 2 +- configs/p30_freezing_effect_dev/run.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/configs/p28_effect_of_non_iid_ness_mnist/run.py b/configs/p28_effect_of_non_iid_ness_mnist/run.py index 14a343ec..d741747a 100644 --- a/configs/p28_effect_of_non_iid_ness_mnist/run.py +++ b/configs/p28_effect_of_non_iid_ness_mnist/run.py @@ -1,5 +1,5 @@ from pathlib import Path - +import time from fltk.util.generate_docker_compose import run as generate_docker import os if __name__ == '__main__': diff --git a/configs/p29_effect_of_freezing_mnist/run.py b/configs/p29_effect_of_freezing_mnist/run.py index abc46f9a..ae621347 100644 --- a/configs/p29_effect_of_freezing_mnist/run.py +++ b/configs/p29_effect_of_freezing_mnist/run.py @@ -1,5 +1,5 @@ from pathlib import Path - +import time from fltk.util.generate_docker_compose import run as generate_docker import os if __name__ == '__main__': diff --git a/configs/p30_freezing_effect_dev/run.py b/configs/p30_freezing_effect_dev/run.py index 28dea172..690abaea 100644 --- a/configs/p30_freezing_effect_dev/run.py +++ b/configs/p30_freezing_effect_dev/run.py @@ -1,5 +1,5 @@ from pathlib import Path - +import time from fltk.util.generate_docker_compose import 
run as generate_docker import os if __name__ == '__main__': From 3c2ef88c015e014cee7e47fb354ebb8bf9dbd531 Mon Sep 17 00:00:00 2001 From: bacox Date: Sat, 29 Jan 2022 17:26:28 +0100 Subject: [PATCH 51/73] Fix issue in n label sampler --- fltk/strategy/data_samplers.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fltk/strategy/data_samplers.py b/fltk/strategy/data_samplers.py index ca00dbe4..35ab398a 100644 --- a/fltk/strategy/data_samplers.py +++ b/fltk/strategy/data_samplers.py @@ -130,12 +130,18 @@ def choice_n(l_dict: dict, n, seed_offset = 0): else: indices_per_client[client_key].append(split_part) # for split_part in splitted: - rest_indices = np.concatenate(rest_indices) - rest_splitted = np.array_split(rest_indices, len(indices_per_client)) - - for k, v in indices_per_client.items(): - v.append(rest_splitted.pop()) - indices_per_client[k] = np.concatenate(v) + # @TODO: Fix this part in terms of code cleanness. Could be written more cleanly + if len(rest_indices): + rest_indices = np.concatenate(rest_indices) + rest_splitted = np.array_split(rest_indices, len(indices_per_client)) + + for k, v in indices_per_client.items(): + v.append(rest_splitted.pop()) + indices_per_client[k] = np.concatenate(v) + else: + rest_indices = np.ndarray([]) + for k, v in indices_per_client.items(): + indices_per_client[k] = np.concatenate(v) indices = indices_per_client[self.client_id] random.seed(seed + self.client_id) # give each client a unique shuffle From 21a44f3a77f1bbff2bfde9b70dec3dcfe0d2207a Mon Sep 17 00:00:00 2001 From: bacox Date: Sun, 30 Jan 2022 00:56:38 +0100 Subject: [PATCH 52/73] Fix n label sampler --- fltk/strategy/data_samplers.py | 45 ++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/fltk/strategy/data_samplers.py b/fltk/strategy/data_samplers.py index 35ab398a..9710aa1a 100644 --- a/fltk/strategy/data_samplers.py +++ b/fltk/strategy/data_samplers.py @@ -74,11 +74,27 @@ def 
__init__(self, dataset, num_replicas, rank, args=(5, 42)): for l in range(self.n_labels): label_dict[l] = num_copies + def get_least_used_labels(l_dict: dict): + label_list = [[k, v] for k, v in label_dict.items()] + label_list[-1][1] = 0 + sorted_list = sorted(label_list, key=lambda x: x[1], reverse=True) + # print('d') + # label_list.sort(lambda x:x) + def choice_n(l_dict: dict, n, seed_offset = 0): + # get_least_used_labels(l_dict) labels = [k for k, v in label_dict.items() if v] - # print(f'Available labels: {labels} choose {n}') - np.random.seed(seed + seed_offset) + # summed = sum([int(v) for k, v in label_dict.items() if v]) + # amounts = [float(v) / float(summed) for k, v in label_dict.items() if v] + # # p = amounts / summed + print(f'Available labels: {labels} choose {n}') + # # np.random.seed(seed + seed_offset) + # # @TODO: Error is in this section! + # print(f'n={n}, labels={labels}, p={amounts}') + # print(amounts) + selected = np.random.choice(labels, n, replace=False) + # print(selected) for k, v in l_dict.items(): if k in selected: # v -= 1 @@ -91,16 +107,31 @@ def choice_n(l_dict: dict, n, seed_offset = 0): clients = list(range(self.n_clients)) # keeps track of which clients should still be given a label client_label_dict = {} + ordered_list = list(range(self.n_labels)) * int(num_copies) + + # Old code + # for idx, client_id in enumerate(clients): + # # client_label_dict[client_id] = [] + # label_set = choice_n(label_dict, args[0], idx) + # client_label_dict[client_id] = label_set + + # Now code for idx, client_id in enumerate(clients): - # client_label_dict[client_id] = [] - label_set = choice_n(label_dict, args[0], idx) + label_set = [] + for _ in range(args[0]): + label_set.append(ordered_list.pop()) client_label_dict[client_id] = label_set client_label_dict['rest'] = [] + # New code + if len(ordered_list): + client_label_dict['rest'] = ordered_list + + # Old code # client_label_dict['rest'] = labels = [k for k, v in label_dict.items() if v] - 
for k, v in label_dict.items(): - for x in range(int(v)): - client_label_dict['rest'].append(int(k)) + # for k, v in label_dict.items(): + # for x in range(int(v)): + # client_label_dict['rest'].append(int(k)) # Order data by label; split into N buckets and select indices based on the order found in the client-label-dict From 897041ed1df28962774e35452d7037a09a58ca2c Mon Sep 17 00:00:00 2001 From: bacox Date: Mon, 31 Jan 2022 14:00:04 +0100 Subject: [PATCH 53/73] Add check experiments --- configs/p14A_check_iid_cifar10_cnn/descr.yaml | 32 ++++++++++++ .../dyn_terminate.cfg.yaml | 3 ++ .../dyn_terminate_swyh.cfg.yaml | 3 ++ .../exps/dyn_terminate.yaml | 36 ++++++++++++++ .../exps/dyn_terminate_swyh.yaml | 37 ++++++++++++++ .../exps/fedavg.yaml | 36 ++++++++++++++ .../exps/fednova.yaml | 37 ++++++++++++++ .../exps/fedprox.yaml | 37 ++++++++++++++ .../exps/offload.yaml | 36 ++++++++++++++ .../exps/offload_strict.yaml | 37 ++++++++++++++ .../exps/tifl_adaptive.yaml | 36 ++++++++++++++ .../exps/tifl_basic.yaml | 36 ++++++++++++++ .../fedavg.cfg.yaml | 3 ++ .../fednova.cfg.yaml | 4 ++ .../fedprox.cfg.yaml | 4 ++ configs/p14A_check_iid_cifar10_cnn/gen.py | 26 ++++++++++ .../offload.cfg.yaml | 3 ++ .../offload_strict.cfg.yaml | 4 ++ configs/p14A_check_iid_cifar10_cnn/run.py | 43 ++++++++++++++++ .../tifl_adaptive.cfg.yaml | 3 ++ .../tifl_basic.cfg.yaml | 3 ++ configs/p14_check_iid_cifar10_cnn/descr.yaml | 32 ++++++++++++ .../dyn_terminate.cfg.yaml | 3 ++ .../dyn_terminate_swyh.cfg.yaml | 3 ++ .../exps/dyn_terminate.yaml | 36 ++++++++++++++ .../exps/dyn_terminate_swyh.yaml | 37 ++++++++++++++ .../exps/fedavg.yaml | 36 ++++++++++++++ .../exps/fednova.yaml | 37 ++++++++++++++ .../exps/fedprox.yaml | 37 ++++++++++++++ .../exps/offload.yaml | 36 ++++++++++++++ .../exps/offload_strict.yaml | 37 ++++++++++++++ .../exps/tifl_adaptive.yaml | 36 ++++++++++++++ .../exps/tifl_basic.yaml | 36 ++++++++++++++ .../p14_check_iid_cifar10_cnn/fedavg.cfg.yaml | 3 ++ .../fednova.cfg.yaml | 4 ++ 
.../fedprox.cfg.yaml | 4 ++ configs/p14_check_iid_cifar10_cnn/gen.py | 26 ++++++++++ .../offload.cfg.yaml | 3 ++ .../offload_strict.cfg.yaml | 4 ++ configs/p14_check_iid_cifar10_cnn/run.py | 43 ++++++++++++++++ .../tifl_adaptive.cfg.yaml | 3 ++ .../tifl_basic.cfg.yaml | 3 ++ fltk/util/generate_docker_compose.py | 49 +++++++++++++++++++ 43 files changed, 967 insertions(+) create mode 100644 configs/p14A_check_iid_cifar10_cnn/descr.yaml create mode 100644 configs/p14A_check_iid_cifar10_cnn/dyn_terminate.cfg.yaml create mode 100644 configs/p14A_check_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml create mode 100644 configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate.yaml create mode 100644 configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml create mode 100644 configs/p14A_check_iid_cifar10_cnn/exps/fedavg.yaml create mode 100644 configs/p14A_check_iid_cifar10_cnn/exps/fednova.yaml create mode 100644 configs/p14A_check_iid_cifar10_cnn/exps/fedprox.yaml create mode 100644 configs/p14A_check_iid_cifar10_cnn/exps/offload.yaml create mode 100644 configs/p14A_check_iid_cifar10_cnn/exps/offload_strict.yaml create mode 100644 configs/p14A_check_iid_cifar10_cnn/exps/tifl_adaptive.yaml create mode 100644 configs/p14A_check_iid_cifar10_cnn/exps/tifl_basic.yaml create mode 100644 configs/p14A_check_iid_cifar10_cnn/fedavg.cfg.yaml create mode 100644 configs/p14A_check_iid_cifar10_cnn/fednova.cfg.yaml create mode 100644 configs/p14A_check_iid_cifar10_cnn/fedprox.cfg.yaml create mode 100644 configs/p14A_check_iid_cifar10_cnn/gen.py create mode 100644 configs/p14A_check_iid_cifar10_cnn/offload.cfg.yaml create mode 100644 configs/p14A_check_iid_cifar10_cnn/offload_strict.cfg.yaml create mode 100644 configs/p14A_check_iid_cifar10_cnn/run.py create mode 100644 configs/p14A_check_iid_cifar10_cnn/tifl_adaptive.cfg.yaml create mode 100644 configs/p14A_check_iid_cifar10_cnn/tifl_basic.cfg.yaml create mode 100644 configs/p14_check_iid_cifar10_cnn/descr.yaml create mode 100644 
configs/p14_check_iid_cifar10_cnn/dyn_terminate.cfg.yaml create mode 100644 configs/p14_check_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml create mode 100644 configs/p14_check_iid_cifar10_cnn/exps/dyn_terminate.yaml create mode 100644 configs/p14_check_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml create mode 100644 configs/p14_check_iid_cifar10_cnn/exps/fedavg.yaml create mode 100644 configs/p14_check_iid_cifar10_cnn/exps/fednova.yaml create mode 100644 configs/p14_check_iid_cifar10_cnn/exps/fedprox.yaml create mode 100644 configs/p14_check_iid_cifar10_cnn/exps/offload.yaml create mode 100644 configs/p14_check_iid_cifar10_cnn/exps/offload_strict.yaml create mode 100644 configs/p14_check_iid_cifar10_cnn/exps/tifl_adaptive.yaml create mode 100644 configs/p14_check_iid_cifar10_cnn/exps/tifl_basic.yaml create mode 100644 configs/p14_check_iid_cifar10_cnn/fedavg.cfg.yaml create mode 100644 configs/p14_check_iid_cifar10_cnn/fednova.cfg.yaml create mode 100644 configs/p14_check_iid_cifar10_cnn/fedprox.cfg.yaml create mode 100644 configs/p14_check_iid_cifar10_cnn/gen.py create mode 100644 configs/p14_check_iid_cifar10_cnn/offload.cfg.yaml create mode 100644 configs/p14_check_iid_cifar10_cnn/offload_strict.cfg.yaml create mode 100644 configs/p14_check_iid_cifar10_cnn/run.py create mode 100644 configs/p14_check_iid_cifar10_cnn/tifl_adaptive.cfg.yaml create mode 100644 configs/p14_check_iid_cifar10_cnn/tifl_basic.cfg.yaml diff --git a/configs/p14A_check_iid_cifar10_cnn/descr.yaml b/configs/p14A_check_iid_cifar10_cnn/descr.yaml new file mode 100644 index 00000000..60a8fb57 --- /dev/null +++ b/configs/p14A_check_iid_cifar10_cnn/descr.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14A' +tensor_board_active: true +termination_percentage: 
0.7 +clients_per_round: 18 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p14A_check_iid_cifar10_cnn/dyn_terminate.cfg.yaml b/configs/p14A_check_iid_cifar10_cnn/dyn_terminate.cfg.yaml new file mode 100644 index 00000000..279369ef --- /dev/null +++ b/configs/p14A_check_iid_cifar10_cnn/dyn_terminate.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 \ No newline at end of file diff --git a/configs/p14A_check_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml b/configs/p14A_check_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml new file mode 100644 index 00000000..578998b4 --- /dev/null +++ b/configs/p14A_check_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate.yaml new file mode 100644 index 00000000..5d6cffe7 --- /dev/null +++ b/configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 4 +node_groups: + slow: [1, 6] + medium: [7, 12] + 
fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 +experiment_prefix: 'p14_check_iid_cifar10_cnn_dyn_terminate' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml new file mode 100644 index 00000000..69343556 --- /dev/null +++ b/configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 4 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 + +experiment_prefix: 'p14_check_iid_cifar10_cnn_dyn_terminate_swyh' diff --git 
a/configs/p14A_check_iid_cifar10_cnn/exps/fedavg.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/fedavg.yaml new file mode 100644 index 00000000..f1c1bad5 --- /dev/null +++ b/configs/p14A_check_iid_cifar10_cnn/exps/fedavg.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 4 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +experiment_prefix: 'p14_check_iid_cifar10_cnn_fedavg' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/fednova.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/fednova.yaml new file mode 100644 index 00000000..7a76e627 --- /dev/null +++ b/configs/p14A_check_iid_cifar10_cnn/exps/fednova.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 4 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || 
"dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova +experiment_prefix: 'p14_check_iid_cifar10_cnn_fednova' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/fedprox.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/fedprox.yaml new file mode 100644 index 00000000..f968c79a --- /dev/null +++ b/configs/p14A_check_iid_cifar10_cnn/exps/fedprox.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 4 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx +experiment_prefix: 'p14_check_iid_cifar10_cnn_fedprox' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/offload.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/offload.yaml new file mode 
100644 index 00000000..4fcab48b --- /dev/null +++ b/configs/p14A_check_iid_cifar10_cnn/exps/offload.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 4 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 +# Individual configuration +offload_stategy: offload +deadline: 500000 +experiment_prefix: 'p14_check_iid_cifar10_cnn_offload' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/offload_strict.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/offload_strict.yaml new file mode 100644 index 00000000..a782648c --- /dev/null +++ b/configs/p14A_check_iid_cifar10_cnn/exps/offload_strict.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 4 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || 
"dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 +# Individual configuration +offload_stategy: offload +deadline: 7 +deadline_threshold: 2 +experiment_prefix: 'p14_check_iid_cifar10_cnn_offload_strict' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/tifl_adaptive.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/tifl_adaptive.yaml new file mode 100644 index 00000000..92547493 --- /dev/null +++ b/configs/p14A_check_iid_cifar10_cnn/exps/tifl_adaptive.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 4 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 +experiment_prefix: 'p14_check_iid_cifar10_cnn_tifl_adaptive' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/tifl_basic.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/tifl_basic.yaml new file mode 100644 index 00000000..1c292e0a --- /dev/null +++ 
b/configs/p14A_check_iid_cifar10_cnn/exps/tifl_basic.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 4 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 +experiment_prefix: 'p14_check_iid_cifar10_cnn_tifl_basic' diff --git a/configs/p14A_check_iid_cifar10_cnn/fedavg.cfg.yaml b/configs/p14A_check_iid_cifar10_cnn/fedavg.cfg.yaml new file mode 100644 index 00000000..3b4615d1 --- /dev/null +++ b/configs/p14A_check_iid_cifar10_cnn/fedavg.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 \ No newline at end of file diff --git a/configs/p14A_check_iid_cifar10_cnn/fednova.cfg.yaml b/configs/p14A_check_iid_cifar10_cnn/fednova.cfg.yaml new file mode 100644 index 00000000..ca0e2a55 --- /dev/null +++ b/configs/p14A_check_iid_cifar10_cnn/fednova.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova \ No newline at end of file diff --git a/configs/p14A_check_iid_cifar10_cnn/fedprox.cfg.yaml b/configs/p14A_check_iid_cifar10_cnn/fedprox.cfg.yaml new file mode 100644 index 00000000..f66490e9 --- 
/dev/null +++ b/configs/p14A_check_iid_cifar10_cnn/fedprox.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx \ No newline at end of file diff --git a/configs/p14A_check_iid_cifar10_cnn/gen.py b/configs/p14A_check_iid_cifar10_cnn/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/p14A_check_iid_cifar10_cnn/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/p14A_check_iid_cifar10_cnn/offload.cfg.yaml b/configs/p14A_check_iid_cifar10_cnn/offload.cfg.yaml new file mode 100644 index 00000000..3febf08b --- /dev/null +++ b/configs/p14A_check_iid_cifar10_cnn/offload.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: offload +deadline: 500000 \ No newline at end of file diff --git a/configs/p14A_check_iid_cifar10_cnn/offload_strict.cfg.yaml b/configs/p14A_check_iid_cifar10_cnn/offload_strict.cfg.yaml new file mode 100644 index 00000000..f07a9c58 --- /dev/null +++ b/configs/p14A_check_iid_cifar10_cnn/offload_strict.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 7 +deadline_threshold: 2 \ No newline at end of file 
diff --git a/configs/p14A_check_iid_cifar10_cnn/run.py b/configs/p14A_check_iid_cifar10_cnn/run.py new file mode 100644 index 00000000..daacdfd5 --- /dev/null +++ b/configs/p14A_check_iid_cifar10_cnn/run.py @@ -0,0 +1,43 @@ +from pathlib import Path +import time +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + EVENT_FILE="exp_events.txt" + name = 'generate_check_w18' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + 'fedavg.yaml', + # 'offload_strict.yaml', + # 'offload_strict2.yaml', + # 'offload_strict3.yaml', + # 'offload_strict4.yaml', + # 'offload_strict5.yaml', + # 'offload_strict6.yaml', + # 'offload_strict7.yaml', + # 'fednova.yaml', + # 'fedprox.yaml', + # 'offload.yaml', + # 'tifl_adaptive.yaml', + # 'tifl_basic.yaml', + # 'dyn_terminate_swyh.yaml', + # 'dyn_terminate.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') + start = time.time() + + + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + elapsed = time.time() - start + os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') + + print('Done') + + diff --git a/configs/p14A_check_iid_cifar10_cnn/tifl_adaptive.cfg.yaml b/configs/p14A_check_iid_cifar10_cnn/tifl_adaptive.cfg.yaml new file mode 100644 index 00000000..e0ca9fbd --- /dev/null +++ b/configs/p14A_check_iid_cifar10_cnn/tifl_adaptive.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 \ No newline at end of file diff --git a/configs/p14A_check_iid_cifar10_cnn/tifl_basic.cfg.yaml b/configs/p14A_check_iid_cifar10_cnn/tifl_basic.cfg.yaml new file mode 100644 index 
00000000..b12b53b3 --- /dev/null +++ b/configs/p14A_check_iid_cifar10_cnn/tifl_basic.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 \ No newline at end of file diff --git a/configs/p14_check_iid_cifar10_cnn/descr.yaml b/configs/p14_check_iid_cifar10_cnn/descr.yaml new file mode 100644 index 00000000..a840e572 --- /dev/null +++ b/configs/p14_check_iid_cifar10_cnn/descr.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 4 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 diff --git a/configs/p14_check_iid_cifar10_cnn/dyn_terminate.cfg.yaml b/configs/p14_check_iid_cifar10_cnn/dyn_terminate.cfg.yaml new file mode 100644 index 00000000..279369ef --- /dev/null +++ b/configs/p14_check_iid_cifar10_cnn/dyn_terminate.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 \ No newline at end of file diff --git a/configs/p14_check_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml b/configs/p14_check_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml new file mode 100644 index 00000000..578998b4 --- /dev/null +++ b/configs/p14_check_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml @@ -0,0 +1,3 @@ +# 
Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 diff --git a/configs/p14_check_iid_cifar10_cnn/exps/dyn_terminate.yaml b/configs/p14_check_iid_cifar10_cnn/exps/dyn_terminate.yaml new file mode 100644 index 00000000..5d6cffe7 --- /dev/null +++ b/configs/p14_check_iid_cifar10_cnn/exps/dyn_terminate.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 4 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 +experiment_prefix: 'p14_check_iid_cifar10_cnn_dyn_terminate' diff --git a/configs/p14_check_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml b/configs/p14_check_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml new file mode 100644 index 00000000..69343556 --- /dev/null +++ b/configs/p14_check_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14' +tensor_board_active: true 
+termination_percentage: 0.7 +clients_per_round: 4 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 + +experiment_prefix: 'p14_check_iid_cifar10_cnn_dyn_terminate_swyh' diff --git a/configs/p14_check_iid_cifar10_cnn/exps/fedavg.yaml b/configs/p14_check_iid_cifar10_cnn/exps/fedavg.yaml new file mode 100644 index 00000000..f1c1bad5 --- /dev/null +++ b/configs/p14_check_iid_cifar10_cnn/exps/fedavg.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 4 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +experiment_prefix: 
'p14_check_iid_cifar10_cnn_fedavg' diff --git a/configs/p14_check_iid_cifar10_cnn/exps/fednova.yaml b/configs/p14_check_iid_cifar10_cnn/exps/fednova.yaml new file mode 100644 index 00000000..7a76e627 --- /dev/null +++ b/configs/p14_check_iid_cifar10_cnn/exps/fednova.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 4 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova +experiment_prefix: 'p14_check_iid_cifar10_cnn_fednova' diff --git a/configs/p14_check_iid_cifar10_cnn/exps/fedprox.yaml b/configs/p14_check_iid_cifar10_cnn/exps/fedprox.yaml new file mode 100644 index 00000000..f968c79a --- /dev/null +++ b/configs/p14_check_iid_cifar10_cnn/exps/fedprox.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 4 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] 
+sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx +experiment_prefix: 'p14_check_iid_cifar10_cnn_fedprox' diff --git a/configs/p14_check_iid_cifar10_cnn/exps/offload.yaml b/configs/p14_check_iid_cifar10_cnn/exps/offload.yaml new file mode 100644 index 00000000..4fcab48b --- /dev/null +++ b/configs/p14_check_iid_cifar10_cnn/exps/offload.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 4 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 +# Individual configuration +offload_stategy: offload +deadline: 500000 +experiment_prefix: 'p14_check_iid_cifar10_cnn_offload' diff --git a/configs/p14_check_iid_cifar10_cnn/exps/offload_strict.yaml 
b/configs/p14_check_iid_cifar10_cnn/exps/offload_strict.yaml new file mode 100644 index 00000000..a782648c --- /dev/null +++ b/configs/p14_check_iid_cifar10_cnn/exps/offload_strict.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 4 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 +# Individual configuration +offload_stategy: offload +deadline: 7 +deadline_threshold: 2 +experiment_prefix: 'p14_check_iid_cifar10_cnn_offload_strict' diff --git a/configs/p14_check_iid_cifar10_cnn/exps/tifl_adaptive.yaml b/configs/p14_check_iid_cifar10_cnn/exps/tifl_adaptive.yaml new file mode 100644 index 00000000..92547493 --- /dev/null +++ b/configs/p14_check_iid_cifar10_cnn/exps/tifl_adaptive.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 4 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || 
"dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 +experiment_prefix: 'p14_check_iid_cifar10_cnn_tifl_adaptive' diff --git a/configs/p14_check_iid_cifar10_cnn/exps/tifl_basic.yaml b/configs/p14_check_iid_cifar10_cnn/exps/tifl_basic.yaml new file mode 100644 index 00000000..1c292e0a --- /dev/null +++ b/configs/p14_check_iid_cifar10_cnn/exps/tifl_basic.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 200 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 4 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 4 +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 +experiment_prefix: 'p14_check_iid_cifar10_cnn_tifl_basic' diff --git a/configs/p14_check_iid_cifar10_cnn/fedavg.cfg.yaml b/configs/p14_check_iid_cifar10_cnn/fedavg.cfg.yaml new file mode 100644 index 
00000000..3b4615d1 --- /dev/null +++ b/configs/p14_check_iid_cifar10_cnn/fedavg.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 \ No newline at end of file diff --git a/configs/p14_check_iid_cifar10_cnn/fednova.cfg.yaml b/configs/p14_check_iid_cifar10_cnn/fednova.cfg.yaml new file mode 100644 index 00000000..ca0e2a55 --- /dev/null +++ b/configs/p14_check_iid_cifar10_cnn/fednova.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova \ No newline at end of file diff --git a/configs/p14_check_iid_cifar10_cnn/fedprox.cfg.yaml b/configs/p14_check_iid_cifar10_cnn/fedprox.cfg.yaml new file mode 100644 index 00000000..f66490e9 --- /dev/null +++ b/configs/p14_check_iid_cifar10_cnn/fedprox.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx \ No newline at end of file diff --git a/configs/p14_check_iid_cifar10_cnn/gen.py b/configs/p14_check_iid_cifar10_cnn/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/p14_check_iid_cifar10_cnn/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff 
--git a/configs/p14_check_iid_cifar10_cnn/offload.cfg.yaml b/configs/p14_check_iid_cifar10_cnn/offload.cfg.yaml new file mode 100644 index 00000000..3febf08b --- /dev/null +++ b/configs/p14_check_iid_cifar10_cnn/offload.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: offload +deadline: 500000 \ No newline at end of file diff --git a/configs/p14_check_iid_cifar10_cnn/offload_strict.cfg.yaml b/configs/p14_check_iid_cifar10_cnn/offload_strict.cfg.yaml new file mode 100644 index 00000000..f07a9c58 --- /dev/null +++ b/configs/p14_check_iid_cifar10_cnn/offload_strict.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 7 +deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p14_check_iid_cifar10_cnn/run.py b/configs/p14_check_iid_cifar10_cnn/run.py new file mode 100644 index 00000000..5732f7e7 --- /dev/null +++ b/configs/p14_check_iid_cifar10_cnn/run.py @@ -0,0 +1,43 @@ +from pathlib import Path +import time +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + EVENT_FILE="exp_events.txt" + name = 'generate_check_w4' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + 'fedavg.yaml', + # 'offload_strict.yaml', + # 'offload_strict2.yaml', + # 'offload_strict3.yaml', + # 'offload_strict4.yaml', + # 'offload_strict5.yaml', + # 'offload_strict6.yaml', + # 'offload_strict7.yaml', + # 'fednova.yaml', + # 'fedprox.yaml', + # 'offload.yaml', + # 'tifl_adaptive.yaml', + # 'tifl_basic.yaml', + # 'dyn_terminate_swyh.yaml', + # 'dyn_terminate.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') + start = time.time() + + + print(f'Running cmd: "{cmd}"') + 
os.system(cmd) + first_prefix = '' + elapsed = time.time() - start + os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') + + print('Done') + + diff --git a/configs/p14_check_iid_cifar10_cnn/tifl_adaptive.cfg.yaml b/configs/p14_check_iid_cifar10_cnn/tifl_adaptive.cfg.yaml new file mode 100644 index 00000000..e0ca9fbd --- /dev/null +++ b/configs/p14_check_iid_cifar10_cnn/tifl_adaptive.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 \ No newline at end of file diff --git a/configs/p14_check_iid_cifar10_cnn/tifl_basic.cfg.yaml b/configs/p14_check_iid_cifar10_cnn/tifl_basic.cfg.yaml new file mode 100644 index 00000000..b12b53b3 --- /dev/null +++ b/configs/p14_check_iid_cifar10_cnn/tifl_basic.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 \ No newline at end of file diff --git a/fltk/util/generate_docker_compose.py b/fltk/util/generate_docker_compose.py index a5d4627b..e0ff1db3 100644 --- a/fltk/util/generate_docker_compose.py +++ b/fltk/util/generate_docker_compose.py @@ -220,6 +220,53 @@ def generate_p13_w6(): yaml.dump(system_template, file, sort_keys=False) +def generate_check_w4(): + template_path = get_deploy_path('p11_freezoff') + num_clients= 4 + world_size = num_clients + 1 + system_template: dict = load_system_template(template_path=template_path) + + for key, item in enumerate(system_template['services']['fl_server']['environment']): + if item == 'WORLD_SIZE={world_size}': + system_template['services']['fl_server']['environment'][key] = item.format(world_size=world_size) + cpu_idx = 2 + for client_id in range(1, num_clients + 1): + client_type = 'fast' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + + client_template: dict = load_client_template(type=client_type, template_path=template_path) + client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type, + 
cpu_set=cpu_set) + system_template['services'].update(client_definition) + + with open(r'./docker-compose.yml', 'w') as file: + yaml.dump(system_template, file, sort_keys=False) + +def generate_check_w18(): + template_path = get_deploy_path('p11_freezoff') + num_clients= 18 + world_size = num_clients + 1 + system_template: dict = load_system_template(template_path=template_path) + + for key, item in enumerate(system_template['services']['fl_server']['environment']): + if item == 'WORLD_SIZE={world_size}': + system_template['services']['fl_server']['environment'][key] = item.format(world_size=world_size) + cpu_idx = 2 + for client_id in range(1, num_clients + 1): + client_type = 'fast' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + + client_template: dict = load_client_template(type=client_type, template_path=template_path) + client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type, + cpu_set=cpu_set) + system_template['services'].update(client_definition) + + with open(r'./docker-compose.yml', 'w') as file: + yaml.dump(system_template, file, sort_keys=False) + + def generate_p11_freezoff(): template_path = get_deploy_path('p11_freezoff') num_clients= 18 @@ -399,6 +446,8 @@ def run(name, num_clients = None, medium=False): 'p23_w9s3': generate_p23_freezoff_w9s3, 'p28_non_iid_effect': generate_p28_non_iid_effect, 'p30_dev': generate_p30_freezing_effect_dev, + 'generate_check_w4': generate_check_w4, + 'generate_check_w18': generate_check_w18 } if num_clients: exp_dict[name](num_clients, medium) From 72e14955121f3d36c27801cdee289f329a9d49ba Mon Sep 17 00:00:00 2001 From: Lydia Date: Mon, 31 Jan 2022 14:13:41 +0100 Subject: [PATCH 54/73] Update docker generator --- fltk/util/generate_docker_compose.py | 40 ++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/fltk/util/generate_docker_compose.py b/fltk/util/generate_docker_compose.py index a5d4627b..f305eaba 100644 --- 
a/fltk/util/generate_docker_compose.py +++ b/fltk/util/generate_docker_compose.py @@ -130,6 +130,45 @@ def generate_p23_freezoff_w9s3(): with open(r'./docker-compose.yml', 'w') as file: yaml.dump(system_template, file, sort_keys=False) +def generate_p23_freezoff_w9s3_half(): + template_path = get_deploy_path('p23_freezoff_w9s3-half') + num_clients = 9 + cpu_per_client = 1 + num_cpus = 20 + world_size = num_clients + 1 + system_template: dict = load_system_template(template_path=template_path) + + for key, item in enumerate(system_template['services']['fl_server']['environment']): + if item == 'WORLD_SIZE={world_size}': + system_template['services']['fl_server']['environment'][key] = item.format(world_size=world_size) + cpu_set = 0 + cpu_idx = 2 + for client_id in range(1, num_clients + 1): + client_type = 'default' + if 0 < client_id <= 3: + client_type = 'slow' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + elif 3 < client_id <= 6: + client_type = 'medium' + cpu_set = f'{cpu_idx}-{cpu_idx+1}' + cpu_idx += 2 + elif 6 < client_id <= 9: + client_type = 'fast' + cpu_set = f'{cpu_idx}-{cpu_idx + 2}' + cpu_idx += 3 + else: + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + + client_template: dict = load_client_template(type=client_type, template_path=template_path) + client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type, cpu_set=cpu_set) + system_template['services'].update(client_definition) + + with open(r'./docker-compose.yml', 'w') as file: + yaml.dump(system_template, file, sort_keys=False) + + def generate_terminate(num_clients = 16, medium=False): template_path = get_deploy_path('terminate') @@ -397,6 +436,7 @@ def run(name, num_clients = None, medium=False): 'p11_freezoff': generate_p11_freezoff, 'p13_w6' : generate_p13_w6, 'p23_w9s3': generate_p23_freezoff_w9s3, + 'p23_w9s3-half': generate_p23_freezoff_w9s3_half, 'p28_non_iid_effect': generate_p28_non_iid_effect, 'p30_dev': generate_p30_freezing_effect_dev, } From 
eaad317488ba8192205cb25d08abf4db1194994f Mon Sep 17 00:00:00 2001 From: Lydia Date: Tue, 1 Feb 2022 10:39:33 +0100 Subject: [PATCH 55/73] Add more experimental configs --- configs/p14A_check_iid_cifar10_cnn/descr.yaml | 4 +- .../exps/dyn_terminate.yaml | 8 +-- .../exps/dyn_terminate_swyh.yaml | 8 +-- .../exps/fedavg.yaml | 8 +-- .../exps/fednova.yaml | 8 +-- .../exps/fedprox.yaml | 8 +-- .../exps/offload.yaml | 8 +-- .../exps/offload_strict.yaml | 8 +-- .../exps/tifl_adaptive.yaml | 8 +-- .../exps/tifl_basic.yaml | 8 +-- configs/p14A_check_iid_cifar10_cnn/run.py | 2 +- .../descr.yaml | 27 ++++++++ .../exps/fedavg-iid-uniform.yaml | 35 ++++++++++ .../exps/fedavg-non-iid-1.yaml | 35 ++++++++++ .../exps/fedavg-non-iid-10.yaml | 35 ++++++++++ .../exps/fedavg-non-iid-2.yaml | 35 ++++++++++ .../exps/fedavg-non-iid-5.yaml | 35 ++++++++++ .../fedavg-iid-uniform.cfg.yaml | 7 ++ .../fedavg-non-iid-1.cfg.yaml | 7 ++ .../fedavg-non-iid-10.cfg.yaml | 7 ++ .../fedavg-non-iid-2.cfg.yaml | 7 ++ .../fedavg-non-iid-5.cfg.yaml | 7 ++ .../gen.py | 26 ++++++++ .../run.py | 34 ++++++++++ .../descr.yaml | 2 +- .../offload_strict10.yaml | 4 ++ .../offload_strict11.yaml | 4 ++ .../offload_strict8.yaml | 4 ++ .../offload_strict9.yaml | 4 ++ .../run.py | 11 +++- .../p28_effect_of_non_iid_ness_mnist/run.py | 2 +- .../descr.yaml | 32 +++++++++ .../dyn_terminate.cfg.yaml | 3 + .../dyn_terminate_swyh.cfg.yaml | 3 + .../exps/dyn_terminate.yaml | 36 ++++++++++ .../exps/dyn_terminate_swyh.yaml | 37 +++++++++++ .../exps/fedavg.yaml | 36 ++++++++++ .../exps/fednova.yaml | 37 +++++++++++ .../exps/fedprox.yaml | 37 +++++++++++ .../exps/offload.yaml | 36 ++++++++++ .../exps/offload_strict.yaml | 37 +++++++++++ .../exps/offload_strict2.yaml | 37 +++++++++++ .../exps/tifl_adaptive.yaml | 36 ++++++++++ .../exps/tifl_basic.yaml | 36 ++++++++++ .../fedavg.cfg.yaml | 3 + .../fednova.cfg.yaml | 4 ++ .../fedprox.cfg.yaml | 4 ++ .../gen.py | 26 ++++++++ .../offload.cfg.yaml | 3 + .../offload_strict.cfg.yaml | 4 
++ .../offload_strict2.cfg.yaml | 4 ++ .../run.py | 39 +++++++++++ .../tifl_adaptive.cfg.yaml | 3 + .../tifl_basic.cfg.yaml | 3 + .../p32_freezoff_iid_cifar10_cnn/descr.yaml | 32 +++++++++ .../dyn_terminate.cfg.yaml | 3 + .../dyn_terminate_swyh.cfg.yaml | 3 + .../exps/dyn_terminate.yaml | 36 ++++++++++ .../exps/dyn_terminate_swyh.yaml | 37 +++++++++++ .../exps/fedavg.yaml | 36 ++++++++++ .../exps/fednova.yaml | 37 +++++++++++ .../exps/fedprox.yaml | 37 +++++++++++ .../exps/offload.yaml | 36 ++++++++++ .../exps/offload_strict.yaml | 37 +++++++++++ .../exps/tifl_adaptive.yaml | 36 ++++++++++ .../exps/tifl_basic.yaml | 36 ++++++++++ .../fedavg.cfg.yaml | 3 + .../fednova.cfg.yaml | 4 ++ .../fedprox.cfg.yaml | 4 ++ configs/p32_freezoff_iid_cifar10_cnn/gen.py | 26 ++++++++ .../offload.cfg.yaml | 3 + .../offload_strict.cfg.yaml | 4 ++ configs/p32_freezoff_iid_cifar10_cnn/run.py | 30 +++++++++ .../tifl_adaptive.cfg.yaml | 3 + .../tifl_basic.cfg.yaml | 3 + .../descr.yaml | 32 +++++++++ .../dyn_terminate.cfg.yaml | 3 + .../dyn_terminate_swyh.cfg.yaml | 3 + .../exps/dyn_terminate.yaml | 36 ++++++++++ .../exps/dyn_terminate_swyh.yaml | 37 +++++++++++ .../exps/fedavg.yaml | 36 ++++++++++ .../exps/fednova.yaml | 37 +++++++++++ .../exps/fedprox.yaml | 37 +++++++++++ .../exps/offload.yaml | 36 ++++++++++ .../exps/offload_strict.yaml | 37 +++++++++++ .../exps/tifl_adaptive.yaml | 36 ++++++++++ .../exps/tifl_basic.yaml | 36 ++++++++++ .../fedavg.cfg.yaml | 3 + .../fednova.cfg.yaml | 4 ++ .../fedprox.cfg.yaml | 4 ++ .../p33_freezoff_non_iid_cifar10_cnn/gen.py | 26 ++++++++ .../offload.cfg.yaml | 3 + .../offload_strict.cfg.yaml | 4 ++ .../p33_freezoff_non_iid_cifar10_cnn/run.py | 30 +++++++++ .../tifl_adaptive.cfg.yaml | 3 + .../tifl_basic.cfg.yaml | 3 + .../descr.yaml | 32 +++++++++ .../dyn_terminate.cfg.yaml | 3 + .../dyn_terminate_swyh.cfg.yaml | 3 + .../exps/dyn_terminate.yaml | 36 ++++++++++ .../exps/dyn_terminate_swyh.yaml | 37 +++++++++++ .../exps/fedavg.yaml | 36 ++++++++++ 
.../exps/fednova.yaml | 37 +++++++++++ .../exps/fedprox.yaml | 37 +++++++++++ .../exps/offload.yaml | 36 ++++++++++ .../exps/offload_strict.yaml | 37 +++++++++++ .../exps/offload_strict2.yaml | 37 +++++++++++ .../exps/offload_strict3.yaml | 37 +++++++++++ .../exps/offload_strict4.yaml | 37 +++++++++++ .../exps/tifl_adaptive.yaml | 36 ++++++++++ .../exps/tifl_basic.yaml | 36 ++++++++++ .../fedavg.cfg.yaml | 3 + .../fednova.cfg.yaml | 4 ++ .../fedprox.cfg.yaml | 4 ++ .../gen.py | 26 ++++++++ .../offload.cfg.yaml | 3 + .../offload_strict.cfg.yaml | 4 ++ .../offload_strict2.cfg.yaml | 4 ++ .../offload_strict3.cfg.yaml | 4 ++ .../offload_strict4.cfg.yaml | 4 ++ .../run.py | 41 ++++++++++++ .../tifl_adaptive.cfg.yaml | 3 + .../tifl_basic.cfg.yaml | 3 + .../descr.yaml | 32 +++++++++ .../dyn_terminate.cfg.yaml | 3 + .../dyn_terminate_swyh.cfg.yaml | 3 + .../exps/dyn_terminate.yaml | 36 ++++++++++ .../exps/dyn_terminate_swyh.yaml | 37 +++++++++++ .../exps/fedavg.yaml | 36 ++++++++++ .../exps/fednova.yaml | 37 +++++++++++ .../exps/fedprox.yaml | 37 +++++++++++ .../exps/offload.yaml | 36 ++++++++++ .../exps/offload_strict.yaml | 37 +++++++++++ .../exps/tifl_adaptive.yaml | 36 ++++++++++ .../exps/tifl_basic.yaml | 36 ++++++++++ .../fedavg.cfg.yaml | 3 + .../fednova.cfg.yaml | 4 ++ .../fedprox.cfg.yaml | 4 ++ .../gen.py | 26 ++++++++ .../offload.cfg.yaml | 3 + .../offload_strict.cfg.yaml | 4 ++ .../run.py | 37 +++++++++++ .../tifl_adaptive.cfg.yaml | 3 + .../tifl_basic.cfg.yaml | 3 + .../descr.yaml | 32 +++++++++ .../dyn_terminate.cfg.yaml | 3 + .../dyn_terminate_swyh.cfg.yaml | 3 + .../exps/dyn_terminate.yaml | 36 ++++++++++ .../exps/dyn_terminate_swyh.yaml | 37 +++++++++++ .../exps/fedavg.yaml | 36 ++++++++++ .../exps/fednova.yaml | 37 +++++++++++ .../exps/fedprox.yaml | 37 +++++++++++ .../exps/offload.yaml | 36 ++++++++++ .../exps/offload_strict.yaml | 37 +++++++++++ .../exps/tifl_adaptive.yaml | 36 ++++++++++ .../exps/tifl_basic.yaml | 36 ++++++++++ .../fedavg.cfg.yaml | 3 
+ .../fednova.cfg.yaml | 4 ++ .../fedprox.cfg.yaml | 4 ++ .../p35_freezoff_non_iid_5_cifar10_cnn/gen.py | 26 ++++++++ .../offload.cfg.yaml | 3 + .../offload_strict.cfg.yaml | 4 ++ .../p35_freezoff_non_iid_5_cifar10_cnn/run.py | 37 +++++++++++ .../tifl_adaptive.cfg.yaml | 3 + .../tifl_basic.cfg.yaml | 3 + .../descr.yaml | 32 +++++++++ .../dyn_terminate.cfg.yaml | 3 + .../dyn_terminate_swyh.cfg.yaml | 3 + .../exps/dyn_terminate.yaml | 36 ++++++++++ .../exps/dyn_terminate_swyh.yaml | 37 +++++++++++ .../exps/fedavg.yaml | 36 ++++++++++ .../exps/fednova.yaml | 37 +++++++++++ .../exps/fedprox.yaml | 37 +++++++++++ .../exps/offload.yaml | 36 ++++++++++ .../exps/offload_strict.yaml | 37 +++++++++++ .../exps/offload_strict2.yaml | 37 +++++++++++ .../exps/offload_strict3.yaml | 37 +++++++++++ .../exps/offload_strict4.yaml | 37 +++++++++++ .../exps/tifl_adaptive.yaml | 36 ++++++++++ .../exps/tifl_basic.yaml | 36 ++++++++++ .../fedavg.cfg.yaml | 3 + .../fednova.cfg.yaml | 4 ++ .../fedprox.cfg.yaml | 4 ++ .../gen.py | 26 ++++++++ .../offload.cfg.yaml | 3 + .../offload_strict.cfg.yaml | 4 ++ .../offload_strict2.cfg.yaml | 4 ++ .../offload_strict3.cfg.yaml | 4 ++ .../offload_strict4.cfg.yaml | 4 ++ .../run.py | 41 ++++++++++++ .../tifl_adaptive.cfg.yaml | 3 + .../tifl_basic.cfg.yaml | 3 + fltk/util/generate_docker_compose.py | 66 ++++++++++++++++++- 193 files changed, 3881 insertions(+), 45 deletions(-) create mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/descr.yaml create mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-iid-uniform.yaml create mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-1.yaml create mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-10.yaml create mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-2.yaml create mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-5.yaml create mode 100644 
configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-iid-uniform.cfg.yaml create mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-1.cfg.yaml create mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-10.cfg.yaml create mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-2.cfg.yaml create mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-5.cfg.yaml create mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/gen.py create mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/run.py create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict10.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict11.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict8.yaml create mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict9.yaml create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml create mode 100644 
configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/gen.py create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/run.py create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml create mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml create mode 100644 configs/p32_freezoff_iid_cifar10_cnn/descr.yaml create mode 100644 configs/p32_freezoff_iid_cifar10_cnn/dyn_terminate.cfg.yaml create mode 100644 configs/p32_freezoff_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml create mode 100644 configs/p32_freezoff_iid_cifar10_cnn/exps/dyn_terminate.yaml create mode 100644 configs/p32_freezoff_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml create mode 100644 configs/p32_freezoff_iid_cifar10_cnn/exps/fedavg.yaml create mode 100644 configs/p32_freezoff_iid_cifar10_cnn/exps/fednova.yaml create mode 100644 configs/p32_freezoff_iid_cifar10_cnn/exps/fedprox.yaml create mode 100644 configs/p32_freezoff_iid_cifar10_cnn/exps/offload.yaml create mode 100644 configs/p32_freezoff_iid_cifar10_cnn/exps/offload_strict.yaml create mode 100644 configs/p32_freezoff_iid_cifar10_cnn/exps/tifl_adaptive.yaml create mode 100644 configs/p32_freezoff_iid_cifar10_cnn/exps/tifl_basic.yaml create mode 100644 configs/p32_freezoff_iid_cifar10_cnn/fedavg.cfg.yaml create mode 100644 configs/p32_freezoff_iid_cifar10_cnn/fednova.cfg.yaml create mode 100644 
configs/p32_freezoff_iid_cifar10_cnn/fedprox.cfg.yaml create mode 100644 configs/p32_freezoff_iid_cifar10_cnn/gen.py create mode 100644 configs/p32_freezoff_iid_cifar10_cnn/offload.cfg.yaml create mode 100644 configs/p32_freezoff_iid_cifar10_cnn/offload_strict.cfg.yaml create mode 100644 configs/p32_freezoff_iid_cifar10_cnn/run.py create mode 100644 configs/p32_freezoff_iid_cifar10_cnn/tifl_adaptive.cfg.yaml create mode 100644 configs/p32_freezoff_iid_cifar10_cnn/tifl_basic.cfg.yaml create mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/descr.yaml create mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/dyn_terminate.cfg.yaml create mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml create mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate.yaml create mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml create mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/exps/fedavg.yaml create mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/exps/fednova.yaml create mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/exps/fedprox.yaml create mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/exps/offload.yaml create mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/exps/offload_strict.yaml create mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/exps/tifl_adaptive.yaml create mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/exps/tifl_basic.yaml create mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/fedavg.cfg.yaml create mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/fednova.cfg.yaml create mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/fedprox.cfg.yaml create mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/gen.py create mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/offload.cfg.yaml create mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/offload_strict.cfg.yaml create mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/run.py create 
mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/tifl_adaptive.cfg.yaml create mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/tifl_basic.cfg.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/gen.py create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml create mode 100644 
configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/run.py create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml create mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/descr.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/dyn_terminate.cfg.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/dyn_terminate_swyh.cfg.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/dyn_terminate.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/dyn_terminate_swyh.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fedavg.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fednova.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fedprox.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/offload.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/offload_strict.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/tifl_adaptive.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/tifl_basic.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fedavg.cfg.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fednova.cfg.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fedprox.cfg.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/gen.py create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/offload.cfg.yaml create mode 100644 
configs/p35_freezoff_non_iid_5_cifar10_cnn copy/offload_strict.cfg.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/run.py create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/tifl_adaptive.cfg.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/tifl_basic.cfg.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/descr.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/dyn_terminate.cfg.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/dyn_terminate_swyh.cfg.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/dyn_terminate.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/dyn_terminate_swyh.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fedavg.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fednova.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fedprox.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/offload.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/offload_strict.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/tifl_adaptive.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/tifl_basic.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/fedavg.cfg.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/fednova.cfg.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/fedprox.cfg.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/gen.py create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/offload.cfg.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/offload_strict.cfg.yaml create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/run.py create mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/tifl_adaptive.cfg.yaml create mode 100644 
configs/p35_freezoff_non_iid_5_cifar10_cnn/tifl_basic.cfg.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/descr.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/dyn_terminate.cfg.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/dyn_terminate_swyh.cfg.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/dyn_terminate.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/dyn_terminate_swyh.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fedavg.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fednova.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fedprox.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict2.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict3.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict4.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/tifl_adaptive.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/tifl_basic.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fedavg.cfg.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fednova.cfg.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fedprox.cfg.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/gen.py create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload.cfg.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict.cfg.yaml create mode 100644 
configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict2.cfg.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict3.cfg.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict4.cfg.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/run.py create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/tifl_adaptive.cfg.yaml create mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/tifl_basic.cfg.yaml diff --git a/configs/p14A_check_iid_cifar10_cnn/descr.yaml b/configs/p14A_check_iid_cifar10_cnn/descr.yaml index 60a8fb57..594a03b8 100644 --- a/configs/p14A_check_iid_cifar10_cnn/descr.yaml +++ b/configs/p14A_check_iid_cifar10_cnn/descr.yaml @@ -12,7 +12,7 @@ warmup_round: false output_location: 'output/p14A' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 18 +clients_per_round: 6 node_groups: slow: [1, 6] medium: [7, 12] @@ -29,4 +29,4 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 18 + amount: 6 diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate.yaml index 5d6cffe7..23f5d126 100644 --- a/configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate.yaml +++ b/configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate.yaml @@ -9,10 +9,10 @@ dataset: cifar10 cuda: false profiling_time: 30 warmup_round: false -output_location: 'output/p14' +output_location: 'output/p14A' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 4 +clients_per_round: 6 node_groups: slow: [1, 6] medium: [7, 12] @@ -29,8 +29,8 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 4 + amount: 6 # Individual configuration offload_stategy: dynamic-terminate deadline: 500000 -experiment_prefix: 'p14_check_iid_cifar10_cnn_dyn_terminate' +experiment_prefix: 'p14A_check_iid_cifar10_cnn_dyn_terminate' diff --git 
a/configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml index 69343556..2ce3217b 100644 --- a/configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml +++ b/configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml @@ -9,10 +9,10 @@ dataset: cifar10 cuda: false profiling_time: 30 warmup_round: false -output_location: 'output/p14' +output_location: 'output/p14A' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 4 +clients_per_round: 6 node_groups: slow: [1, 6] medium: [7, 12] @@ -29,9 +29,9 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 4 + amount: 6 # Individual configuration offload_stategy: dynamic-terminate-swyh deadline: 500000 -experiment_prefix: 'p14_check_iid_cifar10_cnn_dyn_terminate_swyh' +experiment_prefix: 'p14A_check_iid_cifar10_cnn_dyn_terminate_swyh' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/fedavg.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/fedavg.yaml index f1c1bad5..0c3d3b94 100644 --- a/configs/p14A_check_iid_cifar10_cnn/exps/fedavg.yaml +++ b/configs/p14A_check_iid_cifar10_cnn/exps/fedavg.yaml @@ -9,10 +9,10 @@ dataset: cifar10 cuda: false profiling_time: 30 warmup_round: false -output_location: 'output/p14' +output_location: 'output/p14A' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 4 +clients_per_round: 6 node_groups: slow: [1, 6] medium: [7, 12] @@ -29,8 +29,8 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 4 + amount: 6 # Individual configuration offload_stategy: vanilla deadline: 500000 -experiment_prefix: 'p14_check_iid_cifar10_cnn_fedavg' +experiment_prefix: 'p14A_check_iid_cifar10_cnn_fedavg' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/fednova.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/fednova.yaml index 7a76e627..7b93eabd 100644 --- a/configs/p14A_check_iid_cifar10_cnn/exps/fednova.yaml +++ b/configs/p14A_check_iid_cifar10_cnn/exps/fednova.yaml @@ 
-9,10 +9,10 @@ dataset: cifar10 cuda: false profiling_time: 30 warmup_round: false -output_location: 'output/p14' +output_location: 'output/p14A' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 4 +clients_per_round: 6 node_groups: slow: [1, 6] medium: [7, 12] @@ -29,9 +29,9 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 4 + amount: 6 # Individual configuration offload_stategy: vanilla deadline: 500000 optimizer: FedNova -experiment_prefix: 'p14_check_iid_cifar10_cnn_fednova' +experiment_prefix: 'p14A_check_iid_cifar10_cnn_fednova' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/fedprox.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/fedprox.yaml index f968c79a..3ebf2e49 100644 --- a/configs/p14A_check_iid_cifar10_cnn/exps/fedprox.yaml +++ b/configs/p14A_check_iid_cifar10_cnn/exps/fedprox.yaml @@ -9,10 +9,10 @@ dataset: cifar10 cuda: false profiling_time: 30 warmup_round: false -output_location: 'output/p14' +output_location: 'output/p14A' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 4 +clients_per_round: 6 node_groups: slow: [1, 6] medium: [7, 12] @@ -29,9 +29,9 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 4 + amount: 6 # Individual configuration offload_stategy: vanilla deadline: 500000 optimizer: FedProx -experiment_prefix: 'p14_check_iid_cifar10_cnn_fedprox' +experiment_prefix: 'p14A_check_iid_cifar10_cnn_fedprox' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/offload.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/offload.yaml index 4fcab48b..f3503858 100644 --- a/configs/p14A_check_iid_cifar10_cnn/exps/offload.yaml +++ b/configs/p14A_check_iid_cifar10_cnn/exps/offload.yaml @@ -9,10 +9,10 @@ dataset: cifar10 cuda: false profiling_time: 30 warmup_round: false -output_location: 'output/p14' +output_location: 'output/p14A' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 4 +clients_per_round: 6 node_groups: slow: [1, 6] medium: [7, 12] @@ -29,8 +29,8 @@ 
system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 4 + amount: 6 # Individual configuration offload_stategy: offload deadline: 500000 -experiment_prefix: 'p14_check_iid_cifar10_cnn_offload' +experiment_prefix: 'p14A_check_iid_cifar10_cnn_offload' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/offload_strict.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/offload_strict.yaml index a782648c..bb23ed2d 100644 --- a/configs/p14A_check_iid_cifar10_cnn/exps/offload_strict.yaml +++ b/configs/p14A_check_iid_cifar10_cnn/exps/offload_strict.yaml @@ -9,10 +9,10 @@ dataset: cifar10 cuda: false profiling_time: 30 warmup_round: false -output_location: 'output/p14' +output_location: 'output/p14A' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 4 +clients_per_round: 6 node_groups: slow: [1, 6] medium: [7, 12] @@ -29,9 +29,9 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 4 + amount: 6 # Individual configuration offload_stategy: offload deadline: 7 deadline_threshold: 2 -experiment_prefix: 'p14_check_iid_cifar10_cnn_offload_strict' +experiment_prefix: 'p14A_check_iid_cifar10_cnn_offload_strict' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/tifl_adaptive.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/tifl_adaptive.yaml index 92547493..fab8cbf1 100644 --- a/configs/p14A_check_iid_cifar10_cnn/exps/tifl_adaptive.yaml +++ b/configs/p14A_check_iid_cifar10_cnn/exps/tifl_adaptive.yaml @@ -9,10 +9,10 @@ dataset: cifar10 cuda: false profiling_time: 30 warmup_round: false -output_location: 'output/p14' +output_location: 'output/p14A' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 4 +clients_per_round: 6 node_groups: slow: [1, 6] medium: [7, 12] @@ -29,8 +29,8 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 4 + amount: 6 # Individual configuration offload_stategy: tifl-adaptive deadline: 500000 -experiment_prefix: 'p14_check_iid_cifar10_cnn_tifl_adaptive' +experiment_prefix: 
'p14A_check_iid_cifar10_cnn_tifl_adaptive' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/tifl_basic.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/tifl_basic.yaml index 1c292e0a..6ae1e298 100644 --- a/configs/p14A_check_iid_cifar10_cnn/exps/tifl_basic.yaml +++ b/configs/p14A_check_iid_cifar10_cnn/exps/tifl_basic.yaml @@ -9,10 +9,10 @@ dataset: cifar10 cuda: false profiling_time: 30 warmup_round: false -output_location: 'output/p14' +output_location: 'output/p14A' tensor_board_active: true termination_percentage: 0.7 -clients_per_round: 4 +clients_per_round: 6 node_groups: slow: [1, 6] medium: [7, 12] @@ -29,8 +29,8 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 4 + amount: 6 # Individual configuration offload_stategy: tifl-basic deadline: 500000 -experiment_prefix: 'p14_check_iid_cifar10_cnn_tifl_basic' +experiment_prefix: 'p14A_check_iid_cifar10_cnn_tifl_basic' diff --git a/configs/p14A_check_iid_cifar10_cnn/run.py b/configs/p14A_check_iid_cifar10_cnn/run.py index daacdfd5..460f8b1e 100644 --- a/configs/p14A_check_iid_cifar10_cnn/run.py +++ b/configs/p14A_check_iid_cifar10_cnn/run.py @@ -4,7 +4,7 @@ import os if __name__ == '__main__': EVENT_FILE="exp_events.txt" - name = 'generate_check_w18' + name = 'generate_check_w18_fast' generate_docker(name) base_path = f'configs/{Path(__file__).parent.name}' exp_list = [ diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/descr.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/descr.yaml new file mode 100644 index 00000000..7c09bd98 --- /dev/null +++ b/configs/p14B_effect_of_non_iid_ness_cifar10/descr.yaml @@ -0,0 +1,27 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14B' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + 
medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-iid-uniform.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-iid-uniform.yaml new file mode 100644 index 00000000..ad80682e --- /dev/null +++ b/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-iid-uniform.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14B' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +experiment_prefix: 'p14B_effect_of_non_iid_ness_cifar10_fedavg-iid-uniform' diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-1.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-1.yaml new file mode 100644 index 00000000..99c58d6c --- /dev/null +++ b/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-1.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 
'output/p14B' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 1 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +experiment_prefix: 'p14B_effect_of_non_iid_ness_cifar10_fedavg-non-iid-1' diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-10.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-10.yaml new file mode 100644 index 00000000..63dde8d5 --- /dev/null +++ b/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-10.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14B' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 10 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +experiment_prefix: 'p14B_effect_of_non_iid_ness_cifar10_fedavg-non-iid-10' diff --git 
a/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-2.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-2.yaml new file mode 100644 index 00000000..a81d65c3 --- /dev/null +++ b/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-2.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14B' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +experiment_prefix: 'p14B_effect_of_non_iid_ness_cifar10_fedavg-non-iid-2' diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-5.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-5.yaml new file mode 100644 index 00000000..d911b976 --- /dev/null +++ b/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-5.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p14B' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +system: + federator: + # hostname: 
'131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 10 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +experiment_prefix: 'p14B_effect_of_non_iid_ness_cifar10_fedavg-non-iid-5' diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-iid-uniform.cfg.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-iid-uniform.cfg.yaml new file mode 100644 index 00000000..65bda1b5 --- /dev/null +++ b/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-iid-uniform.cfg.yaml @@ -0,0 +1,7 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused \ No newline at end of file diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-1.cfg.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-1.cfg.yaml new file mode 100644 index 00000000..645c08d9 --- /dev/null +++ b/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-1.cfg.yaml @@ -0,0 +1,7 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 1 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused \ No newline at end of file diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-10.cfg.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-10.cfg.yaml new file mode 100644 index 00000000..7a92bbb2 --- /dev/null +++ 
b/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-10.cfg.yaml @@ -0,0 +1,7 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 10 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused \ No newline at end of file diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-2.cfg.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-2.cfg.yaml new file mode 100644 index 00000000..876dae51 --- /dev/null +++ b/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-2.cfg.yaml @@ -0,0 +1,7 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused \ No newline at end of file diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-5.cfg.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-5.cfg.yaml new file mode 100644 index 00000000..79bbc150 --- /dev/null +++ b/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-5.cfg.yaml @@ -0,0 +1,7 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused \ No newline at end of file diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/gen.py b/configs/p14B_effect_of_non_iid_ness_cifar10/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/p14B_effect_of_non_iid_ness_cifar10/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if 
__name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/run.py b/configs/p14B_effect_of_non_iid_ness_cifar10/run.py new file mode 100644 index 00000000..1037b72f --- /dev/null +++ b/configs/p14B_effect_of_non_iid_ness_cifar10/run.py @@ -0,0 +1,34 @@ +from pathlib import Path +import time +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + EVENT_FILE="exp_events.txt" + name = 'p28_non_iid_effect' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + 'fedavg-non-iid-5.yaml', + 'fedavg-iid-uniform.yaml', + 'fedavg-non-iid-10.yaml', + 'fedavg-non-iid-1.yaml', + 'fedavg-non-iid-2.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') + start = time.time() + + + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + elapsed = time.time() - start + os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" 
>> {EVENT_FILE}') + + print('Done') + + + diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/descr.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/descr.yaml index 1fd1bf0c..06dcc5b1 100644 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/descr.yaml +++ b/configs/p23_freezoff_non_iid_cifar10_cnn/descr.yaml @@ -1,7 +1,7 @@ --- # Experiment configuration total_epochs: 50 -epochs_per_cycle: 1 +epochs_per_cycle: 2 wait_for_clients: true net: Cifar10CNN dataset: cifar10 diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict10.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict10.yaml new file mode 100644 index 00000000..a1ee25bc --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict10.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload-strict +deadline: 18 +deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict11.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict11.yaml new file mode 100644 index 00000000..6048f43b --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict11.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload-strict +deadline: 18 +deadline_threshold: 1 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict8.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict8.yaml new file mode 100644 index 00000000..d70e0e3d --- /dev/null +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict8.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload-strict +deadline: 19 +deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict9.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict9.yaml new file mode 100644 index 00000000..87225b97 --- /dev/null +++ 
b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict9.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload-strict +deadline: 19 +deadline_threshold: 1 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/run.py b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/run.py index 761eefd8..ab8ced7f 100644 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/run.py +++ b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/run.py @@ -13,9 +13,14 @@ # 'offload_strict2.yaml', # 'offload_strict3.yaml', # 'offload_strict4.yaml', - 'offload_strict5.yaml', - 'offload_strict6.yaml', - 'offload_strict7.yaml', + # 'offload_strict5.yaml', + # 'offload_strict6.yaml', + # 'offload_strict7.yaml', + 'offload_strict8.yaml', + 'offload_strict9.yaml', + 'offload_strict10.yaml', + 'offload_strict11.yaml', + # 'fednova.yaml', # 'fedprox.yaml', # 'offload.yaml', diff --git a/configs/p28_effect_of_non_iid_ness_mnist/run.py b/configs/p28_effect_of_non_iid_ness_mnist/run.py index d741747a..1037b72f 100644 --- a/configs/p28_effect_of_non_iid_ness_mnist/run.py +++ b/configs/p28_effect_of_non_iid_ness_mnist/run.py @@ -8,11 +8,11 @@ generate_docker(name) base_path = f'configs/{Path(__file__).parent.name}' exp_list = [ + 'fedavg-non-iid-5.yaml', 'fedavg-iid-uniform.yaml', 'fedavg-non-iid-10.yaml', 'fedavg-non-iid-1.yaml', 'fedavg-non-iid-2.yaml', - 'fedavg-non-iid-5.yaml', ] exp_list = [f'{base_path}/exps/{x}' for x in exp_list] first_prefix = '--build' diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml new file mode 100644 index 00000000..8dd342b3 --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 
+warmup_round: false +output_location: 'output/p31' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml new file mode 100644 index 00000000..279369ef --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 \ No newline at end of file diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml new file mode 100644 index 00000000..578998b4 --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml new file mode 100644 index 00000000..ddd63597 --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false 
+profiling_time: 30 +warmup_round: false +output_location: 'output/p31' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 +experiment_prefix: 'p31_freezoff_non_iid_cifar10_cnn_w9s3_dyn_terminate' diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml new file mode 100644 index 00000000..f92d5f3a --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p31' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + 
nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 + +experiment_prefix: 'p31_freezoff_non_iid_cifar10_cnn_w9s3_dyn_terminate_swyh' diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml new file mode 100644 index 00000000..de88d8ed --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p31' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +experiment_prefix: 'p31_freezoff_non_iid_cifar10_cnn_w9s3_fedavg' diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml new file mode 100644 index 00000000..13210854 --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false 
will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p31' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova +experiment_prefix: 'p31_freezoff_non_iid_cifar10_cnn_w9s3_fednova' diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml new file mode 100644 index 00000000..9881414a --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p31' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + 
nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx +experiment_prefix: 'p31_freezoff_non_iid_cifar10_cnn_w9s3_fedprox' diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml new file mode 100644 index 00000000..98e4a085 --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p31' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 500000 +experiment_prefix: 'p31_freezoff_non_iid_cifar10_cnn_w9s3_offload' diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml new file mode 100644 index 00000000..7100314c --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; 
setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p31' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 34 +deadline_threshold: 2 +experiment_prefix: 'p31_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict' diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml new file mode 100644 index 00000000..38db1e51 --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p31' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # 
hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 35 +deadline_threshold: 2 +experiment_prefix: 'p31_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict2' diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml new file mode 100644 index 00000000..ae5d1bcd --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p31' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 +experiment_prefix: 'p31_freezoff_non_iid_cifar10_cnn_w9s3_tifl_adaptive' diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml new file mode 100644 index 00000000..45fbcdc3 --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 
+wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p31' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 +experiment_prefix: 'p31_freezoff_non_iid_cifar10_cnn_w9s3_tifl_basic' diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml new file mode 100644 index 00000000..3b4615d1 --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 \ No newline at end of file diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml new file mode 100644 index 00000000..ca0e2a55 --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova \ No newline at end of file diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml new file mode 100644 index 00000000..f66490e9 --- /dev/null +++ 
b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx \ No newline at end of file diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/gen.py b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml new file mode 100644 index 00000000..3febf08b --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: offload +deadline: 500000 \ No newline at end of file diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml new file mode 100644 index 00000000..d2a1ae5b --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml @@ -0,0 +1,4 @@ +# Individual 
configuration +offload_stategy: offload +deadline: 34 +deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml new file mode 100644 index 00000000..8f107382 --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 35 +deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/run.py b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/run.py new file mode 100644 index 00000000..10c67efe --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/run.py @@ -0,0 +1,39 @@ +from pathlib import Path +import time +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + EVENT_FILE="exp_events.txt" + # name = 'p23_w9s3-half' + name = 'p23_w9s3' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + 'fedavg.yaml', + 'offload_strict.yaml', + 'offload_strict2.yaml', + # 'fednova.yaml', + # 'fedprox.yaml', + 'tifl_adaptive.yaml', + 'tifl_basic.yaml', + 'offload.yaml', + # 'dyn_terminate_swyh.yaml', + # 'dyn_terminate.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') + start = time.time() + + + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + elapsed = time.time() - start + os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') + + print('Done') + + diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml 
b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml new file mode 100644 index 00000000..e0ca9fbd --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 \ No newline at end of file diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml new file mode 100644 index 00000000..b12b53b3 --- /dev/null +++ b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 \ No newline at end of file diff --git a/configs/p32_freezoff_iid_cifar10_cnn/descr.yaml b/configs/p32_freezoff_iid_cifar10_cnn/descr.yaml new file mode 100644 index 00000000..b3e88009 --- /dev/null +++ b/configs/p32_freezoff_iid_cifar10_cnn/descr.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p32' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p32_freezoff_iid_cifar10_cnn/dyn_terminate.cfg.yaml b/configs/p32_freezoff_iid_cifar10_cnn/dyn_terminate.cfg.yaml new 
file mode 100644 index 00000000..279369ef --- /dev/null +++ b/configs/p32_freezoff_iid_cifar10_cnn/dyn_terminate.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 \ No newline at end of file diff --git a/configs/p32_freezoff_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml b/configs/p32_freezoff_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml new file mode 100644 index 00000000..578998b4 --- /dev/null +++ b/configs/p32_freezoff_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 diff --git a/configs/p32_freezoff_iid_cifar10_cnn/exps/dyn_terminate.yaml b/configs/p32_freezoff_iid_cifar10_cnn/exps/dyn_terminate.yaml new file mode 100644 index 00000000..94c0963c --- /dev/null +++ b/configs/p32_freezoff_iid_cifar10_cnn/exps/dyn_terminate.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p22' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 +experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_dyn_terminate' diff --git 
a/configs/p32_freezoff_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml b/configs/p32_freezoff_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml new file mode 100644 index 00000000..8ecdd49a --- /dev/null +++ b/configs/p32_freezoff_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p22' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 + +experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_dyn_terminate_swyh' diff --git a/configs/p32_freezoff_iid_cifar10_cnn/exps/fedavg.yaml b/configs/p32_freezoff_iid_cifar10_cnn/exps/fedavg.yaml new file mode 100644 index 00000000..57b3ad7e --- /dev/null +++ b/configs/p32_freezoff_iid_cifar10_cnn/exps/fedavg.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p22' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + 
fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_fedavg' diff --git a/configs/p32_freezoff_iid_cifar10_cnn/exps/fednova.yaml b/configs/p32_freezoff_iid_cifar10_cnn/exps/fednova.yaml new file mode 100644 index 00000000..75dc378f --- /dev/null +++ b/configs/p32_freezoff_iid_cifar10_cnn/exps/fednova.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p22' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova +experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_fednova' diff --git a/configs/p32_freezoff_iid_cifar10_cnn/exps/fedprox.yaml 
b/configs/p32_freezoff_iid_cifar10_cnn/exps/fedprox.yaml new file mode 100644 index 00000000..ddba20a8 --- /dev/null +++ b/configs/p32_freezoff_iid_cifar10_cnn/exps/fedprox.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p22' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx +experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_fedprox' diff --git a/configs/p32_freezoff_iid_cifar10_cnn/exps/offload.yaml b/configs/p32_freezoff_iid_cifar10_cnn/exps/offload.yaml new file mode 100644 index 00000000..66785fd7 --- /dev/null +++ b/configs/p32_freezoff_iid_cifar10_cnn/exps/offload.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p22' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" 
(default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: offload +deadline: 500000 +experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_offload' diff --git a/configs/p32_freezoff_iid_cifar10_cnn/exps/offload_strict.yaml b/configs/p32_freezoff_iid_cifar10_cnn/exps/offload_strict.yaml new file mode 100644 index 00000000..d11399f4 --- /dev/null +++ b/configs/p32_freezoff_iid_cifar10_cnn/exps/offload_strict.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p22' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: offload +deadline: 7 +deadline_threshold: 2 +experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_offload_strict' diff --git a/configs/p32_freezoff_iid_cifar10_cnn/exps/tifl_adaptive.yaml b/configs/p32_freezoff_iid_cifar10_cnn/exps/tifl_adaptive.yaml new 
file mode 100644 index 00000000..00519096 --- /dev/null +++ b/configs/p32_freezoff_iid_cifar10_cnn/exps/tifl_adaptive.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p22' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 +experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_tifl_adaptive' diff --git a/configs/p32_freezoff_iid_cifar10_cnn/exps/tifl_basic.yaml b/configs/p32_freezoff_iid_cifar10_cnn/exps/tifl_basic.yaml new file mode 100644 index 00000000..cd2109d2 --- /dev/null +++ b/configs/p32_freezoff_iid_cifar10_cnn/exps/tifl_basic.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p22' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit 
labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 +experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_tifl_basic' diff --git a/configs/p32_freezoff_iid_cifar10_cnn/fedavg.cfg.yaml b/configs/p32_freezoff_iid_cifar10_cnn/fedavg.cfg.yaml new file mode 100644 index 00000000..3b4615d1 --- /dev/null +++ b/configs/p32_freezoff_iid_cifar10_cnn/fedavg.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 \ No newline at end of file diff --git a/configs/p32_freezoff_iid_cifar10_cnn/fednova.cfg.yaml b/configs/p32_freezoff_iid_cifar10_cnn/fednova.cfg.yaml new file mode 100644 index 00000000..ca0e2a55 --- /dev/null +++ b/configs/p32_freezoff_iid_cifar10_cnn/fednova.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova \ No newline at end of file diff --git a/configs/p32_freezoff_iid_cifar10_cnn/fedprox.cfg.yaml b/configs/p32_freezoff_iid_cifar10_cnn/fedprox.cfg.yaml new file mode 100644 index 00000000..f66490e9 --- /dev/null +++ b/configs/p32_freezoff_iid_cifar10_cnn/fedprox.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx \ No newline at end of file diff --git a/configs/p32_freezoff_iid_cifar10_cnn/gen.py b/configs/p32_freezoff_iid_cifar10_cnn/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/p32_freezoff_iid_cifar10_cnn/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + 
exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/p32_freezoff_iid_cifar10_cnn/offload.cfg.yaml b/configs/p32_freezoff_iid_cifar10_cnn/offload.cfg.yaml new file mode 100644 index 00000000..3febf08b --- /dev/null +++ b/configs/p32_freezoff_iid_cifar10_cnn/offload.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: offload +deadline: 500000 \ No newline at end of file diff --git a/configs/p32_freezoff_iid_cifar10_cnn/offload_strict.cfg.yaml b/configs/p32_freezoff_iid_cifar10_cnn/offload_strict.cfg.yaml new file mode 100644 index 00000000..f07a9c58 --- /dev/null +++ b/configs/p32_freezoff_iid_cifar10_cnn/offload_strict.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 7 +deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p32_freezoff_iid_cifar10_cnn/run.py b/configs/p32_freezoff_iid_cifar10_cnn/run.py new file mode 100644 index 00000000..d3cc22bf --- /dev/null +++ b/configs/p32_freezoff_iid_cifar10_cnn/run.py @@ -0,0 +1,30 @@ +from pathlib import Path + +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + name = 'p11_freezoff' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + 'fedavg.yaml', + # 'offload_strict.yaml', + # 'fednova.yaml', + # 'fedprox.yaml', + # 'offload.yaml', + # 
'tifl_adaptive.yaml', + # 'tifl_basic.yaml', + # 'dyn_terminate_swyh.yaml', + # 'dyn_terminate.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + + print('Done') + + diff --git a/configs/p32_freezoff_iid_cifar10_cnn/tifl_adaptive.cfg.yaml b/configs/p32_freezoff_iid_cifar10_cnn/tifl_adaptive.cfg.yaml new file mode 100644 index 00000000..e0ca9fbd --- /dev/null +++ b/configs/p32_freezoff_iid_cifar10_cnn/tifl_adaptive.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 \ No newline at end of file diff --git a/configs/p32_freezoff_iid_cifar10_cnn/tifl_basic.cfg.yaml b/configs/p32_freezoff_iid_cifar10_cnn/tifl_basic.cfg.yaml new file mode 100644 index 00000000..b12b53b3 --- /dev/null +++ b/configs/p32_freezoff_iid_cifar10_cnn/tifl_basic.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 \ No newline at end of file diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/descr.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/descr.yaml new file mode 100644 index 00000000..ae61894f --- /dev/null +++ b/configs/p33_freezoff_non_iid_cifar10_cnn/descr.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p33' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" 
|| "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/dyn_terminate.cfg.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/dyn_terminate.cfg.yaml new file mode 100644 index 00000000..279369ef --- /dev/null +++ b/configs/p33_freezoff_non_iid_cifar10_cnn/dyn_terminate.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 \ No newline at end of file diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml new file mode 100644 index 00000000..578998b4 --- /dev/null +++ b/configs/p33_freezoff_non_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate.yaml new file mode 100644 index 00000000..fc4aceea --- /dev/null +++ b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p23' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || 
"uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 +experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_dyn_terminate' diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml new file mode 100644 index 00000000..38ec7094 --- /dev/null +++ b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p23' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 + +experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_dyn_terminate_swyh' diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/fedavg.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/fedavg.yaml new file mode 100644 index 00000000..fc9fb303 --- 
/dev/null +++ b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/fedavg.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p23' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_fedavg' diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/fednova.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/fednova.yaml new file mode 100644 index 00000000..9f61a507 --- /dev/null +++ b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/fednova.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p23' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" 
(default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova +experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_fednova' diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/fedprox.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/fedprox.yaml new file mode 100644 index 00000000..c53e7753 --- /dev/null +++ b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/fedprox.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p23' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx +experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_fedprox' diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/offload.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/offload.yaml new file mode 100644 index 00000000..c7dd2978 --- /dev/null +++ 
b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/offload.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p23' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: offload +deadline: 500000 +experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_offload' diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/offload_strict.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/offload_strict.yaml new file mode 100644 index 00000000..e52c19d4 --- /dev/null +++ b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/offload_strict.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p23' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || 
"uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: offload +deadline: 7 +deadline_threshold: 2 +experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_offload_strict' diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/tifl_adaptive.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/tifl_adaptive.yaml new file mode 100644 index 00000000..cefa2ae0 --- /dev/null +++ b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/tifl_adaptive.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p23' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 +experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_tifl_adaptive' diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/tifl_basic.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/tifl_basic.yaml new file mode 100644 index 00000000..284f47b5 --- /dev/null +++ 
b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/tifl_basic.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p23' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 6 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 +experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_tifl_basic' diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/fedavg.cfg.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/fedavg.cfg.yaml new file mode 100644 index 00000000..3b4615d1 --- /dev/null +++ b/configs/p33_freezoff_non_iid_cifar10_cnn/fedavg.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 \ No newline at end of file diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/fednova.cfg.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/fednova.cfg.yaml new file mode 100644 index 00000000..ca0e2a55 --- /dev/null +++ b/configs/p33_freezoff_non_iid_cifar10_cnn/fednova.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova \ No newline at end of file diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/fedprox.cfg.yaml 
b/configs/p33_freezoff_non_iid_cifar10_cnn/fedprox.cfg.yaml new file mode 100644 index 00000000..f66490e9 --- /dev/null +++ b/configs/p33_freezoff_non_iid_cifar10_cnn/fedprox.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx \ No newline at end of file diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/gen.py b/configs/p33_freezoff_non_iid_cifar10_cnn/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/p33_freezoff_non_iid_cifar10_cnn/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/offload.cfg.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/offload.cfg.yaml new file mode 100644 index 00000000..3febf08b --- /dev/null +++ b/configs/p33_freezoff_non_iid_cifar10_cnn/offload.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: offload +deadline: 500000 \ No newline at end of file diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/offload_strict.cfg.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/offload_strict.cfg.yaml new file mode 100644 index 00000000..f07a9c58 --- /dev/null +++ 
b/configs/p33_freezoff_non_iid_cifar10_cnn/offload_strict.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 7 +deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/run.py b/configs/p33_freezoff_non_iid_cifar10_cnn/run.py new file mode 100644 index 00000000..3f289cc1 --- /dev/null +++ b/configs/p33_freezoff_non_iid_cifar10_cnn/run.py @@ -0,0 +1,30 @@ +from pathlib import Path + +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + name = 'p11_freezoff' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + 'fedavg.yaml', + 'offload_strict.yaml', + 'fednova.yaml', + 'fedprox.yaml', + 'offload.yaml', + 'tifl_adaptive.yaml', + 'tifl_basic.yaml', + 'dyn_terminate_swyh.yaml', + 'dyn_terminate.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + + print('Done') + + diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/tifl_adaptive.cfg.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/tifl_adaptive.cfg.yaml new file mode 100644 index 00000000..e0ca9fbd --- /dev/null +++ b/configs/p33_freezoff_non_iid_cifar10_cnn/tifl_adaptive.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 \ No newline at end of file diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/tifl_basic.cfg.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/tifl_basic.cfg.yaml new file mode 100644 index 00000000..b12b53b3 --- /dev/null +++ b/configs/p33_freezoff_non_iid_cifar10_cnn/tifl_basic.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 \ No newline at end of file diff --git 
a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml new file mode 100644 index 00000000..03073a5c --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p34' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml new file mode 100644 index 00000000..279369ef --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 \ No newline at end of file diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml new file mode 100644 index 00000000..578998b4 --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 diff --git 
a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml new file mode 100644 index 00000000..37e8a04a --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p34' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 +experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_dyn_terminate' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml new file mode 100644 index 00000000..634d40b3 --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p34' +tensor_board_active: true +termination_percentage: 0.7 
+clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 + +experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_dyn_terminate_swyh' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml new file mode 100644 index 00000000..ccaa69df --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p34' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +experiment_prefix: 
'p34_freezoff_non_iid_cifar10_cnn_w9s3_fedavg' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml new file mode 100644 index 00000000..d3aea80a --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p34' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova +experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_fednova' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml new file mode 100644 index 00000000..121218a1 --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p34' +tensor_board_active: true +termination_percentage: 
0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx +experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_fedprox' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml new file mode 100644 index 00000000..0ea6f35c --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p34' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 500000 +experiment_prefix: 
'p34_freezoff_non_iid_cifar10_cnn_w9s3_offload' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml new file mode 100644 index 00000000..58d5c49f --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p34' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 33 +deadline_threshold: 2 +experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml new file mode 100644 index 00000000..3e000c48 --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p34' 
+tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 34 +deadline_threshold: 2 +experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict2' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml new file mode 100644 index 00000000..0cc9dba8 --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p34' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual 
configuration +offload_stategy: offload +deadline: 36 +deadline_threshold: 3 +experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict3' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml new file mode 100644 index 00000000..0eeb0782 --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p34' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 38 +deadline_threshold: 3 +experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict4' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml new file mode 100644 index 00000000..4f97443e --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to 
false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p34' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 +experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_tifl_adaptive' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml new file mode 100644 index 00000000..a01d9d72 --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p34' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: 
'10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 +experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_tifl_basic' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml new file mode 100644 index 00000000..3b4615d1 --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 \ No newline at end of file diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml new file mode 100644 index 00000000..ca0e2a55 --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova \ No newline at end of file diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml new file mode 100644 index 00000000..f66490e9 --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx \ No newline at end of file diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/gen.py b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + 
exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml new file mode 100644 index 00000000..3febf08b --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: offload +deadline: 500000 \ No newline at end of file diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml new file mode 100644 index 00000000..ba287d60 --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 33 +deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml new file mode 100644 index 00000000..d2a1ae5b --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 34 +deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml new file mode 100644 index 00000000..bcd99ddf --- /dev/null +++ 
b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 36 +deadline_threshold: 3 \ No newline at end of file diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml new file mode 100644 index 00000000..9f884b16 --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 38 +deadline_threshold: 3 \ No newline at end of file diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/run.py b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/run.py new file mode 100644 index 00000000..9e73a7ba --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/run.py @@ -0,0 +1,41 @@ +from pathlib import Path +import time +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + EVENT_FILE="exp_events.txt" + name = 'p23_w9s3-half' + # name = 'p23_w9s3' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + # 'fedavg.yaml', + # 'offload_strict.yaml', + # 'offload_strict2.yaml', + 'offload_strict3.yaml', + 'offload_strict4.yaml', + # 'fednova.yaml', + # 'fedprox.yaml', + # 'tifl_adaptive.yaml', + # 'tifl_basic.yaml', + # 'offload.yaml', + # 'dyn_terminate_swyh.yaml', + # 'dyn_terminate.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') + start = time.time() + + + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + elapsed = time.time() - start + os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} 
in {elapsed} seconds" >> {EVENT_FILE}') + + print('Done') + + diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml new file mode 100644 index 00000000..e0ca9fbd --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 \ No newline at end of file diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml new file mode 100644 index 00000000..b12b53b3 --- /dev/null +++ b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/descr.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/descr.yaml new file mode 100644 index 00000000..26406f56 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/descr.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p35' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # 
nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/dyn_terminate.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/dyn_terminate.cfg.yaml new file mode 100644 index 00000000..279369ef --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/dyn_terminate.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/dyn_terminate_swyh.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/dyn_terminate_swyh.cfg.yaml new file mode 100644 index 00000000..578998b4 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/dyn_terminate_swyh.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/dyn_terminate.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/dyn_terminate.yaml new file mode 100644 index 00000000..9bf1330f --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/dyn_terminate.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p35' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + 
hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 +experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_dyn_terminate' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/dyn_terminate_swyh.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/dyn_terminate_swyh.yaml new file mode 100644 index 00000000..69eb49ea --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/dyn_terminate_swyh.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p35' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 + +experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_dyn_terminate_swyh' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fedavg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fedavg.yaml new file mode 100644 index 00000000..e8fea4bd --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fedavg.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: 
true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p35' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_fedavg' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fednova.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fednova.yaml new file mode 100644 index 00000000..ca6f7508 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fednova.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p35' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: 
+ federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova +experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_fednova' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fedprox.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fedprox.yaml new file mode 100644 index 00000000..f9882b88 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fedprox.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p35' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx +experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_fedprox' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/offload.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/offload.yaml new file mode 100644 index 00000000..b857c0e0 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/offload.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 
+wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p35' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: offload +deadline: 500000 +experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_offload' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/offload_strict.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/offload_strict.yaml new file mode 100644 index 00000000..03732d06 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/offload_strict.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p35' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || 
random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: offload +deadline: 7 +deadline_threshold: 2 +experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_offload_strict' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/tifl_adaptive.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/tifl_adaptive.yaml new file mode 100644 index 00000000..7d0552be --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/tifl_adaptive.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p35' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 +experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_tifl_adaptive' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/tifl_basic.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/tifl_basic.yaml new file mode 100644 index 00000000..77600133 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/tifl_basic.yaml @@ -0,0 +1,36 @@ +--- +# 
Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p35' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 +experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_tifl_basic' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fedavg.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fedavg.cfg.yaml new file mode 100644 index 00000000..3b4615d1 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fedavg.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fednova.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fednova.cfg.yaml new file mode 100644 index 00000000..ca0e2a55 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fednova.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fedprox.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fedprox.cfg.yaml new file mode 100644 index 
00000000..f66490e9 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fedprox.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/gen.py b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/offload.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/offload.cfg.yaml new file mode 100644 index 00000000..3febf08b --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/offload.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: offload +deadline: 500000 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/offload_strict.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/offload_strict.cfg.yaml new file mode 100644 index 00000000..f07a9c58 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn 
copy/offload_strict.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 7 +deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/run.py b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/run.py new file mode 100644 index 00000000..8484ca9a --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/run.py @@ -0,0 +1,37 @@ +from pathlib import Path +import time +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + EVENT_FILE="exp_events.txt" + name = 'p11_freezoff' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + 'fedavg.yaml', + # 'offload_strict.yaml', + # 'fednova.yaml', + # 'fedprox.yaml', + # 'offload.yaml', + # 'tifl_adaptive.yaml', + # 'tifl_basic.yaml', + # 'dyn_terminate_swyh.yaml', + # 'dyn_terminate.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') + start = time.time() + + + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + elapsed = time.time() - start + os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') + + print('Done') + + diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/tifl_adaptive.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/tifl_adaptive.cfg.yaml new file mode 100644 index 00000000..e0ca9fbd --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/tifl_adaptive.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/tifl_basic.cfg.yaml 
b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/tifl_basic.cfg.yaml new file mode 100644 index 00000000..b12b53b3 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/tifl_basic.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/descr.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/descr.yaml new file mode 100644 index 00000000..26406f56 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn/descr.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p35' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/dyn_terminate.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/dyn_terminate.cfg.yaml new file mode 100644 index 00000000..279369ef --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn/dyn_terminate.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/dyn_terminate_swyh.cfg.yaml 
b/configs/p35_freezoff_non_iid_5_cifar10_cnn/dyn_terminate_swyh.cfg.yaml new file mode 100644 index 00000000..578998b4 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn/dyn_terminate_swyh.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/dyn_terminate.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/dyn_terminate.yaml new file mode 100644 index 00000000..9bf1330f --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/dyn_terminate.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p35' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 +experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_dyn_terminate' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/dyn_terminate_swyh.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/dyn_terminate_swyh.yaml new file mode 100644 index 00000000..69eb49ea --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/dyn_terminate_swyh.yaml @@ -0,0 +1,37 @@ +--- +# Experiment 
configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p35' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 + +experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_dyn_terminate_swyh' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fedavg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fedavg.yaml new file mode 100644 index 00000000..e8fea4bd --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fedavg.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p35' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || 
alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_fedavg' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fednova.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fednova.yaml new file mode 100644 index 00000000..ca6f7508 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fednova.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p35' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova +experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_fednova' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fedprox.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fedprox.yaml new file mode 100644 index 00000000..f9882b88 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fedprox.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration 
+total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p35' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx +experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_fedprox' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/offload.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/offload.yaml new file mode 100644 index 00000000..b857c0e0 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/offload.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p35' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 
# random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: offload +deadline: 500000 +experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_offload' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/offload_strict.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/offload_strict.yaml new file mode 100644 index 00000000..03732d06 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/offload_strict.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p35' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: offload +deadline: 7 +deadline_threshold: 2 +experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_offload_strict' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/tifl_adaptive.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/tifl_adaptive.yaml new file mode 100644 index 00000000..7d0552be --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/tifl_adaptive.yaml @@ -0,0 +1,36 @@ +--- +# Experiment 
configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p35' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 +experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_tifl_adaptive' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/tifl_basic.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/tifl_basic.yaml new file mode 100644 index 00000000..77600133 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/tifl_basic.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 2 +wait_for_clients: true +net: Cifar10CNN +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p35' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 2 +node_groups: + slow: [1, 6] + medium: [7, 12] + fast: [13, 18] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 5 # label limit || q probability || alpha 
|| unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 18 +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 +experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_tifl_basic' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/fedavg.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/fedavg.cfg.yaml new file mode 100644 index 00000000..3b4615d1 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn/fedavg.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/fednova.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/fednova.cfg.yaml new file mode 100644 index 00000000..ca0e2a55 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn/fednova.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/fedprox.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/fedprox.cfg.yaml new file mode 100644 index 00000000..f66490e9 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn/fedprox.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/gen.py b/configs/p35_freezoff_non_iid_5_cifar10_cnn/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in 
x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/offload.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/offload.cfg.yaml new file mode 100644 index 00000000..3febf08b --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn/offload.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: offload +deadline: 500000 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/offload_strict.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/offload_strict.cfg.yaml new file mode 100644 index 00000000..f07a9c58 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn/offload_strict.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 7 +deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/run.py b/configs/p35_freezoff_non_iid_5_cifar10_cnn/run.py new file mode 100644 index 00000000..8484ca9a --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn/run.py @@ -0,0 +1,37 @@ +from pathlib import Path +import time +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + EVENT_FILE="exp_events.txt" + name = 'p11_freezoff' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + 'fedavg.yaml', + # 'offload_strict.yaml', + # 'fednova.yaml', + # 
'fedprox.yaml', + # 'offload.yaml', + # 'tifl_adaptive.yaml', + # 'tifl_basic.yaml', + # 'dyn_terminate_swyh.yaml', + # 'dyn_terminate.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') + start = time.time() + + + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + elapsed = time.time() - start + os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') + + print('Done') + + diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/tifl_adaptive.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/tifl_adaptive.cfg.yaml new file mode 100644 index 00000000..e0ca9fbd --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn/tifl_adaptive.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/tifl_basic.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/tifl_basic.cfg.yaml new file mode 100644 index 00000000..b12b53b3 --- /dev/null +++ b/configs/p35_freezoff_non_iid_5_cifar10_cnn/tifl_basic.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 \ No newline at end of file diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/descr.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/descr.yaml new file mode 100644 index 00000000..73e25c49 --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/descr.yaml @@ -0,0 +1,32 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10ResNet +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 
+warmup_round: false +output_location: 'output/p36' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/dyn_terminate.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/dyn_terminate.cfg.yaml new file mode 100644 index 00000000..279369ef --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/dyn_terminate.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 \ No newline at end of file diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/dyn_terminate_swyh.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/dyn_terminate_swyh.cfg.yaml new file mode 100644 index 00000000..578998b4 --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/dyn_terminate_swyh.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/dyn_terminate.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/dyn_terminate.yaml new file mode 100644 index 00000000..bc057921 --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/dyn_terminate.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10ResNet +dataset: cifar10 +# Use cuda is available; setting to false 
will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p36' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: dynamic-terminate +deadline: 500000 +experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_dyn_terminate' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/dyn_terminate_swyh.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/dyn_terminate_swyh.yaml new file mode 100644 index 00000000..cce27aef --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/dyn_terminate_swyh.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10ResNet +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p36' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: 
'131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: dynamic-terminate-swyh +deadline: 500000 + +experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_dyn_terminate_swyh' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fedavg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fedavg.yaml new file mode 100644 index 00000000..76521c3a --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fedavg.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10ResNet +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p36' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_fedavg' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fednova.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fednova.yaml new file mode 100644 index 00000000..651a5c6d --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fednova.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: 
Cifar10ResNet +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p36' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova +experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_fednova' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fedprox.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fedprox.yaml new file mode 100644 index 00000000..0830ef17 --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fedprox.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10ResNet +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p36' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed 
|| unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx +experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_fedprox' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload.yaml new file mode 100644 index 00000000..7719dec5 --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10ResNet +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p36' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 500000 +experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_offload' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict.yaml new file mode 100644 index 00000000..0ec1313c --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration 
+total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10ResNet +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p36' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 33 +deadline_threshold: 2 +experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_offload_strict' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict2.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict2.yaml new file mode 100644 index 00000000..4cdec706 --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict2.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10ResNet +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p36' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # 
label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 34 +deadline_threshold: 2 +experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_offload_strict2' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict3.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict3.yaml new file mode 100644 index 00000000..78f24757 --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict3.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10ResNet +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p36' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 36 +deadline_threshold: 3 +experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_offload_strict3' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict4.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict4.yaml new file mode 100644 index 
00000000..8b28e12c --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict4.yaml @@ -0,0 +1,37 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10ResNet +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p36' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: offload +deadline: 38 +deadline_threshold: 3 +experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_offload_strict4' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/tifl_adaptive.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/tifl_adaptive.yaml new file mode 100644 index 00000000..056d4c26 --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/tifl_adaptive.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10ResNet +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p36' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || 
"uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 +experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_tifl_adaptive' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/tifl_basic.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/tifl_basic.yaml new file mode 100644 index 00000000..4e7d6a9f --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/tifl_basic.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 50 +epochs_per_cycle: 1 +wait_for_clients: true +net: Cifar10ResNet +dataset: cifar10 +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 30 +warmup_round: false +output_location: 'output/p36' +tensor_board_active: true +termination_percentage: 0.7 +clients_per_round: 3 +node_groups: + slow: [1, 3] + medium: [4, 6] + fast: [7, 9] +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" +sampler_args: + - 2 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 9 +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 +experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_tifl_basic' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fedavg.cfg.yaml 
b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fedavg.cfg.yaml new file mode 100644 index 00000000..3b4615d1 --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fedavg.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 \ No newline at end of file diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fednova.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fednova.cfg.yaml new file mode 100644 index 00000000..ca0e2a55 --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fednova.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova \ No newline at end of file diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fedprox.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fedprox.cfg.yaml new file mode 100644 index 00000000..f66490e9 --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fedprox.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx \ No newline at end of file diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/gen.py b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: 
\'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + + diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload.cfg.yaml new file mode 100644 index 00000000..3febf08b --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: offload +deadline: 500000 \ No newline at end of file diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict.cfg.yaml new file mode 100644 index 00000000..ba287d60 --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 33 +deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict2.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict2.cfg.yaml new file mode 100644 index 00000000..d2a1ae5b --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict2.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 34 +deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict3.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict3.cfg.yaml new file mode 100644 index 00000000..bcd99ddf --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict3.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 36 +deadline_threshold: 3 \ No newline at end of file diff --git 
a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict4.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict4.cfg.yaml new file mode 100644 index 00000000..9f884b16 --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict4.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: offload +deadline: 38 +deadline_threshold: 3 \ No newline at end of file diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/run.py b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/run.py new file mode 100644 index 00000000..2a60ce38 --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/run.py @@ -0,0 +1,41 @@ +from pathlib import Path +import time +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + EVENT_FILE="exp_events.txt" + name = 'p23_w9s3_fast' + # name = 'p23_w9s3' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + 'fedavg.yaml', + # 'offload_strict.yaml', + # 'offload_strict2.yaml', + # 'offload_strict3.yaml', + # 'offload_strict4.yaml', + # 'fednova.yaml', + # 'fedprox.yaml', + # 'tifl_adaptive.yaml', + # 'tifl_basic.yaml', + # 'offload.yaml', + # 'dyn_terminate_swyh.yaml', + # 'dyn_terminate.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') + start = time.time() + + + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + elapsed = time.time() - start + os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') + + print('Done') + + diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/tifl_adaptive.cfg.yaml 
b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/tifl_adaptive.cfg.yaml new file mode 100644 index 00000000..e0ca9fbd --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/tifl_adaptive.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-adaptive +deadline: 500000 \ No newline at end of file diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/tifl_basic.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/tifl_basic.cfg.yaml new file mode 100644 index 00000000..b12b53b3 --- /dev/null +++ b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/tifl_basic.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: tifl-basic +deadline: 500000 \ No newline at end of file diff --git a/fltk/util/generate_docker_compose.py b/fltk/util/generate_docker_compose.py index 701d6cfb..8ac761a4 100644 --- a/fltk/util/generate_docker_compose.py +++ b/fltk/util/generate_docker_compose.py @@ -168,6 +168,44 @@ def generate_p23_freezoff_w9s3_half(): with open(r'./docker-compose.yml', 'w') as file: yaml.dump(system_template, file, sort_keys=False) +def generate_p23_freezoff_w9s3_fast(): + template_path = get_deploy_path('p23_freezoff_w9s3_fast') + num_clients = 9 + cpu_per_client = 1 + num_cpus = 20 + world_size = num_clients + 1 + system_template: dict = load_system_template(template_path=template_path) + + for key, item in enumerate(system_template['services']['fl_server']['environment']): + if item == 'WORLD_SIZE={world_size}': + system_template['services']['fl_server']['environment'][key] = item.format(world_size=world_size) + cpu_set = 0 + cpu_idx = 2 + for client_id in range(1, num_clients + 1): + client_type = 'default' + if 0 < client_id <= 3: + client_type = 'slow' + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + elif 3 < client_id <= 6: + client_type = 'medium' + cpu_set = f'{cpu_idx}-{cpu_idx+1}' + cpu_idx += 2 + elif 6 < client_id <= 9: + client_type = 'fast' + cpu_set = f'{cpu_idx}-{cpu_idx + 2}' + cpu_idx += 3 + 
else: + cpu_set = f'{cpu_idx}' + cpu_idx += 1 + + client_template: dict = load_client_template(type=client_type, template_path=template_path) + client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type, cpu_set=cpu_set) + system_template['services'].update(client_definition) + + with open(r'./docker-compose.yml', 'w') as file: + yaml.dump(system_template, file, sort_keys=False) + def generate_terminate(num_clients = 16, medium=False): @@ -306,6 +344,30 @@ def generate_check_w18(): yaml.dump(system_template, file, sort_keys=False) +def generate_check_w18_fast(): + template_path = get_deploy_path('p11_freezoff_fast') + num_clients= 6 + world_size = num_clients + 1 + system_template: dict = load_system_template(template_path=template_path) + + for key, item in enumerate(system_template['services']['fl_server']['environment']): + if item == 'WORLD_SIZE={world_size}': + system_template['services']['fl_server']['environment'][key] = item.format(world_size=world_size) + cpu_idx = 2 + for client_id in range(1, num_clients + 1): + client_type = 'fast' + cpu_set = f'{cpu_idx}-{cpu_idx + 2}' + cpu_idx += 3 + + client_template: dict = load_client_template(type=client_type, template_path=template_path) + client_definition, container_name = generate_client(client_id, client_template, world_size, type=client_type, + cpu_set=cpu_set) + system_template['services'].update(client_definition) + + with open(r'./docker-compose.yml', 'w') as file: + yaml.dump(system_template, file, sort_keys=False) + + def generate_p11_freezoff(): template_path = get_deploy_path('p11_freezoff') num_clients= 18 @@ -484,10 +546,12 @@ def run(name, num_clients = None, medium=False): 'p13_w6' : generate_p13_w6, 'p23_w9s3': generate_p23_freezoff_w9s3, 'p23_w9s3-half': generate_p23_freezoff_w9s3_half, + 'p23_w9s3_fast': generate_p23_freezoff_w9s3_fast, 'p28_non_iid_effect': generate_p28_non_iid_effect, 'p30_dev': generate_p30_freezing_effect_dev, 
'generate_check_w4': generate_check_w4, - 'generate_check_w18': generate_check_w18 + 'generate_check_w18': generate_check_w18, + 'generate_check_w18_fast': generate_check_w18_fast } if num_clients: exp_dict[name](num_clients, medium) From 983d1dc99d44f92f763333e470ef1a974cf2498e Mon Sep 17 00:00:00 2001 From: Lydia Date: Tue, 1 Feb 2022 10:40:13 +0100 Subject: [PATCH 56/73] Add deploy files --- deploy/p11_freezoff/client_stub_slow.yml | 3 ++- .../p11_freezoff_fast/client_stub_default.yml | 26 ++++++++++++++++++ deploy/p11_freezoff_fast/client_stub_fast.yml | 25 +++++++++++++++++ .../p11_freezoff_fast/client_stub_medium.yml | 25 +++++++++++++++++ deploy/p11_freezoff_fast/client_stub_slow.yml | 26 ++++++++++++++++++ deploy/p11_freezoff_fast/system_stub.yml | 27 +++++++++++++++++++ .../client_stub_default.yml | 26 ++++++++++++++++++ .../client_stub_fast.yml | 25 +++++++++++++++++ .../client_stub_medium.yml | 25 +++++++++++++++++ .../client_stub_slow.yml | 25 +++++++++++++++++ deploy/p23_freezoff_w9s3-half/system_stub.yml | 27 +++++++++++++++++++ .../client_stub_default.yml | 26 ++++++++++++++++++ .../client_stub_fast.yml | 25 +++++++++++++++++ .../client_stub_medium.yml | 25 +++++++++++++++++ .../client_stub_slow.yml | 25 +++++++++++++++++ deploy/p23_freezoff_w9s3_fast/system_stub.yml | 27 +++++++++++++++++++ 16 files changed, 387 insertions(+), 1 deletion(-) create mode 100644 deploy/p11_freezoff_fast/client_stub_default.yml create mode 100644 deploy/p11_freezoff_fast/client_stub_fast.yml create mode 100644 deploy/p11_freezoff_fast/client_stub_medium.yml create mode 100644 deploy/p11_freezoff_fast/client_stub_slow.yml create mode 100644 deploy/p11_freezoff_fast/system_stub.yml create mode 100644 deploy/p23_freezoff_w9s3-half/client_stub_default.yml create mode 100644 deploy/p23_freezoff_w9s3-half/client_stub_fast.yml create mode 100644 deploy/p23_freezoff_w9s3-half/client_stub_medium.yml create mode 100644 deploy/p23_freezoff_w9s3-half/client_stub_slow.yml create 
mode 100644 deploy/p23_freezoff_w9s3-half/system_stub.yml create mode 100644 deploy/p23_freezoff_w9s3_fast/client_stub_default.yml create mode 100644 deploy/p23_freezoff_w9s3_fast/client_stub_fast.yml create mode 100644 deploy/p23_freezoff_w9s3_fast/client_stub_medium.yml create mode 100644 deploy/p23_freezoff_w9s3_fast/client_stub_slow.yml create mode 100644 deploy/p23_freezoff_w9s3_fast/system_stub.yml diff --git a/deploy/p11_freezoff/client_stub_slow.yml b/deploy/p11_freezoff/client_stub_slow.yml index 75771735..bafa7e5a 100644 --- a/deploy/p11_freezoff/client_stub_slow.yml +++ b/deploy/p11_freezoff/client_stub_slow.yml @@ -22,4 +22,5 @@ client_name: # name can be anything deploy: resources: limits: - cpus: '0.1' \ No newline at end of file + cpus: '0.5' + # cpus: '0.1' \ No newline at end of file diff --git a/deploy/p11_freezoff_fast/client_stub_default.yml b/deploy/p11_freezoff_fast/client_stub_default.yml new file mode 100644 index 00000000..43d6c919 --- /dev/null +++ b/deploy/p11_freezoff_fast/client_stub_default.yml @@ -0,0 +1,26 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=default + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '1' +# memory: 1024M diff --git a/deploy/p11_freezoff_fast/client_stub_fast.yml b/deploy/p11_freezoff_fast/client_stub_fast.yml new file mode 100644 index 00000000..d7c98ce0 --- /dev/null +++ b/deploy/p11_freezoff_fast/client_stub_fast.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=fast + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '3' diff --git a/deploy/p11_freezoff_fast/client_stub_medium.yml b/deploy/p11_freezoff_fast/client_stub_medium.yml new file mode 100644 index 00000000..677accdf --- /dev/null +++ b/deploy/p11_freezoff_fast/client_stub_medium.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=medium + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '0.75' diff --git a/deploy/p11_freezoff_fast/client_stub_slow.yml b/deploy/p11_freezoff_fast/client_stub_slow.yml new file mode 100644 index 00000000..f1ef01a8 --- /dev/null +++ b/deploy/p11_freezoff_fast/client_stub_slow.yml @@ -0,0 +1,26 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: + - ./data:/opt/federation-lab/data +# - ./docker_data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=slow + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '0.1' + # cpus: '0.1' \ No newline at end of file diff --git a/deploy/p11_freezoff_fast/system_stub.yml b/deploy/p11_freezoff_fast/system_stub.yml new file mode 100644 index 00000000..77a19443 --- /dev/null +++ b/deploy/p11_freezoff_fast/system_stub.yml @@ -0,0 +1,27 @@ +# creating a multi-container docker +version: "3.3" +services: + fl_server: # name can be anything + container_name: federation-lab-server # what the name for this container would be + cpuset: '0-1' + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./data/MNIST:/opt/federation-lab/data/MNIST + - ./data:/opt/federation-lab/data + - ./output:/opt/federation-lab/output + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK=0 + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + ports: + - "5000:5000" # {machine-port}:{docker-port} + networks: + default: + ipv4_address: 10.5.0.11 +networks: + default: + external: + name: local_network_dev \ No newline at end of file diff --git a/deploy/p23_freezoff_w9s3-half/client_stub_default.yml b/deploy/p23_freezoff_w9s3-half/client_stub_default.yml new file mode 100644 index 00000000..43d6c919 --- /dev/null +++ b/deploy/p23_freezoff_w9s3-half/client_stub_default.yml @@ -0,0 +1,26 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . # look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=default + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '1' +# memory: 1024M diff --git a/deploy/p23_freezoff_w9s3-half/client_stub_fast.yml b/deploy/p23_freezoff_w9s3-half/client_stub_fast.yml new file mode 100644 index 00000000..d7c98ce0 --- /dev/null +++ b/deploy/p23_freezoff_w9s3-half/client_stub_fast.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if 
it crashes for example + build: . # look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=fast + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '3' diff --git a/deploy/p23_freezoff_w9s3-half/client_stub_medium.yml b/deploy/p23_freezoff_w9s3-half/client_stub_medium.yml new file mode 100644 index 00000000..f6bded5d --- /dev/null +++ b/deploy/p23_freezoff_w9s3-half/client_stub_medium.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=medium + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '2' diff --git a/deploy/p23_freezoff_w9s3-half/client_stub_slow.yml b/deploy/p23_freezoff_w9s3-half/client_stub_slow.yml new file mode 100644 index 00000000..a5ead21c --- /dev/null +++ b/deploy/p23_freezoff_w9s3-half/client_stub_slow.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: + - ./data:/opt/federation-lab/data +# - ./docker_data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=slow + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '0.5' \ No newline at end of file diff --git a/deploy/p23_freezoff_w9s3-half/system_stub.yml b/deploy/p23_freezoff_w9s3-half/system_stub.yml new file mode 100644 index 00000000..77a19443 --- /dev/null +++ b/deploy/p23_freezoff_w9s3-half/system_stub.yml @@ -0,0 +1,27 @@ +# creating a multi-container docker +version: "3.3" +services: + fl_server: # name can be anything + container_name: federation-lab-server # what the name for this container would be + cpuset: '0-1' + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./data/MNIST:/opt/federation-lab/data/MNIST + - ./data:/opt/federation-lab/data + - ./output:/opt/federation-lab/output + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK=0 + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + ports: + - "5000:5000" # {machine-port}:{docker-port} + networks: + default: + ipv4_address: 10.5.0.11 +networks: + default: + external: + name: local_network_dev \ No newline at end of file diff --git a/deploy/p23_freezoff_w9s3_fast/client_stub_default.yml b/deploy/p23_freezoff_w9s3_fast/client_stub_default.yml new file mode 100644 index 00000000..43d6c919 --- /dev/null +++ b/deploy/p23_freezoff_w9s3_fast/client_stub_default.yml @@ -0,0 +1,26 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . # look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=default + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '1' +# memory: 1024M diff --git a/deploy/p23_freezoff_w9s3_fast/client_stub_fast.yml b/deploy/p23_freezoff_w9s3_fast/client_stub_fast.yml new file mode 100644 index 00000000..d7c98ce0 --- /dev/null +++ b/deploy/p23_freezoff_w9s3_fast/client_stub_fast.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if 
it crashes for example + build: . # look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=fast + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '3' diff --git a/deploy/p23_freezoff_w9s3_fast/client_stub_medium.yml b/deploy/p23_freezoff_w9s3_fast/client_stub_medium.yml new file mode 100644 index 00000000..f6bded5d --- /dev/null +++ b/deploy/p23_freezoff_w9s3_fast/client_stub_medium.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=medium + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '2' diff --git a/deploy/p23_freezoff_w9s3_fast/client_stub_slow.yml b/deploy/p23_freezoff_w9s3_fast/client_stub_slow.yml new file mode 100644 index 00000000..19a0ab36 --- /dev/null +++ b/deploy/p23_freezoff_w9s3_fast/client_stub_slow.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: + - ./data:/opt/federation-lab/data +# - ./docker_data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - NODE_GROUP=slow + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '1' \ No newline at end of file diff --git a/deploy/p23_freezoff_w9s3_fast/system_stub.yml b/deploy/p23_freezoff_w9s3_fast/system_stub.yml new file mode 100644 index 00000000..77a19443 --- /dev/null +++ b/deploy/p23_freezoff_w9s3_fast/system_stub.yml @@ -0,0 +1,27 @@ +# creating a multi-container docker +version: "3.3" +services: + fl_server: # name can be anything + container_name: federation-lab-server # what the name for this container would be + cpuset: '0-1' + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./data/MNIST:/opt/federation-lab/data/MNIST + - ./data:/opt/federation-lab/data + - ./output:/opt/federation-lab/output + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK=0 + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + ports: + - "5000:5000" # {machine-port}:{docker-port} + networks: + default: + ipv4_address: 10.5.0.11 +networks: + default: + external: + name: local_network_dev \ No newline at end of file From 1bc1c42e01664b5847b8f089b5e0ff2491382c83 Mon Sep 17 00:00:00 2001 From: bacox Date: Tue, 1 Feb 2022 10:46:19 +0100 Subject: [PATCH 57/73] Update show client dist --- fltk/util/show_client_distributions.py | 40 +++++++++++++++++++++----- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/fltk/util/show_client_distributions.py b/fltk/util/show_client_distributions.py index 414e799d..ed50927b 100644 --- a/fltk/util/show_client_distributions.py +++ b/fltk/util/show_client_distributions.py @@ -4,8 +4,12 @@ import pandas as pd from tqdm import tqdm +from fltk.client import Client from fltk.datasets import DistCIFAR10Dataset, DistCIFAR100Dataset, DistFashionMNISTDataset, DistDataset import logging + +from fltk.util.base_config import BareConfig + logging.basicConfig( level=logging.DEBUG, format='%(asctime)s %(levelname)s %(module)s - %(funcName)s: %(message)s', @@ -25,7 +29,7 @@ # 'dirichlet': {'seed': 1, 'range':[0.1, 1, 0.1]}, } -num_clients = 6 +num_clients = 10 class dummy_args: net = 'Cifar10CNN' dataset_name = 'cifar10' @@ -45,6 +49,13 @@ class dummy_args: world_size = 2 logger = logging.Logger(__name__) data_path = 'data' + cuda = False + + def get_net(self): + return self.net + + def init_logger(self, logger): + self.logger = logger def get_distributed(self): return self.distributed @@ -75,22 +86,37 @@ def gen_distribution(name, params): if rank == 0: continue print(f'node {rank}') - args = dummy_args() + args = 
BareConfig() + args.init_logger(logging) args.data_sampler = name + + + # args.set_net_by_name('MNISTCNN') + # args.dataset_name = 'mnist' + args.set_net_by_name('FashionMNISTCNN') + args.dataset_name = 'fashion-mnist' + # data_sampler = "uniform" #s = "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) + # data_sampler = "limit labels flex" + args.data_sampler = "n labels" + args.data_sampler_args = [2 , 42] args.world_size = world_size args.rank = rank dataset: DistDataset = args.DistDatasets[args.dataset_name](args) datasets.append((args, dataset)) - test_loader = dataset.get_test_loader() - train_loader = dataset.get_train_loader() - class_dict = dataset.train_dataset.class_to_idx + # test_loader = dataset.get_test_loader() + # train_loader = dataset.get_train_loader() + # class_dict = dataset.train_dataset.class_to_idx print('Iterating over all items') batch_size = 16 # for i, (inputs, labels) in enumerate(dataset.get_train_loader(), 0): # print(labels) # print('d') - train_loader = dataset.get_train_loader() - test_loader = dataset.get_test_loader() + client = Client("test", None, rank, args.world_size, args) + client.init_dataloader() + train_loader = client.dataset.get_train_loader() + train_loader2 = dataset.get_train_loader() + test_loader = client.dataset.get_test_loader() + test_loader2 = dataset.get_test_loader() idx2class = {v: k for k, v in train_loader.dataset.class_to_idx.items()} count_dict = {k: 0 for k, v in train_loader.dataset.class_to_idx.items()} From 0038b03f21f66f382b6776cff36bda0c43146ef5 Mon Sep 17 00:00:00 2001 From: bacox Date: Tue, 1 Feb 2022 21:36:06 +0100 Subject: [PATCH 58/73] Add dev config files --- .gitignore | 4 +- configs/dev_mnist_all/descr.yaml | 31 ++ configs/dev_mnist_all/exps/fedavg.yaml | 35 ++ configs/dev_mnist_all/exps/fednova.yaml | 36 ++ configs/dev_mnist_all/exps/fedprox.yaml | 36 ++ configs/dev_mnist_all/fedavg.cfg.yaml | 3 + configs/dev_mnist_all/fednova.cfg.yaml | 4 + 
configs/dev_mnist_all/fedprox.cfg.yaml | 4 + configs/dev_mnist_all/gen.py | 26 + configs/dev_mnist_all/run.py | 22 + .../strategy/{ => aggregation}/aggregation.py | 0 fltk/strategy/client_selection.py | 31 -- fltk/strategy/data_samplers.py | 489 ------------------ fltk/strategy/{ => optimization}/FedNova.py | 0 fltk/strategy/{ => optimization}/fedprox.py | 0 15 files changed, 200 insertions(+), 521 deletions(-) create mode 100644 configs/dev_mnist_all/descr.yaml create mode 100644 configs/dev_mnist_all/exps/fedavg.yaml create mode 100644 configs/dev_mnist_all/exps/fednova.yaml create mode 100644 configs/dev_mnist_all/exps/fedprox.yaml create mode 100644 configs/dev_mnist_all/fedavg.cfg.yaml create mode 100644 configs/dev_mnist_all/fednova.cfg.yaml create mode 100644 configs/dev_mnist_all/fedprox.cfg.yaml create mode 100644 configs/dev_mnist_all/gen.py create mode 100644 configs/dev_mnist_all/run.py rename fltk/strategy/{ => aggregation}/aggregation.py (100%) delete mode 100644 fltk/strategy/client_selection.py delete mode 100644 fltk/strategy/data_samplers.py rename fltk/strategy/{ => optimization}/FedNova.py (100%) rename fltk/strategy/{ => optimization}/fedprox.py (100%) diff --git a/.gitignore b/.gitignore index 6bfa0ca6..dfd70190 100644 --- a/.gitignore +++ b/.gitignore @@ -144,4 +144,6 @@ output docker_data .idea *.tmp.txt -docker-compose.yml \ No newline at end of file +docker-compose.yml + +refactor-notes.md \ No newline at end of file diff --git a/configs/dev_mnist_all/descr.yaml b/configs/dev_mnist_all/descr.yaml new file mode 100644 index 00000000..998743a6 --- /dev/null +++ b/configs/dev_mnist_all/descr.yaml @@ -0,0 +1,31 @@ +--- +# Experiment configuration +total_epochs: 2 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 100 +warmup_round: false +output_location: 'output/dev_p2' +tensor_board_active: true +clients_per_round: 2 +node_groups: 
+ slow: [1, 1] + medium: [2, 2] + fast: [3, 3] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 2 diff --git a/configs/dev_mnist_all/exps/fedavg.yaml b/configs/dev_mnist_all/exps/fedavg.yaml new file mode 100644 index 00000000..0f7550fe --- /dev/null +++ b/configs/dev_mnist_all/exps/fedavg.yaml @@ -0,0 +1,35 @@ +--- +# Experiment configuration +total_epochs: 2 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 100 +warmup_round: false +output_location: 'output/dev_p2' +tensor_board_active: true +clients_per_round: 2 +node_groups: + slow: [1, 1] + medium: [2, 2] + fast: [3, 3] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 2 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +experiment_prefix: 'dev_mnist_all_fedavg' diff --git a/configs/dev_mnist_all/exps/fednova.yaml b/configs/dev_mnist_all/exps/fednova.yaml new file mode 100644 index 00000000..4528c8dd --- /dev/null +++ b/configs/dev_mnist_all/exps/fednova.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 2 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist 
+# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 100 +warmup_round: false +output_location: 'output/dev_p2' +tensor_board_active: true +clients_per_round: 2 +node_groups: + slow: [1, 1] + medium: [2, 2] + fast: [3, 3] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 2 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova +experiment_prefix: 'dev_mnist_all_fednova' diff --git a/configs/dev_mnist_all/exps/fedprox.yaml b/configs/dev_mnist_all/exps/fedprox.yaml new file mode 100644 index 00000000..a325437c --- /dev/null +++ b/configs/dev_mnist_all/exps/fedprox.yaml @@ -0,0 +1,36 @@ +--- +# Experiment configuration +total_epochs: 2 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 100 +warmup_round: false +output_location: 'output/dev_p2' +tensor_board_active: true +clients_per_round: 2 +node_groups: + slow: [1, 1] + medium: [2, 2] + fast: [3, 3] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 2 +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx 
+experiment_prefix: 'dev_mnist_all_fedprox' diff --git a/configs/dev_mnist_all/fedavg.cfg.yaml b/configs/dev_mnist_all/fedavg.cfg.yaml new file mode 100644 index 00000000..3b4615d1 --- /dev/null +++ b/configs/dev_mnist_all/fedavg.cfg.yaml @@ -0,0 +1,3 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 \ No newline at end of file diff --git a/configs/dev_mnist_all/fednova.cfg.yaml b/configs/dev_mnist_all/fednova.cfg.yaml new file mode 100644 index 00000000..ca0e2a55 --- /dev/null +++ b/configs/dev_mnist_all/fednova.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedNova \ No newline at end of file diff --git a/configs/dev_mnist_all/fedprox.cfg.yaml b/configs/dev_mnist_all/fedprox.cfg.yaml new file mode 100644 index 00000000..f66490e9 --- /dev/null +++ b/configs/dev_mnist_all/fedprox.cfg.yaml @@ -0,0 +1,4 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500000 +optimizer: FedProx \ No newline at end of file diff --git a/configs/dev_mnist_all/gen.py b/configs/dev_mnist_all/gen.py new file mode 100644 index 00000000..168833f0 --- /dev/null +++ b/configs/dev_mnist_all/gen.py @@ -0,0 +1,26 @@ +from pathlib import Path + +if __name__ == '__main__': + base_path = f'configs/{Path(__file__).parent.name}' + path = Path(base_path) + descr_path = path / 'descr.yaml' + + exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + 
f.write(exp_data) + print('Done') + + diff --git a/configs/dev_mnist_all/run.py b/configs/dev_mnist_all/run.py new file mode 100644 index 00000000..c2483d49 --- /dev/null +++ b/configs/dev_mnist_all/run.py @@ -0,0 +1,22 @@ +from pathlib import Path + +from fltk.util.generate_docker_compose import run as generate_docker +import os +if __name__ == '__main__': + name = 'dev' + generate_docker(name) + base_path = f'configs/{Path(__file__).parent.name}' + exp_list = [ + 'fedavg.yaml', + 'fednova.yaml', + 'fedprox.yaml', + ] + exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + first_prefix = '--build' + for exp_cfg_file in exp_list: + cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + print(f'Running cmd: "{cmd}"') + os.system(cmd) + first_prefix = '' + + print('Done') diff --git a/fltk/strategy/aggregation.py b/fltk/strategy/aggregation/aggregation.py similarity index 100% rename from fltk/strategy/aggregation.py rename to fltk/strategy/aggregation/aggregation.py diff --git a/fltk/strategy/client_selection.py b/fltk/strategy/client_selection.py deleted file mode 100644 index dd71a81c..00000000 --- a/fltk/strategy/client_selection.py +++ /dev/null @@ -1,31 +0,0 @@ -import numpy as np - -def random_selection(clients, n): - return np.random.choice(clients, n, replace=False) - - -def tifl_select_tier(tiers): - print([x[3] for x in tiers]) - return np.random.choice([x[0] for x in tiers], 1, p=[x[3] for x in tiers])[0] - -def tifl_update_probs(tiers): - n = len([x for x in tiers if x[2] > 0]) - D = n * (n +1) / 2 - tiers.sort(key=lambda x:x[1]) - idx_decr = 0 - for idx, tier in enumerate(tiers): - if tier[2] > 0: - tier[3] = (n - (idx - idx_decr)) / D - else: - tier[3] = 0 - idx_decr += 1 - -def tifl_select_tier_and_decrement(tiers): - selected_tier = tifl_select_tier(tiers) - for tier in tiers: - if tier[0] == selected_tier: - tier[2] -= 1 - return selected_tier - -def tifl_can_select_tier(tiers): - return len([x for x 
in tiers if x[2] > 0]) \ No newline at end of file diff --git a/fltk/strategy/data_samplers.py b/fltk/strategy/data_samplers.py deleted file mode 100644 index 9710aa1a..00000000 --- a/fltk/strategy/data_samplers.py +++ /dev/null @@ -1,489 +0,0 @@ -from torchvision import datasets, transforms -import random -import logging -from torch.utils.data import DistributedSampler, Dataset -from typing import Iterator -from collections import Counter -import numpy as np - -class DistributedSamplerWrapper(DistributedSampler): - indices = [] - epoch_size = 1.0 - def __init__(self, dataset: Dataset, num_replicas = None, - rank = None, seed = 0) -> None: - super().__init__(dataset, num_replicas=num_replicas, rank=rank) - - self.client_id = rank - 1 - self.n_clients = num_replicas - 1 - self.n_labels = len(dataset.classes) - self.seed = seed - - - def order_by_label(self, dataset): - # order the indices by label - ordered_by_label = [[] for i in range(len(dataset.classes))] - for index, target in enumerate(dataset.targets): - ordered_by_label[target].append(index) - - return ordered_by_label - - def set_epoch_size(self, epoch_size: float) -> None: - """ Sets the epoch size as relative to the local amount of data. - 1.5 will result in the __iter__ function returning the available - indices with half appearing twice. 
- - Args: - epoch_size (float): relative size of epoch - """ - self.epoch_size = epoch_size - - def __iter__(self) -> Iterator[int]: - random.seed(self.rank+self.epoch) - epochs_todo = self.epoch_size - indices = [] - while(epochs_todo > 0.0): - random.shuffle(self.indices) - if epochs_todo >= 1.0: - indices.extend(self.indices) - else: - end_index = int(round(len(self.indices)*epochs_todo)) - indices.extend(self.indices[:end_index]) - - epochs_todo = epochs_todo - 1 - - ratio = len(indices)/float(len(self.indices)) - np.testing.assert_almost_equal(ratio, self.epoch_size, decimal=2) - - return iter(indices) - - def __len__(self) -> int: - return len(self.indices) - -class N_Labels(DistributedSamplerWrapper): - """ - A sampler that limits the number of labels per client - The number of clients must <= than number of labels - """ - - def __init__(self, dataset, num_replicas, rank, args=(5, 42)): - limit, seed = args - super().__init__(dataset, num_replicas, rank, seed) - - num_copies = np.ceil((args[0] * self.n_clients) / self.n_labels) - label_dict = {} - for l in range(self.n_labels): - label_dict[l] = num_copies - - def get_least_used_labels(l_dict: dict): - label_list = [[k, v] for k, v in label_dict.items()] - label_list[-1][1] = 0 - sorted_list = sorted(label_list, key=lambda x: x[1], reverse=True) - # print('d') - # label_list.sort(lambda x:x) - - def choice_n(l_dict: dict, n, seed_offset = 0): - # get_least_used_labels(l_dict) - labels = [k for k, v in label_dict.items() if v] - # summed = sum([int(v) for k, v in label_dict.items() if v]) - # amounts = [float(v) / float(summed) for k, v in label_dict.items() if v] - # # p = amounts / summed - print(f'Available labels: {labels} choose {n}') - # # np.random.seed(seed + seed_offset) - # # @TODO: Error is in this section! 
- # print(f'n={n}, labels={labels}, p={amounts}') - # print(amounts) - - selected = np.random.choice(labels, n, replace=False) - # print(selected) - for k, v in l_dict.items(): - if k in selected: - # v -= 1 - l_dict[k] -= 1 - return selected - - - # print(f'N Clients={self.n_clients}') - # print(f'Num_buckets={num_copies}') - - clients = list(range(self.n_clients)) # keeps track of which clients should still be given a label - client_label_dict = {} - ordered_list = list(range(self.n_labels)) * int(num_copies) - - # Old code - # for idx, client_id in enumerate(clients): - # # client_label_dict[client_id] = [] - # label_set = choice_n(label_dict, args[0], idx) - # client_label_dict[client_id] = label_set - - # Now code - for idx, client_id in enumerate(clients): - label_set = [] - for _ in range(args[0]): - label_set.append(ordered_list.pop()) - client_label_dict[client_id] = label_set - - client_label_dict['rest'] = [] - # New code - if len(ordered_list): - client_label_dict['rest'] = ordered_list - - # Old code - # client_label_dict['rest'] = labels = [k for k, v in label_dict.items() if v] - # for k, v in label_dict.items(): - # for x in range(int(v)): - # client_label_dict['rest'].append(int(k)) - - # Order data by label; split into N buckets and select indices based on the order found in the client-label-dict - - reverse_label_dict = {} - for l in range(self.n_labels): - reverse_label_dict[l] = [] - - for k, v in client_label_dict.items(): - # print(f'client {k} has labels {v}') - for l_c in v: - reverse_label_dict[l_c].append(k) - - indices = [] - ordered_by_label = self.order_by_label(dataset) - indices_per_client = {} - for c in clients: - indices_per_client[c] = [] - - rest_indices = [] - for group, label_list in enumerate(ordered_by_label): - splitted = np.array_split(label_list, num_copies) - client_id_to_distribute = reverse_label_dict[group] - for split_part in splitted: - client_key = client_id_to_distribute.pop() - if client_key == 'rest': - 
rest_indices.append(split_part) - else: - indices_per_client[client_key].append(split_part) - # for split_part in splitted: - # @TODO: Fix this part in terms of code cleanness. Could be written more cleanly - if len(rest_indices): - rest_indices = np.concatenate(rest_indices) - rest_splitted = np.array_split(rest_indices, len(indices_per_client)) - - for k, v in indices_per_client.items(): - v.append(rest_splitted.pop()) - indices_per_client[k] = np.concatenate(v) - else: - rest_indices = np.ndarray([]) - for k, v in indices_per_client.items(): - indices_per_client[k] = np.concatenate(v) - - indices = indices_per_client[self.client_id] - random.seed(seed + self.client_id) # give each client a unique shuffle - random.shuffle(indices) # shuffle indices to spread the labels - - self.indices = indices - - # labels_per_client = int(np.floor(self.n_labels / self.n_clients)) - # remaining_labels = self.n_labels - labels_per_client - # labels = list(range(self.n_labels)) # list of labels to distribute - # clients = list(range(self.n_clients)) # keeps track of which clients should still be given a label - # client_labels = [set() for n in range(self.n_clients)] # set of labels given to each client - # random.seed(seed) # seed, such that the same result can be obtained multiple times - # print(client_labels) - # - # label_order = random.sample(labels, len(labels)) - # client_label_dict = {} - # for client_id in clients: - # client_label_dict[client_id] = [] - # for _ in range(labels_per_client): - # chosen_label = label_order.pop() - # client_label_dict[client_id].append(chosen_label) - # client_labels[client_id].add(chosen_label) - # client_label_dict['rest'] = label_order - # - # - # - # indices = [] - # ordered_by_label = self.order_by_label(dataset) - # labels = client_label_dict[self.client_id] - # for label in labels: - # n_samples = int(len(ordered_by_label[label])) - # clients = [c for c, s in enumerate(client_labels) if label in s] # find out which clients have this 
label - # index = clients.index(self.client_id) # find the position of this client - # start_index = index * n_samples # inclusive - # if rank == self.n_clients: - # end_index = len(ordered_by_label[label]) # exclusive - # else: - # end_index = start_index + n_samples # exclusive - # - # indices += ordered_by_label[label][start_index:end_index] - # - # # Last part is uniform sampler - # rest_indices = [] - # for l in client_label_dict['rest']: - # rest_indices += ordered_by_label[l] - # filtered_rest_indices = rest_indices[self.rank:self.total_size:self.num_replicas] - # indices += filtered_rest_indices - # random.seed(seed + self.client_id) # give each client a unique shuffle - # random.shuffle(indices) # shuffle indices to spread the labels - # - # self.indices = indices - - -class LimitLabelsSamplerFlex(DistributedSamplerWrapper): - """ - A sampler that limits the number of labels per client - The number of clients must <= than number of labels - """ - - def __init__(self, dataset, num_replicas, rank, args=(5, 42)): - limit, seed = args - super().__init__(dataset, num_replicas, rank, seed) - - labels_per_client = int(np.floor(self.n_labels / self.n_clients)) - remaining_labels = self.n_labels - labels_per_client - labels = list(range(self.n_labels)) # list of labels to distribute - clients = list(range(self.n_clients)) # keeps track of which clients should still be given a label - client_labels = [set() for n in range(self.n_clients)] # set of labels given to each client - random.seed(seed) # seed, such that the same result can be obtained multiple times - print(client_labels) - - label_order = random.sample(labels, len(labels)) - client_label_dict = {} - for client_id in clients: - client_label_dict[client_id] = [] - for _ in range(labels_per_client): - chosen_label = label_order.pop() - client_label_dict[client_id].append(chosen_label) - client_labels[client_id].add(chosen_label) - client_label_dict['rest'] = label_order - - indices = [] - ordered_by_label = 
self.order_by_label(dataset) - labels = client_label_dict[self.client_id] - for label in labels: - n_samples = int(len(ordered_by_label[label])) - clients = [c for c, s in enumerate(client_labels) if label in s] # find out which clients have this label - index = clients.index(self.client_id) # find the position of this client - start_index = index * n_samples # inclusive - if rank == self.n_clients: - end_index = len(ordered_by_label[label]) # exclusive - else: - end_index = start_index + n_samples # exclusive - - indices += ordered_by_label[label][start_index:end_index] - - # Last part is uniform sampler - rest_indices = [] - for l in client_label_dict['rest']: - rest_indices += ordered_by_label[l] - filtered_rest_indices = rest_indices[self.rank:self.total_size:self.num_replicas] - indices += filtered_rest_indices - random.seed(seed + self.client_id) # give each client a unique shuffle - random.shuffle(indices) # shuffle indices to spread the labels - - self.indices = indices - -class LimitLabelsSampler(DistributedSamplerWrapper): - """ - A sampler that limits the number of labels per client - """ - def __init__(self, dataset, num_replicas, rank, args=(5, 42)): - limit, seed = args - super().__init__(dataset, num_replicas, rank, seed) - - if self.n_clients % self.n_labels != 0: - logging.error( - "multiples of {} clients are needed for the 'limiting-labels' data distribution method, {} does not work".format( - self.n_labels, self.n_clients)) - return - - n_occurrences = limit * int(self.n_clients / self.n_labels) # number of occurrences of each label - counters = [n_occurrences] * self.n_clients # keeps track of which labels still can be given out - labels = list(range(self.n_labels)) # list of labels to distribute - clients = list(range(self.n_clients)) # keeps track of which clients should still be given a label - client_labels = [set() for n in range(self.n_clients)] # set of labels given to each client - random.seed(seed) # seed, such that the same result can 
be obtained multiple times - - while labels: - # pick a random label - label = random.choice(labels) - counters[label] -= 1 # decrement counter of this label - if counters[label] == 0: # if needed, remove label - labels.remove(label) - - # check which clients the label can be given to - selectable = [i for i in clients if not label in client_labels[i]] - client = None - - if not selectable: - # poor choice, let's fix this -> swap two labels - # conditions for swapping: - # sets of labels A, B, with B incomplete, remaining label l that is not possible to give to B, s.t.: - # (1) l not in A - # (2) exists label l' in A but not in B - # l, l' can be swapped - - client = random.choice(clients) # label can not be given to this client - for c, s in enumerate(client_labels): - if len(s) == limit: # this a completed set - if label not in s: # label can be given to this client (1) - subset = s.difference(client_labels[client]) # remove labels client already has (2...) - if subset: # subset is not empty (2 continued): - l = min(subset) # get a swappable label (in a deterministic way), and swap labels - client_labels[c].remove(l) - client_labels[c].add(label) - client_labels[client].add(l) - break - else: # normal operation, pick a rondom selectable client - client = random.choice(selectable) - client_labels[client].add(label) - - # check if this client has been given the maximum number of labels - if len(client_labels[client]) == limit: - clients.remove(client) - - # now we have a set of labels for each client - # client with rank=rank now needs to be given data - # all clients get the same amount of data, the first portion is given to client with rank 1, the second to rank 2, etc - - labels = client_labels[self.client_id] - logging.info("Client {} gets labels {}".format(self.rank, client_labels[self.client_id])) - indices = [] - ordered_by_label = self.order_by_label(dataset) - for label in labels: - n_samples = int(len(ordered_by_label[label]) / n_occurrences) - clients = 
[c for c, s in enumerate(client_labels) if label in s] # find out which clients have this label - index = clients.index(self.client_id) # find the position of this client - start_index = index * n_samples # inclusive - if rank == self.n_clients: - end_index = len(ordered_by_label[label]) # exclusive - else: - end_index = start_index + n_samples # exclusive - - indices += ordered_by_label[label][start_index:end_index] - - random.seed(seed + self.client_id) # give each client a unique shuffle - random.shuffle(indices) # shuffle indices to spread the labels - - self.indices = indices - -class Probability_q_Sampler(DistributedSamplerWrapper): - """ - Clients are divided among M groups, with M being the number of labels. - A sample with label m is than given to a member of group m with probability q, - and to any other group with probability (1-q)/(m-1) - - side effect of this method is that the reported loss on the test dataset becomes somewhat meaningless...logging.info("distribution in client with rank {}: {}".format(rank, Counter(labels))) - """ - - def __init__(self, dataset, num_replicas, rank, args=(0.5, 42)): - q, seed = args - super().__init__(dataset, num_replicas, rank, seed) - - if self.n_clients % self.n_labels != 0: - logging.error( - "multiples of {} clients are needed for the 'probability-q-sampler' data distribution method, {} does not work".format( - self.n_labels,self.n_clients)) - return - - # divide data among groups - counter = 0 # for dividing data within a group - group_id = self.client_id % self.n_labels - group_clients = [client for client in range(self.n_clients) if client % self.n_labels == group_id] - indices = [] - random.seed(seed) - ordered_by_label = self.order_by_label(dataset) - for group, label_list in enumerate(ordered_by_label): - for sample_idx in label_list: - rnd_val = random.random() - if rnd_val < q: - if group == group_id: - if group_clients[counter] == self.client_id: - indices.append(sample_idx) - counter = (counter + 1) % 
len(group_clients) - else: - others = [grp for grp in range(self.n_labels) if grp != group] - if random.choice(others) == group_id: - if group_clients[counter] == self.client_id: - indices.append(sample_idx) - counter = (counter + 1) % len(group_clients) - - labels = [dataset.targets[i] for i in indices] - logging.info("nr of samplers in client with rank {}: {}".format(rank, len(indices))) - logging.info("distribution in client with rank {}: {}".format(rank, Counter(labels))) - - random.seed(seed + self.client_id) # give each client a unique shuffle - random.shuffle(indices) # shuffle indices to spread the labels - - self.indices = indices - -class DirichletSampler(DistributedSamplerWrapper): - """ Generates a (non-iid) data distribution by sampling the dirichlet distribution. Dirichlet constructs a - vector of length num_clients, that sums to one. Decreasing alpha results in a more non-iid data set. - This distribution method results in both label and quantity skew. - """ - def __init__(self, dataset: Dataset, num_replicas = None, - rank = None, args = (0.5, 42)) -> None: - alpha, seed = args - super().__init__(dataset, num_replicas=num_replicas, rank=rank, seed=seed) - - np.random.seed(seed) - indices = [] - ordered_by_label = self.order_by_label(dataset) - for labels in ordered_by_label: - n_samples = len(labels) - # generate an allocation by sampling dirichlet, which results in how many samples each client gets - allocation = np.random.dirichlet([alpha] * self.n_clients) * n_samples - allocation = allocation.astype(int) - start_index = allocation[0:self.client_id].sum() - end_index = 0 - if self.client_id + 1 == self.n_clients: # last client - end_index = n_samples - else: - end_index = start_index + allocation[self.client_id] - - selection = labels[start_index:end_index] - indices.extend(selection) - - labels = [dataset.targets[i] for i in indices] - logging.info("nr of samplers in client with rank {}: {}".format(rank, len(indices))) - 
logging.info("distribution in client with rank {}: {}".format(rank, Counter(labels))) - - random.seed(seed + self.client_id) # give each client a unique shuffle - random.shuffle(indices) # shuffle indices to spread the labels - - self.indices = indices - -class UniformSampler(DistributedSamplerWrapper): - def __init__(self, dataset, num_replicas=None, rank=None, seed=0): - super().__init__(dataset, num_replicas=num_replicas, rank=rank, seed=seed) - indices = list(range(len(self.dataset))) - self.indices = indices[self.rank:self.total_size:self.num_replicas] - -def get_sampler(dataset, args): - sampler = None - if args.get_distributed(): - method = args.get_sampler() - args.get_logger().info( - "Using {} sampler method, with args: {}".format(method, args.get_sampler_args())) - - if method == "uniform": - sampler = UniformSampler(dataset, num_replicas=args.get_world_size(), rank=args.get_rank()) - elif method == "q sampler": - sampler = Probability_q_Sampler(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), - args=args.get_sampler_args()) - elif method == "limit labels": - sampler = LimitLabelsSampler(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), - args=args.get_sampler_args()) - elif method == "limit labels flex": - sampler = LimitLabelsSamplerFlex(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), - args=args.get_sampler_args()) - elif method == "n labels": - sampler = N_Labels(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), - args=args.get_sampler_args()) - elif method == "dirichlet": - sampler = DirichletSampler(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), - args=args.get_sampler_args()) - else: # default - args().get_logger().warning("Unknown sampler " + method + ", using uniform instead") - sampler = UniformSampler(dataset, num_replicas=args.get_world_size(), rank=args.get_rank()) - - return sampler diff --git a/fltk/strategy/FedNova.py 
b/fltk/strategy/optimization/FedNova.py similarity index 100% rename from fltk/strategy/FedNova.py rename to fltk/strategy/optimization/FedNova.py diff --git a/fltk/strategy/fedprox.py b/fltk/strategy/optimization/fedprox.py similarity index 100% rename from fltk/strategy/fedprox.py rename to fltk/strategy/optimization/fedprox.py From 20400bf50ff863a4985591453d9c5b4d93a142b9 Mon Sep 17 00:00:00 2001 From: bacox Date: Tue, 1 Feb 2022 21:37:30 +0100 Subject: [PATCH 59/73] Move strategy to new file structure --- fltk/datasets/distributed/cifar10.py | 3 +- fltk/datasets/distributed/cifar100.py | 3 +- fltk/datasets/distributed/fashion_mnist.py | 2 +- fltk/datasets/distributed/mnist.py | 6 +- fltk/nets/__init__.py | 14 +- fltk/samplers/__init__.py | 38 ++++ fltk/samplers/dirichlet.py | 44 +++++ fltk/samplers/distributed_sampler.py | 59 ++++++ fltk/samplers/limit_labels.py | 91 +++++++++ fltk/samplers/limit_labels_flex.py | 61 ++++++ fltk/samplers/n_label.py | 174 ++++++++++++++++++ fltk/samplers/q_sampler.py | 57 ++++++ fltk/samplers/uniform.py | 13 ++ fltk/strategy/__init__.py | 4 + fltk/strategy/aggregation/FedAvg.py | 18 ++ fltk/strategy/aggregation/__init__.py | 2 + fltk/strategy/aggregation/aggregation.py | 16 -- fltk/strategy/client_selection/__init__.py | 2 + .../client_selection/random_selection.py | 5 + fltk/strategy/client_selection/tifl.py | 31 ++++ fltk/strategy/optimization/__init__.py | 2 + fltk/util/base_config.py | 5 +- fltk/util/definitions.py | 66 ++++--- requirements.txt | 9 +- 24 files changed, 668 insertions(+), 57 deletions(-) create mode 100644 fltk/samplers/__init__.py create mode 100644 fltk/samplers/dirichlet.py create mode 100644 fltk/samplers/distributed_sampler.py create mode 100644 fltk/samplers/limit_labels.py create mode 100644 fltk/samplers/limit_labels_flex.py create mode 100644 fltk/samplers/n_label.py create mode 100644 fltk/samplers/q_sampler.py create mode 100644 fltk/samplers/uniform.py create mode 100644 
fltk/strategy/aggregation/FedAvg.py create mode 100644 fltk/strategy/aggregation/__init__.py create mode 100644 fltk/strategy/client_selection/__init__.py create mode 100644 fltk/strategy/client_selection/random_selection.py create mode 100644 fltk/strategy/client_selection/tifl.py create mode 100644 fltk/strategy/optimization/__init__.py diff --git a/fltk/datasets/distributed/cifar10.py b/fltk/datasets/distributed/cifar10.py index f25feeca..dfce7f79 100644 --- a/fltk/datasets/distributed/cifar10.py +++ b/fltk/datasets/distributed/cifar10.py @@ -3,9 +3,10 @@ from torch.utils.data import DataLoader, DistributedSampler from fltk.datasets.distributed.dataset import DistDataset -from fltk.strategy.data_samplers import get_sampler import logging +from fltk.samplers import get_sampler + class DistCIFAR10Dataset(DistDataset): diff --git a/fltk/datasets/distributed/cifar100.py b/fltk/datasets/distributed/cifar100.py index 329b7ae0..a1ccb432 100644 --- a/fltk/datasets/distributed/cifar100.py +++ b/fltk/datasets/distributed/cifar100.py @@ -1,9 +1,8 @@ from torchvision import datasets from torchvision import transforms from torch.utils.data import DataLoader, DistributedSampler - from fltk.datasets.distributed.dataset import DistDataset -from fltk.strategy.data_samplers import get_sampler +from fltk.samplers import get_sampler class DistCIFAR100Dataset(DistDataset): diff --git a/fltk/datasets/distributed/fashion_mnist.py b/fltk/datasets/distributed/fashion_mnist.py index cba0468b..d9713f75 100644 --- a/fltk/datasets/distributed/fashion_mnist.py +++ b/fltk/datasets/distributed/fashion_mnist.py @@ -3,7 +3,7 @@ from torchvision import transforms from torch.utils.data import DataLoader, DistributedSampler -from fltk.strategy.data_samplers import get_sampler +from fltk.samplers import get_sampler class DistFashionMNISTDataset(DistDataset): diff --git a/fltk/datasets/distributed/mnist.py b/fltk/datasets/distributed/mnist.py index 06e6d9e9..bf6d49b7 100644 --- 
a/fltk/datasets/distributed/mnist.py +++ b/fltk/datasets/distributed/mnist.py @@ -3,13 +3,15 @@ from torchvision import datasets, transforms from torch.utils.data import DataLoader # from fltk.strategy import get_sampler, get_augmentations, get_augmentations_tensor, UnifyingSampler -from fltk.strategy.data_samplers import get_sampler - from random import choice from PIL import Image + # typing: from typing import TYPE_CHECKING, Tuple, Any, List + +from fltk.samplers import get_sampler + if TYPE_CHECKING: from fltk.util import BareConfig diff --git a/fltk/nets/__init__.py b/fltk/nets/__init__.py index 432dbca9..c71bd98c 100644 --- a/fltk/nets/__init__.py +++ b/fltk/nets/__init__.py @@ -1,6 +1,18 @@ +from enum import Enum + from .cifar_10_cnn import Cifar10CNN from .cifar_100_resnet import Cifar100ResNet from .fashion_mnist_cnn import FashionMNISTCNN from .fashion_mnist_resnet import FashionMNISTResNet from .cifar_10_resnet import Cifar10ResNet -from .cifar_100_vgg import Cifar100VGG \ No newline at end of file +from .cifar_100_vgg import Cifar100VGG + + +class Nets(Enum): + cifar100_resnet = "Cifar100ResNet" + cifar100_vgg = "Cifar100VGG" + cifar10_cnn = "Cifar10CNN" + cifar10_resnet = "Cifar10ResNet" + fashion_mnist_cnn = "FashionMNISTCNN" + fashion_mnist_resnet = "FashionMNISTResNet" + mnist_cnn = 'MNISTCNN' \ No newline at end of file diff --git a/fltk/samplers/__init__.py b/fltk/samplers/__init__.py new file mode 100644 index 00000000..5bf964d0 --- /dev/null +++ b/fltk/samplers/__init__.py @@ -0,0 +1,38 @@ +from .distributed_sampler import DistributedSamplerWrapper +from .uniform import UniformSampler +from .n_label import N_Labels +from .q_sampler import Probability_q_Sampler +from .dirichlet import DirichletSampler +from .limit_labels import LimitLabelsSampler +from .limit_labels_flex import LimitLabelsSamplerFlex + + +def get_sampler(dataset, args): + sampler = None + if args.get_distributed(): + method = args.get_sampler() + args.get_logger().info( + "Using 
{} sampler method, with args: {}".format(method, args.get_sampler_args())) + + if method == "uniform": + sampler = UniformSampler(dataset, num_replicas=args.get_world_size(), rank=args.get_rank()) + elif method == "q sampler": + sampler = Probability_q_Sampler(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), + args=args.get_sampler_args()) + elif method == "limit labels": + sampler = LimitLabelsSampler(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), + args=args.get_sampler_args()) + elif method == "limit labels flex": + sampler = LimitLabelsSamplerFlex(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), + args=args.get_sampler_args()) + elif method == "n labels": + sampler = N_Labels(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), + args=args.get_sampler_args()) + elif method == "dirichlet": + sampler = DirichletSampler(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), + args=args.get_sampler_args()) + else: # default + args().get_logger().warning("Unknown sampler " + method + ", using uniform instead") + sampler = UniformSampler(dataset, num_replicas=args.get_world_size(), rank=args.get_rank()) + + return sampler diff --git a/fltk/samplers/dirichlet.py b/fltk/samplers/dirichlet.py new file mode 100644 index 00000000..4aac9545 --- /dev/null +++ b/fltk/samplers/dirichlet.py @@ -0,0 +1,44 @@ +from fltk.samplers import DistributedSamplerWrapper +from torch.utils.data import DistributedSampler, Dataset +import numpy as np +import logging +import random +from collections import Counter + + +class DirichletSampler(DistributedSamplerWrapper): + """ Generates a (non-iid) data distribution by sampling the dirichlet distribution. Dirichlet constructs a + vector of length num_clients, that sums to one. Decreasing alpha results in a more non-iid data set. + This distribution method results in both label and quantity skew. 
+ """ + def __init__(self, dataset: Dataset, num_replicas = None, + rank = None, args = (0.5, 42)) -> None: + alpha, seed = args + super().__init__(dataset, num_replicas=num_replicas, rank=rank, seed=seed) + + np.random.seed(seed) + indices = [] + ordered_by_label = self.order_by_label(dataset) + for labels in ordered_by_label: + n_samples = len(labels) + # generate an allocation by sampling dirichlet, which results in how many samples each client gets + allocation = np.random.dirichlet([alpha] * self.n_clients) * n_samples + allocation = allocation.astype(int) + start_index = allocation[0:self.client_id].sum() + end_index = 0 + if self.client_id + 1 == self.n_clients: # last client + end_index = n_samples + else: + end_index = start_index + allocation[self.client_id] + + selection = labels[start_index:end_index] + indices.extend(selection) + + labels = [dataset.targets[i] for i in indices] + logging.info("nr of samplers in client with rank {}: {}".format(rank, len(indices))) + logging.info("distribution in client with rank {}: {}".format(rank, Counter(labels))) + + random.seed(seed + self.client_id) # give each client a unique shuffle + random.shuffle(indices) # shuffle indices to spread the labels + + self.indices = indices \ No newline at end of file diff --git a/fltk/samplers/distributed_sampler.py b/fltk/samplers/distributed_sampler.py new file mode 100644 index 00000000..21d6a652 --- /dev/null +++ b/fltk/samplers/distributed_sampler.py @@ -0,0 +1,59 @@ +import random +import logging +from torch.utils.data import DistributedSampler, Dataset +from typing import Iterator +import numpy as np + + +class DistributedSamplerWrapper(DistributedSampler): + indices = [] + epoch_size = 1.0 + def __init__(self, dataset: Dataset, num_replicas = None, + rank = None, seed = 0) -> None: + super().__init__(dataset, num_replicas=num_replicas, rank=rank) + + self.client_id = rank - 1 + self.n_clients = num_replicas - 1 + self.n_labels = len(dataset.classes) + self.seed = seed + 
+ + def order_by_label(self, dataset): + # order the indices by label + ordered_by_label = [[] for i in range(len(dataset.classes))] + for index, target in enumerate(dataset.targets): + ordered_by_label[target].append(index) + + return ordered_by_label + + def set_epoch_size(self, epoch_size: float) -> None: + """ Sets the epoch size as relative to the local amount of data. + 1.5 will result in the __iter__ function returning the available + indices with half appearing twice. + + Args: + epoch_size (float): relative size of epoch + """ + self.epoch_size = epoch_size + + def __iter__(self) -> Iterator[int]: + random.seed(self.rank+self.epoch) + epochs_todo = self.epoch_size + indices = [] + while(epochs_todo > 0.0): + random.shuffle(self.indices) + if epochs_todo >= 1.0: + indices.extend(self.indices) + else: + end_index = int(round(len(self.indices)*epochs_todo)) + indices.extend(self.indices[:end_index]) + + epochs_todo = epochs_todo - 1 + + ratio = len(indices)/float(len(self.indices)) + np.testing.assert_almost_equal(ratio, self.epoch_size, decimal=2) + + return iter(indices) + + def __len__(self) -> int: + return len(self.indices) \ No newline at end of file diff --git a/fltk/samplers/limit_labels.py b/fltk/samplers/limit_labels.py new file mode 100644 index 00000000..a05039fd --- /dev/null +++ b/fltk/samplers/limit_labels.py @@ -0,0 +1,91 @@ +from fltk.samplers import DistributedSamplerWrapper +from torch.utils.data import DistributedSampler, Dataset +import numpy as np +import logging +import random +from collections import Counter + +class LimitLabelsSampler(DistributedSamplerWrapper): + """ + A sampler that limits the number of labels per client + """ + + def __init__(self, dataset, num_replicas, rank, args=(5, 42)): + limit, seed = args + super().__init__(dataset, num_replicas, rank, seed) + + if self.n_clients % self.n_labels != 0: + logging.error( + "multiples of {} clients are needed for the 'limiting-labels' data distribution method, {} does not 
work".format( + self.n_labels, self.n_clients)) + return + + n_occurrences = limit * int(self.n_clients / self.n_labels) # number of occurrences of each label + counters = [n_occurrences] * self.n_clients # keeps track of which labels still can be given out + labels = list(range(self.n_labels)) # list of labels to distribute + clients = list(range(self.n_clients)) # keeps track of which clients should still be given a label + client_labels = [set() for n in range(self.n_clients)] # set of labels given to each client + random.seed(seed) # seed, such that the same result can be obtained multiple times + + while labels: + # pick a random label + label = random.choice(labels) + counters[label] -= 1 # decrement counter of this label + if counters[label] == 0: # if needed, remove label + labels.remove(label) + + # check which clients the label can be given to + selectable = [i for i in clients if not label in client_labels[i]] + client = None + + if not selectable: + # poor choice, let's fix this -> swap two labels + # conditions for swapping: + # sets of labels A, B, with B incomplete, remaining label l that is not possible to give to B, s.t.: + # (1) l not in A + # (2) exists label l' in A but not in B + # l, l' can be swapped + + client = random.choice(clients) # label can not be given to this client + for c, s in enumerate(client_labels): + if len(s) == limit: # this a completed set + if label not in s: # label can be given to this client (1) + subset = s.difference(client_labels[client]) # remove labels client already has (2...) 
+ if subset: # subset is not empty (2 continued): + l = min(subset) # get a swappable label (in a deterministic way), and swap labels + client_labels[c].remove(l) + client_labels[c].add(label) + client_labels[client].add(l) + break + else: # normal operation, pick a rondom selectable client + client = random.choice(selectable) + client_labels[client].add(label) + + # check if this client has been given the maximum number of labels + if len(client_labels[client]) == limit: + clients.remove(client) + + # now we have a set of labels for each client + # client with rank=rank now needs to be given data + # all clients get the same amount of data, the first portion is given to client with rank 1, the second to rank 2, etc + + labels = client_labels[self.client_id] + logging.info("Client {} gets labels {}".format(self.rank, client_labels[self.client_id])) + indices = [] + ordered_by_label = self.order_by_label(dataset) + for label in labels: + n_samples = int(len(ordered_by_label[label]) / n_occurrences) + clients = [c for c, s in enumerate(client_labels) if label in s] # find out which clients have this label + index = clients.index(self.client_id) # find the position of this client + start_index = index * n_samples # inclusive + if rank == self.n_clients: + end_index = len(ordered_by_label[label]) # exclusive + else: + end_index = start_index + n_samples # exclusive + + indices += ordered_by_label[label][start_index:end_index] + + random.seed(seed + self.client_id) # give each client a unique shuffle + random.shuffle(indices) # shuffle indices to spread the labels + + self.indices = indices \ No newline at end of file diff --git a/fltk/samplers/limit_labels_flex.py b/fltk/samplers/limit_labels_flex.py new file mode 100644 index 00000000..d6dc659c --- /dev/null +++ b/fltk/samplers/limit_labels_flex.py @@ -0,0 +1,61 @@ +from fltk.samplers import DistributedSamplerWrapper +from torch.utils.data import DistributedSampler, Dataset +import numpy as np +import logging +import 
random +from collections import Counter + + +class LimitLabelsSamplerFlex(DistributedSamplerWrapper): + """ + A sampler that limits the number of labels per client + The number of clients must <= than number of labels + """ + + def __init__(self, dataset, num_replicas, rank, args=(5, 42)): + limit, seed = args + super().__init__(dataset, num_replicas, rank, seed) + + labels_per_client = int(np.floor(self.n_labels / self.n_clients)) + remaining_labels = self.n_labels - labels_per_client + labels = list(range(self.n_labels)) # list of labels to distribute + clients = list(range(self.n_clients)) # keeps track of which clients should still be given a label + client_labels = [set() for n in range(self.n_clients)] # set of labels given to each client + random.seed(seed) # seed, such that the same result can be obtained multiple times + print(client_labels) + + label_order = random.sample(labels, len(labels)) + client_label_dict = {} + for client_id in clients: + client_label_dict[client_id] = [] + for _ in range(labels_per_client): + chosen_label = label_order.pop() + client_label_dict[client_id].append(chosen_label) + client_labels[client_id].add(chosen_label) + client_label_dict['rest'] = label_order + + indices = [] + ordered_by_label = self.order_by_label(dataset) + labels = client_label_dict[self.client_id] + for label in labels: + n_samples = int(len(ordered_by_label[label])) + clients = [c for c, s in enumerate(client_labels) if label in s] # find out which clients have this label + index = clients.index(self.client_id) # find the position of this client + start_index = index * n_samples # inclusive + if rank == self.n_clients: + end_index = len(ordered_by_label[label]) # exclusive + else: + end_index = start_index + n_samples # exclusive + + indices += ordered_by_label[label][start_index:end_index] + + # Last part is uniform sampler + rest_indices = [] + for l in client_label_dict['rest']: + rest_indices += ordered_by_label[l] + filtered_rest_indices = 
rest_indices[self.rank:self.total_size:self.num_replicas] + indices += filtered_rest_indices + random.seed(seed + self.client_id) # give each client a unique shuffle + random.shuffle(indices) # shuffle indices to spread the labels + + self.indices = indices \ No newline at end of file diff --git a/fltk/samplers/n_label.py b/fltk/samplers/n_label.py new file mode 100644 index 00000000..4b00c1a9 --- /dev/null +++ b/fltk/samplers/n_label.py @@ -0,0 +1,174 @@ +from fltk.samplers import DistributedSamplerWrapper +from torch.utils.data import DistributedSampler, Dataset +import numpy as np +import logging +import random +from collections import Counter + + +class N_Labels(DistributedSamplerWrapper): + """ + A sampler that limits the number of labels per client + The number of clients must <= than number of labels + """ + + def __init__(self, dataset, num_replicas, rank, args=(5, 42)): + limit, seed = args + super().__init__(dataset, num_replicas, rank, seed) + + num_copies = np.ceil((args[0] * self.n_clients) / self.n_labels) + label_dict = {} + for l in range(self.n_labels): + label_dict[l] = num_copies + + def get_least_used_labels(l_dict: dict): + label_list = [[k, v] for k, v in label_dict.items()] + label_list[-1][1] = 0 + sorted_list = sorted(label_list, key=lambda x: x[1], reverse=True) + # print('d') + # label_list.sort(lambda x:x) + + def choice_n(l_dict: dict, n, seed_offset = 0): + # get_least_used_labels(l_dict) + labels = [k for k, v in label_dict.items() if v] + # summed = sum([int(v) for k, v in label_dict.items() if v]) + # amounts = [float(v) / float(summed) for k, v in label_dict.items() if v] + # # p = amounts / summed + print(f'Available labels: {labels} choose {n}') + # # np.random.seed(seed + seed_offset) + # # @TODO: Error is in this section! 
+ # print(f'n={n}, labels={labels}, p={amounts}') + # print(amounts) + + selected = np.random.choice(labels, n, replace=False) + # print(selected) + for k, v in l_dict.items(): + if k in selected: + # v -= 1 + l_dict[k] -= 1 + return selected + + + # print(f'N Clients={self.n_clients}') + # print(f'Num_buckets={num_copies}') + + clients = list(range(self.n_clients)) # keeps track of which clients should still be given a label + client_label_dict = {} + ordered_list = list(range(self.n_labels)) * int(num_copies) + + # Old code + # for idx, client_id in enumerate(clients): + # # client_label_dict[client_id] = [] + # label_set = choice_n(label_dict, args[0], idx) + # client_label_dict[client_id] = label_set + + # Now code + for idx, client_id in enumerate(clients): + label_set = [] + for _ in range(args[0]): + label_set.append(ordered_list.pop()) + client_label_dict[client_id] = label_set + + client_label_dict['rest'] = [] + # New code + if len(ordered_list): + client_label_dict['rest'] = ordered_list + + # Old code + # client_label_dict['rest'] = labels = [k for k, v in label_dict.items() if v] + # for k, v in label_dict.items(): + # for x in range(int(v)): + # client_label_dict['rest'].append(int(k)) + + # Order data by label; split into N buckets and select indices based on the order found in the client-label-dict + + reverse_label_dict = {} + for l in range(self.n_labels): + reverse_label_dict[l] = [] + + for k, v in client_label_dict.items(): + # print(f'client {k} has labels {v}') + for l_c in v: + reverse_label_dict[l_c].append(k) + + indices = [] + ordered_by_label = self.order_by_label(dataset) + indices_per_client = {} + for c in clients: + indices_per_client[c] = [] + + rest_indices = [] + for group, label_list in enumerate(ordered_by_label): + splitted = np.array_split(label_list, num_copies) + client_id_to_distribute = reverse_label_dict[group] + for split_part in splitted: + client_key = client_id_to_distribute.pop() + if client_key == 'rest': + 
rest_indices.append(split_part) + else: + indices_per_client[client_key].append(split_part) + # for split_part in splitted: + # @TODO: Fix this part in terms of code cleanness. Could be written more cleanly + if len(rest_indices): + rest_indices = np.concatenate(rest_indices) + rest_splitted = np.array_split(rest_indices, len(indices_per_client)) + + for k, v in indices_per_client.items(): + v.append(rest_splitted.pop()) + indices_per_client[k] = np.concatenate(v) + else: + rest_indices = np.ndarray([]) + for k, v in indices_per_client.items(): + indices_per_client[k] = np.concatenate(v) + + indices = indices_per_client[self.client_id] + random.seed(seed + self.client_id) # give each client a unique shuffle + random.shuffle(indices) # shuffle indices to spread the labels + + self.indices = indices + + # labels_per_client = int(np.floor(self.n_labels / self.n_clients)) + # remaining_labels = self.n_labels - labels_per_client + # labels = list(range(self.n_labels)) # list of labels to distribute + # clients = list(range(self.n_clients)) # keeps track of which clients should still be given a label + # client_labels = [set() for n in range(self.n_clients)] # set of labels given to each client + # random.seed(seed) # seed, such that the same result can be obtained multiple times + # print(client_labels) + # + # label_order = random.sample(labels, len(labels)) + # client_label_dict = {} + # for client_id in clients: + # client_label_dict[client_id] = [] + # for _ in range(labels_per_client): + # chosen_label = label_order.pop() + # client_label_dict[client_id].append(chosen_label) + # client_labels[client_id].add(chosen_label) + # client_label_dict['rest'] = label_order + # + # + # + # indices = [] + # ordered_by_label = self.order_by_label(dataset) + # labels = client_label_dict[self.client_id] + # for label in labels: + # n_samples = int(len(ordered_by_label[label])) + # clients = [c for c, s in enumerate(client_labels) if label in s] # find out which clients have this 
label + # index = clients.index(self.client_id) # find the position of this client + # start_index = index * n_samples # inclusive + # if rank == self.n_clients: + # end_index = len(ordered_by_label[label]) # exclusive + # else: + # end_index = start_index + n_samples # exclusive + # + # indices += ordered_by_label[label][start_index:end_index] + # + # # Last part is uniform sampler + # rest_indices = [] + # for l in client_label_dict['rest']: + # rest_indices += ordered_by_label[l] + # filtered_rest_indices = rest_indices[self.rank:self.total_size:self.num_replicas] + # indices += filtered_rest_indices + # random.seed(seed + self.client_id) # give each client a unique shuffle + # random.shuffle(indices) # shuffle indices to spread the labels + # + # self.indices = indices \ No newline at end of file diff --git a/fltk/samplers/q_sampler.py b/fltk/samplers/q_sampler.py new file mode 100644 index 00000000..77d38f0a --- /dev/null +++ b/fltk/samplers/q_sampler.py @@ -0,0 +1,57 @@ +from fltk.samplers import DistributedSamplerWrapper +from torch.utils.data import DistributedSampler, Dataset +import numpy as np +import logging +import random +from collections import Counter + + +class Probability_q_Sampler(DistributedSamplerWrapper): + """ + Clients are divided among M groups, with M being the number of labels. 
+ A sample with label m is than given to a member of group m with probability q, + and to any other group with probability (1-q)/(m-1) + + side effect of this method is that the reported loss on the test dataset becomes somewhat meaningless...logging.info("distribution in client with rank {}: {}".format(rank, Counter(labels))) + """ + + def __init__(self, dataset, num_replicas, rank, args=(0.5, 42)): + q, seed = args + super().__init__(dataset, num_replicas, rank, seed) + + if self.n_clients % self.n_labels != 0: + logging.error( + "multiples of {} clients are needed for the 'probability-q-sampler' data distribution method, {} does not work".format( + self.n_labels, self.n_clients)) + return + + # divide data among groups + counter = 0 # for dividing data within a group + group_id = self.client_id % self.n_labels + group_clients = [client for client in range(self.n_clients) if client % self.n_labels == group_id] + indices = [] + random.seed(seed) + ordered_by_label = self.order_by_label(dataset) + for group, label_list in enumerate(ordered_by_label): + for sample_idx in label_list: + rnd_val = random.random() + if rnd_val < q: + if group == group_id: + if group_clients[counter] == self.client_id: + indices.append(sample_idx) + counter = (counter + 1) % len(group_clients) + else: + others = [grp for grp in range(self.n_labels) if grp != group] + if random.choice(others) == group_id: + if group_clients[counter] == self.client_id: + indices.append(sample_idx) + counter = (counter + 1) % len(group_clients) + + labels = [dataset.targets[i] for i in indices] + logging.info("nr of samplers in client with rank {}: {}".format(rank, len(indices))) + logging.info("distribution in client with rank {}: {}".format(rank, Counter(labels))) + + random.seed(seed + self.client_id) # give each client a unique shuffle + random.shuffle(indices) # shuffle indices to spread the labels + + self.indices = indices \ No newline at end of file diff --git a/fltk/samplers/uniform.py 
b/fltk/samplers/uniform.py new file mode 100644 index 00000000..65a57ba8 --- /dev/null +++ b/fltk/samplers/uniform.py @@ -0,0 +1,13 @@ +from fltk.samplers import DistributedSamplerWrapper +from torch.utils.data import DistributedSampler, Dataset +import numpy as np +import logging +import random +from collections import Counter + + +class UniformSampler(DistributedSamplerWrapper): + def __init__(self, dataset, num_replicas=None, rank=None, seed=0): + super().__init__(dataset, num_replicas=num_replicas, rank=rank, seed=seed) + indices = list(range(len(self.dataset))) + self.indices = indices[self.rank:self.total_size:self.num_replicas] \ No newline at end of file diff --git a/fltk/strategy/__init__.py b/fltk/strategy/__init__.py index e69de29b..15884b28 100644 --- a/fltk/strategy/__init__.py +++ b/fltk/strategy/__init__.py @@ -0,0 +1,4 @@ +from .aggregation import * +from .client_selection import * +from .optimization import * +from .offloading import OffloadingStrategy, parse_strategy diff --git a/fltk/strategy/aggregation/FedAvg.py b/fltk/strategy/aggregation/FedAvg.py new file mode 100644 index 00000000..98a72396 --- /dev/null +++ b/fltk/strategy/aggregation/FedAvg.py @@ -0,0 +1,18 @@ + + +def FedAvg(parameters, sizes): + new_params = {} + sum_size = 0 + for client in parameters: + for name in parameters[client].keys(): + try: + new_params[name].data += (parameters[client][name].data * sizes[client]) + except: + new_params[name] = (parameters[client][name].data * sizes[client]) + sum_size += sizes[client] + + for name in new_params: + # @TODO: Is .long() really required? 
+ new_params[name].data = new_params[name].data.long() / sum_size + + return new_params \ No newline at end of file diff --git a/fltk/strategy/aggregation/__init__.py b/fltk/strategy/aggregation/__init__.py new file mode 100644 index 00000000..696cca51 --- /dev/null +++ b/fltk/strategy/aggregation/__init__.py @@ -0,0 +1,2 @@ +from .FedAvg import FedAvg +from .aggregation import average_nn_parameters, average_nn_parameters_simple diff --git a/fltk/strategy/aggregation/aggregation.py b/fltk/strategy/aggregation/aggregation.py index 10a9975c..827fe8fb 100644 --- a/fltk/strategy/aggregation/aggregation.py +++ b/fltk/strategy/aggregation/aggregation.py @@ -25,22 +25,6 @@ def average_nn_parameters(parameters): return new_params -def FedAvg(parameters, sizes): - new_params = {} - sum_size = 0 - for client in parameters: - for name in parameters[client].keys(): - try: - new_params[name].data += (parameters[client][name].data * sizes[client]) - except: - new_params[name] = (parameters[client][name].data * sizes[client]) - sum_size += sizes[client] - - for name in new_params: - # @TODO: Is .long() really required? 
- new_params[name].data = new_params[name].data.long() / sum_size - - return new_params def average_nn_parameters(parameters, sizes): new_params = {} diff --git a/fltk/strategy/client_selection/__init__.py b/fltk/strategy/client_selection/__init__.py new file mode 100644 index 00000000..f490a0da --- /dev/null +++ b/fltk/strategy/client_selection/__init__.py @@ -0,0 +1,2 @@ +from .random_selection import random_selection +from .tifl import tifl_select_tier, tifl_select_tier_and_decrement, tifl_can_select_tier, tifl_update_probs \ No newline at end of file diff --git a/fltk/strategy/client_selection/random_selection.py b/fltk/strategy/client_selection/random_selection.py new file mode 100644 index 00000000..716665a5 --- /dev/null +++ b/fltk/strategy/client_selection/random_selection.py @@ -0,0 +1,5 @@ +import numpy as np + + +def random_selection(clients, n): + return np.random.choice(clients, n, replace=False) \ No newline at end of file diff --git a/fltk/strategy/client_selection/tifl.py b/fltk/strategy/client_selection/tifl.py new file mode 100644 index 00000000..20be7311 --- /dev/null +++ b/fltk/strategy/client_selection/tifl.py @@ -0,0 +1,31 @@ +import numpy as np + + +def tifl_select_tier(tiers): + print([x[3] for x in tiers]) + return np.random.choice([x[0] for x in tiers], 1, p=[x[3] for x in tiers])[0] + + +def tifl_update_probs(tiers): + n = len([x for x in tiers if x[2] > 0]) + D = n * (n +1) / 2 + tiers.sort(key=lambda x:x[1]) + idx_decr = 0 + for idx, tier in enumerate(tiers): + if tier[2] > 0: + tier[3] = (n - (idx - idx_decr)) / D + else: + tier[3] = 0 + idx_decr += 1 + + +def tifl_select_tier_and_decrement(tiers): + selected_tier = tifl_select_tier(tiers) + for tier in tiers: + if tier[0] == selected_tier: + tier[2] -= 1 + return selected_tier + + +def tifl_can_select_tier(tiers): + return len([x for x in tiers if x[2] > 0]) \ No newline at end of file diff --git a/fltk/strategy/optimization/__init__.py b/fltk/strategy/optimization/__init__.py new 
file mode 100644 index 00000000..549e5626 --- /dev/null +++ b/fltk/strategy/optimization/__init__.py @@ -0,0 +1,2 @@ +from .fedprox import FedProx +from .FedNova import FedNova \ No newline at end of file diff --git a/fltk/util/base_config.py b/fltk/util/base_config.py index 8e3f45e3..dfdc6e02 100644 --- a/fltk/util/base_config.py +++ b/fltk/util/base_config.py @@ -7,8 +7,7 @@ from fltk.datasets.distributed.mnist import DistMNISTDataset from fltk.nets import Cifar10CNN, FashionMNISTCNN, Cifar100ResNet, FashionMNISTResNet, Cifar10ResNet, Cifar100VGG from fltk.nets.mnist_cnn import MNIST_CNN -from fltk.strategy.FedNova import FedNova -from fltk.strategy.fedprox import FedProx +from fltk.strategy.optimization import FedProx, FedNova from fltk.util.definitions import Optimizations SEED = 1 @@ -201,7 +200,7 @@ def merge_yaml(self, cfg = {}): else: self.cuda = False if 'optimizer' in cfg: - self.optimizer = self.optimizers[cfg['optimizer']] + self.optimizer = self.optimizers[Optimizations(cfg['optimizer'])] if 'optimizer_args' in cfg: for k, v in cfg['optimizer_args'].items(): self.optimizer_args[k] = v diff --git a/fltk/util/definitions.py b/fltk/util/definitions.py index c11f704e..c705b97d 100644 --- a/fltk/util/definitions.py +++ b/fltk/util/definitions.py @@ -1,39 +1,49 @@ -from dataclasses import dataclass - -# @dataclass -# class Aggregations: -# avg = 'Avg' -# fed_avg = 'FedAvg' -# sum = 'Sum' - -# @dataclass -# class Samplers: -# uniform = "uniform" -# q_sampler = "q sampler" -# limit_labels = "limit labels" -# dirichlet = "dirichlet" -# limit_labels_q = "limit labels q" -# emd_sampler = 'emd sampler' - -@dataclass -class Optimizations: +######### Definitions ######### +# 1. Datasets # +# 2. Networks (models) # +# 3. Aggregation methods # +# 4. Client selection methods # +# 5. Data samplers # +# 6. Optimizers # +############################### +# Use enums instead of dataclasses? 
+from enum import Enum + + +class Samplers(Enum): + uniform = "uniform" + q_sampler = "q sampler" + limit_labels = "limit labels" + dirichlet = "dirichlet" + limit_labels_q = "limit labels q" + emd_sampler = 'emd sampler' + limit_labels_flex = "limit labels flex" + + +class Optimizations(Enum): sgd = 'SGD' fedprox = 'FedProx' fednova = 'FedNova' -# @dataclass -# class Datasets: -# cifar10 = 'cifar10' -# cifar100 = 'cifar100' -# fashion_mnist = 'fashion-mnist' -# mnist = 'mnist' -# @dataclass -# class Nets: +class Dataset(Enum): + cifar10 = 'cifar10' + cifar100 = 'cifar100' + fashion_mnist = 'fashion-mnist' + mnist = 'mnist' + + +class Aggregations(Enum): + avg = 'Avg' + fed_avg = 'FedAvg' + sum = 'Sum' + + +# class Nets(Enum): # cifar100_resnet = "Cifar100ResNet" # cifar100_vgg = "Cifar100VGG" # cifar10_cnn = "Cifar10CNN" # cifar10_resnet = "Cifar10ResNet" # fashion_mnist_cnn = "FashionMNISTCNN" # fashion_mnist_resnet = "FashionMNISTResNet" -# mnist_cnn = 'MNISTCNN' \ No newline at end of file +# mnist_cnn = 'MNISTCNN' diff --git a/requirements.txt b/requirements.txt index e87e007e..84ecb26b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,8 +2,8 @@ tqdm==4.49.0 scikit-learn==0.23.2 pandas==1.1.2 numpy>=1.20.0 -torch==1.7.1 -torchvision==0.8.2 +torch==1.10.1 +torchvision==0.11.2 scipy==1.4.1 h5py==2.10.0 requests @@ -12,4 +12,7 @@ torchsummary dataclass-csv tensorboard seaborn -matplotlib \ No newline at end of file +matplotlib +google-api-python-client +google-auth-httplib2 +google-auth-oauthlib \ No newline at end of file From b1541689b6f614808055cbdd65c7541e230fa2b7 Mon Sep 17 00:00:00 2001 From: bacox Date: Tue, 1 Feb 2022 22:36:50 +0100 Subject: [PATCH 60/73] Use enums in config parsing --- configs/dev_mnist_all/exps/fedavg.yaml | 4 +- fltk/nets/__init__.py | 12 ----- fltk/samplers/__init__.py | 13 ++--- fltk/util/base_config.py | 74 +++++++++++++------------- fltk/util/definitions.py | 19 +++---- 5 files changed, 57 insertions(+), 65 deletions(-) 
diff --git a/configs/dev_mnist_all/exps/fedavg.yaml b/configs/dev_mnist_all/exps/fedavg.yaml index 0f7550fe..5e90d7fe 100644 --- a/configs/dev_mnist_all/exps/fedavg.yaml +++ b/configs/dev_mnist_all/exps/fedavg.yaml @@ -16,10 +16,10 @@ node_groups: slow: [1, 1] medium: [2, 2] fast: [3, 3] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) #sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) sampler_args: - - 0.07 # label limit || q probability || alpha || unused + - 2 # label limit || q probability || alpha || unused - 42 # random seed || random seed || random seed || unused system: federator: diff --git a/fltk/nets/__init__.py b/fltk/nets/__init__.py index c71bd98c..4bfa7138 100644 --- a/fltk/nets/__init__.py +++ b/fltk/nets/__init__.py @@ -1,18 +1,6 @@ -from enum import Enum - from .cifar_10_cnn import Cifar10CNN from .cifar_100_resnet import Cifar100ResNet from .fashion_mnist_cnn import FashionMNISTCNN from .fashion_mnist_resnet import FashionMNISTResNet from .cifar_10_resnet import Cifar10ResNet from .cifar_100_vgg import Cifar100VGG - - -class Nets(Enum): - cifar100_resnet = "Cifar100ResNet" - cifar100_vgg = "Cifar100VGG" - cifar10_cnn = "Cifar10CNN" - cifar10_resnet = "Cifar10ResNet" - fashion_mnist_cnn = "FashionMNISTCNN" - fashion_mnist_resnet = "FashionMNISTResNet" - mnist_cnn = 'MNISTCNN' \ No newline at end of file diff --git a/fltk/samplers/__init__.py b/fltk/samplers/__init__.py index 5bf964d0..d6085f8e 100644 --- a/fltk/samplers/__init__.py +++ b/fltk/samplers/__init__.py @@ -5,6 +5,7 @@ from .dirichlet import DirichletSampler from .limit_labels import LimitLabelsSampler from .limit_labels_flex import LimitLabelsSamplerFlex +from ..util.definitions import DataSampler def get_sampler(dataset, args): @@ -14,21 +15,21 @@ def get_sampler(dataset, args): args.get_logger().info( "Using {} 
sampler method, with args: {}".format(method, args.get_sampler_args())) - if method == "uniform": + if method == DataSampler.uniform: sampler = UniformSampler(dataset, num_replicas=args.get_world_size(), rank=args.get_rank()) - elif method == "q sampler": + elif method == DataSampler.q_sampler: sampler = Probability_q_Sampler(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), args=args.get_sampler_args()) - elif method == "limit labels": + elif method == DataSampler.limit_labels: sampler = LimitLabelsSampler(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), args=args.get_sampler_args()) - elif method == "limit labels flex": + elif method == DataSampler.limit_labels_flex: sampler = LimitLabelsSamplerFlex(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), args=args.get_sampler_args()) - elif method == "n labels": + elif method == DataSampler.n_labels: sampler = N_Labels(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), args=args.get_sampler_args()) - elif method == "dirichlet": + elif method == DataSampler.dirichlet: sampler = DirichletSampler(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), args=args.get_sampler_args()) else: # default diff --git a/fltk/util/base_config.py b/fltk/util/base_config.py index dfdc6e02..42883789 100644 --- a/fltk/util/base_config.py +++ b/fltk/util/base_config.py @@ -8,7 +8,7 @@ from fltk.nets import Cifar10CNN, FashionMNISTCNN, Cifar100ResNet, FashionMNISTResNet, Cifar10ResNet, Cifar100VGG from fltk.nets.mnist_cnn import MNIST_CNN from fltk.strategy.optimization import FedProx, FedNova -from fltk.util.definitions import Optimizations +from fltk.util.definitions import Optimizations, DataSampler, Nets, Dataset SEED = 1 torch.manual_seed(SEED) @@ -80,52 +80,54 @@ def __init__(self): self.federator_host = '0.0.0.0' self.rank = 0 self.world_size = 0 - self.data_sampler = "uniform" + self.data_sampler = DataSampler.uniform self.data_sampler_args = None 
self.distributed = False + self.available_nets = { - "Cifar100ResNet": Cifar100ResNet, - "Cifar100VGG": Cifar100VGG, - "Cifar10CNN": Cifar10CNN, - "Cifar10ResNet": Cifar10ResNet, - "FashionMNISTCNN": FashionMNISTCNN, - "FashionMNISTResNet": FashionMNISTResNet, - "MNISTCNN": MNIST_CNN, + Nets.cifar100_resnet: Cifar100ResNet, + Nets.cifar100_vgg: Cifar100VGG, + Nets.cifar10_cnn: Cifar10CNN, + Nets.cifar10_resnet: Cifar10ResNet, + Nets.fashion_mnist_cnn: FashionMNISTCNN, + Nets.fashion_mnist_resnet: FashionMNISTResNet, + Nets.mnist_cnn: MNIST_CNN, } self.nets_split_point = { - "Cifar100ResNet": 48, - "Cifar100VGG": 28, - "Cifar10CNN": 15, - "Cifar10ResNet": 39, - "FashionMNISTCNN": 7, - "FashionMNISTResNet": 7, - "MNISTCNN": 2, + Nets.cifar100_resnet: 48, + Nets.cifar100_vgg: 28, + Nets.cifar10_cnn: 15, + Nets.cifar10_resnet: 39, + Nets.fashion_mnist_cnn: 7, + Nets.fashion_mnist_resnet: 7, + Nets.mnist_cnn: 2, } self.net = None - self.net_name = 'Cifar10CNN' - self.set_net_by_name(self.net_name) - self.dataset_name = 'cifar10' + self.net_name = Nets.cifar10_cnn + self.set_net_by_name(self.net_name.value) + # self.dataset_name = 'cifar10' + self.dataset_name = Dataset.cifar10 self.DistDatasets = { - 'cifar10': DistCIFAR10Dataset, - 'cifar100': DistCIFAR100Dataset, - 'fashion-mnist': DistFashionMNISTDataset, - 'mnist': DistMNISTDataset + Dataset.cifar10: DistCIFAR10Dataset, + Dataset.cifar100: DistCIFAR100Dataset, + Dataset.fashion_mnist: DistFashionMNISTDataset, + Dataset.mnist: DistMNISTDataset } self.train_data_loader_pickle_path = { - 'cifar10': 'data_loaders/cifar10/train_data_loader.pickle', - 'fashion-mnist': 'data_loaders/fashion-mnist/train_data_loader.pickle', - 'cifar100': 'data_loaders/cifar100/train_data_loader.pickle', - 'mnist' : 'data_loaders/mnist/train_data_loader.pickle', + Dataset.cifar10: 'data_loaders/cifar10/train_data_loader.pickle', + Dataset.fashion_mnist: 'data_loaders/fashion-mnist/train_data_loader.pickle', + Dataset.cifar100: 
'data_loaders/cifar100/train_data_loader.pickle', + Dataset.mnist: 'data_loaders/mnist/train_data_loader.pickle', } self.test_data_loader_pickle_path = { - 'cifar10': 'data_loaders/cifar10/test_data_loader.pickle', - 'fashion-mnist': 'data_loaders/fashion-mnist/test_data_loader.pickle', - 'cifar100': 'data_loaders/cifar100/test_data_loader.pickle', - 'mnist' : 'data_loaders/mnist/test_data_loader.pickle', + Dataset.cifar10: 'data_loaders/cifar10/test_data_loader.pickle', + Dataset.fashion_mnist: 'data_loaders/fashion-mnist/test_data_loader.pickle', + Dataset.cifar100: 'data_loaders/cifar100/test_data_loader.pickle', + Dataset.mnist: 'data_loaders/mnist/test_data_loader.pickle', } self.loss_function = torch.nn.CrossEntropyLoss @@ -159,10 +161,10 @@ def merge_yaml(self, cfg = {}): if 'wait_for_clients' in cfg: self.wait_for_clients = cfg['wait_for_clients'] if 'net' in cfg: - self.net_name = cfg['net'] + self.net_name = Nets(cfg['net']) self.set_net_by_name(cfg['net']) if 'dataset' in cfg: - self.dataset_name = cfg['dataset'] + self.dataset_name = Dataset(cfg['dataset']) if 'offload_stategy' in cfg: self.offload_strategy = cfg['offload_stategy'] if 'profiling_size' in cfg: @@ -205,7 +207,7 @@ def merge_yaml(self, cfg = {}): for k, v in cfg['optimizer_args'].items(): self.optimizer_args[k] = v if 'sampler' in cfg: - self.data_sampler = cfg['sampler'] + self.data_sampler = DataSampler(cfg['sampler']) if 'sampler_args' in cfg: self.data_sampler_args = cfg['sampler_args'] @@ -270,7 +272,7 @@ def get_dataloader_list(self): return list(self.train_data_loader_pickle_path.keys()) def get_nets_list(self): - return list(self.available_nets.keys()) + return list(map(lambda c: c.value, Nets)) def set_train_data_loader_pickle_path(self, path, name='cifar10'): self.train_data_loader_pickle_path[name] = path @@ -285,7 +287,7 @@ def get_test_data_loader_pickle_path(self): return self.test_data_loader_pickle_path[self.dataset_name] def set_net_by_name(self, name: str): - 
self.net_name = name + self.net_name = Nets(name) self.net = self.available_nets[self.net_name] def get_cuda(self): diff --git a/fltk/util/definitions.py b/fltk/util/definitions.py index c705b97d..41dfaec6 100644 --- a/fltk/util/definitions.py +++ b/fltk/util/definitions.py @@ -10,7 +10,7 @@ from enum import Enum -class Samplers(Enum): +class DataSampler(Enum): uniform = "uniform" q_sampler = "q sampler" limit_labels = "limit labels" @@ -18,6 +18,7 @@ class Samplers(Enum): limit_labels_q = "limit labels q" emd_sampler = 'emd sampler' limit_labels_flex = "limit labels flex" + n_labels = "n labels" class Optimizations(Enum): @@ -39,11 +40,11 @@ class Aggregations(Enum): sum = 'Sum' -# class Nets(Enum): -# cifar100_resnet = "Cifar100ResNet" -# cifar100_vgg = "Cifar100VGG" -# cifar10_cnn = "Cifar10CNN" -# cifar10_resnet = "Cifar10ResNet" -# fashion_mnist_cnn = "FashionMNISTCNN" -# fashion_mnist_resnet = "FashionMNISTResNet" -# mnist_cnn = 'MNISTCNN' +class Nets(Enum): + cifar100_resnet = "Cifar100ResNet" + cifar100_vgg = "Cifar100VGG" + cifar10_cnn = "Cifar10CNN" + cifar10_resnet = "Cifar10ResNet" + fashion_mnist_cnn = "FashionMNISTCNN" + fashion_mnist_resnet = "FashionMNISTResNet" + mnist_cnn = 'MNISTCNN' From 1363a80c2bf9c89cf4d949de2082cdebb970e223 Mon Sep 17 00:00:00 2001 From: bacox Date: Mon, 21 Feb 2022 15:31:59 +0100 Subject: [PATCH 61/73] Move code to new structure --- fltk/core/__init__.py | 0 fltk/core/client.py | 111 ++++++++++++++ fltk/core/federator.py | 148 +++++++++++++++++++ fltk/core/node.py | 164 +++++++++++++++++++++ fltk/core/rpc_util.py | 30 ++++ fltk/datasets/distributed/cifar10.py | 9 +- fltk/datasets/distributed/cifar100.py | 12 +- fltk/datasets/distributed/dataset.py | 2 + fltk/datasets/distributed/fashion_mnist.py | 12 +- fltk/datasets/distributed/mnist.py | 12 +- fltk/datasets/loader_util.py | 36 +++++ fltk/nets/__init__.py | 31 ++++ fltk/samplers/__init__.py | 6 +- fltk/strategy/optimization/__init__.py | 13 +- fltk/util/config.py | 66 
+++++++++ fltk/util/log.py | 10 +- test_node_synchronous.py | 44 ++++++ 17 files changed, 681 insertions(+), 25 deletions(-) create mode 100644 fltk/core/__init__.py create mode 100644 fltk/core/client.py create mode 100644 fltk/core/federator.py create mode 100644 fltk/core/node.py create mode 100644 fltk/core/rpc_util.py create mode 100644 fltk/datasets/loader_util.py create mode 100644 fltk/util/config.py create mode 100644 test_node_synchronous.py diff --git a/fltk/core/__init__.py b/fltk/core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/fltk/core/client.py b/fltk/core/client.py new file mode 100644 index 00000000..bb994ed4 --- /dev/null +++ b/fltk/core/client.py @@ -0,0 +1,111 @@ +import time +from typing import Tuple, Any + +import torch + +from fltk.core.node import Node +from fltk.schedulers import MinCapableStepLR +from fltk.strategy import get_optimizer +from fltk.util.config import Config + + +class Client(Node): + + def __init__(self, id: int, rank: int, world_size: int, config: Config): + super().__init__(id, rank, world_size, config) + + self.loss_function = self.config.get_loss_function()() + self.optimizer = get_optimizer(self.config.optimizer)(self.net.parameters(), + **self.config.optimizer_args) + self.scheduler = MinCapableStepLR(self.logger, self.optimizer, + self.config.scheduler_step_size, + self.config.scheduler_gamma, + self.config.min_lr) + + def train(self, num_epochs: int): + start_time = time.time() + + running_loss = 0.0 + final_running_loss = 0.0 + if self.distributed: + self.dataset.train_sampler.set_epoch(num_epochs) + + number_of_training_samples = len(self.dataset.get_train_loader()) + # self.logger.info(f'{self.id}: Number of training samples: {number_of_training_samples}') + + for i, (inputs, labels) in enumerate(self.dataset.get_train_loader(), 0): + inputs, labels = inputs.to(self.device), labels.to(self.device) + + # zero the parameter gradients + self.optimizer.zero_grad() + + outputs = 
self.net(inputs) + loss = self.loss_function(outputs, labels) + + loss.backward() + self.optimizer.step() + running_loss += loss.item() + # Mark logging update step + if i % self.config.log_interval == 0: + # self.logger.info( + # '[%d, %5d] loss: %.3f' % (num_epochs, i, running_loss / self.config.log_interval)) + final_running_loss = running_loss / self.config.log_interval + running_loss = 0.0 + + end_time = time.time() + duration = end_time - start_time + # self.logger.info(f'Train duration is {duration} seconds') + + return final_running_loss, self.get_nn_parameters(), + + def test(self): + start_time = time.time() + correct = 0 + total = 0 + targets_ = [] + pred_ = [] + loss = 0.0 + with torch.no_grad(): + for (images, labels) in self.dataset.get_test_loader(): + images, labels = images.to(self.device), labels.to(self.device) + + outputs = self.net(images) + + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct += (predicted == labels).sum().item() + + targets_.extend(labels.cpu().view_as(predicted).numpy()) + pred_.extend(predicted.cpu().numpy()) + + loss += self.loss_function(outputs, labels).item() + loss /= len(self.dataset.get_test_loader().dataset) + accuracy = 100 * correct / total + # confusion_mat = confusion_matrix(targets_, pred_) + # accuracy_per_class = confusion_mat.diagonal() / confusion_mat.sum(1) + # + # class_precision = calculate_class_precision(confusion_mat) + # class_recall = calculate_class_recall(confusion_mat) + end_time = time.time() + duration = end_time - start_time + # self.logger.info(f'Test duration is {duration} seconds') + return accuracy, loss + + def get_client_datasize(self): + return len(self.dataset.get_train_sampler()) + + def exec_round(self, num_epochs: int) -> Tuple[float, Any, float, float]: + + start = time.time() + + loss, weights = self.train(num_epochs) + + accuracy, test_loss = self.test() + + end = time.time() + duration = end - start + # self.logger.info(f'Round duration is {duration} 
seconds') + return loss, weights, accuracy, test_loss + + def __del__(self): + self.logger.info(f'Client {self.id} is stopping') \ No newline at end of file diff --git a/fltk/core/federator.py b/fltk/core/federator.py new file mode 100644 index 00000000..ac2ee2be --- /dev/null +++ b/fltk/core/federator.py @@ -0,0 +1,148 @@ +import copy +import time +from typing import List, Union + +import torch + +from fltk.core.client import Client +from fltk.core.node import Node +from fltk.datasets.loader_util import get_dataset +from fltk.strategy import FedAvg, random_selection, average_nn_parameters, average_nn_parameters_simple +from fltk.util.config import Config + + +NodeReference = Union[Node, str] + + +class Federator(Node): + clients: List[NodeReference] = [] + num_rounds: int + + def __init__(self, id: int, rank: int, world_size: int, config: Config): + super().__init__(id, rank, world_size, config) + self.loss_function = self.config.get_loss_function()() + self.num_rounds = config.rounds + self.config = config + + + + def create_clients(self): + if self.config.single_machine: + # Create direct clients + world_size = self.config.num_clients + 1 + for client_id in range(1, self.config.num_clients+ 1): + client_name = f'client{client_id}' + self.clients.append(Client(client_name, client_id, world_size, copy.deepcopy(self.config))) + + def register_client(self, client_name, rank): + if self.config.single_machine: + self.logger.warning('This function should not be called when in single machine mode!') + self.clients.append(client_name) + + def _num_clients_online(self) -> int: + return len(self.clients) + + def _all_clients_online(self) -> bool: + return len(self.clients) == self.world_size - 1 + + def clients_ready(self): + """ + Synchronous implementation + """ + all_ready = False + ready_clients = [] + while not all_ready: + responses = [] + all_ready = True + for client in self.clients: + resp = self.message(client, Client.is_ready) + if resp: + 
self.logger.info(f'Client {client} is ready') + else: + self.logger.info(f'Waiting for client {client}') + all_ready = False + time.sleep(2) + + def run(self): + self.init_dataloader() + self.create_clients() + while not self._all_clients_online(): + self.logger.info(f'Waiting for all clients to come online. Waiting for {self.world_size - 1 -self._num_clients_online()} clients') + time.sleep(2) + self.client_load_data() + self.clients_ready() + + for communications_round in range(self.config.rounds): + self.exec_round() + + self.logger.info('Federator is stopping') + + def client_load_data(self): + for client in self.clients: + self.message(client, Client.init_dataloader) + + def test(self, net): + start_time = time.time() + correct = 0 + total = 0 + targets_ = [] + pred_ = [] + loss = 0.0 + with torch.no_grad(): + for (images, labels) in self.dataset.get_test_loader(): + images, labels = images.to(self.device), labels.to(self.device) + + outputs = net(images) + + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct += (predicted == labels).sum().item() + + targets_.extend(labels.cpu().view_as(predicted).numpy()) + pred_.extend(predicted.cpu().numpy()) + + loss += self.loss_function(outputs, labels).item() + loss /= len(self.dataset.get_test_loader().dataset) + accuracy = 100 * correct / total + # confusion_mat = confusion_matrix(targets_, pred_) + # accuracy_per_class = confusion_mat.diagonal() / confusion_mat.sum(1) + # + # class_precision = calculate_class_precision(confusion_mat) + # class_recall = calculate_class_recall(confusion_mat) + end_time = time.time() + duration = end_time - start_time + self.logger.info(f'Test duration is {duration} seconds') + return accuracy, loss + + def exec_round(self): + start_time = time.time() + num_epochs = self.config.epochs + + # Client selection + selected_clients = random_selection(self.clients, self.config.clients_per_round) + + last_model = self.get_nn_parameters() + for client in 
selected_clients: + self.message(client, Client.update_nn_parameters, last_model) + + # Actual training calls + client_weights = {} + client_sizes = {} + for client in selected_clients: + train_loss, weights, accuracy, test_loss = self.message(client, Client.exec_round, num_epochs) + client_weights[client] = weights + client_data_size = self.message(client, Client.get_client_datasize) + client_sizes[client] = client_data_size + self.logger.info(f'Client {client} has a accuracy of {accuracy}, train loss={train_loss}, test loss={test_loss},datasize={client_data_size}') + + # updated_model = FedAvg(client_weights, client_sizes) + updated_model = average_nn_parameters_simple(list(client_weights.values())) + self.update_nn_parameters(updated_model) + + test_accuracy, test_loss = self.test(self.net) + self.logger.info(f'Federator has a accuracy of {test_accuracy} and loss={test_loss}') + + end_time = time.time() + duration = end_time - start_time + self.logger.info(f'Round duration is {duration} seconds') + diff --git a/fltk/core/node.py b/fltk/core/node.py new file mode 100644 index 00000000..0b685f8f --- /dev/null +++ b/fltk/core/node.py @@ -0,0 +1,164 @@ +import copy +import os +from typing import Callable, Any + +import torch + +# from fltk.core.rpc_util import _remote_method_direct +from torch.distributed import rpc + +from fltk.datasets.loader_util import get_dataset +from fltk.nets import get_net +from fltk.util.config import Config +from fltk.util.log import getLogger + +global_vars = {} + + +def _remote_method_direct(method, other_node: str, *args, **kwargs): + # Client example + # ret = rpc.rpc_async(self.client_to_offload_to, Client.offload_receive_endpoint, args=([model_weights, i, self.id, local_updates_left])) + + args = [method, other_node] + list(args) + # return rpc.rpc_sync(other_node, _call_method, args=args, kwargs=kwargs) + return rpc.rpc_sync(other_node, method, args=args, kwargs=kwargs) + +class Node: + id: int + rank: int + world_size: int + 
counter = 0 + real_time = False + distributed = True + cuda = False + finished_init: bool = False + + device = torch.device("cpu") + net: Any + dataset: Any + logger = getLogger(__name__) + + + # _address_book = {} + + def __init__(self, id: int, rank: int, world_size: int, config: Config): + self.config = config + self.id = id + self.rank = rank + self.world_size = world_size + global global_vars + global_vars['self'] = self + self._config(config) + + def _config(self, config: Config): + self.config.rank = self.rank + self.config.world_size = self.world_size + self.cuda = config.cuda + self.distributed = config.distributed + self.set_net(self.load_default_model()) + + def init_dataloader(self): + self.logger.info(f'world size = {self.config.world_size} with rank={self.config.rank}') + self.dataset = get_dataset(self.config.dataset_name)(self.config) + self.finished_init = True + self.logger.info('Done with init') + + def is_ready(self): + return self.finished_init + + # def _add_address(self, node_name: str, ref: Any): + # self._address_book[node_name] = ref + + @staticmethod + def _receive(method: Callable, sender: str, *args, **kwargs): + global global_vars + global_self = global_vars['self'] + return method(global_self, *args, **kwargs) + + # def _lookup_reference(self, node_name: str): + + def init_device(self): + if self.cuda and torch.cuda.is_available(): + return torch.device("cuda:0") + else: + return torch.device("cpu") + + def set_net(self, net): + self.net = net + self.net.to(self.device) + + # def load_model_from_file(self): + # model_class = self.args.get_net() + # default_model_path = os.path.join(self.args.get_default_model_folder_path(), model_class.__name__ + ".model") + # return self.load_model_from_file(default_model_path) + + def get_nn_parameters(self): + """ + Return the NN's parameters. + """ + return self.net.state_dict() + + def load_default_model(self): + """ + Load a model from default model file. 
+ + This is used to ensure consistent default model behavior. + """ + model_class = get_net(self.config.net_name) + default_model_path = os.path.join(self.config.get_default_model_folder_path(), model_class.__name__ + ".model") + + return self.load_model_from_file(default_model_path) + + def load_model_from_file(self, model_file_path): + """ + Load a model from a file. + + :param model_file_path: string + """ + model_class = get_net(self.config.net_name) + model = model_class() + + if os.path.exists(model_file_path): + try: + model.load_state_dict(torch.load(model_file_path)) + except: + self.logger.warning("Couldn't load model. Attempting to map CUDA tensors to CPU to solve error.") + + model.load_state_dict(torch.load(model_file_path, map_location=torch.device('cpu'))) + else: + self.logger.warning("Could not find model: {}".format(model_file_path)) + return model + + + def update_nn_parameters(self, new_params, is_offloaded_model = False): + """ + Update the NN's parameters. + + :param new_params: New weights for the neural network + :type new_params: dict + """ + if is_offloaded_model: + pass + # self.offloaded_net.load_state_dict(copy.deepcopy(new_params), strict=True) + else: + self.net.load_state_dict(copy.deepcopy(new_params), strict=True) + # self.logger.info(f'Weights of the model are updated') + + def message(self, other_node: str, method: Callable, *args, **kwargs): + if self.real_time: + func = Node._receive + args_list = [method, self.id] + list(args) + return rpc.rpc_sync(other_node, func, args=args_list, kwargs=kwargs) + return method(other_node, *args, **kwargs) + + def ping(self, sender: str, be_weird=False): + self.logger.info(f'Pong from {self.id}, got call from {sender} [{self.counter}]') + # print(f'Pong from {self.id}, got call from {sender} [{self.counter}]') + self.counter += 1 + if be_weird: + return 'AAAAAAAAAAAAAAAAAAAAAAHHHH!!!!' 
+ else: + return f'Pong {self.counter}' + + def __repr__(self): + return str(self.id) diff --git a/fltk/core/rpc_util.py b/fltk/core/rpc_util.py new file mode 100644 index 00000000..b8aab507 --- /dev/null +++ b/fltk/core/rpc_util.py @@ -0,0 +1,30 @@ +import torch +from torch.distributed import rpc + +def _call_method(method, rref, *args, **kwargs): + """helper for _remote_method()""" + return method(rref.local_value(), *args, **kwargs) + +def _remote_method(method, rref, *args, **kwargs): + """ + executes method(*args, **kwargs) on the from the machine that owns rref + + very similar to rref.remote().method(*args, **kwargs), but method() doesn't have to be in the remote scope + """ + args = [method, rref] + list(args) + return rpc.rpc_sync(rref.owner(), _call_method, args=args, kwargs=kwargs) + + +def _remote_method_async(method, rref, *args, **kwargs) -> torch.Future: + args = [method, rref] + list(args) + return rpc.rpc_async(rref.owner(), _call_method, args=args, kwargs=kwargs) + + +def _remote_method_async_by_info(method, worker_info, *args, **kwargs): + args = [method, worker_info] + list(args) + return rpc.rpc_async(worker_info, _call_method, args=args, kwargs=kwargs) + +def _remote_method_direct(method, other_node: str, *args, **kwargs): + args = [method, other_node] + list(args) + # return rpc.rpc_sync(other_node, _call_method, args=args, kwargs=kwargs) + return rpc.rpc_sync(other_node, method, args=args, kwargs=kwargs) \ No newline at end of file diff --git a/fltk/datasets/distributed/cifar10.py b/fltk/datasets/distributed/cifar10.py index dfce7f79..31b0769a 100644 --- a/fltk/datasets/distributed/cifar10.py +++ b/fltk/datasets/distributed/cifar10.py @@ -17,7 +17,8 @@ def __init__(self, args): def init_train_dataset(self): dist_loader_text = "distributed" if self.args.get_distributed() else "" - self.get_args().get_logger().debug(f"Loading '{dist_loader_text}' CIFAR10 train data") + self.logger.debug(f"Loading '{dist_loader_text}' CIFAR10 train data") + # 
self.get_args().get_logger().debug(f"Loading '{dist_loader_text}' CIFAR10 train data") normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) transform = transforms.Compose([ transforms.RandomHorizontalFlip(), @@ -29,10 +30,12 @@ def init_train_dataset(self): transform=transform) self.train_sampler = get_sampler(self.train_dataset, self.args) self.train_loader = DataLoader(self.train_dataset, batch_size=16, sampler=self.train_sampler) - logging.info("this client gets {} samples".format(len(self.train_sampler))) + self.logger.info("this client gets {} samples".format(len(self.train_sampler))) + # logging.info("this client gets {} samples".format(len(self.train_sampler))) def init_test_dataset(self): - self.get_args().get_logger().debug("Loading CIFAR10 test data") + self.logger.debug("Loading CIFAR10 test data") + # self.get_args().get_logger().debug("Loading CIFAR10 test data") normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) transform = transforms.Compose([ diff --git a/fltk/datasets/distributed/cifar100.py b/fltk/datasets/distributed/cifar100.py index a1ccb432..640677eb 100644 --- a/fltk/datasets/distributed/cifar100.py +++ b/fltk/datasets/distributed/cifar100.py @@ -14,7 +14,7 @@ def __init__(self, args): def init_train_dataset(self): dist_loader_text = "distributed" if self.args.get_distributed() else "" - self.get_args().get_logger().debug(f"Loading '{dist_loader_text}' CIFAR100 train data") + self.logger.debug(f"Loading '{dist_loader_text}' CIFAR100 train data") normalize = transforms.Normalize(mean=[0.507, 0.487, 0.441], std=[0.267, 0.256, 0.276]) transform = transforms.Compose([ transforms.RandomHorizontalFlip(), @@ -29,7 +29,7 @@ def init_train_dataset(self): def init_test_dataset(self): dist_loader_text = "distributed" if self.args.get_distributed() else "" - self.get_args().get_logger().debug(f"Loading '{dist_loader_text}' CIFAR100 test data") + self.logger.debug(f"Loading 
'{dist_loader_text}' CIFAR100 test data") normalize = transforms.Normalize(mean=[0.507, 0.487, 0.441], std=[0.267, 0.256, 0.276]) transform = transforms.Compose([ @@ -44,7 +44,7 @@ def init_test_dataset(self): def load_train_dataset(self): dist_loader_text = "distributed" if self.args.get_distributed() else "" - self.get_args().get_logger().debug(f"Loading '{dist_loader_text}' CIFAR100 train data") + self.logger.debug(f"Loading '{dist_loader_text}' CIFAR100 train data") normalize = transforms.Normalize(mean=[0.507, 0.487, 0.441], std=[0.267, 0.256, 0.276]) transform = transforms.Compose([ @@ -63,12 +63,12 @@ def load_train_dataset(self): train_data = self.get_tuple_from_data_loader(train_loader) dist_loader_text = "distributed" if self.args.get_distributed() else "" - self.get_args().get_logger().debug(f"Finished loading '{dist_loader_text}' CIFAR100 train data") + self.logger.debug(f"Finished loading '{dist_loader_text}' CIFAR100 train data") return train_data def load_test_dataset(self): - self.get_args().get_logger().debug("Loading CIFAR100 test data") + self.logger.debug("Loading CIFAR100 test data") normalize = transforms.Normalize(mean=[0.507, 0.487, 0.441], std=[0.267, 0.256, 0.276]) transform = transforms.Compose([ @@ -83,7 +83,7 @@ def load_test_dataset(self): test_data = self.get_tuple_from_data_loader(test_loader) - self.get_args().get_logger().debug("Finished loading CIFAR10 test data") + self.logger.debug("Finished loading CIFAR10 test data") return test_data diff --git a/fltk/datasets/distributed/dataset.py b/fltk/datasets/distributed/dataset.py index 46458de1..83f4cf31 100644 --- a/fltk/datasets/distributed/dataset.py +++ b/fltk/datasets/distributed/dataset.py @@ -5,6 +5,7 @@ import numpy from fltk.util.arguments import Arguments +from fltk.util.log import getLogger class DistDataset: @@ -15,6 +16,7 @@ class DistDataset: test_dataset = None train_loader = None test_loader = None + logger = getLogger(__name__) def __init__(self, args: Arguments): 
self.args = args # self.train_dataset = self.load_train_dataset() diff --git a/fltk/datasets/distributed/fashion_mnist.py b/fltk/datasets/distributed/fashion_mnist.py index d9713f75..770f6b66 100644 --- a/fltk/datasets/distributed/fashion_mnist.py +++ b/fltk/datasets/distributed/fashion_mnist.py @@ -15,7 +15,7 @@ def __init__(self, args): def init_train_dataset(self): dist_loader_text = "distributed" if self.args.get_distributed() else "" - self.get_args().get_logger().debug(f"Loading '{dist_loader_text}' Fashion MNIST train data") + self.logger.debug(f"Loading '{dist_loader_text}' Fashion MNIST train data") self.train_dataset = datasets.FashionMNIST(root=self.get_args().get_data_path(), train=True, download=True, transform=transforms.Compose([transforms.ToTensor()])) @@ -24,32 +24,32 @@ def init_train_dataset(self): def init_test_dataset(self): dist_loader_text = "distributed" if self.args.get_distributed() else "" - self.get_args().get_logger().debug(f"Loading '{dist_loader_text}' Fashion MNIST test data") + self.logger.debug(f"Loading '{dist_loader_text}' Fashion MNIST test data") self.test_dataset = datasets.FashionMNIST(root=self.get_args().get_data_path(), train=False, download=True, transform=transforms.Compose([transforms.ToTensor()])) self.test_sampler = get_sampler(self.test_dataset, self.args) self.test_loader = DataLoader(self.test_dataset, batch_size=16, sampler=self.test_sampler) def load_train_dataset(self): - self.get_args().get_logger().debug("Loading Fashion MNIST train data") + self.logger.debug("Loading Fashion MNIST train data") train_dataset = datasets.FashionMNIST(self.get_args().get_data_path(), train=True, download=True, transform=transforms.Compose([transforms.ToTensor()])) train_loader = DataLoader(train_dataset, batch_size=len(train_dataset)) train_data = self.get_tuple_from_data_loader(train_loader) - self.get_args().get_logger().debug("Finished loading Fashion MNIST train data") + self.logger.debug("Finished loading Fashion MNIST train 
data") return train_data def load_test_dataset(self): - self.get_args().get_logger().debug("Loading Fashion MNIST test data") + self.logger.debug("Loading Fashion MNIST test data") test_dataset = datasets.FashionMNIST(self.get_args().get_data_path(), train=False, download=True, transform=transforms.Compose([transforms.ToTensor()])) test_loader = DataLoader(test_dataset, batch_size=len(test_dataset)) test_data = self.get_tuple_from_data_loader(test_loader) - self.get_args().get_logger().debug("Finished loading Fashion MNIST test data") + self.logger.debug("Finished loading Fashion MNIST test data") return test_data diff --git a/fltk/datasets/distributed/mnist.py b/fltk/datasets/distributed/mnist.py index bf6d49b7..a4056a3c 100644 --- a/fltk/datasets/distributed/mnist.py +++ b/fltk/datasets/distributed/mnist.py @@ -83,7 +83,7 @@ def __init__(self, args): def init_train_dataset(self): dist_loader_text = "distributed" if self.args.get_distributed() else "" - self.get_args().get_logger().debug(f"Loading '{dist_loader_text}' MNIST train data") + self.logger.debug(f"Loading '{dist_loader_text}' MNIST train data") self.train_dataset = datasets.MNIST(root=self.get_args().get_data_path(), train=True, download=True, transform=transforms.Compose([transforms.ToTensor()])) @@ -92,32 +92,32 @@ def init_train_dataset(self): def init_test_dataset(self): dist_loader_text = "distributed" if self.args.get_distributed() else "" - self.get_args().get_logger().debug(f"Loading '{dist_loader_text}' MNIST test data") + self.logger.debug(f"Loading '{dist_loader_text}' MNIST test data") self.test_dataset = datasets.MNIST(root=self.get_args().get_data_path(), train=False, download=True, transform=transforms.Compose([transforms.ToTensor()])) self.test_sampler = get_sampler(self.test_dataset, self.args) self.test_loader = DataLoader(self.test_dataset, batch_size=16, sampler=self.test_sampler) def load_train_dataset(self): - self.get_args().get_logger().debug("Loading MNIST train data") + 
self.logger.debug("Loading MNIST train data") train_dataset = datasets.MNIST(self.get_args().get_data_path(), train=True, download=True, transform=transforms.Compose([transforms.ToTensor()])) train_loader = DataLoader(train_dataset, batch_size=len(train_dataset)) train_data = self.get_tuple_from_data_loader(train_loader) - self.get_args().get_logger().debug("Finished loading MNIST train data") + self.logger.debug("Finished loading MNIST train data") return train_data def load_test_dataset(self): - self.get_args().get_logger().debug("Loading MNIST test data") + self.logger.debug("Loading MNIST test data") test_dataset = datasets.MNIST(self.get_args().get_data_path(), train=False, download=True, transform=transforms.Compose([transforms.ToTensor()])) test_loader = DataLoader(test_dataset, batch_size=len(test_dataset)) test_data = self.get_tuple_from_data_loader(test_loader) - self.get_args().get_logger().debug("Finished loading MNIST test data") + self.logger.debug("Finished loading MNIST test data") return test_data \ No newline at end of file diff --git a/fltk/datasets/loader_util.py b/fltk/datasets/loader_util.py new file mode 100644 index 00000000..1671882f --- /dev/null +++ b/fltk/datasets/loader_util.py @@ -0,0 +1,36 @@ +from fltk.datasets.distributed.mnist import DistMNISTDataset +from fltk.datasets.distributed.cifar10 import DistCIFAR10Dataset +from fltk.datasets.distributed.cifar100 import DistCIFAR100Dataset +from fltk.datasets.distributed.fashion_mnist import DistFashionMNISTDataset +from fltk.util.definitions import Dataset + +def available_datasets(): + return { + Dataset.cifar10: DistCIFAR10Dataset, + Dataset.cifar100: DistCIFAR100Dataset, + Dataset.fashion_mnist: DistFashionMNISTDataset, + Dataset.mnist: DistMNISTDataset + } + +def get_dataset(name: Dataset): + return available_datasets()[name] + + +def get_train_loader_path(name: Dataset) -> str: + paths = { + Dataset.cifar10: 'data_loaders/cifar10/train_data_loader.pickle', + Dataset.fashion_mnist: 
'data_loaders/fashion-mnist/train_data_loader.pickle', + Dataset.cifar100: 'data_loaders/cifar100/train_data_loader.pickle', + Dataset.mnist: 'data_loaders/mnist/train_data_loader.pickle', + } + return paths[name] + + +def get_test_loader_path(name: Dataset)-> str: + paths = { + Dataset.cifar10: 'data_loaders/cifar10/test_data_loader.pickle', + Dataset.fashion_mnist: 'data_loaders/fashion-mnist/test_data_loader.pickle', + Dataset.cifar100: 'data_loaders/cifar100/test_data_loader.pickle', + Dataset.mnist: 'data_loaders/mnist/test_data_loader.pickle', + } + return paths[name] \ No newline at end of file diff --git a/fltk/nets/__init__.py b/fltk/nets/__init__.py index 4bfa7138..e6acc547 100644 --- a/fltk/nets/__init__.py +++ b/fltk/nets/__init__.py @@ -4,3 +4,34 @@ from .fashion_mnist_resnet import FashionMNISTResNet from .cifar_10_resnet import Cifar10ResNet from .cifar_100_vgg import Cifar100VGG +from .mnist_cnn import MNIST_CNN +from ..util.definitions import Nets + + +def available_nets(): + return { + Nets.cifar100_resnet: Cifar100ResNet, + Nets.cifar100_vgg: Cifar100VGG, + Nets.cifar10_cnn: Cifar10CNN, + Nets.cifar10_resnet: Cifar10ResNet, + Nets.fashion_mnist_cnn: FashionMNISTCNN, + Nets.fashion_mnist_resnet: FashionMNISTResNet, + Nets.mnist_cnn: MNIST_CNN, + + } + +def get_net(name: Nets): + return available_nets()[name] + + +def get_net_split_point(name: Nets): + nets_split_point = { + Nets.cifar100_resnet: 48, + Nets.cifar100_vgg: 28, + Nets.cifar10_cnn: 15, + Nets.cifar10_resnet: 39, + Nets.fashion_mnist_cnn: 7, + Nets.fashion_mnist_resnet: 7, + Nets.mnist_cnn: 2, + } + return nets_split_point[name] \ No newline at end of file diff --git a/fltk/samplers/__init__.py b/fltk/samplers/__init__.py index d6085f8e..63c4a34b 100644 --- a/fltk/samplers/__init__.py +++ b/fltk/samplers/__init__.py @@ -6,13 +6,15 @@ from .limit_labels import LimitLabelsSampler from .limit_labels_flex import LimitLabelsSamplerFlex from ..util.definitions import DataSampler +from 
..util.log import getLogger def get_sampler(dataset, args): + logger = getLogger(__name__) sampler = None if args.get_distributed(): method = args.get_sampler() - args.get_logger().info( + logger.info( "Using {} sampler method, with args: {}".format(method, args.get_sampler_args())) if method == DataSampler.uniform: @@ -33,7 +35,7 @@ def get_sampler(dataset, args): sampler = DirichletSampler(dataset, num_replicas=args.get_world_size(), rank=args.get_rank(), args=args.get_sampler_args()) else: # default - args().get_logger().warning("Unknown sampler " + method + ", using uniform instead") + logger.warning("Unknown sampler " + method + ", using uniform instead") sampler = UniformSampler(dataset, num_replicas=args.get_world_size(), rank=args.get_rank()) return sampler diff --git a/fltk/strategy/optimization/__init__.py b/fltk/strategy/optimization/__init__.py index 549e5626..e4a501ce 100644 --- a/fltk/strategy/optimization/__init__.py +++ b/fltk/strategy/optimization/__init__.py @@ -1,2 +1,13 @@ +import torch from .fedprox import FedProx -from .FedNova import FedNova \ No newline at end of file +from .FedNova import FedNova +from ...util.definitions import Optimizations + + +def get_optimizer(name: Optimizations): + optimizers = { + Optimizations.sgd: torch.optim.SGD, + Optimizations.fedprox: FedProx, + Optimizations.fednova: FedNova + } + return optimizers[name] diff --git a/fltk/util/config.py b/fltk/util/config.py new file mode 100644 index 00000000..815bb506 --- /dev/null +++ b/fltk/util/config.py @@ -0,0 +1,66 @@ +from dataclasses import dataclass + +import torch + +from fltk.util.definitions import Dataset, Nets, DataSampler, Optimizations + + +@dataclass +class Config: + batch_size: int = 1 + test_batch_size: int = 1000 + rounds: int = 2 + epochs: int = 1 + lr: float = 0.01 + momentum: float = 0.1 + cuda: bool = False + shuffle: bool = False + log_interval: int = 10 + scheduler_step_size: int = 50 + scheduler_gamma: float = 0.5 + min_lr: float = 1e-10 + 
optimizer = Optimizations.sgd + optimizer_args = { + 'lr': lr, + 'momentum': momentum + } + loss_function = torch.nn.CrossEntropyLoss + + num_clients: int = 10 + clients_per_round: int = 2 + distributed: bool = True + single_machine: bool = False + + dataset_name: Dataset = Dataset.mnist + net_name: Nets = Nets.mnist_cnn + default_model_folder_path: str = "default_models" + data_path: str = "data" + data_sampler: DataSampler = DataSampler.uniform + data_sampler_args = [] + + rank: int = 0 + world_size: int = 0 + + def get_default_model_folder_path(self): + return self.default_model_folder_path + + def get_distributed(self): + return self.distributed + + def get_sampler(self): + return self.data_sampler + + def get_world_size(self): + return self.world_size + + def get_rank(self): + return self.rank + + def get_sampler_args(self): + return tuple(self.data_sampler_args) + + def get_data_path(self): + return self.data_path + + def get_loss_function(self): + return self.loss_function diff --git a/fltk/util/log.py b/fltk/util/log.py index a80661a3..338bb914 100644 --- a/fltk/util/log.py +++ b/fltk/util/log.py @@ -6,4 +6,12 @@ class FLLogger: @staticmethod @rpc.functions.async_execution def log(arg1, node_id, log_line, report_time): - logging.info(f'[{node_id}: {report_time}]: {log_line}') \ No newline at end of file + logging.info(f'[{node_id}: {report_time}]: {log_line}') + + +def getLogger(module_name): + logging.basicConfig( + level=logging.DEBUG, + format='%(asctime)s %(levelname)s %(module)s - %(funcName)s: %(message)s', + ) + return logging.getLogger(module_name) diff --git a/test_node_synchronous.py b/test_node_synchronous.py new file mode 100644 index 00000000..7631f50e --- /dev/null +++ b/test_node_synchronous.py @@ -0,0 +1,44 @@ +import os +import sys +import torch +import torch.distributed.rpc as rpc + +from fltk.core.client import Client +from fltk.core.federator import Federator +from fltk.core.node import Node +from fltk.util.config import Config + +if 
__name__ == '__main__': + world_size = 2 + config = Config() + config.num_clients = world_size - 1 + config.world_size = world_size + config.clients_per_round = 1 + config.epochs = 2 + config.rounds = 20 + config.cuda = False + config.single_machine = True + + fed = Federator('fed0', 0, world_size, config) + fed.run() + + # n1 = Client('c1', 0, world_size, config) + # n2 = Client('c2', 1, world_size, config) + # n3 = Client('c3', 2, world_size, config) + # n1.init_dataloader() + # n2.init_dataloader() + # n3.init_dataloader() + # + # response = n1.message(n2, Client.ping, 'new_sender') + # print(response) + # response = n3.message(n1, Client.ping, 'new_sender', be_weird=True) + # print(response) + # + # _, _, accuracy_n1, _ = n3.message(n1, Client.exec_round, 1) + # _, _, accuracy_n2, _ = n1.message(n2, Client.exec_round, 1) + # _, _, accuracy_n3, _ = n1.message(n3, Client.exec_round, 1) + # print(f'Client n1 has an accuracy of {accuracy_n1}') + # print(f'Client n2 has an accuracy of {accuracy_n2}') + # print(f'Client n3 has an accuracy of {accuracy_n3}') + # + # print(config) From 8ec610494c66fdd3a4906f3eb579f809f40df286 Mon Sep 17 00:00:00 2001 From: bacox Date: Mon, 21 Feb 2022 15:36:31 +0100 Subject: [PATCH 62/73] Enable cuda support --- fltk/core/node.py | 5 +++++ test_node_synchronous.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fltk/core/node.py b/fltk/core/node.py index 0b685f8f..eacab95a 100644 --- a/fltk/core/node.py +++ b/fltk/core/node.py @@ -54,6 +54,7 @@ def _config(self, config: Config): self.config.rank = self.rank self.config.world_size = self.world_size self.cuda = config.cuda + self.init_device() self.distributed = config.distributed self.set_net(self.load_default_model()) @@ -78,9 +79,13 @@ def _receive(method: Callable, sender: str, *args, **kwargs): # def _lookup_reference(self, node_name: str): def init_device(self): + if self.cuda and not torch.cuda.is_available(): + self.logger.warning('Unable to configure device 
for GPU because cuda.is_available() == False') if self.cuda and torch.cuda.is_available(): + self.logger.info("Configure device for GPU (Cuda)") return torch.device("cuda:0") else: + self.logger.info("Configure device for CPU") return torch.device("cpu") def set_net(self, net): diff --git a/test_node_synchronous.py b/test_node_synchronous.py index 7631f50e..dfe5cfa6 100644 --- a/test_node_synchronous.py +++ b/test_node_synchronous.py @@ -16,7 +16,7 @@ config.clients_per_round = 1 config.epochs = 2 config.rounds = 20 - config.cuda = False + config.cuda = True config.single_machine = True fed = Federator('fed0', 0, world_size, config) From d43423917dbd006d3406f3700fd1bf728e4140d2 Mon Sep 17 00:00:00 2001 From: bacox Date: Mon, 21 Feb 2022 17:34:46 +0100 Subject: [PATCH 63/73] Make log level configurable --- fltk/core/client.py | 16 +++++++++++- fltk/core/federator.py | 51 +++++++++++++++++++++++++++++---------- fltk/core/node.py | 10 +++++--- fltk/samplers/__init__.py | 2 +- fltk/samplers/uniform.py | 2 +- fltk/util/config.py | 4 ++- fltk/util/definitions.py | 11 +++++++++ fltk/util/log.py | 7 ++++-- 8 files changed, 81 insertions(+), 22 deletions(-) diff --git a/fltk/core/client.py b/fltk/core/client.py index bb994ed4..34820964 100644 --- a/fltk/core/client.py +++ b/fltk/core/client.py @@ -58,6 +58,15 @@ def train(self, num_epochs: int): return final_running_loss, self.get_nn_parameters(), + def set_tau_eff(self, total): + client_weight = self.get_client_datasize() / total + n = self.get_client_datasize() + E = self.config.epochs + B = 16 # nicely hardcoded :) + tau_eff = int(E * n / B) * client_weight + if hasattr(self.optimizer, 'set_tau_eff'): + self.optimizer.set_tau_eff(tau_eff) + def test(self): start_time = time.time() correct = 0 @@ -80,7 +89,7 @@ def test(self): loss += self.loss_function(outputs, labels).item() loss /= len(self.dataset.get_test_loader().dataset) - accuracy = 100 * correct / total + accuracy = 100.0 * correct / total # confusion_mat = 
confusion_matrix(targets_, pred_) # accuracy_per_class = confusion_mat.diagonal() / confusion_mat.sum(1) # @@ -105,6 +114,11 @@ def exec_round(self, num_epochs: int) -> Tuple[float, Any, float, float]: end = time.time() duration = end - start # self.logger.info(f'Round duration is {duration} seconds') + + if hasattr(self.optimizer, 'pre_communicate'): # aka fednova or fedprox + self.optimizer.pre_communicate() + for k, v in weights.items(): + weights[k] = v.cpu() return loss, weights, accuracy, test_loss def __del__(self): diff --git a/fltk/core/federator.py b/fltk/core/federator.py index ac2ee2be..25d90857 100644 --- a/fltk/core/federator.py +++ b/fltk/core/federator.py @@ -9,15 +9,22 @@ from fltk.datasets.loader_util import get_dataset from fltk.strategy import FedAvg, random_selection, average_nn_parameters, average_nn_parameters_simple from fltk.util.config import Config - +from dataclasses import dataclass NodeReference = Union[Node, str] +@dataclass +class LocalClient: + name: str + ref: NodeReference + data_size: int class Federator(Node): - clients: List[NodeReference] = [] + clients: List[LocalClient] = [] + # clients: List[NodeReference] = [] num_rounds: int + def __init__(self, id: int, rank: int, world_size: int, config: Config): super().__init__(id, rank, world_size, config) self.loss_function = self.config.get_loss_function()() @@ -32,12 +39,13 @@ def create_clients(self): world_size = self.config.num_clients + 1 for client_id in range(1, self.config.num_clients+ 1): client_name = f'client{client_id}' - self.clients.append(Client(client_name, client_id, world_size, copy.deepcopy(self.config))) + client = Client(client_name, client_id, world_size, copy.deepcopy(self.config)) + self.clients.append(LocalClient(client_name, client, 0)) def register_client(self, client_name, rank): if self.config.single_machine: self.logger.warning('This function should not be called when in single machine mode!') - self.clients.append(client_name) + 
self.clients.append(LocalClient(client_name, client_name, 0)) def _num_clients_online(self) -> int: return len(self.clients) @@ -55,7 +63,7 @@ def clients_ready(self): responses = [] all_ready = True for client in self.clients: - resp = self.message(client, Client.is_ready) + resp = self.message(client.ref, Client.is_ready) if resp: self.logger.info(f'Client {client} is ready') else: @@ -63,13 +71,21 @@ def clients_ready(self): all_ready = False time.sleep(2) + def get_client_data_sizes(self): + for client in self.clients: + client.data_size = self.message(client.ref, Client.get_client_datasize) + def run(self): - self.init_dataloader() + # Load dataset with world size 2 to load the whole dataset. + # Caused by the fact that the dataloader subtracts 1 from the world size to exclude the federator by default. + self.init_dataloader(world_size=2) + self.create_clients() while not self._all_clients_online(): self.logger.info(f'Waiting for all clients to come online. Waiting for {self.world_size - 1 -self._num_clients_online()} clients') time.sleep(2) self.client_load_data() + self.get_client_data_sizes() self.clients_ready() for communications_round in range(self.config.rounds): @@ -79,7 +95,15 @@ def run(self): def client_load_data(self): for client in self.clients: - self.message(client, Client.init_dataloader) + self.message(client.ref, Client.init_dataloader) + + def set_tau_eff(self): + total = sum(client.data_size for client in self.clients) + # responses = [] + for client in self.clients: + self.message(client.ref, Client.set_tau_eff, client.ref, total) + # responses.append((client, _remote_method_async(Client.set_tau_eff, client.ref, total))) + # torch.futures.wait_all([x[1] for x in responses]) def test(self, net): start_time = time.time() @@ -103,7 +127,7 @@ def test(self, net): loss += self.loss_function(outputs, labels).item() loss /= len(self.dataset.get_test_loader().dataset) - accuracy = 100 * correct / total + accuracy = 100.0 * correct / total # 
confusion_mat = confusion_matrix(targets_, pred_) # accuracy_per_class = confusion_mat.diagonal() / confusion_mat.sum(1) # @@ -119,20 +143,21 @@ def exec_round(self): num_epochs = self.config.epochs # Client selection + selected_clients: List[LocalClient] selected_clients = random_selection(self.clients, self.config.clients_per_round) last_model = self.get_nn_parameters() for client in selected_clients: - self.message(client, Client.update_nn_parameters, last_model) + self.message(client.ref, Client.update_nn_parameters, last_model) # Actual training calls client_weights = {} client_sizes = {} for client in selected_clients: - train_loss, weights, accuracy, test_loss = self.message(client, Client.exec_round, num_epochs) - client_weights[client] = weights - client_data_size = self.message(client, Client.get_client_datasize) - client_sizes[client] = client_data_size + train_loss, weights, accuracy, test_loss = self.message(client.ref, Client.exec_round, num_epochs) + client_weights[client.name] = weights + client_data_size = self.message(client.ref, Client.get_client_datasize) + client_sizes[client.name] = client_data_size self.logger.info(f'Client {client} has a accuracy of {accuracy}, train loss={train_loss}, test loss={test_loss},datasize={client_data_size}') # updated_model = FedAvg(client_weights, client_sizes) diff --git a/fltk/core/node.py b/fltk/core/node.py index eacab95a..17d594a0 100644 --- a/fltk/core/node.py +++ b/fltk/core/node.py @@ -51,6 +51,7 @@ def __init__(self, id: int, rank: int, world_size: int, config: Config): self._config(config) def _config(self, config: Config): + self.logger.setLevel(config.log_level.value) self.config.rank = self.rank self.config.world_size = self.world_size self.cuda = config.cuda @@ -58,9 +59,12 @@ def _config(self, config: Config): self.distributed = config.distributed self.set_net(self.load_default_model()) - def init_dataloader(self): - self.logger.info(f'world size = {self.config.world_size} with 
rank={self.config.rank}') - self.dataset = get_dataset(self.config.dataset_name)(self.config) + def init_dataloader(self, world_size: int = None): + config = copy.deepcopy(self.config) + if world_size: + config.world_size = world_size + self.logger.info(f'world size = {config.world_size} with rank={config.rank}') + self.dataset = get_dataset(config.dataset_name)(config) self.finished_init = True self.logger.info('Done with init') diff --git a/fltk/samplers/__init__.py b/fltk/samplers/__init__.py index 63c4a34b..d808d4ff 100644 --- a/fltk/samplers/__init__.py +++ b/fltk/samplers/__init__.py @@ -14,7 +14,7 @@ def get_sampler(dataset, args): sampler = None if args.get_distributed(): method = args.get_sampler() - logger.info( + logger.debug( "Using {} sampler method, with args: {}".format(method, args.get_sampler_args())) if method == DataSampler.uniform: diff --git a/fltk/samplers/uniform.py b/fltk/samplers/uniform.py index 65a57ba8..69e826f1 100644 --- a/fltk/samplers/uniform.py +++ b/fltk/samplers/uniform.py @@ -10,4 +10,4 @@ class UniformSampler(DistributedSamplerWrapper): def __init__(self, dataset, num_replicas=None, rank=None, seed=0): super().__init__(dataset, num_replicas=num_replicas, rank=rank, seed=seed) indices = list(range(len(self.dataset))) - self.indices = indices[self.rank:self.total_size:self.num_replicas] \ No newline at end of file + self.indices = indices[self.rank:self.total_size:self.n_clients] \ No newline at end of file diff --git a/fltk/util/config.py b/fltk/util/config.py index 815bb506..2b8ba98a 100644 --- a/fltk/util/config.py +++ b/fltk/util/config.py @@ -2,7 +2,7 @@ import torch -from fltk.util.definitions import Dataset, Nets, DataSampler, Optimizations +from fltk.util.definitions import Dataset, Nets, DataSampler, Optimizations, LogLevel @dataclass @@ -26,6 +26,8 @@ class Config: } loss_function = torch.nn.CrossEntropyLoss + log_level: LogLevel = LogLevel.DEBUG + num_clients: int = 10 clients_per_round: int = 2 distributed: bool = True 
diff --git a/fltk/util/definitions.py b/fltk/util/definitions.py index 41dfaec6..c81f9a4f 100644 --- a/fltk/util/definitions.py +++ b/fltk/util/definitions.py @@ -34,6 +34,17 @@ class Dataset(Enum): mnist = 'mnist' +class LogLevel(Enum): + CRITICAL = 50 + FATAL = CRITICAL + ERROR = 40 + WARNING = 30 + WARN = WARNING + INFO = 20 + DEBUG = 10 + NOTSET = 0 + + class Aggregations(Enum): avg = 'Avg' fed_avg = 'FedAvg' diff --git a/fltk/util/log.py b/fltk/util/log.py index 338bb914..ba7c3c16 100644 --- a/fltk/util/log.py +++ b/fltk/util/log.py @@ -2,6 +2,9 @@ from torch.distributed import rpc +from fltk.util.definitions import LogLevel + + class FLLogger: @staticmethod @rpc.functions.async_execution @@ -9,9 +12,9 @@ def log(arg1, node_id, log_line, report_time): logging.info(f'[{node_id}: {report_time}]: {log_line}') -def getLogger(module_name): +def getLogger(module_name, level: LogLevel = LogLevel.INFO): logging.basicConfig( - level=logging.DEBUG, + level=level.value, format='%(asctime)s %(levelname)s %(module)s - %(funcName)s: %(message)s', ) return logging.getLogger(module_name) From 13602bab9a9002ba915517e8a69f988b170cbc74 Mon Sep 17 00:00:00 2001 From: bacox Date: Thu, 24 Feb 2022 19:59:39 +0100 Subject: [PATCH 64/73] Record experiment data --- fltk/core/client.py | 10 +++-- fltk/core/federator.py | 42 +++++++++++++----- fltk/util/analysis.py | 70 ++++++++++++++++++++++++++++++ fltk/util/config.py | 7 +++ fltk/util/data_container.py | 86 +++++++++++++++++++++++++++++++++++++ 5 files changed, 200 insertions(+), 15 deletions(-) create mode 100644 fltk/util/analysis.py create mode 100644 fltk/util/data_container.py diff --git a/fltk/core/client.py b/fltk/core/client.py index 34820964..21e922f3 100644 --- a/fltk/core/client.py +++ b/fltk/core/client.py @@ -103,23 +103,25 @@ def test(self): def get_client_datasize(self): return len(self.dataset.get_train_sampler()) - def exec_round(self, num_epochs: int) -> Tuple[float, Any, float, float]: + def exec_round(self, 
num_epochs: int) -> Tuple[Any, Any, Any, Any, float, float, float]: start = time.time() loss, weights = self.train(num_epochs) - + time_mark_between = time.time() accuracy, test_loss = self.test() end = time.time() - duration = end - start + round_duration = end - start + train_duration = time_mark_between - start + test_duration = end - time_mark_between # self.logger.info(f'Round duration is {duration} seconds') if hasattr(self.optimizer, 'pre_communicate'): # aka fednova or fedprox self.optimizer.pre_communicate() for k, v in weights.items(): weights[k] = v.cpu() - return loss, weights, accuracy, test_loss + return loss, weights, accuracy, test_loss, round_duration, train_duration, test_duration def __del__(self): self.logger.info(f'Client {self.id} is stopping') \ No newline at end of file diff --git a/fltk/core/federator.py b/fltk/core/federator.py index 25d90857..70b65a38 100644 --- a/fltk/core/federator.py +++ b/fltk/core/federator.py @@ -1,8 +1,10 @@ import copy import time +from pathlib import Path from typing import List, Union import torch +from tqdm import tqdm from fltk.core.client import Client from fltk.core.node import Node @@ -11,25 +13,30 @@ from fltk.util.config import Config from dataclasses import dataclass +from fltk.util.data_container import DataContainer, FederatorRecord, ClientRecord + NodeReference = Union[Node, str] @dataclass class LocalClient: name: str ref: NodeReference data_size: int + exp_data: DataContainer + class Federator(Node): clients: List[LocalClient] = [] # clients: List[NodeReference] = [] num_rounds: int - + exp_data: DataContainer def __init__(self, id: int, rank: int, world_size: int, config: Config): super().__init__(id, rank, world_size, config) self.loss_function = self.config.get_loss_function()() self.num_rounds = config.rounds self.config = config + self.exp_data = DataContainer('federator', config.output_path, FederatorRecord, config.save_data_append) @@ -40,12 +47,14 @@ def create_clients(self): for client_id 
in range(1, self.config.num_clients+ 1): client_name = f'client{client_id}' client = Client(client_name, client_id, world_size, copy.deepcopy(self.config)) - self.clients.append(LocalClient(client_name, client, 0)) + self.clients.append(LocalClient(client_name, client, 0, DataContainer(client_name, self.config.output_path, + ClientRecord, self.config.save_data_append))) def register_client(self, client_name, rank): if self.config.single_machine: self.logger.warning('This function should not be called when in single machine mode!') - self.clients.append(LocalClient(client_name, client_name, 0)) + self.clients.append(LocalClient(client_name, client_name, 0, DataContainer(client_name, self.config.output_path, + ClientRecord, self.config.save_data_append))) def _num_clients_online(self) -> int: return len(self.clients) @@ -88,11 +97,18 @@ def run(self): self.get_client_data_sizes() self.clients_ready() - for communications_round in range(self.config.rounds): - self.exec_round() + for communication_round in range(self.config.rounds): + self.exec_round(communication_round) + self.save_data() self.logger.info('Federator is stopping') + + def save_data(self): + self.exp_data.save() + for client in self.clients: + client.exp_data.save() + def client_load_data(self): for client in self.clients: self.message(client.ref, Client.init_dataloader) @@ -138,7 +154,7 @@ def test(self, net): self.logger.info(f'Test duration is {duration} seconds') return accuracy, loss - def exec_round(self): + def exec_round(self, id: int): start_time = time.time() num_epochs = self.config.epochs @@ -153,21 +169,25 @@ def exec_round(self): # Actual training calls client_weights = {} client_sizes = {} - for client in selected_clients: - train_loss, weights, accuracy, test_loss = self.message(client.ref, Client.exec_round, num_epochs) + pbar = tqdm(selected_clients) + for client in pbar: + pbar.set_description(f'[Round {id:>3}] Running clients') + train_loss, weights, accuracy, test_loss, 
round_duration, train_duration, test_duration = self.message(client.ref, Client.exec_round, num_epochs) client_weights[client.name] = weights client_data_size = self.message(client.ref, Client.get_client_datasize) client_sizes[client.name] = client_data_size - self.logger.info(f'Client {client} has a accuracy of {accuracy}, train loss={train_loss}, test loss={test_loss},datasize={client_data_size}') + client.exp_data.append(ClientRecord(id, train_duration, test_duration, round_duration, num_epochs, 0, accuracy, train_loss, test_loss)) + # self.logger.info(f'[Round {id:>3}] Client {client} has a accuracy of {accuracy}, train loss={train_loss}, test loss={test_loss},datasize={client_data_size}') # updated_model = FedAvg(client_weights, client_sizes) updated_model = average_nn_parameters_simple(list(client_weights.values())) self.update_nn_parameters(updated_model) test_accuracy, test_loss = self.test(self.net) - self.logger.info(f'Federator has a accuracy of {test_accuracy} and loss={test_loss}') + self.logger.info(f'[Round {id:>3}] Federator has a accuracy of {test_accuracy} and loss={test_loss}') end_time = time.time() duration = end_time - start_time - self.logger.info(f'Round duration is {duration} seconds') + self.exp_data.append(FederatorRecord(len(selected_clients), 0, duration, test_loss, test_accuracy)) + self.logger.info(f'[Round {id:>3}] Round duration is {duration} seconds') diff --git a/fltk/util/analysis.py b/fltk/util/analysis.py new file mode 100644 index 00000000..b35ce4c8 --- /dev/null +++ b/fltk/util/analysis.py @@ -0,0 +1,70 @@ +from pathlib import Path +import argparse +from typing import List + +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns +import re + +# alt.renderers.enable('mimetype') + +def get_cwd() -> Path: + return Path.cwd() + + +def get_exp_name(path: Path) -> str: + return path.parent.name + + +def ensure_path_exists(path: Path): + path.mkdir(parents=True, exist_ok=True) + +def load_and_merge_dfs(files: 
List[Path]) -> pd.DataFrame: + dfs = [pd.read_csv(x) for x in files] + return pd.concat(dfs, ignore_index=True) + +def order_client_names(names: List[str]) -> List[str]: + return sorted(names, key=lambda x: float(re.findall(r'\d+', x)[0])) + +def plot_client_duration(df: pd.DataFrame): + small_df = df[['round_id', 'train_duration', 'test_duration', 'round_duration', 'node_name']].melt(id_vars=['round_id', 'node_name'], var_name='type') + ordered_clients = order_client_names(small_df['node_name'].unique()) + plt.figure() + g = sns.FacetGrid(small_df, col="type", sharey=False) + g.map(sns.boxplot, "node_name", "value", order=ordered_clients) + for axes in g.axes.flat: + _ = axes.set_xticklabels(axes.get_xticklabels(), rotation=90) + plt.tight_layout() + plt.show() + + plt.figure() + g = sns.FacetGrid(small_df, col="type", sharey=False, hue='node_name', hue_order=ordered_clients) + g.map(sns.lineplot, "round_id", "value") + for axes in g.axes.flat: + _ = axes.set_xticklabels(axes.get_xticklabels(), rotation=90) + plt.tight_layout() + plt.show() + + +def analyse(path: Path): + cwd = get_cwd() + output_path = cwd / get_exp_name(path) + ensure_path_exists(output_path) + all_files = [x for x in path.iterdir() if x.is_file()] + federator_files = [x for x in all_files if 'federator' in x.name] + client_files = [x for x in all_files if x.name.startswith('client')] + + federator_data = load_and_merge_dfs(federator_files) + client_data = load_and_merge_dfs(client_files) + + # print(len(client_data), len(federator_data)) + plot_client_duration(client_data) + # What do we want to plot in terms of data? 
+ + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Basic experiment analysis') + parser.add_argument('path', type=str, help='Path pointing to experiment results files') + args = parser.parse_args() + analyse(Path(args.path)) diff --git a/fltk/util/config.py b/fltk/util/config.py index 2b8ba98a..86266a22 100644 --- a/fltk/util/config.py +++ b/fltk/util/config.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from pathlib import Path import torch @@ -43,6 +44,12 @@ class Config: rank: int = 0 world_size: int = 0 + # Save data in append mode. Thereby flushing on every append to file. + # This could be useful when a system is likely to crash midway an experiment + save_data_append: bool = False + + output_path: Path = Path('output_test_2') + def get_default_model_folder_path(self): return self.default_model_folder_path diff --git a/fltk/util/data_container.py b/fltk/util/data_container.py new file mode 100644 index 00000000..d0500820 --- /dev/null +++ b/fltk/util/data_container.py @@ -0,0 +1,86 @@ +import csv +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Union, List, Type +from typing.io import TextIO + + +@dataclass +class DataRecord: + pass + + +@dataclass +class FederatorRecord(DataRecord): + num_selected_clients: int + round_id: int + round_duration: int + test_loss: float + test_accuracy: float + # Accuracy per class? + timestamp: float = time.time() + node_name: str = '' + + +@dataclass +class ClientRecord(DataRecord): + round_id: int + train_duration: float + test_duration: float + round_duration: float + num_epochs: int + trained_items: int + accuracy: float + train_loss: float + test_loss: float + # Accuracy per class? 
+ timestamp: float = time.time() + node_name: str = '' + + +class DataContainer: + records: List[DataRecord] + file_name: str + file_handle: TextIO + file_path: Path + append_mode: bool + record_type: DataRecord + delimiter = ',' + name: str + + def __init__(self, name: str, output_location: Path, record_type: DataRecord, append_mode: bool = False): + # print(f'Creating new Data container for client {name}') + self.records = [] + self.file_name = f'{name}.csv' + self.name = name + output_location.mkdir(parents=True, exist_ok=True) + self.file_path = output_location / self.file_name + self.append_mode = append_mode + file_flag = 'a' if append_mode else 'w' + self.file_handle = open(self.file_path, file_flag) + self.record_type = record_type + if self.append_mode: + open(self.file_path, 'w').close() + dw = csv.DictWriter(self.file_handle, self.record_type.__annotations__) + dw.writeheader() + self.file_handle.flush() + + def append(self, record: DataRecord): + record.node_name = self.name + self.records.append(record) + if self.append_mode: + dw = csv.DictWriter(self.file_handle, self.record_type.__annotations__) + dw.writerow(record.__dict__) + self.file_handle.flush() + + def save(self): + if self.append_mode: + return + dw = csv.DictWriter(self.file_handle, self.record_type.__annotations__) + dw.writeheader() + # print(f'Saving {len(self.records)} for node {self.name}') + for record in self.records: + record.node_name = self.name + dw.writerow(record.__dict__) + self.file_handle.flush() From dbcf49b0f9b1772f4c73495a1145ce779ab3aeeb Mon Sep 17 00:00:00 2001 From: bacox Date: Thu, 10 Mar 2022 16:11:48 +0100 Subject: [PATCH 65/73] Make message calls opaque --- Dockerfile | 9 +- fltk/__main__.py | 282 +++++++++++++++++++------ fltk/core/client.py | 24 ++- fltk/core/federator.py | 70 +++++- fltk/core/node.py | 30 ++- fltk/strategy/aggregation/FedAvg.py | 2 +- fltk/strategy/aggregation/__init__.py | 13 +- fltk/strategy/optimization/__init__.py | 2 +- fltk/util/config.py 
| 42 +++- fltk/util/data_container.py | 1 + fltk/util/definitions.py | 11 +- fltk/util/generate_docker_compose.py | 9 +- 12 files changed, 396 insertions(+), 99 deletions(-) diff --git a/Dockerfile b/Dockerfile index 6e79f4a4..09cdfe0b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,9 +7,6 @@ MAINTAINER Bart Cox # Run build without interactive dialogue ARG DEBIAN_FRONTEND=noninteractive -ENV GLOO_SOCKET_IFNAME=eth0 -ENV TP_SOCKET_IFNAME=eth0 - # Define the working directory of the current Docker container WORKDIR /opt/federation-lab @@ -26,6 +23,9 @@ COPY requirements.txt ./ # Install all required packages for the generator RUN python3 -m pip install -r requirements.txt +ENV GLOO_SOCKET_IFNAME=$NIC +ENV TP_SOCKET_IFNAME=$NIC + #RUN mkdir -p ./data/MNIST #COPY ./data/MNIST ../data/MNIST #ADD fltk ./fedsim @@ -46,5 +46,6 @@ COPY fltk ./fltk COPY configs ./configs #CMD python3 ./fltk/__main__.py single configs/experiment.yaml --rank=$RANK # CMD python3 -m fltk single configs/experiment_vanilla.yaml --rank=$RANK -CMD python3 -m fltk single $EXP_CONFIG --rank=$RANK +#CMD python3 -m fltk single $EXP_CONFIG --rank=$RANK +CMD python3 -m fltk remote $EXP_CONFIG $RANK --nic=$NIC --host=$MASTER_HOSTNAME #CMD python3 setup.py \ No newline at end of file diff --git a/fltk/__main__.py b/fltk/__main__.py index 263a7fa5..cf614012 100644 --- a/fltk/__main__.py +++ b/fltk/__main__.py @@ -1,87 +1,233 @@ +# import os +# import random +# import sys +# import time +# +# import torch.distributed.rpc as rpc +# import logging +# +# import yaml +# import argparse +# +# import torch.multiprocessing as mp +# from fltk.federator import Federator +# from fltk.launch import run_single, run_spawn +# from fltk.util.base_config import BareConfig +# +# logging.basicConfig(level=logging.DEBUG) +# +# def add_default_arguments(parser): +# parser.add_argument('--world_size', type=str, default=None, +# help='Number of entities in the world. 
This is the number of clients + 1') +# +# def main(): +# parser = argparse.ArgumentParser(description='Experiment launcher for the Federated Learning Testbed') +# +# subparsers = parser.add_subparsers(dest="mode") +# +# single_parser = subparsers.add_parser('single') +# single_parser.add_argument('config', type=str) +# single_parser.add_argument('--rank', type=int) +# single_parser.add_argument('--nic', type=str, default=None) +# single_parser.add_argument('--host', type=str, default=None) +# add_default_arguments(single_parser) +# +# spawn_parser = subparsers.add_parser('spawn') +# spawn_parser.add_argument('config', type=str) +# add_default_arguments(spawn_parser) +# +# remote_parser = subparsers.add_parser('remote') +# remote_parser.add_argument('--rank', type=int) +# remote_parser.add_argument('--nic', type=str, default=None) +# remote_parser.add_argument('--host', type=str, default=None) +# add_default_arguments(remote_parser) +# args = parser.parse_args() +# if args.mode == 'remote': +# if args.rank is None or args.host is None or args.world_size is None or args.nic is None: +# print('Missing rank, host, world-size, or nic argument when in \'remote\' mode!') +# parser.print_help() +# exit(1) +# world_size = int(args.world_size) +# master_address = args.host +# nic = args.nic +# rank = int(args.rank) +# if rank == 0: +# print('Remote mode only supports ranks > 0!') +# exit(1) +# print(f'rank={args.rank}, world_size={world_size}, host={master_address}, args=None, nic={nic}') +# run_single(rank=args.rank, world_size=world_size, host=master_address, args=None, nic=nic) +# else: +# with open(args.config) as file: +# sleep_time = random.uniform(0, 5.0) +# time.sleep(sleep_time) +# cfg = BareConfig() +# yaml_data = yaml.load(file, Loader=yaml.FullLoader) +# cfg.merge_yaml(yaml_data) +# if args.mode == 'single': +# if args.rank is None: +# print('Missing rank argument when in \'single\' mode!') +# parser.print_help() +# exit(1) +# world_size = args.world_size +# 
master_address = args.host +# nic = args.nic +# +# if not world_size: +# world_size = yaml_data['system']['clients']['amount'] + 1 +# if not master_address: +# master_address = yaml_data['system']['federator']['hostname'] +# if not nic: +# nic = yaml_data['system']['federator']['nic'] +# print(f'rank={args.rank}, world_size={world_size}, host={master_address}, args=cfg, nic={nic}') +# run_single(rank=args.rank, world_size=world_size, host=master_address, args=cfg, nic=nic) +# else: +# run_spawn(cfg) +# +# if __name__ == "__main__": +# main() import os -import random import sys -import time +from pathlib import Path -import torch.distributed.rpc as rpc -import logging +from torch.distributed import rpc -import yaml +from fltk.core.client import Client + +print(sys.path) +# from fltk.core.federator import Federator as Fed +print(list(Path.cwd().iterdir())) import argparse +from enum import Enum +from pathlib import Path -import torch.multiprocessing as mp -from fltk.federator import Federator -from fltk.launch import run_single, run_spawn -from fltk.util.base_config import BareConfig +from fltk.core.federator import Federator +from fltk.util.config import Config +from fltk.util.definitions import Aggregations, Optimizations -logging.basicConfig(level=logging.DEBUG) +def run_single(config_path: Path): -def add_default_arguments(parser): - parser.add_argument('--world_size', type=str, default=None, - help='Number of entities in the world. This is the number of clients + 1') + # We can iterate over all the experiments in the directory and execute it, as long as the system remains the same! 
+ # System = machines and its configuration + + print(config_path) + config = Config.FromYamlFile(config_path) + config.world_size = config.num_clients + 1 + config.replication_id = 1 + federator_node = Federator('federator', 0, config.world_size, config) + federator_node.run() + + +def retrieve_env_params(nic=None, host=None): + if host: + os.environ['MASTER_ADDR'] = host + os.environ['MASTER_PORT'] = '5000' + if nic: + os.environ['GLOO_SOCKET_IFNAME'] = nic + os.environ['TP_SOCKET_IFNAME'] = nic + +def retrieve_network_params_from_config(config: Config, nic=None, host=None): + if hasattr(config, 'system'): + system_attr = getattr(config, 'system') + if 'federator' in system_attr: + if 'hostname' in system_attr['federator'] and not host: + host = system_attr['federator']['hostname'] + if 'nic' in system_attr['federator'] and not nic: + nic = system_attr['federator']['nic'] + return nic, host + +def run_remote(config_path: Path, rank: int, nic=None, host=None): + print(config_path, rank) + config = Config.FromYamlFile(config_path) + config.world_size = config.num_clients + 1 + nic, host = retrieve_network_params_from_config(config, nic, host) + if not nic or not host: + print('Missing rank, host, world-size, or nic argument when in \'remote\' mode!') + parser.print_help() + exit(1) + retrieve_env_params(nic, host) + print(f'Starting with host={os.environ["MASTER_ADDR"]} and port={os.environ["MASTER_PORT"]} and interface={nic}') + options = rpc.TensorPipeRpcBackendOptions( + num_worker_threads=16, + rpc_timeout=0, # infinite timeout + init_method=f'tcp://{os.environ["MASTER_ADDR"]}:{os.environ["MASTER_PORT"]}' + ) + if rank != 0: + print(f'Starting worker {rank}') + rpc.init_rpc( + f"client{rank}", + rank=rank, + world_size=config.world_size, + rpc_backend_options=options, + ) + client_node = Client(f'client{rank}', rank, config.world_size, config) + client_node.remote_registration() + + # trainer passively waiting for ps to kick off training iterations + else: + 
print(f'Starting the ps with world size={config.world_size}') + rpc.init_rpc( + "federator", + rank=rank, + world_size=config.world_size, + rpc_backend_options=options + + ) + federator_node = Federator('federator', 0, config.world_size, config) + # federator_node.create_clients() + federator_node.run() + federator_node.stop_all_clients() + print('Ending program') + # if rank == 0: + # print('FEDERATOR!') + # else: + # print(f'CLIENT {rank}') def main(): - parser = argparse.ArgumentParser(description='Experiment launcher for the Federated Learning Testbed') + pass - subparsers = parser.add_subparsers(dest="mode") - single_parser = subparsers.add_parser('single') - single_parser.add_argument('config', type=str) - single_parser.add_argument('--rank', type=int) - single_parser.add_argument('--nic', type=str, default=None) - single_parser.add_argument('--host', type=str, default=None) - add_default_arguments(single_parser) +def add_default_arguments(parser): + parser.add_argument('config', type=str, + help='') - spawn_parser = subparsers.add_parser('spawn') - spawn_parser.add_argument('config', type=str) - add_default_arguments(spawn_parser) +if __name__ == '__main__': + parser = argparse.ArgumentParser(prog='fltk', description='Experiment launcher for the Federated Learning Testbed (fltk)') + subparsers = parser.add_subparsers(dest="action", required=True) + launch_parser = subparsers.add_parser('launch-util') remote_parser = subparsers.add_parser('remote') - remote_parser.add_argument('--rank', type=int) + single_machine_parser = subparsers.add_parser('single') + add_default_arguments(launch_parser) + add_default_arguments(remote_parser) + add_default_arguments(single_machine_parser) + + remote_parser.add_argument('rank', type=int) remote_parser.add_argument('--nic', type=str, default=None) remote_parser.add_argument('--host', type=str, default=None) - add_default_arguments(remote_parser) + + # single_parser = subparsers.add_parser('single', help='single help') + # 
single_parser.add_argument('config') + # util_parser = subparsers.add_parser('util', help='util help') + # util_parser.add_argument('action') + # print(sys.argv) args = parser.parse_args() - if args.mode == 'remote': - if args.rank is None or args.host is None or args.world_size is None or args.nic is None: - print('Missing rank, host, world-size, or nic argument when in \'remote\' mode!') - parser.print_help() - exit(1) - world_size = int(args.world_size) - master_address = args.host - nic = args.nic - rank = int(args.rank) - if rank == 0: - print('Remote mode only supports ranks > 0!') - exit(1) - print(f'rank={args.rank}, world_size={world_size}, host={master_address}, args=None, nic={nic}') - run_single(rank=args.rank, world_size=world_size, host=master_address, args=None, nic=nic) + if args.action == 'launch-util': + pass + # run_single(Path(args.config)) + if args.action == 'remote': + run_remote(Path(args.config), args.rank, args.nic, args.host) else: - with open(args.config) as file: - sleep_time = random.uniform(0, 5.0) - time.sleep(sleep_time) - cfg = BareConfig() - yaml_data = yaml.load(file, Loader=yaml.FullLoader) - cfg.merge_yaml(yaml_data) - if args.mode == 'single': - if args.rank is None: - print('Missing rank argument when in \'single\' mode!') - parser.print_help() - exit(1) - world_size = args.world_size - master_address = args.host - nic = args.nic - - if not world_size: - world_size = yaml_data['system']['clients']['amount'] + 1 - if not master_address: - master_address = yaml_data['system']['federator']['hostname'] - if not nic: - nic = yaml_data['system']['federator']['nic'] - print(f'rank={args.rank}, world_size={world_size}, host={master_address}, args=cfg, nic={nic}') - run_single(rank=args.rank, world_size=world_size, host=master_address, args=cfg, nic=nic) - else: - run_spawn(cfg) - -if __name__ == "__main__": - main() \ No newline at end of file + # Run single machine mode + run_single(Path(args.config)) + + # if args.mode == 'single': 
+ # print('Single') + # c = Config(optimizer=Optimizations.fedprox) + # print(isinstance(Config.aggregation, Enum)) + # config = Config.FromYamlFile(args.config) + # + # auto = config.optimizer + # print(config) + # print('Parsed') + + # print(args) \ No newline at end of file diff --git a/fltk/core/client.py b/fltk/core/client.py index 21e922f3..3f0ed23a 100644 --- a/fltk/core/client.py +++ b/fltk/core/client.py @@ -10,7 +10,7 @@ class Client(Node): - + running = False def __init__(self, id: int, rank: int, world_size: int, config: Config): super().__init__(id, rank, world_size, config) @@ -22,6 +22,23 @@ def __init__(self, id: int, rank: int, world_size: int, config: Config): self.config.scheduler_gamma, self.config.min_lr) + def remote_registration(self): + self.logger.info('Sending registration') + self.message('federator', 'ping', 'new_sender', be_weird=True) + self.message('federator', 'register_client', self.id, self.rank) + self.running = True + self._event_loop() + + def stop_client(self): + self.logger.info('Got call to stop event loop') + self.running = False + + def _event_loop(self): + self.logger.info('Starting event loop') + while self.running: + time.sleep(0.1) + self.logger.info('Exiting node') + def train(self, num_epochs: int): start_time = time.time() @@ -47,10 +64,11 @@ def train(self, num_epochs: int): running_loss += loss.item() # Mark logging update step if i % self.config.log_interval == 0: - # self.logger.info( - # '[%d, %5d] loss: %.3f' % (num_epochs, i, running_loss / self.config.log_interval)) + self.logger.info( + '[%s] [%d, %5d] loss: %.3f' % (self.id, num_epochs, i, running_loss / self.config.log_interval)) final_running_loss = running_loss / self.config.log_interval running_loss = 0.0 + # break end_time = time.time() duration = end_time - start_time diff --git a/fltk/core/federator.py b/fltk/core/federator.py index 70b65a38..43d9c392 100644 --- a/fltk/core/federator.py +++ b/fltk/core/federator.py @@ -14,6 +14,7 @@ from dataclasses 
import dataclass from fltk.util.data_container import DataContainer, FederatorRecord, ClientRecord +from fltk.strategy import get_aggregation NodeReference = Union[Node, str] @dataclass @@ -36,11 +37,14 @@ def __init__(self, id: int, rank: int, world_size: int, config: Config): self.loss_function = self.config.get_loss_function()() self.num_rounds = config.rounds self.config = config + config.output_path = Path(config.output_path) / config.exp_name / f'{config.name}_r{config.replication_id}' self.exp_data = DataContainer('federator', config.output_path, FederatorRecord, config.save_data_append) + self.aggregation_method = get_aggregation(config.aggregation) def create_clients(self): + self.logger.info('Creating clients') if self.config.single_machine: # Create direct clients world_size = self.config.num_clients + 1 @@ -49,13 +53,20 @@ def create_clients(self): client = Client(client_name, client_id, world_size, copy.deepcopy(self.config)) self.clients.append(LocalClient(client_name, client, 0, DataContainer(client_name, self.config.output_path, ClientRecord, self.config.save_data_append))) + self.logger.info(f'Client "{client_name}" created') def register_client(self, client_name, rank): + self.logger.info(f'Got new client registration from client {client_name}') if self.config.single_machine: self.logger.warning('This function should not be called when in single machine mode!') - self.clients.append(LocalClient(client_name, client_name, 0, DataContainer(client_name, self.config.output_path, + self.clients.append(LocalClient(client_name, client_name, rank, DataContainer(client_name, self.config.output_path, ClientRecord, self.config.save_data_append))) + def stop_all_clients(self): + for client in self.clients: + self.message(client.ref, Client.stop_client) + + def _num_clients_online(self) -> int: return len(self.clients) @@ -93,6 +104,9 @@ def run(self): while not self._all_clients_online(): self.logger.info(f'Waiting for all clients to come online. 
Waiting for {self.world_size - 1 -self._num_clients_online()} clients') time.sleep(2) + self.logger.info('All clients are online') + # self.logger.info('Running') + # time.sleep(10) self.client_load_data() self.get_client_data_sizes() self.clients_ready() @@ -169,18 +183,58 @@ def exec_round(self, id: int): # Actual training calls client_weights = {} client_sizes = {} - pbar = tqdm(selected_clients) - for client in pbar: - pbar.set_description(f'[Round {id:>3}] Running clients') - train_loss, weights, accuracy, test_loss, round_duration, train_duration, test_duration = self.message(client.ref, Client.exec_round, num_epochs) + # pbar = tqdm(selected_clients) + # for client in pbar: + + # Client training + training_futures: List[torch.Future] = [] + + + + def training_cb(fut: torch.Future, client: LocalClient): + train_loss, weights, accuracy, test_loss, round_duration, train_duration, test_duration = fut.wait() client_weights[client.name] = weights client_data_size = self.message(client.ref, Client.get_client_datasize) client_sizes[client.name] = client_data_size - client.exp_data.append(ClientRecord(id, train_duration, test_duration, round_duration, num_epochs, 0, accuracy, train_loss, test_loss)) - # self.logger.info(f'[Round {id:>3}] Client {client} has a accuracy of {accuracy}, train loss={train_loss}, test loss={test_loss},datasize={client_data_size}') + self.logger.info(f'Training callback for client {client.name}') + client.exp_data.append( + ClientRecord(id, train_duration, test_duration, round_duration, num_epochs, 0, accuracy, train_loss, + test_loss)) + + for client in selected_clients: + # future: torch.Future + # if not self.real_time: + # future = torch.futures.Future() + # future.set_result(self.message(client.ref, Client.exec_round, num_epochs)) + # future.then(lambda x: training_cb(x, client)) + # training_futures.append(future) + # else: + future = self.message_async(client.ref, Client.exec_round, num_epochs) + future.then(lambda x: training_cb(x, 
client)) + training_futures.append(future) + + def all_futures_done(futures: List[torch.Future])->bool: + return all(map(lambda x: x.done(), futures)) + + while not all_futures_done(training_futures): + time.sleep(0.1) + # self.logger.info(f'Waiting for other clients') + + self.logger.info(f'Continue with rest [1]') + + + # for client in selected_clients: + # # pbar.set_description(f'[Round {id:>3}] Running clients') + # train_loss, weights, accuracy, test_loss, round_duration, train_duration, test_duration = self.message(client.ref, Client.exec_round, num_epochs) + # client_weights[client.name] = weights + # client_data_size = self.message(client.ref, Client.get_client_datasize) + # client_sizes[client.name] = client_data_size + # client.exp_data.append(ClientRecord(id, train_duration, test_duration, round_duration, num_epochs, 0, accuracy, train_loss, test_loss)) + # # self.logger.info(f'[Round {id:>3}] Client {client} has a accuracy of {accuracy}, train loss={train_loss}, test loss={test_loss},datasize={client_data_size}') # updated_model = FedAvg(client_weights, client_sizes) - updated_model = average_nn_parameters_simple(list(client_weights.values())) + updated_model = self.aggregation_method(client_weights, client_sizes) + # updated_model = average_nn_parameters_simple(list(client_weights.values())) self.update_nn_parameters(updated_model) test_accuracy, test_loss = self.test(self.net) diff --git a/fltk/core/node.py b/fltk/core/node.py index 17d594a0..6d8aac0c 100644 --- a/fltk/core/node.py +++ b/fltk/core/node.py @@ -46,6 +46,7 @@ def __init__(self, id: int, rank: int, world_size: int, config: Config): self.id = id self.rank = rank self.world_size = world_size + self.real_time = config.real_time global global_vars global_vars['self'] = self self._config(config) @@ -77,8 +78,19 @@ def is_ready(self): @staticmethod def _receive(method: Callable, sender: str, *args, **kwargs): global global_vars + # print('_receive') + # print(global_vars) global_self = 
global_vars['self'] - return method(global_self, *args, **kwargs) + # print(type(method)) + # print(type(global_self)) + if type(method) is str: + # print(f'Retrieving method from string: "{method}"') + method = getattr(global_self, method) + return method(*args, **kwargs) + else: + # print(method) + # print(global_self, *args, kwargs) + return method(global_self, *args, **kwargs) # def _lookup_reference(self, node_name: str): @@ -153,13 +165,27 @@ def update_nn_parameters(self, new_params, is_offloaded_model = False): self.net.load_state_dict(copy.deepcopy(new_params), strict=True) # self.logger.info(f'Weights of the model are updated') - def message(self, other_node: str, method: Callable, *args, **kwargs): + def message(self, other_node: str, method: Callable, *args, **kwargs) -> torch.Future: if self.real_time: func = Node._receive args_list = [method, self.id] + list(args) return rpc.rpc_sync(other_node, func, args=args_list, kwargs=kwargs) return method(other_node, *args, **kwargs) + def message_async(self, other_node: str, method: Callable, *args, **kwargs) -> torch.Future: + if self.real_time: + func = Node._receive + args_list = [method, self.id] + list(args) + return rpc.rpc_async(other_node, func, args=args_list, kwargs=kwargs) + # Wrap inside a future to keep the logic the same + future = torch.futures.Future() + future.set_result(method(other_node, *args, **kwargs)) + return future + + # def register_client(self, client_name, rank): + # print(f'self={self}') + # self.logger.info(f'[Default Implementation!] 
Got new client registration from client {client_name}') + def ping(self, sender: str, be_weird=False): self.logger.info(f'Pong from {self.id}, got call from {sender} [{self.counter}]') # print(f'Pong from {self.id}, got call from {sender} [{self.counter}]') diff --git a/fltk/strategy/aggregation/FedAvg.py b/fltk/strategy/aggregation/FedAvg.py index 98a72396..041f4628 100644 --- a/fltk/strategy/aggregation/FedAvg.py +++ b/fltk/strategy/aggregation/FedAvg.py @@ -1,6 +1,6 @@ -def FedAvg(parameters, sizes): +def fed_avg(parameters, sizes): new_params = {} sum_size = 0 for client in parameters: diff --git a/fltk/strategy/aggregation/__init__.py b/fltk/strategy/aggregation/__init__.py index 696cca51..fca94c72 100644 --- a/fltk/strategy/aggregation/__init__.py +++ b/fltk/strategy/aggregation/__init__.py @@ -1,2 +1,13 @@ -from .FedAvg import FedAvg +from fltk.util.definitions import Aggregations +from .FedAvg import fed_avg from .aggregation import average_nn_parameters, average_nn_parameters_simple + + +def get_aggregation(name: Aggregations): + enum_type = Aggregations(name.value) + aggregations_dict = { + Aggregations.fedavg: fed_avg, + Aggregations.sum: lambda x: x, + Aggregations.avg: lambda x: x*2 + } + return aggregations_dict[enum_type] \ No newline at end of file diff --git a/fltk/strategy/optimization/__init__.py b/fltk/strategy/optimization/__init__.py index e4a501ce..a38c3de0 100644 --- a/fltk/strategy/optimization/__init__.py +++ b/fltk/strategy/optimization/__init__.py @@ -1,7 +1,7 @@ import torch from .fedprox import FedProx from .FedNova import FedNova -from ...util.definitions import Optimizations +from fltk.util.definitions import Optimizations def get_optimizer(name: Optimizations): diff --git a/fltk/util/config.py b/fltk/util/config.py index 86266a22..a6811735 100644 --- a/fltk/util/config.py +++ b/fltk/util/config.py @@ -1,13 +1,19 @@ +import copy from dataclasses import dataclass +from enum import Enum, EnumMeta from pathlib import Path +from typing 
import Type import torch +import yaml -from fltk.util.definitions import Dataset, Nets, DataSampler, Optimizations, LogLevel +from fltk.util.definitions import Dataset, Nets, DataSampler, Optimizations, LogLevel, Aggregations @dataclass class Config: + # optimizer: Optimizations + name: str = '' batch_size: int = 1 test_batch_size: int = 1000 rounds: int = 2 @@ -20,36 +26,53 @@ class Config: scheduler_step_size: int = 50 scheduler_gamma: float = 0.5 min_lr: float = 1e-10 - optimizer = Optimizations.sgd + # Enum + optimizer: Optimizations = Optimizations.sgd optimizer_args = { 'lr': lr, 'momentum': momentum } loss_function = torch.nn.CrossEntropyLoss - + # Enum log_level: LogLevel = LogLevel.DEBUG num_clients: int = 10 clients_per_round: int = 2 distributed: bool = True single_machine: bool = False - + # Enum + aggregation: Aggregations = Aggregations.fedavg + # Enum dataset_name: Dataset = Dataset.mnist + # Enum net_name: Nets = Nets.mnist_cnn default_model_folder_path: str = "default_models" data_path: str = "data" + # Enum data_sampler: DataSampler = DataSampler.uniform data_sampler_args = [] rank: int = 0 world_size: int = 0 + replication_id: int = None + exp_name: str = 'experiment' + + real_time : bool = False + # Save data in append mode. Thereby flushing on every append to file. 
# This could be useful when a system is likely to crash midway an experiment save_data_append: bool = False - output_path: Path = Path('output_test_2') + def __init__(self, **kwargs) -> None: + enum_fields = [x for x in self.__dataclass_fields__.items() if isinstance(x[1].type, Enum) or isinstance(x[1].type, EnumMeta)] + for name, field in enum_fields: + if name in kwargs and isinstance(kwargs[name], str): + kwargs[name] = field.type(kwargs[name]) + for name, value in kwargs.items(): + self.__setattr__(name, value) + def get_default_model_folder_path(self): return self.default_model_folder_path @@ -73,3 +96,12 @@ def get_data_path(self): def get_loss_function(self): return self.loss_function + + @classmethod + def FromYamlFile(cls, path: Path): + print(f'Loading yaml from {path.absolute()}') + with open(path) as file: + content = yaml.safe_load(file) + for k, v in content.items(): + print(f'Inserting key "{k}" into config') + return cls(**content) diff --git a/fltk/util/data_container.py b/fltk/util/data_container.py index d0500820..6a6f1350 100644 --- a/fltk/util/data_container.py +++ b/fltk/util/data_container.py @@ -54,6 +54,7 @@ def __init__(self, name: str, output_location: Path, record_type: DataRecord, ap self.records = [] self.file_name = f'{name}.csv' self.name = name + output_location = Path(output_location) output_location.mkdir(parents=True, exist_ok=True) self.file_path = output_location / self.file_name self.append_mode = append_mode diff --git a/fltk/util/definitions.py b/fltk/util/definitions.py index c81f9a4f..2492b062 100644 --- a/fltk/util/definitions.py +++ b/fltk/util/definitions.py @@ -7,9 +7,10 @@ # 6. Optimizers # ############################### # Use enums instead of dataclasses? 
-from enum import Enum +from enum import Enum, unique +@unique class DataSampler(Enum): uniform = "uniform" q_sampler = "q sampler" @@ -21,19 +22,20 @@ class DataSampler(Enum): n_labels = "n labels" +@unique class Optimizations(Enum): sgd = 'SGD' fedprox = 'FedProx' fednova = 'FedNova' +@unique class Dataset(Enum): cifar10 = 'cifar10' cifar100 = 'cifar100' fashion_mnist = 'fashion-mnist' mnist = 'mnist' - class LogLevel(Enum): CRITICAL = 50 FATAL = CRITICAL @@ -44,13 +46,14 @@ class LogLevel(Enum): DEBUG = 10 NOTSET = 0 - +@unique class Aggregations(Enum): avg = 'Avg' - fed_avg = 'FedAvg' + fedavg = 'FedAvg' sum = 'Sum' +@unique class Nets(Enum): cifar100_resnet = "Cifar100ResNet" cifar100_vgg = "Cifar100VGG" diff --git a/fltk/util/generate_docker_compose.py b/fltk/util/generate_docker_compose.py index 8ac761a4..b58233d3 100644 --- a/fltk/util/generate_docker_compose.py +++ b/fltk/util/generate_docker_compose.py @@ -1,10 +1,15 @@ import sys +from pathlib import Path + import yaml import copy import argparse -global_template_path = './deploy/templates' +# global_template_path = './deploy/templates' +global_template_path = Path(__file__).absolute().parent.parent.parent / 'deploy' / 'templates' +global_template_path = global_template_path.__str__() +print(global_template_path) def load_system_template(template_path = global_template_path): print(f'Loading system template from {template_path}/system_stub.yml') with open(f'{template_path}/system_stub.yml') as file: @@ -17,7 +22,7 @@ def load_client_template(type='default', template_path = global_template_path): return documents def get_deploy_path(name: str): - return f'./deploy/{name}' + return f'{Path(global_template_path).parent}/{name}' def generate_client(id, template: dict, world_size: int, type='default', cpu_set=''): From 849021f1472b35a22d2c3f39bc3a2cce49cc25b3 Mon Sep 17 00:00:00 2001 From: bacox Date: Fri, 11 Mar 2022 16:27:07 +0100 Subject: [PATCH 66/73] Add systes generators --- configs/dev_mnist/descr.yaml 
| 1 + configs/dev_mnist/exps/fedavg.yaml | 9 +- configs/dev_mnist/exps/fedavg_direct.yaml | 38 +++++ configs/dev_mnist/fedavg.cfg.yaml | 4 +- configs/dev_mnist/fedavg_direct.cfg.yaml | 5 + configs/dev_mnist/gen.py | 9 +- configs/dev_mnist/run.py | 2 +- configs/dev_mnist_all/exps/fedavg.yaml | 1 + deploy/dev/client_stub_default.yml | 2 + deploy/dev/client_stub_fast.yml | 2 + deploy/dev/client_stub_medium.yml | 2 + deploy/dev/client_stub_slow.yml | 2 + deploy/dev/system_stub.yml | 2 + deploy/dev_generate/client_stub_medium.yml | 26 ++++ deploy/dev_generate/description.yml | 19 +++ deploy/dev_generate/stub_default.yml | 26 ++++ deploy/dev_generate/stub_fast.yml | 25 ++++ deploy/dev_generate/system_stub.yml | 29 ++++ fltk/__main__.py | 35 +++-- fltk/core/federator.py | 28 ++-- fltk/util/generate_docker_compose_2.py | 164 +++++++++++++++++++++ fltk/util/generate_experiments.py | 47 ++++++ 22 files changed, 444 insertions(+), 34 deletions(-) create mode 100644 configs/dev_mnist/exps/fedavg_direct.yaml create mode 100644 configs/dev_mnist/fedavg_direct.cfg.yaml create mode 100644 deploy/dev_generate/client_stub_medium.yml create mode 100644 deploy/dev_generate/description.yml create mode 100644 deploy/dev_generate/stub_default.yml create mode 100644 deploy/dev_generate/stub_fast.yml create mode 100644 deploy/dev_generate/system_stub.yml create mode 100644 fltk/util/generate_docker_compose_2.py create mode 100644 fltk/util/generate_experiments.py diff --git a/configs/dev_mnist/descr.yaml b/configs/dev_mnist/descr.yaml index d4357836..87b954ba 100644 --- a/configs/dev_mnist/descr.yaml +++ b/configs/dev_mnist/descr.yaml @@ -29,3 +29,4 @@ system: # nic: 'enp3s0' clients: amount: 2 +num_clients: 2 diff --git a/configs/dev_mnist/exps/fedavg.yaml b/configs/dev_mnist/exps/fedavg.yaml index 4c7ef222..89dc8a37 100644 --- a/configs/dev_mnist/exps/fedavg.yaml +++ b/configs/dev_mnist/exps/fedavg.yaml @@ -11,7 +11,7 @@ profiling_time: 100 warmup_round: false output_location: 
'output/dev_p2' tensor_board_active: true -clients_per_round: 2 +clients_per_round: 4 node_groups: slow: [1, 1] medium: [2, 2] @@ -28,8 +28,11 @@ system: nic: 'eth0' # nic: 'enp3s0' clients: - amount: 2 + amount: 4 +num_clients: 4 # Individual configuration offload_stategy: vanilla deadline: 500 -experiment_prefix: 'dev_mnist_fedavg' +single_machine: false +real_time: true +experiment_prefix: 'util_fedavg' diff --git a/configs/dev_mnist/exps/fedavg_direct.yaml b/configs/dev_mnist/exps/fedavg_direct.yaml new file mode 100644 index 00000000..ab294bb3 --- /dev/null +++ b/configs/dev_mnist/exps/fedavg_direct.yaml @@ -0,0 +1,38 @@ +--- +# Experiment configuration +total_epochs: 3 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: false +profiling_time: 100 +warmup_round: false +output_location: 'output/dev_p2' +tensor_board_active: true +clients_per_round: 2 +node_groups: + slow: [1, 1] + medium: [2, 2] + fast: [3, 3] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +system: + federator: + # hostname: '131.180.203.94' + hostname: '10.5.0.11' + nic: 'eth0' + # nic: 'enp3s0' + clients: + amount: 2 +num_clients: 2 +# Individual configuration +offload_stategy: vanilla +deadline: 500 +single_machine: true +real_time: false +experiment_prefix: 'util_fedavg_direct' diff --git a/configs/dev_mnist/fedavg.cfg.yaml b/configs/dev_mnist/fedavg.cfg.yaml index ecb5bc3e..17bd81b1 100644 --- a/configs/dev_mnist/fedavg.cfg.yaml +++ b/configs/dev_mnist/fedavg.cfg.yaml @@ -1,3 +1,5 @@ # Individual configuration offload_stategy: vanilla -deadline: 500 \ No newline at end of file +deadline: 500 +single_machine: false +real_time: true \ No 
newline at end of file diff --git a/configs/dev_mnist/fedavg_direct.cfg.yaml b/configs/dev_mnist/fedavg_direct.cfg.yaml new file mode 100644 index 00000000..25a64bda --- /dev/null +++ b/configs/dev_mnist/fedavg_direct.cfg.yaml @@ -0,0 +1,5 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500 +single_machine: true +real_time: false \ No newline at end of file diff --git a/configs/dev_mnist/gen.py b/configs/dev_mnist/gen.py index 168833f0..f268dc45 100644 --- a/configs/dev_mnist/gen.py +++ b/configs/dev_mnist/gen.py @@ -1,15 +1,14 @@ from pathlib import Path if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' + base_path = Path(__file__).parent + descr_path = base_path / 'descr.yaml' - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] + exp_cfg_list = [x for x in base_path.iterdir() if '.cfg' in x.suffixes] descr_data = '' with open(descr_path) as descr_f: descr_data = descr_f.read() - exps_path = path / 'exps' + exps_path = base_path / 'exps' exps_path.mkdir(parents=True, exist_ok=True) for exp_cfg in exp_cfg_list: exp_cfg_data = '' diff --git a/configs/dev_mnist/run.py b/configs/dev_mnist/run.py index 8e1fad76..3714a567 100644 --- a/configs/dev_mnist/run.py +++ b/configs/dev_mnist/run.py @@ -5,7 +5,7 @@ if __name__ == '__main__': name = 'dev' generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' + base_path = f'{Path(__file__).parent}' exp_list = [ 'fedavg.yaml', ] diff --git a/configs/dev_mnist_all/exps/fedavg.yaml b/configs/dev_mnist_all/exps/fedavg.yaml index 5e90d7fe..391bf17c 100644 --- a/configs/dev_mnist_all/exps/fedavg.yaml +++ b/configs/dev_mnist_all/exps/fedavg.yaml @@ -33,3 +33,4 @@ system: offload_stategy: vanilla deadline: 500000 experiment_prefix: 'dev_mnist_all_fedavg' +single_machine: false diff --git a/deploy/dev/client_stub_default.yml b/deploy/dev/client_stub_default.yml index 3a1774cf..e8e5b9ba 100644 
--- a/deploy/dev/client_stub_default.yml +++ b/deploy/dev/client_stub_default.yml @@ -14,6 +14,8 @@ client_name: # name can be anything - RANK={rank} - WORLD_SIZE={world_size} - EXP_CONFIG=${EXP_CONFIG_FILE} + - MASTER_HOSTNAME=10.5.0.11 + - NIC=eth0 ports: - "5002:5000" # {machine-port}:{docker-port} depends_on: diff --git a/deploy/dev/client_stub_fast.yml b/deploy/dev/client_stub_fast.yml index f03012ff..4e8d2d78 100644 --- a/deploy/dev/client_stub_fast.yml +++ b/deploy/dev/client_stub_fast.yml @@ -14,6 +14,8 @@ client_name: # name can be anything - RANK={rank} - WORLD_SIZE={world_size} - EXP_CONFIG=${EXP_CONFIG_FILE} + - MASTER_HOSTNAME=10.5.0.11 + - NIC=eth0 ports: - "5002:5000" # {machine-port}:{docker-port} depends_on: diff --git a/deploy/dev/client_stub_medium.yml b/deploy/dev/client_stub_medium.yml index 49abdeb2..9d096797 100644 --- a/deploy/dev/client_stub_medium.yml +++ b/deploy/dev/client_stub_medium.yml @@ -14,6 +14,8 @@ client_name: # name can be anything - RANK={rank} - WORLD_SIZE={world_size} - EXP_CONFIG=${EXP_CONFIG_FILE} + - MASTER_HOSTNAME=10.5.0.11 + - NIC=eth0 ports: - "5002:5000" # {machine-port}:{docker-port} depends_on: diff --git a/deploy/dev/client_stub_slow.yml b/deploy/dev/client_stub_slow.yml index 9cbdabb5..deb37f37 100644 --- a/deploy/dev/client_stub_slow.yml +++ b/deploy/dev/client_stub_slow.yml @@ -14,6 +14,8 @@ client_name: # name can be anything - RANK={rank} - WORLD_SIZE={world_size} - EXP_CONFIG=${EXP_CONFIG_FILE} + - MASTER_HOSTNAME=10.5.0.11 + - NIC=eth0 ports: - "5002:5000" # {machine-port}:{docker-port} depends_on: diff --git a/deploy/dev/system_stub.yml b/deploy/dev/system_stub.yml index c84b2ecb..37404525 100644 --- a/deploy/dev/system_stub.yml +++ b/deploy/dev/system_stub.yml @@ -16,6 +16,8 @@ services: - RANK=0 - WORLD_SIZE={world_size} - EXP_CONFIG=${EXP_CONFIG_FILE} + - MASTER_HOSTNAME=10.5.0.11 + - NIC=eth0 ports: - "5000:5000" # {machine-port}:{docker-port} networks: diff --git 
a/deploy/dev_generate/client_stub_medium.yml b/deploy/dev_generate/client_stub_medium.yml new file mode 100644 index 00000000..9d096797 --- /dev/null +++ b/deploy/dev_generate/client_stub_medium.yml @@ -0,0 +1,26 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . # look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - MASTER_HOSTNAME=10.5.0.11 + - NIC=eth0 + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '2' diff --git a/deploy/dev_generate/description.yml b/deploy/dev_generate/description.yml new file mode 100644 index 00000000..4a6f45b6 --- /dev/null +++ b/deploy/dev_generate/description.yml @@ -0,0 +1,19 @@ +federator: + stub-name: system_stub.yml + pin-cores: true + num-cores: 1 +clients: + fast: + stub-name: stub_default.yml + amount: 20 + pin-cores: true + num-cores: 1 + cpu-speed: 0.5 + cpu-variation: 0.16 + slow: + stub-name: stub_default.yml + amount: 0 + pin-cores: true + num-cores: 1 + cpu-speed: 1 + cpu-variation: 0 diff --git a/deploy/dev_generate/stub_default.yml b/deploy/dev_generate/stub_default.yml new file mode 100644 index 00000000..bc437b1c --- /dev/null +++ b/deploy/dev_generate/stub_default.yml @@ -0,0 +1,26 @@ +client_name: # name can be anything +# container_name: federation-lab-client2 # what the name for this container would be + cpuset: '{cpu_set}' + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: + - ./data:/opt/federation-lab/data +# - ./docker_data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - MASTER_HOSTNAME=10.5.0.11 + - NIC=eth0 + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: '{num_cpus}' \ No newline at end of file diff --git a/deploy/dev_generate/stub_fast.yml b/deploy/dev_generate/stub_fast.yml new file mode 100644 index 00000000..3b4aee9c --- /dev/null +++ b/deploy/dev_generate/stub_fast.yml @@ -0,0 +1,25 @@ +client_name: # name can be anything + cpuset: {cpu_set} + restart: "no" # if it crashes for example + build: . # look for the docker file where this file is currently located + volumes: +# - ./docker_data:/opt/federation-lab/data + - ./data:/opt/federation-lab/data + - ./default_models:/opt/federation-lab/default_models + - ./data_loaders:/opt/federation-lab/data_loaders + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK={rank} + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - MASTER_HOSTNAME=10.5.0.11 + - NIC=eth0 + ports: + - "5002:5000" # {machine-port}:{docker-port} + depends_on: + - "fl_server" + deploy: + resources: + limits: + cpus: {num_cpus} diff --git a/deploy/dev_generate/system_stub.yml b/deploy/dev_generate/system_stub.yml new file mode 100644 index 00000000..37404525 --- /dev/null +++ b/deploy/dev_generate/system_stub.yml @@ -0,0 +1,29 @@ +# creating a multi-container docker +version: "3.3" +services: + fl_server: # name can be anything + container_name: federation-lab-server # what the name for this container would be + cpuset: '0-2' + restart: "no" # if it crashes for example + build: . 
# look for the docker file where this file is currently located + volumes: +# - ./data/MNIST:/opt/federation-lab/data/MNIST + - ./data:/opt/federation-lab/data + - ./output:/opt/federation-lab/output + - ./fltk:/opt/federation-lab/fltk + environment: + - PYTHONUNBUFFERED=1 + - RANK=0 + - WORLD_SIZE={world_size} + - EXP_CONFIG=${EXP_CONFIG_FILE} + - MASTER_HOSTNAME=10.5.0.11 + - NIC=eth0 + ports: + - "5000:5000" # {machine-port}:{docker-port} + networks: + default: + ipv4_address: 10.5.0.11 +networks: + default: + external: + name: local_network_dev \ No newline at end of file diff --git a/fltk/__main__.py b/fltk/__main__.py index cf614012..8a6e8990 100644 --- a/fltk/__main__.py +++ b/fltk/__main__.py @@ -93,9 +93,9 @@ from fltk.core.client import Client -print(sys.path) +# print(sys.path) # from fltk.core.federator import Federator as Fed -print(list(Path.cwd().iterdir())) +# print(list(Path.cwd().iterdir())) import argparse from enum import Enum from pathlib import Path @@ -103,6 +103,8 @@ from fltk.core.federator import Federator from fltk.util.config import Config from fltk.util.definitions import Aggregations, Optimizations +from fltk.util.generate_experiments import generate + def run_single(config_path: Path): @@ -152,7 +154,7 @@ def run_remote(config_path: Path, rank: int, nic=None, host=None): init_method=f'tcp://{os.environ["MASTER_ADDR"]}:{os.environ["MASTER_PORT"]}' ) if rank != 0: - print(f'Starting worker {rank}') + print(f'Starting worker {rank} with world size={config.world_size}') rpc.init_rpc( f"client{rank}", rank=rank, @@ -194,10 +196,20 @@ def add_default_arguments(parser): parser = argparse.ArgumentParser(prog='fltk', description='Experiment launcher for the Federated Learning Testbed (fltk)') subparsers = parser.add_subparsers(dest="action", required=True) - launch_parser = subparsers.add_parser('launch-util') + util_docker_parser = subparsers.add_parser('util-docker') + util_docker_parser.add_argument('name', type=str) + 
util_docker_parser.add_argument('--clients', type=int) + util_generate_parser = subparsers.add_parser('util-generate') + util_generate_parser.add_argument('path', type=str) + util_run_parser = subparsers.add_parser('util-run') + util_run_parser.add_argument('path', type=str) + + # launch_parser.add_argument('action', choices=['docker', 'generate', 'run']) + # launch_parser.add_argument('path', help='path or key') + remote_parser = subparsers.add_parser('remote') single_machine_parser = subparsers.add_parser('single') - add_default_arguments(launch_parser) + # add_default_arguments(launch_parser) add_default_arguments(remote_parser) add_default_arguments(single_machine_parser) @@ -211,10 +223,15 @@ def add_default_arguments(parser): # util_parser.add_argument('action') # print(sys.argv) args = parser.parse_args() - if args.action == 'launch-util': - pass - # run_single(Path(args.config)) - if args.action == 'remote': + if args.action == 'util-docker': + print('docker') + elif args.action == 'util-generate': + path = Path(args.path) + print(f'generate for {path}') + generate(path) + elif args.action == 'util-run': + print('run') # run_single(Path(args.config)) + elif args.action == 'remote': run_remote(Path(args.config), args.rank, args.nic, args.host) else: # Run single machine mode diff --git a/fltk/core/federator.py b/fltk/core/federator.py index 43d9c392..8151d8a7 100644 --- a/fltk/core/federator.py +++ b/fltk/core/federator.py @@ -25,6 +25,8 @@ class LocalClient: exp_data: DataContainer +def cb_factory(future: torch.Future, method, *args, **kwargs): + future.then(lambda x: method(x, *args, **kwargs)) class Federator(Node): clients: List[LocalClient] = [] @@ -190,27 +192,23 @@ def exec_round(self, id: int): training_futures: List[torch.Future] = [] + # def cb_factory(future: torch.Future, method, client, client_weights, client_sizes, num_epochs, name): + # future.then(lambda x: method(x, client, client_weights, client_sizes, num_epochs, client.name)) - def 
training_cb(fut: torch.Future, client: LocalClient): + def training_cb(fut: torch.Future, client_ref: LocalClient, client_weights, client_sizes, num_epochs): train_loss, weights, accuracy, test_loss, round_duration, train_duration, test_duration = fut.wait() - client_weights[client.name] = weights - client_data_size = self.message(client.ref, Client.get_client_datasize) - client_sizes[client.name] = client_data_size - self.logger.info(f'Training callback for client {client.name}') - client.exp_data.append( + self.logger.info(f'Training callback for client {client_ref.name} with accuracy={accuracy}') + client_weights[client_ref.name] = weights + client_data_size = self.message(client_ref.ref, Client.get_client_datasize) + client_sizes[client_ref.name] = client_data_size + client_ref.exp_data.append( ClientRecord(id, train_duration, test_duration, round_duration, num_epochs, 0, accuracy, train_loss, test_loss)) for client in selected_clients: - # future: torch.Future - # if not self.real_time: - # future = torch.futures.Future() - # future.set_result(self.message(client.ref, Client.exec_round, num_epochs)) - # future.then(lambda x: training_cb(x, client)) - # training_futures.append(future) - # else: future = self.message_async(client.ref, Client.exec_round, num_epochs) - future.then(lambda x: training_cb(x, client)) + cb_factory(future, training_cb, client, client_weights, client_sizes, num_epochs) + self.logger.info(f'Request sent to client {client.name}') training_futures.append(future) def all_futures_done(futures: List[torch.Future])->bool: @@ -221,7 +219,7 @@ def all_futures_done(futures: List[torch.Future])->bool: # self.logger.info(f'Waiting for other clients') self.logger.info(f'Continue with rest [1]') - + time.sleep(3) # for client in selected_clients: # # pbar.set_description(f'[Round {id:>3}] Running clients') diff --git a/fltk/util/generate_docker_compose_2.py b/fltk/util/generate_docker_compose_2.py new file mode 100644 index 00000000..a35bd43d --- 
/dev/null +++ b/fltk/util/generate_docker_compose_2.py @@ -0,0 +1,164 @@ +import copy +from pathlib import Path +from pprint import pprint + +import yaml +import numpy as np + + +def load_yaml_file(file_path: Path): + with open(file_path) as file: + return yaml.full_load(file) + +def generate_client(id, template: dict, world_size: int, type='default', cpu_set=None, num_cpus=1): + local_template = copy.deepcopy(template) + key_name = list(local_template.keys())[0] + container_name = f'client_{type}_{id}' + local_template[container_name] = local_template.pop(key_name) + for key, item in enumerate(local_template[container_name]['environment']): + if item == 'RANK={rank}': + local_template[container_name]['environment'][key] = item.format(rank=id) + if item == 'WORLD_SIZE={world_size}': + local_template[container_name]['environment'][key] = item.format(world_size=world_size) + # for key, item in enumerate(local_template[container_name]): + # if item == 'cpuset: {cpu_set}': + # local_template[container_name][key] = item.format(cpu_set=cpu_set) + + local_template[container_name]['ports'] = [f'{5000+id}:5000'] + if cpu_set: + local_template[container_name]['cpuset'] = f'{cpu_set}' + else: + local_template[container_name].pop('cpuset') + local_template[container_name]['deploy']['resources']['limits']['cpus'] = f'{num_cpus}' + return local_template, container_name + +def gen_client(name: str, client_dict: dict, base_path: Path): + """ + rank (id) + num_cpu + cpu_set + name + """ + client_descr_template = { + 'rank': 0, + 'num_cpu': 1, + 'num_cores': None, + 'name': name, + 'stub-file': 'stub.yml' + } + print(Path.cwd()) + mu = client_dict['cpu-speed'] + sigma = client_dict['cpu-variation'] + n = client_dict['amount'] + np.random.seed(0) + stub_file = base_path / client_dict['stub-name'] + stub_data = load_yaml_file(stub_file) + if client_dict['pin-cores'] is True: + client_descr_template['num_cores'] = client_dict['num-cores'] + client_descr_template['stub-file'] = 
client_dict['stub-name'] + # print(name) + # pprint(stub_data) + client_cpu_speeds = np.abs(np.round(np.random.normal(mu, sigma, size=n), 2)) + client_descriptions = [] + for cpu_speed in client_cpu_speeds: + client_descr = copy.deepcopy(client_descr_template) + client_descr['num_cpu'] = cpu_speed + client_descriptions.append(client_descr) + # client_data = copy.deepcopy(client_dict) + # client_data.pop('cpu-variation') + # print(cpu_speed) + # print(np.random.normal(mu, sigma, size=n)) + # for k, v in client_dict.items(): + # print(k) + return client_descriptions +def generate_clients_proporties(clients_dict: dict, path: Path): + results = [] + for k,v in clients_dict.items(): + results += gen_client(k, v, path) + return results + +def generate_compose_file(path: Path): + """ + Used properties: + - World size + - num clients? + - path to deploy files + - random seed? + """ + # system = { + # + # 'federator': { + # 'stub-name': 'system_stub.yml', + # 'pin-cores': True, + # 'num-cores': 1 + # }, + # 'clients': { + # 'fast': { + # 'stub-name': 'stub_default.yml', + # 'amount': 1, + # 'pin-cores': True, + # 'num-cores': 3, + # 'cpu-speed': 3, + # 'cpu-variation': 0 + # }, + # 'slow': { + # 'stub-name': 'stub_default.yml', + # 'amount': 0, + # 'pin-cores': True, + # 'num-cores': 1, + # 'cpu-speed': 1, + # 'cpu-variation': 0 + # } + # } + # } + system_path = path / 'description.yml' + system = load_yaml_file(system_path) + # path = Path('deploy/dev_generate') + + client_descriptions = generate_clients_proporties(system['clients'], path) + last_core_id = 0 + world_size = len(client_descriptions) + 1 + system_template_path = path / 'system_stub.yml' + + system_template: dict = load_yaml_file(system_template_path) + + for key, item in enumerate(system_template['services']['fl_server']['environment']): + if item == 'WORLD_SIZE={world_size}': + system_template['services']['fl_server']['environment'][key] = item.format(world_size=world_size) + if 
system['federator']['pin-cores']: + cpu_set: str + amount = system['federator']['num-cores'] + if amount > 1: + cpu_set = f'{last_core_id}-{last_core_id+amount-1}' + else: + cpu_set = f'{last_core_id}' + system_template['services']['fl_server']['cpuset'] = cpu_set + last_core_id += amount + else: + system_template['services']['fl_server'].pop('cpuset') + + for idx, client_d in enumerate(client_descriptions): + stub_file = path / client_d['stub-file'] + stub_data = load_yaml_file(stub_file) + cpu_set = None + if client_d['num_cores']: + amount = client_d['num_cores'] + if amount > 1: + cpu_set = f'{last_core_id}-{last_core_id+amount-1}' + else: + cpu_set = f'{last_core_id}' + last_core_id += amount + local_template, container_name = generate_client(idx + 1, stub_data, world_size, client_d['name'], cpu_set, client_d['num_cpu']) + system_template['services'].update(local_template) + print(container_name) + + with open(r'./docker-compose.yml', 'w') as file: + yaml.dump(system_template, file, sort_keys=False) + + + +if __name__ == '__main__': + + path = Path('deploy/dev_generate') + results = generate_compose_file(path) + print('done') \ No newline at end of file diff --git a/fltk/util/generate_experiments.py b/fltk/util/generate_experiments.py new file mode 100644 index 00000000..d2f1eb4e --- /dev/null +++ b/fltk/util/generate_experiments.py @@ -0,0 +1,47 @@ +from pathlib import Path + + +def generate(base_path: Path): + descr_path = base_path / 'descr.yaml' + + exp_cfg_list = [x for x in base_path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = descr_f.read() + exps_path = base_path / 'exps' + exps_path.mkdir(parents=True, exist_ok=True) + for exp_cfg in exp_cfg_list: + exp_cfg_data = '' + with open(exp_cfg) as exp_f: + exp_cfg_data = exp_f.read() + + exp_data = descr_data + exp_cfg_data + exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + filename = 
'.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) + with open(exps_path / filename, mode='w') as f: + f.write(exp_data) + print('Done') + +# if __name__ == '__main__': +# base_path = Path(__file__).parent +# descr_path = base_path / 'descr.yaml' +# +# exp_cfg_list = [x for x in base_path.iterdir() if '.cfg' in x.suffixes] +# descr_data = '' +# with open(descr_path) as descr_f: +# descr_data = descr_f.read() +# exps_path = base_path / 'exps' +# exps_path.mkdir(parents=True, exist_ok=True) +# for exp_cfg in exp_cfg_list: +# exp_cfg_data = '' +# with open(exp_cfg) as exp_f: +# exp_cfg_data = exp_f.read() +# +# exp_data = descr_data + exp_cfg_data +# exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' +# filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) +# with open(exps_path / filename, mode='w') as f: +# f.write(exp_data) +# print('Done') +# +# From 8506f1dec6710fa817dae9e9549a9703d9ef006b Mon Sep 17 00:00:00 2001 From: bacox Date: Tue, 15 Mar 2022 17:58:20 +0100 Subject: [PATCH 67/73] Fix docker high cpu usage --- Dockerfile | 17 ++- configs/dev_mnist/fedavg_direct.cfg.yaml | 5 - deploy/dev_generate/description.yml | 6 +- deploy/dev_generate/stub_default.yml | 1 + deploy/dev_generate/system_stub.yml | 1 + fltk/__main__.py | 158 +++-------------------- fltk/core/federator.py | 11 +- fltk/util/config.py | 5 +- fltk/util/data_container.py | 1 + fltk/util/generate_experiments.py | 111 +++++++++++++++- requirements.txt | 2 +- 11 files changed, 159 insertions(+), 159 deletions(-) delete mode 100644 configs/dev_mnist/fedavg_direct.cfg.yaml diff --git a/Dockerfile b/Dockerfile index 09cdfe0b..b5d5eb0e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,14 @@ +# FROM python:3-alpine + +# RUN mkdir /data +# VOLUME /data + +# EXPOSE 8080 + +# WORKDIR /data + +# CMD ["python", "-m" , "http.server", "8080"] + # Base image to start with FROM ubuntu:20.04 @@ -25,6 +36,8 @@ RUN python3 -m 
pip install -r requirements.txt ENV GLOO_SOCKET_IFNAME=$NIC ENV TP_SOCKET_IFNAME=$NIC +#ENV GLOO_SOCKET_IFNAME=eth0 +#ENV TP_SOCKET_IFNAME=eth0 #RUN mkdir -p ./data/MNIST #COPY ./data/MNIST ../data/MNIST @@ -46,6 +59,6 @@ COPY fltk ./fltk COPY configs ./configs #CMD python3 ./fltk/__main__.py single configs/experiment.yaml --rank=$RANK # CMD python3 -m fltk single configs/experiment_vanilla.yaml --rank=$RANK -#CMD python3 -m fltk single $EXP_CONFIG --rank=$RANK -CMD python3 -m fltk remote $EXP_CONFIG $RANK --nic=$NIC --host=$MASTER_HOSTNAME +# CMD python3 -m fltk single $EXP_CONFIG --rank=$RANK +CMD python3 -m fltk remote $EXP_CONFIG $RANK --nic=$NIC --host=$MASTER_HOSTNAME $OPTIONAL_PARAMS #CMD python3 setup.py \ No newline at end of file diff --git a/configs/dev_mnist/fedavg_direct.cfg.yaml b/configs/dev_mnist/fedavg_direct.cfg.yaml deleted file mode 100644 index 25a64bda..00000000 --- a/configs/dev_mnist/fedavg_direct.cfg.yaml +++ /dev/null @@ -1,5 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500 -single_machine: true -real_time: false \ No newline at end of file diff --git a/deploy/dev_generate/description.yml b/deploy/dev_generate/description.yml index 4a6f45b6..cdfd507d 100644 --- a/deploy/dev_generate/description.yml +++ b/deploy/dev_generate/description.yml @@ -5,11 +5,11 @@ federator: clients: fast: stub-name: stub_default.yml - amount: 20 + amount: 2 pin-cores: true num-cores: 1 - cpu-speed: 0.5 - cpu-variation: 0.16 + cpu-speed: 1 + cpu-variation: 0 slow: stub-name: stub_default.yml amount: 0 diff --git a/deploy/dev_generate/stub_default.yml b/deploy/dev_generate/stub_default.yml index bc437b1c..4023a178 100644 --- a/deploy/dev_generate/stub_default.yml +++ b/deploy/dev_generate/stub_default.yml @@ -16,6 +16,7 @@ client_name: # name can be anything - EXP_CONFIG=${EXP_CONFIG_FILE} - MASTER_HOSTNAME=10.5.0.11 - NIC=eth0 + - OPTIONAL_PARAMS=${OPTIONAL_PARAMS} ports: - "5002:5000" # {machine-port}:{docker-port} depends_on: diff 
--git a/deploy/dev_generate/system_stub.yml b/deploy/dev_generate/system_stub.yml index 37404525..01270c8c 100644 --- a/deploy/dev_generate/system_stub.yml +++ b/deploy/dev_generate/system_stub.yml @@ -18,6 +18,7 @@ services: - EXP_CONFIG=${EXP_CONFIG_FILE} - MASTER_HOSTNAME=10.5.0.11 - NIC=eth0 + - OPTIONAL_PARAMS=${OPTIONAL_PARAMS} ports: - "5000:5000" # {machine-port}:{docker-port} networks: diff --git a/fltk/__main__.py b/fltk/__main__.py index 8a6e8990..2a82b64f 100644 --- a/fltk/__main__.py +++ b/fltk/__main__.py @@ -1,120 +1,20 @@ -# import os -# import random -# import sys -# import time -# -# import torch.distributed.rpc as rpc -# import logging -# -# import yaml -# import argparse -# -# import torch.multiprocessing as mp -# from fltk.federator import Federator -# from fltk.launch import run_single, run_spawn -# from fltk.util.base_config import BareConfig -# -# logging.basicConfig(level=logging.DEBUG) -# -# def add_default_arguments(parser): -# parser.add_argument('--world_size', type=str, default=None, -# help='Number of entities in the world. 
This is the number of clients + 1') -# -# def main(): -# parser = argparse.ArgumentParser(description='Experiment launcher for the Federated Learning Testbed') -# -# subparsers = parser.add_subparsers(dest="mode") -# -# single_parser = subparsers.add_parser('single') -# single_parser.add_argument('config', type=str) -# single_parser.add_argument('--rank', type=int) -# single_parser.add_argument('--nic', type=str, default=None) -# single_parser.add_argument('--host', type=str, default=None) -# add_default_arguments(single_parser) -# -# spawn_parser = subparsers.add_parser('spawn') -# spawn_parser.add_argument('config', type=str) -# add_default_arguments(spawn_parser) -# -# remote_parser = subparsers.add_parser('remote') -# remote_parser.add_argument('--rank', type=int) -# remote_parser.add_argument('--nic', type=str, default=None) -# remote_parser.add_argument('--host', type=str, default=None) -# add_default_arguments(remote_parser) -# args = parser.parse_args() -# if args.mode == 'remote': -# if args.rank is None or args.host is None or args.world_size is None or args.nic is None: -# print('Missing rank, host, world-size, or nic argument when in \'remote\' mode!') -# parser.print_help() -# exit(1) -# world_size = int(args.world_size) -# master_address = args.host -# nic = args.nic -# rank = int(args.rank) -# if rank == 0: -# print('Remote mode only supports ranks > 0!') -# exit(1) -# print(f'rank={args.rank}, world_size={world_size}, host={master_address}, args=None, nic={nic}') -# run_single(rank=args.rank, world_size=world_size, host=master_address, args=None, nic=nic) -# else: -# with open(args.config) as file: -# sleep_time = random.uniform(0, 5.0) -# time.sleep(sleep_time) -# cfg = BareConfig() -# yaml_data = yaml.load(file, Loader=yaml.FullLoader) -# cfg.merge_yaml(yaml_data) -# if args.mode == 'single': -# if args.rank is None: -# print('Missing rank argument when in \'single\' mode!') -# parser.print_help() -# exit(1) -# world_size = args.world_size -# 
master_address = args.host -# nic = args.nic -# -# if not world_size: -# world_size = yaml_data['system']['clients']['amount'] + 1 -# if not master_address: -# master_address = yaml_data['system']['federator']['hostname'] -# if not nic: -# nic = yaml_data['system']['federator']['nic'] -# print(f'rank={args.rank}, world_size={world_size}, host={master_address}, args=cfg, nic={nic}') -# run_single(rank=args.rank, world_size=world_size, host=master_address, args=cfg, nic=nic) -# else: -# run_spawn(cfg) -# -# if __name__ == "__main__": -# main() import os -import sys -from pathlib import Path - from torch.distributed import rpc - from fltk.core.client import Client - -# print(sys.path) -# from fltk.core.federator import Federator as Fed -# print(list(Path.cwd().iterdir())) import argparse -from enum import Enum from pathlib import Path - from fltk.core.federator import Federator from fltk.util.config import Config -from fltk.util.definitions import Aggregations, Optimizations -from fltk.util.generate_experiments import generate - +from fltk.util.generate_experiments import generate, run -def run_single(config_path: Path): +def run_single(config_path: Path, prefix: str = None): # We can iterate over all the experiments in the directory and execute it, as long as the system remains the same! 
# System = machines and its configuration - print(config_path) config = Config.FromYamlFile(config_path) config.world_size = config.num_clients + 1 - config.replication_id = 1 + config.replication_id = prefix federator_node = Federator('federator', 0, config.world_size, config) federator_node.run() @@ -137,10 +37,12 @@ def retrieve_network_params_from_config(config: Config, nic=None, host=None): nic = system_attr['federator']['nic'] return nic, host -def run_remote(config_path: Path, rank: int, nic=None, host=None): + +def run_remote(config_path: Path, rank: int, nic=None, host=None, prefix: str=None): print(config_path, rank) config = Config.FromYamlFile(config_path) config.world_size = config.num_clients + 1 + config.replication_id = prefix nic, host = retrieve_network_params_from_config(config, nic, host) if not nic or not host: print('Missing rank, host, world-size, or nic argument when in \'remote\' mode!') @@ -151,7 +53,9 @@ def run_remote(config_path: Path, rank: int, nic=None, host=None): options = rpc.TensorPipeRpcBackendOptions( num_worker_threads=16, rpc_timeout=0, # infinite timeout - init_method=f'tcp://{os.environ["MASTER_ADDR"]}:{os.environ["MASTER_PORT"]}' + # init_method=f'tcp://{os.environ["MASTER_ADDR"]}:{os.environ["MASTER_PORT"]}' + init_method='env://', + _transports=["uv"] ) if rank != 0: print(f'Starting worker {rank} with world size={config.world_size}') @@ -163,8 +67,6 @@ def run_remote(config_path: Path, rank: int, nic=None, host=None): ) client_node = Client(f'client{rank}', rank, config.world_size, config) client_node.remote_registration() - - # trainer passively waiting for ps to kick off training iterations else: print(f'Starting the ps with world size={config.world_size}') rpc.init_rpc( @@ -175,22 +77,15 @@ def run_remote(config_path: Path, rank: int, nic=None, host=None): ) federator_node = Federator('federator', 0, config.world_size, config) - # federator_node.create_clients() federator_node.run() federator_node.stop_all_clients() 
print('Ending program') - # if rank == 0: - # print('FEDERATOR!') - # else: - # print(f'CLIENT {rank}') - -def main(): - pass def add_default_arguments(parser): - parser.add_argument('config', type=str, - help='') + parser.add_argument('config', type=str, help='') + parser.add_argument('--prefix', type=str, default=None) + if __name__ == '__main__': parser = argparse.ArgumentParser(prog='fltk', description='Experiment launcher for the Federated Learning Testbed (fltk)') @@ -204,12 +99,8 @@ def add_default_arguments(parser): util_run_parser = subparsers.add_parser('util-run') util_run_parser.add_argument('path', type=str) - # launch_parser.add_argument('action', choices=['docker', 'generate', 'run']) - # launch_parser.add_argument('path', help='path or key') - remote_parser = subparsers.add_parser('remote') single_machine_parser = subparsers.add_parser('single') - # add_default_arguments(launch_parser) add_default_arguments(remote_parser) add_default_arguments(single_machine_parser) @@ -217,34 +108,17 @@ def add_default_arguments(parser): remote_parser.add_argument('--nic', type=str, default=None) remote_parser.add_argument('--host', type=str, default=None) - # single_parser = subparsers.add_parser('single', help='single help') - # single_parser.add_argument('config') - # util_parser = subparsers.add_parser('util', help='util help') - # util_parser.add_argument('action') - # print(sys.argv) args = parser.parse_args() if args.action == 'util-docker': - print('docker') + print('Unimplemented!') elif args.action == 'util-generate': path = Path(args.path) print(f'generate for {path}') generate(path) elif args.action == 'util-run': - print('run') # run_single(Path(args.config)) + run(Path(args.path)) elif args.action == 'remote': - run_remote(Path(args.config), args.rank, args.nic, args.host) + run_remote(Path(args.config), args.rank, args.nic, args.host, args.prefix) else: # Run single machine mode - run_single(Path(args.config)) - - # if args.mode == 'single': - # 
print('Single') - # c = Config(optimizer=Optimizations.fedprox) - # print(isinstance(Config.aggregation, Enum)) - # config = Config.FromYamlFile(args.config) - # - # auto = config.optimizer - # print(config) - # print('Parsed') - - # print(args) \ No newline at end of file + run_single(Path(args.config), args.prefix) diff --git a/fltk/core/federator.py b/fltk/core/federator.py index 8151d8a7..4975e0bc 100644 --- a/fltk/core/federator.py +++ b/fltk/core/federator.py @@ -39,7 +39,10 @@ def __init__(self, id: int, rank: int, world_size: int, config: Config): self.loss_function = self.config.get_loss_function()() self.num_rounds = config.rounds self.config = config - config.output_path = Path(config.output_path) / config.exp_name / f'{config.name}_r{config.replication_id}' + prefix_text = '' + if config.replication_id: + prefix_text = f'_r{config.replication_id}' + config.output_path = Path(config.output_path) / f'{config.experiment_prefix}{prefix_text}' self.exp_data = DataContainer('federator', config.output_path, FederatorRecord, config.save_data_append) self.aggregation_method = get_aggregation(config.aggregation) @@ -112,7 +115,8 @@ def run(self): self.client_load_data() self.get_client_data_sizes() self.clients_ready() - + # self.logger.info('Sleeping before starting communication') + # time.sleep(20) for communication_round in range(self.config.rounds): self.exec_round(communication_round) @@ -216,9 +220,10 @@ def all_futures_done(futures: List[torch.Future])->bool: while not all_futures_done(training_futures): time.sleep(0.1) + self.logger.info('') # self.logger.info(f'Waiting for other clients') - self.logger.info(f'Continue with rest [1]') + self.logger.info(f'Continue with rest [1]') time.sleep(3) # for client in selected_clients: diff --git a/fltk/util/config.py b/fltk/util/config.py index a6811735..9b262606 100644 --- a/fltk/util/config.py +++ b/fltk/util/config.py @@ -13,7 +13,6 @@ @dataclass class Config: # optimizer: Optimizations - name: str = '' 
batch_size: int = 1 test_batch_size: int = 1000 rounds: int = 2 @@ -56,7 +55,7 @@ class Config: world_size: int = 0 replication_id: int = None - exp_name: str = 'experiment' + experiment_prefix: str = '' real_time : bool = False @@ -72,6 +71,8 @@ def __init__(self, **kwargs) -> None: kwargs[name] = field.type(kwargs[name]) for name, value in kwargs.items(): self.__setattr__(name, value) + if name == 'output_location': + self.output_path = Path(value) def get_default_model_folder_path(self): return self.default_model_folder_path diff --git a/fltk/util/data_container.py b/fltk/util/data_container.py index 6a6f1350..46963eb9 100644 --- a/fltk/util/data_container.py +++ b/fltk/util/data_container.py @@ -60,6 +60,7 @@ def __init__(self, name: str, output_location: Path, record_type: DataRecord, ap self.append_mode = append_mode file_flag = 'a' if append_mode else 'w' self.file_handle = open(self.file_path, file_flag) + print(f'[<=========>] Creating data container at {self.file_path}') self.record_type = record_type if self.append_mode: open(self.file_path, 'w').close() diff --git a/fltk/util/generate_experiments.py b/fltk/util/generate_experiments.py index d2f1eb4e..c424c889 100644 --- a/fltk/util/generate_experiments.py +++ b/fltk/util/generate_experiments.py @@ -1,4 +1,16 @@ from pathlib import Path +import os +import yaml +from fltk.util.generate_docker_compose_2 import generate_compose_file + + +def rm_tree(pth: Path): + for child in pth.iterdir(): + if child.is_file(): + child.unlink() + # else: + # rm_tree(child) + # pth.rmdir() def generate(base_path: Path): @@ -9,6 +21,7 @@ def generate(base_path: Path): with open(descr_path) as descr_f: descr_data = descr_f.read() exps_path = base_path / 'exps' + rm_tree(exps_path) exps_path.mkdir(parents=True, exist_ok=True) for exp_cfg in exp_cfg_list: exp_cfg_data = '' @@ -16,12 +29,108 @@ def generate(base_path: Path): exp_cfg_data = exp_f.read() exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: 
\'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' + exp_data += f'\nexperiment_prefix: \'{base_path.name}_{exp_cfg.name.split(".")[0]}\'\n' filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) with open(exps_path / filename, mode='w') as f: f.write(exp_data) print('Done') + +# def run(): +# base_path = Path(__file__).parent +# descr_path = base_path / 'descr.yaml' +# +# exp_cfg_list = [x for x in base_path.iterdir() if '.cfg' in x.suffixes] +# descr_data = '' +# with open(descr_path) as descr_f: +# descr_data = descr_f.read() +# +# exps_path = base_path / 'exps' +# exps_path.mkdir(parents=True, exist_ok=True) +# for exp_cfg in exp_cfg_list: +# exp_cfg_data = '' +# replications = 1 +# with open(exp_cfg) as exp_f: +# exp_cfg_data = exp_f.read() +# for replication_id in range(replications): +# exp_data = descr_data + exp_cfg_data +# exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' +# filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) +# with open(exps_path / filename, mode='w') as f: +# f.write(exp_data) +# print('Done') + + +def run(base_path: Path): + print(f'Run {base_path}') + print(list(base_path.iterdir())) + descr_path = base_path / 'descr.yaml' + exp_cfg_list = [x for x in base_path.iterdir() if '.cfg' in x.suffixes] + descr_data = '' + with open(descr_path) as descr_f: + descr_data = yaml.safe_load(descr_f.read()) + + replications = 1 + if 'replications' in descr_data: + replications = descr_data['replications'] + run_docker = False + if 'docker_system' in descr_data: + # Run in docker + # Generate Docker + print(descr_data) + docker_deploy_path = Path(descr_data['docker_system']) + + print(docker_deploy_path) + run_docker = True + generate_compose_file(docker_deploy_path) + + exp_files = [x for x in (base_path / 'exps').iterdir() if x.suffix in ['.yaml', '.yml']] + + cmd_list = [] + print(exp_files) + if run_docker: + first_prefix = '--build' 
+ for exp_cfg_file in exp_files: + for replication_id in range(replications): + cmd = f'export OPTIONAL_PARAMS="--prefix={replication_id}";export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + cmd_list.append(cmd) + # print(f'Running cmd: "{cmd}"') + # os.system(cmd) + first_prefix = '' + pass + else: + print('Switching to direct mode') + for exp_cfg_file in exp_files: + for replication_id in range(replications): + # cmd = f'export OPTIONAL_PARAMS="--prefix={replication_id}";export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + cmd = f'python3 -m fltk single {exp_cfg_file} --prefix={replication_id}' + cmd_list.append(cmd) + pass + + [print(x) for x in cmd_list] + for cmd in cmd_list: + print(f'Running cmd: "{cmd}"') + os.system(cmd) + print('Done') + # docker_system + + + # name = 'dev' + # generate_docker(name) + # base_path = f'{Path(__file__).parent}' + # exp_list = [ + # 'fedavg.yaml', + # ] + # exp_list = [f'{base_path}/exps/{x}' for x in exp_list] + # first_prefix = '--build' + # for exp_cfg_file in exp_list: + # cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' + # print(f'Running cmd: "{cmd}"') + # os.system(cmd) + # first_prefix = '' + + # print('Done') + # if __name__ == '__main__': # base_path = Path(__file__).parent # descr_path = base_path / 'descr.yaml' diff --git a/requirements.txt b/requirements.txt index 84ecb26b..a68a74d1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,4 +15,4 @@ seaborn matplotlib google-api-python-client google-auth-httplib2 -google-auth-oauthlib \ No newline at end of file +google-auth-oauthlib >= 0.4.1 \ No newline at end of file From 826d076be8b23006e7eba231f7c177fd3c19d934 Mon Sep 17 00:00:00 2001 From: bacox Date: Tue, 15 Mar 2022 18:07:14 +0100 Subject: [PATCH 68/73] Clean code --- fltk/__init__.py | 2 +- fltk/client.py | 798 ----------------------- fltk/federator.py | 852 
------------------------- fltk/launch.py | 72 --- fltk/util/config.py | 12 +- fltk/util/fed_avg.py | 12 - fltk/util/generate_docker_compose_2.py | 50 +- 7 files changed, 17 insertions(+), 1781 deletions(-) delete mode 100644 fltk/client.py delete mode 100644 fltk/federator.py delete mode 100644 fltk/launch.py delete mode 100644 fltk/util/fed_avg.py diff --git a/fltk/__init__.py b/fltk/__init__.py index d1eb1a0f..92989687 100644 --- a/fltk/__init__.py +++ b/fltk/__init__.py @@ -1,2 +1,2 @@ -__version__ = '0.3.2' \ No newline at end of file +__version__ = '0.4.0' \ No newline at end of file diff --git a/fltk/client.py b/fltk/client.py deleted file mode 100644 index c4cf6ccd..00000000 --- a/fltk/client.py +++ /dev/null @@ -1,798 +0,0 @@ -import copy -import datetime -import os -import random -import time -from dataclasses import dataclass -from typing import List - -import torch -from torch.distributed import rpc -import logging -import numpy as np -from sklearn.metrics import confusion_matrix -from sklearn.metrics import classification_report -from torch.distributed.rpc import RRef - -from fltk.schedulers import MinCapableStepLR -from fltk.strategy.aggregation import FedAvg -from fltk.strategy.offloading import OffloadingStrategy, parse_strategy -from fltk.util.arguments import Arguments -from fltk.util.fed_avg import average_nn_parameters -from fltk.util.log import FLLogger - -import yaml - -from fltk.util.profiler import Profiler -from fltk.util.profilerV2 import Profiler as P2 -from fltk.util.profilerV3 import Profiler as P3 -from fltk.util.results import EpochData -from fltk.util.timer import elapsed_timer - -logging.basicConfig( - level=logging.DEBUG, - - format='%(asctime)s %(levelname)s %(module)s - %(funcName)s: %(message)s', -) - -global_dict = {} -global_model_weights = {} -global_model_data_size = 0 -global_sender_id = "" -global_offload_received = False -global_local_updates_left = 0 - - -def _call_method(method, rref, *args, **kwargs): - """helper for 
_remote_method()""" - return method(rref.local_value(), *args, **kwargs) - -def _remote_method(method, rref, *args, **kwargs): - """ - executes method(*args, **kwargs) on the from the machine that owns rref - - very similar to rref.remote().method(*args, **kwargs), but method() doesn't have to be in the remote scope - """ - args = [method, rref] + list(args) - return rpc.rpc_sync(rref.owner(), _call_method, args=args, kwargs=kwargs) - - -def _remote_method_async(method, rref, *args, **kwargs) -> torch.Future: - args = [method, rref] + list(args) - return rpc.rpc_async(rref.owner(), _call_method, args=args, kwargs=kwargs) - - -def _remote_method_async_by_info(method, worker_info, *args, **kwargs): - args = [method, worker_info] + list(args) - return rpc.rpc_async(worker_info, _call_method, args=args, kwargs=kwargs) - -class Client: - counter = 0 - finished_init = False - dataset = None - epoch_results: List[EpochData] = [] - epoch_counter = 0 - server_ref = None - offloaded_net = None - - # Model offloading - received_offload_model = False - offloaded_model_weights = None - call_to_offload = False - client_to_offload_to : str = None - offloaded_model_ready = False - - strategy = OffloadingStrategy.VANILLA - - deadline_enabled = False - swyh_enabled = False - freeze_layers_enabled = False - offload_enabled = False - dyn_terminate = False - dyn_terminate_swyh = False - - terminate_training = False - offload_release = False - - def __init__(self, id, log_rref, rank, world_size, config = None): - # logging.info(f'Welcome to client {id}') - self.id = id - global_dict['id'] = id - global global_model_weights, global_offload_received, global_model_data_size, global_local_updates_left - global_model_weights = None - global_offload_received = False - global_local_updates_left = 0 - global_model_data_size = 0 - self.log_rref = log_rref - self.rank = rank - self.world_size = world_size - # self.args = Arguments(logging) - self.args = config - self.args.init_logger(logging) - 
self.device = self.init_device() - self.set_net(self.load_default_model()) - self.loss_function = self.args.get_loss_function()() - self.optimizer = self.args.get_optimizer()(self.net.parameters(), - **self.args.optimizer_args) - self.scheduler = MinCapableStepLR(self.args.get_logger(), self.optimizer, - self.args.get_scheduler_step_size(), - self.args.get_scheduler_gamma(), - self.args.get_min_lr()) - self.strategy = OffloadingStrategy.Parse(config.offload_strategy) - self.configure_strategy(self.strategy) - - def load_offloaded_model(self): - self.offloaded_net = self.load_default_model() - self.offloaded_net.to(self.device) - logging.info('Offloaded network loaded') - - def copy_offloaded_model_weights(self): - self.update_nn_parameters(global_model_weights, True) - logging.info('Parameters of offloaded model updated') - self.offloaded_model_ready = True - - def configure_strategy(self, strategy : OffloadingStrategy): - deadline_enabled, swyh_enabled, freeze_layers_enabled, offload_enabled, dyn_terminate, dyn_terminate_swyh = parse_strategy(strategy) - self.deadline_enabled = deadline_enabled - self.swyh_enabled = swyh_enabled - self.freeze_layers_enabled = freeze_layers_enabled - self.offload_enabled = offload_enabled - self.dyn_terminate = dyn_terminate - self.dyn_terminate_swyh = dyn_terminate_swyh - logging.info(f'Offloading strategy={strategy}') - logging.info(f'Offload strategy params: deadline={self.deadline_enabled}, ' - f'swyh={self.swyh_enabled}, freeze={self.freeze_layers_enabled}, ' - f'offload={self.offload_enabled}, dyn_terminate={self.dyn_terminate}, ' - f'dyn_terminate_swyh={self.dyn_terminate_swyh}') - - def set_tau_eff(self, total): - client_weight = self.get_client_datasize() / total - n = self.get_client_datasize() - E = self.args.epochs_per_round - B = 16 # nicely hardcoded :) - tau_eff = int(E * n / B) * client_weight - if hasattr(self.optimizer, 'set_tau_eff'): - self.optimizer.set_tau_eff(tau_eff) - - def init_device(self): - if 
self.args.cuda and torch.cuda.is_available(): - return torch.device("cuda:0") - else: - return torch.device("cpu") - - def send_reference(self, server_ref): - self.local_log(f'Got worker_info from server {server_ref}') - self.server_ref = server_ref - - def terminate_training_endpoint(self): - logging.info('I got a call for training termination!') - self.terminate_training = True - - @staticmethod - def static_ping(): - print(f'Got static ping with global_dict={global_dict}') - - def ping(self): - self.local_log(f'Pong!') - self.local_log(f'Pong2! {self.id}') - return 'pong' - - - def rpc_test(self): - sleep_time = random.randint(1, 5) - time.sleep(sleep_time) - self.local_log(f'sleep for {sleep_time} seconds') - self.counter += 1 - log_line = f'Number of times called: {self.counter}' - self.local_log(log_line) - self.remote_log(log_line) - - def remote_log(self, message): - _remote_method_async(FLLogger.log, self.log_rref, self.id, message, time.time()) - - def local_log(self, message): - logging.info(f'[{self.id}: {time.time()}]: {message}') - - def set_configuration(self, config: str): - yaml_config = yaml.safe_load(config) - - def init(self): - pass - - def init_dataloader(self, ): - self.args.distributed = True - self.args.rank = self.rank - self.args.world_size = self.world_size - # self.dataset = DistCIFAR10Dataset(self.args) - self.dataset = self.args.DistDatasets[self.args.dataset_name](self.args) - self.finished_init = True - logging.info('Done with init') - - def is_ready(self): - logging.info("Client is ready") - return self.finished_init, RRef(self) - - def set_net(self, net): - self.net = net - self.net.to(self.device) - - def load_model_from_file(self, model_file_path): - model_class = self.args.get_net() - default_model_path = os.path.join(self.args.get_default_model_folder_path(), model_class.__name__ + ".model") - return self.load_model_from_file(default_model_path) - - def get_nn_parameters(self): - """ - Return the NN's parameters. 
- """ - return self.net.state_dict() - - def load_default_model(self): - """ - Load a model from default model file. - - This is used to ensure consistent default model behavior. - """ - model_class = self.args.get_net() - default_model_path = os.path.join(self.args.get_default_model_folder_path(), model_class.__name__ + ".model") - - return self.load_model_from_file(default_model_path) - - def load_model_from_file(self, model_file_path): - """ - Load a model from a file. - - :param model_file_path: string - """ - model_class = self.args.get_net() - model = model_class() - - if os.path.exists(model_file_path): - try: - model.load_state_dict(torch.load(model_file_path)) - except: - self.args.get_logger().warning("Couldn't load model. Attempting to map CUDA tensors to CPU to solve error.") - - model.load_state_dict(torch.load(model_file_path, map_location=torch.device('cpu'))) - else: - self.args.get_logger().warning("Could not find model: {}".format(model_file_path)) - - return model - - def get_client_index(self): - """ - Returns the client index. - """ - return self.client_idx - - def update_nn_parameters(self, new_params, is_offloaded_model = False): - """ - Update the NN's parameters. 
- - :param new_params: New weights for the neural network - :type new_params: dict - """ - if is_offloaded_model: - self.offloaded_net.load_state_dict(copy.deepcopy(new_params), strict=True) - else: - self.net.load_state_dict(copy.deepcopy(new_params), strict=True) - if self.log_rref: - self.remote_log(f'Weights of the model are updated') - - - def report_performance_async(self, performance_data): - self.local_log('Reporting performance') - from fltk.federator import Federator - return _remote_method_async(Federator.perf_metric_endpoint, self.server_ref, self.id, performance_data) - - def report_performance_estimate(self, performance_data): - self.local_log('Reporting performance estimate') - from fltk.federator import Federator - return _remote_method_async(Federator.perf_est_endpoint, self.server_ref, self.id, performance_data) - - @staticmethod - def offload_receive_endpoint(model_weights, num_train_samples, sender_id, local_updates_left): - print(f'Got the offload_receive_endpoint endpoint') - global global_model_weights, global_offload_received, global_model_data_size, global_sender_id, global_local_updates_left - global_model_weights = copy.deepcopy(model_weights.copy()) - global_model_data_size = num_train_samples - global_sender_id = sender_id - global_local_updates_left = local_updates_left - global_offload_received = True - - @staticmethod - def offload_receive_endpoint_2(string): - print(f'Got the offload_receive_endpoint endpoint') - print(f'Got the offload_receive_endpoint endpoint with arg={string}') - # global global_model_weights, global_offload_received - # global_model_weights = model_weights.copy(deep=True) - # global_offload_received = True - - - def call_to_offload_endpoint(self, client_to_offload: RRef, soft_deadline): - self.local_log(f'Got the call to offload endpoint to {client_to_offload}') - self.client_to_offload_to = client_to_offload - self.call_to_offload = True - - def release_from_offloading_endpoint(self): - logging.info('Got a 
release signal') - self.offload_release = True - - def freeze_layers2(self, until, net): - - def get_children(model: torch.nn.Module): - children = list(model.children()) - flatt_children = [] - if children == []: - return model - else: - for child in children: - try: - flatt_children.extend(get_children(child)) - except TypeError: - flatt_children.append(get_children(child)) - return flatt_children - - for idx, layer in enumerate(get_children(net)): - if idx < until: - print(f'[{idx}] Freezing layer: {layer}') - for param in layer.parameters(): - param.requires_grad = False - def freeze_layers(self, until): - ct = 0 - for child in self.net.children(): - ct += 1 - if ct < until: - for param in child.parameters(): - param.requires_grad = False - - - def unfreeze_layers(self): - for param in self.net.parameters(): - param.requires_grad = True - - def train(self, epoch, deadline: int = None, warmup=False, use_offloaded_model=False): - """ - - Different modes: - 1. Vanilla - 2. Deadline - 3. SWYH - 4. Just Freeze - 5. 
Model Offload - - - :: Vanilla - Disable deadline - Disable swyh - Disable offload - - :: Deadline - We need to keep track of the incoming deadline - We don't need to send data before the deadline - - :param epoch: Current epoch # - :type epoch: int - """ - - perf_data = { - 'total_duration': 0, - 'p_v2_data': None, - 'p_v1_data': None, - 'n_batches': 0 - } - - start_time = time.time() - - if use_offloaded_model: - for param in self.offloaded_net.parameters(): - param.requires_grad = True - deadline_threshold = self.args.deadline_threshold - train_stop_time = None - if self.deadline_enabled and deadline is not None: - train_stop_time = start_time + deadline - deadline_threshold - - # Ignore profiler for now - # p = Profiler() - # p.attach(self.net) - - # self.net.train() - global global_model_weights, global_offload_received - # deadline_time = None - # # save model - # if self.args.should_save_model(epoch): - # self.save_model(epoch, self.args.get_epoch_save_start_suffix()) - - running_loss = 0.0 - final_running_loss = 0.0 - if self.args.distributed: - self.dataset.train_sampler.set_epoch(epoch) - number_of_training_samples = len(self.dataset.get_train_loader()) - self.args.get_logger().info(f'{self.id}: Number of training samples: {number_of_training_samples}') - # self.args.get_logger().info(f'{self.id}: Number of training samples: {len(self.dataset.get_train_loader())}') - # Ignore profiler for now - # performance_metric_interval = 20 - # perf_resp = None - - # Profiling parameters - profiling_size = self.args.profiling_size - if profiling_size == -1: - profiling_size = number_of_training_samples - profiling_data = np.zeros(profiling_size) - profiling_forwards_data = np.zeros(profiling_size) - profiling_backwards_data = np.zeros(profiling_size) - pre_train_loop_data = np.zeros(profiling_size) - post_train_loop_data = np.zeros(profiling_size) - active_profiling = True - split_point = self.args.nets_split_point[self.args.net_name] - p = P2(profiling_size, 
split_point - 1) - p3 = P3(profiling_size, split_point - 1) - - profiler_active = False - # Freezing effect experiment - if self.rank in self.args.freeze_clients: - logging.info('I need to freeze!') - split_point = self.args.nets_split_point[self.args.net_name] - self.freeze_layers2(split_point, self.net) - else: - if use_offloaded_model: - p.attach(self.offloaded_net) - p3.attach(self.offloaded_net) - else: - p.attach(self.net) - p3.attach(self.net) - profiler_active = True - - control_start_time = time.time() - training_process = 0 - - def calc_optimal_offloading_point(profiler_data, time_till_deadline, iterations_left): - logging.info(f'Calc optimal point: profiler_data={profiler_data}, time_till_deadline={time_till_deadline}, iterations_left={iterations_left}') - ff, cf, cb, fb = profiler_data - full_network = ff + cf + cb + fb - frozen_network = ff + cf + cb - split_point = 0 - for z in range(iterations_left, -1, -1): - x = z - y = iterations_left - x - # print(z) - new_est_split = (x * full_network) + (y * frozen_network) - split_point = x - if new_est_split < time_till_deadline: - break - logging.info(f'The offloading point is a iteration: {split_point}') - logging.info(f'Estimated default runtime={full_network* iterations_left}') - logging.info(f'new_est_split={new_est_split}, deadline={deadline}') - - start_loop_time = time.time() - for i, (inputs, labels) in enumerate(self.dataset.get_train_loader(), 0): - loop_pre_train_start = time.time() - start_train_time = time.time() - - if self.dyn_terminate_swyh or self.dyn_terminate: - if self.terminate_training: - logging.info('Got a call to terminate training') - break - - if use_offloaded_model and i > global_local_updates_left: - logging.info(f'Stoppinng training of offloaded model; no local updates left; Was {global_local_updates_left}') - break - if self.offload_enabled and not warmup: - # Check if there is a call to offload - if self.call_to_offload: - self.args.get_logger().info('Got call to offload 
model') - model_weights = self.get_nn_parameters() - local_updates_left = number_of_training_samples - i - ret = rpc.rpc_async(self.client_to_offload_to, Client.offload_receive_endpoint, args=([model_weights, i, self.id, local_updates_left])) - print(f'Result of rref: {ret}') - # - self.call_to_offload = False - self.client_to_offload_to = None - # This number only works for cifar10cnn - # @TODO: Make this dynamic for other networks - # self.freeze_layers(5) - split_point = self.args.nets_split_point[self.args.net_name] - self.freeze_layers2(split_point, self.net) - - # Check if there is a model to incorporate - # Disable for now to offloading testing - # if global_offload_received: - # self.args.get_logger().info('Merging offloaded model') - # self.args.get_logger().info('FedAvg locally with offloaded model') - # updated_weights = FedAvg({'own': self.get_nn_parameters(), 'remote': global_model_weights}, {'own': i, 'remote': global_model_data_size}) - # - # # updated_weights = average_nn_parameters([self.get_nn_parameters(), global_model_weights]) - # self.args.get_logger().info('Updating local weights due to offloading') - # self.update_nn_parameters(updated_weights) - # global_offload_received = False - # global_model_weights = None - - if self.swyh_enabled and not warmup: - # Deadline - if train_stop_time is not None: - if time.time() >= train_stop_time: - self.args.get_logger().info('Stopping training due to deadline time') - break - # else: - # self.args.get_logger().info(f'Time to deadline: {train_stop_time - time.time()}') - - - - - inputs, labels = inputs.to(self.device), labels.to(self.device) - training_process = i - - # zero the parameter gradients - self.optimizer.zero_grad() - loop_pre_train_end = time.time() - if profiler_active: - p.signal_forward_start() - p3.signal_forward_start() - outputs = None - if use_offloaded_model: - outputs = self.offloaded_net(inputs) - else: - outputs = self.net(inputs) - loss = self.loss_function(outputs, labels) - 
post_train_time = time.time() - if active_profiling: - profiling_forwards_data[i] = post_train_time - start_train_time - - # Ignore profiler for now - # p.signal_backward_start() - if profiler_active: - p.signal_backward_start() - p3.signal_forward_end() - p3.signal_backwards_start() - loss.backward() - self.optimizer.step() - if profiler_active: - p3.signal_backwards_end() - p.step() - p3.step() - loop_post_train_start = time.time() - # print statistics - running_loss += loss.item() - if i % self.args.get_log_interval() == 0: - self.args.get_logger().info('[%d, %5d] loss: %.3f' % (epoch, i, running_loss / self.args.get_log_interval())) - final_running_loss = running_loss / self.args.get_log_interval() - running_loss = 0.0 - if active_profiling: - profiling_backwards_data[i] = time.time() - post_train_time - - # Ignore profiler for now - # p.set_warmup(True) - # if i % performance_metric_interval == 0: - # # perf_metrics = p.calc_metric(15) - # perf_metrics = p.export_data() - # self.args.get_logger().info(f'Number of events = {len(perf_metrics)}') - # perf_resp = self.report_performance_async(perf_metrics) - # p.reset() - if active_profiling: - # print(i) - end_train_time = time.time() - batch_duration = end_train_time - start_train_time - profiling_data[i] = batch_duration - if i == profiling_size-1: - profiler_active = False - active_profiling = False - p.remove_all_handles() - p3.remove_all_handles() - time_per_batch = profiling_data.mean() - logging.info(f'Average batch duration is {time_per_batch}') - profiler_data = p.aggregate_values() - p3_data = p3.aggregate_values() - logging.info(f'Profiler data: {profiler_data}') - logging.info(f'P3 Profiler data: {p3_data}') - calc_optimal_offloading_point(profiler_data, deadline, number_of_training_samples - i) - - # Estimated training time - est_total_time = number_of_training_samples * time_per_batch - logging.info(f'Estimated training time is {est_total_time}') - self.report_performance_estimate((time_per_batch, 
est_total_time, number_of_training_samples)) - - if self.freeze_layers_enabled and not warmup: - logging.info(f'Checking if need to freeze layers ? {est_total_time} > {deadline}') - if est_total_time > deadline: - logging.info('Will freeze layers to speed up computation') - # This number only works for cifar10cnn - # @TODO: Make this dynamic for other networks - # self.freeze_layers(5) - split_point = self.args.nets_split_point[self.args.net_name] - self.freeze_layers2(split_point, self.net) - # logging.info(f'Batch time is {batch_duration}') - - # Break away from loop for debug purposes - # if i > 5: - # break - loop_post_train_end = time.time() - if active_profiling: - pre_train_loop_data[i] = loop_pre_train_end - loop_pre_train_start - post_train_loop_data[i] = loop_post_train_end - loop_post_train_start - p.remove_all_handles() - p3.remove_all_handles() - control_end_time = time.time() - end_loop_time = time.time() - logging.info(f'Measure end time is {(control_end_time - control_start_time)}') - logging.info(f'Trained on {training_process} samples') - # logging.info(f'Profiler data: {p.get_values()}') - - perf_data['total_duration'] = control_end_time - control_start_time - perf_data['n_batches'] = len(self.dataset.get_train_loader()) - perf_data['p_v2_data'] = p.get_values() - perf_data['p_v3_data'] = p3.get_values() - perf_data['p_v1_data'] = profiling_data - perf_data['pre_train_loop_data'] = pre_train_loop_data - perf_data['post_train_loop_data'] = post_train_loop_data - perf_data['p_v1_pre_loop'] = start_loop_time - start_time - perf_data['p_v1_forwards'] = profiling_forwards_data - perf_data['p_v1_backwards'] = profiling_backwards_data - perf_data['loop_duration'] = end_loop_time - start_loop_time - if not warmup: - self.scheduler.step() - # logging.info(self.optimizer.param_groups) - scheduler_data = { - 'lr': self.scheduler.optimizer.param_groups[0]['lr'], - 'momentum': self.scheduler.optimizer.param_groups[0]['momentum'], - 'wd': 
self.scheduler.optimizer.param_groups[0]['weight_decay'], - } - - # Reset the layers - self.unfreeze_layers() - - # save model - if self.args.should_save_model(epoch): - self.save_model(epoch, self.args.get_epoch_save_end_suffix()) - perf_data['p_v1_post_loop'] = time.time() - control_end_time - return final_running_loss, self.get_nn_parameters(), training_process, scheduler_data, perf_data - - def test(self, use_offloaded_model = False): - if use_offloaded_model: - self.offloaded_net.eval() - else: - self.net.eval() - - correct = 0 - total = 0 - targets_ = [] - pred_ = [] - loss = 0.0 - with torch.no_grad(): - for (images, labels) in self.dataset.get_test_loader(): - images, labels = images.to(self.device), labels.to(self.device) - - if use_offloaded_model: - outputs = self.offloaded_net(images) - else: - outputs = self.net(images) - - _, predicted = torch.max(outputs.data, 1) - total += labels.size(0) - correct += (predicted == labels).sum().item() - - targets_.extend(labels.cpu().view_as(predicted).numpy()) - pred_.extend(predicted.cpu().numpy()) - - loss += self.loss_function(outputs, labels).item() - - accuracy = 100 * correct / total - confusion_mat = confusion_matrix(targets_, pred_) - accuracy_per_class = confusion_mat.diagonal() / confusion_mat.sum(1) - - class_precision = self.calculate_class_precision(confusion_mat) - class_recall = self.calculate_class_recall(confusion_mat) - if False: - self.args.get_logger().debug('Test set: Accuracy: {}/{} ({:.0f}%)'.format(correct, total, accuracy)) - self.args.get_logger().debug('Test set: Loss: {}'.format(loss)) - self.args.get_logger().debug("Classification Report:\n" + classification_report(targets_, pred_)) - self.args.get_logger().debug("Confusion Matrix:\n" + str(confusion_mat)) - self.args.get_logger().debug("Class precision: {}".format(str(class_precision))) - self.args.get_logger().debug("Class recall: {}".format(str(class_recall))) - - return accuracy, loss, class_precision, class_recall, 
accuracy_per_class - - - def run_epochs(self, num_epoch, deadline: int = None, warmup=False): - """ - Timing data to measure: - Total execution tim: - """ - self.terminate_training = False - start = time.time() - - start_time_train = datetime.datetime.now() - self.call_to_offload = False - self.dataset.get_train_sampler().set_epoch_size(num_epoch) - # Train locally - loss, weights, training_process, scheduler_data, perf_data = self.train(self.epoch_counter, deadline, warmup) - if self.terminate_training and self.dyn_terminate: - logging.info('Not testing data due to termination call') - self.terminate_training = False - return {'own': []} - elif self.terminate_training and self.dyn_terminate_swyh: - self.terminate_training = False - logging.info('Sending back weights due to terminate with swyh') - if not warmup: - self.epoch_counter += num_epoch - elapsed_time_train = datetime.datetime.now() - start_time_train - train_time_ms = int(elapsed_time_train.total_seconds()*1000) - post_training_time = time.time() - - start_time_test = datetime.datetime.now() - accuracy, test_loss, class_precision, class_recall, _accuracy_per_class = self.test() - elapsed_time_test = datetime.datetime.now() - start_time_test - test_time_ms = int(elapsed_time_test.total_seconds()*1000) - post_test_time = time.time() - - # Timing data that needs to be send back - duration_train = post_training_time - start - duration_test = post_test_time - post_training_time - logging.info( - f'Time for training={duration_train}, time for testing={duration_test}, total time={duration_train + duration_test}') - data = EpochData(self.epoch_counter, num_epoch, train_time_ms, test_time_ms, loss, accuracy, test_loss, - class_precision, class_recall, training_process, self.id, client_wall_time=time.time()) - self.epoch_results.append(data) - if hasattr(self.optimizer, 'pre_communicate'): # aka fednova or fedprox - self.optimizer.pre_communicate() - for k, v in weights.items(): - weights[k] = v.cpu() - 
response_obj = {'own': [data, weights, scheduler_data, perf_data]} - - global global_offload_received - if self.offload_enabled: - logging.info('Waiting to receive offload or being released') - while not (global_offload_received or self.offload_release): - time.sleep(0.1) - logging.info(f'Continuing after global_offload_received={global_offload_received} and offload_release={self.offload_release}') - if self.offload_enabled and global_offload_received: - self.configure_strategy(OffloadingStrategy.SWYH) - # self.configure_strategy(OffloadingStrategy.VANILLA) - logging.info('Processing offloaded model') - self.load_offloaded_model() - self.copy_offloaded_model_weights() - elapsed_time = time.time() - start - deadline -= elapsed_time - loss_offload, weights_offload, training_process_offload, scheduler_data_offload, perf_data_offload = self.train(self.epoch_counter, deadline, warmup, use_offloaded_model=True) - accuracy, test_loss, class_precision, class_recall, _accuracy_per_class = self.test(use_offloaded_model=True) - global global_sender_id - data_offload = EpochData(self.epoch_counter, num_epoch, train_time_ms, test_time_ms, loss_offload, accuracy, test_loss, - class_precision, class_recall, training_process, f'{global_sender_id}-offload', client_wall_time=time.time()) - - # Copy GPU tensors to CPU - for k, v in weights_offload.items(): - weights_offload[k] = v.cpu() - response_obj['offload'] = [ data_offload, weights_offload, scheduler_data_offload, perf_data_offload, global_sender_id] - self.configure_strategy(OffloadingStrategy.MODEL_OFFLOAD) - else: - logging.info(f'Not doing offloading due to offload_enabled={self.offload_enabled} and global_offload_received={global_offload_received}') - return response_obj - - def save_model(self, epoch, suffix): - """ - Saves the model if necessary. - """ - self.args.get_logger().debug("Saving model to flat file storage. 
Save #{}", epoch) - - if not os.path.exists(self.args.get_save_model_folder_path()): - os.mkdir(self.args.get_save_model_folder_path()) - - full_save_path = os.path.join(self.args.get_save_model_folder_path(), "model_" + str(self.client_idx) + "_" + str(epoch) + "_" + suffix + ".model") - torch.save(self.get_nn_parameters(), full_save_path) - - def calculate_class_precision(self, confusion_mat): - """ - Calculates the precision for each class from a confusion matrix. - """ - return np.diagonal(confusion_mat) / np.sum(confusion_mat, axis=0) - - def calculate_class_recall(self, confusion_mat): - """ - Calculates the recall for each class from a confusion matrix. - """ - return np.diagonal(confusion_mat) / np.sum(confusion_mat, axis=1) - - def get_client_datasize(self): - return len(self.dataset.get_train_sampler()) - - def __del__(self): - print(f'Client {self.id} is stopping') diff --git a/fltk/federator.py b/fltk/federator.py deleted file mode 100644 index 5acf545d..00000000 --- a/fltk/federator.py +++ /dev/null @@ -1,852 +0,0 @@ -import datetime -import time -from dataclasses import dataclass -from typing import List - -import pandas as pd -import torch -from dataclass_csv import DataclassWriter -from torch.distributed import rpc -from torch.distributed.rpc import RRef, get_worker_info -from torch.utils.data._utils.worker import WorkerInfo - -from fltk.client import Client -from fltk.datasets.data_distribution import distribute_batches_equally -from fltk.strategy.aggregation import FedAvg -from fltk.strategy.client_selection import random_selection, tifl_update_probs, tifl_select_tier_and_decrement -from fltk.strategy.offloading import OffloadingStrategy, parse_strategy -from fltk.util.arguments import Arguments -from fltk.util.base_config import BareConfig -from fltk.util.data_loader_utils import load_train_data_loader, load_test_data_loader, \ - generate_data_loaders_from_distributed_dataset -from fltk.util.fed_avg import average_nn_parameters -from 
fltk.util.log import FLLogger -from torchsummary import summary -from torch.utils.tensorboard import SummaryWriter -from pathlib import Path -import logging -import numpy as np -import copy - -# from fltk.util.profile_plots import stability_plot, parse_stability_data -from fltk.util.results import EpochData -from fltk.util.tensor_converter import convert_distributed_data_into_numpy - -logging.basicConfig( - level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(module)s - %(funcName)s: %(message)s', -) - - -def _call_method(method, rref, *args, **kwargs): - return method(rref.local_value(), *args, **kwargs) - - -def _call_method_2(method, rref, *args, **kwargs): - print(method) - return method(rref, *args, **kwargs) - -def _remote_method(method, rref, *args, **kwargs): - args = [method, rref] + list(args) - return rpc.rpc_sync(rref.owner(), _call_method, args=args, kwargs=kwargs) - - -def _remote_method_async(method, rref, *args, **kwargs) -> torch.Future: - args = [method, rref] + list(args) - return rpc.rpc_async(rref.owner(), _call_method, args=args, kwargs=kwargs) - - -def _remote_method_async_by_name(method, client_name, *args, **kwargs) -> torch.Future: - args = [method, client_name] + list(args) - print(client_name) - print(_call_method_2) - return rpc.rpc_sync(client_name, _call_method_2, args=args, kwargs=kwargs) - - -class ClientRef: - ref = None - name = "" - data_size = 0 - tb_writer = None - tb_writer_offload = None - available = False - rank=None - - def __init__(self, name, ref, tensorboard_writer, tensorboard_writer_offload, rank): - self.name = name - self.ref = ref - self.tb_writer = tensorboard_writer - self.tb_writer_offload = tensorboard_writer_offload - self.rank = rank - - def __repr__(self): - return self.name - -@dataclass -class ClientResponse: - id: int - client: ClientRef - future: torch.Future - start_time: float = time.time() - end_time: float = 0 - done: bool = False - dropped = True - terminated = False - - def finish(self): - 
self.end_time = time.time() - self.done = True - self.dropped = False - print(f'>>>> \t\tClient {self.id} has a duration of {self.duration()}') - - def duration(self): - return self.end_time - self.start_time - - -class Federator: - """ - Central component of the Federated Learning System: The Federator - - The Federator is in charge of the following tasks: - - Have a copy of the global model - - Client selection - - Aggregating the client model weights/gradients - - Saving all the metrics - - Use tensorboard to report metrics - - Keep track of timing - - """ - clients: List[ClientRef] = [] - epoch_counter = 0 - client_data = {} - response_list : List[ClientResponse] = [] - response_id = 0 - - reference_lookup = {} - performance_estimate = {} - - # Strategies - deadline_enabled = False - swyh_enabled = False - freeze_layers_enabled = False - offload_enabled = False - dyn_terminate = False - dyn_terminate_swyh = False - warmup_active = False - node_groups = {} - tifl_tier_data = [] - tifl_tier_names = [] - tifl_selected_tier = '' - - exp_start_time = 0 - - strategy = OffloadingStrategy.VANILLA - - - # Keep track of the experiment data - exp_data_general = [] - - epoch_events = [] - - def __init__(self, client_id_triple, num_epochs = 3, config=None): - log_rref = rpc.RRef(FLLogger()) - self.log_rref = log_rref - self.num_epoch = num_epochs - self.config = config - self.tb_path = f'{config.output_location}/{config.experiment_prefix}' - self.ensure_path_exists(self.tb_path) - self.tb_writer = SummaryWriter(f'{self.tb_path}/{config.experiment_prefix}_federator') - self.strategy = OffloadingStrategy.Parse(config.offload_strategy) - self.configure_strategy(self.strategy) - self.create_clients(client_id_triple) - self.config.init_logger(logging) - self.performance_data = {} - - logging.info("Creating test client") - copy_sampler = config.data_sampler - config.data_sampler = "uniform" - self.test_data = Client("test", None, 1, 2, config) - config.data_sampler = copy_sampler 
- self.reference_lookup[get_worker_info().name] = RRef(self) - - if self.strategy == OffloadingStrategy.TIFL_BASIC or self.strategy == OffloadingStrategy.TIFL_ADAPTIVE: - for k, v in self.config.node_groups.items(): - self.node_groups[k] = list(range(v[0], v[1]+1)) - self.tifl_tier_names.append(k) - - if self.strategy == OffloadingStrategy.TIFL_ADAPTIVE: - num_tiers = len(self.tifl_tier_names) * 1.0 - start_credits = np.ceil(self.config.epochs / num_tiers) - logging.info(f'Tifl starting credits is {start_credits}') - for tier_name in self.tifl_tier_names: - self.tifl_tier_data.append([tier_name, 0, start_credits, 1 / num_tiers]) - residue = 1 - for t in self.tifl_tier_data: - residue -= t[3] - self.tifl_tier_data[0][3] += residue - - # def configure_strategy(self, strategy : OffloadingStrategy): - # if strategy == OffloadingStrategy.VANILLA: - # logging.info('Running with offloading strategy: VANILLA') - # self.deadline_enabled = False - # self.swyh_enabled = False - # self.freeze_layers_enabled = False - # self.offload_enabled = False - # if strategy == OffloadingStrategy.DEADLINE: - # logging.info('Running with offloading strategy: DEADLINE') - # self.deadline_enabled = True - # self.swyh_enabled = False - # self.freeze_layers_enabled = False - # self.offload_enabled = False - # if strategy == OffloadingStrategy.SWYH: - # logging.info('Running with offloading strategy: SWYH') - # self.deadline_enabled = True - # self.swyh_enabled = True - # self.freeze_layers_enabled = False - # self.offload_enabled = False - # if strategy == OffloadingStrategy.FREEZE: - # logging.info('Running with offloading strategy: FREEZE') - # self.deadline_enabled = True - # self.swyh_enabled = False - # self.freeze_layers_enabled = True - # self.offload_enabled = False - # if strategy == OffloadingStrategy.MODEL_OFFLOAD: - # logging.info('Running with offloading strategy: MODEL_OFFLOAD') - # self.deadline_enabled = True - # self.swyh_enabled = False - # self.freeze_layers_enabled = True - 
# self.offload_enabled = True - # if strategy == OffloadingStrategy.TIFL_BASIC: - # logging.info('Running with offloading strategy: TIFL_BASIC') - # self.deadline_enabled = False - # self.swyh_enabled = False - # self.freeze_layers_enabled = False - # self.offload_enabled = False - # logging.info(f'Offload strategy params: deadline={self.deadline_enabled}, swyh={self.swyh_enabled}, freeze={self.freeze_layers_enabled}, offload={self.offload_enabled}') - # - def configure_strategy(self, strategy : OffloadingStrategy): - deadline_enabled, swyh_enabled, freeze_layers_enabled, offload_enabled, dyn_terminate, dyn_terminate_swyh = parse_strategy(strategy) - self.deadline_enabled = deadline_enabled - self.swyh_enabled = swyh_enabled - self.freeze_layers_enabled = freeze_layers_enabled - self.offload_enabled = offload_enabled - self.dyn_terminate = dyn_terminate - self.dyn_terminate_swyh = dyn_terminate_swyh - logging.info(f'Offloading strategy={strategy}') - logging.info(f'Offload strategy params: deadline={self.deadline_enabled}, ' - f'swyh={self.swyh_enabled}, freeze={self.freeze_layers_enabled}, ' - f'offload={self.offload_enabled}, dyn_terminate={self.dyn_terminate}, ' - f'dyn_terminate_swyh={self.dyn_terminate_swyh}') - - def create_clients(self, client_id_triple): - for id, rank, world_size in client_id_triple: - client = rpc.remote(id, Client, kwargs=dict(id=id, log_rref=self.log_rref, rank=rank, world_size=world_size, config=self.config)) - writer = SummaryWriter(f'{self.tb_path}/{self.config.experiment_prefix}_client_{id}') - writer_offload = None - if self.offload_enabled: - writer_offload = SummaryWriter(f'{self.tb_path}/{self.config.experiment_prefix}_client_{id}_offload') - self.clients.append(ClientRef(id, client, tensorboard_writer=writer, tensorboard_writer_offload=writer_offload, rank=rank)) - self.client_data[id] = [] - - def record_epoch_event(self, event: str): - self.epoch_events.append(f'{time.time()} - [{self.epoch_counter}] - {event}') - - def 
select_clients(self, n = 2): - available_clients = list(filter(lambda x : x.available, self.clients)) - if self.strategy == OffloadingStrategy.TIFL_ADAPTIVE: - tifl_update_probs(self.tifl_tier_data) - self.tifl_selected_tier = tifl_select_tier_and_decrement(self.tifl_tier_data) - client_subset = self.node_groups[self.tifl_selected_tier] - available_clients = list(filter(lambda x: x.rank in client_subset, self.clients)) - if self.strategy == OffloadingStrategy.TIFL_BASIC: - self.tifl_selected_tier = np.random.choice(list(self.node_groups.keys()), 1, replace=False)[0] - logging.info(f'TIFL: Sampling from group {self.tifl_selected_tier} out of{list(self.node_groups.keys())}') - client_subset = self.node_groups[self.tifl_selected_tier] - available_clients = list(filter(lambda x : x.rank in client_subset, self.clients)) - logging.info(f'TIFL: Sampling subgroup {available_clients}') - return random_selection(available_clients, n) - - def ping_all(self): - for client in self.clients: - logging.info(f'Sending ping to {client}') - t_start = time.time() - answer = _remote_method(Client.ping, client.ref) - t_end = time.time() - duration = (t_end - t_start)*1000 - logging.info(f'Ping to {client} is {duration:.3}ms') - - def rpc_test_all(self): - for client in self.clients: - res = _remote_method_async(Client.rpc_test, client.ref) - while not res.done(): - pass - - def client_load_data(self): - for client in self.clients: - _remote_method_async(Client.init_dataloader, client.ref) - - def clients_ready(self): - all_ready = False - ready_clients = [] - while not all_ready: - responses = [] - for client in self.clients: - if client.name not in ready_clients: - responses.append((client, _remote_method_async(Client.is_ready, client.ref))) - all_ready = True - for res in responses: - result, client_ref = res[1].wait() - if result: - self.reference_lookup[res[0].name] = client_ref - logging.info(f'{res[0]} is ready') - ready_clients.append(res[0]) - # Set the client to available - 
res[0].available = True - else: - logging.info(f'Waiting for {res[0]}') - all_ready = False - - time.sleep(2) - - # WorkerInfo(id=1, name="client1").local_value() - # rpc.rpc_sync(self.nameclients[0].ref.owner(), Client.ping, args=(self.clients[0].ref)) - logging.info(f'Sending a ping to client {self.clients[0].name}') - r_ref = rpc.remote(self.clients[0].name, Client.static_ping, args=()) - print(f'Result of rref: {r_ref.to_here()}') - logging.info('All clients are ready') - for idx, c in enumerate(self.clients): - logging.info(f'[{idx}]={c}') - - - def perf_metric_endpoint(self, node_id, perf_data): - if node_id not in self.performance_data.keys(): - self.performance_data[node_id] = [] - self.performance_data[node_id].append(perf_data) - - def perf_est_endpoint(self, node_id, performance_data): - logging.info(f'Received performance estimate of node {node_id}') - self.performance_estimate[node_id] = performance_data - - def send_clients_ref(self): - - for c in self.clients: - # _remote_method_async(Client.send_reference, c.ref, rpc.get_worker_info()) - _remote_method_async(Client.send_reference, c.ref, RRef(self)) - - def num_available_clients(self): - return sum(c.available == True for c in self.clients) - - def process_response_list(self): - for resp in self.response_list: - if resp.future.done(): - resp.finish() - resp.client.available = True - self.response_list = list(filter(lambda x: not x.done, self.response_list)) - - def ask_client_to_offload(self, client1_ref, client2_ref, soft_deadline): - logging.info(f'Offloading call from {client1_ref} to {client2_ref}') - # args = [method, rref] + list(args) - # rpc.rpc_sync(client1_ref, Client.call_to_offload_endpoint, args=(client2_ref)) - # print(_remote_method_async_by_name(Client.client_to_offload_to, client1_ref, client2_ref)) - _remote_method(Client.call_to_offload_endpoint, client1_ref, client2_ref, soft_deadline) - logging.info(f'Done with call to offload') - - def remote_run_epoch(self, epochs, 
warmup=False, first_epoch=False): - if warmup: - logging.info('This is a WARMUP round') - start_epoch_time = time.time() - deadline = self.config.deadline - deadline_time = self.config.deadline - if first_epoch: - deadline = self.config.first_deadline - deadline_time = self.config.first_deadline - """ - 1. Client selection - 2. Run local updates - 3. Retrieve data - 4. Aggregate data - """ - - client_weights = [] - - client_weights_dict = {} - client_training_process_dict = {} - - - self.record_epoch_event('Starting new round') - while self.num_available_clients() < self.config.clients_per_round: - logging.warning(f'Waiting for enough clients to become available. # Available Clients = {self.num_available_clients()}, but need {self.config.clients_per_round}') - self.process_response_list() - time.sleep(1) - - #### Client Selection #### - selected_clients = self.select_clients(self.config.clients_per_round) - - #### Send model to clients #### - responses = [] - for client in selected_clients: - logging.info(f'Send updated model to selected client: {client.name}') - responses.append( - (client, _remote_method_async(Client.update_nn_parameters, client.ref, new_params=self.test_data.get_nn_parameters()))) - - for res in responses: - res[1].wait() - logging.info('Weights are updated') - - ### Clients train locally - # Structure of the async message: - # - Client will respond with two messages: - - # Let clients train locally - - if not self.deadline_enabled: - deadline = 0 - responses: List[ClientResponse] = [] - for client in selected_clients: - cr = ClientResponse(self.response_id, client, _remote_method_async(Client.run_epochs, client.ref, num_epoch=epochs, deadline=deadline, warmup=warmup)) - cr.start_time = time.time() - self.response_id += 1 - self.response_list.append(cr) - responses.append(cr) - client.available = False - # responses.append((client, time.time(), _remote_method_async(Client.run_epochs, client.ref, num_epoch=epochs))) - self.epoch_counter += epochs 
- - # deadline_time = None - # Wait loop with deadline - start = time.time() - def reached_deadline(): - if deadline_time is None: - return False - # logging.info(f'{(time.time() - start)} >= {deadline_time}') - return (time.time() -start) >= deadline_time - - logging.info('Starting waiting period') - # Wait loop without deadline - all_finished = False - - # Debug for testing! - has_not_called = True - - show_perf_data = True - has_send_terminate = False - while not all_finished and not ((self.deadline_enabled and reached_deadline()) or warmup): - # if self.deadline_enabled and reached_deadline() - # if has_not_called and (time.time() -start) > 10: - # logging.info('Sending call to offload') - # has_not_called = False - # - # self.ask_client_to_offload(self.reference_lookup[selected_clients[0].name], selected_clients[1].name) - - # Check if all performance data has come in - has_all_perf_data = True - - if show_perf_data: - for sc in selected_clients: - if sc.name not in self.performance_estimate.keys(): - has_all_perf_data = False - - if has_all_perf_data: - logging.info('Got all performance data') - print(self.performance_estimate) - show_perf_data = False - - # Make offloading call - # @NOTE: this will only work for the two node scenario - - lowest_est_time = 0 - est_keys = list(self.performance_estimate.keys()) - - # for k, v in self.performance_estimate.items(): - # if v[1] > lowest_est_time: - # lowest_est_time = v[1] - # weak_client = k - # else: - # strong_client = k - - # (time_per_batch, est_total_time, number_of_training_samples) - if self.offload_enabled and not warmup: - first = True - weakest = 0 - strongest = 0 - weak_performance = 0 - strong_performance = 0 - summed_time = 0 - perf_estimate_copy = copy.deepcopy(self.performance_estimate) - offload_calls = [] - for i in range(int(np.floor(len(self.performance_estimate)/2))): - for k, v in perf_estimate_copy.items(): - summed_time += v[1] - # print(v) - if first: - first = False - est_total_time = 
v[1] - weakest = k - strongest = k - weak_performance = est_total_time - strong_performance = est_total_time - else: - est_total_time = v[1] - if est_total_time > weak_performance: - weak_performance = est_total_time - weakest = k - if est_total_time < strong_performance: - strong_performance = est_total_time - strongest = k - self.record_epoch_event(f'Offloading from {weakest} -> {strongest} due to {self.performance_estimate[weakest]} and {self.performance_estimate[strongest]}') - logging.info( - f'Offloading from {weakest} -> {strongest} due to {self.performance_estimate[weakest]} and {self.performance_estimate[strongest]}') - offload_calls.append([weakest, strongest]) - perf_estimate_copy.pop(weakest, None) - perf_estimate_copy.pop(strongest, None) - mean_time_est_time = (summed_time * 1.0) / len(self.performance_estimate.items()) - logging.info(f'Mean time for offloading={mean_time_est_time}') - logging.info('Sending call to offload') - for weak_node, strong_node in offload_calls: - self.ask_client_to_offload(self.reference_lookup[weak_node], strong_node, mean_time_est_time) - logging.info('Releasing clients') - for client in selected_clients: - _remote_method_async(Client.release_from_offloading_endpoint, client.ref) - - # if self.offload_enabled and not warmup: - # logging.info(f'self.performance_estimate={self.performance_estimate}') - # logging.info(f'est_keys={est_keys}') - # weak_client = est_keys[0] - # strong_client = est_keys[1] - # if self.performance_estimate[est_keys[1]][1] > self.performance_estimate[est_keys[0]][1]: - # weak_client = est_keys[1] - # strong_client = est_keys[0] - # - # logging.info(f'Offloading from {weak_client} -> {strong_client} due to {self.performance_estimate[weak_client]} and {self.performance_estimate[strong_client]}') - # logging.info('Sending call to offload') - # self.ask_client_to_offload(self.reference_lookup[selected_clients[0].name], selected_clients[1].name) - - # selected_clients[0] - # logging.info(f'Status of 
all_finished={all_finished} and deadline={reached_deadline()}') - all_finished = True - - for client_response in responses: - if client_response.future.done(): - if not client_response.done: - client_response.finish() - else: - all_finished = False - if not has_send_terminate and (self.dyn_terminate or self.dyn_terminate_swyh): - num_finished_responses = sum([1 for x in responses if x.done]) - percentage = num_finished_responses / len(responses) - if percentage > self.config.termination_percentage: - logging.info('Sending termination signal') - for cr in responses: - if not cr.done: - if self.dyn_terminate: - cr.terminated = True - _remote_method_async(Client.terminate_training_endpoint, cr.client.ref) - has_send_terminate = True - logging.info(f'Percentage of finished responses: {percentage}, do terminate ? {percentage} > {self.config.termination_percentage} = {percentage > self.config.termination_percentage}') - time.sleep(0.1) - logging.info(f'Stopped waiting due to all_finished={all_finished} and deadline={reached_deadline()}') - client_accuracies = [] - for client_response in responses: - if warmup: - break - client = client_response.client - logging.info(f'{client} had a exec time of {client_response.duration()} dropped?={client_response.dropped}') - if client_response.dropped: - client_response.end_time = time.time() - logging.info( - f'{client} had a exec time of {client_response.duration()} dropped?={client_response.dropped}') - - if not client_response.dropped and not client_response.terminated: - client.available = True - logging.info(f'Fetching response for client: {client}') - response_obj = client_response.future.wait() - epoch_data : EpochData - epoch_data, weights, scheduler_data, perf_data = response_obj['own'] - epoch_data.global_epoch_id = self.epoch_counter - epoch_data.global_wall_time = client_response.end_time - self.client_data[epoch_data.client_id].append(epoch_data) - - # logging.info(f'{client} had a loss of {epoch_data.loss}') - # 
logging.info(f'{client} had a epoch data of {epoch_data}') - # logging.info(f'{client} has trained on {epoch_data.training_process} samples') - self.record_epoch_event(f'{client} had an accuracy of {epoch_data.accuracy}') - self.record_epoch_event(f'{client} had an duration of {client_response.duration()}') - client_accuracies.append(epoch_data.accuracy) - # logging.info(f'{client} has perf data: {perf_data}') - elapsed_time = client_response.end_time - self.exp_start_time - - client.tb_writer.add_scalar('training loss', - epoch_data.loss_train, # for every 1000 minibatches - self.epoch_counter * client.data_size) - - client.tb_writer.add_scalar('accuracy', - epoch_data.accuracy, # for every 1000 minibatches - self.epoch_counter * client.data_size) - - client.tb_writer.add_scalar('accuracy wall time', - epoch_data.accuracy, # for every 1000 minibatches - elapsed_time) - client.tb_writer.add_scalar('training loss per epoch', - epoch_data.loss_train, # for every 1000 minibatches - self.epoch_counter) - - client.tb_writer.add_scalar('accuracy per epoch', - epoch_data.accuracy, # for every 1000 minibatches - self.epoch_counter) - - client.tb_writer.add_scalar('Client time per epoch', - client_response.duration(), # for every 1000 minibatches - self.epoch_counter) - - client.tb_writer.add_scalar('learning rate', - scheduler_data['lr'], - self.epoch_counter) - - client.tb_writer.add_scalar('momentum', - scheduler_data['momentum'], - self.epoch_counter) - - client.tb_writer.add_scalar('weight decay', - scheduler_data['wd'], - self.epoch_counter) - total_time_t1 = perf_data['total_duration'] - loop_duration = perf_data['loop_duration'] - p_v1_time = perf_data['p_v1_data'].mean() * perf_data['n_batches'] - p_v1_time_sum = perf_data['p_v1_data'].sum() - p_v1_pre_loop = perf_data['p_v1_pre_loop'] - p_v1_post_loop = perf_data['p_v1_post_loop'] - pre_train_loop_data = perf_data['pre_train_loop_data'] - post_train_loop_data = perf_data['post_train_loop_data'] - p_v2_forwards = 
(perf_data['p_v2_data'][0].mean() + perf_data['p_v2_data'][1].mean()) * perf_data['n_batches'] - p_v2_backwards = (perf_data['p_v2_data'][2].mean() + perf_data['p_v2_data'][3].mean()) * perf_data['n_batches'] - p_v3_forwards = (perf_data['p_v3_data'][0].mean() + perf_data['p_v3_data'][1].mean()) * perf_data[ - 'n_batches'] - p_v3_backwards = (perf_data['p_v3_data'][2].mean() + perf_data['p_v3_data'][3].mean()) * perf_data[ - 'n_batches'] - p_v2_time = sum([x.mean() for x in perf_data['p_v2_data']]) * perf_data['n_batches'] - p_v1_forwards = perf_data['p_v1_forwards'].mean() * perf_data['n_batches'] - p_v1_backwards = perf_data['p_v1_backwards'].mean() * perf_data['n_batches'] - - # logging.info(f'{client} has time estimates: {[total_time_t1, loop_duration, p_v1_time_sum, p_v1_time, p_v2_time, [p_v1_forwards, p_v1_backwards], [p_v2_forwards, p_v2_backwards]]}') - # logging.info(f'{client} combined times pre post loop stuff: {[p_v1_pre_loop, loop_duration, p_v1_post_loop]} = {sum([p_v1_pre_loop, loop_duration, p_v1_post_loop])} ? 
{total_time_t1}') - # logging.info(f'{client} p3 time = {p_v3_forwards} + {p_v3_backwards} = {p_v3_forwards+ p_v3_backwards}') - # logging.info(f'{client} Pre train loop time = {pre_train_loop_data.mean()}, post train loop time = {post_train_loop_data.mean()}') - # logging.info(f'{client} p_v1 data: {perf_data["p_v1_data"]}') - - - - client.tb_writer.add_scalar('train_time_estimate_delta', loop_duration - (p_v3_forwards+ p_v3_backwards), self.epoch_counter) - client.tb_writer.add_scalar('train_time_estimate_delta_2', loop_duration - (p_v2_forwards+ p_v2_backwards), self.epoch_counter) - - client_weights.append(weights) - client_weights_dict[client.name] = weights - client_training_process_dict[client.name] = epoch_data.training_process - - if self.strategy == OffloadingStrategy.TIFL_ADAPTIVE: - mean_tier_accuracy = np.mean(client_accuracies) - logging.info(f'TIFL:: the mean accuracy is {mean_tier_accuracy}') - for t in self.tifl_tier_data: - if t[0] == self.tifl_selected_tier: - t[1] = mean_tier_accuracy - - if 'offload' in response_obj: - epoch_data_offload, weights_offload, scheduler_data_offload, perf_data_offload, sender_id = response_obj['offload'] - if epoch_data_offload.client_id not in self.client_data: - self.client_data[epoch_data_offload.client_id] = [] - epoch_data_offload.global_epoch_id = self.epoch_counter - epoch_data_offload.global_wall_time = client_response.end_time - self.client_data[epoch_data_offload.client_id].append(epoch_data_offload) - - writer = client.tb_writer_offload - - writer.add_scalar('training loss', - epoch_data_offload.loss_train, # for every 1000 minibatches - self.epoch_counter * client.data_size) - - writer.add_scalar('accuracy', - epoch_data_offload.accuracy, # for every 1000 minibatches - self.epoch_counter * client.data_size) - - writer.add_scalar('accuracy wall time', - epoch_data_offload.accuracy, # for every 1000 minibatches - elapsed_time) - writer.add_scalar('training loss per epoch', - epoch_data_offload.loss_train, 
# for every 1000 minibatches - self.epoch_counter) - - writer.add_scalar('accuracy per epoch', - epoch_data_offload.accuracy, # for every 1000 minibatches - self.epoch_counter) - - writer.add_scalar('Client time per epoch', - client_response.duration(), # for every 1000 minibatches - self.epoch_counter) - - writer.add_scalar('learning rate', - scheduler_data_offload['lr'], - self.epoch_counter) - - writer.add_scalar('momentum', - scheduler_data_offload['momentum'], - self.epoch_counter) - - writer.add_scalar('weight decay', - scheduler_data_offload['wd'], - self.epoch_counter) - client_weights.append(weights_offload) - client_weights_dict[epoch_data_offload.client_id] = weights_offload - client_training_process_dict[epoch_data_offload.client_id] = epoch_data_offload.training_process - - self.performance_estimate = {} - if len(client_weights): - logging.info(f'Aggregating {len(client_weights)} models') - updated_model = FedAvg(client_weights_dict, client_training_process_dict) - # updated_model = average_nn_parameters(client_weights) - - # test global model - logging.info("Testing on global test set") - self.test_data.update_nn_parameters(updated_model) - accuracy, loss, class_precision, class_recall, accuracy_per_class = self.test_data.test() - # logging.info('Class precision') - # logging.warning(accuracy_per_class) - # logging.info('Class names') - # logging.info(self.test_data.dataset.test_dataset.class_to_idx) - # self.tb_writer.add_scalar('training loss', loss, self.epoch_counter * self.test_data.get_client_datasize()) # does not seem to work :( ) - self.tb_writer.add_scalar('Number of clients dropped', sum([1 for x in responses if x.dropped or x.terminated]), self.epoch_counter) - - self.tb_writer.add_scalar('accuracy', accuracy, self.epoch_counter * self.test_data.get_client_datasize()) - self.record_epoch_event(f'Global accuracy is {accuracy}') - self.tb_writer.add_scalar('accuracy per epoch', accuracy, self.epoch_counter) - elapsed_time = time.time() - 
self.exp_start_time - self.tb_writer.add_scalar('accuracy wall time', - accuracy, # for every 1000 minibatches - elapsed_time) - - class_acc_dict = {} - for idx, acc in enumerate(accuracy_per_class): - class_acc_dict[f'{idx}'] = acc - self.tb_writer.add_scalars('accuracy per class', class_acc_dict, self.epoch_counter) - self.record_epoch_event(f'Accuracy per class is {class_acc_dict}') - end_epoch_time = time.time() - duration = end_epoch_time - start_epoch_time - - - self.exp_data_general.append([self.epoch_counter, end_epoch_time, duration, accuracy, loss, class_precision, class_recall]) - - - def set_tau_eff(self): - total = sum(client.data_size for client in self.clients) - responses = [] - for client in self.clients: - responses.append((client, _remote_method_async(Client.set_tau_eff, client.ref, total))) - torch.futures.wait_all([x[1] for x in responses]) - # for client in self.clients: - # client.set_tau_eff(total) - - def save_experiment_data(self): - p = Path(f'./{self.tb_path}') - # file_output = f'./{self.config.output_location}' - exp_prefix = self.config.experiment_prefix - self.ensure_path_exists(p) - p /= f'{exp_prefix}-general_data.csv' - # general_filename = f'{file_output}/general_data.csv' - df = pd.DataFrame(self.exp_data_general, columns=['epoch', 'wall_time', 'duration', 'accuracy', 'loss', 'class_precision', 'class_recall']) - df.to_csv(p) - - def update_client_data_sizes(self): - responses = [] - for client in self.clients: - responses.append((client, _remote_method_async(Client.get_client_datasize, client.ref))) - for res in responses: - res[0].data_size = res[1].wait() - logging.info(f'{res[0]} had a result of datasize={res[0].data_size}') - # @TODO: Use datasize in aggregation method - - def remote_test_sync(self): - responses = [] - for client in self.clients: - responses.append((client, _remote_method_async(Client.test, client.ref))) - - for res in responses: - accuracy, loss, class_precision, class_recall = res[1].wait() - 
logging.info(f'{res[0]} had a result of accuracy={accuracy}') - - def flush_epoch_events(self): - file_output = f'./{self.tb_path}' - exp_prefix = self.config.experiment_prefix - file_epoch_events = f'{file_output}/{exp_prefix}_federator_events.txt' - self.ensure_path_exists(file_output) - - with open(file_epoch_events, 'a') as f: - for ev in self.epoch_events: - f.write(f'{ev}\n') - f.flush() - - self.epoch_events = [] - - def save_epoch_data(self): - file_output = f'./{self.tb_path}' - exp_prefix = self.config.experiment_prefix - self.ensure_path_exists(file_output) - for key in self.client_data: - filename = f'{file_output}/{exp_prefix}_{key}_epochs.csv' - logging.info(f'Saving data at {filename}') - with open(filename, "w") as f: - w = DataclassWriter(f, self.client_data[key], EpochData) - w.write() - - def ensure_path_exists(self, path): - Path(path).mkdir(parents=True, exist_ok=True) - - - def run(self): - """ - Main loop of the Federator - :return: - - - - Steps in federated learning process - - 1. Client selection - 2. Run local updates - 3. Retrieve data - 4. 
Aggregate data - """ - # # Make sure the clients have loaded all the data - self.send_clients_ref() - self.client_load_data() - self.test_data.init_dataloader() - self.ping_all() - self.clients_ready() - self.update_client_data_sizes() - self.set_tau_eff() - - epoch_to_run = self.num_epoch - addition = 0 - epoch_to_run = self.config.epochs - epoch_size = self.config.epochs_per_cycle - - if self.config.warmup_round: - logging.info('Running warmup round') - self.remote_run_epoch(epoch_size, warmup=True) - - self.exp_start_time = time.time() - for epoch in range(epoch_to_run): - self.process_response_list() - logging.info(f'Running epoch {epoch}') - self.remote_run_epoch(epoch_size) - self.flush_epoch_events() - addition += 1 - - - logging.info(f'Saving data') - self.save_epoch_data() - self.save_experiment_data() - - # Ignore profiler for now - # logging.info(f'Reporting profile data') - # for key in self.performance_data.keys(): - # parse_stability_data(self.performance_data[key], save_to_file=True) - logging.info(f'Federator is stopping') - diff --git a/fltk/launch.py b/fltk/launch.py deleted file mode 100644 index 8e5c783a..00000000 --- a/fltk/launch.py +++ /dev/null @@ -1,72 +0,0 @@ -import os -import sys -import torch.distributed.rpc as rpc -import logging - -import yaml -import argparse - -import torch.multiprocessing as mp -from fltk.federator import Federator -from fltk.util.base_config import BareConfig - -logging.basicConfig(level=logging.DEBUG) - - -def run_ps(rpc_ids_triple, args): - print(f'Starting the federator...') - fed = Federator(rpc_ids_triple, config=args) - fed.run() - -def run_single(rank, world_size, host = None, args = None, nic = None): - logging.info(f'Starting with rank={rank} and world size={world_size}') - if host: - os.environ['MASTER_ADDR'] = host - else: - os.environ['MASTER_ADDR'] = '0.0.0.0' - os.environ['MASTER_PORT'] = '5000' - if nic: - os.environ['GLOO_SOCKET_IFNAME'] = nic - os.environ['TP_SOCKET_IFNAME'] = nic - else: - 
os.environ['GLOO_SOCKET_IFNAME'] = 'eth0' - os.environ['TP_SOCKET_IFNAME'] = 'eth0' - logging.info(f'Starting with host={os.environ["MASTER_ADDR"]} and port={os.environ["MASTER_PORT"]}') - options = rpc.TensorPipeRpcBackendOptions( - num_worker_threads=16, - rpc_timeout=0, # infinite timeout - init_method=f'tcp://{os.environ["MASTER_ADDR"]}:{os.environ["MASTER_PORT"]}' - ) - - if rank != 0: - logging.info(f'Starting worker {rank}') - rpc.init_rpc( - f"client{rank}", - rank=rank, - world_size=world_size, - rpc_backend_options=options, - ) - # trainer passively waiting for ps to kick off training iterations - else: - logging.info('Starting the ps') - rpc.init_rpc( - "ps", - rank=rank, - world_size=world_size, - rpc_backend_options=options - - ) - run_ps([(f"client{r}", r, world_size) for r in range(1, world_size)], args) - # block until all rpc finish - rpc.shutdown() - - -def run_spawn(config): - world_size = config.world_size - master_address = config.federator_host - mp.spawn( - run_single, - args=(world_size, master_address, config), - nprocs=world_size, - join=True - ) \ No newline at end of file diff --git a/fltk/util/config.py b/fltk/util/config.py index 9b262606..5682059a 100644 --- a/fltk/util/config.py +++ b/fltk/util/config.py @@ -6,6 +6,7 @@ import torch import yaml +from fltk.util.log import getLogger from fltk.util.definitions import Dataset, Nets, DataSampler, Optimizations, LogLevel, Aggregations @@ -25,6 +26,9 @@ class Config: scheduler_step_size: int = 50 scheduler_gamma: float = 0.5 min_lr: float = 1e-10 + + # @TODO: Set seed from configuration + rng_seed = 0 # Enum optimizer: Optimizations = Optimizations.sgd optimizer_args = { @@ -73,7 +77,11 @@ def __init__(self, **kwargs) -> None: self.__setattr__(name, value) if name == 'output_location': self.output_path = Path(value) + self.update_rng_seed() + + def update_rng_seed(self): + torch.manual_seed(self.rng_seed) def get_default_model_folder_path(self): return self.default_model_folder_path @@ 
-100,9 +108,9 @@ def get_loss_function(self): @classmethod def FromYamlFile(cls, path: Path): - print(f'Loading yaml from {path.absolute()}') + getLogger(__name__).debug(f'Loading yaml from {path.absolute()}') with open(path) as file: content = yaml.safe_load(file) for k, v in content.items(): - print(f'Inserting key "{k}" into config') + getLogger(__name__).debug(f'Inserting key "{k}" into config') return cls(**content) diff --git a/fltk/util/fed_avg.py b/fltk/util/fed_avg.py deleted file mode 100644 index e60d1684..00000000 --- a/fltk/util/fed_avg.py +++ /dev/null @@ -1,12 +0,0 @@ -def average_nn_parameters(parameters): - """ - Averages passed parameters. - - :param parameters: nn model named parameters - :type parameters: list - """ - new_params = {} - for name in parameters[0].keys(): - new_params[name] = sum([param[name].data for param in parameters]) / len(parameters) - - return new_params diff --git a/fltk/util/generate_docker_compose_2.py b/fltk/util/generate_docker_compose_2.py index a35bd43d..7185fa01 100644 --- a/fltk/util/generate_docker_compose_2.py +++ b/fltk/util/generate_docker_compose_2.py @@ -1,7 +1,5 @@ import copy from pathlib import Path -from pprint import pprint - import yaml import numpy as np @@ -10,6 +8,7 @@ def load_yaml_file(file_path: Path): with open(file_path) as file: return yaml.full_load(file) + def generate_client(id, template: dict, world_size: int, type='default', cpu_set=None, num_cpus=1): local_template = copy.deepcopy(template) key_name = list(local_template.keys())[0] @@ -20,10 +19,6 @@ def generate_client(id, template: dict, world_size: int, type='default', cpu_set local_template[container_name]['environment'][key] = item.format(rank=id) if item == 'WORLD_SIZE={world_size}': local_template[container_name]['environment'][key] = item.format(world_size=world_size) - # for key, item in enumerate(local_template[container_name]): - # if item == 'cpuset: {cpu_set}': - # local_template[container_name][key] = 
item.format(cpu_set=cpu_set) - local_template[container_name]['ports'] = [f'{5000+id}:5000'] if cpu_set: local_template[container_name]['cpuset'] = f'{cpu_set}' @@ -32,6 +27,7 @@ def generate_client(id, template: dict, world_size: int, type='default', cpu_set local_template[container_name]['deploy']['resources']['limits']['cpus'] = f'{num_cpus}' return local_template, container_name + def gen_client(name: str, client_dict: dict, base_path: Path): """ rank (id) @@ -56,27 +52,22 @@ def gen_client(name: str, client_dict: dict, base_path: Path): if client_dict['pin-cores'] is True: client_descr_template['num_cores'] = client_dict['num-cores'] client_descr_template['stub-file'] = client_dict['stub-name'] - # print(name) - # pprint(stub_data) client_cpu_speeds = np.abs(np.round(np.random.normal(mu, sigma, size=n), 2)) client_descriptions = [] for cpu_speed in client_cpu_speeds: client_descr = copy.deepcopy(client_descr_template) client_descr['num_cpu'] = cpu_speed client_descriptions.append(client_descr) - # client_data = copy.deepcopy(client_dict) - # client_data.pop('cpu-variation') - # print(cpu_speed) - # print(np.random.normal(mu, sigma, size=n)) - # for k, v in client_dict.items(): - # print(k) return client_descriptions + + def generate_clients_proporties(clients_dict: dict, path: Path): results = [] for k,v in clients_dict.items(): results += gen_client(k, v, path) return results + def generate_compose_file(path: Path): """ Used properties: @@ -85,32 +76,7 @@ def generate_compose_file(path: Path): - path to deploy files - random seed? 
""" - # system = { - # - # 'federator': { - # 'stub-name': 'system_stub.yml', - # 'pin-cores': True, - # 'num-cores': 1 - # }, - # 'clients': { - # 'fast': { - # 'stub-name': 'stub_default.yml', - # 'amount': 1, - # 'pin-cores': True, - # 'num-cores': 3, - # 'cpu-speed': 3, - # 'cpu-variation': 0 - # }, - # 'slow': { - # 'stub-name': 'stub_default.yml', - # 'amount': 0, - # 'pin-cores': True, - # 'num-cores': 1, - # 'cpu-speed': 1, - # 'cpu-variation': 0 - # } - # } - # } + system_path = path / 'description.yml' system = load_yaml_file(system_path) # path = Path('deploy/dev_generate') @@ -136,7 +102,6 @@ def generate_compose_file(path: Path): last_core_id += amount else: system_template['services']['fl_server'].pop('cpuset') - for idx, client_d in enumerate(client_descriptions): stub_file = path / client_d['stub-file'] stub_data = load_yaml_file(stub_file) @@ -151,14 +116,11 @@ def generate_compose_file(path: Path): local_template, container_name = generate_client(idx + 1, stub_data, world_size, client_d['name'], cpu_set, client_d['num_cpu']) system_template['services'].update(local_template) print(container_name) - with open(r'./docker-compose.yml', 'w') as file: yaml.dump(system_template, file, sort_keys=False) - if __name__ == '__main__': - path = Path('deploy/dev_generate') results = generate_compose_file(path) print('done') \ No newline at end of file From eca15cf5b379d7fde21a4925ea9ffc0cf8a692a5 Mon Sep 17 00:00:00 2001 From: bacox Date: Tue, 15 Mar 2022 18:21:49 +0100 Subject: [PATCH 69/73] Clean up dockerfile --- Dockerfile | 38 +++----------------------------------- 1 file changed, 3 insertions(+), 35 deletions(-) diff --git a/Dockerfile b/Dockerfile index b5d5eb0e..afa10773 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,18 +1,6 @@ -# FROM python:3-alpine - -# RUN mkdir /data -# VOLUME /data - -# EXPOSE 8080 - -# WORKDIR /data - -# CMD ["python", "-m" , "http.server", "8080"] - # Base image to start with FROM ubuntu:20.04 - -# Who maintains this DockerFile 
+ MAINTAINER Bart Cox # Run build without interactive dialogue @@ -28,37 +16,17 @@ RUN apt-get update \ # Copy the current folder to the working directory COPY setup.py ./ COPY requirements.txt ./ -#COPY fltk ./fltk -#COPY configs ./configs # Install all required packages for the generator RUN python3 -m pip install -r requirements.txt ENV GLOO_SOCKET_IFNAME=$NIC ENV TP_SOCKET_IFNAME=$NIC -#ENV GLOO_SOCKET_IFNAME=eth0 -#ENV TP_SOCKET_IFNAME=eth0 - -#RUN mkdir -p ./data/MNIST -#COPY ./data/MNIST ../data/MNIST -#ADD fltk ./fedsim -#RUN ls -la -#COPY federated_learning.py ./ -#COPY custom_mnist.py ./ -#RUN ls -la ./fedsim # Expose the container's port to the host OS EXPOSE 5000 -# Run command by default for the executing container -# CMD ["python3", "/opt/Generatrix/rpc_parameter_server.py", "--world_size=2", "--rank=0", "--master_addr=192.168.144.2"] - -#CMD python3 /opt/federation-lab/rpc_parameter_server.py --world_size=$WORLD_SIZE --rank=$RANK --master_addr=10.5.0.11 -#CMD python3 /opt/federation-lab/federated_learning.py $RANK $WORLD_SIZE 10.5.0.11 COPY fltk ./fltk COPY configs ./configs -#CMD python3 ./fltk/__main__.py single configs/experiment.yaml --rank=$RANK -# CMD python3 -m fltk single configs/experiment_vanilla.yaml --rank=$RANK -# CMD python3 -m fltk single $EXP_CONFIG --rank=$RANK -CMD python3 -m fltk remote $EXP_CONFIG $RANK --nic=$NIC --host=$MASTER_HOSTNAME $OPTIONAL_PARAMS -#CMD python3 setup.py \ No newline at end of file + +CMD python3 -m fltk remote $EXP_CONFIG $RANK --nic=$NIC --host=$MASTER_HOSTNAME $OPTIONAL_PARAMS \ No newline at end of file From d2b7d80c421f06c150358ed374fe51977568c695 Mon Sep 17 00:00:00 2001 From: bacox Date: Wed, 16 Mar 2022 15:30:14 +0100 Subject: [PATCH 70/73] simplify experiment files --- .gitignore | 3 +- Dockerfile | 1 + configs/dev/descr.yaml | 31 ------------ configs/dev/dev_p2_fedavg.cfg.yaml | 3 -- configs/dev/exps/dev_p2_fedavg.yaml | 35 ------------- configs/dev/exps/dev_p2_fedprox.yaml | 37 -------------- 
configs/dev/gen.py | 27 ---------- configs/dev/run.py | 21 -------- configs/dev_mnist/exps/fedavg.yaml | 38 -------------- configs/dev_mnist/gen.py | 25 ---------- configs/dev_mnist/run.py | 20 -------- configs/dev_mnist_all/descr.yaml | 31 ------------ configs/dev_mnist_all/exps/fedavg.yaml | 36 ------------- configs/dev_mnist_all/exps/fednova.yaml | 36 ------------- configs/dev_mnist_all/exps/fedprox.yaml | 36 ------------- configs/dev_mnist_all/fedavg.cfg.yaml | 3 -- configs/dev_mnist_all/fednova.cfg.yaml | 4 -- configs/dev_mnist_all/fedprox.cfg.yaml | 4 -- configs/dev_mnist_all/gen.py | 26 ---------- configs/dev_mnist_all/run.py | 22 -------- .../p_freezing-iid_freeze.yaml | 34 ------------- .../p_freezing-iid_vanilla.yaml | 34 ------------- configs/effect-freezing/run.py | 18 ------- configs/exp_p2_vanilla.yaml | 30 ----------- configs/exp_p2_w_4_s2_vanilla.yaml | 30 ----------- configs/exp_p2_w_4_s4_vanilla.yaml | 30 ----------- configs/exp_p3_w_4_s4_baseline.yaml | 30 ----------- configs/exp_p3_w_4_s4_baseline_check.yaml | 30 ----------- configs/exp_p3_w_4_s4_baseline_long.yaml | 30 ----------- configs/exp_p3_w_4_s4_deadline.yaml | 30 ----------- configs/exp_p3_w_4_s4_deadline_check.yaml | 30 ----------- configs/exp_p3_w_4_s4_deadline_long.yaml | 30 ----------- configs/exp_p3_w_4_s4_freeze.yaml | 30 ----------- configs/exp_p3_w_4_s4_freeze_long.yaml | 30 ----------- configs/exp_p3_w_4_s4_slow_baseline.yaml | 30 ----------- configs/exp_p3_w_4_s4_slow_deadline.yaml | 30 ----------- configs/exp_p3_w_4_s4_swyh.yaml | 30 ----------- configs/exp_p3_w_4_s4_swyh_long.yaml | 30 ----------- configs/exp_p4_w_4_s2_swyh_non_iid.yaml | 30 ----------- configs/exp_p4_w_4_s2_vanilla_non_iid.yaml | 30 ----------- configs/exp_p5_w_4_s4_deadline_non_iid.yaml | 30 ----------- configs/exp_p5_w_4_s4_freeze_non_iid.yaml | 30 ----------- configs/exp_p5_w_4_s4_swyh_non_iid.yaml | 30 ----------- configs/exp_p5_w_4_s4_vanilla_non_iid.yaml | 30 ----------- 
configs/exp_p6_w_4_s4_deadline_non_iid.yaml | 30 ----------- configs/exp_p6_w_4_s4_swyh_non_iid.yaml | 30 ----------- configs/exp_p7_w_4_s2_deadline_non_iid.yaml | 30 ----------- configs/exp_p7_w_4_s2_freeze_non_iid.yaml | 30 ----------- configs/exp_p7_w_4_s2_swyh_non_iid.yaml | 30 ----------- configs/exp_p7_w_4_s2_vanilla_non_iid.yaml | 30 ----------- ...exp_p8_w_4_s2_baseline_fmnist_non_iid.yaml | 32 ------------ .../exp_p8_w_4_s2_freeze_fmnist_non_iid.yaml | 32 ------------ .../exp_p8_w_4_s2_offload_fmnist_non_iid.yaml | 32 ------------ .../exp_p8_w_4_s2_swyh_fmnist_non_iid.yaml | 32 ------------ configs/exp_p8_w_4_s4_baseline_fmnist.yaml | 32 ------------ ...exp_p8_w_4_s4_baseline_fmnist_non_iid.yaml | 32 ------------ configs/exp_p8_w_4_s4_freeze_fmnist.yaml | 32 ------------ .../exp_p8_w_4_s4_freeze_fmnist_non_iid.yaml | 32 ------------ configs/exp_p8_w_4_s4_offload_fmnist.yaml | 32 ------------ .../exp_p8_w_4_s4_offload_fmnist_non_iid.yaml | 32 ------------ configs/exp_p8_w_4_s4_swyh_fmnist.yaml | 32 ------------ .../exp_p8_w_4_s4_swyh_fmnist_non_iid.yaml | 32 ------------ configs/exp_p8_w_4_s4_vanilla_.yaml | 30 ----------- configs/exp_p8_w_4_s4_vanilla_fmnist.yaml | 32 ------------ configs/experiment.yaml | 28 ----------- configs/experiment_cifar100.yaml | 28 ----------- configs/experiment_deadline.yaml | 28 ----------- configs/experiment_fmnist.yaml | 28 ----------- configs/experiment_fmnist_offload.yaml | 30 ----------- configs/experiment_freeze.yaml | 28 ----------- configs/experiment_gcp_c20.yaml | 19 ------- configs/experiment_gcp_single.yaml | 19 ------- configs/experiment_offload.yaml | 28 ----------- configs/experiment_p18_full.yaml | 28 ----------- configs/experiment_swyh.yaml | 28 ----------- configs/experiment_swyh_first_long.yaml | 29 ----------- configs/experiment_swyh_warmup.yaml | 28 ----------- configs/experiment_vanilla.yaml | 28 ----------- configs/non_iid_experiment.yaml | 23 --------- .../p11A_freezoff_iid_dyn_terminate.yaml | 35 
------------- .../p11A_freezoff_iid_dyn_terminate_swyh.yaml | 35 ------------- .../p11A_freezoff_iid_fedavg.yaml | 34 ------------- .../p11A_freezoff_iid_fednova.yaml | 35 ------------- .../p11A_freezoff_iid_fedprox.yaml | 35 ------------- .../p11A_freezoff_iid_offload.yaml | 34 ------------- .../p11A_freezoff_iid_offload_strict.yaml | 35 ------------- .../p11A_freezoff_iid_tifl_adaptive.yaml | 34 ------------- .../p11A_freezoff_iid_tifl_basic.yaml | 34 ------------- configs/p11A_freezoff_iid/run.py | 30 ----------- .../p11_freezoff_iid_dyn_terminate.yaml | 35 ------------- .../p11_freezoff_iid_dyn_terminate_swyh.yaml | 35 ------------- .../p11_freezoff_iid_fedavg.yaml | 34 ------------- .../p11_freezoff_iid_fednova.yaml | 35 ------------- .../p11_freezoff_iid_fedprox.yaml | 35 ------------- .../p11_freezoff_iid_offload.yaml | 34 ------------- .../p11_freezoff_iid_offload_strict.yaml | 35 ------------- .../p11_freezoff_iid_tifl_adaptive.yaml | 34 ------------- .../p11_freezoff_iid_tifl_basic.yaml | 34 ------------- configs/p11_freezoff_iid/run.py | 30 ----------- .../p12_freezoff_iid_dyn_terminate_large.yaml | 35 ------------- ...freezoff_iid_dyn_terminate_swyh_large.yaml | 35 ------------- .../p12_freezoff_iid_fedavg_large.yaml | 34 ------------- .../p12_freezoff_iid_fednova_large.yaml | 35 ------------- .../p12_freezoff_iid_fedprox_large.yaml | 35 ------------- .../p12_freezoff_iid_offload_large.yaml | 34 ------------- ...p12_freezoff_iid_offload_strict_large.yaml | 35 ------------- .../p12_freezoff_iid_tifl_adaptive_large.yaml | 34 ------------- .../p12_freezoff_iid_tifl_basic_large.yaml | 34 ------------- configs/p12_freezoff_iid_large/run.py | 30 ----------- .../p13_variance_dev_dyn_terminate_large.yaml | 34 ------------- ...variance_dev_dyn_terminate_swyh_large.yaml | 34 ------------- .../p13_variance_dev_fedavg_large.yaml | 34 ------------- .../p13_variance_dev_fednova_large.yaml | 35 ------------- .../p13_variance_dev_fedprox_large.yaml | 35 ------------- 
.../p13_variance_dev_offload_large.yaml | 34 ------------- .../p13_variance_dev_tifl_adaptive_large.yaml | 34 ------------- .../p13_variance_dev_tifl_basic_large.yaml | 34 ------------- configs/p13_variance_dev/run.py | 29 ----------- configs/p14A_check_iid_cifar10_cnn/descr.yaml | 32 ------------ .../dyn_terminate.cfg.yaml | 3 -- .../dyn_terminate_swyh.cfg.yaml | 3 -- .../exps/dyn_terminate.yaml | 36 ------------- .../exps/dyn_terminate_swyh.yaml | 37 -------------- .../exps/fedavg.yaml | 36 ------------- .../exps/fednova.yaml | 37 -------------- .../exps/fedprox.yaml | 37 -------------- .../exps/offload.yaml | 36 ------------- .../exps/offload_strict.yaml | 37 -------------- .../exps/tifl_adaptive.yaml | 36 ------------- .../exps/tifl_basic.yaml | 36 ------------- .../fedavg.cfg.yaml | 3 -- .../fednova.cfg.yaml | 4 -- .../fedprox.cfg.yaml | 4 -- configs/p14A_check_iid_cifar10_cnn/gen.py | 26 ---------- .../offload.cfg.yaml | 3 -- .../offload_strict.cfg.yaml | 4 -- configs/p14A_check_iid_cifar10_cnn/run.py | 43 ---------------- .../tifl_adaptive.cfg.yaml | 3 -- .../tifl_basic.cfg.yaml | 3 -- .../descr.yaml | 27 ---------- .../exps/fedavg-iid-uniform.yaml | 35 ------------- .../exps/fedavg-non-iid-1.yaml | 35 ------------- .../exps/fedavg-non-iid-10.yaml | 35 ------------- .../exps/fedavg-non-iid-2.yaml | 35 ------------- .../exps/fedavg-non-iid-5.yaml | 35 ------------- .../fedavg-iid-uniform.cfg.yaml | 7 --- .../fedavg-non-iid-1.cfg.yaml | 7 --- .../fedavg-non-iid-10.cfg.yaml | 7 --- .../fedavg-non-iid-2.cfg.yaml | 7 --- .../fedavg-non-iid-5.cfg.yaml | 7 --- .../gen.py | 26 ---------- .../run.py | 34 ------------- configs/p14_check_iid_cifar10_cnn/descr.yaml | 32 ------------ .../dyn_terminate.cfg.yaml | 3 -- .../dyn_terminate_swyh.cfg.yaml | 3 -- .../exps/dyn_terminate.yaml | 36 ------------- .../exps/dyn_terminate_swyh.yaml | 37 -------------- .../exps/fedavg.yaml | 36 ------------- .../exps/fednova.yaml | 37 -------------- .../exps/fedprox.yaml | 37 
-------------- .../exps/offload.yaml | 36 ------------- .../exps/offload_strict.yaml | 37 -------------- .../exps/tifl_adaptive.yaml | 36 ------------- .../exps/tifl_basic.yaml | 36 ------------- .../p14_check_iid_cifar10_cnn/fedavg.cfg.yaml | 3 -- .../fednova.cfg.yaml | 4 -- .../fedprox.cfg.yaml | 4 -- configs/p14_check_iid_cifar10_cnn/gen.py | 26 ---------- .../offload.cfg.yaml | 3 -- .../offload_strict.cfg.yaml | 4 -- configs/p14_check_iid_cifar10_cnn/run.py | 43 ---------------- .../tifl_adaptive.cfg.yaml | 3 -- .../tifl_basic.cfg.yaml | 3 -- ..._freezoff_non_iid_dyn_terminate_large.yaml | 35 ------------- ...zoff_non_iid_dyn_terminate_swyh_large.yaml | 35 ------------- .../p15_freezoff_non_iid_fedavg_large.yaml | 34 ------------- .../p15_freezoff_non_iid_fednova_large.yaml | 35 ------------- .../p15_freezoff_non_iid_fedprox_large.yaml | 35 ------------- .../p15_freezoff_non_iid_offload_large.yaml | 34 ------------- ...freezoff_non_iid_offload_strict_large.yaml | 35 ------------- ..._freezoff_non_iid_tifl_adaptive_large.yaml | 34 ------------- ...p15_freezoff_non_iid_tifl_basic_large.yaml | 34 ------------- configs/p15_freezoff_non_iid_large/run.py | 30 ----------- .../p20_freezoff_iid_fmnist_cnn/descr.yaml | 32 ------------ .../dyn_terminate.cfg.yaml | 3 -- .../dyn_terminate_swyh.cfg.yaml | 3 -- .../exps/dyn_terminate.yaml | 36 ------------- .../exps/dyn_terminate_swyh.yaml | 37 -------------- .../exps/fedavg.yaml | 36 ------------- .../exps/fednova.yaml | 37 -------------- .../exps/fedprox.yaml | 37 -------------- .../exps/offload.yaml | 36 ------------- .../exps/offload_strict.yaml | 37 -------------- .../exps/tifl_adaptive.yaml | 36 ------------- .../exps/tifl_basic.yaml | 36 ------------- .../fedavg.cfg.yaml | 3 -- .../fednova.cfg.yaml | 4 -- .../fedprox.cfg.yaml | 4 -- configs/p20_freezoff_iid_fmnist_cnn/gen.py | 26 ---------- .../offload.cfg.yaml | 3 -- .../offload_strict.cfg.yaml | 4 -- configs/p20_freezoff_iid_fmnist_cnn/run.py | 35 ------------- 
.../tifl_adaptive.cfg.yaml | 3 -- .../tifl_basic.cfg.yaml | 3 -- .../descr.yaml | 32 ------------ .../dyn_terminate.cfg.yaml | 3 -- .../dyn_terminate_swyh.cfg.yaml | 3 -- .../exps/dyn_terminate.yaml | 36 ------------- .../exps/dyn_terminate_swyh.yaml | 37 -------------- .../exps/fedavg.yaml | 36 ------------- .../exps/fednova.yaml | 37 -------------- .../exps/fedprox.yaml | 37 -------------- .../exps/offload.yaml | 36 ------------- .../exps/offload_strict.yaml | 37 -------------- .../exps/tifl_adaptive.yaml | 36 ------------- .../exps/tifl_basic.yaml | 36 ------------- .../fedavg.cfg.yaml | 3 -- .../fednova.cfg.yaml | 4 -- .../fedprox.cfg.yaml | 4 -- .../p21_freezoff_non_iid_fmnist_cnn/gen.py | 26 ---------- .../offload.cfg.yaml | 3 -- .../offload_strict.cfg.yaml | 4 -- .../p21_freezoff_non_iid_fmnist_cnn/run.py | 35 ------------- .../tifl_adaptive.cfg.yaml | 3 -- .../tifl_basic.cfg.yaml | 3 -- .../p22_freezoff_iid_cifar10_cnn/descr.yaml | 32 ------------ .../dyn_terminate.cfg.yaml | 3 -- .../dyn_terminate_swyh.cfg.yaml | 3 -- .../exps/dyn_terminate.yaml | 36 ------------- .../exps/dyn_terminate_swyh.yaml | 37 -------------- .../exps/fedavg.yaml | 36 ------------- .../exps/fednova.yaml | 37 -------------- .../exps/fedprox.yaml | 37 -------------- .../exps/offload.yaml | 36 ------------- .../exps/offload_strict.yaml | 37 -------------- .../exps/tifl_adaptive.yaml | 36 ------------- .../exps/tifl_basic.yaml | 36 ------------- .../fedavg.cfg.yaml | 3 -- .../fednova.cfg.yaml | 4 -- .../fedprox.cfg.yaml | 4 -- configs/p22_freezoff_iid_cifar10_cnn/gen.py | 26 ---------- .../offload.cfg.yaml | 3 -- .../offload_strict.cfg.yaml | 4 -- configs/p22_freezoff_iid_cifar10_cnn/run.py | 30 ----------- .../tifl_adaptive.cfg.yaml | 3 -- .../tifl_basic.cfg.yaml | 3 -- .../descr.yaml | 32 ------------ .../dyn_terminate.cfg.yaml | 3 -- .../dyn_terminate_swyh.cfg.yaml | 3 -- .../exps/dyn_terminate.yaml | 36 ------------- .../exps/dyn_terminate_swyh.yaml | 37 -------------- 
.../exps/fedavg.yaml | 36 ------------- .../exps/fednova.yaml | 37 -------------- .../exps/fedprox.yaml | 37 -------------- .../exps/offload.yaml | 36 ------------- .../exps/offload_strict.yaml | 37 -------------- .../exps/tifl_adaptive.yaml | 36 ------------- .../exps/tifl_basic.yaml | 36 ------------- .../fedavg.cfg.yaml | 3 -- .../fednova.cfg.yaml | 4 -- .../fedprox.cfg.yaml | 4 -- .../p23_freezoff_non_iid_cifar10_cnn/gen.py | 26 ---------- .../offload.cfg.yaml | 3 -- .../offload_strict.cfg.yaml | 4 -- .../p23_freezoff_non_iid_cifar10_cnn/run.py | 30 ----------- .../tifl_adaptive.cfg.yaml | 3 -- .../tifl_basic.cfg.yaml | 3 -- .../descr.yaml | 32 ------------ .../dyn_terminate.cfg.yaml | 3 -- .../dyn_terminate_swyh.cfg.yaml | 3 -- .../exps/dyn_terminate.yaml | 36 ------------- .../exps/dyn_terminate_swyh.yaml | 37 -------------- .../exps/fedavg.yaml | 36 ------------- .../exps/fednova.yaml | 37 -------------- .../exps/fedprox.yaml | 37 -------------- .../exps/offload.yaml | 36 ------------- .../exps/offload_strict.yaml | 37 -------------- .../exps/offload_strict2.yaml | 37 -------------- .../exps/offload_strict3.yaml | 37 -------------- .../exps/offload_strict4.yaml | 37 -------------- .../exps/offload_strict5.yaml | 37 -------------- .../exps/offload_strict6.yaml | 37 -------------- .../exps/offload_strict7.yaml | 37 -------------- .../exps/tifl_adaptive.yaml | 36 ------------- .../exps/tifl_basic.yaml | 36 ------------- .../fedavg.cfg.yaml | 3 -- .../fednova.cfg.yaml | 4 -- .../fedprox.cfg.yaml | 4 -- .../p24_freezoff_iid_cifar10_cnn_w9s3/gen.py | 26 ---------- .../offload.cfg.yaml | 3 -- .../offload_strict.cfg.yaml | 4 -- .../offload_strict2.cfg.yaml | 4 -- .../offload_strict3.cfg.yaml | 4 -- .../offload_strict4.cfg.yaml | 4 -- .../offload_strict5.cfg.yaml | 4 -- .../offload_strict6.cfg.yaml | 4 -- .../offload_strict7.cfg.yaml | 4 -- .../p24_freezoff_iid_cifar10_cnn_w9s3/run.py | 43 ---------------- .../tifl_adaptive.cfg.yaml | 3 -- .../tifl_basic.cfg.yaml | 3 
-- .../descr.yaml | 32 ------------ .../dyn_terminate.cfg.yaml | 3 -- .../dyn_terminate_swyh.cfg.yaml | 3 -- .../exps/dyn_terminate.yaml | 36 ------------- .../exps/dyn_terminate_swyh.yaml | 37 -------------- .../exps/fedavg.yaml | 36 ------------- .../exps/fednova.yaml | 37 -------------- .../exps/fedprox.yaml | 37 -------------- .../exps/offload.yaml | 36 ------------- .../exps/offload_strict.yaml | 37 -------------- .../exps/offload_strict2.yaml | 37 -------------- .../exps/offload_strict3.yaml | 37 -------------- .../exps/offload_strict4.yaml | 37 -------------- .../exps/offload_strict5.yaml | 37 -------------- .../exps/offload_strict6.yaml | 37 -------------- .../exps/offload_strict7.yaml | 37 -------------- .../exps/tifl_adaptive.yaml | 36 ------------- .../exps/tifl_basic.yaml | 36 ------------- .../fedavg.cfg.yaml | 3 -- .../fednova.cfg.yaml | 4 -- .../fedprox.cfg.yaml | 4 -- .../gen.py | 26 ---------- .../offload.cfg.yaml | 3 -- .../offload_strict.cfg.yaml | 4 -- .../offload_strict10.yaml | 4 -- .../offload_strict11.yaml | 4 -- .../offload_strict2.cfg.yaml | 4 -- .../offload_strict3.cfg.yaml | 4 -- .../offload_strict4.cfg.yaml | 4 -- .../offload_strict5.cfg.yaml | 4 -- .../offload_strict6.cfg.yaml | 4 -- .../offload_strict7.cfg.yaml | 4 -- .../offload_strict8.yaml | 4 -- .../offload_strict9.yaml | 4 -- .../run.py | 48 ------------------ .../tifl_adaptive.cfg.yaml | 3 -- .../tifl_basic.cfg.yaml | 3 -- .../descr.yaml | 32 ------------ .../dyn_terminate.cfg.yaml | 3 -- .../dyn_terminate_swyh.cfg.yaml | 3 -- .../exps/dyn_terminate.yaml | 36 ------------- .../exps/dyn_terminate_swyh.yaml | 37 -------------- .../exps/fedavg.yaml | 36 ------------- .../exps/fednova.yaml | 37 -------------- .../exps/fedprox.yaml | 37 -------------- .../exps/offload.yaml | 36 ------------- .../exps/offload_strict.yaml | 37 -------------- .../exps/offload_strict4.yaml | 37 -------------- .../exps/tifl_adaptive.yaml | 36 ------------- .../exps/tifl_basic.yaml | 36 ------------- 
.../fedavg.cfg.yaml | 3 -- .../fednova.cfg.yaml | 4 -- .../fedprox.cfg.yaml | 4 -- .../p26_freezoff_iid_mnist_cnn_w9s3/gen.py | 26 ---------- .../offload.cfg.yaml | 3 -- .../offload_strict.cfg.yaml | 4 -- .../offload_strict4.cfg.yaml | 4 -- .../p26_freezoff_iid_mnist_cnn_w9s3/run.py | 38 -------------- .../tifl_adaptive.cfg.yaml | 3 -- .../tifl_basic.cfg.yaml | 3 -- .../descr.yaml | 32 ------------ .../dyn_terminate.cfg.yaml | 3 -- .../dyn_terminate_swyh.cfg.yaml | 3 -- .../exps/dyn_terminate.yaml | 36 ------------- .../exps/dyn_terminate_swyh.yaml | 37 -------------- .../exps/fedavg.yaml | 36 ------------- .../exps/fednova.yaml | 37 -------------- .../exps/fedprox.yaml | 37 -------------- .../exps/offload.yaml | 36 ------------- .../exps/offload_strict.yaml | 37 -------------- .../exps/offload_strict4.yaml | 37 -------------- .../exps/tifl_adaptive.yaml | 36 ------------- .../exps/tifl_basic.yaml | 36 ------------- .../fedavg.cfg.yaml | 3 -- .../fednova.cfg.yaml | 4 -- .../fedprox.cfg.yaml | 4 -- .../gen.py | 26 ---------- .../offload.cfg.yaml | 3 -- .../offload_strict.cfg.yaml | 4 -- .../offload_strict4.cfg.yaml | 4 -- .../run.py | 38 -------------- .../tifl_adaptive.cfg.yaml | 3 -- .../tifl_basic.cfg.yaml | 3 -- .../descr.yaml | 27 ---------- .../exps/fedavg-iid-uniform.yaml | 35 ------------- .../exps/fedavg-non-iid-1.yaml | 35 ------------- .../exps/fedavg-non-iid-10.yaml | 35 ------------- .../exps/fedavg-non-iid-2.yaml | 35 ------------- .../exps/fedavg-non-iid-5.yaml | 35 ------------- .../fedavg-iid-uniform.cfg.yaml | 7 --- .../fedavg-non-iid-1.cfg.yaml | 7 --- .../fedavg-non-iid-10.cfg.yaml | 7 --- .../fedavg-non-iid-2.cfg.yaml | 7 --- .../fedavg-non-iid-5.cfg.yaml | 7 --- .../p28_effect_of_non_iid_ness_mnist/gen.py | 26 ---------- .../p28_effect_of_non_iid_ness_mnist/run.py | 34 ------------- .../p29_effect_of_freezing_mnist/descr.yaml | 27 ---------- .../exps/fedavg-iid-freeze-0.yaml | 36 ------------- .../exps/fedavg-iid-freeze-10.yaml | 36 
------------- .../exps/fedavg-iid-freeze-100.yaml | 36 ------------- .../exps/fedavg-iid-freeze-20.yaml | 36 ------------- .../exps/fedavg-iid-freeze-30.yaml | 36 ------------- .../exps/fedavg-iid-freeze-40.yaml | 36 ------------- .../exps/fedavg-iid-freeze-50.yaml | 36 ------------- .../exps/fedavg-iid-freeze-60.yaml | 36 ------------- .../exps/fedavg-iid-freeze-70.yaml | 36 ------------- .../exps/fedavg-iid-freeze-80.yaml | 36 ------------- .../exps/fedavg-iid-freeze-90.yaml | 36 ------------- .../exps/fedavg-non_iid-freeze-0.yaml | 36 ------------- .../exps/fedavg-non_iid-freeze-10.yaml | 36 ------------- .../exps/fedavg-non_iid-freeze-100.yaml | 36 ------------- .../exps/fedavg-non_iid-freeze-20.yaml | 36 ------------- .../exps/fedavg-non_iid-freeze-30.yaml | 36 ------------- .../exps/fedavg-non_iid-freeze-40.yaml | 36 ------------- .../exps/fedavg-non_iid-freeze-50.yaml | 36 ------------- .../exps/fedavg-non_iid-freeze-60.yaml | 36 ------------- .../exps/fedavg-non_iid-freeze-70.yaml | 36 ------------- .../exps/fedavg-non_iid-freeze-80.yaml | 36 ------------- .../exps/fedavg-non_iid-freeze-90.yaml | 36 ------------- .../fedavg-iid-freeze-0.cfg.yaml | 8 --- .../fedavg-iid-freeze-10.cfg.yaml | 8 --- .../fedavg-iid-freeze-100.cfg.yaml | 8 --- .../fedavg-iid-freeze-20.cfg.yaml | 8 --- .../fedavg-iid-freeze-30.cfg.yaml | 8 --- .../fedavg-iid-freeze-40.cfg.yaml | 8 --- .../fedavg-iid-freeze-50.cfg.yaml | 8 --- .../fedavg-iid-freeze-60.cfg.yaml | 8 --- .../fedavg-iid-freeze-70.cfg.yaml | 8 --- .../fedavg-iid-freeze-80.cfg.yaml | 8 --- .../fedavg-iid-freeze-90.cfg.yaml | 8 --- .../fedavg-non_iid-freeze-0.cfg.yaml | 8 --- .../fedavg-non_iid-freeze-10.cfg.yaml | 8 --- .../fedavg-non_iid-freeze-100.cfg.yaml | 8 --- .../fedavg-non_iid-freeze-20.cfg.yaml | 8 --- .../fedavg-non_iid-freeze-30.cfg.yaml | 8 --- .../fedavg-non_iid-freeze-40.cfg.yaml | 8 --- .../fedavg-non_iid-freeze-50.cfg.yaml | 8 --- .../fedavg-non_iid-freeze-60.cfg.yaml | 8 --- 
.../fedavg-non_iid-freeze-70.cfg.yaml | 8 --- .../fedavg-non_iid-freeze-80.cfg.yaml | 8 --- .../fedavg-non_iid-freeze-90.cfg.yaml | 8 --- configs/p29_effect_of_freezing_mnist/gen.py | 26 ---------- configs/p29_effect_of_freezing_mnist/run.py | 50 ------------------- configs/p30_freezing_effect_dev/descr.yaml | 27 ---------- .../exps/fedavg-iid-freeze-0.yaml | 36 ------------- .../exps/fedavg-iid-freeze-100.yaml | 36 ------------- .../exps/fedavg-iid-freeze-16.yaml | 36 ------------- .../exps/fedavg-iid-freeze-33.yaml | 36 ------------- .../exps/fedavg-iid-freeze-50.yaml | 36 ------------- .../exps/fedavg-iid-freeze-66.yaml | 36 ------------- .../exps/fedavg-iid-freeze-83.yaml | 36 ------------- .../exps/fedavg-non-iid-freeze-0.yaml | 36 ------------- .../exps/fedavg-non-iid-freeze-100.yaml | 36 ------------- .../exps/fedavg-non-iid-freeze-16.yaml | 36 ------------- .../exps/fedavg-non-iid-freeze-33.yaml | 36 ------------- .../exps/fedavg-non-iid-freeze-50.yaml | 36 ------------- .../exps/fedavg-non-iid-freeze-66.yaml | 36 ------------- .../exps/fedavg-non-iid-freeze-83.yaml | 36 ------------- .../fedavg-iid-freeze-0.cfg.yaml | 8 --- .../fedavg-iid-freeze-100.cfg.yaml | 8 --- .../fedavg-iid-freeze-16.cfg.yaml | 8 --- .../fedavg-iid-freeze-33.cfg.yaml | 8 --- .../fedavg-iid-freeze-50.cfg.yaml | 8 --- .../fedavg-iid-freeze-66.cfg.yaml | 8 --- .../fedavg-iid-freeze-83.cfg.yaml | 8 --- .../fedavg-non-iid-freeze-0.cfg.yaml | 8 --- .../fedavg-non-iid-freeze-100.cfg.yaml | 8 --- .../fedavg-non-iid-freeze-16.cfg.yaml | 8 --- .../fedavg-non-iid-freeze-33.cfg.yaml | 8 --- .../fedavg-non-iid-freeze-50.cfg.yaml | 8 --- .../fedavg-non-iid-freeze-66.cfg.yaml | 8 --- .../fedavg-non-iid-freeze-83.cfg.yaml | 8 --- configs/p30_freezing_effect_dev/gen.py | 26 ---------- configs/p30_freezing_effect_dev/run.py | 42 ---------------- .../descr.yaml | 32 ------------ .../dyn_terminate.cfg.yaml | 3 -- .../dyn_terminate_swyh.cfg.yaml | 3 -- .../exps/dyn_terminate.yaml | 36 ------------- 
.../exps/dyn_terminate_swyh.yaml | 37 -------------- .../exps/fedavg.yaml | 36 ------------- .../exps/fednova.yaml | 37 -------------- .../exps/fedprox.yaml | 37 -------------- .../exps/offload.yaml | 36 ------------- .../exps/offload_strict.yaml | 37 -------------- .../exps/offload_strict2.yaml | 37 -------------- .../exps/tifl_adaptive.yaml | 36 ------------- .../exps/tifl_basic.yaml | 36 ------------- .../fedavg.cfg.yaml | 3 -- .../fednova.cfg.yaml | 4 -- .../fedprox.cfg.yaml | 4 -- .../gen.py | 26 ---------- .../offload.cfg.yaml | 3 -- .../offload_strict.cfg.yaml | 4 -- .../offload_strict2.cfg.yaml | 4 -- .../run.py | 39 --------------- .../tifl_adaptive.cfg.yaml | 3 -- .../tifl_basic.cfg.yaml | 3 -- .../p32_freezoff_iid_cifar10_cnn/descr.yaml | 32 ------------ .../dyn_terminate.cfg.yaml | 3 -- .../dyn_terminate_swyh.cfg.yaml | 3 -- .../exps/dyn_terminate.yaml | 36 ------------- .../exps/dyn_terminate_swyh.yaml | 37 -------------- .../exps/fedavg.yaml | 36 ------------- .../exps/fednova.yaml | 37 -------------- .../exps/fedprox.yaml | 37 -------------- .../exps/offload.yaml | 36 ------------- .../exps/offload_strict.yaml | 37 -------------- .../exps/tifl_adaptive.yaml | 36 ------------- .../exps/tifl_basic.yaml | 36 ------------- .../fedavg.cfg.yaml | 3 -- .../fednova.cfg.yaml | 4 -- .../fedprox.cfg.yaml | 4 -- configs/p32_freezoff_iid_cifar10_cnn/gen.py | 26 ---------- .../offload.cfg.yaml | 3 -- .../offload_strict.cfg.yaml | 4 -- configs/p32_freezoff_iid_cifar10_cnn/run.py | 30 ----------- .../tifl_adaptive.cfg.yaml | 3 -- .../tifl_basic.cfg.yaml | 3 -- .../descr.yaml | 32 ------------ .../dyn_terminate.cfg.yaml | 3 -- .../dyn_terminate_swyh.cfg.yaml | 3 -- .../exps/dyn_terminate.yaml | 36 ------------- .../exps/dyn_terminate_swyh.yaml | 37 -------------- .../exps/fedavg.yaml | 36 ------------- .../exps/fednova.yaml | 37 -------------- .../exps/fedprox.yaml | 37 -------------- .../exps/offload.yaml | 36 ------------- .../exps/offload_strict.yaml | 37 
-------------- .../exps/tifl_adaptive.yaml | 36 ------------- .../exps/tifl_basic.yaml | 36 ------------- .../fedavg.cfg.yaml | 3 -- .../fednova.cfg.yaml | 4 -- .../fedprox.cfg.yaml | 4 -- .../p33_freezoff_non_iid_cifar10_cnn/gen.py | 26 ---------- .../offload.cfg.yaml | 3 -- .../offload_strict.cfg.yaml | 4 -- .../p33_freezoff_non_iid_cifar10_cnn/run.py | 30 ----------- .../tifl_adaptive.cfg.yaml | 3 -- .../tifl_basic.cfg.yaml | 3 -- .../descr.yaml | 32 ------------ .../dyn_terminate.cfg.yaml | 3 -- .../dyn_terminate_swyh.cfg.yaml | 3 -- .../exps/dyn_terminate.yaml | 36 ------------- .../exps/dyn_terminate_swyh.yaml | 37 -------------- .../exps/fedavg.yaml | 36 ------------- .../exps/fednova.yaml | 37 -------------- .../exps/fedprox.yaml | 37 -------------- .../exps/offload.yaml | 36 ------------- .../exps/offload_strict.yaml | 37 -------------- .../exps/offload_strict2.yaml | 37 -------------- .../exps/offload_strict3.yaml | 37 -------------- .../exps/offload_strict4.yaml | 37 -------------- .../exps/tifl_adaptive.yaml | 36 ------------- .../exps/tifl_basic.yaml | 36 ------------- .../fedavg.cfg.yaml | 3 -- .../fednova.cfg.yaml | 4 -- .../fedprox.cfg.yaml | 4 -- .../gen.py | 26 ---------- .../offload.cfg.yaml | 3 -- .../offload_strict.cfg.yaml | 4 -- .../offload_strict2.cfg.yaml | 4 -- .../offload_strict3.cfg.yaml | 4 -- .../offload_strict4.cfg.yaml | 4 -- .../run.py | 41 --------------- .../tifl_adaptive.cfg.yaml | 3 -- .../tifl_basic.cfg.yaml | 3 -- .../descr.yaml | 32 ------------ .../dyn_terminate.cfg.yaml | 3 -- .../dyn_terminate_swyh.cfg.yaml | 3 -- .../exps/dyn_terminate.yaml | 36 ------------- .../exps/dyn_terminate_swyh.yaml | 37 -------------- .../exps/fedavg.yaml | 36 ------------- .../exps/fednova.yaml | 37 -------------- .../exps/fedprox.yaml | 37 -------------- .../exps/offload.yaml | 36 ------------- .../exps/offload_strict.yaml | 37 -------------- .../exps/tifl_adaptive.yaml | 36 ------------- .../exps/tifl_basic.yaml | 36 ------------- 
.../fedavg.cfg.yaml | 3 -- .../fednova.cfg.yaml | 4 -- .../fedprox.cfg.yaml | 4 -- .../gen.py | 26 ---------- .../offload.cfg.yaml | 3 -- .../offload_strict.cfg.yaml | 4 -- .../run.py | 37 -------------- .../tifl_adaptive.cfg.yaml | 3 -- .../tifl_basic.cfg.yaml | 3 -- .../descr.yaml | 32 ------------ .../dyn_terminate.cfg.yaml | 3 -- .../dyn_terminate_swyh.cfg.yaml | 3 -- .../exps/dyn_terminate.yaml | 36 ------------- .../exps/dyn_terminate_swyh.yaml | 37 -------------- .../exps/fedavg.yaml | 36 ------------- .../exps/fednova.yaml | 37 -------------- .../exps/fedprox.yaml | 37 -------------- .../exps/offload.yaml | 36 ------------- .../exps/offload_strict.yaml | 37 -------------- .../exps/tifl_adaptive.yaml | 36 ------------- .../exps/tifl_basic.yaml | 36 ------------- .../fedavg.cfg.yaml | 3 -- .../fednova.cfg.yaml | 4 -- .../fedprox.cfg.yaml | 4 -- .../p35_freezoff_non_iid_5_cifar10_cnn/gen.py | 26 ---------- .../offload.cfg.yaml | 3 -- .../offload_strict.cfg.yaml | 4 -- .../p35_freezoff_non_iid_5_cifar10_cnn/run.py | 37 -------------- .../tifl_adaptive.cfg.yaml | 3 -- .../tifl_basic.cfg.yaml | 3 -- .../descr.yaml | 32 ------------ .../dyn_terminate.cfg.yaml | 3 -- .../dyn_terminate_swyh.cfg.yaml | 3 -- .../exps/dyn_terminate.yaml | 36 ------------- .../exps/dyn_terminate_swyh.yaml | 37 -------------- .../exps/fedavg.yaml | 36 ------------- .../exps/fednova.yaml | 37 -------------- .../exps/fedprox.yaml | 37 -------------- .../exps/offload.yaml | 36 ------------- .../exps/offload_strict.yaml | 37 -------------- .../exps/offload_strict2.yaml | 37 -------------- .../exps/offload_strict3.yaml | 37 -------------- .../exps/offload_strict4.yaml | 37 -------------- .../exps/tifl_adaptive.yaml | 36 ------------- .../exps/tifl_basic.yaml | 36 ------------- .../fedavg.cfg.yaml | 3 -- .../fednova.cfg.yaml | 4 -- .../fedprox.cfg.yaml | 4 -- .../gen.py | 26 ---------- .../offload.cfg.yaml | 3 -- .../offload_strict.cfg.yaml | 4 -- .../offload_strict2.cfg.yaml | 4 -- 
.../offload_strict3.cfg.yaml | 4 -- .../offload_strict4.cfg.yaml | 4 -- .../run.py | 41 --------------- .../tifl_adaptive.cfg.yaml | 3 -- .../tifl_basic.cfg.yaml | 3 -- configs/terminate/p_terminate_terminate.yaml | 35 ------------- .../terminate/p_terminate_terminate_swyh.yaml | 35 ------------- configs/terminate/p_terminate_vanilla.yaml | 35 ------------- configs/terminate/p_terminate_vanilla_s3.yaml | 35 ------------- configs/terminate/run.py | 23 --------- configs/tifl-15/exp_p15_baseline.yaml | 34 ------------- configs/tifl-15/exp_p15_tifl-adaptive.yaml | 34 ------------- configs/tifl-15/exp_p15_tifl-basic.yaml | 34 ------------- configs/tifl-15/exp_p3_tifl.yaml | 34 ------------- configs/tifl-15/run.py | 18 ------- deploy/dev/client_stub_default.yml | 27 ---------- deploy/dev/client_stub_fast.yml | 26 ---------- deploy/dev/client_stub_medium.yml | 26 ---------- deploy/dev/client_stub_slow.yml | 26 ---------- deploy/dev/system_stub.yml | 29 ----------- deploy/dev_generate/client_stub_medium.yml | 26 ---------- deploy/dev_generate/description.yml | 19 ------- deploy/dev_generate/stub_fast.yml | 25 ---------- .../{dev_generate => docker}/stub_default.yml | 0 .../{dev_generate => docker}/system_stub.yml | 0 deploy/p11_freezoff/client_stub_default.yml | 26 ---------- deploy/p11_freezoff/client_stub_fast.yml | 25 ---------- deploy/p11_freezoff/client_stub_medium.yml | 25 ---------- deploy/p11_freezoff/client_stub_slow.yml | 26 ---------- deploy/p11_freezoff/system_stub.yml | 27 ---------- .../p11_freezoff_fast/client_stub_default.yml | 26 ---------- deploy/p11_freezoff_fast/client_stub_fast.yml | 25 ---------- .../p11_freezoff_fast/client_stub_medium.yml | 25 ---------- deploy/p11_freezoff_fast/client_stub_slow.yml | 26 ---------- deploy/p11_freezoff_fast/system_stub.yml | 27 ---------- .../client_stub_default.yml | 26 ---------- .../client_stub_fast.yml | 25 ---------- .../client_stub_medium.yml | 25 ---------- .../client_stub_slow.yml | 25 ---------- 
deploy/p23_freezoff_w9s3-half/system_stub.yml | 27 ---------- .../p23_freezoff_w9s3/client_stub_default.yml | 26 ---------- deploy/p23_freezoff_w9s3/client_stub_fast.yml | 25 ---------- .../p23_freezoff_w9s3/client_stub_medium.yml | 25 ---------- deploy/p23_freezoff_w9s3/client_stub_slow.yml | 25 ---------- deploy/p23_freezoff_w9s3/system_stub.yml | 27 ---------- .../client_stub_default.yml | 26 ---------- .../client_stub_fast.yml | 25 ---------- .../client_stub_medium.yml | 25 ---------- .../client_stub_slow.yml | 25 ---------- deploy/p23_freezoff_w9s3_fast/system_stub.yml | 27 ---------- .../client_stub_default.yml | 26 ---------- .../p28_non_iid_effect/client_stub_fast.yml | 25 ---------- .../p28_non_iid_effect/client_stub_medium.yml | 25 ---------- .../p28_non_iid_effect/client_stub_slow.yml | 25 ---------- deploy/p28_non_iid_effect/system_stub.yml | 27 ---------- deploy/templates/client_stub_default.yml | 25 ---------- deploy/templates/client_stub_fast.yml | 24 --------- deploy/templates/client_stub_medium.yml | 24 --------- deploy/templates/client_stub_slow.yml | 24 --------- deploy/templates/system_stub.yml | 27 ---------- deploy/terminate/client_stub_default.yml | 25 ---------- deploy/terminate/client_stub_fast.yml | 24 --------- deploy/terminate/client_stub_medium.yml | 24 --------- deploy/terminate/client_stub_slow.yml | 24 --------- deploy/terminate/system_stub.yml | 27 ---------- deploy/tifl-15/client_stub_default.yml | 26 ---------- deploy/tifl-15/client_stub_fast.yml | 25 ---------- deploy/tifl-15/client_stub_medium.yml | 25 ---------- deploy/tifl-15/client_stub_slow.yml | 25 ---------- deploy/tifl-15/system_stub.yml | 27 ---------- .../example_docker/descr.yaml | 41 +++++++++------ .../example_docker}/fedavg.cfg.yaml | 0 .../example_native}/descr.yaml | 12 +---- .../example_native/fedavg.cfg.yaml | 3 +- fltk/util/generate_docker_compose_2.py | 46 ++++++++++------- fltk/util/generate_experiments.py | 23 +++++++-- 706 files changed, 79 insertions(+), 
16524 deletions(-) delete mode 100644 configs/dev/descr.yaml delete mode 100644 configs/dev/dev_p2_fedavg.cfg.yaml delete mode 100644 configs/dev/exps/dev_p2_fedavg.yaml delete mode 100644 configs/dev/exps/dev_p2_fedprox.yaml delete mode 100644 configs/dev/gen.py delete mode 100644 configs/dev/run.py delete mode 100644 configs/dev_mnist/exps/fedavg.yaml delete mode 100644 configs/dev_mnist/gen.py delete mode 100644 configs/dev_mnist/run.py delete mode 100644 configs/dev_mnist_all/descr.yaml delete mode 100644 configs/dev_mnist_all/exps/fedavg.yaml delete mode 100644 configs/dev_mnist_all/exps/fednova.yaml delete mode 100644 configs/dev_mnist_all/exps/fedprox.yaml delete mode 100644 configs/dev_mnist_all/fedavg.cfg.yaml delete mode 100644 configs/dev_mnist_all/fednova.cfg.yaml delete mode 100644 configs/dev_mnist_all/fedprox.cfg.yaml delete mode 100644 configs/dev_mnist_all/gen.py delete mode 100644 configs/dev_mnist_all/run.py delete mode 100644 configs/effect-freezing/p_freezing-iid_freeze.yaml delete mode 100644 configs/effect-freezing/p_freezing-iid_vanilla.yaml delete mode 100644 configs/effect-freezing/run.py delete mode 100644 configs/exp_p2_vanilla.yaml delete mode 100644 configs/exp_p2_w_4_s2_vanilla.yaml delete mode 100644 configs/exp_p2_w_4_s4_vanilla.yaml delete mode 100644 configs/exp_p3_w_4_s4_baseline.yaml delete mode 100644 configs/exp_p3_w_4_s4_baseline_check.yaml delete mode 100644 configs/exp_p3_w_4_s4_baseline_long.yaml delete mode 100644 configs/exp_p3_w_4_s4_deadline.yaml delete mode 100644 configs/exp_p3_w_4_s4_deadline_check.yaml delete mode 100644 configs/exp_p3_w_4_s4_deadline_long.yaml delete mode 100644 configs/exp_p3_w_4_s4_freeze.yaml delete mode 100644 configs/exp_p3_w_4_s4_freeze_long.yaml delete mode 100644 configs/exp_p3_w_4_s4_slow_baseline.yaml delete mode 100644 configs/exp_p3_w_4_s4_slow_deadline.yaml delete mode 100644 configs/exp_p3_w_4_s4_swyh.yaml delete mode 100644 configs/exp_p3_w_4_s4_swyh_long.yaml delete mode 100644 
configs/exp_p4_w_4_s2_swyh_non_iid.yaml delete mode 100644 configs/exp_p4_w_4_s2_vanilla_non_iid.yaml delete mode 100644 configs/exp_p5_w_4_s4_deadline_non_iid.yaml delete mode 100644 configs/exp_p5_w_4_s4_freeze_non_iid.yaml delete mode 100644 configs/exp_p5_w_4_s4_swyh_non_iid.yaml delete mode 100644 configs/exp_p5_w_4_s4_vanilla_non_iid.yaml delete mode 100644 configs/exp_p6_w_4_s4_deadline_non_iid.yaml delete mode 100644 configs/exp_p6_w_4_s4_swyh_non_iid.yaml delete mode 100644 configs/exp_p7_w_4_s2_deadline_non_iid.yaml delete mode 100644 configs/exp_p7_w_4_s2_freeze_non_iid.yaml delete mode 100644 configs/exp_p7_w_4_s2_swyh_non_iid.yaml delete mode 100644 configs/exp_p7_w_4_s2_vanilla_non_iid.yaml delete mode 100644 configs/exp_p8_w_4_s2_baseline_fmnist_non_iid.yaml delete mode 100644 configs/exp_p8_w_4_s2_freeze_fmnist_non_iid.yaml delete mode 100644 configs/exp_p8_w_4_s2_offload_fmnist_non_iid.yaml delete mode 100644 configs/exp_p8_w_4_s2_swyh_fmnist_non_iid.yaml delete mode 100644 configs/exp_p8_w_4_s4_baseline_fmnist.yaml delete mode 100644 configs/exp_p8_w_4_s4_baseline_fmnist_non_iid.yaml delete mode 100644 configs/exp_p8_w_4_s4_freeze_fmnist.yaml delete mode 100644 configs/exp_p8_w_4_s4_freeze_fmnist_non_iid.yaml delete mode 100644 configs/exp_p8_w_4_s4_offload_fmnist.yaml delete mode 100644 configs/exp_p8_w_4_s4_offload_fmnist_non_iid.yaml delete mode 100644 configs/exp_p8_w_4_s4_swyh_fmnist.yaml delete mode 100644 configs/exp_p8_w_4_s4_swyh_fmnist_non_iid.yaml delete mode 100644 configs/exp_p8_w_4_s4_vanilla_.yaml delete mode 100644 configs/exp_p8_w_4_s4_vanilla_fmnist.yaml delete mode 100644 configs/experiment.yaml delete mode 100644 configs/experiment_cifar100.yaml delete mode 100644 configs/experiment_deadline.yaml delete mode 100644 configs/experiment_fmnist.yaml delete mode 100644 configs/experiment_fmnist_offload.yaml delete mode 100644 configs/experiment_freeze.yaml delete mode 100644 configs/experiment_gcp_c20.yaml delete mode 100644 
configs/experiment_gcp_single.yaml delete mode 100644 configs/experiment_offload.yaml delete mode 100644 configs/experiment_p18_full.yaml delete mode 100644 configs/experiment_swyh.yaml delete mode 100644 configs/experiment_swyh_first_long.yaml delete mode 100644 configs/experiment_swyh_warmup.yaml delete mode 100644 configs/experiment_vanilla.yaml delete mode 100644 configs/non_iid_experiment.yaml delete mode 100644 configs/p11A_freezoff_iid/p11A_freezoff_iid_dyn_terminate.yaml delete mode 100644 configs/p11A_freezoff_iid/p11A_freezoff_iid_dyn_terminate_swyh.yaml delete mode 100644 configs/p11A_freezoff_iid/p11A_freezoff_iid_fedavg.yaml delete mode 100644 configs/p11A_freezoff_iid/p11A_freezoff_iid_fednova.yaml delete mode 100644 configs/p11A_freezoff_iid/p11A_freezoff_iid_fedprox.yaml delete mode 100644 configs/p11A_freezoff_iid/p11A_freezoff_iid_offload.yaml delete mode 100644 configs/p11A_freezoff_iid/p11A_freezoff_iid_offload_strict.yaml delete mode 100644 configs/p11A_freezoff_iid/p11A_freezoff_iid_tifl_adaptive.yaml delete mode 100644 configs/p11A_freezoff_iid/p11A_freezoff_iid_tifl_basic.yaml delete mode 100644 configs/p11A_freezoff_iid/run.py delete mode 100644 configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate.yaml delete mode 100644 configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate_swyh.yaml delete mode 100644 configs/p11_freezoff_iid/p11_freezoff_iid_fedavg.yaml delete mode 100644 configs/p11_freezoff_iid/p11_freezoff_iid_fednova.yaml delete mode 100644 configs/p11_freezoff_iid/p11_freezoff_iid_fedprox.yaml delete mode 100644 configs/p11_freezoff_iid/p11_freezoff_iid_offload.yaml delete mode 100644 configs/p11_freezoff_iid/p11_freezoff_iid_offload_strict.yaml delete mode 100644 configs/p11_freezoff_iid/p11_freezoff_iid_tifl_adaptive.yaml delete mode 100644 configs/p11_freezoff_iid/p11_freezoff_iid_tifl_basic.yaml delete mode 100644 configs/p11_freezoff_iid/run.py delete mode 100644 
configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_large.yaml delete mode 100644 configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_swyh_large.yaml delete mode 100644 configs/p12_freezoff_iid_large/p12_freezoff_iid_fedavg_large.yaml delete mode 100644 configs/p12_freezoff_iid_large/p12_freezoff_iid_fednova_large.yaml delete mode 100644 configs/p12_freezoff_iid_large/p12_freezoff_iid_fedprox_large.yaml delete mode 100644 configs/p12_freezoff_iid_large/p12_freezoff_iid_offload_large.yaml delete mode 100644 configs/p12_freezoff_iid_large/p12_freezoff_iid_offload_strict_large.yaml delete mode 100644 configs/p12_freezoff_iid_large/p12_freezoff_iid_tifl_adaptive_large.yaml delete mode 100644 configs/p12_freezoff_iid_large/p12_freezoff_iid_tifl_basic_large.yaml delete mode 100644 configs/p12_freezoff_iid_large/run.py delete mode 100644 configs/p13_variance_dev/p13_variance_dev_dyn_terminate_large.yaml delete mode 100644 configs/p13_variance_dev/p13_variance_dev_dyn_terminate_swyh_large.yaml delete mode 100644 configs/p13_variance_dev/p13_variance_dev_fedavg_large.yaml delete mode 100644 configs/p13_variance_dev/p13_variance_dev_fednova_large.yaml delete mode 100644 configs/p13_variance_dev/p13_variance_dev_fedprox_large.yaml delete mode 100644 configs/p13_variance_dev/p13_variance_dev_offload_large.yaml delete mode 100644 configs/p13_variance_dev/p13_variance_dev_tifl_adaptive_large.yaml delete mode 100644 configs/p13_variance_dev/p13_variance_dev_tifl_basic_large.yaml delete mode 100644 configs/p13_variance_dev/run.py delete mode 100644 configs/p14A_check_iid_cifar10_cnn/descr.yaml delete mode 100644 configs/p14A_check_iid_cifar10_cnn/dyn_terminate.cfg.yaml delete mode 100644 configs/p14A_check_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml delete mode 100644 configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate.yaml delete mode 100644 configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml delete mode 100644 
configs/p14A_check_iid_cifar10_cnn/exps/fedavg.yaml delete mode 100644 configs/p14A_check_iid_cifar10_cnn/exps/fednova.yaml delete mode 100644 configs/p14A_check_iid_cifar10_cnn/exps/fedprox.yaml delete mode 100644 configs/p14A_check_iid_cifar10_cnn/exps/offload.yaml delete mode 100644 configs/p14A_check_iid_cifar10_cnn/exps/offload_strict.yaml delete mode 100644 configs/p14A_check_iid_cifar10_cnn/exps/tifl_adaptive.yaml delete mode 100644 configs/p14A_check_iid_cifar10_cnn/exps/tifl_basic.yaml delete mode 100644 configs/p14A_check_iid_cifar10_cnn/fedavg.cfg.yaml delete mode 100644 configs/p14A_check_iid_cifar10_cnn/fednova.cfg.yaml delete mode 100644 configs/p14A_check_iid_cifar10_cnn/fedprox.cfg.yaml delete mode 100644 configs/p14A_check_iid_cifar10_cnn/gen.py delete mode 100644 configs/p14A_check_iid_cifar10_cnn/offload.cfg.yaml delete mode 100644 configs/p14A_check_iid_cifar10_cnn/offload_strict.cfg.yaml delete mode 100644 configs/p14A_check_iid_cifar10_cnn/run.py delete mode 100644 configs/p14A_check_iid_cifar10_cnn/tifl_adaptive.cfg.yaml delete mode 100644 configs/p14A_check_iid_cifar10_cnn/tifl_basic.cfg.yaml delete mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/descr.yaml delete mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-iid-uniform.yaml delete mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-1.yaml delete mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-10.yaml delete mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-2.yaml delete mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-5.yaml delete mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-iid-uniform.cfg.yaml delete mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-1.cfg.yaml delete mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-10.cfg.yaml delete mode 100644 
configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-2.cfg.yaml delete mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-5.cfg.yaml delete mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/gen.py delete mode 100644 configs/p14B_effect_of_non_iid_ness_cifar10/run.py delete mode 100644 configs/p14_check_iid_cifar10_cnn/descr.yaml delete mode 100644 configs/p14_check_iid_cifar10_cnn/dyn_terminate.cfg.yaml delete mode 100644 configs/p14_check_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml delete mode 100644 configs/p14_check_iid_cifar10_cnn/exps/dyn_terminate.yaml delete mode 100644 configs/p14_check_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml delete mode 100644 configs/p14_check_iid_cifar10_cnn/exps/fedavg.yaml delete mode 100644 configs/p14_check_iid_cifar10_cnn/exps/fednova.yaml delete mode 100644 configs/p14_check_iid_cifar10_cnn/exps/fedprox.yaml delete mode 100644 configs/p14_check_iid_cifar10_cnn/exps/offload.yaml delete mode 100644 configs/p14_check_iid_cifar10_cnn/exps/offload_strict.yaml delete mode 100644 configs/p14_check_iid_cifar10_cnn/exps/tifl_adaptive.yaml delete mode 100644 configs/p14_check_iid_cifar10_cnn/exps/tifl_basic.yaml delete mode 100644 configs/p14_check_iid_cifar10_cnn/fedavg.cfg.yaml delete mode 100644 configs/p14_check_iid_cifar10_cnn/fednova.cfg.yaml delete mode 100644 configs/p14_check_iid_cifar10_cnn/fedprox.cfg.yaml delete mode 100644 configs/p14_check_iid_cifar10_cnn/gen.py delete mode 100644 configs/p14_check_iid_cifar10_cnn/offload.cfg.yaml delete mode 100644 configs/p14_check_iid_cifar10_cnn/offload_strict.cfg.yaml delete mode 100644 configs/p14_check_iid_cifar10_cnn/run.py delete mode 100644 configs/p14_check_iid_cifar10_cnn/tifl_adaptive.cfg.yaml delete mode 100644 configs/p14_check_iid_cifar10_cnn/tifl_basic.cfg.yaml delete mode 100644 configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_dyn_terminate_large.yaml delete mode 100644 
configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_dyn_terminate_swyh_large.yaml delete mode 100644 configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fedavg_large.yaml delete mode 100644 configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fednova_large.yaml delete mode 100644 configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fedprox_large.yaml delete mode 100644 configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_offload_large.yaml delete mode 100644 configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_offload_strict_large.yaml delete mode 100644 configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_tifl_adaptive_large.yaml delete mode 100644 configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_tifl_basic_large.yaml delete mode 100644 configs/p15_freezoff_non_iid_large/run.py delete mode 100644 configs/p20_freezoff_iid_fmnist_cnn/descr.yaml delete mode 100644 configs/p20_freezoff_iid_fmnist_cnn/dyn_terminate.cfg.yaml delete mode 100644 configs/p20_freezoff_iid_fmnist_cnn/dyn_terminate_swyh.cfg.yaml delete mode 100644 configs/p20_freezoff_iid_fmnist_cnn/exps/dyn_terminate.yaml delete mode 100644 configs/p20_freezoff_iid_fmnist_cnn/exps/dyn_terminate_swyh.yaml delete mode 100644 configs/p20_freezoff_iid_fmnist_cnn/exps/fedavg.yaml delete mode 100644 configs/p20_freezoff_iid_fmnist_cnn/exps/fednova.yaml delete mode 100644 configs/p20_freezoff_iid_fmnist_cnn/exps/fedprox.yaml delete mode 100644 configs/p20_freezoff_iid_fmnist_cnn/exps/offload.yaml delete mode 100644 configs/p20_freezoff_iid_fmnist_cnn/exps/offload_strict.yaml delete mode 100644 configs/p20_freezoff_iid_fmnist_cnn/exps/tifl_adaptive.yaml delete mode 100644 configs/p20_freezoff_iid_fmnist_cnn/exps/tifl_basic.yaml delete mode 100644 configs/p20_freezoff_iid_fmnist_cnn/fedavg.cfg.yaml delete mode 100644 configs/p20_freezoff_iid_fmnist_cnn/fednova.cfg.yaml delete mode 100644 configs/p20_freezoff_iid_fmnist_cnn/fedprox.cfg.yaml delete mode 100644 
configs/p20_freezoff_iid_fmnist_cnn/gen.py delete mode 100644 configs/p20_freezoff_iid_fmnist_cnn/offload.cfg.yaml delete mode 100644 configs/p20_freezoff_iid_fmnist_cnn/offload_strict.cfg.yaml delete mode 100644 configs/p20_freezoff_iid_fmnist_cnn/run.py delete mode 100644 configs/p20_freezoff_iid_fmnist_cnn/tifl_adaptive.cfg.yaml delete mode 100644 configs/p20_freezoff_iid_fmnist_cnn/tifl_basic.cfg.yaml delete mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/descr.yaml delete mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/dyn_terminate.cfg.yaml delete mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/dyn_terminate_swyh.cfg.yaml delete mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/exps/dyn_terminate.yaml delete mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/exps/dyn_terminate_swyh.yaml delete mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/exps/fedavg.yaml delete mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/exps/fednova.yaml delete mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/exps/fedprox.yaml delete mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/exps/offload.yaml delete mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/exps/offload_strict.yaml delete mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/exps/tifl_adaptive.yaml delete mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/exps/tifl_basic.yaml delete mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/fedavg.cfg.yaml delete mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/fednova.cfg.yaml delete mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/fedprox.cfg.yaml delete mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/gen.py delete mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/offload.cfg.yaml delete mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/offload_strict.cfg.yaml delete mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/run.py delete mode 100644 configs/p21_freezoff_non_iid_fmnist_cnn/tifl_adaptive.cfg.yaml delete mode 100644 
configs/p21_freezoff_non_iid_fmnist_cnn/tifl_basic.cfg.yaml delete mode 100644 configs/p22_freezoff_iid_cifar10_cnn/descr.yaml delete mode 100644 configs/p22_freezoff_iid_cifar10_cnn/dyn_terminate.cfg.yaml delete mode 100644 configs/p22_freezoff_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml delete mode 100644 configs/p22_freezoff_iid_cifar10_cnn/exps/dyn_terminate.yaml delete mode 100644 configs/p22_freezoff_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml delete mode 100644 configs/p22_freezoff_iid_cifar10_cnn/exps/fedavg.yaml delete mode 100644 configs/p22_freezoff_iid_cifar10_cnn/exps/fednova.yaml delete mode 100644 configs/p22_freezoff_iid_cifar10_cnn/exps/fedprox.yaml delete mode 100644 configs/p22_freezoff_iid_cifar10_cnn/exps/offload.yaml delete mode 100644 configs/p22_freezoff_iid_cifar10_cnn/exps/offload_strict.yaml delete mode 100644 configs/p22_freezoff_iid_cifar10_cnn/exps/tifl_adaptive.yaml delete mode 100644 configs/p22_freezoff_iid_cifar10_cnn/exps/tifl_basic.yaml delete mode 100644 configs/p22_freezoff_iid_cifar10_cnn/fedavg.cfg.yaml delete mode 100644 configs/p22_freezoff_iid_cifar10_cnn/fednova.cfg.yaml delete mode 100644 configs/p22_freezoff_iid_cifar10_cnn/fedprox.cfg.yaml delete mode 100644 configs/p22_freezoff_iid_cifar10_cnn/gen.py delete mode 100644 configs/p22_freezoff_iid_cifar10_cnn/offload.cfg.yaml delete mode 100644 configs/p22_freezoff_iid_cifar10_cnn/offload_strict.cfg.yaml delete mode 100644 configs/p22_freezoff_iid_cifar10_cnn/run.py delete mode 100644 configs/p22_freezoff_iid_cifar10_cnn/tifl_adaptive.cfg.yaml delete mode 100644 configs/p22_freezoff_iid_cifar10_cnn/tifl_basic.cfg.yaml delete mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/descr.yaml delete mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/dyn_terminate.cfg.yaml delete mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml delete mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate.yaml delete mode 100644 
configs/p23_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml delete mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/exps/fedavg.yaml delete mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/exps/fednova.yaml delete mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/exps/fedprox.yaml delete mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/exps/offload.yaml delete mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/exps/offload_strict.yaml delete mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/exps/tifl_adaptive.yaml delete mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/exps/tifl_basic.yaml delete mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/fedavg.cfg.yaml delete mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/fednova.cfg.yaml delete mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/fedprox.cfg.yaml delete mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/gen.py delete mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/offload.cfg.yaml delete mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/offload_strict.cfg.yaml delete mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/run.py delete mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/tifl_adaptive.cfg.yaml delete mode 100644 configs/p23_freezoff_non_iid_cifar10_cnn/tifl_basic.cfg.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/descr.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fedavg.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fednova.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fedprox.yaml delete mode 100644 
configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict5.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict6.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict7.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/fednova.cfg.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/gen.py delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload.cfg.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict5.cfg.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict6.cfg.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict7.cfg.yaml delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/run.py delete mode 100644 configs/p24_freezoff_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml delete mode 100644 
configs/p24_freezoff_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict5.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict6.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict7.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/gen.py delete mode 100644 
configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict10.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict11.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict5.cfg.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict6.cfg.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict7.cfg.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict8.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict9.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/run.py delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml delete mode 100644 configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/descr.yaml delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/dyn_terminate.cfg.yaml delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/dyn_terminate_swyh.cfg.yaml delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/dyn_terminate.yaml delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/dyn_terminate_swyh.yaml delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fedavg.yaml delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fednova.yaml delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fedprox.yaml delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload.yaml 
delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload_strict.yaml delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload_strict4.yaml delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/tifl_adaptive.yaml delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/tifl_basic.yaml delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/fedavg.cfg.yaml delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/fednova.cfg.yaml delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/fedprox.cfg.yaml delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/gen.py delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/offload.cfg.yaml delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/offload_strict.cfg.yaml delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/offload_strict4.cfg.yaml delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/run.py delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/tifl_adaptive.cfg.yaml delete mode 100644 configs/p26_freezoff_iid_mnist_cnn_w9s3/tifl_basic.cfg.yaml delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/descr.yaml delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/dyn_terminate.cfg.yaml delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/dyn_terminate_swyh.cfg.yaml delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/dyn_terminate.yaml delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/dyn_terminate_swyh.yaml delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fedavg.yaml delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fednova.yaml delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fedprox.yaml delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload.yaml delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload_strict.yaml delete mode 100644 
configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload_strict4.yaml delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/tifl_adaptive.yaml delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/tifl_basic.yaml delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fedavg.cfg.yaml delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fednova.cfg.yaml delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fedprox.cfg.yaml delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/gen.py delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload.cfg.yaml delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload_strict.cfg.yaml delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload_strict4.cfg.yaml delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/run.py delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/tifl_adaptive.cfg.yaml delete mode 100644 configs/p27_freezoff_non_iid_mnist_cnn_w9s3/tifl_basic.cfg.yaml delete mode 100644 configs/p28_effect_of_non_iid_ness_mnist/descr.yaml delete mode 100644 configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-iid-uniform.yaml delete mode 100644 configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-1.yaml delete mode 100644 configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-10.yaml delete mode 100644 configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-2.yaml delete mode 100644 configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-5.yaml delete mode 100644 configs/p28_effect_of_non_iid_ness_mnist/fedavg-iid-uniform.cfg.yaml delete mode 100644 configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-1.cfg.yaml delete mode 100644 configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-10.cfg.yaml delete mode 100644 configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-2.cfg.yaml delete mode 100644 configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-5.cfg.yaml delete mode 
100644 configs/p28_effect_of_non_iid_ness_mnist/gen.py delete mode 100644 configs/p28_effect_of_non_iid_ness_mnist/run.py delete mode 100644 configs/p29_effect_of_freezing_mnist/descr.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-0.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-10.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-100.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-20.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-30.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-40.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-50.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-60.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-70.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-80.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-90.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-0.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-10.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-100.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-20.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-30.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-40.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-50.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-60.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-70.yaml delete mode 100644 
configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-80.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-90.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-0.cfg.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-10.cfg.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-100.cfg.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-20.cfg.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-30.cfg.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-40.cfg.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-50.cfg.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-60.cfg.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-70.cfg.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-80.cfg.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-90.cfg.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-0.cfg.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-10.cfg.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-100.cfg.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-20.cfg.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-30.cfg.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-40.cfg.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-50.cfg.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-60.cfg.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-70.cfg.yaml delete mode 100644 
configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-80.cfg.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-90.cfg.yaml delete mode 100644 configs/p29_effect_of_freezing_mnist/gen.py delete mode 100644 configs/p29_effect_of_freezing_mnist/run.py delete mode 100644 configs/p30_freezing_effect_dev/descr.yaml delete mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-0.yaml delete mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-100.yaml delete mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-16.yaml delete mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-33.yaml delete mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-50.yaml delete mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-66.yaml delete mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-83.yaml delete mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-0.yaml delete mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-100.yaml delete mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-16.yaml delete mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-33.yaml delete mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-50.yaml delete mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-66.yaml delete mode 100644 configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-83.yaml delete mode 100644 configs/p30_freezing_effect_dev/fedavg-iid-freeze-0.cfg.yaml delete mode 100644 configs/p30_freezing_effect_dev/fedavg-iid-freeze-100.cfg.yaml delete mode 100644 configs/p30_freezing_effect_dev/fedavg-iid-freeze-16.cfg.yaml delete mode 100644 configs/p30_freezing_effect_dev/fedavg-iid-freeze-33.cfg.yaml delete mode 100644 configs/p30_freezing_effect_dev/fedavg-iid-freeze-50.cfg.yaml delete mode 100644 
configs/p30_freezing_effect_dev/fedavg-iid-freeze-66.cfg.yaml delete mode 100644 configs/p30_freezing_effect_dev/fedavg-iid-freeze-83.cfg.yaml delete mode 100644 configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-0.cfg.yaml delete mode 100644 configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-100.cfg.yaml delete mode 100644 configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-16.cfg.yaml delete mode 100644 configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-33.cfg.yaml delete mode 100644 configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-50.cfg.yaml delete mode 100644 configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-66.cfg.yaml delete mode 100644 configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-83.cfg.yaml delete mode 100644 configs/p30_freezing_effect_dev/gen.py delete mode 100644 configs/p30_freezing_effect_dev/run.py delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml delete mode 100644 
configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/gen.py delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/run.py delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml delete mode 100644 configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml delete mode 100644 configs/p32_freezoff_iid_cifar10_cnn/descr.yaml delete mode 100644 configs/p32_freezoff_iid_cifar10_cnn/dyn_terminate.cfg.yaml delete mode 100644 configs/p32_freezoff_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml delete mode 100644 configs/p32_freezoff_iid_cifar10_cnn/exps/dyn_terminate.yaml delete mode 100644 configs/p32_freezoff_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml delete mode 100644 configs/p32_freezoff_iid_cifar10_cnn/exps/fedavg.yaml delete mode 100644 configs/p32_freezoff_iid_cifar10_cnn/exps/fednova.yaml delete mode 100644 configs/p32_freezoff_iid_cifar10_cnn/exps/fedprox.yaml delete mode 100644 configs/p32_freezoff_iid_cifar10_cnn/exps/offload.yaml delete mode 100644 configs/p32_freezoff_iid_cifar10_cnn/exps/offload_strict.yaml delete mode 100644 configs/p32_freezoff_iid_cifar10_cnn/exps/tifl_adaptive.yaml delete mode 100644 configs/p32_freezoff_iid_cifar10_cnn/exps/tifl_basic.yaml delete mode 100644 configs/p32_freezoff_iid_cifar10_cnn/fedavg.cfg.yaml delete mode 100644 configs/p32_freezoff_iid_cifar10_cnn/fednova.cfg.yaml delete mode 100644 configs/p32_freezoff_iid_cifar10_cnn/fedprox.cfg.yaml delete mode 100644 
configs/p32_freezoff_iid_cifar10_cnn/gen.py delete mode 100644 configs/p32_freezoff_iid_cifar10_cnn/offload.cfg.yaml delete mode 100644 configs/p32_freezoff_iid_cifar10_cnn/offload_strict.cfg.yaml delete mode 100644 configs/p32_freezoff_iid_cifar10_cnn/run.py delete mode 100644 configs/p32_freezoff_iid_cifar10_cnn/tifl_adaptive.cfg.yaml delete mode 100644 configs/p32_freezoff_iid_cifar10_cnn/tifl_basic.cfg.yaml delete mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/descr.yaml delete mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/dyn_terminate.cfg.yaml delete mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml delete mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate.yaml delete mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml delete mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/exps/fedavg.yaml delete mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/exps/fednova.yaml delete mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/exps/fedprox.yaml delete mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/exps/offload.yaml delete mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/exps/offload_strict.yaml delete mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/exps/tifl_adaptive.yaml delete mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/exps/tifl_basic.yaml delete mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/fedavg.cfg.yaml delete mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/fednova.cfg.yaml delete mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/fedprox.cfg.yaml delete mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/gen.py delete mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/offload.cfg.yaml delete mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/offload_strict.cfg.yaml delete mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/run.py delete mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/tifl_adaptive.cfg.yaml 
delete mode 100644 configs/p33_freezoff_non_iid_cifar10_cnn/tifl_basic.cfg.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/gen.py delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml delete mode 100644 
configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/run.py delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml delete mode 100644 configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/descr.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/dyn_terminate.cfg.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/dyn_terminate_swyh.cfg.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/dyn_terminate.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/dyn_terminate_swyh.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fedavg.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fednova.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fedprox.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/offload.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/offload_strict.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/tifl_adaptive.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/tifl_basic.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fedavg.cfg.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fednova.cfg.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fedprox.cfg.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/gen.py delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/offload.cfg.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/offload_strict.cfg.yaml delete mode 100644 
configs/p35_freezoff_non_iid_5_cifar10_cnn copy/run.py delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/tifl_adaptive.cfg.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn copy/tifl_basic.cfg.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/descr.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/dyn_terminate.cfg.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/dyn_terminate_swyh.cfg.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/dyn_terminate.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/dyn_terminate_swyh.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fedavg.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fednova.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fedprox.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/offload.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/offload_strict.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/tifl_adaptive.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/tifl_basic.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/fedavg.cfg.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/fednova.cfg.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/fedprox.cfg.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/gen.py delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/offload.cfg.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/offload_strict.cfg.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/run.py delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/tifl_adaptive.cfg.yaml delete mode 100644 configs/p35_freezoff_non_iid_5_cifar10_cnn/tifl_basic.cfg.yaml delete mode 100644 
configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/descr.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/dyn_terminate.cfg.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/dyn_terminate_swyh.cfg.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/dyn_terminate.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/dyn_terminate_swyh.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fedavg.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fednova.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fedprox.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict2.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict3.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict4.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/tifl_adaptive.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/tifl_basic.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fedavg.cfg.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fednova.cfg.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fedprox.cfg.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/gen.py delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload.cfg.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict.cfg.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict2.cfg.yaml delete mode 100644 
configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict3.cfg.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict4.cfg.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/run.py delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/tifl_adaptive.cfg.yaml delete mode 100644 configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/tifl_basic.cfg.yaml delete mode 100644 configs/terminate/p_terminate_terminate.yaml delete mode 100644 configs/terminate/p_terminate_terminate_swyh.yaml delete mode 100644 configs/terminate/p_terminate_vanilla.yaml delete mode 100644 configs/terminate/p_terminate_vanilla_s3.yaml delete mode 100644 configs/terminate/run.py delete mode 100644 configs/tifl-15/exp_p15_baseline.yaml delete mode 100644 configs/tifl-15/exp_p15_tifl-adaptive.yaml delete mode 100644 configs/tifl-15/exp_p15_tifl-basic.yaml delete mode 100644 configs/tifl-15/exp_p3_tifl.yaml delete mode 100644 configs/tifl-15/run.py delete mode 100644 deploy/dev/client_stub_default.yml delete mode 100644 deploy/dev/client_stub_fast.yml delete mode 100644 deploy/dev/client_stub_medium.yml delete mode 100644 deploy/dev/client_stub_slow.yml delete mode 100644 deploy/dev/system_stub.yml delete mode 100644 deploy/dev_generate/client_stub_medium.yml delete mode 100644 deploy/dev_generate/description.yml delete mode 100644 deploy/dev_generate/stub_fast.yml rename deploy/{dev_generate => docker}/stub_default.yml (100%) rename deploy/{dev_generate => docker}/system_stub.yml (100%) delete mode 100644 deploy/p11_freezoff/client_stub_default.yml delete mode 100644 deploy/p11_freezoff/client_stub_fast.yml delete mode 100644 deploy/p11_freezoff/client_stub_medium.yml delete mode 100644 deploy/p11_freezoff/client_stub_slow.yml delete mode 100644 deploy/p11_freezoff/system_stub.yml delete mode 100644 deploy/p11_freezoff_fast/client_stub_default.yml delete mode 100644 deploy/p11_freezoff_fast/client_stub_fast.yml delete mode 100644 
deploy/p11_freezoff_fast/client_stub_medium.yml delete mode 100644 deploy/p11_freezoff_fast/client_stub_slow.yml delete mode 100644 deploy/p11_freezoff_fast/system_stub.yml delete mode 100644 deploy/p23_freezoff_w9s3-half/client_stub_default.yml delete mode 100644 deploy/p23_freezoff_w9s3-half/client_stub_fast.yml delete mode 100644 deploy/p23_freezoff_w9s3-half/client_stub_medium.yml delete mode 100644 deploy/p23_freezoff_w9s3-half/client_stub_slow.yml delete mode 100644 deploy/p23_freezoff_w9s3-half/system_stub.yml delete mode 100644 deploy/p23_freezoff_w9s3/client_stub_default.yml delete mode 100644 deploy/p23_freezoff_w9s3/client_stub_fast.yml delete mode 100644 deploy/p23_freezoff_w9s3/client_stub_medium.yml delete mode 100644 deploy/p23_freezoff_w9s3/client_stub_slow.yml delete mode 100644 deploy/p23_freezoff_w9s3/system_stub.yml delete mode 100644 deploy/p23_freezoff_w9s3_fast/client_stub_default.yml delete mode 100644 deploy/p23_freezoff_w9s3_fast/client_stub_fast.yml delete mode 100644 deploy/p23_freezoff_w9s3_fast/client_stub_medium.yml delete mode 100644 deploy/p23_freezoff_w9s3_fast/client_stub_slow.yml delete mode 100644 deploy/p23_freezoff_w9s3_fast/system_stub.yml delete mode 100644 deploy/p28_non_iid_effect/client_stub_default.yml delete mode 100644 deploy/p28_non_iid_effect/client_stub_fast.yml delete mode 100644 deploy/p28_non_iid_effect/client_stub_medium.yml delete mode 100644 deploy/p28_non_iid_effect/client_stub_slow.yml delete mode 100644 deploy/p28_non_iid_effect/system_stub.yml delete mode 100644 deploy/templates/client_stub_default.yml delete mode 100644 deploy/templates/client_stub_fast.yml delete mode 100644 deploy/templates/client_stub_medium.yml delete mode 100644 deploy/templates/client_stub_slow.yml delete mode 100644 deploy/templates/system_stub.yml delete mode 100644 deploy/terminate/client_stub_default.yml delete mode 100644 deploy/terminate/client_stub_fast.yml delete mode 100644 deploy/terminate/client_stub_medium.yml delete 
mode 100644 deploy/terminate/client_stub_slow.yml delete mode 100644 deploy/terminate/system_stub.yml delete mode 100644 deploy/tifl-15/client_stub_default.yml delete mode 100644 deploy/tifl-15/client_stub_fast.yml delete mode 100644 deploy/tifl-15/client_stub_medium.yml delete mode 100644 deploy/tifl-15/client_stub_slow.yml delete mode 100644 deploy/tifl-15/system_stub.yml rename configs/dev_mnist/exps/fedavg_direct.yaml => experiments/example_docker/descr.yaml (52%) rename {configs/dev_mnist => experiments/example_docker}/fedavg.cfg.yaml (100%) rename {configs/dev_mnist => experiments/example_native}/descr.yaml (68%) rename configs/dev/dev_p2_fedprox.cfg.yaml => experiments/example_native/fedavg.cfg.yaml (64%) diff --git a/.gitignore b/.gitignore index dfd70190..80d1de1c 100644 --- a/.gitignore +++ b/.gitignore @@ -146,4 +146,5 @@ docker_data *.tmp.txt docker-compose.yml -refactor-notes.md \ No newline at end of file +refactor-notes.md +experiments/**/exps/* \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index afa10773..72b75692 100644 --- a/Dockerfile +++ b/Dockerfile @@ -28,5 +28,6 @@ EXPOSE 5000 COPY fltk ./fltk COPY configs ./configs +COPY experiments ./experiments CMD python3 -m fltk remote $EXP_CONFIG $RANK --nic=$NIC --host=$MASTER_HOSTNAME $OPTIONAL_PARAMS \ No newline at end of file diff --git a/configs/dev/descr.yaml b/configs/dev/descr.yaml deleted file mode 100644 index fab64941..00000000 --- a/configs/dev/descr.yaml +++ /dev/null @@ -1,31 +0,0 @@ ---- -# Experiment configuration -total_epochs: 11 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 100 -warmup_round: false -output_location: 'output/dev_p2' -tensor_board_active: true -clients_per_round: 2 -node_groups: - slow: [1, 1] - medium: [2, 2] - fast: [3, 3] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) 
-#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 2 diff --git a/configs/dev/dev_p2_fedavg.cfg.yaml b/configs/dev/dev_p2_fedavg.cfg.yaml deleted file mode 100644 index ecb5bc3e..00000000 --- a/configs/dev/dev_p2_fedavg.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500 \ No newline at end of file diff --git a/configs/dev/exps/dev_p2_fedavg.yaml b/configs/dev/exps/dev_p2_fedavg.yaml deleted file mode 100644 index a253f11d..00000000 --- a/configs/dev/exps/dev_p2_fedavg.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 11 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 100 -warmup_round: false -output_location: 'output/dev_p2' -tensor_board_active: true -clients_per_round: 2 -node_groups: - slow: [1, 1] - medium: [2, 2] - fast: [3, 3] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 2 -# Individual configuration -offload_stategy: vanilla -deadline: 500 -experiment_prefix: 'dev_p2_fedavg' diff --git a/configs/dev/exps/dev_p2_fedprox.yaml b/configs/dev/exps/dev_p2_fedprox.yaml deleted file mode 100644 index d1884c0a..00000000 --- 
a/configs/dev/exps/dev_p2_fedprox.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 11 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 100 -warmup_round: false -output_location: 'output/dev_p2' -tensor_board_active: true -clients_per_round: 2 -node_groups: - slow: [1, 1] - medium: [2, 2] - fast: [3, 3] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 2 -# Individual configuration -offload_stategy: vanilla -deadline: 500 -optimizer: FedProx - -experiment_prefix: 'dev_p2_fedprox' diff --git a/configs/dev/gen.py b/configs/dev/gen.py deleted file mode 100644 index 267dadf6..00000000 --- a/configs/dev/gen.py +++ /dev/null @@ -1,27 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = 'configs/dev' - - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff --git 
a/configs/dev/run.py b/configs/dev/run.py deleted file mode 100644 index 4193edce..00000000 --- a/configs/dev/run.py +++ /dev/null @@ -1,21 +0,0 @@ -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - name = 'dev' - generate_docker(name) - base_path = 'configs/dev' - exp_list = [ - 'dev_p2_fedavg.yaml', - 'dev_p2_fedprox.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - - print('Done') - - diff --git a/configs/dev_mnist/exps/fedavg.yaml b/configs/dev_mnist/exps/fedavg.yaml deleted file mode 100644 index 89dc8a37..00000000 --- a/configs/dev_mnist/exps/fedavg.yaml +++ /dev/null @@ -1,38 +0,0 @@ ---- -# Experiment configuration -total_epochs: 3 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 100 -warmup_round: false -output_location: 'output/dev_p2' -tensor_board_active: true -clients_per_round: 4 -node_groups: - slow: [1, 1] - medium: [2, 2] - fast: [3, 3] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 -num_clients: 4 -# Individual configuration -offload_stategy: vanilla -deadline: 500 -single_machine: false -real_time: true -experiment_prefix: 'util_fedavg' diff --git a/configs/dev_mnist/gen.py b/configs/dev_mnist/gen.py deleted file mode 100644 index 
f268dc45..00000000 --- a/configs/dev_mnist/gen.py +++ /dev/null @@ -1,25 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = Path(__file__).parent - descr_path = base_path / 'descr.yaml' - - exp_cfg_list = [x for x in base_path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = base_path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff --git a/configs/dev_mnist/run.py b/configs/dev_mnist/run.py deleted file mode 100644 index 3714a567..00000000 --- a/configs/dev_mnist/run.py +++ /dev/null @@ -1,20 +0,0 @@ -from pathlib import Path - -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - name = 'dev' - generate_docker(name) - base_path = f'{Path(__file__).parent}' - exp_list = [ - 'fedavg.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - - print('Done') diff --git a/configs/dev_mnist_all/descr.yaml b/configs/dev_mnist_all/descr.yaml deleted file mode 100644 index 998743a6..00000000 --- a/configs/dev_mnist_all/descr.yaml +++ /dev/null @@ -1,31 +0,0 @@ ---- -# Experiment configuration -total_epochs: 2 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU 
-cuda: false -profiling_time: 100 -warmup_round: false -output_location: 'output/dev_p2' -tensor_board_active: true -clients_per_round: 2 -node_groups: - slow: [1, 1] - medium: [2, 2] - fast: [3, 3] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 2 diff --git a/configs/dev_mnist_all/exps/fedavg.yaml b/configs/dev_mnist_all/exps/fedavg.yaml deleted file mode 100644 index 391bf17c..00000000 --- a/configs/dev_mnist_all/exps/fedavg.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 2 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 100 -warmup_round: false -output_location: 'output/dev_p2' -tensor_board_active: true -clients_per_round: 2 -node_groups: - slow: [1, 1] - medium: [2, 2] - fast: [3, 3] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 2 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -experiment_prefix: 'dev_mnist_all_fedavg' -single_machine: false diff --git a/configs/dev_mnist_all/exps/fednova.yaml b/configs/dev_mnist_all/exps/fednova.yaml deleted file mode 100644 index 4528c8dd..00000000 --- 
a/configs/dev_mnist_all/exps/fednova.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 2 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 100 -warmup_round: false -output_location: 'output/dev_p2' -tensor_board_active: true -clients_per_round: 2 -node_groups: - slow: [1, 1] - medium: [2, 2] - fast: [3, 3] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 2 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova -experiment_prefix: 'dev_mnist_all_fednova' diff --git a/configs/dev_mnist_all/exps/fedprox.yaml b/configs/dev_mnist_all/exps/fedprox.yaml deleted file mode 100644 index a325437c..00000000 --- a/configs/dev_mnist_all/exps/fedprox.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 2 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 100 -warmup_round: false -output_location: 'output/dev_p2' -tensor_board_active: true -clients_per_round: 2 -node_groups: - slow: [1, 1] - medium: [2, 2] - fast: [3, 3] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: 
'131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 2 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx -experiment_prefix: 'dev_mnist_all_fedprox' diff --git a/configs/dev_mnist_all/fedavg.cfg.yaml b/configs/dev_mnist_all/fedavg.cfg.yaml deleted file mode 100644 index 3b4615d1..00000000 --- a/configs/dev_mnist_all/fedavg.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 \ No newline at end of file diff --git a/configs/dev_mnist_all/fednova.cfg.yaml b/configs/dev_mnist_all/fednova.cfg.yaml deleted file mode 100644 index ca0e2a55..00000000 --- a/configs/dev_mnist_all/fednova.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova \ No newline at end of file diff --git a/configs/dev_mnist_all/fedprox.cfg.yaml b/configs/dev_mnist_all/fedprox.cfg.yaml deleted file mode 100644 index f66490e9..00000000 --- a/configs/dev_mnist_all/fedprox.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx \ No newline at end of file diff --git a/configs/dev_mnist_all/gen.py b/configs/dev_mnist_all/gen.py deleted file mode 100644 index 168833f0..00000000 --- a/configs/dev_mnist_all/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += 
f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff --git a/configs/dev_mnist_all/run.py b/configs/dev_mnist_all/run.py deleted file mode 100644 index c2483d49..00000000 --- a/configs/dev_mnist_all/run.py +++ /dev/null @@ -1,22 +0,0 @@ -from pathlib import Path - -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - name = 'dev' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - 'fedavg.yaml', - 'fednova.yaml', - 'fedprox.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - - print('Done') diff --git a/configs/effect-freezing/p_freezing-iid_freeze.yaml b/configs/effect-freezing/p_freezing-iid_freeze.yaml deleted file mode 100644 index d078e4ea..00000000 --- a/configs/effect-freezing/p_freezing-iid_freeze.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p_freezing_iid_freeze' -offload_stategy: freeze -profiling_time: 100 -deadline: 1 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 2 -node_groups: - slow: [1, 1] - medium: [2, 2] - fast: [3, 3] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # 
label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 diff --git a/configs/effect-freezing/p_freezing-iid_vanilla.yaml b/configs/effect-freezing/p_freezing-iid_vanilla.yaml deleted file mode 100644 index d1b3592c..00000000 --- a/configs/effect-freezing/p_freezing-iid_vanilla.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p_freezing_iid_vanilla' -offload_stategy: vanilla -profiling_time: 100 -deadline: 1 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 2 -node_groups: - slow: [1, 1] - medium: [2, 2] - fast: [3, 3] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 diff --git a/configs/effect-freezing/run.py b/configs/effect-freezing/run.py deleted file mode 100644 index 7dded93e..00000000 --- a/configs/effect-freezing/run.py +++ /dev/null @@ -1,18 +0,0 @@ -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - name = 'dev' - generate_docker(name, 10, True) - base_path = 'configs/effect-freezing' - exp_list = ['p_freezing-iid_freeze.yaml','p_freezing-iid_vanilla.yaml'] - exp_list = [f'{base_path}/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in 
exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - - print('Done') - - diff --git a/configs/exp_p2_vanilla.yaml b/configs/exp_p2_vanilla.yaml deleted file mode 100644 index 3ef45031..00000000 --- a/configs/exp_p2_vanilla.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 250 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p2_vanilla' -offload_stategy: vanilla -profiling_time: 100 -deadline: 500 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 5 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 19 diff --git a/configs/exp_p2_w_4_s2_vanilla.yaml b/configs/exp_p2_w_4_s2_vanilla.yaml deleted file mode 100644 index d306a678..00000000 --- a/configs/exp_p2_w_4_s2_vanilla.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 100 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p2_w4_s2_vanilla' -offload_stategy: vanilla -profiling_time: 100 -deadline: 500 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 2 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || 
"dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p2_w_4_s4_vanilla.yaml b/configs/exp_p2_w_4_s4_vanilla.yaml deleted file mode 100644 index 66a45d77..00000000 --- a/configs/exp_p2_w_4_s4_vanilla.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 100 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p2_w4_s4_vanilla' -offload_stategy: vanilla -profiling_time: 100 -deadline: 500 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p3_w_4_s4_baseline.yaml b/configs/exp_p3_w_4_s4_baseline.yaml deleted file mode 100644 index 89c91bf4..00000000 --- a/configs/exp_p3_w_4_s4_baseline.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 30 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p3_w4_s4_baseline' -offload_stategy: vanilla -profiling_time: 100 -deadline: 500 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # 
"limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p3_w_4_s4_baseline_check.yaml b/configs/exp_p3_w_4_s4_baseline_check.yaml deleted file mode 100644 index 38903f2e..00000000 --- a/configs/exp_p3_w_4_s4_baseline_check.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 30 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p3_w4_s4_baseline_check' -offload_stategy: vanilla -profiling_time: 100 -deadline: 500 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p3_w_4_s4_baseline_long.yaml b/configs/exp_p3_w_4_s4_baseline_long.yaml deleted file mode 100644 index 70c1373c..00000000 --- a/configs/exp_p3_w_4_s4_baseline_long.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 250 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p3_w4_s4_baseline_long' 
-offload_stategy: vanilla -profiling_time: 100 -deadline: 35 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p3_w_4_s4_deadline.yaml b/configs/exp_p3_w_4_s4_deadline.yaml deleted file mode 100644 index 6e72e0f0..00000000 --- a/configs/exp_p3_w_4_s4_deadline.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 30 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p3_w4_s4_deadline' -offload_stategy: deadline -profiling_time: 100 -deadline: 35 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p3_w_4_s4_deadline_check.yaml b/configs/exp_p3_w_4_s4_deadline_check.yaml deleted file mode 100644 index d0bf9250..00000000 --- a/configs/exp_p3_w_4_s4_deadline_check.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 30 -epochs_per_cycle: 1 -wait_for_clients: true -net: 
Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p3_w4_s4_deadline_check' -offload_stategy: deadline -profiling_time: 100 -deadline: 35 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p3_w_4_s4_deadline_long.yaml b/configs/exp_p3_w_4_s4_deadline_long.yaml deleted file mode 100644 index 24c814a8..00000000 --- a/configs/exp_p3_w_4_s4_deadline_long.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 250 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p3_w4_s4_deadline_long' -offload_stategy: deadline -profiling_time: 100 -deadline: 35 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p3_w_4_s4_freeze.yaml b/configs/exp_p3_w_4_s4_freeze.yaml deleted file mode 100644 index e0aaa770..00000000 --- 
a/configs/exp_p3_w_4_s4_freeze.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 30 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p3_w4_s4_freeze' -offload_stategy: freeze -profiling_time: 100 -deadline: 35 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p3_w_4_s4_freeze_long.yaml b/configs/exp_p3_w_4_s4_freeze_long.yaml deleted file mode 100644 index c91144ad..00000000 --- a/configs/exp_p3_w_4_s4_freeze_long.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 250 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p3_w4_s4_freeze_long' -offload_stategy: freeze -profiling_time: 100 -deadline: 35 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff 
--git a/configs/exp_p3_w_4_s4_slow_baseline.yaml b/configs/exp_p3_w_4_s4_slow_baseline.yaml deleted file mode 100644 index 8e6bea48..00000000 --- a/configs/exp_p3_w_4_s4_slow_baseline.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 30 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p3_w4_s4_slow_baseline' -offload_stategy: vanilla -profiling_time: 100 -deadline: 500 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p3_w_4_s4_slow_deadline.yaml b/configs/exp_p3_w_4_s4_slow_deadline.yaml deleted file mode 100644 index 6e72e0f0..00000000 --- a/configs/exp_p3_w_4_s4_slow_deadline.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 30 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p3_w4_s4_deadline' -offload_stategy: deadline -profiling_time: 100 -deadline: 35 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed 
|| random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p3_w_4_s4_swyh.yaml b/configs/exp_p3_w_4_s4_swyh.yaml deleted file mode 100644 index e43ce120..00000000 --- a/configs/exp_p3_w_4_s4_swyh.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 30 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p3_w4_s4_swyh' -offload_stategy: swyh -profiling_time: 100 -deadline: 35 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p3_w_4_s4_swyh_long.yaml b/configs/exp_p3_w_4_s4_swyh_long.yaml deleted file mode 100644 index 6e898a94..00000000 --- a/configs/exp_p3_w_4_s4_swyh_long.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 250 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p3_w4_s4_swyh_long' -offload_stategy: swyh -profiling_time: 100 -deadline: 35 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) 
-sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p4_w_4_s2_swyh_non_iid.yaml b/configs/exp_p4_w_4_s2_swyh_non_iid.yaml deleted file mode 100644 index afdcef17..00000000 --- a/configs/exp_p4_w_4_s2_swyh_non_iid.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 30 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p4_w4_s2_swyh_non_iid' -offload_stategy: swyh -profiling_time: 100 -deadline: 37 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 2 -sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p4_w_4_s2_vanilla_non_iid.yaml b/configs/exp_p4_w_4_s2_vanilla_non_iid.yaml deleted file mode 100644 index 187a816b..00000000 --- a/configs/exp_p4_w_4_s2_vanilla_non_iid.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 30 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p4_w4_s2_vanilla_non_iid' -offload_stategy: vanilla -profiling_time: 100 -deadline: 37 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 2 
-sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p5_w_4_s4_deadline_non_iid.yaml b/configs/exp_p5_w_4_s4_deadline_non_iid.yaml deleted file mode 100644 index 81cdbf31..00000000 --- a/configs/exp_p5_w_4_s4_deadline_non_iid.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 100 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p5_w4_s4_deadline_non_iid' -offload_stategy: deadline -profiling_time: 100 -deadline: 37 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p5_w_4_s4_freeze_non_iid.yaml b/configs/exp_p5_w_4_s4_freeze_non_iid.yaml deleted file mode 100644 index ec7f11b6..00000000 --- a/configs/exp_p5_w_4_s4_freeze_non_iid.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 100 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false 
-experiment_prefix: 'exp_p5_w4_s4_freeze_non_iid' -offload_stategy: freeze -profiling_time: 100 -deadline: 37 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p5_w_4_s4_swyh_non_iid.yaml b/configs/exp_p5_w_4_s4_swyh_non_iid.yaml deleted file mode 100644 index 49f4e6ed..00000000 --- a/configs/exp_p5_w_4_s4_swyh_non_iid.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 100 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p5_w4_s4_swyh_non_iid' -offload_stategy: swyh -profiling_time: 100 -deadline: 37 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p5_w_4_s4_vanilla_non_iid.yaml b/configs/exp_p5_w_4_s4_vanilla_non_iid.yaml deleted file mode 100644 index b3290d2d..00000000 --- a/configs/exp_p5_w_4_s4_vanilla_non_iid.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# 
Experiment configuration -total_epochs: 100 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p5_w4_s4_vanilla_non_iid' -offload_stategy: vanilla -profiling_time: 100 -deadline: 37 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p6_w_4_s4_deadline_non_iid.yaml b/configs/exp_p6_w_4_s4_deadline_non_iid.yaml deleted file mode 100644 index 07e8c38e..00000000 --- a/configs/exp_p6_w_4_s4_deadline_non_iid.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p6_w4_s4_deadline_non_iid' -offload_stategy: deadline -profiling_time: 100 -deadline: 37 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git 
a/configs/exp_p6_w_4_s4_swyh_non_iid.yaml b/configs/exp_p6_w_4_s4_swyh_non_iid.yaml deleted file mode 100644 index 31859721..00000000 --- a/configs/exp_p6_w_4_s4_swyh_non_iid.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p6_w4_s4_swhy_non_iid' -offload_stategy: swyh -profiling_time: 100 -deadline: 37 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p7_w_4_s2_deadline_non_iid.yaml b/configs/exp_p7_w_4_s2_deadline_non_iid.yaml deleted file mode 100644 index ef30cb9d..00000000 --- a/configs/exp_p7_w_4_s2_deadline_non_iid.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p7_w4_s2_deadline_non_iid' -offload_stategy: deadline -profiling_time: 100 -deadline: 37 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 2 -sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # 
random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p7_w_4_s2_freeze_non_iid.yaml b/configs/exp_p7_w_4_s2_freeze_non_iid.yaml deleted file mode 100644 index 1a72f579..00000000 --- a/configs/exp_p7_w_4_s2_freeze_non_iid.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p7_w4_s2_freeze_non_iid' -offload_stategy: freeze -profiling_time: 100 -deadline: 37 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 2 -sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p7_w_4_s2_swyh_non_iid.yaml b/configs/exp_p7_w_4_s2_swyh_non_iid.yaml deleted file mode 100644 index 7f8c7332..00000000 --- a/configs/exp_p7_w_4_s2_swyh_non_iid.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p7_w4_s2_swyh_non_iid' -offload_stategy: swyh -profiling_time: 100 -deadline: 37 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 2 -sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" 
(default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p7_w_4_s2_vanilla_non_iid.yaml b/configs/exp_p7_w_4_s2_vanilla_non_iid.yaml deleted file mode 100644 index 2662656e..00000000 --- a/configs/exp_p7_w_4_s2_vanilla_non_iid.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p7_w4_s2_vanilla_non_iid' -offload_stategy: vanilla -profiling_time: 100 -deadline: 37 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 2 -sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p8_w_4_s2_baseline_fmnist_non_iid.yaml b/configs/exp_p8_w_4_s2_baseline_fmnist_non_iid.yaml deleted file mode 100644 index 218bffdd..00000000 --- a/configs/exp_p8_w_4_s2_baseline_fmnist_non_iid.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -#net: Cifar10CNN -#dataset: cifar10 -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 
'exp_p8_w4_s2_baseline_fmnist_non_iid' -offload_stategy: vanilla -profiling_time: 100 -deadline: 17.5 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 2 -sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p8_w_4_s2_freeze_fmnist_non_iid.yaml b/configs/exp_p8_w_4_s2_freeze_fmnist_non_iid.yaml deleted file mode 100644 index fe437054..00000000 --- a/configs/exp_p8_w_4_s2_freeze_fmnist_non_iid.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -#net: Cifar10CNN -#dataset: cifar10 -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p8_w4_s2_freeze_fmnist_non_iid' -offload_stategy: freeze -profiling_time: 100 -deadline: 17.5 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 2 -sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p8_w_4_s2_offload_fmnist_non_iid.yaml b/configs/exp_p8_w_4_s2_offload_fmnist_non_iid.yaml deleted file mode 100644 index 7194ea73..00000000 --- 
a/configs/exp_p8_w_4_s2_offload_fmnist_non_iid.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -#net: Cifar10CNN -#dataset: cifar10 -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p8_w4_s2_offload_fmnist_non_iid' -offload_stategy: offload -profiling_time: 100 -deadline: 17.5 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 2 -sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p8_w_4_s2_swyh_fmnist_non_iid.yaml b/configs/exp_p8_w_4_s2_swyh_fmnist_non_iid.yaml deleted file mode 100644 index 836b99c8..00000000 --- a/configs/exp_p8_w_4_s2_swyh_fmnist_non_iid.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -#net: Cifar10CNN -#dataset: cifar10 -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p8_w4_s2_swyh_fmnist_non_iid' -offload_stategy: swyh -profiling_time: 100 -deadline: 17.5 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 2 -sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed 
|| random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p8_w_4_s4_baseline_fmnist.yaml b/configs/exp_p8_w_4_s4_baseline_fmnist.yaml deleted file mode 100644 index 9b5dbca1..00000000 --- a/configs/exp_p8_w_4_s4_baseline_fmnist.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -#net: Cifar10CNN -#dataset: cifar10 -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p8_w4_s4_baseline_fmnist' -offload_stategy: vanilla -profiling_time: 100 -deadline: 17.5 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p8_w_4_s4_baseline_fmnist_non_iid.yaml b/configs/exp_p8_w_4_s4_baseline_fmnist_non_iid.yaml deleted file mode 100644 index aca5e153..00000000 --- a/configs/exp_p8_w_4_s4_baseline_fmnist_non_iid.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -#net: Cifar10CNN -#dataset: cifar10 -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p8_w4_s4_baseline_fmnist_non_iid' -offload_stategy: vanilla -profiling_time: 100 -deadline: 17.5 -warmup_round: false -output_location: 'output' 
-tensor_board_active: true -clients_per_round: 4 -sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p8_w_4_s4_freeze_fmnist.yaml b/configs/exp_p8_w_4_s4_freeze_fmnist.yaml deleted file mode 100644 index 50788622..00000000 --- a/configs/exp_p8_w_4_s4_freeze_fmnist.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -#net: Cifar10CNN -#dataset: cifar10 -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p8_w4_s4_freeze_fmnist' -offload_stategy: freeze -profiling_time: 100 -deadline: 17.5 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p8_w_4_s4_freeze_fmnist_non_iid.yaml b/configs/exp_p8_w_4_s4_freeze_fmnist_non_iid.yaml deleted file mode 100644 index bb4bad80..00000000 --- a/configs/exp_p8_w_4_s4_freeze_fmnist_non_iid.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -#net: 
Cifar10CNN -#dataset: cifar10 -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p8_w4_s4_freeze_fmnist_non_iid' -offload_stategy: freeze -profiling_time: 100 -deadline: 17.5 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p8_w_4_s4_offload_fmnist.yaml b/configs/exp_p8_w_4_s4_offload_fmnist.yaml deleted file mode 100644 index 04e5ef9d..00000000 --- a/configs/exp_p8_w_4_s4_offload_fmnist.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -#net: Cifar10CNN -#dataset: cifar10 -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p8_w4_s4_offload_fmnist' -offload_stategy: offload -profiling_time: 100 -deadline: 17.5 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git 
a/configs/exp_p8_w_4_s4_offload_fmnist_non_iid.yaml b/configs/exp_p8_w_4_s4_offload_fmnist_non_iid.yaml deleted file mode 100644 index 5d2f1618..00000000 --- a/configs/exp_p8_w_4_s4_offload_fmnist_non_iid.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -#net: Cifar10CNN -#dataset: cifar10 -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p8_w4_s4_offload_fmnist_non_iid' -offload_stategy: offload -profiling_time: 100 -deadline: 17.5 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p8_w_4_s4_swyh_fmnist.yaml b/configs/exp_p8_w_4_s4_swyh_fmnist.yaml deleted file mode 100644 index bce29ecd..00000000 --- a/configs/exp_p8_w_4_s4_swyh_fmnist.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -#net: Cifar10CNN -#dataset: cifar10 -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p8_w4_s4_swyh_fmnist' -offload_stategy: swyh -profiling_time: 100 -deadline: 17.5 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" 
|| "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p8_w_4_s4_swyh_fmnist_non_iid.yaml b/configs/exp_p8_w_4_s4_swyh_fmnist_non_iid.yaml deleted file mode 100644 index 97a5ba7f..00000000 --- a/configs/exp_p8_w_4_s4_swyh_fmnist_non_iid.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -#net: Cifar10CNN -#dataset: cifar10 -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p8_w4_s4_swyh_fmnist_non_iid' -offload_stategy: swyh -profiling_time: 100 -deadline: 17.5 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "limit labels flex" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p8_w_4_s4_vanilla_.yaml b/configs/exp_p8_w_4_s4_vanilla_.yaml deleted file mode 100644 index e03d0b4d..00000000 --- a/configs/exp_p8_w_4_s4_vanilla_.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p8_w4_s4_vanilla' -offload_stategy: vanilla -profiling_time: 100 -deadline: 37 -warmup_round: false 
-output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/exp_p8_w_4_s4_vanilla_fmnist.yaml b/configs/exp_p8_w_4_s4_vanilla_fmnist.yaml deleted file mode 100644 index 29a819b3..00000000 --- a/configs/exp_p8_w_4_s4_vanilla_fmnist.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -#net: Cifar10CNN -#dataset: cifar10 -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p8_w4_s4_vanilla_fmnist' -offload_stategy: vanilla -profiling_time: 100 -deadline: 37 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/experiment.yaml b/configs/experiment.yaml deleted file mode 100644 index 8e180b73..00000000 --- a/configs/experiment.yaml +++ /dev/null @@ -1,28 +0,0 @@ ---- -# Experiment configuration -total_epochs: 20 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to 
false will force CPU -cuda: false -experiment_prefix: 'experiment_sample' -offload_stategy: deadline -profiling_time: -1 -deadline: 1000 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - hostname: '10.5.0.11' - nic: 'eth0' - clients: - amount: 4 diff --git a/configs/experiment_cifar100.yaml b/configs/experiment_cifar100.yaml deleted file mode 100644 index b579942a..00000000 --- a/configs/experiment_cifar100.yaml +++ /dev/null @@ -1,28 +0,0 @@ ---- -# Experiment configuration -total_epochs: 60 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar100ResNet -dataset: cifar100 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'experiment_cifar100' -offload_stategy: vanilla -profiling_time: 100 -deadline: 500 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - hostname: '10.5.0.11' - nic: 'eth0' - clients: - amount: 4 diff --git a/configs/experiment_deadline.yaml b/configs/experiment_deadline.yaml deleted file mode 100644 index c038e3a6..00000000 --- a/configs/experiment_deadline.yaml +++ /dev/null @@ -1,28 +0,0 @@ ---- -# Experiment configuration -total_epochs: 20 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; 
setting to false will force CPU -cuda: false -experiment_prefix: 'exp_offload_deadline' -offload_stategy: deadline -profiling_time: 50 -deadline: 140 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 2 -sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - hostname: '10.5.0.11' - nic: 'eth0' - clients: - amount: 2 diff --git a/configs/experiment_fmnist.yaml b/configs/experiment_fmnist.yaml deleted file mode 100644 index cb9ce75d..00000000 --- a/configs/experiment_fmnist.yaml +++ /dev/null @@ -1,28 +0,0 @@ ---- -# Experiment configuration -total_epochs: 20 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'experiment_fmnist' -offload_stategy: vanilla -profiling_time: 100 -deadline: 500 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - hostname: '10.5.0.11' - nic: 'eth0' - clients: - amount: 4 diff --git a/configs/experiment_fmnist_offload.yaml b/configs/experiment_fmnist_offload.yaml deleted file mode 100644 index 696808cf..00000000 --- a/configs/experiment_fmnist_offload.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Experiment configuration -total_epochs: 20 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN 
-dataset: fashion-mnist -#net: Cifar10CNN -#dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'experiment_fmnist_offload' -offload_stategy: offload -profiling_time: 100 -deadline: 13 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - hostname: '10.5.0.11' - nic: 'eth0' - clients: - amount: 4 diff --git a/configs/experiment_freeze.yaml b/configs/experiment_freeze.yaml deleted file mode 100644 index 78631070..00000000 --- a/configs/experiment_freeze.yaml +++ /dev/null @@ -1,28 +0,0 @@ ---- -# Experiment configuration -total_epochs: 20 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_freeze_deadline' -offload_stategy: freeze -profiling_time: 50 -deadline: 140 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 2 -sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - hostname: '10.5.0.11' - nic: 'eth0' - clients: - amount: 2 diff --git a/configs/experiment_gcp_c20.yaml b/configs/experiment_gcp_c20.yaml deleted file mode 100644 index 193d61c9..00000000 --- a/configs/experiment_gcp_c20.yaml +++ /dev/null @@ -1,19 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 
-epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: true -experiment_prefix: 'ex-gcp-c20' -output_location: 'output' -tensor_board_active: true -clients_per_round: 10 -system: - federator: - hostname: '192.168.0.129' - nic: 'ens4' - clients: - amount: 20 diff --git a/configs/experiment_gcp_single.yaml b/configs/experiment_gcp_single.yaml deleted file mode 100644 index 19d1d03c..00000000 --- a/configs/experiment_gcp_single.yaml +++ /dev/null @@ -1,19 +0,0 @@ ---- -# Experiment configuration -total_epochs: 5 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: true -experiment_prefix: 'experiment_single_machine' -output_location: 'output' -tensor_board_active: true -clients_per_round: 1 -system: - federator: - hostname: '131.180.40.72' - nic: 'ens4' - clients: - amount: 1 diff --git a/configs/experiment_offload.yaml b/configs/experiment_offload.yaml deleted file mode 100644 index ccf8c0c1..00000000 --- a/configs/experiment_offload.yaml +++ /dev/null @@ -1,28 +0,0 @@ ---- -# Experiment configuration -total_epochs: 1 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_model_offload_deadline_fedavg_test' -offload_stategy: offload -profiling_time: 50 -deadline: 140 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 2 -sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - hostname: '10.5.0.11' - nic: 'eth0' - clients: - amount: 2 
diff --git a/configs/experiment_p18_full.yaml b/configs/experiment_p18_full.yaml deleted file mode 100644 index a01c944a..00000000 --- a/configs/experiment_p18_full.yaml +++ /dev/null @@ -1,28 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'experiment_p18_full' -offload_stategy: deadline -profiling_time: -1 -deadline: 1000 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 18 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - hostname: '10.5.0.11' - nic: 'eth0' - clients: - amount: 18 diff --git a/configs/experiment_swyh.yaml b/configs/experiment_swyh.yaml deleted file mode 100644 index 86b185fd..00000000 --- a/configs/experiment_swyh.yaml +++ /dev/null @@ -1,28 +0,0 @@ ---- -# Experiment configuration -total_epochs: 20 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_swyh_deadline' -offload_stategy: swyh -profiling_time: 50 -deadline: 140 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 2 -sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - hostname: '10.5.0.11' - nic: 'eth0' - clients: - 
amount: 2 diff --git a/configs/experiment_swyh_first_long.yaml b/configs/experiment_swyh_first_long.yaml deleted file mode 100644 index 7089d52b..00000000 --- a/configs/experiment_swyh_first_long.yaml +++ /dev/null @@ -1,29 +0,0 @@ ---- -# Experiment configuration -total_epochs: 20 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_swyh_first_long_deadline' -offload_stategy: swyh -profiling_time: 50 -deadline: 140 -first_deadline: 400 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 2 -sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - hostname: '10.5.0.11' - nic: 'eth0' - clients: - amount: 2 diff --git a/configs/experiment_swyh_warmup.yaml b/configs/experiment_swyh_warmup.yaml deleted file mode 100644 index 72400588..00000000 --- a/configs/experiment_swyh_warmup.yaml +++ /dev/null @@ -1,28 +0,0 @@ ---- -# Experiment configuration -total_epochs: 20 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_swyh__warmup_deadline' -offload_stategy: swyh -profiling_time: 50 -deadline: 140 -warmup_round: true -output_location: 'output' -tensor_board_active: true -clients_per_round: 2 -sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused 
-system: - federator: - hostname: '10.5.0.11' - nic: 'eth0' - clients: - amount: 2 diff --git a/configs/experiment_vanilla.yaml b/configs/experiment_vanilla.yaml deleted file mode 100644 index 2ab96331..00000000 --- a/configs/experiment_vanilla.yaml +++ /dev/null @@ -1,28 +0,0 @@ ---- -# Experiment configuration -total_epochs: 20 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_offload_vanilla' -offload_stategy: vanilla -profiling_time: 100 -deadline: 500 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 2 -sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - hostname: '10.5.0.11' - nic: 'eth0' - clients: - amount: 2 diff --git a/configs/non_iid_experiment.yaml b/configs/non_iid_experiment.yaml deleted file mode 100644 index 1bf59ed8..00000000 --- a/configs/non_iid_experiment.yaml +++ /dev/null @@ -1,23 +0,0 @@ ---- -# Experiment configuration -total_epochs: 30 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'experiment_non_iid(dirichlet)' -output_location: 'output' -tensor_board_active: true -clients_per_round: 4 -sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - hostname: '192.168.1.108' - nic: 'wlp4s0' - clients: - amount: 10 # must be multiple of the number of labels for q-sampler and 
limit-labels diff --git a/configs/p11A_freezoff_iid/p11A_freezoff_iid_dyn_terminate.yaml b/configs/p11A_freezoff_iid/p11A_freezoff_iid_dyn_terminate.yaml deleted file mode 100644 index 8262f35c..00000000 --- a/configs/p11A_freezoff_iid/p11A_freezoff_iid_dyn_terminate.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p11A_freezoff_iid_dyn_terminate' -offload_stategy: dynamic-terminate -profiling_time: 20 -deadline: 50000 -warmup_round: false -output_location: 'output/p11A' -tensor_board_active: true -clients_per_round: 3 -termination_percentage: 0.7 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p11A_freezoff_iid/p11A_freezoff_iid_dyn_terminate_swyh.yaml b/configs/p11A_freezoff_iid/p11A_freezoff_iid_dyn_terminate_swyh.yaml deleted file mode 100644 index ff5ccea7..00000000 --- a/configs/p11A_freezoff_iid/p11A_freezoff_iid_dyn_terminate_swyh.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p11A_freezoff_iid_dyn_terminate_swyh' -offload_stategy: dynamic-terminate-swyh -profiling_time: 20 -deadline: 50000 -warmup_round: false -output_location: 'output/p11A' 
-tensor_board_active: true -clients_per_round: 3 -termination_percentage: 0.7 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p11A_freezoff_iid/p11A_freezoff_iid_fedavg.yaml b/configs/p11A_freezoff_iid/p11A_freezoff_iid_fedavg.yaml deleted file mode 100644 index 190acd98..00000000 --- a/configs/p11A_freezoff_iid/p11A_freezoff_iid_fedavg.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p11A_freezoff_iid_fedavg' -offload_stategy: vanilla -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p11A' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p11A_freezoff_iid/p11A_freezoff_iid_fednova.yaml b/configs/p11A_freezoff_iid/p11A_freezoff_iid_fednova.yaml deleted file mode 100644 index 13ca9fd6..00000000 --- 
a/configs/p11A_freezoff_iid/p11A_freezoff_iid_fednova.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p11A_freezoff_iid_fednova' -offload_stategy: vanilla -optimizer: FedNova -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p11A' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p11A_freezoff_iid/p11A_freezoff_iid_fedprox.yaml b/configs/p11A_freezoff_iid/p11A_freezoff_iid_fedprox.yaml deleted file mode 100644 index 2515b230..00000000 --- a/configs/p11A_freezoff_iid/p11A_freezoff_iid_fedprox.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p11A_freezoff_iid_fedprox' -offload_stategy: vanilla -optimizer: FedProx -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p11A' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || 
"uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p11A_freezoff_iid/p11A_freezoff_iid_offload.yaml b/configs/p11A_freezoff_iid/p11A_freezoff_iid_offload.yaml deleted file mode 100644 index 942b7daa..00000000 --- a/configs/p11A_freezoff_iid/p11A_freezoff_iid_offload.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p11A_freezoff_iid_offload' -offload_stategy: offload -profiling_time: 20 -deadline: 50000 -warmup_round: false -output_location: 'output/p11A' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p11A_freezoff_iid/p11A_freezoff_iid_offload_strict.yaml b/configs/p11A_freezoff_iid/p11A_freezoff_iid_offload_strict.yaml deleted file mode 100644 index 9cc228e6..00000000 --- a/configs/p11A_freezoff_iid/p11A_freezoff_iid_offload_strict.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU 
-cuda: false -experiment_prefix: 'p11A_freezoff_iid_offload_strict' -offload_stategy: offload -profiling_time: 20 -deadline: 7 -deadline_threshold: 2 -warmup_round: false -output_location: 'output/p11A' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p11A_freezoff_iid/p11A_freezoff_iid_tifl_adaptive.yaml b/configs/p11A_freezoff_iid/p11A_freezoff_iid_tifl_adaptive.yaml deleted file mode 100644 index 41d525d1..00000000 --- a/configs/p11A_freezoff_iid/p11A_freezoff_iid_tifl_adaptive.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p11A_freezoff_iid_tifl_adaptive' -offload_stategy: tifl-adaptive -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p11A' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - 
amount: 18 diff --git a/configs/p11A_freezoff_iid/p11A_freezoff_iid_tifl_basic.yaml b/configs/p11A_freezoff_iid/p11A_freezoff_iid_tifl_basic.yaml deleted file mode 100644 index d432df2c..00000000 --- a/configs/p11A_freezoff_iid/p11A_freezoff_iid_tifl_basic.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p11A_freezoff_iid_tifl_basic' -offload_stategy: tifl-basic -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p11A' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p11A_freezoff_iid/run.py b/configs/p11A_freezoff_iid/run.py deleted file mode 100644 index d15e51c2..00000000 --- a/configs/p11A_freezoff_iid/run.py +++ /dev/null @@ -1,30 +0,0 @@ -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - name = 'p11_freezoff' - generate_docker(name) - base_path = 'configs/p11A_freezoff_iid' - exp_list = [ - # 'p11A_freezoff_iid_fedprox.yaml', - # 'p11A_freezoff_iid_fednova.yaml', - # 'p11A_freezoff_iid_offload.yaml', - 'p11A_freezoff_iid_offload_strict.yaml', - # 'p11A_freezoff_iid_dyn_terminate_swyh.yaml', - 'p11A_freezoff_iid_fedavg.yaml', - 'p11A_freezoff_iid_tifl_adaptive.yaml', - # 'p11A_freezoff_iid_dyn_terminate.yaml', - 
'p11A_freezoff_iid_tifl_basic.yaml' - ] - exp_list = [f'{base_path}/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - - print('Done') - - - - diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate.yaml deleted file mode 100644 index 6c791172..00000000 --- a/configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p11_freezoff_iid_dyn_terminate' -offload_stategy: dynamic-terminate -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p11' -tensor_board_active: true -clients_per_round: 3 -termination_percentage: 0.7 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate_swyh.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate_swyh.yaml deleted file mode 100644 index d439fe4a..00000000 --- a/configs/p11_freezoff_iid/p11_freezoff_iid_dyn_terminate_swyh.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: 
true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p11_freezoff_iid_dyn_terminate_swyh' -offload_stategy: dynamic-terminate-swyh -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p11' -tensor_board_active: true -clients_per_round: 3 -termination_percentage: 0.7 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_fedavg.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_fedavg.yaml deleted file mode 100644 index cc912b84..00000000 --- a/configs/p11_freezoff_iid/p11_freezoff_iid_fedavg.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p11_freezoff_iid_fedavg' -offload_stategy: vanilla -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p11' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' 
- hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_fednova.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_fednova.yaml deleted file mode 100644 index d4ed5682..00000000 --- a/configs/p11_freezoff_iid/p11_freezoff_iid_fednova.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p11_freezoff_iid_fednova' -offload_stategy: vanilla -optimizer: FedNova -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p11' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_fedprox.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_fedprox.yaml deleted file mode 100644 index 0d45f094..00000000 --- a/configs/p11_freezoff_iid/p11_freezoff_iid_fedprox.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p11_freezoff_iid_fedprox' -offload_stategy: vanilla -optimizer: FedProx -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p11' -tensor_board_active: true -clients_per_round: 3 
-node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_offload.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_offload.yaml deleted file mode 100644 index b81032a8..00000000 --- a/configs/p11_freezoff_iid/p11_freezoff_iid_offload.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p11_freezoff_iid_offload' -offload_stategy: offload -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p11' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_offload_strict.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_offload_strict.yaml deleted file mode 100644 index 39135d17..00000000 --- a/configs/p11_freezoff_iid/p11_freezoff_iid_offload_strict.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- 
-# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p11_freezoff_iid_offload_strict' -offload_stategy: offload -profiling_time: 30 -deadline: 26 -deadline_threshold: 1 -warmup_round: false -output_location: 'output/p11' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_tifl_adaptive.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_tifl_adaptive.yaml deleted file mode 100644 index d7a6d012..00000000 --- a/configs/p11_freezoff_iid/p11_freezoff_iid_tifl_adaptive.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p11_freezoff_iid_tifl_adaptive' -offload_stategy: tifl-adaptive -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p11' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed 
|| random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p11_freezoff_iid/p11_freezoff_iid_tifl_basic.yaml b/configs/p11_freezoff_iid/p11_freezoff_iid_tifl_basic.yaml deleted file mode 100644 index 2c15a6a3..00000000 --- a/configs/p11_freezoff_iid/p11_freezoff_iid_tifl_basic.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p11_freezoff_iid_tifl_basic' -offload_stategy: tifl-basic -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p11' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p11_freezoff_iid/run.py b/configs/p11_freezoff_iid/run.py deleted file mode 100644 index b4e7f1db..00000000 --- a/configs/p11_freezoff_iid/run.py +++ /dev/null @@ -1,30 +0,0 @@ -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - name = 'p11_freezoff' - generate_docker(name) - base_path = 'configs/p11_freezoff_iid' - exp_list = [ - # 'p11_freezoff_iid_fedprox.yaml', - # 'p11_freezoff_iid_fednova.yaml', - # 'p11_freezoff_iid_offload.yaml', - 'p11_freezoff_iid_offload_strict.yaml', - # 'p11_freezoff_iid_dyn_terminate_swyh.yaml', - 
'p11_freezoff_iid_fedavg.yaml', - 'p11_freezoff_iid_tifl_adaptive.yaml', - # 'p11_freezoff_iid_dyn_terminate.yaml', - 'p11_freezoff_iid_tifl_basic.yaml' - ] - exp_list = [f'{base_path}/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - - print('Done') - - - - diff --git a/configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_large.yaml b/configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_large.yaml deleted file mode 100644 index a9567bf1..00000000 --- a/configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_large.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p12_freezoff_iid_dyn_terminate' -offload_stategy: dynamic-terminate -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p12' -tensor_board_active: true -clients_per_round: 6 -termination_percentage: 0.7 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_swyh_large.yaml b/configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_swyh_large.yaml deleted file mode 100644 index 0c089d8e..00000000 --- 
a/configs/p12_freezoff_iid_large/p12_freezoff_iid_dyn_terminate_swyh_large.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p12_freezoff_iid_dyn_terminate_swyh' -offload_stategy: dynamic-terminate-swyh -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p12' -tensor_board_active: true -clients_per_round: 6 -termination_percentage: 0.7 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p12_freezoff_iid_large/p12_freezoff_iid_fedavg_large.yaml b/configs/p12_freezoff_iid_large/p12_freezoff_iid_fedavg_large.yaml deleted file mode 100644 index 24327e6f..00000000 --- a/configs/p12_freezoff_iid_large/p12_freezoff_iid_fedavg_large.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p12_freezoff_iid_fedavg' -offload_stategy: vanilla -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p12' -tensor_board_active: true -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: 
"uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p12_freezoff_iid_large/p12_freezoff_iid_fednova_large.yaml b/configs/p12_freezoff_iid_large/p12_freezoff_iid_fednova_large.yaml deleted file mode 100644 index cde9e011..00000000 --- a/configs/p12_freezoff_iid_large/p12_freezoff_iid_fednova_large.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p12_freezoff_iid_fednova' -offload_stategy: vanilla -optimizer: FedNova -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p12' -tensor_board_active: true -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p12_freezoff_iid_large/p12_freezoff_iid_fedprox_large.yaml b/configs/p12_freezoff_iid_large/p12_freezoff_iid_fedprox_large.yaml deleted file mode 100644 index e39700ba..00000000 --- a/configs/p12_freezoff_iid_large/p12_freezoff_iid_fedprox_large.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 
-wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p12_freezoff_iid_fedprox' -offload_stategy: vanilla -optimizer: FedProx -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p12' -tensor_board_active: true -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p12_freezoff_iid_large/p12_freezoff_iid_offload_large.yaml b/configs/p12_freezoff_iid_large/p12_freezoff_iid_offload_large.yaml deleted file mode 100644 index f8045a71..00000000 --- a/configs/p12_freezoff_iid_large/p12_freezoff_iid_offload_large.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p12_freezoff_iid_offload' -offload_stategy: offload -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p12' -tensor_board_active: true -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused 
-system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p12_freezoff_iid_large/p12_freezoff_iid_offload_strict_large.yaml b/configs/p12_freezoff_iid_large/p12_freezoff_iid_offload_strict_large.yaml deleted file mode 100644 index 3ff0a335..00000000 --- a/configs/p12_freezoff_iid_large/p12_freezoff_iid_offload_strict_large.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p12_freezoff_iid_offload_strict' -offload_stategy: offload -profiling_time: 20 -deadline: 7 -deadline_threshold: 2 -warmup_round: false -output_location: 'output/p12' -tensor_board_active: true -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p12_freezoff_iid_large/p12_freezoff_iid_tifl_adaptive_large.yaml b/configs/p12_freezoff_iid_large/p12_freezoff_iid_tifl_adaptive_large.yaml deleted file mode 100644 index be7ca59a..00000000 --- a/configs/p12_freezoff_iid_large/p12_freezoff_iid_tifl_adaptive_large.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 
'p12_freezoff_iid_tifl_adaptive' -offload_stategy: tifl-adaptive -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p12' -tensor_board_active: true -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p12_freezoff_iid_large/p12_freezoff_iid_tifl_basic_large.yaml b/configs/p12_freezoff_iid_large/p12_freezoff_iid_tifl_basic_large.yaml deleted file mode 100644 index 40b89646..00000000 --- a/configs/p12_freezoff_iid_large/p12_freezoff_iid_tifl_basic_large.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p12_freezoff_iid_tifl_basic' -offload_stategy: tifl-basic -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p12' -tensor_board_active: true -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git 
a/configs/p12_freezoff_iid_large/run.py b/configs/p12_freezoff_iid_large/run.py deleted file mode 100644 index af24f5d5..00000000 --- a/configs/p12_freezoff_iid_large/run.py +++ /dev/null @@ -1,30 +0,0 @@ -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - name = 'p11_freezoff' - generate_docker(name) - base_path = 'configs/p12_freezoff_iid_large' - exp_list = [ - 'p12_freezoff_iid_offload_strict_large.yaml', - 'p12_freezoff_iid_offload_large.yaml', - 'p12_freezoff_iid_fedprox_large.yaml', - 'p12_freezoff_iid_fednova_large.yaml', - 'p12_freezoff_iid_dyn_terminate_swyh_large.yaml', - 'p12_freezoff_iid_fedavg_large.yaml', - 'p12_freezoff_iid_tifl_adaptive_large.yaml', - 'p12_freezoff_iid_dyn_terminate_large.yaml', - 'p12_freezoff_iid_tifl_basic_large.yaml' - ] - exp_list = [f'{base_path}/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - - print('Done') - - - - diff --git a/configs/p13_variance_dev/p13_variance_dev_dyn_terminate_large.yaml b/configs/p13_variance_dev/p13_variance_dev_dyn_terminate_large.yaml deleted file mode 100644 index 7288b6be..00000000 --- a/configs/p13_variance_dev/p13_variance_dev_dyn_terminate_large.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p13_variance_dev_dyn_terminate' -offload_stategy: dynamic-terminate -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p12' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || 
"dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 diff --git a/configs/p13_variance_dev/p13_variance_dev_dyn_terminate_swyh_large.yaml b/configs/p13_variance_dev/p13_variance_dev_dyn_terminate_swyh_large.yaml deleted file mode 100644 index dd78fd8c..00000000 --- a/configs/p13_variance_dev/p13_variance_dev_dyn_terminate_swyh_large.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p13_variance_dev_terminate_swyh' -offload_stategy: dynamic-terminate-swyh -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p12' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 diff --git a/configs/p13_variance_dev/p13_variance_dev_fedavg_large.yaml b/configs/p13_variance_dev/p13_variance_dev_fedavg_large.yaml deleted file mode 100644 index 37495688..00000000 --- a/configs/p13_variance_dev/p13_variance_dev_fedavg_large.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 
50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p13_variance_dev_fedavg' -offload_stategy: vanilla -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p12' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 diff --git a/configs/p13_variance_dev/p13_variance_dev_fednova_large.yaml b/configs/p13_variance_dev/p13_variance_dev_fednova_large.yaml deleted file mode 100644 index 4c705ab2..00000000 --- a/configs/p13_variance_dev/p13_variance_dev_fednova_large.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p13_variance_dev_fednova' -offload_stategy: vanilla -optimizer: FedNova -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p12' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused 
-system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 diff --git a/configs/p13_variance_dev/p13_variance_dev_fedprox_large.yaml b/configs/p13_variance_dev/p13_variance_dev_fedprox_large.yaml deleted file mode 100644 index 63213871..00000000 --- a/configs/p13_variance_dev/p13_variance_dev_fedprox_large.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p13_variance_dev_fedprox' -offload_stategy: vanilla -optimizer: FedProx -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p12' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 diff --git a/configs/p13_variance_dev/p13_variance_dev_offload_large.yaml b/configs/p13_variance_dev/p13_variance_dev_offload_large.yaml deleted file mode 100644 index 4ea37194..00000000 --- a/configs/p13_variance_dev/p13_variance_dev_offload_large.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p13_variance_dev_offload' -offload_stategy: offload -profiling_time: 30 -deadline: 50000 -warmup_round: 
false -output_location: 'output/p12' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 diff --git a/configs/p13_variance_dev/p13_variance_dev_tifl_adaptive_large.yaml b/configs/p13_variance_dev/p13_variance_dev_tifl_adaptive_large.yaml deleted file mode 100644 index b24dc6ab..00000000 --- a/configs/p13_variance_dev/p13_variance_dev_tifl_adaptive_large.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p13_variance_dev_adaptive' -offload_stategy: tifl-adaptive -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p12' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 diff --git a/configs/p13_variance_dev/p13_variance_dev_tifl_basic_large.yaml b/configs/p13_variance_dev/p13_variance_dev_tifl_basic_large.yaml deleted file 
mode 100644 index 60f06c0c..00000000 --- a/configs/p13_variance_dev/p13_variance_dev_tifl_basic_large.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p13_variance_dev_tifl_basic' -offload_stategy: tifl-basic -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p12' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 diff --git a/configs/p13_variance_dev/run.py b/configs/p13_variance_dev/run.py deleted file mode 100644 index 085c4d6c..00000000 --- a/configs/p13_variance_dev/run.py +++ /dev/null @@ -1,29 +0,0 @@ -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - name = 'p13_w6' - generate_docker(name) - base_path = 'configs/p13_variance_dev' - exp_list = [ - 'p13_variance_dev_offload_large.yaml', - # 'p13_variance_dev_fedprox_large.yaml', - # 'p13_variance_dev_fednova_large.yaml', - # 'p13_variance_dev_dyn_terminate_swyh_large.yaml', - # 'p13_variance_dev_fedavg_large.yaml', - # 'p13_variance_dev_tifl_adaptive_large.yaml', - # 'p13_variance_dev_dyn_terminate_large.yaml', - # 'p13_variance_dev_tifl_basic_large.yaml' - ] - exp_list = [f'{base_path}/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export 
EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - - print('Done') - - - - diff --git a/configs/p14A_check_iid_cifar10_cnn/descr.yaml b/configs/p14A_check_iid_cifar10_cnn/descr.yaml deleted file mode 100644 index 594a03b8..00000000 --- a/configs/p14A_check_iid_cifar10_cnn/descr.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14A' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 diff --git a/configs/p14A_check_iid_cifar10_cnn/dyn_terminate.cfg.yaml b/configs/p14A_check_iid_cifar10_cnn/dyn_terminate.cfg.yaml deleted file mode 100644 index 279369ef..00000000 --- a/configs/p14A_check_iid_cifar10_cnn/dyn_terminate.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 \ No newline at end of file diff --git a/configs/p14A_check_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml b/configs/p14A_check_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml deleted file mode 100644 index 578998b4..00000000 --- a/configs/p14A_check_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# 
Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate.yaml deleted file mode 100644 index 23f5d126..00000000 --- a/configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14A' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 -experiment_prefix: 'p14A_check_iid_cifar10_cnn_dyn_terminate' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml deleted file mode 100644 index 2ce3217b..00000000 --- a/configs/p14A_check_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14A' -tensor_board_active: true 
-termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 - -experiment_prefix: 'p14A_check_iid_cifar10_cnn_dyn_terminate_swyh' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/fedavg.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/fedavg.yaml deleted file mode 100644 index 0c3d3b94..00000000 --- a/configs/p14A_check_iid_cifar10_cnn/exps/fedavg.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14A' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -experiment_prefix: 
'p14A_check_iid_cifar10_cnn_fedavg' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/fednova.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/fednova.yaml deleted file mode 100644 index 7b93eabd..00000000 --- a/configs/p14A_check_iid_cifar10_cnn/exps/fednova.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14A' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova -experiment_prefix: 'p14A_check_iid_cifar10_cnn_fednova' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/fedprox.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/fedprox.yaml deleted file mode 100644 index 3ebf2e49..00000000 --- a/configs/p14A_check_iid_cifar10_cnn/exps/fedprox.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14A' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] 
- fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx -experiment_prefix: 'p14A_check_iid_cifar10_cnn_fedprox' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/offload.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/offload.yaml deleted file mode 100644 index f3503858..00000000 --- a/configs/p14A_check_iid_cifar10_cnn/exps/offload.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14A' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# Individual configuration -offload_stategy: offload -deadline: 500000 -experiment_prefix: 'p14A_check_iid_cifar10_cnn_offload' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/offload_strict.yaml 
b/configs/p14A_check_iid_cifar10_cnn/exps/offload_strict.yaml deleted file mode 100644 index bb23ed2d..00000000 --- a/configs/p14A_check_iid_cifar10_cnn/exps/offload_strict.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14A' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# Individual configuration -offload_stategy: offload -deadline: 7 -deadline_threshold: 2 -experiment_prefix: 'p14A_check_iid_cifar10_cnn_offload_strict' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/tifl_adaptive.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/tifl_adaptive.yaml deleted file mode 100644 index fab8cbf1..00000000 --- a/configs/p14A_check_iid_cifar10_cnn/exps/tifl_adaptive.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14A' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q 
sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 -experiment_prefix: 'p14A_check_iid_cifar10_cnn_tifl_adaptive' diff --git a/configs/p14A_check_iid_cifar10_cnn/exps/tifl_basic.yaml b/configs/p14A_check_iid_cifar10_cnn/exps/tifl_basic.yaml deleted file mode 100644 index 6ae1e298..00000000 --- a/configs/p14A_check_iid_cifar10_cnn/exps/tifl_basic.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14A' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 -experiment_prefix: 'p14A_check_iid_cifar10_cnn_tifl_basic' diff --git a/configs/p14A_check_iid_cifar10_cnn/fedavg.cfg.yaml b/configs/p14A_check_iid_cifar10_cnn/fedavg.cfg.yaml deleted file 
mode 100644 index 3b4615d1..00000000 --- a/configs/p14A_check_iid_cifar10_cnn/fedavg.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 \ No newline at end of file diff --git a/configs/p14A_check_iid_cifar10_cnn/fednova.cfg.yaml b/configs/p14A_check_iid_cifar10_cnn/fednova.cfg.yaml deleted file mode 100644 index ca0e2a55..00000000 --- a/configs/p14A_check_iid_cifar10_cnn/fednova.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova \ No newline at end of file diff --git a/configs/p14A_check_iid_cifar10_cnn/fedprox.cfg.yaml b/configs/p14A_check_iid_cifar10_cnn/fedprox.cfg.yaml deleted file mode 100644 index f66490e9..00000000 --- a/configs/p14A_check_iid_cifar10_cnn/fedprox.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx \ No newline at end of file diff --git a/configs/p14A_check_iid_cifar10_cnn/gen.py b/configs/p14A_check_iid_cifar10_cnn/gen.py deleted file mode 100644 index 168833f0..00000000 --- a/configs/p14A_check_iid_cifar10_cnn/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - 
f.write(exp_data) - print('Done') - - diff --git a/configs/p14A_check_iid_cifar10_cnn/offload.cfg.yaml b/configs/p14A_check_iid_cifar10_cnn/offload.cfg.yaml deleted file mode 100644 index 3febf08b..00000000 --- a/configs/p14A_check_iid_cifar10_cnn/offload.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 500000 \ No newline at end of file diff --git a/configs/p14A_check_iid_cifar10_cnn/offload_strict.cfg.yaml b/configs/p14A_check_iid_cifar10_cnn/offload_strict.cfg.yaml deleted file mode 100644 index f07a9c58..00000000 --- a/configs/p14A_check_iid_cifar10_cnn/offload_strict.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 7 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p14A_check_iid_cifar10_cnn/run.py b/configs/p14A_check_iid_cifar10_cnn/run.py deleted file mode 100644 index 460f8b1e..00000000 --- a/configs/p14A_check_iid_cifar10_cnn/run.py +++ /dev/null @@ -1,43 +0,0 @@ -from pathlib import Path -import time -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - EVENT_FILE="exp_events.txt" - name = 'generate_check_w18_fast' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - 'fedavg.yaml', - # 'offload_strict.yaml', - # 'offload_strict2.yaml', - # 'offload_strict3.yaml', - # 'offload_strict4.yaml', - # 'offload_strict5.yaml', - # 'offload_strict6.yaml', - # 'offload_strict7.yaml', - # 'fednova.yaml', - # 'fedprox.yaml', - # 'offload.yaml', - # 'tifl_adaptive.yaml', - # 'tifl_basic.yaml', - # 'dyn_terminate_swyh.yaml', - # 'dyn_terminate.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> 
{EVENT_FILE}') - start = time.time() - - - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - elapsed = time.time() - start - os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') - - print('Done') - - diff --git a/configs/p14A_check_iid_cifar10_cnn/tifl_adaptive.cfg.yaml b/configs/p14A_check_iid_cifar10_cnn/tifl_adaptive.cfg.yaml deleted file mode 100644 index e0ca9fbd..00000000 --- a/configs/p14A_check_iid_cifar10_cnn/tifl_adaptive.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 \ No newline at end of file diff --git a/configs/p14A_check_iid_cifar10_cnn/tifl_basic.cfg.yaml b/configs/p14A_check_iid_cifar10_cnn/tifl_basic.cfg.yaml deleted file mode 100644 index b12b53b3..00000000 --- a/configs/p14A_check_iid_cifar10_cnn/tifl_basic.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 \ No newline at end of file diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/descr.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/descr.yaml deleted file mode 100644 index 7c09bd98..00000000 --- a/configs/p14B_effect_of_non_iid_ness_cifar10/descr.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14B' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-iid-uniform.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-iid-uniform.yaml 
deleted file mode 100644 index ad80682e..00000000 --- a/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-iid-uniform.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14B' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -experiment_prefix: 'p14B_effect_of_non_iid_ness_cifar10_fedavg-iid-uniform' diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-1.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-1.yaml deleted file mode 100644 index 99c58d6c..00000000 --- a/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-1.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14B' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration 
-offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 1 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -experiment_prefix: 'p14B_effect_of_non_iid_ness_cifar10_fedavg-non-iid-1' diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-10.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-10.yaml deleted file mode 100644 index 63dde8d5..00000000 --- a/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-10.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14B' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 10 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -experiment_prefix: 'p14B_effect_of_non_iid_ness_cifar10_fedavg-non-iid-10' diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-2.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-2.yaml deleted file mode 100644 index a81d65c3..00000000 --- a/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-2.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 
-wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14B' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -experiment_prefix: 'p14B_effect_of_non_iid_ness_cifar10_fedavg-non-iid-2' diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-5.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-5.yaml deleted file mode 100644 index d911b976..00000000 --- a/configs/p14B_effect_of_non_iid_ness_cifar10/exps/fedavg-non-iid-5.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14B' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || 
random seed || random seed || unused -experiment_prefix: 'p14B_effect_of_non_iid_ness_cifar10_fedavg-non-iid-5' diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-iid-uniform.cfg.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-iid-uniform.cfg.yaml deleted file mode 100644 index 65bda1b5..00000000 --- a/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-iid-uniform.cfg.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused \ No newline at end of file diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-1.cfg.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-1.cfg.yaml deleted file mode 100644 index 645c08d9..00000000 --- a/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-1.cfg.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 1 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused \ No newline at end of file diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-10.cfg.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-10.cfg.yaml deleted file mode 100644 index 7a92bbb2..00000000 --- a/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-10.cfg.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 10 # label limit || q probability || alpha || unused - - 42 # random seed || 
random seed || random seed || unused \ No newline at end of file diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-2.cfg.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-2.cfg.yaml deleted file mode 100644 index 876dae51..00000000 --- a/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-2.cfg.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused \ No newline at end of file diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-5.cfg.yaml b/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-5.cfg.yaml deleted file mode 100644 index 79bbc150..00000000 --- a/configs/p14B_effect_of_non_iid_ness_cifar10/fedavg-non-iid-5.cfg.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused \ No newline at end of file diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/gen.py b/configs/p14B_effect_of_non_iid_ness_cifar10/gen.py deleted file mode 100644 index 168833f0..00000000 --- a/configs/p14B_effect_of_non_iid_ness_cifar10/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - 
exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff --git a/configs/p14B_effect_of_non_iid_ness_cifar10/run.py b/configs/p14B_effect_of_non_iid_ness_cifar10/run.py deleted file mode 100644 index 1037b72f..00000000 --- a/configs/p14B_effect_of_non_iid_ness_cifar10/run.py +++ /dev/null @@ -1,34 +0,0 @@ -from pathlib import Path -import time -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - EVENT_FILE="exp_events.txt" - name = 'p28_non_iid_effect' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - 'fedavg-non-iid-5.yaml', - 'fedavg-iid-uniform.yaml', - 'fedavg-non-iid-10.yaml', - 'fedavg-non-iid-1.yaml', - 'fedavg-non-iid-2.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') - start = time.time() - - - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - elapsed = time.time() - start - os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') - - print('Done') - - - diff --git a/configs/p14_check_iid_cifar10_cnn/descr.yaml b/configs/p14_check_iid_cifar10_cnn/descr.yaml deleted file mode 100644 index a840e572..00000000 --- a/configs/p14_check_iid_cifar10_cnn/descr.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration 
-total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 4 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 diff --git a/configs/p14_check_iid_cifar10_cnn/dyn_terminate.cfg.yaml b/configs/p14_check_iid_cifar10_cnn/dyn_terminate.cfg.yaml deleted file mode 100644 index 279369ef..00000000 --- a/configs/p14_check_iid_cifar10_cnn/dyn_terminate.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 \ No newline at end of file diff --git a/configs/p14_check_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml b/configs/p14_check_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml deleted file mode 100644 index 578998b4..00000000 --- a/configs/p14_check_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 diff --git a/configs/p14_check_iid_cifar10_cnn/exps/dyn_terminate.yaml b/configs/p14_check_iid_cifar10_cnn/exps/dyn_terminate.yaml deleted file mode 100644 index 5d6cffe7..00000000 --- a/configs/p14_check_iid_cifar10_cnn/exps/dyn_terminate.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: 
Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 4 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 -experiment_prefix: 'p14_check_iid_cifar10_cnn_dyn_terminate' diff --git a/configs/p14_check_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml b/configs/p14_check_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml deleted file mode 100644 index 69343556..00000000 --- a/configs/p14_check_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 4 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - 
federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 - -experiment_prefix: 'p14_check_iid_cifar10_cnn_dyn_terminate_swyh' diff --git a/configs/p14_check_iid_cifar10_cnn/exps/fedavg.yaml b/configs/p14_check_iid_cifar10_cnn/exps/fedavg.yaml deleted file mode 100644 index f1c1bad5..00000000 --- a/configs/p14_check_iid_cifar10_cnn/exps/fedavg.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 4 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -experiment_prefix: 'p14_check_iid_cifar10_cnn_fedavg' diff --git a/configs/p14_check_iid_cifar10_cnn/exps/fednova.yaml b/configs/p14_check_iid_cifar10_cnn/exps/fednova.yaml deleted file mode 100644 index 7a76e627..00000000 --- a/configs/p14_check_iid_cifar10_cnn/exps/fednova.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU 
-cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 4 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova -experiment_prefix: 'p14_check_iid_cifar10_cnn_fednova' diff --git a/configs/p14_check_iid_cifar10_cnn/exps/fedprox.yaml b/configs/p14_check_iid_cifar10_cnn/exps/fedprox.yaml deleted file mode 100644 index f968c79a..00000000 --- a/configs/p14_check_iid_cifar10_cnn/exps/fedprox.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 4 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 -# 
Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx -experiment_prefix: 'p14_check_iid_cifar10_cnn_fedprox' diff --git a/configs/p14_check_iid_cifar10_cnn/exps/offload.yaml b/configs/p14_check_iid_cifar10_cnn/exps/offload.yaml deleted file mode 100644 index 4fcab48b..00000000 --- a/configs/p14_check_iid_cifar10_cnn/exps/offload.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 4 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 -# Individual configuration -offload_stategy: offload -deadline: 500000 -experiment_prefix: 'p14_check_iid_cifar10_cnn_offload' diff --git a/configs/p14_check_iid_cifar10_cnn/exps/offload_strict.yaml b/configs/p14_check_iid_cifar10_cnn/exps/offload_strict.yaml deleted file mode 100644 index a782648c..00000000 --- a/configs/p14_check_iid_cifar10_cnn/exps/offload_strict.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14' -tensor_board_active: 
true -termination_percentage: 0.7 -clients_per_round: 4 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 -# Individual configuration -offload_stategy: offload -deadline: 7 -deadline_threshold: 2 -experiment_prefix: 'p14_check_iid_cifar10_cnn_offload_strict' diff --git a/configs/p14_check_iid_cifar10_cnn/exps/tifl_adaptive.yaml b/configs/p14_check_iid_cifar10_cnn/exps/tifl_adaptive.yaml deleted file mode 100644 index 92547493..00000000 --- a/configs/p14_check_iid_cifar10_cnn/exps/tifl_adaptive.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 4 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 
-experiment_prefix: 'p14_check_iid_cifar10_cnn_tifl_adaptive' diff --git a/configs/p14_check_iid_cifar10_cnn/exps/tifl_basic.yaml b/configs/p14_check_iid_cifar10_cnn/exps/tifl_basic.yaml deleted file mode 100644 index 1c292e0a..00000000 --- a/configs/p14_check_iid_cifar10_cnn/exps/tifl_basic.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 200 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p14' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 4 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 4 -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 -experiment_prefix: 'p14_check_iid_cifar10_cnn_tifl_basic' diff --git a/configs/p14_check_iid_cifar10_cnn/fedavg.cfg.yaml b/configs/p14_check_iid_cifar10_cnn/fedavg.cfg.yaml deleted file mode 100644 index 3b4615d1..00000000 --- a/configs/p14_check_iid_cifar10_cnn/fedavg.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 \ No newline at end of file diff --git a/configs/p14_check_iid_cifar10_cnn/fednova.cfg.yaml b/configs/p14_check_iid_cifar10_cnn/fednova.cfg.yaml deleted file mode 100644 index ca0e2a55..00000000 --- a/configs/p14_check_iid_cifar10_cnn/fednova.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration 
-offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova \ No newline at end of file diff --git a/configs/p14_check_iid_cifar10_cnn/fedprox.cfg.yaml b/configs/p14_check_iid_cifar10_cnn/fedprox.cfg.yaml deleted file mode 100644 index f66490e9..00000000 --- a/configs/p14_check_iid_cifar10_cnn/fedprox.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx \ No newline at end of file diff --git a/configs/p14_check_iid_cifar10_cnn/gen.py b/configs/p14_check_iid_cifar10_cnn/gen.py deleted file mode 100644 index 168833f0..00000000 --- a/configs/p14_check_iid_cifar10_cnn/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff --git a/configs/p14_check_iid_cifar10_cnn/offload.cfg.yaml b/configs/p14_check_iid_cifar10_cnn/offload.cfg.yaml deleted file mode 100644 index 3febf08b..00000000 --- a/configs/p14_check_iid_cifar10_cnn/offload.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 500000 \ No newline at end of file diff --git a/configs/p14_check_iid_cifar10_cnn/offload_strict.cfg.yaml 
b/configs/p14_check_iid_cifar10_cnn/offload_strict.cfg.yaml deleted file mode 100644 index f07a9c58..00000000 --- a/configs/p14_check_iid_cifar10_cnn/offload_strict.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 7 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p14_check_iid_cifar10_cnn/run.py b/configs/p14_check_iid_cifar10_cnn/run.py deleted file mode 100644 index 5732f7e7..00000000 --- a/configs/p14_check_iid_cifar10_cnn/run.py +++ /dev/null @@ -1,43 +0,0 @@ -from pathlib import Path -import time -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - EVENT_FILE="exp_events.txt" - name = 'generate_check_w4' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - 'fedavg.yaml', - # 'offload_strict.yaml', - # 'offload_strict2.yaml', - # 'offload_strict3.yaml', - # 'offload_strict4.yaml', - # 'offload_strict5.yaml', - # 'offload_strict6.yaml', - # 'offload_strict7.yaml', - # 'fednova.yaml', - # 'fedprox.yaml', - # 'offload.yaml', - # 'tifl_adaptive.yaml', - # 'tifl_basic.yaml', - # 'dyn_terminate_swyh.yaml', - # 'dyn_terminate.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') - start = time.time() - - - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - elapsed = time.time() - start - os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') - - print('Done') - - diff --git a/configs/p14_check_iid_cifar10_cnn/tifl_adaptive.cfg.yaml b/configs/p14_check_iid_cifar10_cnn/tifl_adaptive.cfg.yaml deleted file mode 100644 index e0ca9fbd..00000000 --- 
a/configs/p14_check_iid_cifar10_cnn/tifl_adaptive.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 \ No newline at end of file diff --git a/configs/p14_check_iid_cifar10_cnn/tifl_basic.cfg.yaml b/configs/p14_check_iid_cifar10_cnn/tifl_basic.cfg.yaml deleted file mode 100644 index b12b53b3..00000000 --- a/configs/p14_check_iid_cifar10_cnn/tifl_basic.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 \ No newline at end of file diff --git a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_dyn_terminate_large.yaml b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_dyn_terminate_large.yaml deleted file mode 100644 index 3ca4f5f5..00000000 --- a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_dyn_terminate_large.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p15_freezoff_non_iid_dyn_terminate' -offload_stategy: dynamic-terminate -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p15' -tensor_board_active: true -clients_per_round: 6 -termination_percentage: 0.7 -node_groups: - slow: [1, 2] - medium: [3, 4] - fast: [5, 6] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_dyn_terminate_swyh_large.yaml 
b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_dyn_terminate_swyh_large.yaml deleted file mode 100644 index ec74786b..00000000 --- a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_dyn_terminate_swyh_large.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p15_freezoff_non_iid_dyn_terminate_swyh' -offload_stategy: dynamic-terminate-swyh -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p15' -tensor_board_active: true -clients_per_round: 6 -termination_percentage: 0.7 -node_groups: - slow: [1, 2] - medium: [3, 4] - fast: [5, 6] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fedavg_large.yaml b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fedavg_large.yaml deleted file mode 100644 index de16a2ec..00000000 --- a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fedavg_large.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p15_freezoff_non_iid_fedavg' -offload_stategy: vanilla -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p15' -tensor_board_active: true 
-clients_per_round: 6 -node_groups: - slow: [1, 2] - medium: [3, 4] - fast: [5, 6] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fednova_large.yaml b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fednova_large.yaml deleted file mode 100644 index 28458841..00000000 --- a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fednova_large.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p15_freezoff_non_iid_fednova' -offload_stategy: vanilla -optimizer: FedNova -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p15' -tensor_board_active: true -clients_per_round: 6 -node_groups: - slow: [1, 2] - medium: [3, 4] - fast: [5, 6] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fedprox_large.yaml b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fedprox_large.yaml deleted file mode 
100644 index 0f70e659..00000000 --- a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_fedprox_large.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p15_freezoff_non_iid_fedprox' -offload_stategy: vanilla -optimizer: FedProx -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p15' -tensor_board_active: true -clients_per_round: 6 -node_groups: - slow: [1, 2] - medium: [3, 4] - fast: [5, 6] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_offload_large.yaml b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_offload_large.yaml deleted file mode 100644 index ea0b4f7f..00000000 --- a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_offload_large.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p15_freezoff_non_iid_offload' -offload_stategy: offload -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p15' -tensor_board_active: true -clients_per_round: 6 -node_groups: - slow: [1, 2] - medium: [3, 4] - fast: [5, 6] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" 
(default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_offload_strict_large.yaml b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_offload_strict_large.yaml deleted file mode 100644 index 74448f53..00000000 --- a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_offload_strict_large.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p15_freezoff_non_iid_offload_strict_large' -offload_stategy: offload -profiling_time: 30 -deadline: 7 -deadline_threshold: 2 -warmup_round: false -output_location: 'output/p15' -tensor_board_active: true -clients_per_round: 6 -node_groups: - slow: [1, 2] - medium: [3, 4] - fast: [5, 6] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_tifl_adaptive_large.yaml b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_tifl_adaptive_large.yaml deleted file mode 100644 index 9bb2c524..00000000 --- a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_tifl_adaptive_large.yaml +++ 
/dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p15_freezoff_non_iid_tifl_adaptive' -offload_stategy: tifl-adaptive -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p15' -tensor_board_active: true -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_tifl_basic_large.yaml b/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_tifl_basic_large.yaml deleted file mode 100644 index 4a32ca6f..00000000 --- a/configs/p15_freezoff_non_iid_large/p15_freezoff_non_iid_tifl_basic_large.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'p15_freezoff_non_iid_tifl_basic' -offload_stategy: tifl-basic -profiling_time: 30 -deadline: 50000 -warmup_round: false -output_location: 'output/p15' -tensor_board_active: true -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) 
-sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p15_freezoff_non_iid_large/run.py b/configs/p15_freezoff_non_iid_large/run.py deleted file mode 100644 index 7b6797e4..00000000 --- a/configs/p15_freezoff_non_iid_large/run.py +++ /dev/null @@ -1,30 +0,0 @@ -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - name = 'p11_freezoff' - generate_docker(name) - base_path = 'configs/p15_freezoff_non_iid_large' - exp_list = [ - # 'p15_freezoff_non_iid_offload_strict_large.yaml', - # 'p15_freezoff_non_iid_offload_large.yaml', - # 'p15_freezoff_non_iid_fedprox_large.yaml', - # 'p15_freezoff_non_iid_fednova_large.yaml', - 'p15_freezoff_non_iid_dyn_terminate_swyh_large.yaml', - # 'p15_freezoff_non_iid_fedavg_large.yaml', - 'p15_freezoff_non_iid_dyn_terminate_large.yaml', - 'p15_freezoff_non_iid_tifl_adaptive_large.yaml', - 'p15_freezoff_non_iid_tifl_basic_large.yaml' - ] - exp_list = [f'{base_path}/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - - print('Done') - - - - diff --git a/configs/p20_freezoff_iid_fmnist_cnn/descr.yaml b/configs/p20_freezoff_iid_fmnist_cnn/descr.yaml deleted file mode 100644 index a1241ede..00000000 --- a/configs/p20_freezoff_iid_fmnist_cnn/descr.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 
'output/p20' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p20_freezoff_iid_fmnist_cnn/dyn_terminate.cfg.yaml b/configs/p20_freezoff_iid_fmnist_cnn/dyn_terminate.cfg.yaml deleted file mode 100644 index 279369ef..00000000 --- a/configs/p20_freezoff_iid_fmnist_cnn/dyn_terminate.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 \ No newline at end of file diff --git a/configs/p20_freezoff_iid_fmnist_cnn/dyn_terminate_swyh.cfg.yaml b/configs/p20_freezoff_iid_fmnist_cnn/dyn_terminate_swyh.cfg.yaml deleted file mode 100644 index 578998b4..00000000 --- a/configs/p20_freezoff_iid_fmnist_cnn/dyn_terminate_swyh.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 diff --git a/configs/p20_freezoff_iid_fmnist_cnn/exps/dyn_terminate.yaml b/configs/p20_freezoff_iid_fmnist_cnn/exps/dyn_terminate.yaml deleted file mode 100644 index 3aad2be6..00000000 --- a/configs/p20_freezoff_iid_fmnist_cnn/exps/dyn_terminate.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p20' -tensor_board_active: true 
-termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 -experiment_prefix: 'p20_freezoff_iid_fmnist_cnn_dyn_terminate' diff --git a/configs/p20_freezoff_iid_fmnist_cnn/exps/dyn_terminate_swyh.yaml b/configs/p20_freezoff_iid_fmnist_cnn/exps/dyn_terminate_swyh.yaml deleted file mode 100644 index 94567f35..00000000 --- a/configs/p20_freezoff_iid_fmnist_cnn/exps/dyn_terminate_swyh.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p20' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: 
dynamic-terminate-swyh -deadline: 500000 - -experiment_prefix: 'p20_freezoff_iid_fmnist_cnn_dyn_terminate_swyh' diff --git a/configs/p20_freezoff_iid_fmnist_cnn/exps/fedavg.yaml b/configs/p20_freezoff_iid_fmnist_cnn/exps/fedavg.yaml deleted file mode 100644 index 4189dcb5..00000000 --- a/configs/p20_freezoff_iid_fmnist_cnn/exps/fedavg.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p20' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -experiment_prefix: 'p20_freezoff_iid_fmnist_cnn_fedavg' diff --git a/configs/p20_freezoff_iid_fmnist_cnn/exps/fednova.yaml b/configs/p20_freezoff_iid_fmnist_cnn/exps/fednova.yaml deleted file mode 100644 index d8e9c159..00000000 --- a/configs/p20_freezoff_iid_fmnist_cnn/exps/fednova.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p20' -tensor_board_active: true 
-termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova -experiment_prefix: 'p20_freezoff_iid_fmnist_cnn_fednova' diff --git a/configs/p20_freezoff_iid_fmnist_cnn/exps/fedprox.yaml b/configs/p20_freezoff_iid_fmnist_cnn/exps/fedprox.yaml deleted file mode 100644 index 0599b51f..00000000 --- a/configs/p20_freezoff_iid_fmnist_cnn/exps/fedprox.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p20' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx 
-experiment_prefix: 'p20_freezoff_iid_fmnist_cnn_fedprox' diff --git a/configs/p20_freezoff_iid_fmnist_cnn/exps/offload.yaml b/configs/p20_freezoff_iid_fmnist_cnn/exps/offload.yaml deleted file mode 100644 index 624f8fbd..00000000 --- a/configs/p20_freezoff_iid_fmnist_cnn/exps/offload.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p20' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: offload -deadline: 500000 -experiment_prefix: 'p20_freezoff_iid_fmnist_cnn_offload' diff --git a/configs/p20_freezoff_iid_fmnist_cnn/exps/offload_strict.yaml b/configs/p20_freezoff_iid_fmnist_cnn/exps/offload_strict.yaml deleted file mode 100644 index 9b4a6d51..00000000 --- a/configs/p20_freezoff_iid_fmnist_cnn/exps/offload_strict.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p20' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 
6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: offload -deadline: 7 -deadline_threshold: 2 -experiment_prefix: 'p20_freezoff_iid_fmnist_cnn_offload_strict' diff --git a/configs/p20_freezoff_iid_fmnist_cnn/exps/tifl_adaptive.yaml b/configs/p20_freezoff_iid_fmnist_cnn/exps/tifl_adaptive.yaml deleted file mode 100644 index 17f8aac7..00000000 --- a/configs/p20_freezoff_iid_fmnist_cnn/exps/tifl_adaptive.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p20' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 -experiment_prefix: 
'p20_freezoff_iid_fmnist_cnn_tifl_adaptive' diff --git a/configs/p20_freezoff_iid_fmnist_cnn/exps/tifl_basic.yaml b/configs/p20_freezoff_iid_fmnist_cnn/exps/tifl_basic.yaml deleted file mode 100644 index 3da03869..00000000 --- a/configs/p20_freezoff_iid_fmnist_cnn/exps/tifl_basic.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p20' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 -experiment_prefix: 'p20_freezoff_iid_fmnist_cnn_tifl_basic' diff --git a/configs/p20_freezoff_iid_fmnist_cnn/fedavg.cfg.yaml b/configs/p20_freezoff_iid_fmnist_cnn/fedavg.cfg.yaml deleted file mode 100644 index 3b4615d1..00000000 --- a/configs/p20_freezoff_iid_fmnist_cnn/fedavg.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 \ No newline at end of file diff --git a/configs/p20_freezoff_iid_fmnist_cnn/fednova.cfg.yaml b/configs/p20_freezoff_iid_fmnist_cnn/fednova.cfg.yaml deleted file mode 100644 index ca0e2a55..00000000 --- a/configs/p20_freezoff_iid_fmnist_cnn/fednova.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration 
-offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova \ No newline at end of file diff --git a/configs/p20_freezoff_iid_fmnist_cnn/fedprox.cfg.yaml b/configs/p20_freezoff_iid_fmnist_cnn/fedprox.cfg.yaml deleted file mode 100644 index f66490e9..00000000 --- a/configs/p20_freezoff_iid_fmnist_cnn/fedprox.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx \ No newline at end of file diff --git a/configs/p20_freezoff_iid_fmnist_cnn/gen.py b/configs/p20_freezoff_iid_fmnist_cnn/gen.py deleted file mode 100644 index 168833f0..00000000 --- a/configs/p20_freezoff_iid_fmnist_cnn/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff --git a/configs/p20_freezoff_iid_fmnist_cnn/offload.cfg.yaml b/configs/p20_freezoff_iid_fmnist_cnn/offload.cfg.yaml deleted file mode 100644 index 3febf08b..00000000 --- a/configs/p20_freezoff_iid_fmnist_cnn/offload.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 500000 \ No newline at end of file diff --git a/configs/p20_freezoff_iid_fmnist_cnn/offload_strict.cfg.yaml 
b/configs/p20_freezoff_iid_fmnist_cnn/offload_strict.cfg.yaml deleted file mode 100644 index f07a9c58..00000000 --- a/configs/p20_freezoff_iid_fmnist_cnn/offload_strict.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 7 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p20_freezoff_iid_fmnist_cnn/run.py b/configs/p20_freezoff_iid_fmnist_cnn/run.py deleted file mode 100644 index d2b2e028..00000000 --- a/configs/p20_freezoff_iid_fmnist_cnn/run.py +++ /dev/null @@ -1,35 +0,0 @@ -from pathlib import Path -import time -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - EVENT_FILE="exp_events.txt" - name = 'p11_freezoff' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - 'fedavg.yaml', - 'offload_strict.yaml', - 'fednova.yaml', - 'fedprox.yaml', - 'offload.yaml', - 'tifl_adaptive.yaml', - 'tifl_basic.yaml', - 'dyn_terminate_swyh.yaml', - 'dyn_terminate.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') - start = time.time() - - - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - elapsed = time.time() - start - os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') - - print('Done') diff --git a/configs/p20_freezoff_iid_fmnist_cnn/tifl_adaptive.cfg.yaml b/configs/p20_freezoff_iid_fmnist_cnn/tifl_adaptive.cfg.yaml deleted file mode 100644 index e0ca9fbd..00000000 --- a/configs/p20_freezoff_iid_fmnist_cnn/tifl_adaptive.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 \ No newline at end of file diff --git 
a/configs/p20_freezoff_iid_fmnist_cnn/tifl_basic.cfg.yaml b/configs/p20_freezoff_iid_fmnist_cnn/tifl_basic.cfg.yaml deleted file mode 100644 index b12b53b3..00000000 --- a/configs/p20_freezoff_iid_fmnist_cnn/tifl_basic.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 \ No newline at end of file diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/descr.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/descr.yaml deleted file mode 100644 index 0c77063a..00000000 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/descr.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p21' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/dyn_terminate.cfg.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/dyn_terminate.cfg.yaml deleted file mode 100644 index 279369ef..00000000 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/dyn_terminate.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 \ No newline at end of file diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/dyn_terminate_swyh.cfg.yaml 
b/configs/p21_freezoff_non_iid_fmnist_cnn/dyn_terminate_swyh.cfg.yaml deleted file mode 100644 index 578998b4..00000000 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/dyn_terminate_swyh.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/dyn_terminate.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/dyn_terminate.yaml deleted file mode 100644 index b98791f7..00000000 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/dyn_terminate.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p21' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 -experiment_prefix: 'p21_freezoff_non_iid_fmnist_cnn_dyn_terminate' diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/dyn_terminate_swyh.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/dyn_terminate_swyh.yaml deleted file mode 100644 index a402cc14..00000000 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/dyn_terminate_swyh.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration 
-total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p21' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 - -experiment_prefix: 'p21_freezoff_non_iid_fmnist_cnn_dyn_terminate_swyh' diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/fedavg.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/fedavg.yaml deleted file mode 100644 index 11334b11..00000000 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/fedavg.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p21' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || 
alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -experiment_prefix: 'p21_freezoff_non_iid_fmnist_cnn_fedavg' diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/fednova.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/fednova.yaml deleted file mode 100644 index 9842d72e..00000000 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/fednova.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p21' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova -experiment_prefix: 'p21_freezoff_non_iid_fmnist_cnn_fednova' diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/fedprox.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/fedprox.yaml deleted file mode 100644 index af82207f..00000000 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/fedprox.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 
50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p21' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx -experiment_prefix: 'p21_freezoff_non_iid_fmnist_cnn_fedprox' diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/offload.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/offload.yaml deleted file mode 100644 index 4e09f4cb..00000000 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/offload.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p21' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 
# random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: offload -deadline: 500000 -experiment_prefix: 'p21_freezoff_non_iid_fmnist_cnn_offload' diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/offload_strict.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/offload_strict.yaml deleted file mode 100644 index fd15fd06..00000000 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/offload_strict.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p21' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: offload -deadline: 7 -deadline_threshold: 2 -experiment_prefix: 'p21_freezoff_non_iid_fmnist_cnn_offload_strict' diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/tifl_adaptive.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/tifl_adaptive.yaml deleted file mode 100644 index 5d5b03fe..00000000 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/tifl_adaptive.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment 
configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p21' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 -experiment_prefix: 'p21_freezoff_non_iid_fmnist_cnn_tifl_adaptive' diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/tifl_basic.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/exps/tifl_basic.yaml deleted file mode 100644 index ab79764c..00000000 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/exps/tifl_basic.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p21' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q 
probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 -experiment_prefix: 'p21_freezoff_non_iid_fmnist_cnn_tifl_basic' diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/fedavg.cfg.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/fedavg.cfg.yaml deleted file mode 100644 index 3b4615d1..00000000 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/fedavg.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 \ No newline at end of file diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/fednova.cfg.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/fednova.cfg.yaml deleted file mode 100644 index ca0e2a55..00000000 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/fednova.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova \ No newline at end of file diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/fedprox.cfg.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/fedprox.cfg.yaml deleted file mode 100644 index f66490e9..00000000 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/fedprox.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx \ No newline at end of file diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/gen.py b/configs/p21_freezoff_non_iid_fmnist_cnn/gen.py deleted file mode 100644 index 168833f0..00000000 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in 
x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/offload.cfg.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/offload.cfg.yaml deleted file mode 100644 index 3febf08b..00000000 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/offload.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 500000 \ No newline at end of file diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/offload_strict.cfg.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/offload_strict.cfg.yaml deleted file mode 100644 index f07a9c58..00000000 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/offload_strict.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 7 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/run.py b/configs/p21_freezoff_non_iid_fmnist_cnn/run.py deleted file mode 100644 index d2b2e028..00000000 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/run.py +++ /dev/null @@ -1,35 +0,0 @@ -from pathlib import Path -import time -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - EVENT_FILE="exp_events.txt" - name = 'p11_freezoff' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - 'fedavg.yaml', - 'offload_strict.yaml', - 'fednova.yaml', - 'fedprox.yaml', - 
'offload.yaml', - 'tifl_adaptive.yaml', - 'tifl_basic.yaml', - 'dyn_terminate_swyh.yaml', - 'dyn_terminate.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') - start = time.time() - - - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - elapsed = time.time() - start - os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') - - print('Done') diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/tifl_adaptive.cfg.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/tifl_adaptive.cfg.yaml deleted file mode 100644 index e0ca9fbd..00000000 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/tifl_adaptive.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 \ No newline at end of file diff --git a/configs/p21_freezoff_non_iid_fmnist_cnn/tifl_basic.cfg.yaml b/configs/p21_freezoff_non_iid_fmnist_cnn/tifl_basic.cfg.yaml deleted file mode 100644 index b12b53b3..00000000 --- a/configs/p21_freezoff_non_iid_fmnist_cnn/tifl_basic.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 \ No newline at end of file diff --git a/configs/p22_freezoff_iid_cifar10_cnn/descr.yaml b/configs/p22_freezoff_iid_cifar10_cnn/descr.yaml deleted file mode 100644 index b98f9f11..00000000 --- a/configs/p22_freezoff_iid_cifar10_cnn/descr.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p22' -tensor_board_active: true 
-termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p22_freezoff_iid_cifar10_cnn/dyn_terminate.cfg.yaml b/configs/p22_freezoff_iid_cifar10_cnn/dyn_terminate.cfg.yaml deleted file mode 100644 index 279369ef..00000000 --- a/configs/p22_freezoff_iid_cifar10_cnn/dyn_terminate.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 \ No newline at end of file diff --git a/configs/p22_freezoff_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml b/configs/p22_freezoff_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml deleted file mode 100644 index 578998b4..00000000 --- a/configs/p22_freezoff_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 diff --git a/configs/p22_freezoff_iid_cifar10_cnn/exps/dyn_terminate.yaml b/configs/p22_freezoff_iid_cifar10_cnn/exps/dyn_terminate.yaml deleted file mode 100644 index 94c0963c..00000000 --- a/configs/p22_freezoff_iid_cifar10_cnn/exps/dyn_terminate.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p22' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 
-node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 -experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_dyn_terminate' diff --git a/configs/p22_freezoff_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml b/configs/p22_freezoff_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml deleted file mode 100644 index 8ecdd49a..00000000 --- a/configs/p22_freezoff_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p22' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 - -experiment_prefix: 
'p22_freezoff_iid_cifar10_cnn_dyn_terminate_swyh' diff --git a/configs/p22_freezoff_iid_cifar10_cnn/exps/fedavg.yaml b/configs/p22_freezoff_iid_cifar10_cnn/exps/fedavg.yaml deleted file mode 100644 index 57b3ad7e..00000000 --- a/configs/p22_freezoff_iid_cifar10_cnn/exps/fedavg.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p22' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_fedavg' diff --git a/configs/p22_freezoff_iid_cifar10_cnn/exps/fednova.yaml b/configs/p22_freezoff_iid_cifar10_cnn/exps/fednova.yaml deleted file mode 100644 index 75dc378f..00000000 --- a/configs/p22_freezoff_iid_cifar10_cnn/exps/fednova.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p22' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] 
- fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova -experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_fednova' diff --git a/configs/p22_freezoff_iid_cifar10_cnn/exps/fedprox.yaml b/configs/p22_freezoff_iid_cifar10_cnn/exps/fedprox.yaml deleted file mode 100644 index ddba20a8..00000000 --- a/configs/p22_freezoff_iid_cifar10_cnn/exps/fedprox.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p22' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx -experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_fedprox' diff --git 
a/configs/p22_freezoff_iid_cifar10_cnn/exps/offload.yaml b/configs/p22_freezoff_iid_cifar10_cnn/exps/offload.yaml deleted file mode 100644 index 66785fd7..00000000 --- a/configs/p22_freezoff_iid_cifar10_cnn/exps/offload.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p22' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: offload -deadline: 500000 -experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_offload' diff --git a/configs/p22_freezoff_iid_cifar10_cnn/exps/offload_strict.yaml b/configs/p22_freezoff_iid_cifar10_cnn/exps/offload_strict.yaml deleted file mode 100644 index d11399f4..00000000 --- a/configs/p22_freezoff_iid_cifar10_cnn/exps/offload_strict.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p22' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: 
"uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: offload -deadline: 7 -deadline_threshold: 2 -experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_offload_strict' diff --git a/configs/p22_freezoff_iid_cifar10_cnn/exps/tifl_adaptive.yaml b/configs/p22_freezoff_iid_cifar10_cnn/exps/tifl_adaptive.yaml deleted file mode 100644 index 00519096..00000000 --- a/configs/p22_freezoff_iid_cifar10_cnn/exps/tifl_adaptive.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p22' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 -experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_tifl_adaptive' diff --git 
a/configs/p22_freezoff_iid_cifar10_cnn/exps/tifl_basic.yaml b/configs/p22_freezoff_iid_cifar10_cnn/exps/tifl_basic.yaml deleted file mode 100644 index cd2109d2..00000000 --- a/configs/p22_freezoff_iid_cifar10_cnn/exps/tifl_basic.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p22' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 -experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_tifl_basic' diff --git a/configs/p22_freezoff_iid_cifar10_cnn/fedavg.cfg.yaml b/configs/p22_freezoff_iid_cifar10_cnn/fedavg.cfg.yaml deleted file mode 100644 index 3b4615d1..00000000 --- a/configs/p22_freezoff_iid_cifar10_cnn/fedavg.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 \ No newline at end of file diff --git a/configs/p22_freezoff_iid_cifar10_cnn/fednova.cfg.yaml b/configs/p22_freezoff_iid_cifar10_cnn/fednova.cfg.yaml deleted file mode 100644 index ca0e2a55..00000000 --- a/configs/p22_freezoff_iid_cifar10_cnn/fednova.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: 
FedNova \ No newline at end of file diff --git a/configs/p22_freezoff_iid_cifar10_cnn/fedprox.cfg.yaml b/configs/p22_freezoff_iid_cifar10_cnn/fedprox.cfg.yaml deleted file mode 100644 index f66490e9..00000000 --- a/configs/p22_freezoff_iid_cifar10_cnn/fedprox.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx \ No newline at end of file diff --git a/configs/p22_freezoff_iid_cifar10_cnn/gen.py b/configs/p22_freezoff_iid_cifar10_cnn/gen.py deleted file mode 100644 index 168833f0..00000000 --- a/configs/p22_freezoff_iid_cifar10_cnn/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff --git a/configs/p22_freezoff_iid_cifar10_cnn/offload.cfg.yaml b/configs/p22_freezoff_iid_cifar10_cnn/offload.cfg.yaml deleted file mode 100644 index 3febf08b..00000000 --- a/configs/p22_freezoff_iid_cifar10_cnn/offload.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 500000 \ No newline at end of file diff --git a/configs/p22_freezoff_iid_cifar10_cnn/offload_strict.cfg.yaml b/configs/p22_freezoff_iid_cifar10_cnn/offload_strict.cfg.yaml deleted file mode 
100644 index f07a9c58..00000000 --- a/configs/p22_freezoff_iid_cifar10_cnn/offload_strict.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 7 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p22_freezoff_iid_cifar10_cnn/run.py b/configs/p22_freezoff_iid_cifar10_cnn/run.py deleted file mode 100644 index d3cc22bf..00000000 --- a/configs/p22_freezoff_iid_cifar10_cnn/run.py +++ /dev/null @@ -1,30 +0,0 @@ -from pathlib import Path - -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - name = 'p11_freezoff' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - 'fedavg.yaml', - # 'offload_strict.yaml', - # 'fednova.yaml', - # 'fedprox.yaml', - # 'offload.yaml', - # 'tifl_adaptive.yaml', - # 'tifl_basic.yaml', - # 'dyn_terminate_swyh.yaml', - # 'dyn_terminate.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - - print('Done') - - diff --git a/configs/p22_freezoff_iid_cifar10_cnn/tifl_adaptive.cfg.yaml b/configs/p22_freezoff_iid_cifar10_cnn/tifl_adaptive.cfg.yaml deleted file mode 100644 index e0ca9fbd..00000000 --- a/configs/p22_freezoff_iid_cifar10_cnn/tifl_adaptive.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 \ No newline at end of file diff --git a/configs/p22_freezoff_iid_cifar10_cnn/tifl_basic.cfg.yaml b/configs/p22_freezoff_iid_cifar10_cnn/tifl_basic.cfg.yaml deleted file mode 100644 index b12b53b3..00000000 --- a/configs/p22_freezoff_iid_cifar10_cnn/tifl_basic.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 \ No 
newline at end of file diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/descr.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/descr.yaml deleted file mode 100644 index 06dcc5b1..00000000 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/descr.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p23' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/dyn_terminate.cfg.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/dyn_terminate.cfg.yaml deleted file mode 100644 index 279369ef..00000000 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/dyn_terminate.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 \ No newline at end of file diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml deleted file mode 100644 index 578998b4..00000000 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 diff --git 
a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate.yaml deleted file mode 100644 index fc4aceea..00000000 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p23' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 -experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_dyn_terminate' diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml deleted file mode 100644 index 38ec7094..00000000 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p23' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 
-node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 - -experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_dyn_terminate_swyh' diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/fedavg.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/fedavg.yaml deleted file mode 100644 index fc9fb303..00000000 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/fedavg.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p23' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -experiment_prefix: 
'p23_freezoff_non_iid_cifar10_cnn_fedavg' diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/fednova.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/fednova.yaml deleted file mode 100644 index 9f61a507..00000000 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/fednova.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p23' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova -experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_fednova' diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/fedprox.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/fedprox.yaml deleted file mode 100644 index c53e7753..00000000 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/fedprox.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p23' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 
-node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx -experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_fedprox' diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/offload.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/offload.yaml deleted file mode 100644 index c7dd2978..00000000 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/offload.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p23' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: offload -deadline: 500000 -experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_offload' 
diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/offload_strict.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/offload_strict.yaml deleted file mode 100644 index e52c19d4..00000000 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/offload_strict.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p23' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: offload -deadline: 7 -deadline_threshold: 2 -experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_offload_strict' diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/tifl_adaptive.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/tifl_adaptive.yaml deleted file mode 100644 index cefa2ae0..00000000 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/tifl_adaptive.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p23' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 
-node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 -experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_tifl_adaptive' diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/tifl_basic.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/exps/tifl_basic.yaml deleted file mode 100644 index 284f47b5..00000000 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/exps/tifl_basic.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p23' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 -experiment_prefix: 
'p23_freezoff_non_iid_cifar10_cnn_tifl_basic' diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/fedavg.cfg.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/fedavg.cfg.yaml deleted file mode 100644 index 3b4615d1..00000000 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/fedavg.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 \ No newline at end of file diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/fednova.cfg.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/fednova.cfg.yaml deleted file mode 100644 index ca0e2a55..00000000 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/fednova.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova \ No newline at end of file diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/fedprox.cfg.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/fedprox.cfg.yaml deleted file mode 100644 index f66490e9..00000000 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/fedprox.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx \ No newline at end of file diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/gen.py b/configs/p23_freezoff_non_iid_cifar10_cnn/gen.py deleted file mode 100644 index 168833f0..00000000 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + 
exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/offload.cfg.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/offload.cfg.yaml deleted file mode 100644 index 3febf08b..00000000 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/offload.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 500000 \ No newline at end of file diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/offload_strict.cfg.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/offload_strict.cfg.yaml deleted file mode 100644 index f07a9c58..00000000 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/offload_strict.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 7 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/run.py b/configs/p23_freezoff_non_iid_cifar10_cnn/run.py deleted file mode 100644 index 3f289cc1..00000000 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/run.py +++ /dev/null @@ -1,30 +0,0 @@ -from pathlib import Path - -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - name = 'p11_freezoff' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - 'fedavg.yaml', - 'offload_strict.yaml', - 'fednova.yaml', - 'fedprox.yaml', - 'offload.yaml', - 'tifl_adaptive.yaml', - 'tifl_basic.yaml', - 'dyn_terminate_swyh.yaml', - 'dyn_terminate.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - 
print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - - print('Done') - - diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/tifl_adaptive.cfg.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/tifl_adaptive.cfg.yaml deleted file mode 100644 index e0ca9fbd..00000000 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/tifl_adaptive.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 \ No newline at end of file diff --git a/configs/p23_freezoff_non_iid_cifar10_cnn/tifl_basic.cfg.yaml b/configs/p23_freezoff_non_iid_cifar10_cnn/tifl_basic.cfg.yaml deleted file mode 100644 index b12b53b3..00000000 --- a/configs/p23_freezoff_non_iid_cifar10_cnn/tifl_basic.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/descr.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/descr.yaml deleted file mode 100644 index 12d65dca..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/descr.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p24' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' 
- clients: - amount: 9 diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml deleted file mode 100644 index 279369ef..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml deleted file mode 100644 index 578998b4..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml deleted file mode 100644 index 8198e4f1..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p24' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - 
clients: - amount: 9 -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 -experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_dyn_terminate' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml deleted file mode 100644 index 64a7079e..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p24' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 - -experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_dyn_terminate_swyh' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fedavg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fedavg.yaml deleted file mode 100644 index 28d40fca..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fedavg.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will 
force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p24' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_fedavg' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fednova.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fednova.yaml deleted file mode 100644 index 309ec520..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fednova.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p24' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - 
amount: 9 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova -experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_fednova' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fedprox.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fedprox.yaml deleted file mode 100644 index 7ff26439..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/fedprox.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p24' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx -experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_fedprox' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload.yaml deleted file mode 100644 index 5405552e..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 
-warmup_round: false -output_location: 'output/p24' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 500000 -experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_offload' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml deleted file mode 100644 index 6dc599f8..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p24' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# 
Individual configuration -offload_stategy: offload -deadline: 18 -deadline_threshold: 2 -experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_offload_strict' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml deleted file mode 100644 index ec52f440..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p24' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 20 -deadline_threshold: 3 -experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_offload_strict2' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml deleted file mode 100644 index 4bf53a83..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will 
force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p24' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 18 -deadline_threshold: 5 -experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_offload_strict3' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml deleted file mode 100644 index c7a821a1..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p24' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: 
'10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload-strict -deadline: 18 -deadline_threshold: 3 -experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_offload_strict4' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict5.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict5.yaml deleted file mode 100644 index 96a22f5a..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict5.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p24' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload-strict -deadline: 19 -deadline_threshold: 3 -experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_offload_strict5' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict6.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict6.yaml deleted file mode 100644 index 65fe0dda..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict6.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: 
true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p24' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload-strict -deadline: 20 -deadline_threshold: 3 -experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_offload_strict6' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict7.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict7.yaml deleted file mode 100644 index def40d09..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/offload_strict7.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p24' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed 
|| random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload-strict -deadline: 20 -deadline_threshold: 2 -experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_offload_strict7' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml deleted file mode 100644 index 400673ef..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p24' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 -experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_tifl_adaptive' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml deleted file mode 100644 index 44c19d0e..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 
50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p24' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 -experiment_prefix: 'p24_freezoff_iid_cifar10_cnn_w9s3_tifl_basic' diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml deleted file mode 100644 index 3b4615d1..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/fednova.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/fednova.cfg.yaml deleted file mode 100644 index ca0e2a55..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/fednova.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml deleted file mode 100644 index f66490e9..00000000 --- 
a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/gen.py b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/gen.py deleted file mode 100644 index 168833f0..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload.cfg.yaml deleted file mode 100644 index 3febf08b..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 500000 \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml deleted file mode 100644 index 1287aa02..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration 
-offload_stategy: offload -deadline: 18 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml deleted file mode 100644 index ea5b7853..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 20 -deadline_threshold: 3 \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml deleted file mode 100644 index 7b37118c..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 18 -deadline_threshold: 5 \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml deleted file mode 100644 index 871612b2..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload-strict -deadline: 18 -deadline_threshold: 3 \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict5.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict5.cfg.yaml deleted file mode 100644 index 40874f3d..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict5.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload-strict -deadline: 19 -deadline_threshold: 3 \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict6.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict6.cfg.yaml deleted file mode 100644 index 29d51951..00000000 --- 
a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict6.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload-strict -deadline: 20 -deadline_threshold: 3 \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict7.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict7.cfg.yaml deleted file mode 100644 index b6c7084a..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/offload_strict7.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload-strict -deadline: 20 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/run.py b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/run.py deleted file mode 100644 index 761eefd8..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/run.py +++ /dev/null @@ -1,43 +0,0 @@ -from pathlib import Path -import time -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - EVENT_FILE="exp_events.txt" - name = 'p23_w9s3' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - # 'fedavg.yaml', - # 'offload_strict.yaml', - # 'offload_strict2.yaml', - # 'offload_strict3.yaml', - # 'offload_strict4.yaml', - 'offload_strict5.yaml', - 'offload_strict6.yaml', - 'offload_strict7.yaml', - # 'fednova.yaml', - # 'fedprox.yaml', - # 'offload.yaml', - # 'tifl_adaptive.yaml', - # 'tifl_basic.yaml', - # 'dyn_terminate_swyh.yaml', - # 'dyn_terminate.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') - start = time.time() - - - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - elapsed = time.time() - start - 
os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') - - print('Done') - - diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml deleted file mode 100644 index e0ca9fbd..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 \ No newline at end of file diff --git a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml b/configs/p24_freezoff_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml deleted file mode 100644 index b12b53b3..00000000 --- a/configs/p24_freezoff_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml deleted file mode 100644 index 7883c126..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p25' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - 
hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml deleted file mode 100644 index 279369ef..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml deleted file mode 100644 index 578998b4..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml deleted file mode 100644 index f2eef747..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p25' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # 
hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 -experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_dyn_terminate' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml deleted file mode 100644 index ba7edf86..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p25' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 - -experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_dyn_terminate_swyh' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml deleted file mode 100644 index 612cc586..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 
-epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p25' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_fedavg' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml deleted file mode 100644 index f001c2bf..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p25' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed 
|| random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova -experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_fednova' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml deleted file mode 100644 index 70f3db66..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p25' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx -experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_fedprox' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml deleted file mode 100644 index 612d7668..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 
50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p25' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 500000 -experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_offload' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml deleted file mode 100644 index 55f85fb4..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p25' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # 
random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 18 -deadline_threshold: 2 -experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml deleted file mode 100644 index adb6b6fc..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p25' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 20 -deadline_threshold: 3 -experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict2' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml deleted file mode 100644 index afa90fc8..00000000 --- 
a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p25' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload-strict -deadline: 18 -deadline_threshold: 5 -experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict3' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml deleted file mode 100644 index 9aaf58bb..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p25' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n 
labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload-strict -deadline: 18 -deadline_threshold: 3 -experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict4' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict5.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict5.yaml deleted file mode 100644 index 32a41da9..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict5.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p25' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload-strict -deadline: 19 -deadline_threshold: 3 -experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict5' diff --git 
a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict6.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict6.yaml deleted file mode 100644 index d1c78b4a..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict6.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p25' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload-strict -deadline: 20 -deadline_threshold: 3 -experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict6' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict7.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict7.yaml deleted file mode 100644 index f49e5dee..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict7.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p25' -tensor_board_active: true 
-termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload-strict -deadline: 20 -deadline_threshold: 2 -experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict7' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml deleted file mode 100644 index d2e1ea7a..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p25' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration 
-offload_stategy: tifl-adaptive -deadline: 500000 -experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_tifl_adaptive' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml deleted file mode 100644 index 25e05da2..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p25' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 -experiment_prefix: 'p25_freezoff_non_iid_cifar10_cnn_w9s3_tifl_basic' diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml deleted file mode 100644 index 3b4615d1..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml deleted 
file mode 100644 index ca0e2a55..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml deleted file mode 100644 index f66490e9..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/gen.py b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/gen.py deleted file mode 100644 index 168833f0..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml deleted file mode 100644 index 3febf08b..00000000 --- 
a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 500000 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml deleted file mode 100644 index 1287aa02..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 18 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict10.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict10.yaml deleted file mode 100644 index a1ee25bc..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict10.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload-strict -deadline: 18 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict11.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict11.yaml deleted file mode 100644 index 6048f43b..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict11.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload-strict -deadline: 18 -deadline_threshold: 1 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml deleted file mode 100644 index ea5b7853..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 20 -deadline_threshold: 3 \ No newline at end of file diff --git 
a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml deleted file mode 100644 index 8cfc95a8..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload-strict -deadline: 18 -deadline_threshold: 5 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml deleted file mode 100644 index 871612b2..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload-strict -deadline: 18 -deadline_threshold: 3 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict5.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict5.cfg.yaml deleted file mode 100644 index 40874f3d..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict5.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload-strict -deadline: 19 -deadline_threshold: 3 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict6.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict6.cfg.yaml deleted file mode 100644 index 29d51951..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict6.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload-strict -deadline: 20 -deadline_threshold: 3 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict7.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict7.cfg.yaml deleted file mode 100644 index b6c7084a..00000000 --- 
a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict7.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload-strict -deadline: 20 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict8.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict8.yaml deleted file mode 100644 index d70e0e3d..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict8.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload-strict -deadline: 19 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict9.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict9.yaml deleted file mode 100644 index 87225b97..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict9.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload-strict -deadline: 19 -deadline_threshold: 1 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/run.py b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/run.py deleted file mode 100644 index ab8ced7f..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/run.py +++ /dev/null @@ -1,48 +0,0 @@ -from pathlib import Path -import time -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - EVENT_FILE="exp_events.txt" - name = 'p23_w9s3' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - # 'fedavg.yaml', - # 'offload_strict.yaml', - # 'offload_strict2.yaml', - # 'offload_strict3.yaml', - # 'offload_strict4.yaml', - # 'offload_strict5.yaml', - # 'offload_strict6.yaml', - # 'offload_strict7.yaml', - 'offload_strict8.yaml', - 'offload_strict9.yaml', - 'offload_strict10.yaml', - 'offload_strict11.yaml', - - # 'fednova.yaml', - # 'fedprox.yaml', 
- # 'offload.yaml', - # 'tifl_adaptive.yaml', - # 'tifl_basic.yaml', - # 'dyn_terminate_swyh.yaml', - # 'dyn_terminate.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') - start = time.time() - - - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - elapsed = time.time() - start - os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') - - print('Done') - - diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml deleted file mode 100644 index e0ca9fbd..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 \ No newline at end of file diff --git a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml b/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml deleted file mode 100644 index b12b53b3..00000000 --- a/configs/p25_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 \ No newline at end of file diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/descr.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/descr.yaml deleted file mode 100644 index 847dd378..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/descr.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false 
-output_location: 'output/p26' -tensor_board_active: true -termination_percentage: 0.6 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/dyn_terminate.cfg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/dyn_terminate.cfg.yaml deleted file mode 100644 index 279369ef..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/dyn_terminate.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 \ No newline at end of file diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/dyn_terminate_swyh.cfg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/dyn_terminate_swyh.cfg.yaml deleted file mode 100644 index 578998b4..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/dyn_terminate_swyh.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/dyn_terminate.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/dyn_terminate.yaml deleted file mode 100644 index 0d8ddd4b..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/dyn_terminate.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p26' 
-tensor_board_active: true -termination_percentage: 0.6 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 -experiment_prefix: 'p26_freezoff_iid_mnist_cnn_w9s3_dyn_terminate' diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/dyn_terminate_swyh.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/dyn_terminate_swyh.yaml deleted file mode 100644 index f37eae09..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/dyn_terminate_swyh.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p26' -tensor_board_active: true -termination_percentage: 0.6 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: 
dynamic-terminate-swyh -deadline: 500000 - -experiment_prefix: 'p26_freezoff_iid_mnist_cnn_w9s3_dyn_terminate_swyh' diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fedavg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fedavg.yaml deleted file mode 100644 index 4867e937..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fedavg.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p26' -tensor_board_active: true -termination_percentage: 0.6 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -experiment_prefix: 'p26_freezoff_iid_mnist_cnn_w9s3_fedavg' diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fednova.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fednova.yaml deleted file mode 100644 index 2276565a..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fednova.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p26' -tensor_board_active: true 
-termination_percentage: 0.6 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova -experiment_prefix: 'p26_freezoff_iid_mnist_cnn_w9s3_fednova' diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fedprox.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fedprox.yaml deleted file mode 100644 index de6b1824..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/fedprox.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p26' -tensor_board_active: true -termination_percentage: 0.6 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx 
-experiment_prefix: 'p26_freezoff_iid_mnist_cnn_w9s3_fedprox' diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload.yaml deleted file mode 100644 index b557e0d8..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p26' -tensor_board_active: true -termination_percentage: 0.6 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 500000 -experiment_prefix: 'p26_freezoff_iid_mnist_cnn_w9s3_offload' diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload_strict.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload_strict.yaml deleted file mode 100644 index 9e239bb2..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload_strict.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p26' -tensor_board_active: true -termination_percentage: 0.6 -clients_per_round: 3 
-node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 9 -deadline_threshold: 1 -experiment_prefix: 'p26_freezoff_iid_mnist_cnn_w9s3_offload_strict' diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload_strict4.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload_strict4.yaml deleted file mode 100644 index ea43cc12..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/offload_strict4.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p26' -tensor_board_active: true -termination_percentage: 0.6 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload-strict -deadline: 9 -deadline_threshold: 1 -experiment_prefix: 
'p26_freezoff_iid_mnist_cnn_w9s3_offload_strict4' diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/tifl_adaptive.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/tifl_adaptive.yaml deleted file mode 100644 index 445b8965..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/tifl_adaptive.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p26' -tensor_board_active: true -termination_percentage: 0.6 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 -experiment_prefix: 'p26_freezoff_iid_mnist_cnn_w9s3_tifl_adaptive' diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/tifl_basic.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/tifl_basic.yaml deleted file mode 100644 index a21d3198..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/exps/tifl_basic.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p26' -tensor_board_active: true -termination_percentage: 0.6 
-clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 -experiment_prefix: 'p26_freezoff_iid_mnist_cnn_w9s3_tifl_basic' diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/fedavg.cfg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/fedavg.cfg.yaml deleted file mode 100644 index 3b4615d1..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/fedavg.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 \ No newline at end of file diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/fednova.cfg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/fednova.cfg.yaml deleted file mode 100644 index ca0e2a55..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/fednova.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova \ No newline at end of file diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/fedprox.cfg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/fedprox.cfg.yaml deleted file mode 100644 index f66490e9..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/fedprox.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx \ No newline at end of file diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/gen.py b/configs/p26_freezoff_iid_mnist_cnn_w9s3/gen.py deleted file mode 100644 index 
168833f0..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/offload.cfg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/offload.cfg.yaml deleted file mode 100644 index 3febf08b..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/offload.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 500000 \ No newline at end of file diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/offload_strict.cfg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/offload_strict.cfg.yaml deleted file mode 100644 index 41073058..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/offload_strict.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 9 -deadline_threshold: 1 \ No newline at end of file diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/offload_strict4.cfg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/offload_strict4.cfg.yaml deleted file mode 100644 index aa3df65e..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/offload_strict4.cfg.yaml +++ /dev/null @@ -1,4 
+0,0 @@ -# Individual configuration -offload_stategy: offload-strict -deadline: 9 -deadline_threshold: 1 \ No newline at end of file diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/run.py b/configs/p26_freezoff_iid_mnist_cnn_w9s3/run.py deleted file mode 100644 index 6daede8b..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/run.py +++ /dev/null @@ -1,38 +0,0 @@ -from pathlib import Path -import time -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - EVENT_FILE="exp_events.txt" - name = 'p23_w9s3' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - 'fedavg.yaml', - 'offload_strict.yaml', - 'offload_strict4.yaml', - 'fednova.yaml', - 'fedprox.yaml', - 'offload.yaml', - 'tifl_adaptive.yaml', - 'tifl_basic.yaml', - 'dyn_terminate_swyh.yaml', - 'dyn_terminate.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') - start = time.time() - - - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - elapsed = time.time() - start - os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') - - print('Done') - - diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/tifl_adaptive.cfg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/tifl_adaptive.cfg.yaml deleted file mode 100644 index e0ca9fbd..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/tifl_adaptive.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 \ No newline at end of file diff --git a/configs/p26_freezoff_iid_mnist_cnn_w9s3/tifl_basic.cfg.yaml b/configs/p26_freezoff_iid_mnist_cnn_w9s3/tifl_basic.cfg.yaml deleted file mode 100644 
index b12b53b3..00000000 --- a/configs/p26_freezoff_iid_mnist_cnn_w9s3/tifl_basic.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 \ No newline at end of file diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/descr.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/descr.yaml deleted file mode 100644 index 8e10f30e..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/descr.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p27' -tensor_board_active: true -termination_percentage: 0.6 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/dyn_terminate.cfg.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/dyn_terminate.cfg.yaml deleted file mode 100644 index 279369ef..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/dyn_terminate.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 \ No newline at end of file diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/dyn_terminate_swyh.cfg.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/dyn_terminate_swyh.cfg.yaml deleted file mode 100644 index 578998b4..00000000 --- 
a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/dyn_terminate_swyh.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/dyn_terminate.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/dyn_terminate.yaml deleted file mode 100644 index 7292e375..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/dyn_terminate.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p27' -tensor_board_active: true -termination_percentage: 0.6 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 -experiment_prefix: 'p27_freezoff_non_iid_mnist_cnn_w9s3_dyn_terminate' diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/dyn_terminate_swyh.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/dyn_terminate_swyh.yaml deleted file mode 100644 index 4e729a66..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/dyn_terminate_swyh.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is 
available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p27' -tensor_board_active: true -termination_percentage: 0.6 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 - -experiment_prefix: 'p27_freezoff_non_iid_mnist_cnn_w9s3_dyn_terminate_swyh' diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fedavg.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fedavg.yaml deleted file mode 100644 index 1cdd98d4..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fedavg.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p27' -tensor_board_active: true -termination_percentage: 0.6 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - 
hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -experiment_prefix: 'p27_freezoff_non_iid_mnist_cnn_w9s3_fedavg' diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fednova.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fednova.yaml deleted file mode 100644 index 89e075b3..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fednova.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p27' -tensor_board_active: true -termination_percentage: 0.6 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova -experiment_prefix: 'p27_freezoff_non_iid_mnist_cnn_w9s3_fednova' diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fedprox.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fedprox.yaml deleted file mode 100644 index f12611a6..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/fedprox.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to 
false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p27' -tensor_board_active: true -termination_percentage: 0.6 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx -experiment_prefix: 'p27_freezoff_non_iid_mnist_cnn_w9s3_fedprox' diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload.yaml deleted file mode 100644 index ae96e514..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p27' -tensor_board_active: true -termination_percentage: 0.6 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 
'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 500000 -experiment_prefix: 'p27_freezoff_non_iid_mnist_cnn_w9s3_offload' diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload_strict.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload_strict.yaml deleted file mode 100644 index 089ebcaf..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload_strict.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p27' -tensor_board_active: true -termination_percentage: 0.6 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 9 -deadline_threshold: 1 -experiment_prefix: 'p27_freezoff_non_iid_mnist_cnn_w9s3_offload_strict' diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload_strict4.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload_strict4.yaml deleted file mode 100644 index ab8cf176..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/offload_strict4.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is 
available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p27' -tensor_board_active: true -termination_percentage: 0.6 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload-strict -deadline: 9 -deadline_threshold: 1 -experiment_prefix: 'p27_freezoff_non_iid_mnist_cnn_w9s3_offload_strict4' diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/tifl_adaptive.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/tifl_adaptive.yaml deleted file mode 100644 index 0602959d..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/tifl_adaptive.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p27' -tensor_board_active: true -termination_percentage: 0.6 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # 
hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 -experiment_prefix: 'p27_freezoff_non_iid_mnist_cnn_w9s3_tifl_adaptive' diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/tifl_basic.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/tifl_basic.yaml deleted file mode 100644 index b75f25b0..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/exps/tifl_basic.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 5 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p27' -tensor_board_active: true -termination_percentage: 0.6 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 -experiment_prefix: 'p27_freezoff_non_iid_mnist_cnn_w9s3_tifl_basic' diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fedavg.cfg.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fedavg.cfg.yaml deleted file mode 100644 index 3b4615d1..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fedavg.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 \ No newline at end of file diff --git 
a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fednova.cfg.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fednova.cfg.yaml deleted file mode 100644 index ca0e2a55..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fednova.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova \ No newline at end of file diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fedprox.cfg.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fedprox.cfg.yaml deleted file mode 100644 index f66490e9..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/fedprox.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx \ No newline at end of file diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/gen.py b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/gen.py deleted file mode 100644 index 168833f0..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload.cfg.yaml 
b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload.cfg.yaml deleted file mode 100644 index 3febf08b..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 500000 \ No newline at end of file diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload_strict.cfg.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload_strict.cfg.yaml deleted file mode 100644 index 41073058..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload_strict.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 9 -deadline_threshold: 1 \ No newline at end of file diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload_strict4.cfg.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload_strict4.cfg.yaml deleted file mode 100644 index aa3df65e..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/offload_strict4.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload-strict -deadline: 9 -deadline_threshold: 1 \ No newline at end of file diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/run.py b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/run.py deleted file mode 100644 index 6daede8b..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/run.py +++ /dev/null @@ -1,38 +0,0 @@ -from pathlib import Path -import time -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - EVENT_FILE="exp_events.txt" - name = 'p23_w9s3' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - 'fedavg.yaml', - 'offload_strict.yaml', - 'offload_strict4.yaml', - 'fednova.yaml', - 'fedprox.yaml', - 'offload.yaml', - 'tifl_adaptive.yaml', - 'tifl_basic.yaml', - 'dyn_terminate_swyh.yaml', - 'dyn_terminate.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - 
first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') - start = time.time() - - - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - elapsed = time.time() - start - os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') - - print('Done') - - diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/tifl_adaptive.cfg.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/tifl_adaptive.cfg.yaml deleted file mode 100644 index e0ca9fbd..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/tifl_adaptive.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 \ No newline at end of file diff --git a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/tifl_basic.cfg.yaml b/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/tifl_basic.cfg.yaml deleted file mode 100644 index b12b53b3..00000000 --- a/configs/p27_freezoff_non_iid_mnist_cnn_w9s3/tifl_basic.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 \ No newline at end of file diff --git a/configs/p28_effect_of_non_iid_ness_mnist/descr.yaml b/configs/p28_effect_of_non_iid_ness_mnist/descr.yaml deleted file mode 100644 index ba67d24a..00000000 --- a/configs/p28_effect_of_non_iid_ness_mnist/descr.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p28' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # 
hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-iid-uniform.yaml b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-iid-uniform.yaml deleted file mode 100644 index aea42ff7..00000000 --- a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-iid-uniform.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p28' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -experiment_prefix: 'p28_effect_of_non_iid_ness_mnist_fedavg-iid-uniform' diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-1.yaml b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-1.yaml deleted file mode 100644 index 3ba1e18e..00000000 --- a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-1.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p28' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 
-node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 1 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -experiment_prefix: 'p28_effect_of_non_iid_ness_mnist_fedavg-non-iid-1' diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-10.yaml b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-10.yaml deleted file mode 100644 index e91a1444..00000000 --- a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-10.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p28' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 10 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -experiment_prefix: 'p28_effect_of_non_iid_ness_mnist_fedavg-non-iid-10' diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-2.yaml b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-2.yaml deleted file mode 100644 index 
bdc94c89..00000000 --- a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-2.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p28' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -experiment_prefix: 'p28_effect_of_non_iid_ness_mnist_fedavg-non-iid-2' diff --git a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-5.yaml b/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-5.yaml deleted file mode 100644 index bcacd129..00000000 --- a/configs/p28_effect_of_non_iid_ness_mnist/exps/fedavg-non-iid-5.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p28' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # 
"limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -experiment_prefix: 'p28_effect_of_non_iid_ness_mnist_fedavg-non-iid-5' diff --git a/configs/p28_effect_of_non_iid_ness_mnist/fedavg-iid-uniform.cfg.yaml b/configs/p28_effect_of_non_iid_ness_mnist/fedavg-iid-uniform.cfg.yaml deleted file mode 100644 index 65bda1b5..00000000 --- a/configs/p28_effect_of_non_iid_ness_mnist/fedavg-iid-uniform.cfg.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused \ No newline at end of file diff --git a/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-1.cfg.yaml b/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-1.cfg.yaml deleted file mode 100644 index 645c08d9..00000000 --- a/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-1.cfg.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 1 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused \ No newline at end of file diff --git a/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-10.cfg.yaml b/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-10.cfg.yaml deleted file mode 100644 index 7a92bbb2..00000000 --- a/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-10.cfg.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" 
|| "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 10 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused \ No newline at end of file diff --git a/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-2.cfg.yaml b/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-2.cfg.yaml deleted file mode 100644 index 876dae51..00000000 --- a/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-2.cfg.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused \ No newline at end of file diff --git a/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-5.cfg.yaml b/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-5.cfg.yaml deleted file mode 100644 index 79bbc150..00000000 --- a/configs/p28_effect_of_non_iid_ness_mnist/fedavg-non-iid-5.cfg.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused \ No newline at end of file diff --git a/configs/p28_effect_of_non_iid_ness_mnist/gen.py b/configs/p28_effect_of_non_iid_ness_mnist/gen.py deleted file mode 100644 index 168833f0..00000000 --- a/configs/p28_effect_of_non_iid_ness_mnist/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - 
with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff --git a/configs/p28_effect_of_non_iid_ness_mnist/run.py b/configs/p28_effect_of_non_iid_ness_mnist/run.py deleted file mode 100644 index 1037b72f..00000000 --- a/configs/p28_effect_of_non_iid_ness_mnist/run.py +++ /dev/null @@ -1,34 +0,0 @@ -from pathlib import Path -import time -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - EVENT_FILE="exp_events.txt" - name = 'p28_non_iid_effect' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - 'fedavg-non-iid-5.yaml', - 'fedavg-iid-uniform.yaml', - 'fedavg-non-iid-10.yaml', - 'fedavg-non-iid-1.yaml', - 'fedavg-non-iid-2.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') - start = time.time() - - - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - elapsed = time.time() - start - os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') - - print('Done') - - - diff --git a/configs/p29_effect_of_freezing_mnist/descr.yaml b/configs/p29_effect_of_freezing_mnist/descr.yaml deleted file mode 100644 index 40f964ef..00000000 --- 
a/configs/p29_effect_of_freezing_mnist/descr.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-0.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-0.yaml deleted file mode 100644 index 38642015..00000000 --- a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-0.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [] -experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-iid-freeze-0' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-10.yaml 
b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-10.yaml deleted file mode 100644 index 280fde8a..00000000 --- a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-10.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1] -experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-iid-freeze-10' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-100.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-100.yaml deleted file mode 100644 index f4371b42..00000000 --- a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-100.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 
'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8, 10] -experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-iid-freeze-100' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-20.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-20.yaml deleted file mode 100644 index a55e48a4..00000000 --- a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-20.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2] -experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-iid-freeze-20' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-30.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-30.yaml deleted file mode 100644 index 3ab8f8a4..00000000 --- a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-30.yaml +++ 
/dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3] -experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-iid-freeze-30' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-40.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-40.yaml deleted file mode 100644 index 03500d8e..00000000 --- a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-40.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n 
labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4] -experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-iid-freeze-40' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-50.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-50.yaml deleted file mode 100644 index 83c97db4..00000000 --- a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-50.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5] -experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-iid-freeze-50' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-60.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-60.yaml deleted file mode 100644 index b47c0bbb..00000000 --- a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-60.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU 
-cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6] -experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-iid-freeze-60' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-70.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-70.yaml deleted file mode 100644 index 3520be09..00000000 --- a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-70.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6, 7] -experiment_prefix: 
'p29_effect_of_freezing_mnist_fedavg-iid-freeze-70' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-80.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-80.yaml deleted file mode 100644 index 021485b6..00000000 --- a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-80.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8] -experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-iid-freeze-80' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-90.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-90.yaml deleted file mode 100644 index 3c9b2bda..00000000 --- a/configs/p29_effect_of_freezing_mnist/exps/fedavg-iid-freeze-90.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - 
slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8, 9] -experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-iid-freeze-90' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-0.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-0.yaml deleted file mode 100644 index 5e41e478..00000000 --- a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-0.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [] -experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-0' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-10.yaml 
b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-10.yaml deleted file mode 100644 index 4f3cd557..00000000 --- a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-10.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1] -experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-10' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-100.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-100.yaml deleted file mode 100644 index aa4f26af..00000000 --- a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-100.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - 
nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8, 10] -experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-100' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-20.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-20.yaml deleted file mode 100644 index 53932748..00000000 --- a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-20.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2] -experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-20' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-30.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-30.yaml deleted file mode 100644 index 58b9ac71..00000000 --- 
a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-30.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3] -experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-30' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-40.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-40.yaml deleted file mode 100644 index 9437dd2f..00000000 --- a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-40.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 
-sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4] -experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-40' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-50.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-50.yaml deleted file mode 100644 index 6d680b4c..00000000 --- a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-50.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5] -experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-50' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-60.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-60.yaml deleted file mode 100644 index a54ef747..00000000 --- a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-60.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 
-epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6] -experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-60' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-70.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-70.yaml deleted file mode 100644 index 1a2acd61..00000000 --- a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-70.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q 
probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6, 7] -experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-70' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-80.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-80.yaml deleted file mode 100644 index 666a2d67..00000000 --- a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-80.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8] -experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-80' diff --git a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-90.yaml b/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-90.yaml deleted file mode 100644 index ec92b09d..00000000 --- a/configs/p29_effect_of_freezing_mnist/exps/fedavg-non_iid-freeze-90.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU 
-cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p29' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 10 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8, 9] -experiment_prefix: 'p29_effect_of_freezing_mnist_fedavg-non_iid-freeze-90' diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-0.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-0.cfg.yaml deleted file mode 100644 index 7a53cb0f..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-0.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-10.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-10.cfg.yaml deleted file mode 100644 index b17e30a6..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-10.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 
42 # random seed || random seed || random seed || unused -freeze_clients: [1] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-100.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-100.cfg.yaml deleted file mode 100644 index bd803e4f..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-100.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8, 10] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-20.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-20.cfg.yaml deleted file mode 100644 index 0a5689e0..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-20.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-30.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-30.cfg.yaml deleted file mode 100644 index 6e1c7303..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-30.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || 
unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-40.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-40.cfg.yaml deleted file mode 100644 index 7c856f57..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-40.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-50.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-50.cfg.yaml deleted file mode 100644 index b69a2dd7..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-50.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-60.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-60.cfg.yaml deleted file mode 100644 index c53f6f96..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-60.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || 
alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-70.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-70.cfg.yaml deleted file mode 100644 index 6539a04c..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-70.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6, 7] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-80.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-80.cfg.yaml deleted file mode 100644 index e8699931..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-80.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-90.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-90.cfg.yaml deleted file mode 100644 index f99391ec..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-iid-freeze-90.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # 
label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8, 9] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-0.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-0.cfg.yaml deleted file mode 100644 index ab6135eb..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-0.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-10.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-10.cfg.yaml deleted file mode 100644 index 295d3428..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-10.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-100.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-100.cfg.yaml deleted file mode 100644 index 077ef99a..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-100.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" 
(default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8, 10] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-20.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-20.cfg.yaml deleted file mode 100644 index 3de07281..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-20.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-30.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-30.cfg.yaml deleted file mode 100644 index 91a88894..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-30.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-40.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-40.cfg.yaml deleted file mode 100644 index 4ceae941..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-40.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit 
labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-50.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-50.cfg.yaml deleted file mode 100644 index 9f8c7a99..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-50.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-60.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-60.cfg.yaml deleted file mode 100644 index 671fb412..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-60.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-70.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-70.cfg.yaml deleted file mode 100644 index 75cdd8a2..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-70.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: 
vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6, 7] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-80.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-80.cfg.yaml deleted file mode 100644 index 9e82753a..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-80.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-90.cfg.yaml b/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-90.cfg.yaml deleted file mode 100644 index d9055b37..00000000 --- a/configs/p29_effect_of_freezing_mnist/fedavg-non_iid-freeze-90.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6, 7, 8, 9] \ No newline at end of file diff --git a/configs/p29_effect_of_freezing_mnist/gen.py b/configs/p29_effect_of_freezing_mnist/gen.py deleted file mode 100644 index 168833f0..00000000 --- a/configs/p29_effect_of_freezing_mnist/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib import Path - -if __name__ == 
'__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff --git a/configs/p29_effect_of_freezing_mnist/run.py b/configs/p29_effect_of_freezing_mnist/run.py deleted file mode 100644 index ae621347..00000000 --- a/configs/p29_effect_of_freezing_mnist/run.py +++ /dev/null @@ -1,50 +0,0 @@ -from pathlib import Path -import time -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - EVENT_FILE="exp_events.txt" - name = 'p28_non_iid_effect' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - 'fedavg-iid-freeze-0.yaml', - 'fedavg-iid-freeze-30.yaml', - 'fedavg-iid-freeze-70.yaml', - 'fedavg-non_iid-freeze-100.yaml', - 'fedavg-non_iid-freeze-40.yaml', - 'fedavg-non_iid-freeze-80.yaml', - 'fedavg-iid-freeze-100.yaml', - 'fedavg-iid-freeze-40.yaml', - 'fedavg-iid-freeze-80.yaml', - 'fedavg-non_iid-freeze-10.yaml', - 'fedavg-non_iid-freeze-50.yaml', - 'fedavg-non_iid-freeze-90.yaml', - 'fedavg-iid-freeze-10.yaml', - 'fedavg-iid-freeze-50.yaml', - 'fedavg-iid-freeze-90.yaml', - 'fedavg-non_iid-freeze-20.yaml', - 'fedavg-non_iid-freeze-60.yaml', - 'fedavg-iid-freeze-20.yaml', - 'fedavg-iid-freeze-60.yaml', - 'fedavg-non_iid-freeze-0.yaml', - 
'fedavg-non_iid-freeze-30.yaml', - 'fedavg-non_iid-freeze-70.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') - start = time.time() - - - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - elapsed = time.time() - start - os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') - - print('Done') - - diff --git a/configs/p30_freezing_effect_dev/descr.yaml b/configs/p30_freezing_effect_dev/descr.yaml deleted file mode 100644 index cd26fcfa..00000000 --- a/configs/p30_freezing_effect_dev/descr.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p30' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-0.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-0.yaml deleted file mode 100644 index bb7e53a6..00000000 --- a/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-0.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p30' -tensor_board_active: true 
-termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [] -experiment_prefix: 'p30_freezing_effect_dev_fedavg-iid-freeze-0' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-100.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-100.yaml deleted file mode 100644 index 55b4adb3..00000000 --- a/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-100.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p30' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6] -experiment_prefix: 'p30_freezing_effect_dev_fedavg-iid-freeze-100' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-16.yaml 
b/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-16.yaml deleted file mode 100644 index 3320a44b..00000000 --- a/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-16.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p30' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1] -experiment_prefix: 'p30_freezing_effect_dev_fedavg-iid-freeze-16' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-33.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-33.yaml deleted file mode 100644 index 5105d2eb..00000000 --- a/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-33.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p30' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# 
Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2] -experiment_prefix: 'p30_freezing_effect_dev_fedavg-iid-freeze-33' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-50.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-50.yaml deleted file mode 100644 index 38601371..00000000 --- a/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-50.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p30' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3] -experiment_prefix: 'p30_freezing_effect_dev_fedavg-iid-freeze-50' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-66.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-66.yaml deleted file mode 100644 index 3d1c1628..00000000 --- a/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-66.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 
-wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p30' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4] -experiment_prefix: 'p30_freezing_effect_dev_fedavg-iid-freeze-66' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-83.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-83.yaml deleted file mode 100644 index 1732f166..00000000 --- a/configs/p30_freezing_effect_dev/exps/fedavg-iid-freeze-83.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p30' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || 
random seed || unused -freeze_clients: [1, 2, 3, 4, 5] -experiment_prefix: 'p30_freezing_effect_dev_fedavg-iid-freeze-83' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-0.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-0.yaml deleted file mode 100644 index 46355b63..00000000 --- a/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-0.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p30' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [] -experiment_prefix: 'p30_freezing_effect_dev_fedavg-non-iid-freeze-0' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-100.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-100.yaml deleted file mode 100644 index ea0edb03..00000000 --- a/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-100.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p30' -tensor_board_active: true -termination_percentage: 0.7 
-clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6] -experiment_prefix: 'p30_freezing_effect_dev_fedavg-non-iid-freeze-100' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-16.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-16.yaml deleted file mode 100644 index 86a47161..00000000 --- a/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-16.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p30' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1] -experiment_prefix: 'p30_freezing_effect_dev_fedavg-non-iid-freeze-16' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-33.yaml 
b/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-33.yaml deleted file mode 100644 index a382b7ab..00000000 --- a/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-33.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p30' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2] -experiment_prefix: 'p30_freezing_effect_dev_fedavg-non-iid-freeze-33' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-50.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-50.yaml deleted file mode 100644 index 5fcdc08f..00000000 --- a/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-50.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p30' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - 
clients: - amount: 6 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3] -experiment_prefix: 'p30_freezing_effect_dev_fedavg-non-iid-freeze-50' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-66.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-66.yaml deleted file mode 100644 index 69e32d91..00000000 --- a/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-66.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p30' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4] -experiment_prefix: 'p30_freezing_effect_dev_fedavg-non-iid-freeze-66' diff --git a/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-83.yaml b/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-83.yaml deleted file mode 100644 index 0ff8e473..00000000 --- a/configs/p30_freezing_effect_dev/exps/fedavg-non-iid-freeze-83.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# 
Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: MNISTCNN -dataset: mnist -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p30' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 6 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5] -experiment_prefix: 'p30_freezing_effect_dev_fedavg-non-iid-freeze-83' diff --git a/configs/p30_freezing_effect_dev/fedavg-iid-freeze-0.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-iid-freeze-0.cfg.yaml deleted file mode 100644 index 7a53cb0f..00000000 --- a/configs/p30_freezing_effect_dev/fedavg-iid-freeze-0.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-iid-freeze-100.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-iid-freeze-100.cfg.yaml deleted file mode 100644 index c53f6f96..00000000 --- a/configs/p30_freezing_effect_dev/fedavg-iid-freeze-100.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q 
sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-iid-freeze-16.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-iid-freeze-16.cfg.yaml deleted file mode 100644 index b17e30a6..00000000 --- a/configs/p30_freezing_effect_dev/fedavg-iid-freeze-16.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-iid-freeze-33.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-iid-freeze-33.cfg.yaml deleted file mode 100644 index 0a5689e0..00000000 --- a/configs/p30_freezing_effect_dev/fedavg-iid-freeze-33.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-iid-freeze-50.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-iid-freeze-50.cfg.yaml deleted file mode 100644 index 6e1c7303..00000000 --- a/configs/p30_freezing_effect_dev/fedavg-iid-freeze-50.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || 
"n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-iid-freeze-66.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-iid-freeze-66.cfg.yaml deleted file mode 100644 index 7c856f57..00000000 --- a/configs/p30_freezing_effect_dev/fedavg-iid-freeze-66.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-iid-freeze-83.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-iid-freeze-83.cfg.yaml deleted file mode 100644 index b69a2dd7..00000000 --- a/configs/p30_freezing_effect_dev/fedavg-iid-freeze-83.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-0.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-0.cfg.yaml deleted file mode 100644 index ab6135eb..00000000 --- a/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-0.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 
2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-100.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-100.cfg.yaml deleted file mode 100644 index 671fb412..00000000 --- a/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-100.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5, 6] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-16.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-16.cfg.yaml deleted file mode 100644 index 295d3428..00000000 --- a/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-16.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-33.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-33.cfg.yaml deleted file mode 100644 index 3de07281..00000000 --- a/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-33.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label 
limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-50.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-50.cfg.yaml deleted file mode 100644 index 91a88894..00000000 --- a/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-50.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-66.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-66.cfg.yaml deleted file mode 100644 index 4ceae941..00000000 --- a/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-66.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-83.cfg.yaml b/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-83.cfg.yaml deleted file mode 100644 index 9f8c7a99..00000000 --- a/configs/p30_freezing_effect_dev/fedavg-non-iid-freeze-83.cfg.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q 
probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -freeze_clients: [1, 2, 3, 4, 5] \ No newline at end of file diff --git a/configs/p30_freezing_effect_dev/gen.py b/configs/p30_freezing_effect_dev/gen.py deleted file mode 100644 index 168833f0..00000000 --- a/configs/p30_freezing_effect_dev/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff --git a/configs/p30_freezing_effect_dev/run.py b/configs/p30_freezing_effect_dev/run.py deleted file mode 100644 index 690abaea..00000000 --- a/configs/p30_freezing_effect_dev/run.py +++ /dev/null @@ -1,42 +0,0 @@ -from pathlib import Path -import time -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - EVENT_FILE="exp_events.txt" - name = 'p30_dev' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - 'fedavg-iid-freeze-0.yaml', - 'fedavg-iid-freeze-16.yaml', - 'fedavg-iid-freeze-50.yaml', - 'fedavg-iid-freeze-83.yaml', - 'fedavg-non-iid-freeze-100.yaml', - 'fedavg-non-iid-freeze-33.yaml', - 'fedavg-non-iid-freeze-66.yaml', - 'fedavg-iid-freeze-100.yaml', - 'fedavg-iid-freeze-33.yaml', - 
'fedavg-iid-freeze-66.yaml', - 'fedavg-non-iid-freeze-0.yaml', - 'fedavg-non-iid-freeze-16.yaml', - 'fedavg-non-iid-freeze-50.yaml', - 'fedavg-non-iid-freeze-83.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') - start = time.time() - - - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - elapsed = time.time() - start - os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') - - print('Done') - - diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml deleted file mode 100644 index 8dd342b3..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p31' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml 
b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml deleted file mode 100644 index 279369ef..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 \ No newline at end of file diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml deleted file mode 100644 index 578998b4..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml deleted file mode 100644 index ddd63597..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p31' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: 
dynamic-terminate -deadline: 500000 -experiment_prefix: 'p31_freezoff_non_iid_cifar10_cnn_w9s3_dyn_terminate' diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml deleted file mode 100644 index f92d5f3a..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p31' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 - -experiment_prefix: 'p31_freezoff_non_iid_cifar10_cnn_w9s3_dyn_terminate_swyh' diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml deleted file mode 100644 index de88d8ed..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false 
-profiling_time: 30 -warmup_round: false -output_location: 'output/p31' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -experiment_prefix: 'p31_freezoff_non_iid_cifar10_cnn_w9s3_fedavg' diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml deleted file mode 100644 index 13210854..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p31' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - 
amount: 9 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova -experiment_prefix: 'p31_freezoff_non_iid_cifar10_cnn_w9s3_fednova' diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml deleted file mode 100644 index 9881414a..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p31' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx -experiment_prefix: 'p31_freezoff_non_iid_cifar10_cnn_w9s3_fedprox' diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml deleted file mode 100644 index 98e4a085..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU 
-cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p31' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 500000 -experiment_prefix: 'p31_freezoff_non_iid_cifar10_cnn_w9s3_offload' diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml deleted file mode 100644 index 7100314c..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p31' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' 
- # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 34 -deadline_threshold: 2 -experiment_prefix: 'p31_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict' diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml deleted file mode 100644 index 38db1e51..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p31' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 35 -deadline_threshold: 2 -experiment_prefix: 'p31_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict2' diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml deleted file mode 100644 index ae5d1bcd..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: 
Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p31' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 -experiment_prefix: 'p31_freezoff_non_iid_cifar10_cnn_w9s3_tifl_adaptive' diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml deleted file mode 100644 index 45fbcdc3..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p31' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused 
-system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 -experiment_prefix: 'p31_freezoff_non_iid_cifar10_cnn_w9s3_tifl_basic' diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml deleted file mode 100644 index 3b4615d1..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 \ No newline at end of file diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml deleted file mode 100644 index ca0e2a55..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova \ No newline at end of file diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml deleted file mode 100644 index f66490e9..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx \ No newline at end of file diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/gen.py b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/gen.py deleted file mode 100644 index 168833f0..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data 
= '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml deleted file mode 100644 index 3febf08b..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 500000 \ No newline at end of file diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml deleted file mode 100644 index d2a1ae5b..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 34 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml deleted file mode 100644 index 8f107382..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 35 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/run.py b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/run.py deleted file mode 100644 index 10c67efe..00000000 --- 
a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/run.py +++ /dev/null @@ -1,39 +0,0 @@ -from pathlib import Path -import time -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - EVENT_FILE="exp_events.txt" - # name = 'p23_w9s3-half' - name = 'p23_w9s3' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - 'fedavg.yaml', - 'offload_strict.yaml', - 'offload_strict2.yaml', - # 'fednova.yaml', - # 'fedprox.yaml', - 'tifl_adaptive.yaml', - 'tifl_basic.yaml', - 'offload.yaml', - # 'dyn_terminate_swyh.yaml', - # 'dyn_terminate.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') - start = time.time() - - - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - elapsed = time.time() - start - os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') - - print('Done') - - diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml deleted file mode 100644 index e0ca9fbd..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 \ No newline at end of file diff --git a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml b/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml deleted file mode 100644 index b12b53b3..00000000 --- a/configs/p31_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 \ No newline at end of 
file diff --git a/configs/p32_freezoff_iid_cifar10_cnn/descr.yaml b/configs/p32_freezoff_iid_cifar10_cnn/descr.yaml deleted file mode 100644 index b3e88009..00000000 --- a/configs/p32_freezoff_iid_cifar10_cnn/descr.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p32' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p32_freezoff_iid_cifar10_cnn/dyn_terminate.cfg.yaml b/configs/p32_freezoff_iid_cifar10_cnn/dyn_terminate.cfg.yaml deleted file mode 100644 index 279369ef..00000000 --- a/configs/p32_freezoff_iid_cifar10_cnn/dyn_terminate.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 \ No newline at end of file diff --git a/configs/p32_freezoff_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml b/configs/p32_freezoff_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml deleted file mode 100644 index 578998b4..00000000 --- a/configs/p32_freezoff_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 diff --git a/configs/p32_freezoff_iid_cifar10_cnn/exps/dyn_terminate.yaml 
b/configs/p32_freezoff_iid_cifar10_cnn/exps/dyn_terminate.yaml deleted file mode 100644 index 94c0963c..00000000 --- a/configs/p32_freezoff_iid_cifar10_cnn/exps/dyn_terminate.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p22' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 -experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_dyn_terminate' diff --git a/configs/p32_freezoff_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml b/configs/p32_freezoff_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml deleted file mode 100644 index 8ecdd49a..00000000 --- a/configs/p32_freezoff_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p22' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit 
labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 - -experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_dyn_terminate_swyh' diff --git a/configs/p32_freezoff_iid_cifar10_cnn/exps/fedavg.yaml b/configs/p32_freezoff_iid_cifar10_cnn/exps/fedavg.yaml deleted file mode 100644 index 57b3ad7e..00000000 --- a/configs/p32_freezoff_iid_cifar10_cnn/exps/fedavg.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p22' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_fedavg' diff --git a/configs/p32_freezoff_iid_cifar10_cnn/exps/fednova.yaml 
b/configs/p32_freezoff_iid_cifar10_cnn/exps/fednova.yaml deleted file mode 100644 index 75dc378f..00000000 --- a/configs/p32_freezoff_iid_cifar10_cnn/exps/fednova.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p22' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova -experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_fednova' diff --git a/configs/p32_freezoff_iid_cifar10_cnn/exps/fedprox.yaml b/configs/p32_freezoff_iid_cifar10_cnn/exps/fedprox.yaml deleted file mode 100644 index ddba20a8..00000000 --- a/configs/p32_freezoff_iid_cifar10_cnn/exps/fedprox.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p22' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || 
"uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx -experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_fedprox' diff --git a/configs/p32_freezoff_iid_cifar10_cnn/exps/offload.yaml b/configs/p32_freezoff_iid_cifar10_cnn/exps/offload.yaml deleted file mode 100644 index 66785fd7..00000000 --- a/configs/p32_freezoff_iid_cifar10_cnn/exps/offload.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p22' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: offload -deadline: 500000 -experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_offload' diff --git a/configs/p32_freezoff_iid_cifar10_cnn/exps/offload_strict.yaml b/configs/p32_freezoff_iid_cifar10_cnn/exps/offload_strict.yaml deleted file mode 
100644 index d11399f4..00000000 --- a/configs/p32_freezoff_iid_cifar10_cnn/exps/offload_strict.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p22' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: offload -deadline: 7 -deadline_threshold: 2 -experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_offload_strict' diff --git a/configs/p32_freezoff_iid_cifar10_cnn/exps/tifl_adaptive.yaml b/configs/p32_freezoff_iid_cifar10_cnn/exps/tifl_adaptive.yaml deleted file mode 100644 index 00519096..00000000 --- a/configs/p32_freezoff_iid_cifar10_cnn/exps/tifl_adaptive.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p22' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: 
"uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 -experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_tifl_adaptive' diff --git a/configs/p32_freezoff_iid_cifar10_cnn/exps/tifl_basic.yaml b/configs/p32_freezoff_iid_cifar10_cnn/exps/tifl_basic.yaml deleted file mode 100644 index cd2109d2..00000000 --- a/configs/p32_freezoff_iid_cifar10_cnn/exps/tifl_basic.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p22' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 -experiment_prefix: 'p22_freezoff_iid_cifar10_cnn_tifl_basic' diff --git a/configs/p32_freezoff_iid_cifar10_cnn/fedavg.cfg.yaml b/configs/p32_freezoff_iid_cifar10_cnn/fedavg.cfg.yaml deleted file mode 100644 index 3b4615d1..00000000 --- 
a/configs/p32_freezoff_iid_cifar10_cnn/fedavg.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 \ No newline at end of file diff --git a/configs/p32_freezoff_iid_cifar10_cnn/fednova.cfg.yaml b/configs/p32_freezoff_iid_cifar10_cnn/fednova.cfg.yaml deleted file mode 100644 index ca0e2a55..00000000 --- a/configs/p32_freezoff_iid_cifar10_cnn/fednova.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova \ No newline at end of file diff --git a/configs/p32_freezoff_iid_cifar10_cnn/fedprox.cfg.yaml b/configs/p32_freezoff_iid_cifar10_cnn/fedprox.cfg.yaml deleted file mode 100644 index f66490e9..00000000 --- a/configs/p32_freezoff_iid_cifar10_cnn/fedprox.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx \ No newline at end of file diff --git a/configs/p32_freezoff_iid_cifar10_cnn/gen.py b/configs/p32_freezoff_iid_cifar10_cnn/gen.py deleted file mode 100644 index 168833f0..00000000 --- a/configs/p32_freezoff_iid_cifar10_cnn/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - 
print('Done') - - diff --git a/configs/p32_freezoff_iid_cifar10_cnn/offload.cfg.yaml b/configs/p32_freezoff_iid_cifar10_cnn/offload.cfg.yaml deleted file mode 100644 index 3febf08b..00000000 --- a/configs/p32_freezoff_iid_cifar10_cnn/offload.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 500000 \ No newline at end of file diff --git a/configs/p32_freezoff_iid_cifar10_cnn/offload_strict.cfg.yaml b/configs/p32_freezoff_iid_cifar10_cnn/offload_strict.cfg.yaml deleted file mode 100644 index f07a9c58..00000000 --- a/configs/p32_freezoff_iid_cifar10_cnn/offload_strict.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 7 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p32_freezoff_iid_cifar10_cnn/run.py b/configs/p32_freezoff_iid_cifar10_cnn/run.py deleted file mode 100644 index d3cc22bf..00000000 --- a/configs/p32_freezoff_iid_cifar10_cnn/run.py +++ /dev/null @@ -1,30 +0,0 @@ -from pathlib import Path - -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - name = 'p11_freezoff' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - 'fedavg.yaml', - # 'offload_strict.yaml', - # 'fednova.yaml', - # 'fedprox.yaml', - # 'offload.yaml', - # 'tifl_adaptive.yaml', - # 'tifl_basic.yaml', - # 'dyn_terminate_swyh.yaml', - # 'dyn_terminate.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - - print('Done') - - diff --git a/configs/p32_freezoff_iid_cifar10_cnn/tifl_adaptive.cfg.yaml b/configs/p32_freezoff_iid_cifar10_cnn/tifl_adaptive.cfg.yaml deleted file mode 100644 index e0ca9fbd..00000000 --- 
a/configs/p32_freezoff_iid_cifar10_cnn/tifl_adaptive.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 \ No newline at end of file diff --git a/configs/p32_freezoff_iid_cifar10_cnn/tifl_basic.cfg.yaml b/configs/p32_freezoff_iid_cifar10_cnn/tifl_basic.cfg.yaml deleted file mode 100644 index b12b53b3..00000000 --- a/configs/p32_freezoff_iid_cifar10_cnn/tifl_basic.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 \ No newline at end of file diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/descr.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/descr.yaml deleted file mode 100644 index ae61894f..00000000 --- a/configs/p33_freezoff_non_iid_cifar10_cnn/descr.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p33' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/dyn_terminate.cfg.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/dyn_terminate.cfg.yaml deleted file mode 100644 index 279369ef..00000000 --- a/configs/p33_freezoff_non_iid_cifar10_cnn/dyn_terminate.cfg.yaml +++ /dev/null 
@@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 \ No newline at end of file diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml deleted file mode 100644 index 578998b4..00000000 --- a/configs/p33_freezoff_non_iid_cifar10_cnn/dyn_terminate_swyh.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate.yaml deleted file mode 100644 index fc4aceea..00000000 --- a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p23' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 -experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_dyn_terminate' diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml 
b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml deleted file mode 100644 index 38ec7094..00000000 --- a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/dyn_terminate_swyh.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p23' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 - -experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_dyn_terminate_swyh' diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/fedavg.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/fedavg.yaml deleted file mode 100644 index fc9fb303..00000000 --- a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/fedavg.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p23' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # 
"limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_fedavg' diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/fednova.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/fednova.yaml deleted file mode 100644 index 9f61a507..00000000 --- a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/fednova.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p23' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova -experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_fednova' diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/fedprox.yaml 
b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/fedprox.yaml deleted file mode 100644 index c53e7753..00000000 --- a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/fedprox.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p23' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx -experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_fedprox' diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/offload.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/offload.yaml deleted file mode 100644 index c7dd2978..00000000 --- a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/offload.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p23' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q 
sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: offload -deadline: 500000 -experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_offload' diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/offload_strict.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/offload_strict.yaml deleted file mode 100644 index e52c19d4..00000000 --- a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/offload_strict.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p23' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: offload -deadline: 7 -deadline_threshold: 2 -experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_offload_strict' diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/tifl_adaptive.yaml 
b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/tifl_adaptive.yaml deleted file mode 100644 index cefa2ae0..00000000 --- a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/tifl_adaptive.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p23' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 -experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_tifl_adaptive' diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/tifl_basic.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/exps/tifl_basic.yaml deleted file mode 100644 index 284f47b5..00000000 --- a/configs/p33_freezoff_non_iid_cifar10_cnn/exps/tifl_basic.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p23' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 6 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" 
|| "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 -experiment_prefix: 'p23_freezoff_non_iid_cifar10_cnn_tifl_basic' diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/fedavg.cfg.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/fedavg.cfg.yaml deleted file mode 100644 index 3b4615d1..00000000 --- a/configs/p33_freezoff_non_iid_cifar10_cnn/fedavg.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 \ No newline at end of file diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/fednova.cfg.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/fednova.cfg.yaml deleted file mode 100644 index ca0e2a55..00000000 --- a/configs/p33_freezoff_non_iid_cifar10_cnn/fednova.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova \ No newline at end of file diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/fedprox.cfg.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/fedprox.cfg.yaml deleted file mode 100644 index f66490e9..00000000 --- a/configs/p33_freezoff_non_iid_cifar10_cnn/fedprox.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx \ No newline at end of file diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/gen.py b/configs/p33_freezoff_non_iid_cifar10_cnn/gen.py deleted file mode 100644 index 168833f0..00000000 --- a/configs/p33_freezoff_non_iid_cifar10_cnn/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib 
import Path - -if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/offload.cfg.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/offload.cfg.yaml deleted file mode 100644 index 3febf08b..00000000 --- a/configs/p33_freezoff_non_iid_cifar10_cnn/offload.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 500000 \ No newline at end of file diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/offload_strict.cfg.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/offload_strict.cfg.yaml deleted file mode 100644 index f07a9c58..00000000 --- a/configs/p33_freezoff_non_iid_cifar10_cnn/offload_strict.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 7 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/run.py b/configs/p33_freezoff_non_iid_cifar10_cnn/run.py deleted file mode 100644 index 3f289cc1..00000000 --- a/configs/p33_freezoff_non_iid_cifar10_cnn/run.py +++ /dev/null @@ -1,30 +0,0 @@ -from pathlib import Path - -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - name = 
'p11_freezoff' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - 'fedavg.yaml', - 'offload_strict.yaml', - 'fednova.yaml', - 'fedprox.yaml', - 'offload.yaml', - 'tifl_adaptive.yaml', - 'tifl_basic.yaml', - 'dyn_terminate_swyh.yaml', - 'dyn_terminate.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - - print('Done') - - diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/tifl_adaptive.cfg.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/tifl_adaptive.cfg.yaml deleted file mode 100644 index e0ca9fbd..00000000 --- a/configs/p33_freezoff_non_iid_cifar10_cnn/tifl_adaptive.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 \ No newline at end of file diff --git a/configs/p33_freezoff_non_iid_cifar10_cnn/tifl_basic.cfg.yaml b/configs/p33_freezoff_non_iid_cifar10_cnn/tifl_basic.cfg.yaml deleted file mode 100644 index b12b53b3..00000000 --- a/configs/p33_freezoff_non_iid_cifar10_cnn/tifl_basic.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 \ No newline at end of file diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml deleted file mode 100644 index 03073a5c..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/descr.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p34' -tensor_board_active: true 
-termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml deleted file mode 100644 index 279369ef..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 \ No newline at end of file diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml deleted file mode 100644 index 578998b4..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/dyn_terminate_swyh.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml deleted file mode 100644 index 37e8a04a..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p34' 
-tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 -experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_dyn_terminate' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml deleted file mode 100644 index 634d40b3..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/dyn_terminate_swyh.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p34' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual 
configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 - -experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_dyn_terminate_swyh' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml deleted file mode 100644 index ccaa69df..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedavg.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p34' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_fedavg' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml deleted file mode 100644 index d3aea80a..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fednova.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 
-warmup_round: false -output_location: 'output/p34' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova -experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_fednova' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml deleted file mode 100644 index 121218a1..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/fedprox.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p34' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - 
amount: 9 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx -experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_fedprox' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml deleted file mode 100644 index 0ea6f35c..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p34' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 500000 -experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_offload' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml deleted file mode 100644 index 58d5c49f..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU 
-cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p34' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 33 -deadline_threshold: 2 -experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml deleted file mode 100644 index 3e000c48..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict2.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p34' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - 
hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 34 -deadline_threshold: 2 -experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict2' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml deleted file mode 100644 index 0cc9dba8..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict3.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p34' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 36 -deadline_threshold: 3 -experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict3' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml deleted file mode 100644 index 0eeb0782..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/offload_strict4.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 
-epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p34' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 38 -deadline_threshold: 3 -experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_offload_strict4' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml deleted file mode 100644 index 4f97443e..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_adaptive.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p34' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha 
|| unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 -experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_tifl_adaptive' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml deleted file mode 100644 index a01d9d72..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/exps/tifl_basic.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p34' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 -experiment_prefix: 'p34_freezoff_non_iid_cifar10_cnn_w9s3_tifl_basic' diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml deleted file mode 100644 index 3b4615d1..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fedavg.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual 
configuration -offload_stategy: vanilla -deadline: 500000 \ No newline at end of file diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml deleted file mode 100644 index ca0e2a55..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fednova.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova \ No newline at end of file diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml deleted file mode 100644 index f66490e9..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/fedprox.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx \ No newline at end of file diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/gen.py b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/gen.py deleted file mode 100644 index 168833f0..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff 
--git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml deleted file mode 100644 index 3febf08b..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 500000 \ No newline at end of file diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml deleted file mode 100644 index ba287d60..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 33 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml deleted file mode 100644 index d2a1ae5b..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict2.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 34 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml deleted file mode 100644 index bcd99ddf..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict3.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 36 -deadline_threshold: 3 \ No newline at end of file diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml deleted file mode 100644 index 9f884b16..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/offload_strict4.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# 
Individual configuration -offload_stategy: offload -deadline: 38 -deadline_threshold: 3 \ No newline at end of file diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/run.py b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/run.py deleted file mode 100644 index 9e73a7ba..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/run.py +++ /dev/null @@ -1,41 +0,0 @@ -from pathlib import Path -import time -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - EVENT_FILE="exp_events.txt" - name = 'p23_w9s3-half' - # name = 'p23_w9s3' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - # 'fedavg.yaml', - # 'offload_strict.yaml', - # 'offload_strict2.yaml', - 'offload_strict3.yaml', - 'offload_strict4.yaml', - # 'fednova.yaml', - # 'fedprox.yaml', - # 'tifl_adaptive.yaml', - # 'tifl_basic.yaml', - # 'offload.yaml', - # 'dyn_terminate_swyh.yaml', - # 'dyn_terminate.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') - start = time.time() - - - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - elapsed = time.time() - start - os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') - - print('Done') - - diff --git a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml deleted file mode 100644 index e0ca9fbd..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/tifl_adaptive.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 \ No newline at end of file diff --git 
a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml b/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml deleted file mode 100644 index b12b53b3..00000000 --- a/configs/p34_freezoff_non_iid_cifar10_cnn_w9s3/tifl_basic.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/descr.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/descr.yaml deleted file mode 100644 index 26406f56..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/descr.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p35' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/dyn_terminate.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/dyn_terminate.cfg.yaml deleted file mode 100644 index 279369ef..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/dyn_terminate.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 \ No newline at end of file diff --git 
a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/dyn_terminate_swyh.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/dyn_terminate_swyh.cfg.yaml deleted file mode 100644 index 578998b4..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/dyn_terminate_swyh.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/dyn_terminate.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/dyn_terminate.yaml deleted file mode 100644 index 9bf1330f..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/dyn_terminate.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p35' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 -experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_dyn_terminate' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/dyn_terminate_swyh.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/dyn_terminate_swyh.yaml deleted file mode 100644 index 69eb49ea..00000000 --- 
a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/dyn_terminate_swyh.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p35' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 - -experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_dyn_terminate_swyh' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fedavg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fedavg.yaml deleted file mode 100644 index e8fea4bd..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fedavg.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p35' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: 
"uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_fedavg' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fednova.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fednova.yaml deleted file mode 100644 index ca6f7508..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fednova.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p35' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova -experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_fednova' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fedprox.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fedprox.yaml deleted 
file mode 100644 index f9882b88..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/fedprox.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p35' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx -experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_fedprox' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/offload.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/offload.yaml deleted file mode 100644 index b857c0e0..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/offload.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p35' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) 
|| "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: offload -deadline: 500000 -experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_offload' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/offload_strict.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/offload_strict.yaml deleted file mode 100644 index 03732d06..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/offload_strict.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p35' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: offload -deadline: 7 -deadline_threshold: 2 -experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_offload_strict' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/tifl_adaptive.yaml 
b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/tifl_adaptive.yaml deleted file mode 100644 index 7d0552be..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/tifl_adaptive.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p35' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 -experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_tifl_adaptive' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/tifl_basic.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/tifl_basic.yaml deleted file mode 100644 index 77600133..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/exps/tifl_basic.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p35' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] 
-sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 -experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_tifl_basic' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fedavg.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fedavg.cfg.yaml deleted file mode 100644 index 3b4615d1..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fedavg.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fednova.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fednova.cfg.yaml deleted file mode 100644 index ca0e2a55..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fednova.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fedprox.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fedprox.cfg.yaml deleted file mode 100644 index f66490e9..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/fedprox.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/gen.py b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/gen.py deleted file mode 100644 index 
168833f0..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/offload.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/offload.cfg.yaml deleted file mode 100644 index 3febf08b..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/offload.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 500000 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/offload_strict.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/offload_strict.cfg.yaml deleted file mode 100644 index f07a9c58..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/offload_strict.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 7 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/run.py b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/run.py deleted file mode 100644 index 8484ca9a..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn 
copy/run.py +++ /dev/null @@ -1,37 +0,0 @@ -from pathlib import Path -import time -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - EVENT_FILE="exp_events.txt" - name = 'p11_freezoff' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - 'fedavg.yaml', - # 'offload_strict.yaml', - # 'fednova.yaml', - # 'fedprox.yaml', - # 'offload.yaml', - # 'tifl_adaptive.yaml', - # 'tifl_basic.yaml', - # 'dyn_terminate_swyh.yaml', - # 'dyn_terminate.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') - start = time.time() - - - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - elapsed = time.time() - start - os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') - - print('Done') - - diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/tifl_adaptive.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/tifl_adaptive.cfg.yaml deleted file mode 100644 index e0ca9fbd..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/tifl_adaptive.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/tifl_basic.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/tifl_basic.cfg.yaml deleted file mode 100644 index b12b53b3..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn copy/tifl_basic.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/descr.yaml 
b/configs/p35_freezoff_non_iid_5_cifar10_cnn/descr.yaml deleted file mode 100644 index 26406f56..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn/descr.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p35' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/dyn_terminate.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/dyn_terminate.cfg.yaml deleted file mode 100644 index 279369ef..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn/dyn_terminate.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/dyn_terminate_swyh.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/dyn_terminate_swyh.cfg.yaml deleted file mode 100644 index 578998b4..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn/dyn_terminate_swyh.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/dyn_terminate.yaml 
b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/dyn_terminate.yaml deleted file mode 100644 index 9bf1330f..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/dyn_terminate.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p35' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 -experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_dyn_terminate' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/dyn_terminate_swyh.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/dyn_terminate_swyh.yaml deleted file mode 100644 index 69eb49ea..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/dyn_terminate_swyh.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p35' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 
18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 - -experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_dyn_terminate_swyh' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fedavg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fedavg.yaml deleted file mode 100644 index e8fea4bd..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fedavg.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p35' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_fedavg' diff --git 
a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fednova.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fednova.yaml deleted file mode 100644 index ca6f7508..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fednova.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p35' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova -experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_fednova' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fedprox.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fedprox.yaml deleted file mode 100644 index f9882b88..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/fedprox.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p35' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: 
[7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx -experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_fedprox' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/offload.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/offload.yaml deleted file mode 100644 index b857c0e0..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/offload.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p35' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: offload -deadline: 500000 -experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_offload' diff --git 
a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/offload_strict.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/offload_strict.yaml deleted file mode 100644 index 03732d06..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/offload_strict.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p35' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: offload -deadline: 7 -deadline_threshold: 2 -experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_offload_strict' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/tifl_adaptive.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/tifl_adaptive.yaml deleted file mode 100644 index 7d0552be..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/tifl_adaptive.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p35' -tensor_board_active: true -termination_percentage: 0.7 
-clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 -experiment_prefix: 'p35_freezoff_non_iid_5_cifar10_cnn_tifl_adaptive' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/tifl_basic.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/tifl_basic.yaml deleted file mode 100644 index 77600133..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn/exps/tifl_basic.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 2 -wait_for_clients: true -net: Cifar10CNN -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p35' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 2 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 5 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 -experiment_prefix: 
'p35_freezoff_non_iid_5_cifar10_cnn_tifl_basic' diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/fedavg.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/fedavg.cfg.yaml deleted file mode 100644 index 3b4615d1..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn/fedavg.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/fednova.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/fednova.cfg.yaml deleted file mode 100644 index ca0e2a55..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn/fednova.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/fedprox.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/fedprox.cfg.yaml deleted file mode 100644 index f66490e9..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn/fedprox.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/gen.py b/configs/p35_freezoff_non_iid_5_cifar10_cnn/gen.py deleted file mode 100644 index 168833f0..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - 
exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/offload.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/offload.cfg.yaml deleted file mode 100644 index 3febf08b..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn/offload.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 500000 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/offload_strict.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/offload_strict.cfg.yaml deleted file mode 100644 index f07a9c58..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn/offload_strict.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 7 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/run.py b/configs/p35_freezoff_non_iid_5_cifar10_cnn/run.py deleted file mode 100644 index 8484ca9a..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn/run.py +++ /dev/null @@ -1,37 +0,0 @@ -from pathlib import Path -import time -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - EVENT_FILE="exp_events.txt" - name = 'p11_freezoff' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - 'fedavg.yaml', - # 'offload_strict.yaml', - # 'fednova.yaml', - # 'fedprox.yaml', - # 'offload.yaml', - # 'tifl_adaptive.yaml', - # 'tifl_basic.yaml', - # 'dyn_terminate_swyh.yaml', - # 'dyn_terminate.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = 
f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') - start = time.time() - - - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - elapsed = time.time() - start - os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') - - print('Done') - - diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/tifl_adaptive.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/tifl_adaptive.cfg.yaml deleted file mode 100644 index e0ca9fbd..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn/tifl_adaptive.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 \ No newline at end of file diff --git a/configs/p35_freezoff_non_iid_5_cifar10_cnn/tifl_basic.cfg.yaml b/configs/p35_freezoff_non_iid_5_cifar10_cnn/tifl_basic.cfg.yaml deleted file mode 100644 index b12b53b3..00000000 --- a/configs/p35_freezoff_non_iid_5_cifar10_cnn/tifl_basic.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 \ No newline at end of file diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/descr.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/descr.yaml deleted file mode 100644 index 73e25c49..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/descr.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10ResNet -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p36' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || 
"uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/dyn_terminate.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/dyn_terminate.cfg.yaml deleted file mode 100644 index 279369ef..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/dyn_terminate.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 \ No newline at end of file diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/dyn_terminate_swyh.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/dyn_terminate_swyh.cfg.yaml deleted file mode 100644 index 578998b4..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/dyn_terminate_swyh.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/dyn_terminate.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/dyn_terminate.yaml deleted file mode 100644 index bc057921..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/dyn_terminate.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10ResNet -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p36' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n 
labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: dynamic-terminate -deadline: 500000 -experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_dyn_terminate' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/dyn_terminate_swyh.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/dyn_terminate_swyh.yaml deleted file mode 100644 index cce27aef..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/dyn_terminate_swyh.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10ResNet -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p36' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: dynamic-terminate-swyh -deadline: 500000 - -experiment_prefix: 
'p36_freezoff_non_iid_cifar10_resnet_w9s3_dyn_terminate_swyh' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fedavg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fedavg.yaml deleted file mode 100644 index 76521c3a..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fedavg.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10ResNet -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p36' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_fedavg' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fednova.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fednova.yaml deleted file mode 100644 index 651a5c6d..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fednova.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10ResNet -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p36' -tensor_board_active: 
true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova -experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_fednova' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fedprox.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fedprox.yaml deleted file mode 100644 index 0830ef17..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/fedprox.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10ResNet -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p36' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: 
vanilla -deadline: 500000 -optimizer: FedProx -experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_fedprox' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload.yaml deleted file mode 100644 index 7719dec5..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10ResNet -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p36' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 500000 -experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_offload' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict.yaml deleted file mode 100644 index 0ec1313c..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10ResNet -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 
30 -warmup_round: false -output_location: 'output/p36' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 33 -deadline_threshold: 2 -experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_offload_strict' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict2.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict2.yaml deleted file mode 100644 index 4cdec706..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict2.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10ResNet -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p36' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' 
- nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 34 -deadline_threshold: 2 -experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_offload_strict2' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict3.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict3.yaml deleted file mode 100644 index 78f24757..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict3.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10ResNet -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p36' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 36 -deadline_threshold: 3 -experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_offload_strict3' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict4.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict4.yaml deleted file mode 100644 index 8b28e12c..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/offload_strict4.yaml +++ /dev/null @@ -1,37 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 
-epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10ResNet -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p36' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: offload -deadline: 38 -deadline_threshold: 3 -experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_offload_strict4' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/tifl_adaptive.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/tifl_adaptive.yaml deleted file mode 100644 index 056d4c26..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/tifl_adaptive.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10ResNet -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p36' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q 
probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 -experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_tifl_adaptive' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/tifl_basic.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/tifl_basic.yaml deleted file mode 100644 index 4e7d6a9f..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/exps/tifl_basic.yaml +++ /dev/null @@ -1,36 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: Cifar10ResNet -dataset: cifar10 -# Use cuda is available; setting to false will force CPU -cuda: false -profiling_time: 30 -warmup_round: false -output_location: 'output/p36' -tensor_board_active: true -termination_percentage: 0.7 -clients_per_round: 3 -node_groups: - slow: [1, 3] - medium: [4, 6] - fast: [7, 9] -sampler: "n labels" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) || "n labels" -sampler_args: - - 2 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 9 -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 -experiment_prefix: 'p36_freezoff_non_iid_cifar10_resnet_w9s3_tifl_basic' diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fedavg.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fedavg.cfg.yaml deleted file mode 100644 index 3b4615d1..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fedavg.cfg.yaml +++ 
/dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 \ No newline at end of file diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fednova.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fednova.cfg.yaml deleted file mode 100644 index ca0e2a55..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fednova.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedNova \ No newline at end of file diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fedprox.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fedprox.cfg.yaml deleted file mode 100644 index f66490e9..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/fedprox.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: vanilla -deadline: 500000 -optimizer: FedProx \ No newline at end of file diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/gen.py b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/gen.py deleted file mode 100644 index 168833f0..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/gen.py +++ /dev/null @@ -1,26 +0,0 @@ -from pathlib import Path - -if __name__ == '__main__': - base_path = f'configs/{Path(__file__).parent.name}' - path = Path(base_path) - descr_path = path / 'descr.yaml' - - exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes] - descr_data = '' - with open(descr_path) as descr_f: - descr_data = descr_f.read() - exps_path = path / 'exps' - exps_path.mkdir(parents=True, exist_ok=True) - for exp_cfg in exp_cfg_list: - exp_cfg_data = '' - with open(exp_cfg) as exp_f: - exp_cfg_data = exp_f.read() - - exp_data = descr_data + exp_cfg_data - exp_data += f'\nexperiment_prefix: \'{Path(__file__).parent.name}_{exp_cfg.name.split(".")[0]}\'\n' - filename = '.'.join([exp_cfg.name.split('.')[0], exp_cfg.name.split('.')[2]]) - with open(exps_path / 
filename, mode='w') as f: - f.write(exp_data) - print('Done') - - diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload.cfg.yaml deleted file mode 100644 index 3febf08b..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 500000 \ No newline at end of file diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict.cfg.yaml deleted file mode 100644 index ba287d60..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 33 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict2.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict2.cfg.yaml deleted file mode 100644 index d2a1ae5b..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict2.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 34 -deadline_threshold: 2 \ No newline at end of file diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict3.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict3.cfg.yaml deleted file mode 100644 index bcd99ddf..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict3.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 36 -deadline_threshold: 3 \ No newline at end of file diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict4.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict4.cfg.yaml deleted file mode 100644 index 
9f884b16..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/offload_strict4.cfg.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Individual configuration -offload_stategy: offload -deadline: 38 -deadline_threshold: 3 \ No newline at end of file diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/run.py b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/run.py deleted file mode 100644 index 2a60ce38..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/run.py +++ /dev/null @@ -1,41 +0,0 @@ -from pathlib import Path -import time -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - EVENT_FILE="exp_events.txt" - name = 'p23_w9s3_fast' - # name = 'p23_w9s3' - generate_docker(name) - base_path = f'configs/{Path(__file__).parent.name}' - exp_list = [ - 'fedavg.yaml', - # 'offload_strict.yaml', - # 'offload_strict2.yaml', - # 'offload_strict3.yaml', - # 'offload_strict4.yaml', - # 'fednova.yaml', - # 'fedprox.yaml', - # 'tifl_adaptive.yaml', - # 'tifl_basic.yaml', - # 'offload.yaml', - # 'dyn_terminate_swyh.yaml', - # 'dyn_terminate.yaml', - ] - exp_list = [f'{base_path}/exps/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - os.system(f'echo "[$(date +"%T")] Starting {exp_cfg_file}" >> {EVENT_FILE}') - start = time.time() - - - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - elapsed = time.time() - start - os.system(f'echo "[$(date +"%T")] Finished with {exp_cfg_file} in {elapsed} seconds" >> {EVENT_FILE}') - - print('Done') - - diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/tifl_adaptive.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/tifl_adaptive.cfg.yaml deleted file mode 100644 index e0ca9fbd..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/tifl_adaptive.cfg.yaml +++ /dev/null @@ -1,3 +0,0 
@@ -# Individual configuration -offload_stategy: tifl-adaptive -deadline: 500000 \ No newline at end of file diff --git a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/tifl_basic.cfg.yaml b/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/tifl_basic.cfg.yaml deleted file mode 100644 index b12b53b3..00000000 --- a/configs/p36_freezoff_non_iid_cifar10_resnet_w9s3/tifl_basic.cfg.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Individual configuration -offload_stategy: tifl-basic -deadline: 500000 \ No newline at end of file diff --git a/configs/terminate/p_terminate_terminate.yaml b/configs/terminate/p_terminate_terminate.yaml deleted file mode 100644 index 4d2e182a..00000000 --- a/configs/terminate/p_terminate_terminate.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p_terminate_terminate' -offload_stategy: dynamic-terminate -profiling_time: 100 -deadline: 1 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 12 -node_groups: - slow: [1, 1] - medium: [2, 2] - fast: [3, 3] -termination_percentage: 0.7 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 16 diff --git a/configs/terminate/p_terminate_terminate_swyh.yaml b/configs/terminate/p_terminate_terminate_swyh.yaml deleted file mode 100644 index 056630b3..00000000 --- a/configs/terminate/p_terminate_terminate_swyh.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment 
configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p_terminate_terminate_swyh' -offload_stategy: dynamic-terminate-swyh -profiling_time: 100 -deadline: 1 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 12 -node_groups: - slow: [1, 1] - medium: [2, 2] - fast: [3, 3] -termination_percentage: 0.7 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 16 diff --git a/configs/terminate/p_terminate_vanilla.yaml b/configs/terminate/p_terminate_vanilla.yaml deleted file mode 100644 index e1057a80..00000000 --- a/configs/terminate/p_terminate_vanilla.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p_terminate_vanilla' -offload_stategy: vanilla -profiling_time: 100 -deadline: 1 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 12 -node_groups: - slow: [1, 1] - medium: [2, 2] - fast: [3, 3] -termination_percentage: 0.7 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random 
seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 16 diff --git a/configs/terminate/p_terminate_vanilla_s3.yaml b/configs/terminate/p_terminate_vanilla_s3.yaml deleted file mode 100644 index ad196427..00000000 --- a/configs/terminate/p_terminate_vanilla_s3.yaml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p_terminate_vanilla_s3' -offload_stategy: vanilla -profiling_time: 100 -deadline: 1 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 12 -node_groups: - slow: [1, 1] - medium: [2, 2] - fast: [3, 3] -termination_percentage: 0.7 -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 16 diff --git a/configs/terminate/run.py b/configs/terminate/run.py deleted file mode 100644 index c6a9bdad..00000000 --- a/configs/terminate/run.py +++ /dev/null @@ -1,23 +0,0 @@ -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - name = 'terminate' - generate_docker(name, 16, True) - base_path = 'configs/terminate' - exp_list = [ - 'p_terminate_terminate.yaml', - 'p_terminate_terminate_swyh.yaml', - 'p_terminate_vanilla.yaml', - 'p_terminate_vanilla_s3.yaml' - ] - exp_list = [f'{base_path}/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export 
EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - - print('Done') - - diff --git a/configs/tifl-15/exp_p15_baseline.yaml b/configs/tifl-15/exp_p15_baseline.yaml deleted file mode 100644 index a3410567..00000000 --- a/configs/tifl-15/exp_p15_baseline.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p15_baseline' -offload_stategy: vanilla -profiling_time: 100 -deadline: 500 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/tifl-15/exp_p15_tifl-adaptive.yaml b/configs/tifl-15/exp_p15_tifl-adaptive.yaml deleted file mode 100644 index 72d17fb2..00000000 --- a/configs/tifl-15/exp_p15_tifl-adaptive.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p15_tifl-adaptive' -offload_stategy: tifl-adaptive -profiling_time: 100 -deadline: 500 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] 
- medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/tifl-15/exp_p15_tifl-basic.yaml b/configs/tifl-15/exp_p15_tifl-basic.yaml deleted file mode 100644 index 872751c7..00000000 --- a/configs/tifl-15/exp_p15_tifl-basic.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p15_tifl-basic' -offload_stategy: tifl-basic -profiling_time: 100 -deadline: 500 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 3 -node_groups: - slow: [1, 6] - medium: [7, 12] - fast: [13, 18] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 18 diff --git a/configs/tifl-15/exp_p3_tifl.yaml b/configs/tifl-15/exp_p3_tifl.yaml deleted file mode 100644 index f17b4b8d..00000000 --- a/configs/tifl-15/exp_p3_tifl.yaml +++ /dev/null @@ -1,34 +0,0 @@ ---- -# Experiment configuration -total_epochs: 50 -epochs_per_cycle: 1 -wait_for_clients: true -net: FashionMNISTCNN -dataset: fashion-mnist -# Use cuda is 
available; setting to false will force CPU -cuda: false -experiment_prefix: 'exp_p3_tifl-basic' -offload_stategy: tifl-adaptive -profiling_time: 100 -deadline: 500 -warmup_round: false -output_location: 'output' -tensor_board_active: true -clients_per_round: 1 -node_groups: - slow: [1, 1] - medium: [2, 2] - fast: [3, 3] -sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -sampler_args: - - 0.07 # label limit || q probability || alpha || unused - - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 3 diff --git a/configs/tifl-15/run.py b/configs/tifl-15/run.py deleted file mode 100644 index db1a5128..00000000 --- a/configs/tifl-15/run.py +++ /dev/null @@ -1,18 +0,0 @@ -from fltk.util.generate_docker_compose import run as generate_docker -import os -if __name__ == '__main__': - name = 'tifl-15' - generate_docker(name) - base_path = 'configs/tifl-15' - exp_list = ['exp_p15_baseline.yaml', 'exp_p15_tifl-adaptive.yaml', 'exp_p15_tifl-basic.yaml'] - exp_list = [f'{base_path}/{x}' for x in exp_list] - first_prefix = '--build' - for exp_cfg_file in exp_list: - cmd = f'export EXP_CONFIG_FILE="{exp_cfg_file}"; docker-compose --compatibility up {first_prefix};' - print(f'Running cmd: "{cmd}"') - os.system(cmd) - first_prefix = '' - - print('Done') - - diff --git a/deploy/dev/client_stub_default.yml b/deploy/dev/client_stub_default.yml deleted file mode 100644 index e8e5b9ba..00000000 --- a/deploy/dev/client_stub_default.yml +++ /dev/null @@ -1,27 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - MASTER_HOSTNAME=10.5.0.11 - - NIC=eth0 - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '2' -# memory: 1024M diff --git a/deploy/dev/client_stub_fast.yml b/deploy/dev/client_stub_fast.yml deleted file mode 100644 index 4e8d2d78..00000000 --- a/deploy/dev/client_stub_fast.yml +++ /dev/null @@ -1,26 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . # look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - MASTER_HOSTNAME=10.5.0.11 - - NIC=eth0 - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '3' diff --git a/deploy/dev/client_stub_medium.yml b/deploy/dev/client_stub_medium.yml deleted file mode 100644 index 9d096797..00000000 --- a/deploy/dev/client_stub_medium.yml +++ /dev/null @@ -1,26 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - MASTER_HOSTNAME=10.5.0.11 - - NIC=eth0 - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '2' diff --git a/deploy/dev/client_stub_slow.yml b/deploy/dev/client_stub_slow.yml deleted file mode 100644 index deb37f37..00000000 --- a/deploy/dev/client_stub_slow.yml +++ /dev/null @@ -1,26 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . # look for the docker file where this file is currently located - volumes: - - ./data:/opt/federation-lab/data -# - ./docker_data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - MASTER_HOSTNAME=10.5.0.11 - - NIC=eth0 - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '0.5' \ No newline at end of file diff --git a/deploy/dev/system_stub.yml b/deploy/dev/system_stub.yml deleted file mode 100644 index 37404525..00000000 --- a/deploy/dev/system_stub.yml +++ /dev/null @@ -1,29 +0,0 @@ -# creating a multi-container docker -version: "3.3" -services: - fl_server: # name can be anything - container_name: federation-lab-server # what the name for this container would be - cpuset: '0-2' - restart: 
"no" # if it crashes for example - build: . # look for the docker file where this file is currently located - volumes: -# - ./data/MNIST:/opt/federation-lab/data/MNIST - - ./data:/opt/federation-lab/data - - ./output:/opt/federation-lab/output - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK=0 - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - MASTER_HOSTNAME=10.5.0.11 - - NIC=eth0 - ports: - - "5000:5000" # {machine-port}:{docker-port} - networks: - default: - ipv4_address: 10.5.0.11 -networks: - default: - external: - name: local_network_dev \ No newline at end of file diff --git a/deploy/dev_generate/client_stub_medium.yml b/deploy/dev_generate/client_stub_medium.yml deleted file mode 100644 index 9d096797..00000000 --- a/deploy/dev_generate/client_stub_medium.yml +++ /dev/null @@ -1,26 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - MASTER_HOSTNAME=10.5.0.11 - - NIC=eth0 - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '2' diff --git a/deploy/dev_generate/description.yml b/deploy/dev_generate/description.yml deleted file mode 100644 index cdfd507d..00000000 --- a/deploy/dev_generate/description.yml +++ /dev/null @@ -1,19 +0,0 @@ -federator: - stub-name: system_stub.yml - pin-cores: true - num-cores: 1 -clients: - fast: - stub-name: stub_default.yml - amount: 2 - pin-cores: true - num-cores: 1 - cpu-speed: 1 - cpu-variation: 0 - slow: - stub-name: stub_default.yml - amount: 0 - pin-cores: true - num-cores: 1 - cpu-speed: 1 - cpu-variation: 0 diff --git a/deploy/dev_generate/stub_fast.yml b/deploy/dev_generate/stub_fast.yml deleted file mode 100644 index 3b4aee9c..00000000 --- a/deploy/dev_generate/stub_fast.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - MASTER_HOSTNAME=10.5.0.11 - - NIC=eth0 - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: {num_cpus} diff --git a/deploy/dev_generate/stub_default.yml b/deploy/docker/stub_default.yml similarity index 100% rename from deploy/dev_generate/stub_default.yml rename to deploy/docker/stub_default.yml diff --git a/deploy/dev_generate/system_stub.yml b/deploy/docker/system_stub.yml similarity index 100% rename from deploy/dev_generate/system_stub.yml rename to deploy/docker/system_stub.yml diff --git a/deploy/p11_freezoff/client_stub_default.yml b/deploy/p11_freezoff/client_stub_default.yml deleted file mode 100644 index 43d6c919..00000000 --- a/deploy/p11_freezoff/client_stub_default.yml +++ /dev/null @@ -1,26 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=default - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '1' -# memory: 1024M diff --git a/deploy/p11_freezoff/client_stub_fast.yml b/deploy/p11_freezoff/client_stub_fast.yml deleted file mode 100644 index 2c40393f..00000000 --- a/deploy/p11_freezoff/client_stub_fast.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=fast - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '1' diff --git a/deploy/p11_freezoff/client_stub_medium.yml b/deploy/p11_freezoff/client_stub_medium.yml deleted file mode 100644 index 677accdf..00000000 --- a/deploy/p11_freezoff/client_stub_medium.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . # look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=medium - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '0.75' diff --git a/deploy/p11_freezoff/client_stub_slow.yml b/deploy/p11_freezoff/client_stub_slow.yml deleted file mode 100644 index bafa7e5a..00000000 --- a/deploy/p11_freezoff/client_stub_slow.yml +++ /dev/null @@ -1,26 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: - - ./data:/opt/federation-lab/data -# - ./docker_data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=slow - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '0.5' - # cpus: '0.1' \ No newline at end of file diff --git a/deploy/p11_freezoff/system_stub.yml b/deploy/p11_freezoff/system_stub.yml deleted file mode 100644 index 77a19443..00000000 --- a/deploy/p11_freezoff/system_stub.yml +++ /dev/null @@ -1,27 +0,0 @@ -# creating a multi-container docker -version: "3.3" -services: - fl_server: # name can be anything - container_name: federation-lab-server # what the name for this container would be - cpuset: '0-1' - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./data/MNIST:/opt/federation-lab/data/MNIST - - ./data:/opt/federation-lab/data - - ./output:/opt/federation-lab/output - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK=0 - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - ports: - - "5000:5000" # {machine-port}:{docker-port} - networks: - default: - ipv4_address: 10.5.0.11 -networks: - default: - external: - name: local_network_dev \ No newline at end of file diff --git a/deploy/p11_freezoff_fast/client_stub_default.yml b/deploy/p11_freezoff_fast/client_stub_default.yml deleted file mode 100644 index 43d6c919..00000000 --- a/deploy/p11_freezoff_fast/client_stub_default.yml +++ /dev/null @@ -1,26 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . # look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=default - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '1' -# memory: 1024M diff --git a/deploy/p11_freezoff_fast/client_stub_fast.yml b/deploy/p11_freezoff_fast/client_stub_fast.yml deleted file mode 100644 index d7c98ce0..00000000 --- a/deploy/p11_freezoff_fast/client_stub_fast.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example 
- build: . # look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=fast - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '3' diff --git a/deploy/p11_freezoff_fast/client_stub_medium.yml b/deploy/p11_freezoff_fast/client_stub_medium.yml deleted file mode 100644 index 677accdf..00000000 --- a/deploy/p11_freezoff_fast/client_stub_medium.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=medium - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '0.75' diff --git a/deploy/p11_freezoff_fast/client_stub_slow.yml b/deploy/p11_freezoff_fast/client_stub_slow.yml deleted file mode 100644 index f1ef01a8..00000000 --- a/deploy/p11_freezoff_fast/client_stub_slow.yml +++ /dev/null @@ -1,26 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: - - ./data:/opt/federation-lab/data -# - ./docker_data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=slow - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '0.1' - # cpus: '0.1' \ No newline at end of file diff --git a/deploy/p11_freezoff_fast/system_stub.yml b/deploy/p11_freezoff_fast/system_stub.yml deleted file mode 100644 index 77a19443..00000000 --- a/deploy/p11_freezoff_fast/system_stub.yml +++ /dev/null @@ -1,27 +0,0 @@ -# creating a multi-container docker -version: "3.3" -services: - fl_server: # name can be anything - container_name: federation-lab-server # what the name for this container would be - cpuset: '0-1' - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./data/MNIST:/opt/federation-lab/data/MNIST - - ./data:/opt/federation-lab/data - - ./output:/opt/federation-lab/output - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK=0 - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - ports: - - "5000:5000" # {machine-port}:{docker-port} - networks: - default: - ipv4_address: 10.5.0.11 -networks: - default: - external: - name: local_network_dev \ No newline at end of file diff --git a/deploy/p23_freezoff_w9s3-half/client_stub_default.yml b/deploy/p23_freezoff_w9s3-half/client_stub_default.yml deleted file mode 100644 index 43d6c919..00000000 --- a/deploy/p23_freezoff_w9s3-half/client_stub_default.yml +++ /dev/null @@ -1,26 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=default - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '1' -# memory: 1024M diff --git a/deploy/p23_freezoff_w9s3-half/client_stub_fast.yml b/deploy/p23_freezoff_w9s3-half/client_stub_fast.yml deleted file mode 100644 index d7c98ce0..00000000 --- a/deploy/p23_freezoff_w9s3-half/client_stub_fast.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=fast - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '3' diff --git a/deploy/p23_freezoff_w9s3-half/client_stub_medium.yml b/deploy/p23_freezoff_w9s3-half/client_stub_medium.yml deleted file mode 100644 index f6bded5d..00000000 --- a/deploy/p23_freezoff_w9s3-half/client_stub_medium.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=medium - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '2' diff --git a/deploy/p23_freezoff_w9s3-half/client_stub_slow.yml b/deploy/p23_freezoff_w9s3-half/client_stub_slow.yml deleted file mode 100644 index a5ead21c..00000000 --- a/deploy/p23_freezoff_w9s3-half/client_stub_slow.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: - - ./data:/opt/federation-lab/data -# - ./docker_data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=slow - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '0.5' \ No newline at end of file diff --git a/deploy/p23_freezoff_w9s3-half/system_stub.yml b/deploy/p23_freezoff_w9s3-half/system_stub.yml deleted file mode 100644 index 77a19443..00000000 --- a/deploy/p23_freezoff_w9s3-half/system_stub.yml +++ /dev/null @@ -1,27 +0,0 @@ -# creating a multi-container docker -version: "3.3" -services: - fl_server: # name can be anything - container_name: federation-lab-server # what the name for this container would be - cpuset: '0-1' - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./data/MNIST:/opt/federation-lab/data/MNIST - - ./data:/opt/federation-lab/data - - ./output:/opt/federation-lab/output - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK=0 - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - ports: - - "5000:5000" # {machine-port}:{docker-port} - networks: - default: - ipv4_address: 10.5.0.11 -networks: - default: - external: - name: local_network_dev \ No newline at end of file diff --git a/deploy/p23_freezoff_w9s3/client_stub_default.yml b/deploy/p23_freezoff_w9s3/client_stub_default.yml deleted file mode 100644 index 43d6c919..00000000 --- a/deploy/p23_freezoff_w9s3/client_stub_default.yml +++ /dev/null @@ -1,26 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . # look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=default - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '1' -# memory: 1024M diff --git a/deploy/p23_freezoff_w9s3/client_stub_fast.yml b/deploy/p23_freezoff_w9s3/client_stub_fast.yml deleted file mode 100644 index d7c98ce0..00000000 --- a/deploy/p23_freezoff_w9s3/client_stub_fast.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example 
- build: . # look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=fast - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '3' diff --git a/deploy/p23_freezoff_w9s3/client_stub_medium.yml b/deploy/p23_freezoff_w9s3/client_stub_medium.yml deleted file mode 100644 index f6bded5d..00000000 --- a/deploy/p23_freezoff_w9s3/client_stub_medium.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=medium - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '2' diff --git a/deploy/p23_freezoff_w9s3/client_stub_slow.yml b/deploy/p23_freezoff_w9s3/client_stub_slow.yml deleted file mode 100644 index bdd138f4..00000000 --- a/deploy/p23_freezoff_w9s3/client_stub_slow.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: - - ./data:/opt/federation-lab/data -# - ./docker_data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=slow - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '0.75' \ No newline at end of file diff --git a/deploy/p23_freezoff_w9s3/system_stub.yml b/deploy/p23_freezoff_w9s3/system_stub.yml deleted file mode 100644 index 77a19443..00000000 --- a/deploy/p23_freezoff_w9s3/system_stub.yml +++ /dev/null @@ -1,27 +0,0 @@ -# creating a multi-container docker -version: "3.3" -services: - fl_server: # name can be anything - container_name: federation-lab-server # what the name for this container would be - cpuset: '0-1' - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./data/MNIST:/opt/federation-lab/data/MNIST - - ./data:/opt/federation-lab/data - - ./output:/opt/federation-lab/output - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK=0 - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - ports: - - "5000:5000" # {machine-port}:{docker-port} - networks: - default: - ipv4_address: 10.5.0.11 -networks: - default: - external: - name: local_network_dev \ No newline at end of file diff --git a/deploy/p23_freezoff_w9s3_fast/client_stub_default.yml b/deploy/p23_freezoff_w9s3_fast/client_stub_default.yml deleted file mode 100644 index 43d6c919..00000000 --- a/deploy/p23_freezoff_w9s3_fast/client_stub_default.yml +++ /dev/null @@ -1,26 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=default - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '1' -# memory: 1024M diff --git a/deploy/p23_freezoff_w9s3_fast/client_stub_fast.yml b/deploy/p23_freezoff_w9s3_fast/client_stub_fast.yml deleted file mode 100644 index d7c98ce0..00000000 --- a/deploy/p23_freezoff_w9s3_fast/client_stub_fast.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=fast - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '3' diff --git a/deploy/p23_freezoff_w9s3_fast/client_stub_medium.yml b/deploy/p23_freezoff_w9s3_fast/client_stub_medium.yml deleted file mode 100644 index f6bded5d..00000000 --- a/deploy/p23_freezoff_w9s3_fast/client_stub_medium.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=medium - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '2' diff --git a/deploy/p23_freezoff_w9s3_fast/client_stub_slow.yml b/deploy/p23_freezoff_w9s3_fast/client_stub_slow.yml deleted file mode 100644 index 19a0ab36..00000000 --- a/deploy/p23_freezoff_w9s3_fast/client_stub_slow.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: - - ./data:/opt/federation-lab/data -# - ./docker_data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=slow - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '1' \ No newline at end of file diff --git a/deploy/p23_freezoff_w9s3_fast/system_stub.yml b/deploy/p23_freezoff_w9s3_fast/system_stub.yml deleted file mode 100644 index 77a19443..00000000 --- a/deploy/p23_freezoff_w9s3_fast/system_stub.yml +++ /dev/null @@ -1,27 +0,0 @@ -# creating a multi-container docker -version: "3.3" -services: - fl_server: # name can be anything - container_name: federation-lab-server # what the name for this container would be - cpuset: '0-1' - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./data/MNIST:/opt/federation-lab/data/MNIST - - ./data:/opt/federation-lab/data - - ./output:/opt/federation-lab/output - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK=0 - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - ports: - - "5000:5000" # {machine-port}:{docker-port} - networks: - default: - ipv4_address: 10.5.0.11 -networks: - default: - external: - name: local_network_dev \ No newline at end of file diff --git a/deploy/p28_non_iid_effect/client_stub_default.yml b/deploy/p28_non_iid_effect/client_stub_default.yml deleted file mode 100644 index 43d6c919..00000000 --- a/deploy/p28_non_iid_effect/client_stub_default.yml +++ /dev/null @@ -1,26 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . # look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=default - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '1' -# memory: 1024M diff --git a/deploy/p28_non_iid_effect/client_stub_fast.yml b/deploy/p28_non_iid_effect/client_stub_fast.yml deleted file mode 100644 index 2c40393f..00000000 --- a/deploy/p28_non_iid_effect/client_stub_fast.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for 
example - build: . # look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=fast - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '1' diff --git a/deploy/p28_non_iid_effect/client_stub_medium.yml b/deploy/p28_non_iid_effect/client_stub_medium.yml deleted file mode 100644 index 677accdf..00000000 --- a/deploy/p28_non_iid_effect/client_stub_medium.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=medium - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '0.75' diff --git a/deploy/p28_non_iid_effect/client_stub_slow.yml b/deploy/p28_non_iid_effect/client_stub_slow.yml deleted file mode 100644 index 75771735..00000000 --- a/deploy/p28_non_iid_effect/client_stub_slow.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: - - ./data:/opt/federation-lab/data -# - ./docker_data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=slow - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '0.1' \ No newline at end of file diff --git a/deploy/p28_non_iid_effect/system_stub.yml b/deploy/p28_non_iid_effect/system_stub.yml deleted file mode 100644 index 77a19443..00000000 --- a/deploy/p28_non_iid_effect/system_stub.yml +++ /dev/null @@ -1,27 +0,0 @@ -# creating a multi-container docker -version: "3.3" -services: - fl_server: # name can be anything - container_name: federation-lab-server # what the name for this container would be - cpuset: '0-1' - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./data/MNIST:/opt/federation-lab/data/MNIST - - ./data:/opt/federation-lab/data - - ./output:/opt/federation-lab/output - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK=0 - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - ports: - - "5000:5000" # {machine-port}:{docker-port} - networks: - default: - ipv4_address: 10.5.0.11 -networks: - default: - external: - name: local_network_dev \ No newline at end of file diff --git a/deploy/templates/client_stub_default.yml b/deploy/templates/client_stub_default.yml deleted file mode 100644 index 3a1774cf..00000000 --- a/deploy/templates/client_stub_default.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . # look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '2' -# memory: 1024M diff --git a/deploy/templates/client_stub_fast.yml b/deploy/templates/client_stub_fast.yml deleted file mode 100644 index f03012ff..00000000 --- a/deploy/templates/client_stub_fast.yml +++ /dev/null @@ -1,24 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '3' diff --git a/deploy/templates/client_stub_medium.yml b/deploy/templates/client_stub_medium.yml deleted file mode 100644 index 49abdeb2..00000000 --- a/deploy/templates/client_stub_medium.yml +++ /dev/null @@ -1,24 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . # look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '2' diff --git a/deploy/templates/client_stub_slow.yml b/deploy/templates/client_stub_slow.yml deleted file mode 100644 index 9cbdabb5..00000000 --- a/deploy/templates/client_stub_slow.yml +++ /dev/null @@ -1,24 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: - - ./data:/opt/federation-lab/data -# - ./docker_data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '0.5' \ No newline at end of file diff --git a/deploy/templates/system_stub.yml b/deploy/templates/system_stub.yml deleted file mode 100644 index c84b2ecb..00000000 --- a/deploy/templates/system_stub.yml +++ /dev/null @@ -1,27 +0,0 @@ -# creating a multi-container docker -version: "3.3" -services: - fl_server: # name can be anything - container_name: federation-lab-server # what the name for this container would be - cpuset: '0-2' - restart: "no" # if it crashes for example - build: . # look for the docker file where this file is currently located - volumes: -# - ./data/MNIST:/opt/federation-lab/data/MNIST - - ./data:/opt/federation-lab/data - - ./output:/opt/federation-lab/output - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK=0 - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - ports: - - "5000:5000" # {machine-port}:{docker-port} - networks: - default: - ipv4_address: 10.5.0.11 -networks: - default: - external: - name: local_network_dev \ No newline at end of file diff --git a/deploy/terminate/client_stub_default.yml b/deploy/terminate/client_stub_default.yml deleted file mode 100644 index 3a1774cf..00000000 --- a/deploy/terminate/client_stub_default.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '2' -# memory: 1024M diff --git a/deploy/terminate/client_stub_fast.yml b/deploy/terminate/client_stub_fast.yml deleted file mode 100644 index f03012ff..00000000 --- a/deploy/terminate/client_stub_fast.yml +++ /dev/null @@ -1,24 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . # look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '3' diff --git a/deploy/terminate/client_stub_medium.yml b/deploy/terminate/client_stub_medium.yml deleted file mode 100644 index 8ed98ed0..00000000 --- a/deploy/terminate/client_stub_medium.yml +++ /dev/null @@ -1,24 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '1' diff --git a/deploy/terminate/client_stub_slow.yml b/deploy/terminate/client_stub_slow.yml deleted file mode 100644 index caa4daae..00000000 --- a/deploy/terminate/client_stub_slow.yml +++ /dev/null @@ -1,24 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . # look for the docker file where this file is currently located - volumes: - - ./data:/opt/federation-lab/data -# - ./docker_data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '0.25' \ No newline at end of file diff --git a/deploy/terminate/system_stub.yml b/deploy/terminate/system_stub.yml deleted file mode 100644 index c84b2ecb..00000000 --- a/deploy/terminate/system_stub.yml +++ /dev/null @@ -1,27 +0,0 @@ -# creating a multi-container docker -version: "3.3" -services: - fl_server: # name can be anything - container_name: federation-lab-server # what the name for this container would be - cpuset: '0-2' - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./data/MNIST:/opt/federation-lab/data/MNIST - - ./data:/opt/federation-lab/data - - ./output:/opt/federation-lab/output - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK=0 - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - ports: - - "5000:5000" # {machine-port}:{docker-port} - networks: - default: - ipv4_address: 10.5.0.11 -networks: - default: - external: - name: local_network_dev \ No newline at end of file diff --git a/deploy/tifl-15/client_stub_default.yml b/deploy/tifl-15/client_stub_default.yml deleted file mode 100644 index 43d6c919..00000000 --- a/deploy/tifl-15/client_stub_default.yml +++ /dev/null @@ -1,26 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . # look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=default - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '1' -# memory: 1024M diff --git a/deploy/tifl-15/client_stub_fast.yml b/deploy/tifl-15/client_stub_fast.yml deleted file mode 100644 index 2c40393f..00000000 --- a/deploy/tifl-15/client_stub_fast.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=fast - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '1' diff --git a/deploy/tifl-15/client_stub_medium.yml b/deploy/tifl-15/client_stub_medium.yml deleted file mode 100644 index 666e7891..00000000 --- a/deploy/tifl-15/client_stub_medium.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . # look for the docker file where this file is currently located - volumes: -# - ./docker_data:/opt/federation-lab/data - - ./data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=medium - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '0.5' diff --git a/deploy/tifl-15/client_stub_slow.yml b/deploy/tifl-15/client_stub_slow.yml deleted file mode 100644 index ae578071..00000000 --- a/deploy/tifl-15/client_stub_slow.yml +++ /dev/null @@ -1,25 +0,0 @@ -client_name: # name can be anything -# container_name: federation-lab-client2 # what the name for this container would be - cpuset: {cpu_set} - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: - - ./data:/opt/federation-lab/data -# - ./docker_data:/opt/federation-lab/data - - ./default_models:/opt/federation-lab/default_models - - ./data_loaders:/opt/federation-lab/data_loaders - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK={rank} - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - - NODE_GROUP=slow - ports: - - "5002:5000" # {machine-port}:{docker-port} - depends_on: - - "fl_server" - deploy: - resources: - limits: - cpus: '0.25' \ No newline at end of file diff --git a/deploy/tifl-15/system_stub.yml b/deploy/tifl-15/system_stub.yml deleted file mode 100644 index 77a19443..00000000 --- a/deploy/tifl-15/system_stub.yml +++ /dev/null @@ -1,27 +0,0 @@ -# creating a multi-container docker -version: "3.3" -services: - fl_server: # name can be anything - container_name: federation-lab-server # what the name for this container would be - cpuset: '0-1' - restart: "no" # if it crashes for example - build: . 
# look for the docker file where this file is currently located - volumes: -# - ./data/MNIST:/opt/federation-lab/data/MNIST - - ./data:/opt/federation-lab/data - - ./output:/opt/federation-lab/output - - ./fltk:/opt/federation-lab/fltk - environment: - - PYTHONUNBUFFERED=1 - - RANK=0 - - WORLD_SIZE={world_size} - - EXP_CONFIG=${EXP_CONFIG_FILE} - ports: - - "5000:5000" # {machine-port}:{docker-port} - networks: - default: - ipv4_address: 10.5.0.11 -networks: - default: - external: - name: local_network_dev \ No newline at end of file diff --git a/configs/dev_mnist/exps/fedavg_direct.yaml b/experiments/example_docker/descr.yaml similarity index 52% rename from configs/dev_mnist/exps/fedavg_direct.yaml rename to experiments/example_docker/descr.yaml index ab294bb3..79695fcf 100644 --- a/configs/dev_mnist/exps/fedavg_direct.yaml +++ b/experiments/example_docker/descr.yaml @@ -9,7 +9,7 @@ dataset: mnist cuda: false profiling_time: 100 warmup_round: false -output_location: 'output/dev_p2' +output_location: 'output/example_docker' tensor_board_active: true clients_per_round: 2 node_groups: @@ -17,22 +17,31 @@ node_groups: medium: [2, 2] fast: [3, 3] sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) sampler_args: - 0.07 # label limit || q probability || alpha || unused - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 2 num_clients: 2 -# Individual configuration -offload_stategy: vanilla -deadline: 500 -single_machine: true -real_time: false -experiment_prefix: 'util_fedavg_direct' +replications: 2 +deploy: + docker: + base_path: deploy/docker + federator: + stub-name: system_stub.yml + pin-cores: true + num-cores: 1 + clients: + fast: + stub-name: stub_default.yml + amount: 2 + pin-cores: true + num-cores: 
1 + cpu-speed: 1 + cpu-variation: 0 + slow: + stub-name: stub_default.yml + amount: 0 + pin-cores: true + num-cores: 1 + cpu-speed: 1 + cpu-variation: 0 + diff --git a/configs/dev_mnist/fedavg.cfg.yaml b/experiments/example_docker/fedavg.cfg.yaml similarity index 100% rename from configs/dev_mnist/fedavg.cfg.yaml rename to experiments/example_docker/fedavg.cfg.yaml diff --git a/configs/dev_mnist/descr.yaml b/experiments/example_native/descr.yaml similarity index 68% rename from configs/dev_mnist/descr.yaml rename to experiments/example_native/descr.yaml index 87b954ba..d5e5386f 100644 --- a/configs/dev_mnist/descr.yaml +++ b/experiments/example_native/descr.yaml @@ -9,7 +9,7 @@ dataset: mnist cuda: false profiling_time: 100 warmup_round: false -output_location: 'output/dev_p2' +output_location: 'output/example_native' tensor_board_active: true clients_per_round: 2 node_groups: @@ -17,16 +17,8 @@ node_groups: medium: [2, 2] fast: [3, 3] sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) -#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) sampler_args: - 0.07 # label limit || q probability || alpha || unused - 42 # random seed || random seed || random seed || unused -system: - federator: - # hostname: '131.180.203.94' - hostname: '10.5.0.11' - nic: 'eth0' - # nic: 'enp3s0' - clients: - amount: 2 num_clients: 2 +replications: 2 diff --git a/configs/dev/dev_p2_fedprox.cfg.yaml b/experiments/example_native/fedavg.cfg.yaml similarity index 64% rename from configs/dev/dev_p2_fedprox.cfg.yaml rename to experiments/example_native/fedavg.cfg.yaml index 7b4cc2bb..25a64bda 100644 --- a/configs/dev/dev_p2_fedprox.cfg.yaml +++ b/experiments/example_native/fedavg.cfg.yaml @@ -1,4 +1,5 @@ # Individual configuration offload_stategy: vanilla deadline: 500 -optimizer: FedProx +single_machine: true +real_time: false \ No newline at end of file diff --git a/fltk/util/generate_docker_compose_2.py 
b/fltk/util/generate_docker_compose_2.py index 7185fa01..28c9025d 100644 --- a/fltk/util/generate_docker_compose_2.py +++ b/fltk/util/generate_docker_compose_2.py @@ -1,3 +1,4 @@ +import argparse import copy from pathlib import Path import yaml @@ -67,20 +68,8 @@ def generate_clients_proporties(clients_dict: dict, path: Path): results += gen_client(k, v, path) return results - -def generate_compose_file(path: Path): - """ - Used properties: - - World size - - num clients? - - path to deploy files - - random seed? - """ - - system_path = path / 'description.yml' - system = load_yaml_file(system_path) - # path = Path('deploy/dev_generate') - +def generate_compose_file_from_dict(system: dict): + path = Path(system['base_path']) client_descriptions = generate_clients_proporties(system['clients'], path) last_core_id = 0 world_size = len(client_descriptions) + 1 @@ -95,7 +84,7 @@ def generate_compose_file(path: Path): cpu_set: str amount = system['federator']['num-cores'] if amount > 1: - cpu_set = f'{last_core_id}-{last_core_id+amount-1}' + cpu_set = f'{last_core_id}-{last_core_id + amount - 1}' else: cpu_set = f'{last_core_id}' system_template['services']['fl_server']['cpuset'] = cpu_set @@ -109,18 +98,39 @@ def generate_compose_file(path: Path): if client_d['num_cores']: amount = client_d['num_cores'] if amount > 1: - cpu_set = f'{last_core_id}-{last_core_id+amount-1}' + cpu_set = f'{last_core_id}-{last_core_id + amount - 1}' else: cpu_set = f'{last_core_id}' last_core_id += amount - local_template, container_name = generate_client(idx + 1, stub_data, world_size, client_d['name'], cpu_set, client_d['num_cpu']) + local_template, container_name = generate_client(idx + 1, stub_data, world_size, client_d['name'], cpu_set, + client_d['num_cpu']) system_template['services'].update(local_template) print(container_name) with open(r'./docker-compose.yml', 'w') as file: yaml.dump(system_template, file, sort_keys=False) +def generate_compose_file(path: Path): + """ + Used 
properties: + - World size + - num clients? + - path to deploy files + - random seed? + """ + + system_path = path / 'description.yml' + system = load_yaml_file(system_path) + # path = Path('deploy/dev_generate') + generate_compose_file_from_dict(system) + + if __name__ == '__main__': - path = Path('deploy/dev_generate') + parser = argparse.ArgumentParser(description='Generate docker-compose file') + parser.add_argument('path', type=str, + help='Path to a deployment config folder') + parser.add_argument('--clients', type=int, help='Set the number of clients in the system', default=None) + args = parser.parse_args() + path = Path(args.path) results = generate_compose_file(path) print('done') \ No newline at end of file diff --git a/fltk/util/generate_experiments.py b/fltk/util/generate_experiments.py index c424c889..09b39f73 100644 --- a/fltk/util/generate_experiments.py +++ b/fltk/util/generate_experiments.py @@ -1,7 +1,8 @@ +import copy from pathlib import Path import os import yaml -from fltk.util.generate_docker_compose_2 import generate_compose_file +from fltk.util.generate_docker_compose_2 import generate_compose_file, generate_compose_file_from_dict def rm_tree(pth: Path): @@ -13,6 +14,16 @@ def rm_tree(pth: Path): # pth.rmdir() +def check_num_clients_consistency(cfg_data: dict): + if type(cfg_data) is str: + cfg_data = yaml.safe_load(copy.deepcopy(cfg_data)) + + if 'deploy' in cfg_data and 'docker' in cfg_data['deploy']: + num_docker_clients = sum([x['amount'] for x in cfg_data['deploy']['docker']['clients'].values()]) + if cfg_data['num_clients'] != num_docker_clients: + print('[Warning]\t Number of docker clients is not equal to the num_clients property!') + + def generate(base_path: Path): descr_path = base_path / 'descr.yaml' @@ -23,6 +34,8 @@ def generate(base_path: Path): exps_path = base_path / 'exps' rm_tree(exps_path) exps_path.mkdir(parents=True, exist_ok=True) + + check_num_clients_consistency(descr_data) for exp_cfg in exp_cfg_list: exp_cfg_data 
= '' with open(exp_cfg) as exp_f: @@ -74,15 +87,17 @@ def run(base_path: Path): if 'replications' in descr_data: replications = descr_data['replications'] run_docker = False - if 'docker_system' in descr_data: + if 'deploy' in descr_data and 'docker' in descr_data['deploy']: + # if 'docker_system' in descr_data: # Run in docker # Generate Docker print(descr_data) - docker_deploy_path = Path(descr_data['docker_system']) + docker_deploy_path = Path(descr_data['deploy']['docker']['base_path']) print(docker_deploy_path) run_docker = True - generate_compose_file(docker_deploy_path) + generate_compose_file_from_dict(descr_data['deploy']['docker']) + # generate_compose_file(docker_deploy_path) exp_files = [x for x in (base_path / 'exps').iterdir() if x.suffix in ['.yaml', '.yml']] From 2a22902574c5a275e15bd2fa2969703103106c9c Mon Sep 17 00:00:00 2001 From: bacox Date: Thu, 17 Mar 2022 10:52:08 +0100 Subject: [PATCH 71/73] Update analysis script --- examples/README.md | 1 + experiments/example_docker/descr.yaml | 1 + experiments/example_native/descr.yaml | 5 ++- fltk/core/federator.py | 2 +- fltk/util/analysis.py | 60 +++++++++++++++++++++++---- 5 files changed, 58 insertions(+), 11 deletions(-) create mode 100644 examples/README.md diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 00000000..d3f2afac --- /dev/null +++ b/examples/README.md @@ -0,0 +1 @@ +These examples are outdated! 
\ No newline at end of file diff --git a/experiments/example_docker/descr.yaml b/experiments/example_docker/descr.yaml index 79695fcf..b1a7aaa2 100644 --- a/experiments/example_docker/descr.yaml +++ b/experiments/example_docker/descr.yaml @@ -1,6 +1,7 @@ --- # Experiment configuration total_epochs: 3 +rounds: 5 epochs_per_cycle: 1 wait_for_clients: true net: MNISTCNN diff --git a/experiments/example_native/descr.yaml b/experiments/example_native/descr.yaml index d5e5386f..c254640b 100644 --- a/experiments/example_native/descr.yaml +++ b/experiments/example_native/descr.yaml @@ -1,6 +1,7 @@ --- # Experiment configuration total_epochs: 3 +rounds: 5 epochs_per_cycle: 1 wait_for_clients: true net: MNISTCNN @@ -20,5 +21,5 @@ sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" ( sampler_args: - 0.07 # label limit || q probability || alpha || unused - 42 # random seed || random seed || random seed || unused -num_clients: 2 -replications: 2 +num_clients: 10 +replications: 5 diff --git a/fltk/core/federator.py b/fltk/core/federator.py index 4975e0bc..99fde847 100644 --- a/fltk/core/federator.py +++ b/fltk/core/federator.py @@ -245,6 +245,6 @@ def all_futures_done(futures: List[torch.Future])->bool: end_time = time.time() duration = end_time - start_time - self.exp_data.append(FederatorRecord(len(selected_clients), 0, duration, test_loss, test_accuracy)) + self.exp_data.append(FederatorRecord(len(selected_clients), id, duration, test_loss, test_accuracy)) self.logger.info(f'[Round {id:>3}] Round duration is {duration} seconds') diff --git a/fltk/util/analysis.py b/fltk/util/analysis.py index b35ce4c8..1fca083f 100644 --- a/fltk/util/analysis.py +++ b/fltk/util/analysis.py @@ -7,7 +7,8 @@ import seaborn as sns import re -# alt.renderers.enable('mimetype') +from matplotlib.lines import Line2D + def get_cwd() -> Path: return Path.cwd() @@ -46,21 +47,64 @@ def plot_client_duration(df: pd.DataFrame): plt.tight_layout() plt.show() +def 
plot_federator_accuracy(df: pd.DataFrame): + plt.figure() + g = sns.lineplot(data=df, x='round_id', y='test_accuracy') + # df.plot(x="date", y="column2", ax=ax2, legend=False, color="r") + sns.lineplot(ax=g.axes.twinx(), data=df, x='round_id', y='test_loss', color='r') + plt.title('Federator test accuracy') + g.legend(handles=[Line2D([], [], marker='_', color="r", label='test_loss'), + Line2D([], [], marker='_', color="b", label='test_accuracy')]) + plt.tight_layout() + plt.show() -def analyse(path: Path): - cwd = get_cwd() - output_path = cwd / get_exp_name(path) - ensure_path_exists(output_path) +def plot_clients_accuracy(df: pd.DataFrame): + plt.figure() + g = sns.lineplot(data=df, x='round_id', y='accuracy', hue='node_name') + plt.title('Client test accuracy') + plt.tight_layout() + plt.show() + + +def load_replication(path: Path, replication_id: int): all_files = [x for x in path.iterdir() if x.is_file()] federator_files = [x for x in all_files if 'federator' in x.name] client_files = [x for x in all_files if x.name.startswith('client')] federator_data = load_and_merge_dfs(federator_files) + federator_data['replication'] = replication_id client_data = load_and_merge_dfs(client_files) + client_data['replication'] = replication_id + return federator_data, client_data - # print(len(client_data), len(federator_data)) - plot_client_duration(client_data) - # What do we want to plot in terms of data? 
+def analyse(path: Path): + # cwd = get_cwd() + # output_path = cwd / get_exp_name(path) + # ensure_path_exists(output_path) + replications = [x for x in path.iterdir() if x.is_dir()] + print(replications) + client_dfs = [] + federator_dfs = [] + for replication_path in replications: + replication_id = int(replication_path.name.split('_')[-1][1:]) + federator_data, client_data = load_replication(replication_path, replication_id) + client_dfs.append(client_data) + federator_dfs.append(federator_data) + + federator_df = pd.concat(federator_dfs, ignore_index=True) + client_df = pd.concat(client_dfs, ignore_index=True) + # all_files = [x for x in path.iterdir() if x.is_file()] + # federator_files = [x for x in all_files if 'federator' in x.name] + # client_files = [x for x in all_files if x.name.startswith('client')] + # + # federator_data = load_and_merge_dfs(federator_files) + # client_data = load_and_merge_dfs(client_files) + # + # # print(len(client_data), len(federator_data)) + plot_client_duration(client_df) + plot_federator_accuracy(federator_df) + plot_clients_accuracy(client_df) + # # What do we want to plot in terms of data? 
if __name__ == '__main__': From 672f9268a9accd74806e7a46ebc262ef4619b4eb Mon Sep 17 00:00:00 2001 From: bacox Date: Thu, 17 Mar 2022 10:53:19 +0100 Subject: [PATCH 72/73] Add cuda example --- experiments/example_cuda/descr.yaml | 25 ++++++++++++++++++++++++ experiments/example_cuda/fedavg.cfg.yaml | 5 +++++ 2 files changed, 30 insertions(+) create mode 100644 experiments/example_cuda/descr.yaml create mode 100644 experiments/example_cuda/fedavg.cfg.yaml diff --git a/experiments/example_cuda/descr.yaml b/experiments/example_cuda/descr.yaml new file mode 100644 index 00000000..b42db5f4 --- /dev/null +++ b/experiments/example_cuda/descr.yaml @@ -0,0 +1,25 @@ +--- +# Experiment configuration +total_epochs: 3 +rounds: 5 +epochs_per_cycle: 1 +wait_for_clients: true +net: MNISTCNN +dataset: mnist +# Use cuda is available; setting to false will force CPU +cuda: true +profiling_time: 100 +warmup_round: false +output_location: 'output/example_cuda' +tensor_board_active: true +clients_per_round: 2 +node_groups: + slow: [1, 1] + medium: [2, 2] + fast: [3, 3] +sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default) +sampler_args: + - 0.07 # label limit || q probability || alpha || unused + - 42 # random seed || random seed || random seed || unused +num_clients: 10 +replications: 5 diff --git a/experiments/example_cuda/fedavg.cfg.yaml b/experiments/example_cuda/fedavg.cfg.yaml new file mode 100644 index 00000000..25a64bda --- /dev/null +++ b/experiments/example_cuda/fedavg.cfg.yaml @@ -0,0 +1,5 @@ +# Individual configuration +offload_stategy: vanilla +deadline: 500 +single_machine: true +real_time: false \ No newline at end of file From ce1936aff99d1dfb110016a56bee21c1165a8fe5 Mon Sep 17 00:00:00 2001 From: bacox Date: Thu, 17 Mar 2022 14:17:52 +0100 Subject: [PATCH 73/73] Fix device misconfiguration --- fltk/core/node.py | 2 +- fltk/util/config.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fltk/core/node.py 
b/fltk/core/node.py index 6d8aac0c..6cd0958e 100644 --- a/fltk/core/node.py +++ b/fltk/core/node.py @@ -56,7 +56,7 @@ def _config(self, config: Config): self.config.rank = self.rank self.config.world_size = self.world_size self.cuda = config.cuda - self.init_device() + self.device = self.init_device() self.distributed = config.distributed self.set_net(self.load_default_model()) diff --git a/fltk/util/config.py b/fltk/util/config.py index 5682059a..c83356db 100644 --- a/fltk/util/config.py +++ b/fltk/util/config.py @@ -70,6 +70,10 @@ class Config: def __init__(self, **kwargs) -> None: enum_fields = [x for x in self.__dataclass_fields__.items() if isinstance(x[1].type, Enum) or isinstance(x[1].type, EnumMeta)] + if 'dataset' in kwargs and 'dataset_name' not in kwargs: + kwargs['dataset_name'] = kwargs['dataset'] + if 'net' in kwargs and 'net_name' not in kwargs: + kwargs['net_name'] = kwargs['net'] for name, field in enum_fields: if name in kwargs and isinstance(kwargs[name], str): kwargs[name] = field.type(kwargs[name])