From 16c8154070b75877d17cd9a0c5b440e54141fa3e Mon Sep 17 00:00:00 2001
From: Jeroen Galjaard
Date: Mon, 23 Aug 2021 11:08:32 +0200
Subject: [PATCH] Refactor config and create json dataclasses

---
Review notes (ignored by `git am`; not part of the commit message):
 * ExecutionConfig.tensorboard_active is annotated `bool` here; it was
   annotated `str` while defaulting to True.
 * To confirm: cloud_experiment.yaml uses the keys `cluster`, `worker`, `net`
   and `tensor_board_active` and nests `wait_for_clients` under
   `orchestrator`, whereas the dataclasses name them `cluster_config`,
   `client`, `general_net` and `tensorboard_active` and declare
   `wait_for_clients` on ClusterConfig.
 * To confirm: BareConfig keeps its hand-written __init__, so @dataclass does
   not generate one that accepts execution_config / cluster_config.
 * To confirm: `epoch_save_end_suffix = "end"` carries no annotation, so it is
   a plain class attribute rather than a dataclass field.
 * Indentation and blank lines were restored from the hunk line counts; run
   `git apply --check` before applying.

 configs/cloud_experiment.yaml   | 41 ++++++++-------
 configs/experiment.yaml         | 15 ------
 configs/local_experiment.yaml   | 38 --------------
 configs/non_iid_experiment.yaml | 23 ---------
 fltk/util/base_config.py        | 88 ++++++++++++++++-----------------
 5 files changed, 62 insertions(+), 143 deletions(-)
 delete mode 100644 configs/experiment.yaml
 delete mode 100644 configs/local_experiment.yaml
 delete mode 100644 configs/non_iid_experiment.yaml

diff --git a/configs/cloud_experiment.yaml b/configs/cloud_experiment.yaml
index f3fa8c35..b4810539 100644
--- a/configs/cloud_experiment.yaml
+++ b/configs/cloud_experiment.yaml
@@ -1,24 +1,23 @@
-# Experiment configuration
-total_epochs: 130
-epochs_per_cycle: 1
-wait_for_clients: true
-net: Cifar10CNN
-dataset: cifar10
-sampler: "uniform"
-sampler_args:
-  - 0.5 # p degree
-  - 42 # random seed
-# Use cuda is available; setting to false will force CPU
-cuda: false
-experiment_prefix: 'experiment_single_machine'
-output_location: 'output'
-tensor_board_active: true
-clients_per_round: 50
-system:
-  federator:
+cluster:
+  orchestrator:
+    wait_for_clients: true
     # Use the SERVICE provided by the fl-server to connect
-    hostname: 'fl-server.test.svc.cluster.local'
+    service: 'fl-server.test.svc.cluster.local'
     # Default NIC is eth0
     nic: 'eth0'
-  clients:
-    amount: 50
+  worker:
+    prefix: 'client'
+execution_config:
+  experiment_prefix: 'cloud_experiment'
+  tensor_board_active: true
+  cuda: false
+  net:
+    save_model: false
+    save_temp_model: false
+    save_epoch_interval: 1
+    save_model_path: "models"
+    epoch_save_start_suffix: "start"
+    epoch_save_end_suffix: "end"
+  reproducability:
+    torch_seed: 42
+    arrival_seed: 123
diff --git a/configs/experiment.yaml b/configs/experiment.yaml
deleted file mode 100644
index 3e84f2e3..00000000
--- a/configs/experiment.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
----
-# Experiment configuration
-total_epochs: 5
-epochs_per_cycle: 1
-# Use cuda is available; setting to false will force CPU
-cuda: true
-experiment_prefix: 'experiment_sample'
-output_location: 'output'
-tensor_board_active: true
-system:
-  federator:
-    hostname: '10.5.0.2'
-    nic: 'eth0'
-  clients:
-    amount: 1
diff --git a/configs/local_experiment.yaml b/configs/local_experiment.yaml
deleted file mode 100644
index 4786295a..00000000
--- a/configs/local_experiment.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
-# Experiment configuration
-total_epochs: 35
-epochs_per_cycle: 1
-wait_for_clients: true
-net: Cifar10CNN
-dataset: cifar10
-sampler: "uniform"
-sampler_args:
-  - 0.5 # p degree
-  - 42 # random seed
-# Use cuda is available; setting to false will force CPU
-cuda: false
-experiment_prefix: 'experiment_single_machine'
-output_location: 'output'
-tensor_board_active: true
-clients_per_round: 3
-system:
-  federator:
-    # Use the SERVICE provided by the fl-server to connect
-    hostname: '172.18.0.2'
-    # Default NIC is eth0
-    nic: 'eth0'
-  clients:
-    amount: 3
-# For a simple config is provided in configs/poison.example.yaml
-poison:
-  seed: 420
-  ratio: 0.2
-  attack:
-    type: "flip"
-    config:
-      - 5: 3
-antidote:
-  type: "clustering"
-  f: 0
-  k: 1
-
-
diff --git a/configs/non_iid_experiment.yaml b/configs/non_iid_experiment.yaml
deleted file mode 100644
index 1bf59ed8..00000000
--- a/configs/non_iid_experiment.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
----
-# Experiment configuration
-total_epochs: 30
-epochs_per_cycle: 1
-wait_for_clients: true
-net: Cifar10CNN
-dataset: cifar10
-# Use cuda is available; setting to false will force CPU
-cuda: false
-experiment_prefix: 'experiment_non_iid(dirichlet)'
-output_location: 'output'
-tensor_board_active: true
-clients_per_round: 4
-sampler: "dirichlet" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default)
-sampler_args:
-  - 0.07 # label limit || q probability || alpha || unused
-  - 42 # random seed || random seed || random seed || unused
-system:
-  federator:
-    hostname: '192.168.1.108'
-    nic: 'wlp4s0'
-  clients:
-    amount: 10 # must be multiple of the number of labels for q-sampler and limit-labels
diff --git a/fltk/util/base_config.py b/fltk/util/base_config.py
index 39369129..c8ba83cb 100644
--- a/fltk/util/base_config.py
+++ b/fltk/util/base_config.py
@@ -1,5 +1,4 @@
 from dataclasses import dataclass
-from typing import Dict
 
 import torch
 from dataclasses_json import dataclass_json
@@ -9,10 +8,10 @@
 # SEED = 1
 # torch.manual_seed(SEED)
 
+
 @dataclass
 @dataclass_json
-class ExecutionConfig():
-    cuda: bool = False
+class GeneralNetConfig:
     save_model: bool = False
     save_temp_model: bool = False
     save_epoch_interval: int = 1
@@ -21,14 +20,52 @@ class ExecutionConfig():
     epoch_save_end_suffix = "end"
 
 
+@dataclass(frozen=True)
+@dataclass_json
+class ReproducabilityConfig:
+    torch_seed: int
+    arrival_seed: int
+
+
+@dataclass
+@dataclass_json
+class ExecutionConfig:
+    general_net: GeneralNetConfig
+    reproducability: ReproducabilityConfig
+    experiment_prefix: str = "experiment"
+    tensorboard_active: bool = True
+    cuda: bool = False
+
+
+@dataclass
+@dataclass_json
+class OrchestratorConfig:
+    service: str
+    nic: str
+
+
+@dataclass
+@dataclass_json
+class ClientConfig:
+    prefix: str
+
+
+@dataclass
+@dataclass_json
+class ClusterConfig:
+    orchestrator: OrchestratorConfig
+    client: ClientConfig
+    wait_for_clients: bool = True
+
+
 @dataclass
 @dataclass_json
 class BareConfig(object):
     # Configuration parameters for PyTorch and models that are generated.
-    execution_config = ExecutionConfig()
+    execution_config: ExecutionConfig
+    cluster_config: ClusterConfig
 
     def __init__(self):
-        # TODO: Move to external class/object
 
         self.train_data_loader_pickle_path = {
             'cifar10': 'data_loaders/cifar10/train_data_loader.pickle',
@@ -48,47 +85,6 @@ def __init__(self):
         self.default_model_folder_path = "default_models"
         self.data_path = "data"
 
-    def merge_yaml(self, cfg: Dict[str, str] = {}):
-        """
-        @deprecated This function will become redundant after using dataclasses_json to load the values into the object.
-        """
-        if 'total_epochs' in cfg:
-            self.epochs = cfg['total_epochs']
-        if 'epochs_per_cycle' in cfg:
-            self.epochs_per_cycle = cfg['epochs_per_cycle']
-        if 'wait_for_clients' in cfg:
-            self.wait_for_clients = cfg['wait_for_clients']
-        if 'net' in cfg:
-            self.set_net_by_name(cfg['net'])
-        if 'dataset' in cfg:
-            self.dataset_name = cfg['dataset']
-        if 'experiment_prefix' in cfg:
-            self.experiment_prefix = cfg['experiment_prefix']
-        if 'output_location' in cfg:
-            self.output_location = cfg['output_location']
-        if 'tensor_board_active' in cfg:
-            self.tensor_board_active = cfg['tensor_board_active']
-        if 'clients_per_round' in cfg:
-            self.clients_per_round = cfg['clients_per_round']
-        if 'system' in cfg:
-            if 'clients' in cfg['system']:
-                if 'amount' in cfg['system']['clients']:
-                    self.world_size = cfg['system']['clients']['amount'] + 1
-
-        if 'system' in cfg:
-            if 'federator' in cfg['system']:
-                if 'hostname' in cfg['system']['federator']:
-                    self.federator_host = cfg['system']['federator']['hostname']
-        if 'cuda' in cfg:
-            if cfg['cuda']:
-                self.cuda = True
-            else:
-                self.cuda = False
-        if 'sampler' in cfg:
-            self.data_sampler = cfg['sampler']
-        if 'sampler_args' in cfg:
-            self.data_sampler_args = cfg['sampler_args']
-
     def get_dataloader_list(self):
         """
         @deprecated